diff --git a/README.ARCH.md b/README.ARCH.md new file mode 100644 index 0000000..4453d5b --- /dev/null +++ b/README.ARCH.md @@ -0,0 +1,155 @@ +# Архитектура приложения + +Документ описывает модульную архитектуру backend-приложения, связи между модулями и контрактные границы. + +## 1. Диаграмма модулей и взаимосвязей + +```mermaid +flowchart LR + UI["Клиент / Frontend"] --> API["FastAPI (app/main.py)"] + + API --> CHAT["chat модуль"] + API --> RAG["rag модуль"] + API --> AGENT_INTERNAL["internal tools API"] + + CHAT -->|AgentRunner| AGENT["agent модуль"] + AGENT -->|RagRetriever| RAG + AGENT --> DB[(PostgreSQL + pgvector)] + CHAT --> DB + RAG --> DB + + AGENT --> GIGA["GigaChat API"] + RAG --> GIGA + + AGENT_INTERNAL --> AGENT + + APP["ModularApplication\n(app/modules/application.py)"] --> CHAT + APP --> AGENT + APP --> RAG +``` + +### Внутренние слои `agent` + +```mermaid +flowchart TB + ROUTER["Router\n(intent + context)"] --> ORCH["Orchestrator\n(task spec + plan + execution + quality)"] + ORCH --> GRAPHS["Action Graphs\nLangGraph"] + + ORCH --> METRICS["Quality Metrics\n(faithfulness/coverage)"] + METRICS --> DB[(agent_quality_metrics)] +``` + +## 2. Описание модулей + +### Модуль `app/modules/chat` + +- Цель: принять пользовательский запрос, управлять задачей обработки, отдать результат и прогресс. +- Кратко о реализации: + - `ChatModule` публикует HTTP API (`/api/chat/*`, `/api/tasks/*`, `/api/events`). + - `ChatOrchestrator` запускает обработку асинхронной задачи, публикует SSE-события, обрабатывает retry и ошибки. + - `TaskStore`, `DialogSessionStore`, `IdempotencyStore` держат состояние задач/диалогов. +- С кем взаимодействует: + - с `agent` через контракт `AgentRunner`. + - с `rag` для проверки `rag_session_id`. + - с `shared/event_bus` для стриминга прогресса. + - с БД через `ChatRepository` (`dialog_sessions`, `chat_messages`). +- Контракты: + - потребляет `AgentRunner.run(...)` из `app/modules/contracts.py`. 
+ +### Модуль `app/modules/agent` + +- Цель: интеллектуальная обработка запроса, маршрутизация, оркестрация сценария, генерация ответа/changeset. +- Кратко о реализации: + - `GraphAgentRuntime` выполняет pipeline: route -> task spec -> orchestrator -> post-processing. + - Router (`engine/router/*`) выбирает `domain/process` и хранит routing-context. + - Orchestrator (`engine/orchestrator/*`) строит и валидирует plan, исполняет шаги, запускает графовые/функциональные actions. + - Graphs (`engine/graphs/*`) выполняют целевые действия (QA, edits, docs). + - Рассчитывает quality-метрики (`faithfulness`, `coverage`) и сохраняет их в БД. + - Поддерживает внутренний инструмент получения страниц Confluence (`/internal/tools/confluence/fetch`). +- С кем взаимодействует: + - с `rag` через контракт `RagRetriever`. + - с `shared/checkpointer` (LangGraph checkpoints в PostgreSQL). + - с GigaChat (LLM-запросы, промпты). + - с БД через `AgentRepository` (`router_context`, `agent_quality_metrics`). +- Контракты: + - реализует `AgentRunner` (используется `chat`). + - потребляет `RagRetriever` (реализуется `rag`). + +### Модуль `app/modules/rag` + +- Цель: индексация проектных файлов и retrieval релевантного контекста. +- Кратко о реализации: + - API для snapshot/changes индексации и retrieval. + - Индексация хранит чанки, эмбеддинги и состояние job. + - Retrieval ищет релевантные куски в `rag_chunks` (pgvector). +- С кем взаимодействует: + - с `agent` (выдача контекста через `RagRetriever`). + - с БД (`rag_sessions`, `rag_chunks`, `rag_index_jobs`). + - с GigaChat Embeddings. +- Контракты: + - реализует `RagRetriever` и `RagIndexer` из `app/modules/contracts.py`. + +### Модуль `app/modules/shared` + +- Цель: общие инфраструктурные компоненты, переиспользуемые всеми модулями. +- Кратко о реализации: + - `db.py`: engine/session factory. + - `event_bus.py`: pub/sub для SSE. + - `retry_executor.py`: общий retry. + - `checkpointer.py`: PostgresSaver для LangGraph. 
+ - `bootstrap.py`: инициализация схем БД на старте. +- С кем взаимодействует: + - со всеми бизнес-модулями (`chat`, `agent`, `rag`). +- Контракты: + - внутренние инфраструктурные API без отдельного публичного контракта уровня `contracts.py`. + +### Модуль `app/modules/contracts.py` + +- Цель: зафиксировать межмодульные интерфейсы и отделить реализацию от потребителей. +- Кратко о реализации: + - `AgentRunner`, `RagRetriever`, `RagIndexer` определены как `Protocol`. +- С кем взаимодействует: + - используется `chat` (как потребитель `AgentRunner`), `agent` (как потребитель `RagRetriever`), `rag` (как реализация `RagRetriever`/`RagIndexer`). +- Контракты: + - это и есть контрактный слой. + +### Модуль композиции `app/modules/application.py` + +- Цель: централизованный wiring зависимостей. +- Кратко о реализации: + - `ModularApplication` создаёт `EventBus`, `RetryExecutor`, репозитории и модули. + - На `startup()` выполняет bootstrap БД. +- С кем взаимодействует: + - со всеми модулями, но не содержит бизнес-логики. +- Контракты: + - использует `contracts.py` для сборки зависимостей без жёсткого сцепления по реализациям. + +## 3. Ключевые контрактные границы + +- `chat -> agent`: только через `AgentRunner`. +- `agent -> rag`: только через `RagRetriever`. +- `rag`: не зависит от `agent` internals. +- `application.py`: единственная точка связывания реализаций. + +## 4. Схема данных (кратко) + +- `chat`: `dialog_sessions`, `chat_messages`. +- `rag`: `rag_sessions`, `rag_chunks`, `rag_index_jobs`. +- `agent`: + - `router_context` — контекст маршрутизации по диалогу. + - `agent_quality_metrics` — пер-сценарные quality-метрики для отчетности: + - `faithfulness_score`, `coverage_score`, `quality_status`, `metrics_json`, `created_at`. + - `story_records` — карточка Story (статус, владелец, метаданные). + - `story_artifacts` — артефакты по Story (analytics/doc_increment/test_model и версии). + - `story_links` — внешние связи Story (тикеты, документы, URL). + +## 5. 
Поток обработки запроса + +1. Пользователь отправляет сообщение в `chat`. +2. `chat` создаёт task и вызывает `AgentRunner.run(...)`. +3. `agent/router` выбирает маршрут (domain/process). +4. `agent/orchestrator` строит `TaskSpec` и `ExecutionPlan`. +5. Выполняются шаги плана (function/actions/graph steps). +6. Формируется `answer` или `changeset`. +7. Считаются `faithfulness/coverage`, сохраняются в `agent_quality_metrics`. +8. `chat` возвращает результат и стримит прогресс через SSE. diff --git a/README.DB.STORY_PLAN.md b/README.DB.STORY_PLAN.md new file mode 100644 index 0000000..2af8111 --- /dev/null +++ b/README.DB.STORY_PLAN.md @@ -0,0 +1,271 @@ +# План доработки БД для хранения контекста Story и метаданных RAG + +## Цель +Зафиксировать проект миграции, который: +- добавляет в таблицу чанков признаки артефакта (тип, источник, контекст), +- вводит отдельный контур хранения инкремента по `story_id`, +- не зависит от выбранного режима RAG (общий/сессионный/гибридный). + +## Границы +- Документ описывает план и целевую схему. +- Реализация SQL-миграций и backfill выполняется отдельным шагом после согласования. + +## 1) Метаданные чанков (RAG-слой) + +### 1.1. Что добавить +Для таблицы `rag_chunks` (или эквивалента таблицы чанков) добавить поля: +- `artifact_type` (`REQ|ARCH|API|DB|UI|CODE|OTHER`) +- `path` (нормализованный относительный путь файла) +- `section` (заголовок/логический раздел документа) +- `doc_id` (стабильный идентификатор документа) +- `doc_version` (версия документа/ревизия) +- `owner` (ответственная команда/человек) +- `system_component` (система/подсистема/компонент) +- `last_modified` (время последнего изменения источника) +- `staleness_score` (0..1, в первую очередь для `CODE`) + +### 1.2. Ограничения и индексы +- `CHECK` для `artifact_type` и диапазона `staleness_score`. +- Индексы: + - `(artifact_type)` + - `(doc_id, doc_version)` + - `(system_component)` + - `(path)` + - GIN/BTREE по потребности для фильтрации в retrieval. 
+ +## 2) Контур Story (отдельно от чанков) + +### 2.1. Таблица `story_records` +Карточка Story: +- `story_id` (PK, строковый уникальный идентификатор) +- `project_id` (идентификатор проекта/репозитория) +- `title` +- `status` (`draft|in_progress|review|done|archived`) +- `baseline_commit_sha` (базовый снимок) +- `snapshot_id` (опционально для session-RAG) +- `created_at`, `updated_at` +- `created_by`, `updated_by` + +Индексы: +- `(project_id)` +- `(status)` +- `(updated_at)` + +### 2.2. Таблица `story_artifacts` +Связь Story с артефактами изменений: +- `id` (PK) +- `story_id` (FK -> `story_records.story_id`) +- `artifact_role` (`requirement|analysis|doc_change|test_model|note|decision|risk`) +- `doc_id` +- `doc_version` +- `path` +- `section` +- `chunk_id` (nullable; ссылка на chunk если стабильно поддерживается) +- `change_type` (`added|updated|removed|linked`) +- `summary` (краткое описание изменения) +- `source_ref` (ссылка/внешний id) +- `created_at` +- `created_by` + +Уникальность (черновик): +- `UNIQUE(story_id, artifact_role, COALESCE(doc_id,''), COALESCE(path,''), COALESCE(section,''), COALESCE(change_type,''))` + +Индексы: +- `(story_id, artifact_role)` +- `(story_id, change_type)` +- `(doc_id, doc_version)` +- `(path)` + +### 2.3. Таблица `story_links` +Связи Story с внешними сущностями и Story-to-Story: +- `id` (PK) +- `story_id` (FK) +- `link_type` (`story|adr|ticket|pr|commit|doc|external`) +- `target_ref` (идентификатор/ссылка) +- `description` +- `created_at` + +Индексы: +- `(story_id, link_type)` +- `(target_ref)` + +## 3) Почему `story_id` не в чанках +- Один чанк может относиться к нескольким Story. +- Чанки нестабильны при переиндексации. +- Разделение слоев упрощает поддержку и не привязывает модель к типу RAG. + +Итог: связь Story и чанков/документов хранить в `story_artifacts`, а не в `rag_chunks`. + +## 4) Целевая модель RAG: Hybrid-Lite +Выбранный вектор на текущем этапе: `Session-first + Shared Cache + Story Ledger`. + +### 4.1. 
Принципы +- Рабочий retrieval выполняется из сессионного индекса (видит незакоммиченные изменения). +- Общий кэш чанков/эмбеддингов используется только для ускорения индексации. +- Источник правды по инкременту Story находится в Story-таблицах, а не в RAG-индексе. + +### 4.2. Что хранить дополнительно +- `rag_blob_cache`: кэш файловых blob по `repo_id + blob_sha`. +- `rag_chunk_cache`: кэш чанков/эмбеддингов, привязанный к `blob_sha`. +- `rag_session_chunk_map`: привязка сессии к используемым chunk (чтобы retrieval был изолированным). +- `session_artifacts`: временные артефакты сессии до появления `story_id` (late binding). + +### 4.3. Алгоритм индексации (delta-only) +1. На старте сессии сканировать рабочее дерево и считать `blob_sha` для файлов индексации. +2. Для каждого файла: + - `cache hit`: взять chunk/embedding из кэша и связать с текущей сессией. + - `cache miss`: выполнить chunk+embed и записать результат в кэш. +3. Для retrieval использовать `rag_session_chunk_map` как первичный источник. +4. При необходимости делать fallback к cache-scoped данным по `repo_id` (опционально, под флагом). + +### 4.4. Почему это подходит +- Нет необходимости в сложном ACL общего RAG на уровне приложения. +- Нет обязательной зависимости от ручного commit, индекс отражает локальные изменения. +- Снижается время загрузки сессии за счет переиспользования эмбеддингов. +- История Story не теряется и не зависит от режима RAG. + +### 4.5. Late binding `story_id` (целевой процесс) +1. Аналитик запускает работу только со ссылкой на документ (без `story_id`). +2. Агент обрабатывает задачу в `session-RAG` и сохраняет все изменения в `session_artifacts`. +3. Аналитик вручную делает commit и указывает `story_id`. +4. Вебхук на commit: + - извлекает `story_id` из commit metadata/message, + - обновляет репозиторный RAG, + - выполняет `bind session -> story`: переносит/привязывает `session_artifacts` к `story_artifacts`, + - фиксирует связь `story_id <-> commit_sha <-> changed_files`. +5. 
Исходный документ аналитики тоже попадает в контекст Story ретроспективно, даже если изначально был без `story_id`. + +## 5) Черновик DDL (PostgreSQL) +```sql +-- 0. Enum-like checks можно заменить на справочники при необходимости + +-- A) Session artifacts (временный слой до появления story_id) +CREATE TABLE IF NOT EXISTS session_artifacts ( + id BIGSERIAL PRIMARY KEY, + session_id TEXT NOT NULL, + project_id TEXT NOT NULL, + artifact_role TEXT NOT NULL, + source_ref TEXT, + doc_id TEXT, + doc_version TEXT, + path TEXT, + section TEXT, + chunk_id TEXT, + change_type TEXT, + summary TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + created_by TEXT, + CONSTRAINT chk_session_artifact_role CHECK (artifact_role IN ( + 'analysis','doc_change','note','decision','risk','test_model' + )), + CONSTRAINT chk_session_change_type CHECK (change_type IS NULL OR change_type IN ( + 'added','updated','removed','linked' + )) +); + +CREATE INDEX IF NOT EXISTS idx_session_artifacts_session ON session_artifacts(session_id); +CREATE INDEX IF NOT EXISTS idx_session_artifacts_project ON session_artifacts(project_id); +CREATE INDEX IF NOT EXISTS idx_session_artifacts_role ON session_artifacts(artifact_role); + +-- 1) Story records +CREATE TABLE IF NOT EXISTS story_records ( + story_id TEXT PRIMARY KEY, + project_id TEXT NOT NULL, + title TEXT, + status TEXT NOT NULL DEFAULT 'draft', + baseline_commit_sha TEXT, + snapshot_id TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + created_by TEXT, + updated_by TEXT, + CONSTRAINT chk_story_status CHECK (status IN ( + 'draft','in_progress','review','done','archived' + )) +); + +CREATE INDEX IF NOT EXISTS idx_story_records_project ON story_records(project_id); +CREATE INDEX IF NOT EXISTS idx_story_records_status ON story_records(status); +CREATE INDEX IF NOT EXISTS idx_story_records_updated_at ON story_records(updated_at DESC); + +-- 2) Story artifacts +CREATE TABLE IF NOT EXISTS 
story_artifacts ( + id BIGSERIAL PRIMARY KEY, + story_id TEXT NOT NULL REFERENCES story_records(story_id) ON DELETE CASCADE, + artifact_role TEXT NOT NULL, + doc_id TEXT, + doc_version TEXT, + path TEXT, + section TEXT, + chunk_id TEXT, + change_type TEXT, + summary TEXT, + source_ref TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + created_by TEXT, + CONSTRAINT chk_story_artifact_role CHECK (artifact_role IN ( + 'requirement','analysis','doc_change','test_model','note','decision','risk' + )), + CONSTRAINT chk_story_change_type CHECK (change_type IS NULL OR change_type IN ( + 'added','updated','removed','linked' + )) +); + +CREATE INDEX IF NOT EXISTS idx_story_artifacts_story_role ON story_artifacts(story_id, artifact_role); +CREATE INDEX IF NOT EXISTS idx_story_artifacts_story_change ON story_artifacts(story_id, change_type); +CREATE INDEX IF NOT EXISTS idx_story_artifacts_doc ON story_artifacts(doc_id, doc_version); +CREATE INDEX IF NOT EXISTS idx_story_artifacts_path ON story_artifacts(path); + +-- Вариант уникальности можно уточнить после согласования процессов + +-- 3) Story links +CREATE TABLE IF NOT EXISTS story_links ( + id BIGSERIAL PRIMARY KEY, + story_id TEXT NOT NULL REFERENCES story_records(story_id) ON DELETE CASCADE, + link_type TEXT NOT NULL, + target_ref TEXT NOT NULL, + description TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT chk_story_link_type CHECK (link_type IN ( + 'story','adr','ticket','pr','commit','doc','external' + )) +); + +CREATE INDEX IF NOT EXISTS idx_story_links_story_type ON story_links(story_id, link_type); +CREATE INDEX IF NOT EXISTS idx_story_links_target_ref ON story_links(target_ref); +``` + +## 6) План внедрения (после согласования) +1. Подтвердить перечень полей и enum-значений. +2. Подготовить SQL-миграцию `Vxxx__story_context.sql`. +3. Обновить bootstrap/инициализацию схемы. +4. Обновить репозитории для `story_records/story_artifacts/story_links`. +5. 
Добавить таблицу и репозиторий `session_artifacts` (session-scoped артефакты без `story_id`). +6. Добавить запись session-артефактов в оркестраторе во время работы аналитика. +7. Добавить webhook-обработчик `bind session -> story` при появлении commit со `story_id`. +8. Добавить API/сервисный метод `get_story_context(story_id)` для повторного входа в Story. +9. Добавить тесты: + - unit на репозитории, + - интеграционные на happy-path записи/чтения, + - регресс на отсутствие зависимости от типа RAG. +10. Добавить миграцию для `rag_blob_cache/rag_chunk_cache/rag_session_chunk_map`. +11. Внедрить `delta-only` индексацию для session-RAG с переиспользованием кэша. + +## 7) Открытые вопросы +- Нужен ли отдельный справочник для `artifact_type`, `artifact_role`, `link_type`. +- Что считать `doc_version`: semver, дата, commit, hash файла. +- Нужна ли soft-delete политика для Story. +- Требуется ли аудит (кто/когда менял `summary` и связи). +- Какой уровень обязательности `chunk_id` (опционален по умолчанию). +- Нужна ли TTL/очистка для `rag_blob_cache/rag_chunk_cache`. +- Делать ли fallback к репозиторному кэшу по умолчанию или только при explicit-флаге. +- Как определять соответствие `session_id` и commit в webhook (1:1, последний активный, explicit token). +- Как долго хранить `session_artifacts` до bind/cleanup. + +## 8) Критерии готовности +- По `story_id` можно восстановить инкремент без исходной сессии. +- История изменений не теряется при переиндексации RAG. +- Аналитик и тестировщик используют один `story_id` как общий ключ контекста. +- Схема работает при любом выбранном режиме RAG. +- Session-RAG поднимается быстрее за счет cache hit по неизмененным файлам. +- Артефакты аналитика, созданные до появления `story_id`, корректно попадают в Story после commit/webhook bind. 
diff --git a/README.md b/README.md index 304b0f4..62b827b 100644 --- a/README.md +++ b/README.md @@ -89,14 +89,14 @@ Notes: - Route selection: - `default/general` -> answer flow - `project/qa` -> answer flow - - `project/edits` -> conservative changeset flow for non-code file updates + - `project/edits` -> conservative changeset flow for non-code file updates (hybrid output: `proposed_content` + `hunks`) - `docs/generation` -> answer and/or changeset flow - LLM provider: GigaChat (`chat/completions`) - Prompts for graph LLM nodes: `app/modules/agent/prompts/*.txt` - `general_answer.txt` - `project_answer.txt` - `project_edits_plan.txt` - - `project_edits_apply.txt` + - `project_edits_hunks.txt` - `project_edits_self_check.txt` - `docs_generation.txt` - `docs_execution_summary.txt` diff --git a/app/__pycache__/main.cpython-312.pyc b/app/__pycache__/main.cpython-312.pyc index 009e082..2ec3f68 100644 Binary files a/app/__pycache__/main.cpython-312.pyc and b/app/__pycache__/main.cpython-312.pyc differ diff --git a/app/main.py b/app/main.py index f8619d6..e28c308 100644 --- a/app/main.py +++ b/app/main.py @@ -18,8 +18,9 @@ def create_app() -> FastAPI: ) app.include_router(modules.chat.public_router()) - app.include_router(modules.rag.public_router()) - app.include_router(modules.rag.internal_router()) + app.include_router(modules.rag_session.public_router()) + app.include_router(modules.rag_session.internal_router()) + app.include_router(modules.rag_repo.internal_router()) app.include_router(modules.agent.internal_router()) register_error_handlers(app) diff --git a/app/modules/__pycache__/application.cpython-312.pyc b/app/modules/__pycache__/application.cpython-312.pyc index 1f2b3b0..ad16c08 100644 Binary files a/app/modules/__pycache__/application.cpython-312.pyc and b/app/modules/__pycache__/application.cpython-312.pyc differ diff --git a/app/modules/__pycache__/contracts.cpython-312.pyc b/app/modules/__pycache__/contracts.cpython-312.pyc index a7a5ed4..5428e77 100644 
Binary files a/app/modules/__pycache__/contracts.cpython-312.pyc and b/app/modules/__pycache__/contracts.cpython-312.pyc differ diff --git a/app/modules/agent/README.md b/app/modules/agent/README.md new file mode 100644 index 0000000..ad8574c --- /dev/null +++ b/app/modules/agent/README.md @@ -0,0 +1,60 @@ +# Модуль agent + +## 1. Функции модуля +- Оркестрация выполнения пользовательского запроса поверх роутера интентов и графов. +- Формирование `TaskSpec`, запуск оркестратора шагов и сборка финального результата. +- Реализация необходимых для агента tools и их интеграция с остальной логикой выполнения. +- Сохранение quality-метрик и session-артефактов для последующей привязки к Story. + +## 2. Диаграмма классов и взаимосвязей +```mermaid +classDiagram + class AgentModule + class GraphAgentRuntime + class OrchestratorService + class TaskSpecBuilder + class StorySessionRecorder + class StoryContextRepository + class ConfluenceService + class AgentRepository + + AgentModule --> GraphAgentRuntime + AgentModule --> ConfluenceService + AgentModule --> StorySessionRecorder + StorySessionRecorder --> StoryContextRepository + GraphAgentRuntime --> OrchestratorService + GraphAgentRuntime --> TaskSpecBuilder + GraphAgentRuntime --> AgentRepository + GraphAgentRuntime --> ConfluenceService +``` + +## 3. Описание классов +- `AgentModule`: собирает runtime и публикует внутренние tools-роуты. + Методы: `__init__` — связывает зависимости модуля; `internal_router` — регистрирует internal API tools. +- `GraphAgentRuntime`: основной исполнитель агентного запроса. + Методы: `run` — выполняет цикл route -> retrieval -> orchestration -> ответ/changeset. +- `OrchestratorService`: управляет планом шагов и выполнением quality gates. + Методы: `run` — строит, валидирует и исполняет execution plan. +- `TaskSpecBuilder`: формирует спецификацию задачи для оркестратора. + Методы: `build` — собирает `TaskSpec` из route, контекстов и ограничений. 
+- `StorySessionRecorder`: пишет session-scoped артефакты для последующего bind к Story. + Методы: `record_run` — сохраняет входные источники и выходные артефакты сессии. +- `StoryContextRepository`: репозиторий Story-контекста и его связей. + Методы: `record_story_commit` — фиксирует commit-контекст Story; `upsert_story` — создает/обновляет карточку Story; `add_session_artifact` — добавляет session-артефакт; `bind_session_to_story` — переносит артефакты сессии в Story; `add_artifact` — добавляет версионный Story-артефакт; `get_story_context` — возвращает агрегированный контекст Story. +- `ConfluenceService`: tool для загрузки страницы по URL. + Методы: `fetch_page` — валидирует URL и возвращает нормализованный payload страницы. +- `AgentRepository`: хранение router-контекста и quality-метрик. + Методы: `ensure_tables` — создает таблицы модуля; `get_router_context` — читает контекст маршрутизации; `update_router_context` — обновляет историю диалога и last-route; `save_quality_metrics` — сохраняет метрики качества; `get_quality_metrics` — читает историю метрик. + +## 4. Сиквенс-диаграммы API + +### POST /internal/tools/confluence/fetch +Назначение: загружает страницу Confluence по URL и возвращает ее контент для дальнейшего использования в сценариях агента. 
+```mermaid +sequenceDiagram + participant Router as AgentModule.APIRouter + participant Confluence as ConfluenceService + + Router->>Confluence: fetch_page(url) + Confluence-->>Router: page(content_markdown, metadata) +``` diff --git a/app/modules/agent/__pycache__/module.cpython-312.pyc b/app/modules/agent/__pycache__/module.cpython-312.pyc index ce87d5a..8912070 100644 Binary files a/app/modules/agent/__pycache__/module.cpython-312.pyc and b/app/modules/agent/__pycache__/module.cpython-312.pyc differ diff --git a/app/modules/agent/__pycache__/repo_webhook_service.cpython-312.pyc b/app/modules/agent/__pycache__/repo_webhook_service.cpython-312.pyc new file mode 100644 index 0000000..451d087 Binary files /dev/null and b/app/modules/agent/__pycache__/repo_webhook_service.cpython-312.pyc differ diff --git a/app/modules/agent/__pycache__/repository.cpython-312.pyc b/app/modules/agent/__pycache__/repository.cpython-312.pyc index 2cdc549..fdbb0a2 100644 Binary files a/app/modules/agent/__pycache__/repository.cpython-312.pyc and b/app/modules/agent/__pycache__/repository.cpython-312.pyc differ diff --git a/app/modules/agent/__pycache__/service.cpython-312.pyc b/app/modules/agent/__pycache__/service.cpython-312.pyc index 52daeee..a030cfb 100644 Binary files a/app/modules/agent/__pycache__/service.cpython-312.pyc and b/app/modules/agent/__pycache__/service.cpython-312.pyc differ diff --git a/app/modules/agent/__pycache__/story_context_repository.cpython-312.pyc b/app/modules/agent/__pycache__/story_context_repository.cpython-312.pyc new file mode 100644 index 0000000..9652ab6 Binary files /dev/null and b/app/modules/agent/__pycache__/story_context_repository.cpython-312.pyc differ diff --git a/app/modules/agent/__pycache__/story_session_recorder.cpython-312.pyc b/app/modules/agent/__pycache__/story_session_recorder.cpython-312.pyc new file mode 100644 index 0000000..edf89cf Binary files /dev/null and b/app/modules/agent/__pycache__/story_session_recorder.cpython-312.pyc 
# Public factory name -> submodule that defines it.  The submodules are
# imported only when the corresponding name is first requested, so importing
# this package does not import every graph implementation up front.
_LAZY_EXPORTS = {
    "BaseGraphFactory": "app.modules.agent.engine.graphs.base_graph",
    "DocsGraphFactory": "app.modules.agent.engine.graphs.docs_graph",
    "ProjectEditsGraphFactory": "app.modules.agent.engine.graphs.project_edits_graph",
    "ProjectQaGraphFactory": "app.modules.agent.engine.graphs.project_qa_graph",
}


def __getattr__(name: str):
    """Resolve a lazily exported graph factory (module-level ``__getattr__``).

    Args:
        name: Attribute requested on this package.

    Returns:
        The factory class named ``name``, imported from its defining submodule.

    Raises:
        AttributeError: ``name`` is not one of the lazily exported factories.
        ImportError: the defining submodule cannot be imported or does not
            provide ``name``.
    """
    module_path = _LAZY_EXPORTS.get(name)
    if module_path is None:
        # Conventional wording, so the error reads like a normal missing attribute.
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    from importlib import import_module

    module = import_module(module_path)
    try:
        value = getattr(module, name)
    except AttributeError as exc:
        # Keep the failure type a `from ... import ...` statement would produce.
        raise ImportError(f"cannot import name {name!r} from {module_path!r}") from exc

    # Cache the result: later lookups find it in the module dict and never
    # reach this hook again.
    globals()[name] = value
    return value
b/app/modules/agent/engine/graphs/__pycache__/base_graph.cpython-312.pyc differ diff --git a/app/modules/agent/engine/graphs/__pycache__/project_edits_contract.cpython-312.pyc b/app/modules/agent/engine/graphs/__pycache__/project_edits_contract.cpython-312.pyc new file mode 100644 index 0000000..eeca60a Binary files /dev/null and b/app/modules/agent/engine/graphs/__pycache__/project_edits_contract.cpython-312.pyc differ diff --git a/app/modules/agent/engine/graphs/__pycache__/project_edits_graph.cpython-312.pyc b/app/modules/agent/engine/graphs/__pycache__/project_edits_graph.cpython-312.pyc index dcd3bcc..bcc97b5 100644 Binary files a/app/modules/agent/engine/graphs/__pycache__/project_edits_graph.cpython-312.pyc and b/app/modules/agent/engine/graphs/__pycache__/project_edits_graph.cpython-312.pyc differ diff --git a/app/modules/agent/engine/graphs/__pycache__/project_edits_logic.cpython-312.pyc b/app/modules/agent/engine/graphs/__pycache__/project_edits_logic.cpython-312.pyc index 91ab4d3..13de4c0 100644 Binary files a/app/modules/agent/engine/graphs/__pycache__/project_edits_logic.cpython-312.pyc and b/app/modules/agent/engine/graphs/__pycache__/project_edits_logic.cpython-312.pyc differ diff --git a/app/modules/agent/engine/graphs/__pycache__/project_edits_patcher.cpython-312.pyc b/app/modules/agent/engine/graphs/__pycache__/project_edits_patcher.cpython-312.pyc new file mode 100644 index 0000000..8a3773c Binary files /dev/null and b/app/modules/agent/engine/graphs/__pycache__/project_edits_patcher.cpython-312.pyc differ diff --git a/app/modules/agent/engine/graphs/__pycache__/project_edits_support.cpython-312.pyc b/app/modules/agent/engine/graphs/__pycache__/project_edits_support.cpython-312.pyc new file mode 100644 index 0000000..54377ce Binary files /dev/null and b/app/modules/agent/engine/graphs/__pycache__/project_edits_support.cpython-312.pyc differ diff --git a/app/modules/agent/engine/graphs/__pycache__/project_qa_graph.cpython-312.pyc 
b/app/modules/agent/engine/graphs/__pycache__/project_qa_graph.cpython-312.pyc index 8d231ff..5166f16 100644 Binary files a/app/modules/agent/engine/graphs/__pycache__/project_qa_graph.cpython-312.pyc and b/app/modules/agent/engine/graphs/__pycache__/project_qa_graph.cpython-312.pyc differ diff --git a/app/modules/agent/engine/graphs/__pycache__/state.cpython-312.pyc b/app/modules/agent/engine/graphs/__pycache__/state.cpython-312.pyc index 0332059..df2056d 100644 Binary files a/app/modules/agent/engine/graphs/__pycache__/state.cpython-312.pyc and b/app/modules/agent/engine/graphs/__pycache__/state.cpython-312.pyc differ diff --git a/app/modules/agent/engine/graphs/base_graph.py b/app/modules/agent/engine/graphs/base_graph.py index 28b4c81..b153ec9 100644 --- a/app/modules/agent/engine/graphs/base_graph.py +++ b/app/modules/agent/engine/graphs/base_graph.py @@ -1,9 +1,13 @@ +import logging + from langgraph.graph import END, START, StateGraph from app.modules.agent.engine.graphs.progress import emit_progress_sync from app.modules.agent.llm import AgentLlmService from app.modules.agent.engine.graphs.state import AgentGraphState +LOGGER = logging.getLogger(__name__) + class BaseGraphFactory: def __init__(self, llm: AgentLlmService) -> None: @@ -31,7 +35,13 @@ class BaseGraphFactory: stage="graph.default.context.done", message="Контекст собран, перехожу к формированию ответа.", ) - return {"rag_context": rag, "confluence_context": conf} + result = {"rag_context": rag, "confluence_context": conf} + LOGGER.warning( + "graph step result: graph=default step=context rag_len=%s confluence_len=%s", + len(rag or ""), + len(conf or ""), + ) + return result def _answer_node(self, state: AgentGraphState) -> dict: emit_progress_sync( @@ -55,4 +65,9 @@ class BaseGraphFactory: stage="graph.default.answer.done", message="Черновик ответа подготовлен.", ) - return {"answer": answer} + result = {"answer": answer} + LOGGER.warning( + "graph step result: graph=default step=answer 
answer_len=%s", + len(answer or ""), + ) + return result diff --git a/app/modules/agent/engine/graphs/project_edits_contract.py b/app/modules/agent/engine/graphs/project_edits_contract.py new file mode 100644 index 0000000..1c9e72a --- /dev/null +++ b/app/modules/agent/engine/graphs/project_edits_contract.py @@ -0,0 +1,171 @@ +import re +from dataclasses import dataclass, field + + +@dataclass +class BlockContract: + type: str + max_changed_lines: int = 6 + start_anchor: str = "" + end_anchor: str = "" + old_line: str = "" + + def as_dict(self) -> dict: + return { + "type": self.type, + "max_changed_lines": self.max_changed_lines, + "start_anchor": self.start_anchor, + "end_anchor": self.end_anchor, + "old_line": self.old_line, + } + + +@dataclass +class FileEditContract: + path: str + reason: str + intent: str = "update" + max_hunks: int = 1 + max_changed_lines: int = 8 + allowed_blocks: list[BlockContract] = field(default_factory=list) + + def as_dict(self) -> dict: + return { + "path": self.path, + "reason": self.reason, + "intent": self.intent, + "max_hunks": self.max_hunks, + "max_changed_lines": self.max_changed_lines, + "allowed_blocks": [block.as_dict() for block in self.allowed_blocks], + } + + +class ContractParser: + _supported_block_types = {"append_end", "replace_between", "replace_line_equals"} + + def parse(self, payload: dict, *, request: str, requested_path: str) -> list[dict]: + files = payload.get("files", []) if isinstance(payload, dict) else [] + parsed: list[FileEditContract] = [] + for item in files if isinstance(files, list) else []: + contract = self._parse_file_contract(item) + if contract: + parsed.append(contract) + + if not parsed: + fallback = self._fallback_contract(request=request, requested_path=requested_path) + if fallback: + parsed.append(fallback) + + return [item.as_dict() for item in parsed] + + def _parse_file_contract(self, item: object) -> FileEditContract | None: + if not isinstance(item, dict): + return None + path = 
str(item.get("path", "")).replace("\\", "/").strip() + if not path: + return None + reason = str(item.get("reason", "")).strip() or "Requested user adjustment." + intent = str(item.get("intent", "update")).strip().lower() or "update" + if intent not in {"update", "create"}: + intent = "update" + max_hunks = self._clamp_int(item.get("max_hunks"), default=1, min_value=1, max_value=5) + max_changed_lines = self._clamp_int(item.get("max_changed_lines"), default=8, min_value=1, max_value=120) + blocks: list[BlockContract] = [] + raw_blocks = item.get("allowed_blocks", []) + for raw in raw_blocks if isinstance(raw_blocks, list) else []: + block = self._parse_block(raw) + if block: + blocks.append(block) + if not blocks: + return None + return FileEditContract( + path=path, + reason=reason, + intent=intent, + max_hunks=max_hunks, + max_changed_lines=max_changed_lines, + allowed_blocks=blocks, + ) + + def _parse_block(self, raw: object) -> BlockContract | None: + if not isinstance(raw, dict): + return None + kind = self._normalize_block_type(str(raw.get("type", "")).strip().lower()) + if kind not in self._supported_block_types: + return None + max_changed_lines = self._clamp_int(raw.get("max_changed_lines"), default=6, min_value=1, max_value=80) + block = BlockContract( + type=kind, + max_changed_lines=max_changed_lines, + start_anchor=str(raw.get("start_anchor", "")).strip(), + end_anchor=str(raw.get("end_anchor", "")).strip(), + old_line=str(raw.get("old_line", "")).strip(), + ) + if block.type == "replace_between" and (not block.start_anchor or not block.end_anchor): + return None + if block.type == "replace_line_equals" and not block.old_line: + return None + return block + + def _fallback_contract(self, *, request: str, requested_path: str) -> FileEditContract | None: + path = requested_path.strip() + if not path: + return None + low = (request or "").lower() + if any(marker in low for marker in ("в конец", "в самый конец", "append to end", "append at the end")): + 
return FileEditContract( + path=path, + reason="Append-only update inferred from user request.", + intent="update", + max_hunks=1, + max_changed_lines=8, + allowed_blocks=[BlockContract(type="append_end", max_changed_lines=8)], + ) + quoted = self._extract_quoted_line(request) + if quoted: + return FileEditContract( + path=path, + reason="Single-line replacement inferred from quoted segment in user request.", + intent="update", + max_hunks=1, + max_changed_lines=4, + allowed_blocks=[BlockContract(type="replace_line_equals", old_line=quoted, max_changed_lines=4)], + ) + return None + + def _extract_quoted_line(self, text: str) -> str: + value = (text or "").strip() + patterns = [ + r"`([^`]+)`", + r"\"([^\"]+)\"", + r"'([^']+)'", + r"«([^»]+)»", + ] + for pattern in patterns: + match = re.search(pattern, value) + if not match: + continue + candidate = match.group(1).strip() + if candidate: + return candidate + return "" + + def _normalize_block_type(self, value: str) -> str: + mapping = { + "append": "append_end", + "append_eof": "append_end", + "end_append": "append_end", + "replace_block": "replace_between", + "replace_section": "replace_between", + "replace_range": "replace_between", + "replace_line": "replace_line_equals", + "line_equals": "replace_line_equals", + } + return mapping.get(value, value) + + def _clamp_int(self, value: object, *, default: int, min_value: int, max_value: int) -> int: + try: + numeric = int(value) # type: ignore[arg-type] + except Exception: + numeric = default + return max(min_value, min(max_value, numeric)) diff --git a/app/modules/agent/engine/graphs/project_edits_graph.py b/app/modules/agent/engine/graphs/project_edits_graph.py index c390847..47291d4 100644 --- a/app/modules/agent/engine/graphs/project_edits_graph.py +++ b/app/modules/agent/engine/graphs/project_edits_graph.py @@ -1,3 +1,5 @@ +import logging + from langgraph.graph import END, START, StateGraph from app.modules.agent.engine.graphs.progress import emit_progress_sync 
@@ -5,6 +7,8 @@ from app.modules.agent.engine.graphs.project_edits_logic import ProjectEditsLogi from app.modules.agent.engine.graphs.state import AgentGraphState from app.modules.agent.llm import AgentLlmService +LOGGER = logging.getLogger(__name__) + class ProjectEditsGraphFactory: _max_validation_attempts = 2 @@ -38,7 +42,9 @@ class ProjectEditsGraphFactory: stage="graph.project_edits.collect_context", message="Собираю контекст и релевантные файлы для правок.", ) - return self._logic.collect_context(state) + result = self._logic.collect_context(state) + self._log_step_result("collect_context", result) + return result def _plan_changes(self, state: AgentGraphState) -> dict: emit_progress_sync( @@ -46,7 +52,9 @@ class ProjectEditsGraphFactory: stage="graph.project_edits.plan_changes", message="Определяю, что именно нужно изменить и в каких файлах.", ) - return self._logic.plan_changes(state) + result = self._logic.plan_changes(state) + self._log_step_result("plan_changes", result) + return result def _generate_changeset(self, state: AgentGraphState) -> dict: emit_progress_sync( @@ -54,7 +62,9 @@ class ProjectEditsGraphFactory: stage="graph.project_edits.generate_changeset", message="Формирую предлагаемые правки по выбранным файлам.", ) - return self._logic.generate_changeset(state) + result = self._logic.generate_changeset(state) + self._log_step_result("generate_changeset", result) + return result def _self_check(self, state: AgentGraphState) -> dict: emit_progress_sync( @@ -62,7 +72,9 @@ class ProjectEditsGraphFactory: stage="graph.project_edits.self_check", message="Проверяю, что правки соответствуют запросу и не трогают лишнее.", ) - return self._logic.self_check(state) + result = self._logic.self_check(state) + self._log_step_result("self_check", result) + return result def _build_result(self, state: AgentGraphState) -> dict: emit_progress_sync( @@ -70,10 +82,21 @@ class ProjectEditsGraphFactory: stage="graph.project_edits.build_result", message="Формирую 
итоговый changeset и краткий обзор.", ) - return self._logic.build_result(state) + result = self._logic.build_result(state) + self._log_step_result("build_result", result) + return result def _route_after_self_check(self, state: AgentGraphState) -> str: if state.get("validation_passed"): return "ready" attempts = int(state.get("validation_attempts", 0) or 0) return "ready" if attempts >= self._max_validation_attempts else "retry" + + def _log_step_result(self, step: str, result: dict) -> None: + LOGGER.warning( + "graph step result: graph=project_edits step=%s keys=%s changeset_items=%s answer_len=%s", + step, + sorted(result.keys()), + len(result.get("changeset", []) or []), + len(str(result.get("answer", "") or "")), + ) diff --git a/app/modules/agent/engine/graphs/project_edits_logic.py b/app/modules/agent/engine/graphs/project_edits_logic.py index 47bce45..b48245f 100644 --- a/app/modules/agent/engine/graphs/project_edits_logic.py +++ b/app/modules/agent/engine/graphs/project_edits_logic.py @@ -1,127 +1,34 @@ import json -from difflib import SequenceMatcher -import re -from app.modules.agent.engine.graphs.file_targeting import FileTargeting +from app.modules.agent.engine.graphs.project_edits_contract import ContractParser +from app.modules.agent.engine.graphs.project_edits_patcher import ContractPatcher +from app.modules.agent.engine.graphs.project_edits_support import ProjectEditsSupport from app.modules.agent.engine.graphs.state import AgentGraphState from app.modules.agent.llm import AgentLlmService from app.schemas.changeset import ChangeItem -class ProjectEditsSupport: - def __init__(self, max_context_files: int = 12, max_preview_chars: int = 2500) -> None: - self._max_context_files = max_context_files - self._max_preview_chars = max_preview_chars - - def pick_relevant_files(self, message: str, files_map: dict[str, dict]) -> list[dict]: - tokens = {x for x in (message or "").lower().replace("/", " ").split() if len(x) >= 4} - scored: list[tuple[int, dict]] 
= [] - for path, payload in files_map.items(): - content = str(payload.get("content", "")) - score = 0 - low_path = path.lower() - low_content = content.lower() - for token in tokens: - if token in low_path: - score += 3 - if token in low_content: - score += 1 - scored.append((score, self.as_candidate(payload))) - scored.sort(key=lambda x: (-x[0], x[1]["path"])) - return [item for _, item in scored[: self._max_context_files]] - - def as_candidate(self, payload: dict) -> dict: - return { - "path": str(payload.get("path", "")).replace("\\", "/"), - "content": str(payload.get("content", "")), - "content_hash": str(payload.get("content_hash", "")), - } - - def build_summary(self, state: AgentGraphState, changeset: list[ChangeItem]) -> str: - if not changeset: - return "Правки не сформированы: changeset пуст." - lines = [ - "Выполненные действия:", - f"- Проанализирован запрос: {state.get('message', '')}", - "- Собран контекст проекта и выбран набор файлов для правок.", - f"- Проведен self-check: {state.get('validation_feedback', 'без замечаний')}", - "", - "Измененные файлы:", - ] - for item in changeset[:30]: - lines.append(f"- {item.op.value} {item.path}: {item.reason}") - return "\n".join(lines) - - def normalize_file_output(self, text: str) -> str: - value = (text or "").strip() - if value.startswith("```") and value.endswith("```"): - lines = value.splitlines() - if len(lines) >= 3: - return "\n".join(lines[1:-1]).strip() - return value - - def parse_json(self, raw: str): - text = self.normalize_file_output(raw) - try: - return json.loads(text) - except Exception: - return {} - - def similarity(self, original: str, updated: str) -> float: - return SequenceMatcher(None, original or "", updated or "").ratio() - - def shorten(self, text: str, max_chars: int | None = None) -> str: - limit = max_chars or self._max_preview_chars - value = (text or "").strip() - if len(value) <= limit: - return value - return value[:limit].rstrip() + "\n...[truncated]" - - def 
collapse_whitespace(self, text: str) -> str: - return re.sub(r"\s+", " ", (text or "").strip()) - - def line_change_ratio(self, original: str, updated: str) -> float: - orig_lines = (original or "").splitlines() - new_lines = (updated or "").splitlines() - if not orig_lines and not new_lines: - return 0.0 - matcher = SequenceMatcher(None, orig_lines, new_lines) - changed = 0 - for tag, i1, i2, j1, j2 in matcher.get_opcodes(): - if tag == "equal": - continue - changed += max(i2 - i1, j2 - j1) - total = max(len(orig_lines), len(new_lines), 1) - return changed / total - - def added_headings(self, original: str, updated: str) -> int: - old_heads = {line.strip() for line in (original or "").splitlines() if line.strip().startswith("#")} - new_heads = {line.strip() for line in (updated or "").splitlines() if line.strip().startswith("#")} - return len(new_heads - old_heads) - - class ProjectEditsLogic: def __init__(self, llm: AgentLlmService) -> None: self._llm = llm - self._targeting = FileTargeting() self._support = ProjectEditsSupport() + self._contracts = ContractParser() + self._patcher = ContractPatcher() def collect_context(self, state: AgentGraphState) -> dict: message = state.get("message", "") files_map = state.get("files_map", {}) or {} - requested_path = self._targeting.extract_target_path(message) - preferred = self._targeting.lookup_file(files_map, requested_path) if requested_path else None + requested_path = self._support.lookup_file(files_map, self._extract_path_hint(message)) candidates = self._support.pick_relevant_files(message, files_map) - if preferred and not any(x["path"] == preferred.get("path") for x in candidates): - candidates.insert(0, self._support.as_candidate(preferred)) + if requested_path and not any(x["path"] == requested_path.get("path") for x in candidates): + candidates.insert(0, self._support.as_candidate(requested_path)) return { - "edits_requested_path": str((preferred or {}).get("path") or (requested_path or "")).strip(), + 
"edits_requested_path": str((requested_path or {}).get("path", "")).strip() or self._extract_path_hint(message), "edits_context_files": candidates[:12], "validation_attempts": 0, } def plan_changes(self, state: AgentGraphState) -> dict: - context_files = state.get("edits_context_files", []) or [] user_input = json.dumps( { "request": state.get("message", ""), @@ -129,101 +36,110 @@ class ProjectEditsLogic: "context_files": [ { "path": item.get("path", ""), - "content_preview": self._support.shorten(str(item.get("content", ""))), + "content_preview": self._support.shorten(str(item.get("content", "")), 2200), } - for item in context_files + for item in (state.get("edits_context_files", []) or []) ], + "contract_requirements": { + "must_define_allowed_blocks": True, + "max_hunks_per_file": 5, + "default_intent": "update", + }, }, ensure_ascii=False, ) parsed = self._support.parse_json(self._llm.generate("project_edits_plan", user_input)) - files = parsed.get("files", []) if isinstance(parsed, dict) else [] - planned: list[dict] = [] - for item in files[:8] if isinstance(files, list) else []: - if not isinstance(item, dict): - continue - path = str(item.get("path", "")).replace("\\", "/").strip() - if not path: - continue - planned.append( - { - "path": path, - "reason": str(item.get("reason", "")).strip() or "Requested user adjustment.", - } - ) - if not planned: - fallback_path = state.get("edits_requested_path", "").strip() or "docs/REQUESTED_UPDATES.md" - planned = [{"path": fallback_path, "reason": "Fallback path from user request."}] - return {"edits_plan": planned} + contracts = self._contracts.parse( + parsed, + request=str(state.get("message", "")), + requested_path=str(state.get("edits_requested_path", "")), + ) + plan = [{"path": item.get("path", ""), "reason": item.get("reason", "")} for item in contracts] + return {"edits_contracts": contracts, "edits_plan": plan} def generate_changeset(self, state: AgentGraphState) -> dict: files_map = 
state.get("files_map", {}) or {} - planned = state.get("edits_plan", []) or [] + contracts = state.get("edits_contracts", []) or [] changeset: list[ChangeItem] = [] - for item in planned: - path = str(item.get("path", "")).replace("\\", "/").strip() + feedback: list[str] = [] + + for contract in contracts: + if not isinstance(contract, dict): + continue + path = str(contract.get("path", "")).replace("\\", "/").strip() if not path: continue - current = self._targeting.lookup_file(files_map, path) - current_content = str((current or {}).get("content", "")) - user_input = json.dumps( - { - "request": state.get("message", ""), - "path": path, - "reason": item.get("reason", ""), - "current_content": current_content, - "previous_validation_feedback": state.get("validation_feedback", ""), - "rag_context": self._support.shorten(state.get("rag_context", ""), 5000), - "confluence_context": self._support.shorten(state.get("confluence_context", ""), 5000), - "instruction": "Modify only required parts and preserve unrelated content unchanged.", - }, - ensure_ascii=False, - ) - raw = self._llm.generate("project_edits_apply", user_input).strip() - normalized = self._support.normalize_file_output(raw) - if not normalized: + intent = str(contract.get("intent", "update")).strip().lower() or "update" + source = self._support.lookup_file(files_map, path) + if intent == "update" and source is None: + feedback.append(f"{path}: update requested but source file was not provided.") continue - if current: - if normalized == current_content: + current_content = str((source or {}).get("content", "")) + hunks, error = self._generate_hunks_for_contract(state, contract, current_content) + if error: + feedback.append(f"{path}: {error}") + continue + proposed, apply_error = self._patcher.apply(current_content, contract, hunks) + if apply_error: + feedback.append(f"{path}: {apply_error}") + continue + if proposed is None: + feedback.append(f"{path}: patch application returned empty result.") + 
continue + if intent == "update": + if proposed == current_content: + feedback.append(f"{path}: no-op update produced by model.") continue - if self._support.collapse_whitespace(normalized) == self._support.collapse_whitespace(current_content): + if self._support.collapse_whitespace(proposed) == self._support.collapse_whitespace(current_content): + feedback.append(f"{path}: whitespace-only update is not allowed.") continue - reason = str(item.get("reason", "")).strip() or "User-requested update." - if current and current.get("content_hash"): + reason = str(contract.get("reason", "")).strip() or "Requested user adjustment." + if source and source.get("content_hash"): changeset.append( ChangeItem( op="update", - path=str(current.get("path") or path), - base_hash=str(current.get("content_hash", "")), - proposed_content=normalized, + path=str(source.get("path") or path), + base_hash=str(source.get("content_hash", "")), + proposed_content=proposed, reason=reason, + hunks=hunks, ) ) else: - changeset.append(ChangeItem(op="create", path=path, proposed_content=normalized, reason=reason)) - return {"changeset": changeset} + changeset.append( + ChangeItem( + op="create", + path=path, + proposed_content=proposed, + reason=reason, + hunks=hunks, + ) + ) + + return {"changeset": changeset, "edits_generation_feedback": " | ".join(feedback)} def self_check(self, state: AgentGraphState) -> dict: attempts = int(state.get("validation_attempts", 0) or 0) + 1 changeset = state.get("changeset", []) or [] files_map = state.get("files_map", {}) or {} - is_broad_rewrite = self._is_broad_rewrite_request(str(state.get("message", ""))) if not changeset: - return {"validation_attempts": attempts, "validation_passed": False, "validation_feedback": "Generated changeset is empty."} + feedback = str(state.get("edits_generation_feedback", "")).strip() or "Generated changeset is empty." 
+ return {"validation_attempts": attempts, "validation_passed": False, "validation_feedback": feedback} + broad = self._support.is_broad_rewrite_request(str(state.get("message", ""))) for item in changeset: if item.op.value != "update": continue - source = self._targeting.lookup_file(files_map, item.path) + source = self._support.lookup_file(files_map, item.path) if not source: continue original = str(source.get("content", "")) proposed = item.proposed_content or "" similarity = self._support.similarity(original, proposed) change_ratio = self._support.line_change_ratio(original, proposed) - headings_added = self._support.added_headings(original, proposed) - min_similarity = 0.75 if is_broad_rewrite else 0.9 - max_change_ratio = 0.7 if is_broad_rewrite else 0.35 + added_headings = self._support.added_headings(original, proposed) + min_similarity = 0.75 if broad else 0.9 + max_change_ratio = 0.7 if broad else 0.35 if similarity < min_similarity: return { "validation_attempts": attempts, @@ -236,7 +152,7 @@ class ProjectEditsLogic: "validation_passed": False, "validation_feedback": f"File {item.path} changed too broadly (change_ratio={change_ratio:.2f}).", } - if not is_broad_rewrite and headings_added > 0: + if not broad and added_headings > 0: return { "validation_attempts": attempts, "validation_passed": False, @@ -245,27 +161,68 @@ class ProjectEditsLogic: payload = { "request": state.get("message", ""), + "contracts": state.get("edits_contracts", []), "changeset": [{"op": x.op.value, "path": x.path, "reason": x.reason} for x in changeset[:20]], - "rule": "Changes must match request and avoid unrelated modifications.", + "rule": "Changes must stay inside contract blocks and not affect unrelated sections.", } parsed = self._support.parse_json(self._llm.generate("project_edits_self_check", json.dumps(payload, ensure_ascii=False))) passed = bool(parsed.get("pass")) if isinstance(parsed, dict) else False feedback = str(parsed.get("feedback", "")).strip() if 
isinstance(parsed, dict) else "" - return {"validation_attempts": attempts, "validation_passed": passed, "validation_feedback": feedback or "No feedback provided."} + return { + "validation_attempts": attempts, + "validation_passed": passed, + "validation_feedback": feedback or "No validation feedback provided.", + } def build_result(self, state: AgentGraphState) -> dict: changeset = state.get("changeset", []) or [] return {"changeset": changeset, "answer": self._support.build_summary(state, changeset)} - def _is_broad_rewrite_request(self, message: str) -> bool: - low = (message or "").lower() - markers = ( - "перепиши", - "полностью", - "целиком", - "с нуля", - "full rewrite", - "rewrite all", - "реорганизуй документ", - ) - return any(marker in low for marker in markers) + def _generate_hunks_for_contract( + self, + state: AgentGraphState, + contract: dict, + current_content: str, + ) -> tuple[list[dict], str | None]: + prompt_payload = { + "request": state.get("message", ""), + "contract": contract, + "current_content": self._support.shorten(current_content, 18000), + "previous_validation_feedback": state.get("validation_feedback", ""), + "rag_context": self._support.shorten(state.get("rag_context", ""), 5000), + "confluence_context": self._support.shorten(state.get("confluence_context", ""), 5000), + } + raw = self._llm.generate("project_edits_hunks", json.dumps(prompt_payload, ensure_ascii=False)) + parsed = self._support.parse_json(raw) + hunks = parsed.get("hunks", []) if isinstance(parsed, dict) else [] + if not isinstance(hunks, list) or not hunks: + return [], "Model did not return contract hunks." 
+ normalized: list[dict] = [] + for hunk in hunks: + if not isinstance(hunk, dict): + continue + kind = str(hunk.get("type", "")).strip().lower() + if kind not in {"append_end", "replace_between", "replace_line_equals"}: + continue + normalized.append( + { + "type": kind, + "start_anchor": str(hunk.get("start_anchor", "")), + "end_anchor": str(hunk.get("end_anchor", "")), + "old_line": str(hunk.get("old_line", "")), + "new_text": str(hunk.get("new_text", "")), + } + ) + if not normalized: + return [], "Model hunks are empty or invalid." + return normalized, None + + def _extract_path_hint(self, message: str) -> str: + words = (message or "").replace("\\", "/").split() + for token in words: + cleaned = token.strip("`'\".,:;()[]{}") + if "/" in cleaned and "." in cleaned: + return cleaned + if cleaned.lower().startswith("readme"): + return "README.md" + return "" diff --git a/app/modules/agent/engine/graphs/project_edits_patcher.py b/app/modules/agent/engine/graphs/project_edits_patcher.py new file mode 100644 index 0000000..71eecc1 --- /dev/null +++ b/app/modules/agent/engine/graphs/project_edits_patcher.py @@ -0,0 +1,142 @@ +from difflib import SequenceMatcher + + +class ContractPatcher: + def apply(self, current_content: str, contract: dict, hunks: list[dict]) -> tuple[str | None, str | None]: + if not hunks: + return None, "No hunks were generated." + + max_hunks = int(contract.get("max_hunks", 1) or 1) + if len(hunks) > max_hunks: + return None, f"Too many hunks: got={len(hunks)} allowed={max_hunks}." + + allowed_blocks = contract.get("allowed_blocks", []) + if not isinstance(allowed_blocks, list) or not allowed_blocks: + return None, "No allowed blocks in edit contract." 
+ + result = current_content + total_changed_lines = 0 + for idx, hunk in enumerate(hunks, start=1): + applied, changed_lines, error = self._apply_hunk(result, hunk, allowed_blocks) + if error: + return None, f"Hunk {idx} rejected: {error}" + result = applied + total_changed_lines += changed_lines + + max_changed_lines = int(contract.get("max_changed_lines", 8) or 8) + if total_changed_lines > max_changed_lines: + return ( + None, + f"Changed lines exceed contract limit: changed={total_changed_lines} allowed={max_changed_lines}.", + ) + return result, None + + def _apply_hunk( + self, + content: str, + hunk: dict, + allowed_blocks: list[dict], + ) -> tuple[str, int, str | None]: + if not isinstance(hunk, dict): + return content, 0, "Invalid hunk payload." + kind = str(hunk.get("type", "")).strip().lower() + if kind not in {"append_end", "replace_between", "replace_line_equals"}: + return content, 0, f"Unsupported hunk type: {kind or '(empty)'}." + + block = self._find_matching_block(hunk, allowed_blocks) + if block is None: + return content, 0, "Hunk does not match allowed contract blocks." 
+ + if kind == "append_end": + return self._apply_append_end(content, hunk, block) + if kind == "replace_between": + return self._apply_replace_between(content, hunk, block) + return self._apply_replace_line_equals(content, hunk, block) + + def _find_matching_block(self, hunk: dict, allowed_blocks: list[dict]) -> dict | None: + kind = str(hunk.get("type", "")).strip().lower() + for block in allowed_blocks: + if not isinstance(block, dict): + continue + block_type = str(block.get("type", "")).strip().lower() + if block_type != kind: + continue + if kind == "replace_between": + start = str(hunk.get("start_anchor", "")).strip() + end = str(hunk.get("end_anchor", "")).strip() + if start != str(block.get("start_anchor", "")).strip(): + continue + if end != str(block.get("end_anchor", "")).strip(): + continue + if kind == "replace_line_equals": + old_line = str(hunk.get("old_line", "")).strip() + if old_line != str(block.get("old_line", "")).strip(): + continue + return block + return None + + def _apply_append_end(self, content: str, hunk: dict, block: dict) -> tuple[str, int, str | None]: + new_text = str(hunk.get("new_text", "")) + if not new_text.strip(): + return content, 0, "append_end new_text is empty." + changed_lines = self._changed_line_count("", new_text) + block_limit = int(block.get("max_changed_lines", 6) or 6) + if changed_lines > block_limit: + return content, 0, f"append_end is too large: changed={changed_lines} allowed={block_limit}." + base = content.rstrip("\n") + suffix = new_text.strip("\n") + if not suffix: + return content, 0, "append_end resolved to empty suffix." 
+ merged = f"{base}\n\n{suffix}\n" if base else f"{suffix}\n" + return merged, changed_lines, None + + def _apply_replace_between(self, content: str, hunk: dict, block: dict) -> tuple[str, int, str | None]: + start_anchor = str(hunk.get("start_anchor", "")).strip() + end_anchor = str(hunk.get("end_anchor", "")).strip() + new_text = str(hunk.get("new_text", "")) + if not start_anchor or not end_anchor: + return content, 0, "replace_between anchors are required." + start_pos = content.find(start_anchor) + if start_pos < 0: + return content, 0, "start_anchor not found in file." + middle_start = start_pos + len(start_anchor) + end_pos = content.find(end_anchor, middle_start) + if end_pos < 0: + return content, 0, "end_anchor not found after start_anchor." + old_segment = content[middle_start:end_pos] + changed_lines = self._changed_line_count(old_segment, new_text) + block_limit = int(block.get("max_changed_lines", 6) or 6) + if changed_lines > block_limit: + return content, 0, f"replace_between is too large: changed={changed_lines} allowed={block_limit}." + merged = content[:middle_start] + new_text + content[end_pos:] + return merged, changed_lines, None + + def _apply_replace_line_equals(self, content: str, hunk: dict, block: dict) -> tuple[str, int, str | None]: + old_line = str(hunk.get("old_line", "")).strip() + new_text = str(hunk.get("new_text", "")) + if not old_line: + return content, 0, "replace_line_equals old_line is required." + lines = content.splitlines(keepends=True) + matches = [idx for idx, line in enumerate(lines) if line.rstrip("\n") == old_line] + if len(matches) != 1: + return content, 0, f"replace_line_equals expected exactly one match, got={len(matches)}." 
+ replacement = new_text.rstrip("\n") + "\n" + changed_lines = self._changed_line_count(old_line + "\n", replacement) + block_limit = int(block.get("max_changed_lines", 6) or 6) + if changed_lines > block_limit: + return content, 0, f"replace_line_equals is too large: changed={changed_lines} allowed={block_limit}." + lines[matches[0] : matches[0] + 1] = [replacement] + return "".join(lines), changed_lines, None + + def _changed_line_count(self, old_text: str, new_text: str) -> int: + old_lines = (old_text or "").splitlines() + new_lines = (new_text or "").splitlines() + if not old_lines and not new_lines: + return 0 + matcher = SequenceMatcher(None, old_lines, new_lines) + changed = 0 + for tag, i1, i2, j1, j2 in matcher.get_opcodes(): + if tag == "equal": + continue + changed += max(i2 - i1, j2 - j1) + return max(changed, 1) diff --git a/app/modules/agent/engine/graphs/project_edits_support.py b/app/modules/agent/engine/graphs/project_edits_support.py new file mode 100644 index 0000000..95cf245 --- /dev/null +++ b/app/modules/agent/engine/graphs/project_edits_support.py @@ -0,0 +1,116 @@ +import json +import re +from difflib import SequenceMatcher + +from app.modules.agent.engine.graphs.file_targeting import FileTargeting +from app.modules.agent.engine.graphs.state import AgentGraphState +from app.schemas.changeset import ChangeItem + + +class ProjectEditsSupport: + def __init__(self, max_context_files: int = 12, max_preview_chars: int = 2500) -> None: + self._max_context_files = max_context_files + self._max_preview_chars = max_preview_chars + self._targeting = FileTargeting() + + def pick_relevant_files(self, message: str, files_map: dict[str, dict]) -> list[dict]: + tokens = {x for x in (message or "").lower().replace("/", " ").split() if len(x) >= 4} + scored: list[tuple[int, dict]] = [] + for path, payload in files_map.items(): + content = str(payload.get("content", "")) + score = 0 + low_path = path.lower() + low_content = content.lower() + for token in 
tokens: + if token in low_path: + score += 3 + if token in low_content: + score += 1 + scored.append((score, self.as_candidate(payload))) + scored.sort(key=lambda x: (-x[0], x[1]["path"])) + return [item for _, item in scored[: self._max_context_files]] + + def as_candidate(self, payload: dict) -> dict: + return { + "path": str(payload.get("path", "")).replace("\\", "/"), + "content": str(payload.get("content", "")), + "content_hash": str(payload.get("content_hash", "")), + } + + def normalize_file_output(self, text: str) -> str: + value = (text or "").strip() + if value.startswith("```") and value.endswith("```"): + lines = value.splitlines() + if len(lines) >= 3: + return "\n".join(lines[1:-1]).strip() + return value + + def parse_json(self, raw: str): + text = self.normalize_file_output(raw) + try: + return json.loads(text) + except Exception: + return {} + + def shorten(self, text: str, max_chars: int | None = None) -> str: + limit = max_chars or self._max_preview_chars + value = (text or "").strip() + if len(value) <= limit: + return value + return value[:limit].rstrip() + "\n...[truncated]" + + def collapse_whitespace(self, text: str) -> str: + return re.sub(r"\s+", " ", (text or "").strip()) + + def similarity(self, original: str, updated: str) -> float: + return SequenceMatcher(None, original or "", updated or "").ratio() + + def line_change_ratio(self, original: str, updated: str) -> float: + orig_lines = (original or "").splitlines() + new_lines = (updated or "").splitlines() + if not orig_lines and not new_lines: + return 0.0 + matcher = SequenceMatcher(None, orig_lines, new_lines) + changed = 0 + for tag, i1, i2, j1, j2 in matcher.get_opcodes(): + if tag == "equal": + continue + changed += max(i2 - i1, j2 - j1) + total = max(len(orig_lines), len(new_lines), 1) + return changed / total + + def added_headings(self, original: str, updated: str) -> int: + old_heads = {line.strip() for line in (original or "").splitlines() if line.strip().startswith("#")} + 
new_heads = {line.strip() for line in (updated or "").splitlines() if line.strip().startswith("#")} + return len(new_heads - old_heads) + + def build_summary(self, state: AgentGraphState, changeset: list[ChangeItem]) -> str: + if not changeset: + return "Правки не сформированы: changeset пуст." + lines = [ + "Выполненные действия:", + f"- Проанализирован запрос: {state.get('message', '')}", + "- Сформирован контракт правок с разрешенными блоками изменений.", + f"- Проведен self-check: {state.get('validation_feedback', 'без замечаний')}", + "", + "Измененные файлы:", + ] + for item in changeset[:30]: + lines.append(f"- {item.op.value} {item.path}: {item.reason}") + return "\n".join(lines) + + def is_broad_rewrite_request(self, message: str) -> bool: + low = (message or "").lower() + markers = ( + "перепиши", + "полностью", + "целиком", + "с нуля", + "full rewrite", + "rewrite all", + "реорганизуй документ", + ) + return any(marker in low for marker in markers) + + def lookup_file(self, files_map: dict[str, dict], path: str) -> dict | None: + return self._targeting.lookup_file(files_map, path) diff --git a/app/modules/agent/engine/graphs/project_qa_graph.py b/app/modules/agent/engine/graphs/project_qa_graph.py index 681543f..6dead1d 100644 --- a/app/modules/agent/engine/graphs/project_qa_graph.py +++ b/app/modules/agent/engine/graphs/project_qa_graph.py @@ -1,9 +1,13 @@ +import logging + from langgraph.graph import END, START, StateGraph from app.modules.agent.engine.graphs.progress import emit_progress_sync from app.modules.agent.engine.graphs.state import AgentGraphState from app.modules.agent.llm import AgentLlmService +LOGGER = logging.getLogger(__name__) + class ProjectQaGraphFactory: def __init__(self, llm: AgentLlmService) -> None: @@ -35,4 +39,9 @@ class ProjectQaGraphFactory: stage="graph.project_qa.answer.done", message="Ответ по проекту сформирован.", ) - return {"answer": answer} + result = {"answer": answer} + LOGGER.warning( + "graph step result: 
graph=project_qa step=answer answer_len=%s", + len(answer or ""), + ) + return result diff --git a/app/modules/agent/engine/graphs/state.py b/app/modules/agent/engine/graphs/state.py index 14e63da..8492114 100644 --- a/app/modules/agent/engine/graphs/state.py +++ b/app/modules/agent/engine/graphs/state.py @@ -30,3 +30,5 @@ class AgentGraphState(TypedDict, total=False): edits_requested_path: str edits_context_files: list[dict] edits_plan: list[dict] + edits_contracts: list[dict] + edits_generation_feedback: str diff --git a/app/modules/agent/engine/orchestrator/__init__.py b/app/modules/agent/engine/orchestrator/__init__.py new file mode 100644 index 0000000..7163dac --- /dev/null +++ b/app/modules/agent/engine/orchestrator/__init__.py @@ -0,0 +1,21 @@ +from app.modules.agent.engine.orchestrator.models import ( + ExecutionPlan, + OrchestratorResult, + PlanStep, + Scenario, + StepResult, + TaskSpec, +) +from app.modules.agent.engine.orchestrator.service import OrchestratorService +from app.modules.agent.engine.orchestrator.task_spec_builder import TaskSpecBuilder + +__all__ = [ + "ExecutionPlan", + "OrchestratorResult", + "OrchestratorService", + "PlanStep", + "Scenario", + "StepResult", + "TaskSpec", + "TaskSpecBuilder", +] diff --git a/app/modules/agent/engine/orchestrator/__pycache__/__init__.cpython-312.pyc b/app/modules/agent/engine/orchestrator/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..7990a03 Binary files /dev/null and b/app/modules/agent/engine/orchestrator/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/__pycache__/artifact_store.cpython-312.pyc b/app/modules/agent/engine/orchestrator/__pycache__/artifact_store.cpython-312.pyc new file mode 100644 index 0000000..79082f6 Binary files /dev/null and b/app/modules/agent/engine/orchestrator/__pycache__/artifact_store.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/__pycache__/evidence_store.cpython-312.pyc 
b/app/modules/agent/engine/orchestrator/__pycache__/evidence_store.cpython-312.pyc new file mode 100644 index 0000000..7ba90aa Binary files /dev/null and b/app/modules/agent/engine/orchestrator/__pycache__/evidence_store.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/__pycache__/execution_context.cpython-312.pyc b/app/modules/agent/engine/orchestrator/__pycache__/execution_context.cpython-312.pyc new file mode 100644 index 0000000..993f9f0 Binary files /dev/null and b/app/modules/agent/engine/orchestrator/__pycache__/execution_context.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/__pycache__/execution_engine.cpython-312.pyc b/app/modules/agent/engine/orchestrator/__pycache__/execution_engine.cpython-312.pyc new file mode 100644 index 0000000..48a46a2 Binary files /dev/null and b/app/modules/agent/engine/orchestrator/__pycache__/execution_engine.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/__pycache__/metrics_persister.cpython-312.pyc b/app/modules/agent/engine/orchestrator/__pycache__/metrics_persister.cpython-312.pyc new file mode 100644 index 0000000..b9ff5fe Binary files /dev/null and b/app/modules/agent/engine/orchestrator/__pycache__/metrics_persister.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/__pycache__/plan_compiler.cpython-312.pyc b/app/modules/agent/engine/orchestrator/__pycache__/plan_compiler.cpython-312.pyc new file mode 100644 index 0000000..a6bbf42 Binary files /dev/null and b/app/modules/agent/engine/orchestrator/__pycache__/plan_compiler.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/__pycache__/plan_validator.cpython-312.pyc b/app/modules/agent/engine/orchestrator/__pycache__/plan_validator.cpython-312.pyc new file mode 100644 index 0000000..99306a6 Binary files /dev/null and b/app/modules/agent/engine/orchestrator/__pycache__/plan_validator.cpython-312.pyc differ diff --git 
a/app/modules/agent/engine/orchestrator/__pycache__/quality_gates.cpython-312.pyc b/app/modules/agent/engine/orchestrator/__pycache__/quality_gates.cpython-312.pyc new file mode 100644 index 0000000..100ae96 Binary files /dev/null and b/app/modules/agent/engine/orchestrator/__pycache__/quality_gates.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/__pycache__/quality_metrics.cpython-312.pyc b/app/modules/agent/engine/orchestrator/__pycache__/quality_metrics.cpython-312.pyc new file mode 100644 index 0000000..a9b6bce Binary files /dev/null and b/app/modules/agent/engine/orchestrator/__pycache__/quality_metrics.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/__pycache__/result_assembler.cpython-312.pyc b/app/modules/agent/engine/orchestrator/__pycache__/result_assembler.cpython-312.pyc new file mode 100644 index 0000000..022bd7b Binary files /dev/null and b/app/modules/agent/engine/orchestrator/__pycache__/result_assembler.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/__pycache__/service.cpython-312.pyc b/app/modules/agent/engine/orchestrator/__pycache__/service.cpython-312.pyc new file mode 100644 index 0000000..91afc9e Binary files /dev/null and b/app/modules/agent/engine/orchestrator/__pycache__/service.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/__pycache__/step_registry.cpython-312.pyc b/app/modules/agent/engine/orchestrator/__pycache__/step_registry.cpython-312.pyc new file mode 100644 index 0000000..d945215 Binary files /dev/null and b/app/modules/agent/engine/orchestrator/__pycache__/step_registry.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/__pycache__/task_spec_builder.cpython-312.pyc b/app/modules/agent/engine/orchestrator/__pycache__/task_spec_builder.cpython-312.pyc new file mode 100644 index 0000000..d8182c7 Binary files /dev/null and 
b/app/modules/agent/engine/orchestrator/__pycache__/task_spec_builder.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/__pycache__/template_registry.cpython-312.pyc b/app/modules/agent/engine/orchestrator/__pycache__/template_registry.cpython-312.pyc new file mode 100644 index 0000000..d944184 Binary files /dev/null and b/app/modules/agent/engine/orchestrator/__pycache__/template_registry.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/actions/__init__.py b/app/modules/agent/engine/orchestrator/actions/__init__.py new file mode 100644 index 0000000..e220f9b --- /dev/null +++ b/app/modules/agent/engine/orchestrator/actions/__init__.py @@ -0,0 +1,13 @@ +from app.modules.agent.engine.orchestrator.actions.docs_actions import DocsActions +from app.modules.agent.engine.orchestrator.actions.edit_actions import EditActions +from app.modules.agent.engine.orchestrator.actions.explain_actions import ExplainActions +from app.modules.agent.engine.orchestrator.actions.gherkin_actions import GherkinActions +from app.modules.agent.engine.orchestrator.actions.review_actions import ReviewActions + +__all__ = [ + "DocsActions", + "EditActions", + "ExplainActions", + "GherkinActions", + "ReviewActions", +] diff --git a/app/modules/agent/engine/orchestrator/actions/__pycache__/__init__.cpython-312.pyc b/app/modules/agent/engine/orchestrator/actions/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..6960565 Binary files /dev/null and b/app/modules/agent/engine/orchestrator/actions/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/actions/__pycache__/common.cpython-312.pyc b/app/modules/agent/engine/orchestrator/actions/__pycache__/common.cpython-312.pyc new file mode 100644 index 0000000..39cb9a8 Binary files /dev/null and b/app/modules/agent/engine/orchestrator/actions/__pycache__/common.cpython-312.pyc differ diff --git 
a/app/modules/agent/engine/orchestrator/actions/__pycache__/docs_actions.cpython-312.pyc b/app/modules/agent/engine/orchestrator/actions/__pycache__/docs_actions.cpython-312.pyc new file mode 100644 index 0000000..8a6c393 Binary files /dev/null and b/app/modules/agent/engine/orchestrator/actions/__pycache__/docs_actions.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/actions/__pycache__/edit_actions.cpython-312.pyc b/app/modules/agent/engine/orchestrator/actions/__pycache__/edit_actions.cpython-312.pyc new file mode 100644 index 0000000..2d5dc26 Binary files /dev/null and b/app/modules/agent/engine/orchestrator/actions/__pycache__/edit_actions.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/actions/__pycache__/explain_actions.cpython-312.pyc b/app/modules/agent/engine/orchestrator/actions/__pycache__/explain_actions.cpython-312.pyc new file mode 100644 index 0000000..ad3fd1e Binary files /dev/null and b/app/modules/agent/engine/orchestrator/actions/__pycache__/explain_actions.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/actions/__pycache__/gherkin_actions.cpython-312.pyc b/app/modules/agent/engine/orchestrator/actions/__pycache__/gherkin_actions.cpython-312.pyc new file mode 100644 index 0000000..8c7237a Binary files /dev/null and b/app/modules/agent/engine/orchestrator/actions/__pycache__/gherkin_actions.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/actions/__pycache__/review_actions.cpython-312.pyc b/app/modules/agent/engine/orchestrator/actions/__pycache__/review_actions.cpython-312.pyc new file mode 100644 index 0000000..b23b57e Binary files /dev/null and b/app/modules/agent/engine/orchestrator/actions/__pycache__/review_actions.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/actions/common.py b/app/modules/agent/engine/orchestrator/actions/common.py new file mode 100644 index 0000000..bd9cd9f --- /dev/null +++ 
b/app/modules/agent/engine/orchestrator/actions/common.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from uuid import uuid4 + +from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext +from app.modules.agent.engine.orchestrator.models import ArtifactType, EvidenceItem + + +class ActionSupport: + def put(self, ctx: ExecutionContext, key: str, artifact_type: ArtifactType, value, *, meta: dict | None = None) -> str: + item = ctx.artifacts.put(key=key, artifact_type=artifact_type, content=value, meta=meta) + return item.artifact_id + + def get(self, ctx: ExecutionContext, key: str, default=None): + return ctx.artifacts.get_content(key, default) + + def add_evidence(self, ctx: ExecutionContext, *, source_type: str, source_ref: str, snippet: str, score: float = 0.8) -> str: + evidence = EvidenceItem( + evidence_id=f"evidence_{uuid4().hex}", + source_type=source_type, + source_ref=source_ref, + snippet=(snippet or "").strip()[:600], + score=max(0.0, min(1.0, float(score))), + ) + ctx.evidences.put_many([evidence]) + return evidence.evidence_id diff --git a/app/modules/agent/engine/orchestrator/actions/docs_actions.py b/app/modules/agent/engine/orchestrator/actions/docs_actions.py new file mode 100644 index 0000000..b289b74 --- /dev/null +++ b/app/modules/agent/engine/orchestrator/actions/docs_actions.py @@ -0,0 +1,95 @@ +from __future__ import annotations + +from app.modules.agent.engine.orchestrator.actions.common import ActionSupport +from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext +from app.modules.agent.engine.orchestrator.models import ArtifactType + + +class DocsActions(ActionSupport): + def extract_change_intents(self, ctx: ExecutionContext) -> list[str]: + text = str(self.get(ctx, "source_doc_text", "") or ctx.task.user_message) + intents = { + "summary": text[:240], + "api": ["Update endpoint behavior contract"], + "logic": ["Adjust reusable business rules"], + "db": ["Reflect 
schema/table notes if needed"], + "ui": ["Adjust form behavior and validation"], + } + return [self.put(ctx, "change_intents", ArtifactType.STRUCTURED_JSON, intents)] + + def map_to_doc_tree(self, ctx: ExecutionContext) -> list[str]: + targets = [ + "docs/api/increment.md", + "docs/logic/increment.md", + "docs/db/increment.md", + "docs/ui/increment.md", + ] + return [self.put(ctx, "doc_targets", ArtifactType.STRUCTURED_JSON, {"targets": targets})] + + def load_current_docs_context(self, ctx: ExecutionContext) -> list[str]: + files_map = dict(ctx.task.metadata.get("files_map", {}) or {}) + targets = (self.get(ctx, "doc_targets", {}) or {}).get("targets", []) + current = [] + for path in targets: + current.append( + { + "path": path, + "content": str((files_map.get(path) or {}).get("content", "")), + "content_hash": str((files_map.get(path) or {}).get("content_hash", "")), + } + ) + return [self.put(ctx, "current_docs_context", ArtifactType.STRUCTURED_JSON, {"files": current})] + + def generate_doc_updates(self, ctx: ExecutionContext) -> list[str]: + intents = self.get(ctx, "change_intents", {}) or {} + targets = (self.get(ctx, "doc_targets", {}) or {}).get("targets", []) + bundle = [] + for path in targets: + bundle.append( + { + "path": path, + "content": "\n".join( + [ + f"# Increment Update: {path}", + "", + "## Scope", + str(intents.get("summary", "")), + "", + "## Changes", + "- Updated according to analytics increment.", + ] + ), + "reason": "align docs with analytics increment", + } + ) + return [self.put(ctx, "generated_doc_bundle", ArtifactType.DOC_BUNDLE, bundle)] + + def cross_file_validation(self, ctx: ExecutionContext) -> list[str]: + bundle = self.get(ctx, "generated_doc_bundle", []) or [] + paths = [str(item.get("path", "")) for item in bundle if isinstance(item, dict)] + has_required = any(path.startswith("docs/api/") for path in paths) and any(path.startswith("docs/logic/") for path in paths) + report = {"paths": paths, 
"required_core_paths_present": has_required} + return [self.put(ctx, "consistency_report", ArtifactType.STRUCTURED_JSON, report)] + + def build_changeset(self, ctx: ExecutionContext) -> list[str]: + bundle = self.get(ctx, "generated_doc_bundle", []) or [] + changeset = [] + for item in bundle: + if not isinstance(item, dict): + continue + changeset.append( + { + "op": "update", + "path": str(item.get("path", "")).strip(), + "base_hash": "orchestrator-generated", + "proposed_content": str(item.get("content", "")), + "reason": str(item.get("reason", "documentation update")), + "hunks": [], + } + ) + return [self.put(ctx, "final_changeset", ArtifactType.CHANGESET, changeset)] + + def compose_summary(self, ctx: ExecutionContext) -> list[str]: + count = len(self.get(ctx, "final_changeset", []) or []) + text = f"Prepared documentation changeset with {count} files updated." + return [self.put(ctx, "final_answer", ArtifactType.TEXT, text)] diff --git a/app/modules/agent/engine/orchestrator/actions/edit_actions.py b/app/modules/agent/engine/orchestrator/actions/edit_actions.py new file mode 100644 index 0000000..14806f0 --- /dev/null +++ b/app/modules/agent/engine/orchestrator/actions/edit_actions.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import re + +from app.modules.agent.engine.orchestrator.actions.common import ActionSupport +from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext +from app.modules.agent.engine.orchestrator.models import ArtifactType + + +class EditActions(ActionSupport): + def resolve_target(self, ctx: ExecutionContext) -> list[str]: + message = ctx.task.user_message + files_map = dict(ctx.task.metadata.get("files_map", {}) or {}) + requested = self._extract_path(message) + matched = self._lookup_source(files_map, requested) + if matched: + requested = str(matched.get("path") or requested or "") + if not requested and files_map: + requested = next(iter(files_map.keys())) + payload = {"path": requested or 
"", "allowed": bool(requested)} + return [self.put(ctx, "resolved_target", ArtifactType.STRUCTURED_JSON, payload)] + + def load_target_context(self, ctx: ExecutionContext) -> list[str]: + files_map = dict(ctx.task.metadata.get("files_map", {}) or {}) + resolved = self.get(ctx, "resolved_target", {}) or {} + path = str(resolved.get("path", "")) + source = dict(self._lookup_source(files_map, path) or {}) + current = { + "path": str(source.get("path", "")) or path, + "content": str(source.get("content", "")), + "content_hash": str(source.get("content_hash", "")), + } + return [self.put(ctx, "target_context", ArtifactType.STRUCTURED_JSON, current)] + + def plan_minimal_patch(self, ctx: ExecutionContext) -> list[str]: + target = self.get(ctx, "target_context", {}) or {} + plan = { + "path": target.get("path", ""), + "intent": "minimal_update", + "instruction": ctx.task.user_message[:240], + } + return [self.put(ctx, "patch_plan", ArtifactType.STRUCTURED_JSON, plan)] + + def generate_patch(self, ctx: ExecutionContext) -> list[str]: + target = self.get(ctx, "target_context", {}) or {} + plan = self.get(ctx, "patch_plan", {}) or {} + path = str(target.get("path", "")) + base = str(target.get("content_hash", "") or "orchestrator-generated") + original = str(target.get("content", "")) + note = f"\n\n\n" + proposed = (original + note).strip() if original else note.strip() + changeset = [ + { + "op": "update" if original else "create", + "path": path, + "base_hash": base if original else None, + "proposed_content": proposed, + "reason": "targeted file update", + "hunks": [], + } + ] + return [self.put(ctx, "raw_changeset", ArtifactType.CHANGESET, changeset)] + + def validate_patch_safety(self, ctx: ExecutionContext) -> list[str]: + changeset = self.get(ctx, "raw_changeset", []) or [] + safe = len(changeset) == 1 + report = {"safe": safe, "items": len(changeset), "reason": "single-file patch expected"} + return [self.put(ctx, "patch_validation_report", 
ArtifactType.STRUCTURED_JSON, report)] + + def finalize_changeset(self, ctx: ExecutionContext) -> list[str]: + report = self.get(ctx, "patch_validation_report", {}) or {} + if not report.get("safe"): + return [self.put(ctx, "final_changeset", ArtifactType.CHANGESET, [])] + changeset = self.get(ctx, "raw_changeset", []) or [] + return [self.put(ctx, "final_changeset", ArtifactType.CHANGESET, changeset)] + + def compose_edit_summary(self, ctx: ExecutionContext) -> list[str]: + count = len(self.get(ctx, "final_changeset", []) or []) + text = f"Prepared targeted edit changeset with {count} item(s)." + return [self.put(ctx, "final_answer", ArtifactType.TEXT, text)] + + def _extract_path(self, text: str) -> str | None: + match = re.search(r"\b[\w./-]+\.(md|txt|rst|yaml|yml|json|toml|ini|cfg)\b", text or "", flags=re.IGNORECASE) + if not match: + return None + return match.group(0).replace("\\", "/").strip() + + def _lookup_source(self, files_map: dict[str, dict], path: str | None) -> dict | None: + if not path: + return None + normalized = str(path).replace("\\", "/").strip() + if not normalized: + return None + source = files_map.get(normalized) + if source: + return source + normalized_low = normalized.lower() + for key, value in files_map.items(): + if str(key).replace("\\", "/").lower() == normalized_low: + return value + return None diff --git a/app/modules/agent/engine/orchestrator/actions/explain_actions.py b/app/modules/agent/engine/orchestrator/actions/explain_actions.py new file mode 100644 index 0000000..eb97284 --- /dev/null +++ b/app/modules/agent/engine/orchestrator/actions/explain_actions.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +from app.modules.agent.engine.orchestrator.actions.common import ActionSupport +from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext +from app.modules.agent.engine.orchestrator.models import ArtifactType + + +class ExplainActions(ActionSupport): + def collect_sources(self, ctx: 
ExecutionContext) -> list[str]: + rag_context = str(ctx.task.metadata.get("rag_context", "")) + confluence_context = str(ctx.task.metadata.get("confluence_context", "")) + files_map = dict(ctx.task.metadata.get("files_map", {}) or {}) + payload = { + "rag_context": rag_context, + "confluence_context": confluence_context, + "files_count": len(files_map), + } + evidence_ids: list[str] = [] + if rag_context.strip(): + evidence_ids.append( + self.add_evidence( + ctx, + source_type="rag_chunk", + source_ref=ctx.task.rag_session_id, + snippet=rag_context, + score=0.9, + ) + ) + artifact_id = self.put( + ctx, + "sources", + ArtifactType.STRUCTURED_JSON, + payload, + meta={"evidence_ids": evidence_ids}, + ) + return [artifact_id] + + def extract_logic(self, ctx: ExecutionContext) -> list[str]: + sources = self.get(ctx, "sources", {}) or {} + message = ctx.task.user_message + logic = { + "request": message, + "assumptions": ["requirements-first"], + "notes": "Use requirements/docs as primary source over code.", + "source_summary": sources, + } + return [self.put(ctx, "logic_model", ArtifactType.STRUCTURED_JSON, logic)] + + def build_sequence(self, ctx: ExecutionContext) -> list[str]: + message = ctx.task.user_message + mermaid = "\n".join( + [ + "```mermaid", + "sequenceDiagram", + "participant User", + "participant Agent", + "participant Docs", + "User->>Agent: " + message[:80], + "Agent->>Docs: Find relevant requirements", + "Docs-->>Agent: Relevant context", + "Agent-->>User: Structured explanation", + "```", + ] + ) + return [self.put(ctx, "sequence_diagram", ArtifactType.TEXT, mermaid)] + + def build_use_cases(self, ctx: ExecutionContext) -> list[str]: + lines = [ + "### Use Cases", + "- Analyze requirement fragments relevant to user question", + "- Reconstruct behavior flow and decision points", + "- Return user-focused explanation with constraints", + ] + return [self.put(ctx, "use_cases", ArtifactType.TEXT, "\n".join(lines))] + + def summarize(self, ctx: 
ExecutionContext) -> list[str]: + sequence = str(self.get(ctx, "sequence_diagram", "") or "") + use_cases = str(self.get(ctx, "use_cases", "") or "") + answer = "\n\n".join( + [ + "## Summary", + "The requested project part is explained from requirements/docs context.", + sequence, + use_cases, + ] + ) + return [self.put(ctx, "final_answer", ArtifactType.TEXT, answer)] diff --git a/app/modules/agent/engine/orchestrator/actions/gherkin_actions.py b/app/modules/agent/engine/orchestrator/actions/gherkin_actions.py new file mode 100644 index 0000000..25ed754 --- /dev/null +++ b/app/modules/agent/engine/orchestrator/actions/gherkin_actions.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +from app.modules.agent.engine.orchestrator.actions.common import ActionSupport +from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext +from app.modules.agent.engine.orchestrator.models import ArtifactType + + +class GherkinActions(ActionSupport): + def extract_increment_scope(self, ctx: ExecutionContext) -> list[str]: + text = str(self.get(ctx, "source_doc_text", "") or ctx.task.user_message) + scope = { + "title": "Increment scope", + "summary": text[:220], + "entities": ["User", "System"], + } + return [self.put(ctx, "increment_scope", ArtifactType.STRUCTURED_JSON, scope)] + + def partition_features(self, ctx: ExecutionContext) -> list[str]: + scope = self.get(ctx, "increment_scope", {}) or {} + groups = [ + {"feature": "Main flow", "goal": scope.get("summary", "")}, + {"feature": "Validation", "goal": "Input validation and error behavior"}, + ] + return [self.put(ctx, "feature_groups", ArtifactType.STRUCTURED_JSON, groups)] + + def generate_gherkin_bundle(self, ctx: ExecutionContext) -> list[str]: + groups = self.get(ctx, "feature_groups", []) or [] + files = [] + for idx, group in enumerate(groups, start=1): + feature_name = str(group.get("feature", f"Feature {idx}")) + content = "\n".join( + [ + f"Feature: {feature_name}", + " Scenario: 
Happy path", + " Given system is available", + " When user performs increment action", + " Then system applies expected increment behavior", + ] + ) + files.append({"path": f"tests/gherkin/feature_{idx}.feature", "content": content}) + return [self.put(ctx, "gherkin_bundle", ArtifactType.GHERKIN_BUNDLE, files)] + + def lint_gherkin(self, ctx: ExecutionContext) -> list[str]: + bundle = self.get(ctx, "gherkin_bundle", []) or [] + invalid = [] + for item in bundle: + content = str(item.get("content", "")) if isinstance(item, dict) else "" + if "Feature:" not in content or "Scenario:" not in content: + invalid.append(str(item.get("path", "unknown"))) + report = {"valid": len(invalid) == 0, "invalid_files": invalid} + return [self.put(ctx, "gherkin_lint_report", ArtifactType.STRUCTURED_JSON, report)] + + def validate_coverage(self, ctx: ExecutionContext) -> list[str]: + bundle = self.get(ctx, "gherkin_bundle", []) or [] + report = {"covered": len(bundle) > 0, "feature_files": len(bundle)} + return [self.put(ctx, "coverage_report", ArtifactType.STRUCTURED_JSON, report)] + + def compose_test_model_summary(self, ctx: ExecutionContext) -> list[str]: + bundle = self.get(ctx, "gherkin_bundle", []) or [] + summary = f"Prepared gherkin model with {len(bundle)} feature file(s)." 
+ changeset = [ + { + "op": "create", + "path": str(item.get("path", "")), + "base_hash": None, + "proposed_content": str(item.get("content", "")), + "reason": "generated gherkin feature", + "hunks": [], + } + for item in bundle + if isinstance(item, dict) + ] + return [ + self.put(ctx, "final_answer", ArtifactType.TEXT, summary), + self.put(ctx, "final_changeset", ArtifactType.CHANGESET, changeset), + ] diff --git a/app/modules/agent/engine/orchestrator/actions/review_actions.py b/app/modules/agent/engine/orchestrator/actions/review_actions.py new file mode 100644 index 0000000..da7e9e3 --- /dev/null +++ b/app/modules/agent/engine/orchestrator/actions/review_actions.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +from urllib.parse import urlparse + +from app.modules.agent.engine.orchestrator.actions.common import ActionSupport +from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext +from app.modules.agent.engine.orchestrator.models import ArtifactType + + +class ReviewActions(ActionSupport): + def fetch_source_doc(self, ctx: ExecutionContext) -> list[str]: + attachment = next((a for a in ctx.task.attachments if a.value), None) + if attachment is None: + text = ctx.task.user_message + source_ref = "inline:message" + else: + parsed = urlparse(attachment.value) + source_ref = attachment.value + text = f"Source: {parsed.netloc}\nPath: {parsed.path}\nRequest: {ctx.task.user_message}" + evidence_id = self.add_evidence( + ctx, + source_type="external_doc", + source_ref=source_ref, + snippet=text, + score=0.75, + ) + return [ + self.put( + ctx, + "source_doc_raw", + ArtifactType.TEXT, + text, + meta={"source_ref": source_ref, "evidence_ids": [evidence_id]}, + ) + ] + + def normalize_document(self, ctx: ExecutionContext) -> list[str]: + raw = str(self.get(ctx, "source_doc_raw", "") or "") + normalized = "\n".join(line.rstrip() for line in raw.splitlines()).strip() + return [self.put(ctx, "source_doc_text", ArtifactType.TEXT, 
normalized)] + + def structural_check(self, ctx: ExecutionContext) -> list[str]: + text = str(self.get(ctx, "source_doc_text", "") or "") + required = ["цель", "границ", "риски", "api", "данные"] + found = [token for token in required if token in text.lower()] + findings = { + "required_sections": required, + "found_markers": found, + "missing_markers": [token for token in required if token not in found], + } + return [self.put(ctx, "structural_findings", ArtifactType.STRUCTURED_JSON, findings)] + + def semantic_consistency_check(self, ctx: ExecutionContext) -> list[str]: + text = str(self.get(ctx, "source_doc_text", "") or "") + contradictions = [] + if "без изменений" in text.lower() and "новый" in text.lower(): + contradictions.append("Contains both 'no changes' and 'new behavior' markers.") + payload = {"contradictions": contradictions, "status": "ok" if not contradictions else "needs_attention"} + return [self.put(ctx, "semantic_findings", ArtifactType.STRUCTURED_JSON, payload)] + + def architecture_fit_check(self, ctx: ExecutionContext) -> list[str]: + text = str(self.get(ctx, "source_doc_text", "") or "") + files_count = len(dict(ctx.task.metadata.get("files_map", {}) or {})) + payload = { + "architecture_fit": "medium" if files_count == 0 else "high", + "notes": "Evaluate fit against existing docs and interfaces.", + "markers": ["integration"] if "integr" in text.lower() else [], + } + return [self.put(ctx, "architecture_findings", ArtifactType.STRUCTURED_JSON, payload)] + + def optimization_check(self, ctx: ExecutionContext) -> list[str]: + text = str(self.get(ctx, "source_doc_text", "") or "") + has_perf = any(token in text.lower() for token in ("latency", "performance", "оптим")) + payload = { + "optimization_considered": has_perf, + "recommendation": "Add explicit non-functional targets." 
if not has_perf else "Optimization criteria present.", + } + return [self.put(ctx, "optimization_findings", ArtifactType.STRUCTURED_JSON, payload)] + + def compose_review_report(self, ctx: ExecutionContext) -> list[str]: + structural = self.get(ctx, "structural_findings", {}) or {} + semantic = self.get(ctx, "semantic_findings", {}) or {} + architecture = self.get(ctx, "architecture_findings", {}) or {} + optimization = self.get(ctx, "optimization_findings", {}) or {} + report = "\n".join( + [ + "## Findings", + f"- Missing structure markers: {', '.join(structural.get('missing_markers', [])) or 'none'}", + f"- Contradictions: {len(semantic.get('contradictions', []))}", + f"- Architecture fit: {architecture.get('architecture_fit', 'unknown')}", + f"- Optimization: {optimization.get('recommendation', 'n/a')}", + "", + "## Recommendations", + "- Clarify boundaries and data contracts.", + "- Add explicit error and rollback behavior.", + "- Add measurable non-functional requirements.", + ] + ) + return [ + self.put(ctx, "review_report", ArtifactType.REVIEW_REPORT, report), + self.put(ctx, "final_answer", ArtifactType.TEXT, report), + ] diff --git a/app/modules/agent/engine/orchestrator/artifact_store.py b/app/modules/agent/engine/orchestrator/artifact_store.py new file mode 100644 index 0000000..83c668e --- /dev/null +++ b/app/modules/agent/engine/orchestrator/artifact_store.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from uuid import uuid4 + +from app.modules.agent.engine.orchestrator.models import ArtifactItem, ArtifactType + + +class ArtifactStore: + def __init__(self) -> None: + self._by_id: dict[str, ArtifactItem] = {} + self._by_key: dict[str, ArtifactItem] = {} + + def put(self, *, key: str, artifact_type: ArtifactType, content=None, meta: dict | None = None) -> ArtifactItem: + item_meta = dict(meta or {}) + if content is not None and not isinstance(content, str): + item_meta.setdefault("value", content) + item = ArtifactItem( + 
artifact_id=f"artifact_{uuid4().hex}", + key=key, + type=artifact_type, + content=self._as_content(content), + meta=item_meta, + ) + self._by_id[item.artifact_id] = item + self._by_key[key] = item + return item + + def get(self, key: str) -> ArtifactItem | None: + return self._by_key.get(key) + + def get_content(self, key: str, default=None): + item = self.get(key) + if item is None: + return default + if item.content is not None: + return item.content + return item.meta.get("value", default) + + def has(self, key: str) -> bool: + return key in self._by_key + + def all_items(self) -> list[ArtifactItem]: + return list(self._by_id.values()) + + def _as_content(self, value): + if value is None: + return None + if isinstance(value, str): + return value + return None diff --git a/app/modules/agent/engine/orchestrator/evidence_store.py b/app/modules/agent/engine/orchestrator/evidence_store.py new file mode 100644 index 0000000..7197978 --- /dev/null +++ b/app/modules/agent/engine/orchestrator/evidence_store.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from app.modules.agent.engine.orchestrator.models import EvidenceItem + + +class EvidenceStore: + def __init__(self) -> None: + self._items: list[EvidenceItem] = [] + + def put_many(self, items: list[EvidenceItem]) -> None: + self._items.extend(items) + + def all_items(self) -> list[EvidenceItem]: + return list(self._items) diff --git a/app/modules/agent/engine/orchestrator/execution_context.py b/app/modules/agent/engine/orchestrator/execution_context.py new file mode 100644 index 0000000..f054d28 --- /dev/null +++ b/app/modules/agent/engine/orchestrator/execution_context.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from collections.abc import Awaitable, Callable +from dataclasses import dataclass +from typing import Any + +from app.modules.agent.engine.orchestrator.artifact_store import ArtifactStore +from app.modules.agent.engine.orchestrator.evidence_store import EvidenceStore +from 
@dataclass
class ExecutionContext:
    """Mutable state shared by every step of one plan execution.

    ``artifacts`` and ``evidences`` may be omitted (or passed as ``None``);
    a fresh, empty store is then created for this context.
    """

    task: TaskSpec
    plan: ExecutionPlan
    graph_resolver: GraphResolver
    graph_invoker: GraphInvoker
    progress_cb: ProgressCallback | None = None
    artifacts: ArtifactStore | None = None
    evidences: EvidenceStore | None = None

    def __post_init__(self) -> None:
        # Stores are created per instance here rather than as shared defaults.
        self.artifacts = ArtifactStore() if self.artifacts is None else self.artifacts
        self.evidences = EvidenceStore() if self.evidences is None else self.evidences
async def _run_with_retry(self, step: PlanStep, ctx: ExecutionContext) -> StepResult:
    """Execute one plan step, retrying according to ``step.retry``.

    Each attempt emits a progress event, runs the step through the step
    registry and then evaluates the step's quality gates. A failing blocking
    gate is treated like any other step error and triggers a retry.

    Returns:
        A ``SUCCESS`` result carrying the produced artifact ids and any
        non-blocking gate messages, or a ``FAILED`` / ``RETRY_EXHAUSTED``
        result (the latter when more than one attempt was allowed) describing
        the last error. ``duration_ms`` covers the final attempt only.
    """
    max_attempts = max(1, int(step.retry.max_attempts or 1))
    attempt = 0
    last_error: Exception | None = None

    # NOTE(review): ``step.timeout_sec`` is not consulted here, so a step is
    # never interrupted for running too long - confirm whether that is intended.
    while attempt < max_attempts:
        attempt += 1
        # The timer restarts on every attempt.
        started_at = time.monotonic()
        await self._emit_progress(ctx, f"orchestrator.step.{step.step_id}", step.title)

        try:
            artifact_ids = await self._steps.execute(step, ctx)
            passed, gate_messages = self._gates.check_step(step, ctx)
            if not passed:
                raise RuntimeError(";".join(gate_messages) or "step_quality_gate_failed")

            elapsed = int((time.monotonic() - started_at) * 1000)
            return StepResult(
                step_id=step.step_id,
                status=StepStatus.SUCCESS,
                produced_artifact_ids=artifact_ids,
                warnings=gate_messages,
                duration_ms=elapsed,
            )
        except Exception as exc:
            last_error = exc
            # Back off only when another attempt will follow.
            if attempt < max_attempts and step.retry.backoff_sec > 0:
                await asyncio.sleep(step.retry.backoff_sec)

    elapsed = int((time.monotonic() - started_at) * 1000)
    if last_error is None:
        error_message = "step_execution_failed"
    else:
        # Exceptions raised without arguments stringify to "", which would
        # leave the failure without any diagnostics; fall back to the class name.
        error_message = str(last_error) or type(last_error).__name__
    return StepResult(
        step_id=step.step_id,
        status=StepStatus.RETRY_EXHAUSTED if max_attempts > 1 else StepStatus.FAILED,
        error_code="step_execution_failed",
        error_message=error_message,
        duration_ms=elapsed,
    )
mode 100644 index 0000000..e1402a3 --- /dev/null +++ b/app/modules/agent/engine/orchestrator/models/__init__.py @@ -0,0 +1,51 @@ +from app.modules.agent.engine.orchestrator.models.plan import ( + ArtifactSpec, + ArtifactType, + ExecutionPlan, + PlanStatus, + PlanStep, + QualityGateRef, + RetryPolicy, +) +from app.modules.agent.engine.orchestrator.models.result import ( + ArtifactItem, + EvidenceItem, + OrchestratorResult, + StepResult, + StepStatus, +) +from app.modules.agent.engine.orchestrator.models.task_spec import ( + AttachmentRef, + FileRef, + OutputContract, + OutputSection, + RoutingMeta, + Scenario, + SourcePolicy, + TaskConstraints, + TaskSpec, +) + +__all__ = [ + "ArtifactItem", + "ArtifactSpec", + "ArtifactType", + "AttachmentRef", + "EvidenceItem", + "ExecutionPlan", + "FileRef", + "OrchestratorResult", + "OutputContract", + "OutputSection", + "PlanStatus", + "PlanStep", + "QualityGateRef", + "RetryPolicy", + "RoutingMeta", + "Scenario", + "SourcePolicy", + "StepResult", + "StepStatus", + "TaskConstraints", + "TaskSpec", +] diff --git a/app/modules/agent/engine/orchestrator/models/__pycache__/__init__.cpython-312.pyc b/app/modules/agent/engine/orchestrator/models/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..55042f6 Binary files /dev/null and b/app/modules/agent/engine/orchestrator/models/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/models/__pycache__/plan.cpython-312.pyc b/app/modules/agent/engine/orchestrator/models/__pycache__/plan.cpython-312.pyc new file mode 100644 index 0000000..8271737 Binary files /dev/null and b/app/modules/agent/engine/orchestrator/models/__pycache__/plan.cpython-312.pyc differ diff --git a/app/modules/agent/engine/orchestrator/models/__pycache__/result.cpython-312.pyc b/app/modules/agent/engine/orchestrator/models/__pycache__/result.cpython-312.pyc new file mode 100644 index 0000000..c8cf231 Binary files /dev/null and 
class PlanStatus(str, Enum):
    """Lifecycle state of an execution plan."""

    # Default for a new ExecutionPlan; PlanCompiler.compile also resets to it.
    DRAFT = "draft"
    # Set by OrchestratorService.run once the validator reports no errors.
    VALIDATED = "validated"
    # Set by ExecutionEngine.run when execution starts.
    RUNNING = "running"
    # No step failed or was skipped, and the global gates passed.
    COMPLETED = "completed"
    # A step failed / exhausted its retries, or a blocking global gate failed.
    FAILED = "failed"
    # Nothing failed, but at least one step was skipped.
    PARTIAL = "partial"
class ExecutionPlan(BaseModel):
    """Ordered list of steps, plus plan-level metadata, for one task."""

    model_config = ConfigDict(extra="forbid")

    # PlanCompiler.compile sets this to "<task_id>:<template_id>".
    plan_id: str
    task_id: str
    scenario: Scenario
    template_id: str
    template_version: str
    status: PlanStatus = PlanStatus.DRAFT
    # ExecutionEngine.run executes the steps sequentially in list order.
    steps: list[PlanStep]
    # PlanCompiler.compile fills this with the scenario and routing info.
    variables: dict[str, Any] = Field(default_factory=dict)
    # Checked by ExecutionEngine.run after all steps have been processed.
    global_gates: list[QualityGateRef] = Field(default_factory=list)
class StepResult(BaseModel):
    """Outcome of one plan step (or of the global gate check)."""

    model_config = ConfigDict(extra="forbid")

    # A plan step id; ExecutionEngine.run uses "global_gates" for a failed global gate check.
    step_id: str
    status: StepStatus
    produced_artifact_ids: list[str] = Field(default_factory=list)
    evidence_ids: list[str] = Field(default_factory=list)
    # Quality-gate messages, or the dependency issue that caused a skip.
    warnings: list[str] = Field(default_factory=list)
    error_code: str | None = None
    error_message: str | None = None
    # Milliseconds, as measured by ExecutionEngine._run_with_retry.
    duration_ms: int = 0
    token_usage: int | None = None
    replan_hint: str | None = None
class TaskConstraints(BaseModel):
    """Execution limits that PlanCompiler and PlanValidator apply to a plan."""

    model_config = ConfigDict(extra="forbid")

    # When False, PlanValidator rejects any step whose side_effect is "write".
    allow_writes: bool = False
    # PlanCompiler.compile truncates plans longer than this; PlanValidator also checks it.
    max_steps: int = 20
    # PlanCompiler.compile uses this as an upper bound for step.retry.max_attempts.
    # NOTE(review): the name says "retries" but it caps total attempts - confirm intent.
    max_retries_per_step: int = 2
    # Upper bound that PlanCompiler.compile applies to step.timeout_sec.
    step_timeout_sec: int = 120
    target_paths: list[str] = Field(default_factory=list)
class PlanValidator:
    """Static checks applied to a compiled plan before it is executed.

    Every check returns machine-readable error codes; an empty list means
    the plan passed.
    """

    def validate(self, plan: ExecutionPlan, task: TaskSpec) -> list[str]:
        """Return all validation errors for ``plan`` under ``task``'s constraints.

        A plan without steps short-circuits with a single error, because no
        other check is meaningful for it.
        """
        if not plan.steps:
            return ["execution_plan_has_no_steps"]

        errors: list[str] = []
        if len(plan.steps) > task.constraints.max_steps:
            errors.append("execution_plan_exceeds_max_steps")

        errors.extend(self._validate_step_ids(plan))
        errors.extend(self._validate_dependencies(plan))
        errors.extend(self._validate_side_effects(plan, task))
        errors.extend(self._validate_step_shape(plan))
        return errors

    def _validate_step_ids(self, plan: ExecutionPlan) -> list[str]:
        """Report every repeated occurrence of a step id."""
        seen: set[str] = set()
        out: list[str] = []
        for step in plan.steps:
            if step.step_id in seen:
                out.append(f"duplicate_step_id:{step.step_id}")
            seen.add(step.step_id)
        return out

    def _validate_dependencies(self, plan: ExecutionPlan) -> list[str]:
        """Check that dependencies exist, precede their dependants and form no cycle."""
        out: list[str] = []

        # Index of the first occurrence of each step id.
        position: dict[str, int] = {}
        for index, step in enumerate(plan.steps):
            position.setdefault(step.step_id, index)

        for index, step in enumerate(plan.steps):
            for dep in step.depends_on:
                if dep not in position:
                    out.append(f"unknown_dependency:{step.step_id}->{dep}")
                elif position[dep] > index:
                    # Steps run sequentially in list order, so a dependency
                    # declared later can never have run when this step is reached.
                    out.append(f"dependency_declared_after_step:{step.step_id}->{dep}")

        # Lightweight cycle detection for the directed dependency graph.
        graph = {step.step_id: list(step.depends_on) for step in plan.steps}
        visiting: set[str] = set()
        visited: set[str] = set()

        def dfs(node: str) -> bool:
            if node in visiting:
                return True  # back edge -> cycle
            if node in visited:
                return False
            visiting.add(node)
            if any(dfs(dep) for dep in graph.get(node, [])):
                return True
            visiting.remove(node)
            visited.add(node)
            return False

        if any(dfs(node) for node in graph):
            out.append("dependency_cycle_detected")
        return out

    def _validate_side_effects(self, plan: ExecutionPlan, task: TaskSpec) -> list[str]:
        """Reject write steps when the task does not allow writes."""
        if task.constraints.allow_writes:
            return []
        return [f"write_step_not_allowed:{step.step_id}" for step in plan.steps if step.side_effect == "write"]

    def _validate_step_shape(self, plan: ExecutionPlan) -> list[str]:
        """Check per-step invariants: graph id presence, retry count, timeout."""
        out: list[str] = []
        for step in plan.steps:
            if step.executor == "graph" and not step.graph_id:
                out.append(f"graph_step_missing_graph_id:{step.step_id}")
            if step.retry.max_attempts < 1:
                out.append(f"invalid_retry_attempts:{step.step_id}")
            if step.timeout_sec < 1:
                out.append(f"invalid_step_timeout:{step.step_id}")
        return out
class QualityGateRunner:
    """Evaluate named quality gates against the artifacts and evidence of a run.

    Every gate yields ``(ok, messages)``. A failing *blocking* gate makes the
    overall check fail; a failing non-blocking gate only contributes its
    messages. Unknown gate ids are treated as passing.
    """

    def check_step(self, step: PlanStep, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Run the gates attached to ``step``."""
        return self._run(step.quality_gates, step=step, ctx=ctx)

    def check_global(self, gates: list[QualityGateRef], ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Run plan-level gates (no step is associated with them)."""
        return self._run(gates, step=None, ctx=ctx)

    def _run(self, gates: list[QualityGateRef], *, step: PlanStep | None, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Return ``(no blocking failure, blocking messages + non-blocking messages)``."""
        failures: list[str] = []
        warnings: list[str] = []
        for gate in gates:
            ok, details = self._check(gate.gate_id, step=step, ctx=ctx)
            if ok:
                continue
            (failures if gate.blocking else warnings).extend(details)
        return not failures, failures + warnings

    def _check(self, gate_id: str, *, step: PlanStep | None, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Dispatch ``gate_id`` to its implementation."""
        checks = {
            "required_outputs": lambda: self._required_outputs(step, ctx),
            "non_empty_answer_or_changeset": lambda: self._non_empty_output(ctx),
            "changeset_required_for_write": lambda: self._changeset_required(ctx),
            "changeset_schema": lambda: self._changeset_schema(ctx),
            "evidence_required": lambda: self._evidence_required(ctx),
            "review_report_schema": lambda: self._review_schema(ctx),
            "cross_file_consistency": lambda: self._cross_file_consistency(ctx),
            "target_path_must_exist_or_be_allowed": lambda: self._target_path_gate(ctx),
            "minimal_patch_policy": lambda: self._minimal_patch_policy(ctx),
            "gherkin_syntax_lint": lambda: self._gherkin_lint(ctx),
            "coverage_of_change_intents": lambda: self._coverage_gate(ctx),
            "explain_format_hint": lambda: self._explain_hint(ctx),
        }
        fn = checks.get(gate_id)
        if fn is None:
            # Gates this runner does not know are deliberately not enforced.
            return True, []
        return fn()

    @staticmethod
    def _mapping_artifact(ctx: ExecutionContext, key: str) -> dict:
        """Return the artifact stored under ``key`` if it is mapping-like, else ``{}``.

        Report artifacts are expected to be dicts; a string or list stored
        under the same key must make the gate fail cleanly, not raise.
        """
        value = ctx.artifacts.get_content(key, {}) or {}
        return value if hasattr(value, "get") else {}

    def _required_outputs(self, step: PlanStep | None, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Every required output declared by the step must exist as an artifact."""
        if step is None:
            return True, []
        missing = [f"missing_required_artifact:{spec.key}" for spec in step.outputs if spec.required and not ctx.artifacts.has(spec.key)]
        return not missing, missing

    def _non_empty_output(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """The run must have produced a non-blank answer or a non-empty changeset."""
        answer = str(ctx.artifacts.get_content("final_answer", "") or "").strip()
        changeset = ctx.artifacts.get_content("final_changeset", []) or []
        ok = bool(answer) or (isinstance(changeset, list) and len(changeset) > 0)
        return ok, [] if ok else ["empty_final_output"]

    def _changeset_required(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """When writes are allowed, a non-empty changeset is mandatory."""
        if not ctx.task.constraints.allow_writes:
            return True, []
        changeset = ctx.artifacts.get_content("final_changeset", []) or []
        ok = isinstance(changeset, list) and len(changeset) > 0
        return ok, [] if ok else ["changeset_required_for_write"]

    def _changeset_schema(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Each change must carry a non-empty ``op`` and ``path``.

        Items may be dicts or objects exposing ``op``/``path`` attributes, the
        same two shapes that the metrics and result-assembly code accept.
        """
        changeset = ctx.artifacts.get_content("final_changeset", []) or []
        if not isinstance(changeset, list):
            return False, ["changeset_not_list"]
        for idx, item in enumerate(changeset):
            if isinstance(item, dict):
                op, path = item.get("op"), item.get("path")
            elif hasattr(item, "op") and hasattr(item, "path"):
                op, path = item.op, item.path
            else:
                return False, [f"changeset_item_not_object:{idx}"]
            if not op or not path:
                return False, [f"changeset_item_missing_fields:{idx}"]
        return True, []

    def _evidence_required(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """At least one evidence item must exist when the source policy demands it."""
        if not ctx.task.source_policy.require_evidence:
            return True, []
        evidences = ctx.evidences.all_items()
        return len(evidences) > 0, ([] if evidences else ["no_evidence_collected"])

    def _review_schema(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """The review report must contain both mandatory section headings."""
        report = str(ctx.artifacts.get_content("review_report", "") or "")
        ok = "## Findings" in report and "## Recommendations" in report
        return ok, [] if ok else ["review_report_missing_sections"]

    def _cross_file_consistency(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        report = self._mapping_artifact(ctx, "consistency_report")
        ok = bool(report.get("required_core_paths_present"))
        return ok, [] if ok else ["cross_file_consistency_failed"]

    def _target_path_gate(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        target = self._mapping_artifact(ctx, "resolved_target")
        ok = bool(str(target.get("path", "")).strip())
        return ok, [] if ok else ["target_path_not_resolved"]

    def _minimal_patch_policy(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        report = self._mapping_artifact(ctx, "patch_validation_report")
        ok = bool(report.get("safe"))
        return ok, [] if ok else ["patch_validation_failed"]

    def _gherkin_lint(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        report = self._mapping_artifact(ctx, "gherkin_lint_report")
        ok = bool(report.get("valid"))
        return ok, [] if ok else ["gherkin_lint_failed"]

    def _coverage_gate(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        report = self._mapping_artifact(ctx, "coverage_report")
        ok = bool(report.get("covered"))
        return ok, [] if ok else ["coverage_check_failed"]

    def _explain_hint(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Check that an explain-style answer embeds a mermaid/sequence diagram."""
        answer = str(ctx.artifacts.get_content("final_answer", "") or "")
        ok = "```mermaid" in answer or "sequenceDiagram" in answer
        return ok, [] if ok else ["hint:explain_answer_missing_mermaid_block"]
def _faithfulness(self, *, answer: str, changeset, evidence_count: int) -> dict:
    """Estimate how much of the output is backed by collected evidence.

    The number of claims comes from ``self._estimate_claims`` (at least 1 is
    assumed). Each evidence item is credited with supporting up to three
    claims; the score is the supported share of all claims, rounded to four
    decimals.
    """
    claims_total = max(1, self._estimate_claims(answer, changeset))

    if evidence_count > 0:
        claims_supported = min(claims_total, evidence_count * 3)
    else:
        claims_supported = 0

    return {
        "score": round(claims_supported / claims_total, 4),
        "claims_total": claims_total,
        "claims_supported": claims_supported,
        "claims_unsupported": max(0, claims_total - claims_supported),
        "evidence_items": evidence_count,
    }
def _estimate_claims(self, answer: str, changeset) -> int:
    """Heuristically estimate how many claims the output makes.

    Three independent counts are taken and the largest wins:

    * bullet lines in ``answer`` (lines starting with ``-`` or ``*``),
    * sentences in ``answer`` (split on ``.``, ``!`` or ``?`` followed by
      whitespace), capped at 12,
    * changeset items that carry a non-blank ``reason``; items may be dicts
      or objects with a ``reason`` attribute. ``changeset`` values that are
      not a list contribute nothing.
    """
    stripped_lines = (line.strip() for line in answer.splitlines())
    bullet_claims = sum(1 for line in stripped_lines if line.startswith(("-", "*")))
    sentence_claims = sum(1 for part in re.split(r"[.!?]\s+", answer) if part.strip())

    changeset_claims = 0
    if isinstance(changeset, list):
        for item in changeset:
            reason = item.get("reason") if isinstance(item, dict) else getattr(item, "reason", None)
            # A missing or None reason is not a claim; str(None) would
            # otherwise be counted as the non-blank text "None".
            if reason is not None and str(reason).strip():
                changeset_claims += 1

    return max(bullet_claims, min(sentence_claims, 12), changeset_claims)
class ResultAssembler:
    """Turn an execution context and its step results into an ``OrchestratorResult``."""

    def __init__(self, quality: QualityMetricsCalculator | None = None) -> None:
        self._quality = quality if quality else QualityMetricsCalculator()

    def assemble(self, ctx: ExecutionContext, step_results: list[StepResult]) -> OrchestratorResult:
        """Collect the final answer, changeset and run metadata.

        A blank ``final_answer`` artifact becomes ``None``. The metadata holds
        the scenario, plan identity and status, routing decision, step
        counters and the computed quality metrics.
        """
        answer_text = str(ctx.artifacts.get_content("final_answer", "") or "").strip()
        changeset = self._normalize_changeset(ctx.artifacts.get_content("final_changeset", []) or [])
        quality = self._quality.build(ctx, step_results)

        plan, routing = ctx.plan, ctx.task.routing
        plan_meta = {
            "plan_id": plan.plan_id,
            "template_id": plan.template_id,
            "template_version": plan.template_version,
            "status": plan.status.value,
        }
        route_meta = {
            "domain_id": routing.domain_id,
            "process_id": routing.process_id,
            "confidence": routing.confidence,
            "reason": routing.reason,
            "fallback_used": routing.fallback_used,
        }
        orchestrator_meta = {
            "steps_total": len(plan.steps),
            "steps_success": sum(1 for step in step_results if step.status.value == "success"),
        }
        meta = {
            "scenario": ctx.task.scenario.value,
            "plan": plan_meta,
            "route": route_meta,
            "orchestrator": orchestrator_meta,
            "quality": quality,
        }
        return OrchestratorResult(answer=answer_text or None, changeset=changeset, meta=meta, steps=step_results)

    def _normalize_changeset(self, value) -> list[ChangeItem]:
        """Coerce raw changeset entries into ``ChangeItem`` objects.

        ``ChangeItem`` instances are kept, dicts are validated into
        ``ChangeItem``; anything else, including dicts that fail validation,
        is silently dropped. A non-list ``value`` yields an empty list.
        """
        if not isinstance(value, list):
            return []
        normalized: list[ChangeItem] = []
        for raw in value:
            if isinstance(raw, ChangeItem):
                normalized.append(raw)
            elif isinstance(raw, dict):
                try:
                    parsed = ChangeItem.model_validate(raw)
                except Exception:
                    # Best effort: invalid entries are skipped, not fatal.
                    continue
                normalized.append(parsed)
        return normalized
async def run( + self, + *, + task: TaskSpec, + graph_resolver: GraphResolver, + graph_invoker: GraphInvoker, + progress_cb: ProgressCallback | None = None, + ) -> OrchestratorResult: + await self._emit_progress(progress_cb, "orchestrator.plan", "Building execution plan.") + template = self._templates.build(task) + plan = self._compiler.compile(template, task) + + errors = self._validator.validate(plan, task) + if errors: + raise AppError( + code="invalid_execution_plan", + desc=f"Execution plan validation failed: {'; '.join(errors)}", + module=ModuleName.AGENT, + ) + + plan.status = PlanStatus.VALIDATED + ctx = ExecutionContext( + task=task, + plan=plan, + graph_resolver=graph_resolver, + graph_invoker=graph_invoker, + progress_cb=progress_cb, + ) + + await self._emit_progress(progress_cb, "orchestrator.run", "Executing plan steps.") + step_results = await self._engine.run(ctx) + if plan.status == PlanStatus.FAILED: + errors = [f"{step.step_id}:{step.error_message or ','.join(step.warnings)}" for step in step_results if step.status.value != "success"] + raise AppError( + code="execution_plan_failed", + desc=f"Execution plan failed: {'; '.join(errors)}", + module=ModuleName.AGENT, + ) + result = self._assembler.assemble(ctx, step_results) + await self._emit_progress(progress_cb, "orchestrator.done", "Execution plan completed.") + return result + + async def _emit_progress(self, progress_cb: ProgressCallback | None, stage: str, message: str) -> None: + if progress_cb is None: + return + result = progress_cb(stage, message, "task_progress", {"layer": "orchestrator"}) + if inspect.isawaitable(result): + await result diff --git a/app/modules/agent/engine/orchestrator/step_registry.py b/app/modules/agent/engine/orchestrator/step_registry.py new file mode 100644 index 0000000..918809e --- /dev/null +++ b/app/modules/agent/engine/orchestrator/step_registry.py @@ -0,0 +1,123 @@ +from __future__ import annotations + +import asyncio +from collections.abc import Callable + 
+from app.modules.agent.engine.graphs.progress_registry import progress_registry +from app.modules.agent.engine.orchestrator.actions import DocsActions, EditActions, ExplainActions, GherkinActions, ReviewActions +from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext +from app.modules.agent.engine.orchestrator.models import ArtifactType, PlanStep + +StepFn = Callable[[ExecutionContext], list[str]] + + +class StepRegistry: + def __init__(self) -> None: + explain = ExplainActions() + review = ReviewActions() + docs = DocsActions() + edits = EditActions() + gherkin = GherkinActions() + + self._functions: dict[str, StepFn] = { + "collect_state": self._collect_state, + "finalize_graph_output": self._finalize_graph_output, + "collect_sources": explain.collect_sources, + "extract_logic": explain.extract_logic, + "build_sequence": explain.build_sequence, + "build_use_cases": explain.build_use_cases, + "summarize": explain.summarize, + "fetch_source_doc": review.fetch_source_doc, + "normalize_document": review.normalize_document, + "structural_check": review.structural_check, + "semantic_consistency_check": review.semantic_consistency_check, + "architecture_fit_check": review.architecture_fit_check, + "optimization_check": review.optimization_check, + "compose_review_report": review.compose_review_report, + "extract_change_intents": docs.extract_change_intents, + "map_to_doc_tree": docs.map_to_doc_tree, + "load_current_docs_context": docs.load_current_docs_context, + "generate_doc_updates": docs.generate_doc_updates, + "cross_file_validation": docs.cross_file_validation, + "build_changeset": docs.build_changeset, + "compose_summary": docs.compose_summary, + "resolve_target": edits.resolve_target, + "load_target_context": edits.load_target_context, + "plan_minimal_patch": edits.plan_minimal_patch, + "generate_patch": edits.generate_patch, + "validate_patch_safety": edits.validate_patch_safety, + "finalize_changeset": edits.finalize_changeset, + 
"compose_edit_summary": edits.compose_edit_summary, + "extract_increment_scope": gherkin.extract_increment_scope, + "partition_features": gherkin.partition_features, + "generate_gherkin_bundle": gherkin.generate_gherkin_bundle, + "lint_gherkin": gherkin.lint_gherkin, + "validate_coverage": gherkin.validate_coverage, + "compose_test_model_summary": gherkin.compose_test_model_summary, + } + + async def execute(self, step: PlanStep, ctx: ExecutionContext) -> list[str]: + if step.executor == "graph": + return await self._execute_graph_step(step, ctx) + fn = self._functions.get(step.action_id) + if fn is None: + raise RuntimeError(f"Unknown function action_id: {step.action_id}") + return fn(ctx) + + def _collect_state(self, ctx: ExecutionContext) -> list[str]: + state = { + "task_id": ctx.task.task_id, + "project_id": ctx.task.rag_session_id, + "message": ctx.task.user_message, + "progress_key": ctx.task.task_id, + "rag_context": str(ctx.task.metadata.get("rag_context", "")), + "confluence_context": str(ctx.task.metadata.get("confluence_context", "")), + "files_map": dict(ctx.task.metadata.get("files_map", {}) or {}), + } + item = ctx.artifacts.put(key="agent_state", artifact_type=ArtifactType.STRUCTURED_JSON, content=state) + return [item.artifact_id] + + async def _execute_graph_step(self, step: PlanStep, ctx: ExecutionContext) -> list[str]: + graph_key = step.graph_id or "route" + if graph_key == "route": + domain_id = ctx.task.routing.domain_id + process_id = ctx.task.routing.process_id + elif "/" in graph_key: + domain_id, process_id = graph_key.split("/", 1) + else: + raise RuntimeError(f"Unsupported graph_id: {graph_key}") + + graph = ctx.graph_resolver(domain_id, process_id) + state = ctx.artifacts.get_content("agent_state", {}) or {} + + if ctx.progress_cb is not None: + progress_registry.register(ctx.task.task_id, ctx.progress_cb) + try: + result = await asyncio.to_thread(ctx.graph_invoker, graph, state, ctx.task.dialog_session_id) + finally: + if 
ctx.progress_cb is not None: + progress_registry.unregister(ctx.task.task_id) + + item = ctx.artifacts.put(key="graph_result", artifact_type=ArtifactType.STRUCTURED_JSON, content=result) + return [item.artifact_id] + + def _finalize_graph_output(self, ctx: ExecutionContext) -> list[str]: + raw = ctx.artifacts.get_content("graph_result", {}) or {} + if not isinstance(raw, dict): + raise RuntimeError("graph_result must be an object") + + answer = raw.get("answer") + changeset = raw.get("changeset") or [] + output = [ + ctx.artifacts.put( + key="final_answer", + artifact_type=ArtifactType.TEXT, + content=(str(answer) if answer is not None else ""), + ).artifact_id, + ctx.artifacts.put( + key="final_changeset", + artifact_type=ArtifactType.CHANGESET, + content=changeset, + ).artifact_id, + ] + return output diff --git a/app/modules/agent/engine/orchestrator/task_spec_builder.py b/app/modules/agent/engine/orchestrator/task_spec_builder.py new file mode 100644 index 0000000..1c85dda --- /dev/null +++ b/app/modules/agent/engine/orchestrator/task_spec_builder.py @@ -0,0 +1,150 @@ +from __future__ import annotations + +from app.modules.agent.engine.orchestrator.models import ( + AttachmentRef, + FileRef, + OutputContract, + OutputSection, + RoutingMeta, + Scenario, + TaskConstraints, + TaskSpec, +) + + +class TaskSpecBuilder: + def build( + self, + *, + task_id: str, + dialog_session_id: str, + rag_session_id: str, + mode: str, + message: str, + route: RoutingMeta, + attachments: list[dict], + files: list[dict], + rag_context: str, + confluence_context: str, + files_map: dict[str, dict], + ) -> TaskSpec: + scenario = self._detect_scenario(mode=mode, message=message, route=route) + output_contract = self._output_contract(scenario) + constraints = self._constraints_for(scenario) + metadata = { + "rag_context": rag_context, + "confluence_context": confluence_context, + "files_map": files_map, + } + return TaskSpec( + task_id=task_id, + dialog_session_id=dialog_session_id, + 
rag_session_id=rag_session_id, + mode=mode, + user_message=message, + scenario=scenario, + routing=route, + attachments=self._map_attachments(attachments), + files=self._map_files(files), + constraints=constraints, + output_contract=output_contract, + metadata=metadata, + ) + + def _detect_scenario(self, *, mode: str, message: str, route: RoutingMeta) -> Scenario: + mode_key = (mode or "").strip().lower() + text = (message or "").strip().lower() + + if mode_key == "analytics_review": + return Scenario.ANALYTICS_REVIEW + if "gherkin" in text or "cucumber" in text: + return Scenario.GHERKIN_MODEL + if any(token in text for token in ("review analytics", "ревью аналитики", "проведи ревью")): + return Scenario.ANALYTICS_REVIEW + if any(token in text for token in ("сформируй документацию", "документацию из аналитики", "generate docs")): + return Scenario.DOCS_FROM_ANALYTICS + if any(token in text for token in ("точечн", "измени файл", "targeted edit", "patch file")): + return Scenario.TARGETED_EDIT + if route.domain_id == "project" and route.process_id == "edits": + return Scenario.TARGETED_EDIT + if route.domain_id == "docs" and route.process_id == "generation": + return Scenario.DOCS_FROM_ANALYTICS + if route.domain_id == "project" and route.process_id == "qa" and self._looks_like_explain_request(text): + return Scenario.EXPLAIN_PART + if route.domain_id == "project" and route.process_id == "qa" and "review" in text: + return Scenario.ANALYTICS_REVIEW + return Scenario.GENERAL_QA + + def _looks_like_explain_request(self, text: str) -> bool: + markers = ( + "explain", + "how it works", + "sequence", + "diagram", + "obiasni", + "kak rabotaet", + "kak ustroeno", + "объясни", + "как работает", + "как устроен", + "диаграм", + ) + return any(marker in text for marker in markers) + + def _map_attachments(self, attachments: list[dict]) -> list[AttachmentRef]: + mapped: list[AttachmentRef] = [] + for item in attachments: + value = str(item.get("url") or item.get("value") or 
"").strip() + if not value: + continue + raw_type = str(item.get("type") or "http_url").strip().lower() + attachment_type = raw_type if raw_type in {"confluence_url", "http_url", "file_ref"} else "http_url" + mapped.append(AttachmentRef(type=attachment_type, value=value)) + return mapped + + def _map_files(self, files: list[dict]) -> list[FileRef]: + mapped: list[FileRef] = [] + for item in files: + path = str(item.get("path") or "").replace("\\", "/").strip() + if not path: + continue + mapped.append( + FileRef( + path=path, + content=str(item.get("content") or ""), + content_hash=str(item.get("content_hash") or ""), + ) + ) + return mapped + + def _constraints_for(self, scenario: Scenario) -> TaskConstraints: + if scenario in {Scenario.DOCS_FROM_ANALYTICS, Scenario.TARGETED_EDIT, Scenario.GHERKIN_MODEL}: + return TaskConstraints(allow_writes=True, max_steps=16, max_retries_per_step=2, step_timeout_sec=120) + return TaskConstraints(allow_writes=False, max_steps=12, max_retries_per_step=2, step_timeout_sec=90) + + def _output_contract(self, scenario: Scenario) -> OutputContract: + if scenario == Scenario.EXPLAIN_PART: + return OutputContract( + result_type="answer", + sections=[ + OutputSection(name="sequence_diagram", format="mermaid"), + OutputSection(name="use_cases", format="markdown"), + OutputSection(name="summary", format="markdown"), + ], + ) + if scenario == Scenario.ANALYTICS_REVIEW: + return OutputContract( + result_type="review_report", + sections=[ + OutputSection(name="findings", format="markdown"), + OutputSection(name="recommendations", format="markdown"), + ], + ) + if scenario in {Scenario.DOCS_FROM_ANALYTICS, Scenario.TARGETED_EDIT}: + return OutputContract(result_type="changeset", sections=[OutputSection(name="changeset", format="changeset")]) + if scenario == Scenario.GHERKIN_MODEL: + return OutputContract( + result_type="gherkin_bundle", + sections=[OutputSection(name="gherkin_bundle", format="gherkin")], + ) + return 
OutputContract(result_type="answer", sections=[OutputSection(name="summary", format="markdown")]) diff --git a/app/modules/agent/engine/orchestrator/template_registry.py b/app/modules/agent/engine/orchestrator/template_registry.py new file mode 100644 index 0000000..d1b6ff1 --- /dev/null +++ b/app/modules/agent/engine/orchestrator/template_registry.py @@ -0,0 +1,150 @@ +from __future__ import annotations + +from app.modules.agent.engine.orchestrator.models import ArtifactSpec, ArtifactType, ExecutionPlan, PlanStep, QualityGateRef, Scenario, TaskSpec + + +class ScenarioTemplateRegistry: + def build(self, task: TaskSpec) -> ExecutionPlan: + builders = { + Scenario.EXPLAIN_PART: self._explain, + Scenario.ANALYTICS_REVIEW: self._review, + Scenario.DOCS_FROM_ANALYTICS: self._docs, + Scenario.TARGETED_EDIT: self._edit, + Scenario.GHERKIN_MODEL: self._gherkin, + Scenario.GENERAL_QA: self._general, + } + return builders.get(task.scenario, self._general)(task) + + def _general(self, task: TaskSpec) -> ExecutionPlan: + steps = [ + self._step("collect_state", "Collect state", "collect_state", outputs=[self._out("agent_state", ArtifactType.STRUCTURED_JSON)]), + self._step( + "execute_route_graph", + "Execute selected graph", + "execute_route_graph", + executor="graph", + graph_id="route", + depends_on=["collect_state"], + outputs=[self._out("graph_result", ArtifactType.STRUCTURED_JSON)], + gates=[self._gate("required_outputs")], + ), + self._step( + "finalize_graph_output", + "Finalize graph output", + "finalize_graph_output", + depends_on=["execute_route_graph"], + outputs=[self._out("final_answer", ArtifactType.TEXT, required=False)], + gates=[self._gate("non_empty_answer_or_changeset")], + ), + ] + return self._plan(task, "general_qa_v1", steps, [self._gate("non_empty_answer_or_changeset")]) + + def _explain(self, task: TaskSpec) -> ExecutionPlan: + steps = [ + self._step("collect_sources", "Collect sources", "collect_sources", outputs=[self._out("sources", 
ArtifactType.STRUCTURED_JSON)]), + self._step("extract_logic", "Extract logic", "extract_logic", depends_on=["collect_sources"], outputs=[self._out("logic_model", ArtifactType.STRUCTURED_JSON)]), + self._step("build_sequence", "Build sequence", "build_sequence", depends_on=["extract_logic"], outputs=[self._out("sequence_diagram", ArtifactType.TEXT)]), + self._step("build_use_cases", "Build use cases", "build_use_cases", depends_on=["extract_logic"], outputs=[self._out("use_cases", ArtifactType.TEXT)]), + self._step("summarize", "Summarize", "summarize", depends_on=["build_sequence", "build_use_cases"], outputs=[self._out("final_answer", ArtifactType.TEXT)]), + ] + return self._plan(task, "explain_part_v1", steps, [self._gate("evidence_required"), self._gate("non_empty_answer_or_changeset")]) + + def _review(self, task: TaskSpec) -> ExecutionPlan: + steps = [ + self._step("fetch_source_doc", "Fetch source doc", "fetch_source_doc", outputs=[self._out("source_doc_raw", ArtifactType.TEXT)], side_effect="external"), + self._step("normalize_document", "Normalize document", "normalize_document", depends_on=["fetch_source_doc"], outputs=[self._out("source_doc_text", ArtifactType.TEXT)]), + self._step("structural_check", "Structural check", "structural_check", depends_on=["normalize_document"], outputs=[self._out("structural_findings", ArtifactType.STRUCTURED_JSON)]), + self._step("semantic_consistency_check", "Semantic check", "semantic_consistency_check", depends_on=["normalize_document"], outputs=[self._out("semantic_findings", ArtifactType.STRUCTURED_JSON)]), + self._step("architecture_fit_check", "Architecture fit", "architecture_fit_check", depends_on=["normalize_document"], outputs=[self._out("architecture_findings", ArtifactType.STRUCTURED_JSON)]), + self._step("optimization_check", "Optimization check", "optimization_check", depends_on=["normalize_document"], outputs=[self._out("optimization_findings", ArtifactType.STRUCTURED_JSON)]), + self._step( + 
"compose_review_report", + "Compose review report", + "compose_review_report", + depends_on=["structural_check", "semantic_consistency_check", "architecture_fit_check", "optimization_check"], + outputs=[self._out("review_report", ArtifactType.REVIEW_REPORT), self._out("final_answer", ArtifactType.TEXT)], + gates=[self._gate("review_report_schema")], + ), + ] + return self._plan(task, "analytics_review_v1", steps, [self._gate("evidence_required"), self._gate("non_empty_answer_or_changeset")]) + + def _docs(self, task: TaskSpec) -> ExecutionPlan: + steps = [ + self._step("fetch_source_doc", "Fetch source doc", "fetch_source_doc", outputs=[self._out("source_doc_raw", ArtifactType.TEXT)], side_effect="external"), + self._step("normalize_document", "Normalize document", "normalize_document", depends_on=["fetch_source_doc"], outputs=[self._out("source_doc_text", ArtifactType.TEXT)]), + self._step("extract_change_intents", "Extract intents", "extract_change_intents", depends_on=["normalize_document"], outputs=[self._out("change_intents", ArtifactType.STRUCTURED_JSON)]), + self._step("map_to_doc_tree", "Map to doc tree", "map_to_doc_tree", depends_on=["extract_change_intents"], outputs=[self._out("doc_targets", ArtifactType.STRUCTURED_JSON)]), + self._step("load_current_docs_context", "Load current docs", "load_current_docs_context", depends_on=["map_to_doc_tree"], outputs=[self._out("current_docs_context", ArtifactType.STRUCTURED_JSON)]), + self._step("generate_doc_updates", "Generate doc updates", "generate_doc_updates", depends_on=["load_current_docs_context"], outputs=[self._out("generated_doc_bundle", ArtifactType.DOC_BUNDLE)], side_effect="write"), + self._step("cross_file_validation", "Cross-file validation", "cross_file_validation", depends_on=["generate_doc_updates"], outputs=[self._out("consistency_report", ArtifactType.STRUCTURED_JSON)], gates=[self._gate("cross_file_consistency")]), + self._step("build_changeset", "Build changeset", "build_changeset", 
depends_on=["cross_file_validation"], outputs=[self._out("final_changeset", ArtifactType.CHANGESET)], side_effect="write"), + self._step("compose_summary", "Compose summary", "compose_summary", depends_on=["build_changeset"], outputs=[self._out("final_answer", ArtifactType.TEXT)]), + ] + return self._plan(task, "docs_from_analytics_v1", steps, [self._gate("changeset_required_for_write"), self._gate("changeset_schema")]) + + def _edit(self, task: TaskSpec) -> ExecutionPlan: + steps = [ + self._step("resolve_target", "Resolve target", "resolve_target", outputs=[self._out("resolved_target", ArtifactType.STRUCTURED_JSON)], gates=[self._gate("target_path_must_exist_or_be_allowed")]), + self._step("load_target_context", "Load target context", "load_target_context", depends_on=["resolve_target"], outputs=[self._out("target_context", ArtifactType.STRUCTURED_JSON)]), + self._step("plan_minimal_patch", "Plan minimal patch", "plan_minimal_patch", depends_on=["load_target_context"], outputs=[self._out("patch_plan", ArtifactType.STRUCTURED_JSON)]), + self._step("generate_patch", "Generate patch", "generate_patch", depends_on=["plan_minimal_patch"], outputs=[self._out("raw_changeset", ArtifactType.CHANGESET)], side_effect="write"), + self._step("validate_patch_safety", "Validate patch", "validate_patch_safety", depends_on=["generate_patch"], outputs=[self._out("patch_validation_report", ArtifactType.STRUCTURED_JSON)], gates=[self._gate("minimal_patch_policy")]), + self._step("finalize_changeset", "Finalize changeset", "finalize_changeset", depends_on=["validate_patch_safety"], outputs=[self._out("final_changeset", ArtifactType.CHANGESET)], side_effect="write"), + self._step("compose_edit_summary", "Compose summary", "compose_edit_summary", depends_on=["finalize_changeset"], outputs=[self._out("final_answer", ArtifactType.TEXT)]), + ] + return self._plan(task, "targeted_edit_v1", steps, [self._gate("changeset_required_for_write"), self._gate("changeset_schema")]) + + def 
_gherkin(self, task: TaskSpec) -> ExecutionPlan: + steps = [ + self._step("fetch_source_doc", "Fetch source doc", "fetch_source_doc", outputs=[self._out("source_doc_raw", ArtifactType.TEXT)], side_effect="external"), + self._step("normalize_document", "Normalize document", "normalize_document", depends_on=["fetch_source_doc"], outputs=[self._out("source_doc_text", ArtifactType.TEXT)]), + self._step("extract_increment_scope", "Extract increment scope", "extract_increment_scope", depends_on=["normalize_document"], outputs=[self._out("increment_scope", ArtifactType.STRUCTURED_JSON)]), + self._step("partition_features", "Partition features", "partition_features", depends_on=["extract_increment_scope"], outputs=[self._out("feature_groups", ArtifactType.STRUCTURED_JSON)]), + self._step("generate_gherkin_bundle", "Generate gherkin", "generate_gherkin_bundle", depends_on=["partition_features"], outputs=[self._out("gherkin_bundle", ArtifactType.GHERKIN_BUNDLE)], side_effect="write"), + self._step("lint_gherkin", "Lint gherkin", "lint_gherkin", depends_on=["generate_gherkin_bundle"], outputs=[self._out("gherkin_lint_report", ArtifactType.STRUCTURED_JSON)], gates=[self._gate("gherkin_syntax_lint")]), + self._step("validate_coverage", "Validate coverage", "validate_coverage", depends_on=["generate_gherkin_bundle"], outputs=[self._out("coverage_report", ArtifactType.STRUCTURED_JSON)], gates=[self._gate("coverage_of_change_intents")]), + self._step("compose_test_model_summary", "Compose summary", "compose_test_model_summary", depends_on=["lint_gherkin", "validate_coverage"], outputs=[self._out("final_answer", ArtifactType.TEXT), self._out("final_changeset", ArtifactType.CHANGESET)], side_effect="write"), + ] + return self._plan(task, "gherkin_model_v1", steps, [self._gate("changeset_schema"), self._gate("non_empty_answer_or_changeset")]) + + def _plan(self, task: TaskSpec, template_id: str, steps: list[PlanStep], gates: list[QualityGateRef]) -> ExecutionPlan: + return 
ExecutionPlan( + plan_id=f"{task.task_id}:{template_id}", + task_id=task.task_id, + scenario=task.scenario, + template_id=template_id, + template_version="1.0", + steps=steps, + global_gates=gates, + ) + + def _step( + self, + step_id: str, + title: str, + action_id: str, + *, + executor: str = "function", + graph_id: str | None = None, + depends_on: list[str] | None = None, + outputs: list[ArtifactSpec] | None = None, + gates: list[QualityGateRef] | None = None, + side_effect: str = "read", + ) -> PlanStep: + return PlanStep( + step_id=step_id, + title=title, + action_id=action_id, + executor=executor, + graph_id=graph_id, + depends_on=depends_on or [], + outputs=outputs or [], + quality_gates=gates or [], + side_effect=side_effect, + ) + + def _out(self, key: str, artifact_type: ArtifactType, *, required: bool = True) -> ArtifactSpec: + return ArtifactSpec(key=key, type=artifact_type, required=required) + + def _gate(self, gate_id: str, *, blocking: bool = True) -> QualityGateRef: + return QualityGateRef(gate_id=gate_id, blocking=blocking) diff --git a/app/modules/agent/engine/router/__init__.py b/app/modules/agent/engine/router/__init__.py index cc36c49..50da4fa 100644 --- a/app/modules/agent/engine/router/__init__.py +++ b/app/modules/agent/engine/router/__init__.py @@ -1,20 +1,25 @@ from pathlib import Path +from typing import TYPE_CHECKING -from app.modules.agent.engine.graphs import ( - BaseGraphFactory, - DocsGraphFactory, - ProjectEditsGraphFactory, - ProjectQaGraphFactory, -) -from app.modules.agent.repository import AgentRepository from app.modules.agent.llm import AgentLlmService -from app.modules.agent.engine.router.context_store import RouterContextStore -from app.modules.agent.engine.router.intent_classifier import IntentClassifier -from app.modules.agent.engine.router.registry import IntentRegistry -from app.modules.agent.engine.router.router_service import RouterService + +if TYPE_CHECKING: + from app.modules.agent.repository import AgentRepository 
+ from app.modules.agent.engine.router.router_service import RouterService -def build_router_service(llm: AgentLlmService, agent_repository: AgentRepository) -> RouterService: +def build_router_service(llm: AgentLlmService, agent_repository: "AgentRepository") -> "RouterService": + from app.modules.agent.engine.graphs import ( + BaseGraphFactory, + DocsGraphFactory, + ProjectEditsGraphFactory, + ProjectQaGraphFactory, + ) + from app.modules.agent.engine.router.context_store import RouterContextStore + from app.modules.agent.engine.router.intent_classifier import IntentClassifier + from app.modules.agent.engine.router.registry import IntentRegistry + from app.modules.agent.engine.router.router_service import RouterService + registry_path = Path(__file__).resolve().parent / "intents_registry.yaml" registry = IntentRegistry(registry_path=registry_path) registry.register("default", "general", BaseGraphFactory(llm).build) @@ -31,4 +36,4 @@ def build_router_service(llm: AgentLlmService, agent_repository: AgentRepository ) -__all__ = ["build_router_service", "IntentRegistry", "RouterService"] +__all__ = ["build_router_service"] diff --git a/app/modules/agent/engine/router/__pycache__/__init__.cpython-312.pyc b/app/modules/agent/engine/router/__pycache__/__init__.cpython-312.pyc index 1284b7e..12f70d9 100644 Binary files a/app/modules/agent/engine/router/__pycache__/__init__.cpython-312.pyc and b/app/modules/agent/engine/router/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/modules/agent/module.py b/app/modules/agent/module.py index 03c547b..78cf353 100644 --- a/app/modules/agent/module.py +++ b/app/modules/agent/module.py @@ -5,6 +5,8 @@ from app.modules.agent.changeset_validator import ChangeSetValidator from app.modules.agent.confluence_service import ConfluenceService from app.modules.agent.llm import AgentLlmService from app.modules.agent.prompt_loader import PromptLoader +from app.modules.agent.story_context_repository import StoryContextRepository 
+from app.modules.agent.story_session_recorder import StorySessionRecorder from app.modules.agent.service import GraphAgentRuntime from app.modules.agent.repository import AgentRepository from app.modules.contracts import RagRetriever @@ -18,26 +20,34 @@ class ConfluenceFetchRequest(BaseModel): class AgentModule: - def __init__(self, rag_retriever: RagRetriever, agent_repository: AgentRepository) -> None: + def __init__( + self, + rag_retriever: RagRetriever, + agent_repository: AgentRepository, + story_context_repository: StoryContextRepository, + ) -> None: self.confluence = ConfluenceService() self.changeset_validator = ChangeSetValidator() + self.story_context_repository = story_context_repository settings = GigaChatSettings.from_env() token_provider = GigaChatTokenProvider(settings) client = GigaChatClient(settings, token_provider) prompt_loader = PromptLoader() llm = AgentLlmService(client=client, prompts=prompt_loader) + story_recorder = StorySessionRecorder(story_context_repository) self.runtime = GraphAgentRuntime( rag=rag_retriever, confluence=self.confluence, changeset_validator=self.changeset_validator, llm=llm, agent_repository=agent_repository, + story_recorder=story_recorder, ) def internal_router(self) -> APIRouter: - router = APIRouter(prefix="/internal/tools/confluence", tags=["internal-confluence"]) + router = APIRouter(prefix="/internal/tools", tags=["internal-tools"]) - @router.post("/fetch") + @router.post("/confluence/fetch") async def fetch_page(request: ConfluenceFetchRequest) -> dict: return await self.confluence.fetch_page(str(request.url)) diff --git a/app/modules/agent/prompts/project_edits_apply.txt b/app/modules/agent/prompts/project_edits_apply.txt deleted file mode 100644 index 09cc113..0000000 --- a/app/modules/agent/prompts/project_edits_apply.txt +++ /dev/null @@ -1,10 +0,0 @@ -Ты вносишь правку в один файл по запросу пользователя. 
-На вход приходит JSON с request, path, reason, current_content, previous_validation_feedback, rag_context, confluence_context. - -Верни только полное итоговое содержимое файла (без JSON). - -Критичные правила: -- Измени только те части, которые нужны по запросу. -- Не переписывай файл целиком без необходимости. -- Сохрани структуру, стиль и все нерелевантные разделы без изменений. -- Если данных недостаточно, внеси минимально безопасную правку и явно отрази ограничение в тексте файла. diff --git a/app/modules/agent/prompts/project_edits_hunks.txt b/app/modules/agent/prompts/project_edits_hunks.txt new file mode 100644 index 0000000..dad0294 --- /dev/null +++ b/app/modules/agent/prompts/project_edits_hunks.txt @@ -0,0 +1,32 @@ +Ты формируешь hunks строго по контракту правок. +На вход приходит JSON с request, contract, current_content, previous_validation_feedback, rag_context, confluence_context. + +Верни только JSON: +{ + "hunks": [ + { + "type": "append_end", + "new_text": "<текст для добавления в конец>" + } + ] +} + +Для replace_between: +{ + "type": "replace_between", + "start_anchor": "<точно как в contract>", + "end_anchor": "<точно как в contract>", + "new_text": "<новый текст между якорями>" +} + +Для replace_line_equals: +{ + "type": "replace_line_equals", + "old_line": "<точно как в contract>", + "new_text": "<новая строка/текст>" +} + +Критичные правила: +- Не выходи за рамки allowed_blocks. +- Не добавляй hunks, которых нет в контракте. +- Минимизируй изменения и не трогай нерелевантные части файла. diff --git a/app/modules/agent/prompts/project_edits_plan.txt b/app/modules/agent/prompts/project_edits_plan.txt index f0600a7..217ba53 100644 --- a/app/modules/agent/prompts/project_edits_plan.txt +++ b/app/modules/agent/prompts/project_edits_plan.txt @@ -1,15 +1,32 @@ -Ты анализируешь запрос на правки файлов проекта (не про написание нового кода). -На вход приходит JSON с request, requested_path, context_files. 
+Ты планируешь строго ограниченный контракт правок файла. +На вход приходит JSON с request, requested_path, context_files, contract_requirements. Верни только JSON: { "files": [ - {"path": "", "reason": ""} + { + "path": "README.md", + "reason": "коротко зачем меняем", + "intent": "update", + "max_hunks": 1, + "max_changed_lines": 8, + "allowed_blocks": [ + { + "type": "append_end", + "max_changed_lines": 8 + } + ] + } ] } -Правила: -- Выбирай только файлы, реально нужные для выполнения запроса. -- Не добавляй лишние файлы. -- Обычно 1-3 файла, максимум 8. -- Если в request указан конкретный файл, включи его в первую очередь. +Поддерживаемые block type: +- append_end: добавить текст только в конец файла. +- replace_between: заменить текст только между start_anchor и end_anchor. +- replace_line_equals: заменить только строку old_line. + +Критичные правила: +- Обязательно задавай allowed_blocks для каждого файла. +- Не добавляй файлы, которых нет в запросе. +- Точечные запросы: max_hunks=1 и маленький max_changed_lines. +- Если запрос "добавь в конец", используй append_end. diff --git a/app/modules/agent/prompts/project_edits_self_check.txt b/app/modules/agent/prompts/project_edits_self_check.txt index bb00a32..4cc5ddf 100644 --- a/app/modules/agent/prompts/project_edits_self_check.txt +++ b/app/modules/agent/prompts/project_edits_self_check.txt @@ -1,8 +1,9 @@ Ты валидируешь changeset правок файла. -На вход приходит JSON с request и changeset (op, path, reason). +На вход приходит JSON с request, contracts и changeset (op, path, reason). 
def save_quality_metrics(
    self,
    *,
    task_id: str,
    dialog_session_id: str,
    rag_session_id: str,
    scenario: str,
    domain_id: str,
    process_id: str,
    quality: dict,
) -> None:
    """Insert one row into ``agent_quality_metrics`` and commit it.

    Scalar columns are extracted from ``quality``; the whole payload is also
    stored as JSONB in ``metrics_json``. Missing or falsy numeric fields are
    stored as zero and a missing status as ``"unknown"``.

    Args:
        task_id: Value for the ``task_id`` column.
        dialog_session_id: Value for the ``dialog_session_id`` column.
        rag_session_id: Value for the ``rag_session_id`` column.
        scenario: Value for the ``scenario`` column.
        domain_id: Value for the ``domain_id`` column.
        process_id: Value for the ``process_id`` column.
        quality: Quality payload. The keys read here are ``faithfulness``
            (``score``, ``claims_total``, ``claims_supported``), ``coverage``
            (``score``, ``required_count``, ``covered_count``) and ``status``.
            A non-dict value is treated as an empty payload.

    Raises:
        ValueError, TypeError: If a numeric field holds a value that cannot
            be converted by ``float()`` / ``int()``.
    """
    payload = quality if isinstance(quality, dict) else {}

    def section(name: str) -> dict:
        # ``payload.get(name, {})`` hands back None (or any other non-dict)
        # when the key is present with such a value; the ``.get`` lookups
        # below would then raise AttributeError, so normalise to a dict.
        value = payload.get(name)
        return value if isinstance(value, dict) else {}

    faithfulness = section("faithfulness")
    coverage = section("coverage")

    params = {
        "task_id": task_id,
        "dialog_session_id": dialog_session_id,
        "rag_session_id": rag_session_id,
        "scenario": scenario,
        "domain_id": domain_id,
        "process_id": process_id,
        # ``or 0`` maps an explicit None (or empty value) to zero before casting.
        "faithfulness_score": float(faithfulness.get("score", 0.0) or 0.0),
        "coverage_score": float(coverage.get("score", 0.0) or 0.0),
        "faithfulness_claims_total": int(faithfulness.get("claims_total", 0) or 0),
        "faithfulness_claims_supported": int(faithfulness.get("claims_supported", 0) or 0),
        "coverage_required_items": int(coverage.get("required_count", 0) or 0),
        "coverage_covered_items": int(coverage.get("covered_count", 0) or 0),
        "quality_status": str(payload.get("status", "unknown")),
        "metrics_json": json.dumps(payload, ensure_ascii=False),
    }
    statement = text(
        """
        INSERT INTO agent_quality_metrics (
            task_id,
            dialog_session_id,
            rag_session_id,
            scenario,
            domain_id,
            process_id,
            faithfulness_score,
            coverage_score,
            faithfulness_claims_total,
            faithfulness_claims_supported,
            coverage_required_items,
            coverage_covered_items,
            quality_status,
            metrics_json
        ) VALUES (
            :task_id,
            :dialog_session_id,
            :rag_session_id,
            :scenario,
            :domain_id,
            :process_id,
            :faithfulness_score,
            :coverage_score,
            :faithfulness_claims_total,
            :faithfulness_claims_supported,
            :coverage_required_items,
            :coverage_covered_items,
            :quality_status,
            CAST(:metrics_json AS JSONB)
        )
        """
    )
    with get_engine().connect() as conn:
        conn.execute(statement, params)
        conn.commit()
coverage_covered_items, + quality_status, + metrics_json, + created_at + FROM agent_quality_metrics + """ + params: dict = {"limit": max(1, int(limit))} + if scenario: + query += " WHERE scenario = :scenario" + params["scenario"] = scenario + query += " ORDER BY created_at DESC LIMIT :limit" + + with get_engine().connect() as conn: + rows = conn.execute(text(query), params).mappings().fetchall() + return [dict(row) for row in rows] diff --git a/app/modules/agent/service.py b/app/modules/agent/service.py index a1036ae..9467b56 100644 --- a/app/modules/agent/service.py +++ b/app/modules/agent/service.py @@ -1,13 +1,15 @@ from dataclasses import dataclass, field from collections.abc import Awaitable, Callable import inspect -import asyncio import logging import re +from app.modules.agent.engine.orchestrator import OrchestratorService, TaskSpecBuilder +from app.modules.agent.engine.orchestrator.metrics_persister import MetricsPersister +from app.modules.agent.engine.orchestrator.models import RoutingMeta from app.modules.agent.engine.router import build_router_service -from app.modules.agent.engine.graphs.progress_registry import progress_registry from app.modules.agent.llm import AgentLlmService +from app.modules.agent.story_session_recorder import StorySessionRecorder from app.modules.agent.changeset_validator import ChangeSetValidator from app.modules.agent.confluence_service import ConfluenceService from app.modules.agent.repository import AgentRepository @@ -21,6 +23,13 @@ from app.schemas.common import ModuleName LOGGER = logging.getLogger(__name__) +def _truncate_for_log(text: str | None, max_chars: int = 1500) -> str: + value = (text or "").replace("\n", "\\n").strip() + if len(value) <= max_chars: + return value + return value[:max_chars].rstrip() + "...[truncated]" + + @dataclass class AgentResult: result_type: TaskResultType @@ -37,11 +46,16 @@ class GraphAgentRuntime: changeset_validator: ChangeSetValidator, llm: AgentLlmService, agent_repository: 
AgentRepository, + story_recorder: StorySessionRecorder | None = None, ) -> None: self._rag = rag self._confluence = confluence self._changeset_validator = changeset_validator self._router = build_router_service(llm, agent_repository) + self._task_spec_builder = TaskSpecBuilder() + self._orchestrator = OrchestratorService() + self._metrics_persister = MetricsPersister(agent_repository) + self._story_recorder = story_recorder self._checkpointer = None async def run( @@ -64,45 +78,71 @@ class GraphAgentRuntime: ) await self._emit_progress(progress_cb, "agent.route", "Определяю тип запроса и подбираю граф.", meta={"mode": mode}) route = self._router.resolve(message, dialog_session_id, mode=mode) + LOGGER.warning( + "router decision: task_id=%s dialog_session_id=%s mode=%s route=%s/%s reason=%s confidence=%s fallback_used=%s", + task_id, + dialog_session_id, + mode, + route.domain_id, + route.process_id, + route.reason, + route.confidence, + route.fallback_used, + ) await self._emit_progress( progress_cb, "agent.route.resolved", "Маршрут выбран, готовлю контекст для выполнения.", meta={"domain_id": route.domain_id, "process_id": route.process_id}, ) - graph = self._resolve_graph(route.domain_id, route.process_id) files_map = self._build_files_map(files) await self._emit_progress(progress_cb, "agent.rag", "Собираю релевантный контекст из RAG.") rag_ctx = await self._rag.retrieve(rag_session_id, message) await self._emit_progress(progress_cb, "agent.attachments", "Обрабатываю дополнительные вложения.") conf_pages = await self._fetch_confluence_pages(attachments) - state = { - "task_id": task_id, - "project_id": rag_session_id, - "message": message, - "progress_key": task_id, - "rag_context": self._format_rag(rag_ctx), - "confluence_context": self._format_confluence(conf_pages), - "files_map": files_map, - } + route_meta = RoutingMeta( + domain_id=route.domain_id, + process_id=route.process_id, + confidence=route.confidence, + reason=route.reason, + 
fallback_used=route.fallback_used, + ) + task_spec = self._task_spec_builder.build( + task_id=task_id, + dialog_session_id=dialog_session_id, + rag_session_id=rag_session_id, + mode=mode, + message=message, + route=route_meta, + attachments=attachments, + files=files, + rag_context=self._format_rag(rag_ctx), + confluence_context=self._format_confluence(conf_pages), + files_map=files_map, + ) - await self._emit_progress(progress_cb, "agent.graph", "Запускаю выполнение графа.") - if progress_cb is not None: - progress_registry.register(task_id, progress_cb) - try: - result = await asyncio.to_thread( - self._invoke_graph, - graph, - state, - dialog_session_id, - ) - finally: - if progress_cb is not None: - progress_registry.unregister(task_id) - await self._emit_progress(progress_cb, "agent.graph.done", "Граф завершил обработку результата.") - answer = result.get("answer") - changeset = result.get("changeset") or [] + await self._emit_progress(progress_cb, "agent.orchestrator", "Строю и выполняю план оркестрации.") + orchestrator_result = await self._orchestrator.run( + task=task_spec, + graph_resolver=self._resolve_graph, + graph_invoker=self._invoke_graph, + progress_cb=progress_cb, + ) + await self._emit_progress(progress_cb, "agent.orchestrator.done", "Оркестратор завершил выполнение плана.") + answer = orchestrator_result.answer + changeset = orchestrator_result.changeset or [] + orchestrator_meta = orchestrator_result.meta or {} + quality_meta = self._extract_quality_meta(orchestrator_meta) + orchestrator_steps = [item.model_dump(mode="json") for item in orchestrator_result.steps] + self._record_session_story_artifacts( + dialog_session_id=dialog_session_id, + rag_session_id=rag_session_id, + scenario=str(orchestrator_meta.get("scenario", task_spec.scenario.value)), + attachments=[a.model_dump(mode="json") for a in task_spec.attachments], + answer=answer, + changeset=changeset, + ) if changeset: await self._emit_progress(progress_cb, "agent.changeset", "Проверяю 
и валидирую предложенные изменения.") changeset = self._enrich_changeset_hashes(changeset, files_map) @@ -117,6 +157,21 @@ class GraphAgentRuntime: user_message=message, assistant_message=final_answer, ) + LOGGER.warning( + "final agent answer: task_id=%s route=%s/%s answer=%s", + task_id, + route.domain_id, + route.process_id, + _truncate_for_log(final_answer), + ) + self._persist_quality_metrics( + task_id=task_id, + dialog_session_id=dialog_session_id, + rag_session_id=rag_session_id, + route=route, + scenario=str(orchestrator_meta.get("scenario", task_spec.scenario.value)), + quality=quality_meta, + ) return AgentResult( result_type=TaskResultType.ANSWER, answer=final_answer, @@ -125,6 +180,8 @@ class GraphAgentRuntime: "used_rag": True, "used_confluence": bool(conf_pages), "changeset_filtered_out": True, + "orchestrator": orchestrator_meta, + "orchestrator_steps": orchestrator_steps, }, ) validated = self._changeset_validator.validate(task_id, changeset) @@ -140,7 +197,21 @@ class GraphAgentRuntime: result_type=TaskResultType.CHANGESET, answer=final_answer, changeset=validated, - meta={"route": route.model_dump(), "used_rag": True, "used_confluence": bool(conf_pages)}, + meta={ + "route": route.model_dump(), + "used_rag": True, + "used_confluence": bool(conf_pages), + "orchestrator": orchestrator_meta, + "orchestrator_steps": orchestrator_steps, + }, + ) + self._persist_quality_metrics( + task_id=task_id, + dialog_session_id=dialog_session_id, + rag_session_id=rag_session_id, + route=route, + scenario=str(orchestrator_meta.get("scenario", task_spec.scenario.value)), + quality=quality_meta, ) LOGGER.warning( "GraphAgentRuntime.run completed: task_id=%s route=%s/%s result_type=%s changeset_items=%s", @@ -150,6 +221,13 @@ class GraphAgentRuntime: final.result_type.value, len(final.changeset), ) + LOGGER.warning( + "final agent answer: task_id=%s route=%s/%s answer=%s", + task_id, + route.domain_id, + route.process_id, + _truncate_for_log(final.answer), + ) return 
def _extract_quality_meta(self, orchestrator_meta: dict) -> dict:
    """Return the ``"quality"`` entry of the orchestrator metadata.

    An empty dict is returned when ``orchestrator_meta`` is not a dict, has
    no ``"quality"`` key, or holds a non-dict value under that key.
    """
    candidate = orchestrator_meta.get("quality") if isinstance(orchestrator_meta, dict) else None
    if isinstance(candidate, dict):
        return candidate
    return {}
self._story_recorder.record_run( + dialog_session_id=dialog_session_id, + rag_session_id=rag_session_id, + scenario=scenario, + attachments=attachments, + answer=answer, + changeset=changeset, + ) + except Exception: # noqa: BLE001 + LOGGER.exception("story session artifact recording failed") + async def _emit_progress( self, progress_cb: Callable[[str, str, str, dict | None], Awaitable[None] | None] | None, diff --git a/app/modules/agent/story_context_repository.py b/app/modules/agent/story_context_repository.py new file mode 100644 index 0000000..81f6dfd --- /dev/null +++ b/app/modules/agent/story_context_repository.py @@ -0,0 +1,745 @@ +from __future__ import annotations + +import json +from collections import defaultdict + +from sqlalchemy import text + +from app.modules.shared.db import get_engine + + +class StoryContextSchemaRepository: + def ensure_tables(self) -> None: + with get_engine().connect() as conn: + conn.execute( + text( + """ + CREATE TABLE IF NOT EXISTS story_records ( + story_id VARCHAR(128) PRIMARY KEY, + project_id VARCHAR(512) NOT NULL, + title TEXT NOT NULL, + status VARCHAR(64) NOT NULL DEFAULT 'draft', + owner VARCHAR(256) NULL, + metadata_json JSONB NOT NULL DEFAULT '{}'::jsonb, + baseline_commit_sha VARCHAR(128) NULL, + snapshot_id VARCHAR(128) NULL, + created_by VARCHAR(256) NULL, + updated_by VARCHAR(256) NULL, + created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP + ) + """ + ) + ) + conn.execute( + text( + """ + CREATE TABLE IF NOT EXISTS story_artifacts ( + id BIGSERIAL PRIMARY KEY, + story_id VARCHAR(128) NOT NULL, + artifact_type VARCHAR(64) NOT NULL, + revision INTEGER NOT NULL, + content TEXT NOT NULL, + content_hash VARCHAR(128) NULL, + rag_session_id VARCHAR(64) NULL, + created_by VARCHAR(256) NULL, + created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, + artifact_role VARCHAR(64) NULL, + doc_id TEXT NULL, + doc_version TEXT NULL, + path TEXT NULL, + section TEXT NULL, + chunk_id 
TEXT NULL, + change_type VARCHAR(32) NULL, + summary TEXT NULL, + source_ref TEXT NULL, + session_id VARCHAR(128) NULL, + CONSTRAINT fk_story_artifacts_story + FOREIGN KEY (story_id) + REFERENCES story_records(story_id) + ON DELETE CASCADE, + CONSTRAINT uq_story_artifact_revision + UNIQUE (story_id, artifact_type, revision) + ) + """ + ) + ) + conn.execute( + text( + """ + CREATE TABLE IF NOT EXISTS story_links ( + id BIGSERIAL PRIMARY KEY, + story_id VARCHAR(128) NOT NULL, + link_type VARCHAR(64) NOT NULL, + link_value TEXT NOT NULL, + target_ref TEXT NULL, + description TEXT NULL, + metadata_json JSONB NOT NULL DEFAULT '{}'::jsonb, + created_by VARCHAR(256) NULL, + created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, + CONSTRAINT fk_story_links_story + FOREIGN KEY (story_id) + REFERENCES story_records(story_id) + ON DELETE CASCADE + ) + """ + ) + ) + conn.execute( + text( + """ + CREATE TABLE IF NOT EXISTS session_artifacts ( + id BIGSERIAL PRIMARY KEY, + session_id VARCHAR(128) NOT NULL, + project_id VARCHAR(512) NOT NULL, + artifact_role VARCHAR(64) NOT NULL, + source_ref TEXT NULL, + doc_id TEXT NULL, + doc_version TEXT NULL, + path TEXT NULL, + section TEXT NULL, + chunk_id TEXT NULL, + change_type VARCHAR(32) NULL, + summary TEXT NULL, + created_by VARCHAR(256) NULL, + created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, + bound_story_id VARCHAR(128) NULL, + bound_at TIMESTAMPTZ NULL + ) + """ + ) + ) + + # Backward-compatible schema upgrades for existing installations. 
+ conn.execute(text("ALTER TABLE story_records ADD COLUMN IF NOT EXISTS owner VARCHAR(256) NULL")) + conn.execute(text("ALTER TABLE story_records ADD COLUMN IF NOT EXISTS metadata_json JSONB NOT NULL DEFAULT '{}'::jsonb")) + conn.execute(text("ALTER TABLE story_records ADD COLUMN IF NOT EXISTS baseline_commit_sha VARCHAR(128) NULL")) + conn.execute(text("ALTER TABLE story_records ADD COLUMN IF NOT EXISTS snapshot_id VARCHAR(128) NULL")) + conn.execute(text("ALTER TABLE story_records ADD COLUMN IF NOT EXISTS created_by VARCHAR(256) NULL")) + conn.execute(text("ALTER TABLE story_records ADD COLUMN IF NOT EXISTS updated_by VARCHAR(256) NULL")) + + conn.execute(text("ALTER TABLE story_artifacts ADD COLUMN IF NOT EXISTS artifact_role VARCHAR(64) NULL")) + conn.execute(text("ALTER TABLE story_artifacts ADD COLUMN IF NOT EXISTS doc_id TEXT NULL")) + conn.execute(text("ALTER TABLE story_artifacts ADD COLUMN IF NOT EXISTS doc_version TEXT NULL")) + conn.execute(text("ALTER TABLE story_artifacts ADD COLUMN IF NOT EXISTS path TEXT NULL")) + conn.execute(text("ALTER TABLE story_artifacts ADD COLUMN IF NOT EXISTS section TEXT NULL")) + conn.execute(text("ALTER TABLE story_artifacts ADD COLUMN IF NOT EXISTS chunk_id TEXT NULL")) + conn.execute(text("ALTER TABLE story_artifacts ADD COLUMN IF NOT EXISTS change_type VARCHAR(32) NULL")) + conn.execute(text("ALTER TABLE story_artifacts ADD COLUMN IF NOT EXISTS summary TEXT NULL")) + conn.execute(text("ALTER TABLE story_artifacts ADD COLUMN IF NOT EXISTS source_ref TEXT NULL")) + conn.execute(text("ALTER TABLE story_artifacts ADD COLUMN IF NOT EXISTS session_id VARCHAR(128) NULL")) + + conn.execute(text("ALTER TABLE story_links ADD COLUMN IF NOT EXISTS target_ref TEXT NULL")) + conn.execute(text("ALTER TABLE story_links ADD COLUMN IF NOT EXISTS description TEXT NULL")) + conn.execute(text("ALTER TABLE story_links ADD COLUMN IF NOT EXISTS metadata_json JSONB NOT NULL DEFAULT '{}'::jsonb")) + conn.execute(text("ALTER TABLE story_links 
ADD COLUMN IF NOT EXISTS created_by VARCHAR(256) NULL")) + + conn.execute(text("ALTER TABLE session_artifacts ADD COLUMN IF NOT EXISTS bound_story_id VARCHAR(128) NULL")) + conn.execute(text("ALTER TABLE session_artifacts ADD COLUMN IF NOT EXISTS bound_at TIMESTAMPTZ NULL")) + + conn.execute(text("CREATE INDEX IF NOT EXISTS idx_story_records_project ON story_records(project_id, updated_at DESC)")) + conn.execute(text("CREATE INDEX IF NOT EXISTS idx_story_artifacts_story_type ON story_artifacts(story_id, artifact_type, revision DESC)")) + conn.execute(text("CREATE INDEX IF NOT EXISTS idx_story_links_story ON story_links(story_id, link_type)")) + conn.execute(text("CREATE INDEX IF NOT EXISTS idx_session_artifacts_session ON session_artifacts(session_id, created_at DESC)")) + conn.execute(text("CREATE INDEX IF NOT EXISTS idx_session_artifacts_project ON session_artifacts(project_id, created_at DESC)")) + conn.commit() + + +class StoryContextRepository: + def record_story_commit( + self, + *, + story_id: str, + project_id: str, + title: str, + commit_sha: str | None, + branch: str | None, + changed_files: list[str], + summary: str, + actor: str | None, + ) -> None: + with get_engine().connect() as conn: + self._upsert_story_conn( + conn, + story_id=story_id, + project_id=project_id, + title=title, + status="in_progress", + metadata={"source": "repo_webhook"}, + baseline_commit_sha=commit_sha, + updated_by=actor, + ) + if commit_sha: + self._insert_story_link( + conn, + story_id=story_id, + link_type="commit", + target_ref=commit_sha, + description="Webhook commit", + metadata={"project_id": project_id}, + created_by=actor, + ) + if branch: + self._insert_story_link( + conn, + story_id=story_id, + link_type="external", + target_ref=branch, + description="Webhook branch", + metadata={"kind": "branch"}, + created_by=actor, + ) + for path in changed_files: + self._insert_story_link( + conn, + story_id=story_id, + link_type="doc", + target_ref=path, + description="Changed 
file in commit", + metadata={"commit_sha": commit_sha}, + created_by=actor, + ) + revision = self._next_revision(conn, story_id=story_id, artifact_type="commit") + conn.execute( + text( + """ + INSERT INTO story_artifacts ( + story_id, + artifact_type, + revision, + content, + content_hash, + rag_session_id, + created_by, + artifact_role, + change_type, + summary, + source_ref + ) VALUES ( + :story_id, + :artifact_type, + :revision, + :content, + NULL, + NULL, + :created_by, + :artifact_role, + :change_type, + :summary, + :source_ref + ) + """ + ), + { + "story_id": story_id, + "artifact_type": "commit", + "revision": revision, + "content": summary[:4000] or "commit", + "created_by": actor, + "artifact_role": "doc_change", + "change_type": "linked", + "summary": summary[:4000] or "commit", + "source_ref": commit_sha or branch or "webhook", + }, + ) + conn.commit() + + def upsert_story( + self, + *, + story_id: str, + project_id: str, + title: str, + status: str = "draft", + owner: str | None = None, + metadata: dict | None = None, + baseline_commit_sha: str | None = None, + snapshot_id: str | None = None, + created_by: str | None = None, + updated_by: str | None = None, + ) -> None: + with get_engine().connect() as conn: + conn.execute( + text( + """ + INSERT INTO story_records ( + story_id, + project_id, + title, + status, + owner, + metadata_json, + baseline_commit_sha, + snapshot_id, + created_by, + updated_by + ) + VALUES ( + :story_id, + :project_id, + :title, + :status, + :owner, + CAST(:metadata_json AS JSONB), + :baseline_commit_sha, + :snapshot_id, + :created_by, + :updated_by + ) + ON CONFLICT (story_id) DO UPDATE SET + project_id = EXCLUDED.project_id, + title = EXCLUDED.title, + status = EXCLUDED.status, + owner = EXCLUDED.owner, + metadata_json = EXCLUDED.metadata_json, + baseline_commit_sha = COALESCE(EXCLUDED.baseline_commit_sha, story_records.baseline_commit_sha), + snapshot_id = COALESCE(EXCLUDED.snapshot_id, story_records.snapshot_id), + 
def add_session_artifact(
    self,
    *,
    session_id: str,
    project_id: str,
    artifact_role: str,
    summary: str,
    change_type: str | None = None,
    source_ref: str | None = None,
    doc_id: str | None = None,
    doc_version: str | None = None,
    path: str | None = None,
    section: str | None = None,
    chunk_id: str | None = None,
    created_by: str | None = None,
) -> None:
    """Insert a single row into ``session_artifacts`` and commit it.

    Every keyword argument is written to the column of the same name; the
    optional ones default to ``None`` and are stored as SQL NULL.
    """
    row = {
        "session_id": session_id,
        "project_id": project_id,
        "artifact_role": artifact_role,
        "source_ref": source_ref,
        "doc_id": doc_id,
        "doc_version": doc_version,
        "path": path,
        "section": section,
        "chunk_id": chunk_id,
        "change_type": change_type,
        "summary": summary,
        "created_by": created_by,
    }
    statement = text(
        """
        INSERT INTO session_artifacts (
            session_id,
            project_id,
            artifact_role,
            source_ref,
            doc_id,
            doc_version,
            path,
            section,
            chunk_id,
            change_type,
            summary,
            created_by
        ) VALUES (
            :session_id,
            :project_id,
            :artifact_role,
            :source_ref,
            :doc_id,
            :doc_version,
            :path,
            :section,
            :chunk_id,
            :change_type,
            :summary,
            :created_by
        )
        """
    )
    with get_engine().connect() as conn:
        conn.execute(statement, row)
        conn.commit()
project_id=project_id, + title=title, + status="in_progress", + metadata={"bound_session_id": session_id}, + baseline_commit_sha=commit_sha, + updated_by=actor, + ) + + if commit_sha: + self._insert_story_link( + conn, + story_id=story_id, + link_type="commit", + target_ref=commit_sha, + description=f"Bound from session {session_id}", + metadata={"session_id": session_id}, + created_by=actor, + ) + if branch: + self._insert_story_link( + conn, + story_id=story_id, + link_type="external", + target_ref=branch, + description="Source branch", + metadata={"kind": "branch"}, + created_by=actor, + ) + for path in changed_files or []: + self._insert_story_link( + conn, + story_id=story_id, + link_type="doc", + target_ref=path, + description="Changed file from commit", + metadata={"session_id": session_id, "commit_sha": commit_sha}, + created_by=actor, + ) + + rows = conn.execute( + text( + """ + SELECT id, artifact_role, source_ref, doc_id, doc_version, path, section, chunk_id, change_type, summary, created_by, project_id + FROM session_artifacts + WHERE session_id = :session_id AND bound_story_id IS NULL + ORDER BY id ASC + """ + ), + {"session_id": session_id}, + ).mappings().fetchall() + + revisions = self._load_revisions(conn, story_id) + migrated = 0 + for row in rows: + role = str(row["artifact_role"]) + revisions[role] += 1 + summary = str(row["summary"] or "") + content = summary or "n/a" + conn.execute( + text( + """ + INSERT INTO story_artifacts ( + story_id, + artifact_type, + revision, + content, + content_hash, + rag_session_id, + created_by, + artifact_role, + doc_id, + doc_version, + path, + section, + chunk_id, + change_type, + summary, + source_ref, + session_id + ) VALUES ( + :story_id, + :artifact_type, + :revision, + :content, + NULL, + :rag_session_id, + :created_by, + :artifact_role, + :doc_id, + :doc_version, + :path, + :section, + :chunk_id, + :change_type, + :summary, + :source_ref, + :session_id + ) + """ + ), + { + "story_id": story_id, + 
"artifact_type": role, + "revision": revisions[role], + "content": content, + "rag_session_id": str(row["project_id"]), + "created_by": actor or row["created_by"], + "artifact_role": role, + "doc_id": row["doc_id"], + "doc_version": row["doc_version"], + "path": row["path"], + "section": row["section"], + "chunk_id": row["chunk_id"], + "change_type": row["change_type"], + "summary": summary, + "source_ref": row["source_ref"], + "session_id": session_id, + }, + ) + conn.execute( + text( + """ + UPDATE session_artifacts + SET bound_story_id = :story_id, bound_at = CURRENT_TIMESTAMP + WHERE id = :id + """ + ), + {"story_id": story_id, "id": int(row["id"])}, + ) + migrated += 1 + + conn.commit() + return {"story_id": story_id, "session_id": session_id, "migrated_artifacts": migrated} + + def add_artifact( + self, + *, + story_id: str, + artifact_type: str, + revision: int, + content: str, + content_hash: str | None = None, + rag_session_id: str | None = None, + created_by: str | None = None, + ) -> None: + with get_engine().connect() as conn: + conn.execute( + text( + """ + INSERT INTO story_artifacts ( + story_id, + artifact_type, + revision, + content, + content_hash, + rag_session_id, + created_by, + artifact_role, + summary + ) VALUES ( + :story_id, + :artifact_type, + :revision, + :content, + :content_hash, + :rag_session_id, + :created_by, + :artifact_role, + :summary + ) + ON CONFLICT (story_id, artifact_type, revision) DO UPDATE SET + content = EXCLUDED.content, + content_hash = EXCLUDED.content_hash, + rag_session_id = EXCLUDED.rag_session_id, + created_by = EXCLUDED.created_by, + artifact_role = EXCLUDED.artifact_role, + summary = EXCLUDED.summary + """ + ), + { + "story_id": story_id, + "artifact_type": artifact_type, + "revision": int(revision), + "content": content, + "content_hash": content_hash, + "rag_session_id": rag_session_id, + "created_by": created_by, + "artifact_role": artifact_type, + "summary": content, + }, + ) + conn.commit() + + def 
get_story_context(self, story_id: str) -> dict | None: + with get_engine().connect() as conn: + story = conn.execute( + text( + """ + SELECT story_id, project_id, title, status, owner, metadata_json, baseline_commit_sha, snapshot_id, created_by, updated_by, created_at, updated_at + FROM story_records + WHERE story_id = :story_id + """ + ), + {"story_id": story_id}, + ).mappings().fetchone() + if not story: + return None + + artifacts = conn.execute( + text( + """ + SELECT artifact_type, artifact_role, revision, content, summary, path, section, chunk_id, change_type, doc_id, doc_version, + source_ref, session_id, content_hash, rag_session_id, created_by, created_at + FROM story_artifacts + WHERE story_id = :story_id + ORDER BY artifact_type ASC, revision DESC + """ + ), + {"story_id": story_id}, + ).mappings().fetchall() + + links = conn.execute( + text( + """ + SELECT link_type, + COALESCE(target_ref, link_value) AS target_ref, + description, + metadata_json, + created_by, + created_at + FROM story_links + WHERE story_id = :story_id + ORDER BY id ASC + """ + ), + {"story_id": story_id}, + ).mappings().fetchall() + + return { + "story": dict(story), + "artifacts": [dict(item) for item in artifacts], + "links": [dict(item) for item in links], + } + + def _upsert_story_conn( + self, + conn, + *, + story_id: str, + project_id: str, + title: str, + status: str, + metadata: dict, + baseline_commit_sha: str | None, + updated_by: str | None, + ) -> None: + conn.execute( + text( + """ + INSERT INTO story_records (story_id, project_id, title, status, metadata_json, baseline_commit_sha, updated_by) + VALUES (:story_id, :project_id, :title, :status, CAST(:metadata_json AS JSONB), :baseline_commit_sha, :updated_by) + ON CONFLICT (story_id) DO UPDATE SET + project_id = EXCLUDED.project_id, + title = EXCLUDED.title, + status = EXCLUDED.status, + metadata_json = EXCLUDED.metadata_json, + baseline_commit_sha = COALESCE(EXCLUDED.baseline_commit_sha, 
def _load_revisions(self, conn, story_id: str) -> defaultdict[str, int]:
    """Return the highest stored revision per artifact type for one story.

    The query runs on the caller's connection ``conn``. The result is a
    ``defaultdict(int)``, so artifact types without any stored row read
    as 0.
    """
    statement = text(
        """
        SELECT artifact_type, COALESCE(MAX(revision), 0) AS max_revision
        FROM story_artifacts
        WHERE story_id = :story_id
        GROUP BY artifact_type
        """
    )
    result = conn.execute(statement, {"story_id": story_id})
    latest = {
        str(record["artifact_type"]): int(record["max_revision"])
        for record in result.mappings().fetchall()
    }
    return defaultdict(int, latest)
class SessionArtifactWriter(Protocol):
    """Structural type of the storage backend used by ``StorySessionRecorder``."""

    def add_session_artifact(self, **kwargs) -> None: ...


class StorySessionRecorder:
    """Record the inputs and outputs of one agent run as session artifacts.

    Each artifact is written through ``repository.add_session_artifact``;
    the dialog session id is passed as ``session_id`` and the RAG session id
    as ``project_id``.
    """

    # Attachment types recorded as input documents; all others are ignored.
    _URL_ATTACHMENT_TYPES = frozenset({"confluence_url", "http_url"})
    # Scenario name -> artifact role; unknown scenarios use _DEFAULT_ROLE.
    _SCENARIO_ROLES = {
        "docs_from_analytics": "doc_change",
        "targeted_edit": "doc_change",
        "gherkin_model": "test_model",
        "analytics_review": "analysis",
        "explain_part": "note",
        "general_qa": "note",
    }
    _DEFAULT_ROLE = "note"
    # Upper bound on the stored answer summary, in characters.
    _MAX_SUMMARY_CHARS = 4000

    def __init__(self, repository: SessionArtifactWriter) -> None:
        self._repo = repository

    def record_run(
        self,
        *,
        dialog_session_id: str,
        rag_session_id: str,
        scenario: str,
        attachments: list[dict],
        answer: str | None,
        changeset: list[ChangeItem],
        actor: str | None = None,
    ) -> None:
        """Record URL attachments, then the answer and changeset of a run.

        Args:
            dialog_session_id: Stored as the artifacts' ``session_id``.
            rag_session_id: Stored as the artifacts' ``project_id``.
            scenario: Scenario name used to pick the output artifact role.
            attachments: Attachment dicts with ``type`` and ``value`` keys;
                ``None`` is treated as an empty list.
            answer: Final answer text; blank or ``None`` is not recorded.
            changeset: Proposed changes, one artifact each; ``None`` is
                treated as an empty list.
            actor: Stored as ``created_by`` on every artifact.
        """
        self._record_input_sources(
            session_id=dialog_session_id,
            project_id=rag_session_id,
            attachments=attachments,
            actor=actor,
        )
        self._record_outputs(
            session_id=dialog_session_id,
            project_id=rag_session_id,
            scenario=scenario,
            answer=answer,
            changeset=changeset,
            actor=actor,
        )

    def _record_input_sources(
        self,
        *,
        session_id: str,
        project_id: str,
        attachments: list[dict],
        actor: str | None,
    ) -> None:
        """Write one ``analysis`` artifact per non-empty URL attachment."""
        for item in attachments or []:
            value = str(item.get("value") or "").strip()
            if not value:
                continue
            if item.get("type") not in self._URL_ATTACHMENT_TYPES:
                continue
            self._repo.add_session_artifact(
                session_id=session_id,
                project_id=project_id,
                artifact_role="analysis",
                source_ref=value,
                summary="Input analytics document",
                change_type="linked",
                created_by=actor,
            )

    def _record_outputs(
        self,
        *,
        session_id: str,
        project_id: str,
        scenario: str,
        answer: str | None,
        changeset: list[ChangeItem],
        actor: str | None,
    ) -> None:
        """Write the answer (if not blank) and one artifact per changeset item."""
        role = self._role_for_scenario(scenario)
        stripped_answer = (answer or "").strip()
        if stripped_answer:
            self._repo.add_session_artifact(
                session_id=session_id,
                project_id=project_id,
                artifact_role=role,
                summary=stripped_answer[: self._MAX_SUMMARY_CHARS],
                created_by=actor,
            )

        for item in changeset or []:
            self._repo.add_session_artifact(
                session_id=session_id,
                project_id=project_id,
                artifact_role=role,
                path=item.path,
                summary=item.reason,
                change_type=self._change_type(item.op),
                created_by=actor,
            )

    def _role_for_scenario(self, scenario: str) -> str:
        """Map a scenario name to an artifact role (``"note"`` if unknown)."""
        return self._SCENARIO_ROLES.get(scenario, self._DEFAULT_ROLE)

    def _change_type(self, op: ChangeOp) -> str:
        """Translate a change operation into the stored ``change_type`` value."""
        if op == ChangeOp.CREATE:
            return "added"
        if op == ChangeOp.DELETE:
            return "removed"
        return "updated"
self.rag = RagModule(event_bus=self.events, retry=self.retry, repository=self.rag_repository) - self.agent = AgentModule(rag_retriever=self.rag.rag, agent_repository=self.agent_repository) + self.rag_session = RagModule(event_bus=self.events, retry=self.retry, repository=self.rag_repository) + self.rag_repo = RagRepoModule( + story_context_repository=self.story_context_repository, + rag_repository=self.rag_repository, + ) + self.agent = AgentModule( + rag_retriever=self.rag_session.rag, + agent_repository=self.agent_repository, + story_context_repository=self.story_context_repository, + ) self.chat = ChatModule( agent_runner=self.agent.runtime, event_bus=self.events, retry=self.retry, - rag_sessions=self.rag.sessions, + rag_sessions=self.rag_session.sessions, repository=self.chat_repository, ) def startup(self) -> None: - bootstrap_database(self.rag_repository, self.chat_repository, self.agent_repository) + bootstrap_database( + self.rag_repository, + self.chat_repository, + self.agent_repository, + self.story_context_schema_repository, + ) diff --git a/app/modules/chat/README.md b/app/modules/chat/README.md new file mode 100644 index 0000000..c6edc18 --- /dev/null +++ b/app/modules/chat/README.md @@ -0,0 +1,98 @@ +# Модуль chat + +## 1. Функции модуля +- Внешний API чата: создание диалога, отправка сообщения, получение статуса задачи. +- Асинхронная оркестрация выполнения через `ChatOrchestrator`. +- Idempotency и стриминг событий по SSE. + +## 2. Диаграмма классов и взаимосвязей +```mermaid +classDiagram + class ChatModule + class ChatOrchestrator + class TaskStore + class DialogSessionStore + class IdempotencyStore + class EventBus + class AgentRunner + + ChatModule --> ChatOrchestrator + ChatModule --> TaskStore + ChatModule --> DialogSessionStore + ChatModule --> IdempotencyStore + ChatModule --> EventBus + ChatOrchestrator --> AgentRunner + ChatOrchestrator --> TaskStore + ChatOrchestrator --> DialogSessionStore + ChatOrchestrator --> EventBus +``` + +## 3. 
Описание классов +- `ChatModule`: фасад модуля и регистрация публичных chat endpoint'ов. + Методы: `__init__` — собирает stores/orchestrator; `public_router` — публикует REST и SSE маршруты чата. +- `ChatOrchestrator`: выполняет жизненный цикл user-message как фоновой задачи. + Методы: `enqueue_message` — создает задачу и запускает обработку; `_process_task` — исполняет runtime и сохраняет результат; `_resolve_sessions` — валидирует и сопоставляет dialog/rag сессии. +- `TaskStore`: in-memory store состояний задач. + Методы: `create` — создает новую `TaskState`; `get` — возвращает задачу по `task_id`; `save` — обновляет состояние задачи. +- `DialogSessionStore`: хранилище dialog-сессий поверх БД. + Методы: `create` — создает новую dialog-сессию; `get` — читает dialog-сессию по id. +- `IdempotencyStore`: предотвращает дубль задач по идемпотентному ключу. + Методы: `get_task_id` — возвращает существующий `task_id` по ключу; `put` — сохраняет ключ и `task_id`. +- `EventBus`: асинхронная публикация/подписка событий. + Методы: `subscribe` — создает подписку на канал; `unsubscribe` — снимает подписку; `publish` — отправляет событие подписчикам; `as_sse` — сериализует событие в SSE формат. +- `AgentRunner` (контракт): интерфейс выполнения агентного запроса из chat-слоя. + Методы: `run` — принимает данные задачи и возвращает итог `answer`/`changeset`. + +## 4. Сиквенс-диаграммы API + +### POST /api/chat/dialogs +Назначение: создает новый диалог, связанный с существующей `rag_session`, чтобы пользователь мог отправлять сообщения в контексте конкретного индекса. 
+```mermaid +sequenceDiagram + participant Router as ChatModule.APIRouter + participant RagSessions as RagSessionStore + participant Dialogs as DialogSessionStore + + Router->>RagSessions: get(rag_session_id) + RagSessions-->>Router: exists + Router->>Dialogs: create(rag_session_id) + Dialogs-->>Router: dialog_session +``` + +### POST /api/chat/messages +Назначение: ставит сообщение пользователя в асинхронную обработку и возвращает `task_id` для отслеживания результата. +```mermaid +sequenceDiagram + participant Router as ChatModule.APIRouter + participant Orchestrator as ChatOrchestrator + participant TaskStore as TaskStore + + Router->>Orchestrator: enqueue_message(request, idempotency_key) + Orchestrator->>TaskStore: create()/save() + Orchestrator-->>Router: task_id,status +``` + +### GET /api/tasks/{task_id} +Назначение: отдает текущее состояние задачи и финальный результат (answer/changeset/error), когда обработка завершена. +```mermaid +sequenceDiagram + participant Router as ChatModule.APIRouter + participant TaskStore as TaskStore + + Router->>TaskStore: get(task_id) + TaskStore-->>Router: task_state +``` + +### GET /api/events?task_id=... +Назначение: открывает SSE-поток с прогрессом выполнения задачи и промежуточными событиями. 
+```mermaid +sequenceDiagram + participant Router as ChatModule.APIRouter + participant Events as EventBus + + Router->>Events: subscribe(task_id) + loop until disconnect + Events-->>Router: SSE event + end + Router->>Events: unsubscribe(task_id) +``` diff --git a/app/modules/chat/__pycache__/module.cpython-312.pyc b/app/modules/chat/__pycache__/module.cpython-312.pyc index f8e125a..ffc426a 100644 Binary files a/app/modules/chat/__pycache__/module.cpython-312.pyc and b/app/modules/chat/__pycache__/module.cpython-312.pyc differ diff --git a/app/modules/chat/__pycache__/service.cpython-312.pyc b/app/modules/chat/__pycache__/service.cpython-312.pyc index 167c100..b098011 100644 Binary files a/app/modules/chat/__pycache__/service.cpython-312.pyc and b/app/modules/chat/__pycache__/service.cpython-312.pyc differ diff --git a/app/modules/chat/module.py b/app/modules/chat/module.py index 6936758..10c8964 100644 --- a/app/modules/chat/module.py +++ b/app/modules/chat/module.py @@ -7,7 +7,7 @@ from app.modules.chat.repository import ChatRepository from app.modules.chat.service import ChatOrchestrator from app.modules.chat.task_store import TaskStore from app.modules.contracts import AgentRunner -from app.modules.rag.session_store import RagSessionStore +from app.modules.rag_session.session_store import RagSessionStore from app.modules.shared.event_bus import EventBus from app.modules.shared.idempotency_store import IdempotencyStore from app.modules.shared.retry_executor import RetryExecutor diff --git a/app/modules/chat/service.py b/app/modules/chat/service.py index f647560..2ae6277 100644 --- a/app/modules/chat/service.py +++ b/app/modules/chat/service.py @@ -14,6 +14,13 @@ from app.modules.shared.retry_executor import RetryExecutor LOGGER = logging.getLogger(__name__) +def _truncate_for_log(text: str, max_chars: int = 1200) -> str: + value = (text or "").replace("\n", "\\n").strip() + if len(value) <= max_chars: + return value + return value[:max_chars].rstrip() + 
"...[truncated]" + + class ChatOrchestrator: def __init__( self, @@ -78,6 +85,16 @@ class ChatOrchestrator: try: await self._publish_progress(task_id, "task.sessions", "Проверяю сессии диалога и проекта.", progress=10) dialog_session_id, rag_session_id = self._resolve_sessions(request) + LOGGER.warning( + "incoming chat request: task_id=%s dialog_session_id=%s rag_session_id=%s mode=%s attachments=%s files=%s message=%s", + task_id, + dialog_session_id, + rag_session_id, + request.mode.value, + len(request.attachments), + len(request.files), + _truncate_for_log(request.message), + ) await self._publish_progress(task_id, "task.sessions.done", "Сессии проверены, запускаю агента.", progress=15) loop = asyncio.get_running_loop() diff --git a/app/modules/contracts.py b/app/modules/contracts.py index 402c14d..d9898e1 100644 --- a/app/modules/contracts.py +++ b/app/modules/contracts.py @@ -37,11 +37,11 @@ class RagIndexer(Protocol): rag_session_id: str, files: list[dict], progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None = None, - ) -> tuple[int, int]: ... + ) -> tuple[int, int, int, int]: ... async def index_changes( self, rag_session_id: str, changed_files: list[dict], progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None = None, - ) -> tuple[int, int]: ... + ) -> tuple[int, int, int, int]: ... 
diff --git a/app/modules/rag/__pycache__/job_store.cpython-312.pyc b/app/modules/rag/__pycache__/job_store.cpython-312.pyc deleted file mode 100644 index e196f1e..0000000 Binary files a/app/modules/rag/__pycache__/job_store.cpython-312.pyc and /dev/null differ diff --git a/app/modules/rag/__pycache__/repository.cpython-312.pyc b/app/modules/rag/__pycache__/repository.cpython-312.pyc deleted file mode 100644 index dd6d4db..0000000 Binary files a/app/modules/rag/__pycache__/repository.cpython-312.pyc and /dev/null differ diff --git a/app/modules/rag/__pycache__/service.cpython-312.pyc b/app/modules/rag/__pycache__/service.cpython-312.pyc deleted file mode 100644 index 71628b3..0000000 Binary files a/app/modules/rag/__pycache__/service.cpython-312.pyc and /dev/null differ diff --git a/app/modules/rag/repository.py b/app/modules/rag/repository.py deleted file mode 100644 index b34c5fe..0000000 --- a/app/modules/rag/repository.py +++ /dev/null @@ -1,261 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from sqlalchemy import text - -from app.modules.shared.db import get_engine - - -@dataclass -class RagJobRow: - index_job_id: str - rag_session_id: str - status: str - indexed_files: int - failed_files: int - error_code: str | None - error_desc: str | None - error_module: str | None - - -class RagRepository: - def ensure_tables(self) -> None: - engine = get_engine() - with engine.connect() as conn: - conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) - conn.execute( - text( - """ - CREATE TABLE IF NOT EXISTS rag_sessions ( - rag_session_id VARCHAR(64) PRIMARY KEY, - project_id VARCHAR(512) NOT NULL, - created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP - ) - """ - ) - ) - conn.execute( - text( - """ - CREATE TABLE IF NOT EXISTS rag_index_jobs ( - index_job_id VARCHAR(64) PRIMARY KEY, - rag_session_id VARCHAR(64) NOT NULL, - status VARCHAR(16) NOT NULL, - indexed_files INTEGER NOT NULL DEFAULT 0, - failed_files INTEGER NOT NULL DEFAULT 
0, - error_code VARCHAR(128) NULL, - error_desc TEXT NULL, - error_module VARCHAR(64) NULL, - created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP - ) - """ - ) - ) - conn.execute( - text( - """ - CREATE TABLE IF NOT EXISTS rag_chunks ( - id BIGSERIAL PRIMARY KEY, - rag_session_id VARCHAR(64) NOT NULL, - path TEXT NOT NULL, - chunk_index INTEGER NOT NULL, - content TEXT NOT NULL, - embedding vector NULL, - created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP - ) - """ - ) - ) - conn.execute( - text( - """ - ALTER TABLE rag_chunks - ADD COLUMN IF NOT EXISTS created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP - """ - ) - ) - conn.execute( - text( - """ - ALTER TABLE rag_chunks - ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP - """ - ) - ) - conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_chunks_session ON rag_chunks (rag_session_id)")) - conn.commit() - - def upsert_session(self, rag_session_id: str, project_id: str) -> None: - with get_engine().connect() as conn: - conn.execute( - text( - """ - INSERT INTO rag_sessions (rag_session_id, project_id) - VALUES (:sid, :pid) - ON CONFLICT (rag_session_id) DO UPDATE SET project_id = EXCLUDED.project_id - """ - ), - {"sid": rag_session_id, "pid": project_id}, - ) - conn.commit() - - def session_exists(self, rag_session_id: str) -> bool: - with get_engine().connect() as conn: - row = conn.execute( - text("SELECT 1 FROM rag_sessions WHERE rag_session_id = :sid"), - {"sid": rag_session_id}, - ).fetchone() - return bool(row) - - def get_session(self, rag_session_id: str) -> dict | None: - with get_engine().connect() as conn: - row = conn.execute( - text("SELECT rag_session_id, project_id FROM rag_sessions WHERE rag_session_id = :sid"), - {"sid": rag_session_id}, - ).mappings().fetchone() - return dict(row) if row else None - - def create_job(self, index_job_id: str, rag_session_id: str, status: str) -> 
None: - with get_engine().connect() as conn: - conn.execute( - text( - """ - INSERT INTO rag_index_jobs (index_job_id, rag_session_id, status) - VALUES (:jid, :sid, :status) - """ - ), - {"jid": index_job_id, "sid": rag_session_id, "status": status}, - ) - conn.commit() - - def update_job( - self, - index_job_id: str, - *, - status: str, - indexed_files: int, - failed_files: int, - error_code: str | None = None, - error_desc: str | None = None, - error_module: str | None = None, - ) -> None: - with get_engine().connect() as conn: - conn.execute( - text( - """ - UPDATE rag_index_jobs - SET status = :status, - indexed_files = :indexed, - failed_files = :failed, - error_code = :ecode, - error_desc = :edesc, - error_module = :emodule, - updated_at = CURRENT_TIMESTAMP - WHERE index_job_id = :jid - """ - ), - { - "jid": index_job_id, - "status": status, - "indexed": indexed_files, - "failed": failed_files, - "ecode": error_code, - "edesc": error_desc, - "emodule": error_module, - }, - ) - conn.commit() - - def get_job(self, index_job_id: str) -> RagJobRow | None: - with get_engine().connect() as conn: - row = conn.execute( - text( - """ - SELECT index_job_id, rag_session_id, status, indexed_files, failed_files, - error_code, error_desc, error_module - FROM rag_index_jobs - WHERE index_job_id = :jid - """ - ), - {"jid": index_job_id}, - ).mappings().fetchone() - if not row: - return None - return RagJobRow(**dict(row)) - - def replace_chunks(self, rag_session_id: str, items: list[dict]) -> None: - with get_engine().connect() as conn: - conn.execute(text("DELETE FROM rag_chunks WHERE rag_session_id = :sid"), {"sid": rag_session_id}) - self._insert_chunks(conn, rag_session_id, items) - conn.commit() - - def apply_changes(self, rag_session_id: str, delete_paths: list[str], upserts: list[dict]) -> None: - with get_engine().connect() as conn: - if delete_paths: - conn.execute( - text("DELETE FROM rag_chunks WHERE rag_session_id = :sid AND path = ANY(:paths)"), - {"sid": 
rag_session_id, "paths": delete_paths}, - ) - if upserts: - paths = sorted({str(x["path"]) for x in upserts}) - conn.execute( - text("DELETE FROM rag_chunks WHERE rag_session_id = :sid AND path = ANY(:paths)"), - {"sid": rag_session_id, "paths": paths}, - ) - self._insert_chunks(conn, rag_session_id, upserts) - conn.commit() - - def retrieve(self, rag_session_id: str, query_embedding: list[float], limit: int = 5) -> list[dict]: - emb = "[" + ",".join(str(x) for x in query_embedding) + "]" - with get_engine().connect() as conn: - rows = conn.execute( - text( - """ - SELECT path, content - FROM rag_chunks - WHERE rag_session_id = :sid - ORDER BY embedding <=> CAST(:emb AS vector) - LIMIT :lim - """ - ), - {"sid": rag_session_id, "emb": emb, "lim": limit}, - ).mappings().fetchall() - return [dict(x) for x in rows] - - def fallback_chunks(self, rag_session_id: str, limit: int = 5) -> list[dict]: - with get_engine().connect() as conn: - rows = conn.execute( - text( - """ - SELECT path, content - FROM rag_chunks - WHERE rag_session_id = :sid - ORDER BY id DESC - LIMIT :lim - """ - ), - {"sid": rag_session_id, "lim": limit}, - ).mappings().fetchall() - return [dict(x) for x in rows] - - def _insert_chunks(self, conn, rag_session_id: str, items: list[dict]) -> None: - for item in items: - emb = item.get("embedding") or [] - emb_str = "[" + ",".join(str(x) for x in emb) + "]" if emb else None - conn.execute( - text( - """ - INSERT INTO rag_chunks (rag_session_id, path, chunk_index, content, embedding, created_at, updated_at) - VALUES (:sid, :path, :idx, :content, CAST(:emb AS vector), CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) - """ - ), - { - "sid": rag_session_id, - "path": item["path"], - "idx": int(item["chunk_index"]), - "content": item["content"], - "emb": emb_str, - }, - ) diff --git a/app/modules/rag_repo/README.md b/app/modules/rag_repo/README.md new file mode 100644 index 0000000..2f032af --- /dev/null +++ b/app/modules/rag_repo/README.md @@ -0,0 +1,56 @@ +# Модуль 
rag_repo + +## 1. Функции модуля +- Прием webhook-событий от Git-провайдеров (`gitea`, `bitbucket`). +- Нормализация payload в единый формат. +- Определение `story_id` и фиксация контекста, необходимого для определения изменений, произведенных в Story. +- Запись контекста коммита в Story-хранилище через `StoryContextRepository`. +- Подготовка данных, которые позволяют ускорять формирование чанков в `rag_session` за счет кэш-переиспользования. + +Ускорение `rag_session` обеспечивается связкой полей: +- `project_id` (идентификатор репозитория/проекта), +- `commit_sha` (снимок состояния), +- `changed_files` (точный набор затронутых файлов), +- `story_id` (бизнес-контекст инкремента). + +Эта связка позволяет в `rag_session` выполнять delta-индексацию и переиспользовать ранее рассчитанные чанки/эмбеддинги для неизмененных файлов. + +## 2. Диаграмма классов и взаимосвязей +```mermaid +classDiagram + class RagRepoModule + class RepoWebhookService + class StoryContextRepository + + RagRepoModule --> RepoWebhookService + RepoWebhookService --> StoryContextRepository +``` + +## 3. Описание классов +- `RagRepoModule`: точка входа модуля и публикация webhook endpoint'а. + Методы: `__init__` — создает сервис обработки webhook; `internal_router` — регистрирует internal route `webhook`. +- `RepoWebhookService`: нормализует payload, извлекает `story_id` и формирует запись commit-контекста. + Методы: `process` — основной обработчик webhook; `_normalize_gitea` — маппинг payload Gitea; `_normalize_bitbucket` — маппинг payload Bitbucket; `_extract_story_id` — извлечение `story_id` из commit message. +- `StoryContextRepository`: persistence-слой Story-контекста. + Методы: `record_story_commit` — сохраняет commit, ветку и список измененных файлов в контекст Story. + +## 4. 
Сиквенс-диаграммы API + +### POST /internal/rag-repo/webhook +Назначение: принимает webhook коммита, автоматически определяет провайдера (`gitea`/`bitbucket`), извлекает `story_id` и сохраняет commit-контекст для трассировки изменений по Story. +```mermaid +sequenceDiagram + participant Router as RagRepoModule.APIRouter + participant Webhook as RepoWebhookService + participant StoryRepo as StoryContextRepository + + Router->>Webhook: process(payload, headers) + Webhook->>Webhook: normalize payload + extract story_id + alt story_id found + Webhook->>StoryRepo: record_story_commit(...) + StoryRepo-->>Webhook: ok + else story_id missing + Webhook-->>Router: accepted, story_bound=false + end + Webhook-->>Router: result +``` diff --git a/app/modules/rag_repo/__init__.py b/app/modules/rag_repo/__init__.py new file mode 100644 index 0000000..c9c2ef6 --- /dev/null +++ b/app/modules/rag_repo/__init__.py @@ -0,0 +1 @@ +__all__: list[str] = [] diff --git a/app/modules/rag_repo/__pycache__/__init__.cpython-312.pyc b/app/modules/rag_repo/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..b283512 Binary files /dev/null and b/app/modules/rag_repo/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/modules/rag_repo/__pycache__/module.cpython-312.pyc b/app/modules/rag_repo/__pycache__/module.cpython-312.pyc new file mode 100644 index 0000000..d1cf84e Binary files /dev/null and b/app/modules/rag_repo/__pycache__/module.cpython-312.pyc differ diff --git a/app/modules/rag_repo/__pycache__/webhook_service.cpython-312.pyc b/app/modules/rag_repo/__pycache__/webhook_service.cpython-312.pyc new file mode 100644 index 0000000..97d0953 Binary files /dev/null and b/app/modules/rag_repo/__pycache__/webhook_service.cpython-312.pyc differ diff --git a/app/modules/rag_repo/module.py b/app/modules/rag_repo/module.py new file mode 100644 index 0000000..15f7c86 --- /dev/null +++ b/app/modules/rag_repo/module.py @@ -0,0 +1,24 @@ +from __future__ import annotations + 
class RagRepoModule:
    """Entry point of the rag_repo module: exposes the repository webhook route."""

    def __init__(self, story_context_repository: StoryContextRepository, rag_repository: RagRepository) -> None:
        # The story repository is passed as the Story commit writer and the
        # RAG repository as the optional repo-cache writer of the service.
        self._webhook = RepoWebhookService(story_context_repository, rag_repository)

    def internal_router(self) -> APIRouter:
        """Build the internal router with ``POST /internal/rag-repo/webhook``."""
        router = APIRouter(prefix="/internal/rag-repo", tags=["internal-rag-repo"])

        # Deliberately a plain ``def``: ``process`` is synchronous and calls the
        # repository writers, so declaring the route ``async`` would run that
        # blocking work on the event loop. FastAPI executes non-async path
        # operations in its threadpool instead.
        @router.post("/webhook")
        def process_repo_webhook(request: Request, payload: dict) -> dict:
            return self._webhook.process(
                payload=payload,
                headers=dict(request.headers.items()),
            )

        return router


# Story identifiers look like "ABC-123": an upper-case key, a dash, digits.
_STORY_ID_RE = re.compile(r"\b[A-Z][A-Z0-9_]*-\d+\b")


class StoryCommitWriter(Protocol):
    """Structural type for a store that records a commit against a Story."""

    def record_story_commit(
        self,
        *,
        story_id: str,
        project_id: str,
        title: str,
        commit_sha: str | None,
        branch: str | None,
        changed_files: list[str],
        summary: str,
        actor: str | None,
    ) -> None:
        """Persist one commit (sha, branch, changed files) for *story_id*."""
        ...


class RepoCacheWriter(Protocol):
    """Structural type for a store that records per-commit repository data."""

    def record_repo_cache(
        self,
        *,
        project_id: str,
        commit_sha: str | None,
        changed_files: list[str],
        summary: str,
    ) -> None:
        """Persist the changed-file set of one commit of *project_id*."""
        ...
class RepoWebhookService:
    """Normalize Git push webhooks and record the commit context of a Story.

    Gitea and Bitbucket payloads are mapped to one common dictionary
    (``project_id``, ``branch``, ``commit_sha``, ``changed_files``,
    ``messages``, ``summary``, ``actor``). The normalized commit is always
    offered to the optional cache writer and, when a story id is found in a
    commit message, to the story writer.
    """

    # Commit keys that list touched paths.
    _PATH_KEYS = ("added", "modified", "removed")
    _DEFAULT_SUMMARY = "Webhook commit without message"

    def __init__(self, story_writer: StoryCommitWriter, cache_writer: RepoCacheWriter | None = None) -> None:
        self._story_writer = story_writer
        self._cache_writer = cache_writer

    def process(self, *, payload: dict, provider: str | None = None, headers: dict | None = None) -> dict:
        """Handle one webhook call and return a JSON-serializable outcome.

        Args:
            payload: Decoded webhook body.
            provider: Explicit provider name (``gitea`` / ``bitbucket``);
                detected from headers and payload shape when omitted.
            headers: Request headers used for provider detection.

        Returns:
            ``{"accepted": False, ...}`` for unsupported payloads; otherwise
            ``accepted=True`` with ``story_bound`` telling whether a story id
            was found and the commit recorded against it.
        """
        resolved_provider = self._resolve_provider(provider=provider, payload=payload, headers=headers or {})
        normalized = self._normalize(provider=resolved_provider, payload=payload)
        if not normalized:
            return {"accepted": False, "reason": "unsupported_or_invalid_payload"}

        cache_recorded = False
        if self._cache_writer is not None:
            self._cache_writer.record_repo_cache(
                project_id=normalized["project_id"],
                commit_sha=normalized["commit_sha"],
                changed_files=normalized["changed_files"],
                summary=normalized["summary"],
            )
            cache_recorded = True

        story_id = self._extract_story_id(normalized["messages"])
        if not story_id:
            return {
                "accepted": True,
                "indexed": False,
                "story_bound": False,
                "cache_recorded": cache_recorded,
                "reason": "story_id_not_found",
            }

        self._story_writer.record_story_commit(
            story_id=story_id,
            project_id=normalized["project_id"],
            title=f"Story {story_id}",
            commit_sha=normalized["commit_sha"],
            branch=normalized["branch"],
            changed_files=normalized["changed_files"],
            summary=normalized["summary"],
            actor=normalized["actor"],
        )
        return {
            "accepted": True,
            "indexed": False,
            "story_bound": True,
            "cache_recorded": cache_recorded,
            "story_id": story_id,
            "project_id": normalized["project_id"],
            "commit_sha": normalized["commit_sha"],
            "changed_files": normalized["changed_files"],
        }

    def _resolve_provider(self, *, provider: str | None, payload: dict, headers: dict[str, str]) -> str:
        """Return ``"gitea"``, ``"bitbucket"`` or ``""`` when undetectable.

        Precedence: explicit *provider*, then provider-specific headers, then
        the shape of the payload.
        """
        value = (provider or "").strip().lower()
        if value in {"gitea", "bitbucket"}:
            return value

        # Header names are compared case-insensitively.
        lowered = {str(k).lower(): str(v) for k, v in headers.items()}
        if "x-gitea-event" in lowered:
            return "gitea"
        if "x-event-key" in lowered:
            return "bitbucket"

        if isinstance(payload.get("commits"), list) and ("ref" in payload or "pusher" in payload):
            return "gitea"
        push = payload.get("push")
        if isinstance(push, dict) and isinstance(push.get("changes"), list):
            return "bitbucket"
        return ""

    def _normalize(self, *, provider: str, payload: dict) -> dict | None:
        """Dispatch to the provider-specific normalizer; ``None`` if unsupported."""
        key = provider.lower().strip()
        if key == "gitea":
            return self._normalize_gitea(payload)
        if key == "bitbucket":
            return self._normalize_bitbucket(payload)
        return None

    def _normalize_gitea(self, payload: dict) -> dict:
        """Map a Gitea push payload to the common commit dictionary."""
        repo = self._as_dict(payload.get("repository"))
        project_id = str(repo.get("full_name") or repo.get("name") or "unknown_repo")

        ref = str(payload.get("ref") or "")
        # Strip only the leading "refs/heads/"; other refs are kept verbatim.
        branch = ref.removeprefix("refs/heads/") or None
        actor = self._text(self._as_dict(payload.get("pusher")).get("username")) or None

        messages: list[str] = []
        changed_files: set[str] = set()
        last_commit_id: str | None = None
        for commit in self._as_list(payload.get("commits")):
            if not isinstance(commit, dict):
                continue
            last_commit_id = self._text(commit.get("id")) or last_commit_id
            message = self._text(commit.get("message"))
            if message:
                messages.append(message)
            self._collect_paths(commit, changed_files)

        # "after" is the pushed head of the ref, so it identifies the snapshot
        # regardless of how the commit list is ordered; the last listed commit
        # is only a fallback for payloads without it.
        commit_sha = self._head_sha(payload.get("after")) or last_commit_id

        return {
            "project_id": project_id,
            "branch": branch,
            "commit_sha": commit_sha,
            "changed_files": sorted(changed_files),
            "messages": messages,
            "summary": messages[-1] if messages else self._DEFAULT_SUMMARY,
            "actor": actor,
        }

    def _normalize_bitbucket(self, payload: dict) -> dict:
        """Map a Bitbucket push payload to the common commit dictionary."""
        repo = self._as_dict(payload.get("repository"))
        project_id = str(repo.get("full_name") or repo.get("name") or "unknown_repo")

        actor_raw = self._as_dict(payload.get("actor"))
        actor = str(actor_raw.get("display_name") or actor_raw.get("username") or "") or None

        messages: list[str] = []
        changed_files: set[str] = set()
        branch: str | None = None
        head_sha: str | None = None
        head_message: str | None = None
        listed_sha: str | None = None

        for change in self._as_list(self._as_dict(payload.get("push")).get("changes")):
            if not isinstance(change, dict):
                continue

            new_ref = change.get("new")
            if isinstance(new_ref, dict):
                branch = self._text(new_ref.get("name")) or branch
                target = new_ref.get("target")
                if isinstance(target, dict):
                    head_sha = self._text(target.get("hash")) or head_sha
                    message = self._text(target.get("message"))
                    if message:
                        messages.append(message)
                        head_message = message

            for commit in self._as_list(change.get("commits")):
                if not isinstance(commit, dict):
                    continue
                listed_sha = self._text(commit.get("hash")) or listed_sha
                message = self._text(commit.get("message"))
                if message:
                    messages.append(message)
                self._collect_paths(commit, changed_files)

        # "new.target" is the branch head. The order of "commits" is defined
        # by the provider, so its last entry must not override the head; it is
        # used only when no target was sent.
        commit_sha = head_sha or listed_sha
        summary = head_message or (messages[-1] if messages else self._DEFAULT_SUMMARY)

        return {
            "project_id": project_id,
            "branch": branch,
            "commit_sha": commit_sha,
            "changed_files": sorted(changed_files),
            "messages": messages,
            "summary": summary,
            "actor": actor,
        }

    def _extract_story_id(self, messages: list[str]) -> str | None:
        """Return the first story id found in *messages*, or ``None``."""
        for msg in messages:
            match = _STORY_ID_RE.search(msg)
            if match:
                return match.group(0)
        return None

    def _collect_paths(self, commit: dict, into: set[str]) -> None:
        """Add every added/modified/removed path of *commit* to *into*.

        Entries may be plain strings or objects carrying a ``path`` key.
        """
        for key in self._PATH_KEYS:
            for item in self._as_list(commit.get(key)):
                if isinstance(item, dict):
                    path_value = self._text(item.get("path"))
                else:
                    path_value = "" if item is None else str(item).strip()
                if path_value:
                    into.add(path_value)

    @staticmethod
    def _as_dict(value: object) -> dict:
        """Return *value* when it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _as_list(value: object) -> list:
        """Return *value* when it is a list, otherwise an empty list."""
        return value if isinstance(value, list) else []

    @staticmethod
    def _text(value: object) -> str:
        """Return *value* as a stripped string; falsy values become ``""``."""
        return str(value or "").strip()

    @staticmethod
    def _head_sha(value: object) -> str | None:
        """Return a usable commit sha, or ``None`` for empty / all-zero values.

        An all-zero sha carries no commit (sent when a ref is deleted).
        """
        sha = str(value or "").strip()
        if not sha or not sha.strip("0"):
            return None
        return sha
Функции модуля +- Создание и обслуживание сессионного RAG индекса по загруженным пользователем файлам. +- Индексация снапшота и инкрементальных изменений. +- Хранение чанков, retrieval контекста, трекинг статуса index jobs. +- Публикация прогресса индексации через SSE. + +## 2. Диаграмма классов и взаимосвязей +```mermaid +classDiagram + class RagModule + class RagService + class RagRepository + class RagSessionStore + class IndexJobStore + class IndexingOrchestrator + class TextChunker + class GigaChatEmbedder + class EventBus + + RagModule --> RagService + RagModule --> RagRepository + RagModule --> RagSessionStore + RagModule --> IndexJobStore + RagModule --> IndexingOrchestrator + RagService --> RagRepository + RagService --> TextChunker + RagService --> GigaChatEmbedder + IndexingOrchestrator --> IndexJobStore + IndexingOrchestrator --> RagService + IndexingOrchestrator --> EventBus +``` + +## 3. Описание классов +- `RagModule`: composition-root для сессионного RAG и его API. + Методы: `__init__` — собирает сервисы индексации/retrieval; `public_router` — публикует внешние endpoint'ы; `internal_router` — публикует внутренние endpoint'ы. +- `RagService`: доменный сервис индексации и retrieval. + Методы: `index_snapshot` — индексирует полный набор файлов; `index_changes` — индексирует только изменения; `retrieve` — возвращает релевантные чанки по запросу. +- `RagRepository`: слой доступа к БД для сессий, джобов и чанков. + Методы: `ensure_tables` — создает/обновляет схему; `upsert_session/get_session/session_exists` — операции по сессиям; `create_job/update_job/get_job` — операции по задачам индексации; `replace_chunks/apply_changes/retrieve/fallback_chunks` — операции по chunk-данным. +- `RagSessionStore`: управление жизненным циклом `rag_session`. + Методы: `create` — создает новую сессию; `put` — upsert с внешним id; `get` — читает сессию. +- `IndexJobStore`: управление `index_job` на уровне приложения. 
+ Методы: `create` — создает задачу индексации; `get` — читает задачу; `save` — обновляет статус/ошибку. +- `IndexingOrchestrator`: асинхронный оркестратор index-jobs. + Методы: `enqueue_snapshot` — ставит полную индексацию в очередь; `enqueue_changes` — ставит инкрементальную индексацию в очередь. +- `TextChunker`: разбивает текст файла на чанки для embedding. + Методы: `chunk` — возвращает список чанков заданного текста. +- `GigaChatEmbedder`: адаптер embeddings-модели. + Методы: `embed` — возвращает векторы для набора текстов. +- `EventBus`: доставка событий прогресса индексации. + Методы: `publish` — отправляет событие; `subscribe/unsubscribe` — управляет подписками SSE. + +## 4. Сиквенс-диаграммы API + +### POST /api/rag/sessions +Назначение: создает новую `rag_session` и запускает фоновую индексацию полного набора файлов. +```mermaid +sequenceDiagram + participant Router as RagModule.APIRouter + participant Sessions as RagSessionStore + participant Indexing as IndexingOrchestrator + + Router->>Sessions: create(project_id) + Sessions-->>Router: rag_session_id + Router->>Indexing: enqueue_snapshot(rag_session_id, files) + Indexing-->>Router: index_job_id,status +``` + +### POST /api/rag/sessions/{rag_session_id}/changes +Назначение: ставит в очередь инкрементальную переиндексацию изменений для существующей `rag_session`. +```mermaid +sequenceDiagram + participant Router as RagModule.APIRouter + participant Sessions as RagSessionStore + participant Indexing as IndexingOrchestrator + + Router->>Sessions: get(rag_session_id) + Sessions-->>Router: session + Router->>Indexing: enqueue_changes(rag_session_id, changed_files) + Indexing-->>Router: index_job_id,status +``` + +### GET /api/rag/sessions/{rag_session_id}/jobs/{index_job_id} +Назначение: возвращает состояние и статистику конкретной задачи индексации. 
+```mermaid +sequenceDiagram + participant Router as RagModule.APIRouter + participant Jobs as IndexJobStore + + Router->>Jobs: get(index_job_id) + Jobs-->>Router: job_state +``` + +### GET /api/rag/sessions/{rag_session_id}/jobs/{index_job_id}/events +Назначение: дает SSE-поток событий прогресса по задаче индексации. +```mermaid +sequenceDiagram + participant Router as RagModule.APIRouter + participant Jobs as IndexJobStore + participant Events as EventBus + + Router->>Jobs: get(index_job_id) + Router->>Events: subscribe(index_job_id, replay=True) + loop until terminal + Events-->>Router: index event + end + Router->>Events: unsubscribe(index_job_id) +``` + +### POST /api/index/snapshot (legacy) +Назначение: legacy-вход для полной индексации проекта с автоматическим созданием сессии по `project_id`. +```mermaid +sequenceDiagram + participant Router as LegacyAPIRouter + participant Sessions as RagSessionStore + participant Indexing as IndexingOrchestrator + + Router->>Sessions: put(project_id, project_id) + Router->>Indexing: enqueue_snapshot(project_id, files) + Indexing-->>Router: index_job_id,status +``` + +### POST /api/index/changes (legacy) +Назначение: legacy-вход для инкрементальной индексации изменений по `project_id`. +```mermaid +sequenceDiagram + participant Router as LegacyAPIRouter + participant Sessions as RagSessionStore + participant Indexing as IndexingOrchestrator + + Router->>Sessions: get(project_id) + alt missing + Router->>Sessions: put(project_id, project_id) + end + Router->>Indexing: enqueue_changes(project_id, changed_files) + Indexing-->>Router: index_job_id,status +``` + +### GET /api/index/jobs/{index_job_id} (legacy) +Назначение: legacy-чтение статуса index-job по `index_job_id`. 
+```mermaid +sequenceDiagram + participant Router as LegacyAPIRouter + participant Jobs as IndexJobStore + + Router->>Jobs: get(index_job_id) + Jobs-->>Router: job_state +``` + +### GET /api/index/jobs/{index_job_id}/events (legacy) +Назначение: legacy-SSE поток событий по index-job. +```mermaid +sequenceDiagram + participant Router as LegacyAPIRouter + participant Jobs as IndexJobStore + participant Events as EventBus + + Router->>Jobs: get(index_job_id) + Router->>Events: subscribe(index_job_id, replay=True) + loop until terminal + Events-->>Router: index event + end + Router->>Events: unsubscribe(index_job_id) +``` + +### POST /internal/rag/index/snapshot +Назначение: внутренний синхронный запуск полной индексации для сервисных сценариев. +```mermaid +sequenceDiagram + participant Router as InternalRagRouter + participant Sessions as RagSessionStore + participant RagService as RagService + + Router->>Sessions: get(project_id) + alt missing + Router->>Sessions: put(project_id, project_id) + end + Router->>RagService: index_snapshot(project_id, files) + RagService-->>Router: indexed_files,failed_files +``` + +### POST /internal/rag/index/changes +Назначение: внутренний синхронный запуск индексации изменений. +```mermaid +sequenceDiagram + participant Router as InternalRagRouter + participant RagService as RagService + + Router->>RagService: index_changes(project_id, changed_files) + RagService-->>Router: indexed_files,failed_files +``` + +### GET /internal/rag/index/jobs/{index_job_id} +Назначение: внутреннее получение статуса и ошибки index-job для сервисов оркестрации. +```mermaid +sequenceDiagram + participant Router as InternalRagRouter + participant Jobs as IndexJobStore + + Router->>Jobs: get(index_job_id) + Jobs-->>Router: job_state +``` + +### POST /internal/rag/retrieve +Назначение: внутренний retrieval релевантных чанков из `rag_session` по текстовому запросу. 
+```mermaid +sequenceDiagram + participant Router as InternalRagRouter + participant RagService as RagService + participant RagRepo as RagRepository + + Router->>RagService: retrieve(rag_session_id, query) + RagService->>RagRepo: retrieve/fallback_chunks + RagRepo-->>RagService: chunks + RagService-->>Router: items +``` diff --git a/app/modules/rag/__init__.py b/app/modules/rag_session/__init__.py similarity index 100% rename from app/modules/rag/__init__.py rename to app/modules/rag_session/__init__.py diff --git a/app/modules/rag/__pycache__/__init__.cpython-312.pyc b/app/modules/rag_session/__pycache__/__init__.cpython-312.pyc similarity index 100% rename from app/modules/rag/__pycache__/__init__.cpython-312.pyc rename to app/modules/rag_session/__pycache__/__init__.cpython-312.pyc diff --git a/app/modules/rag/__pycache__/indexing_service.cpython-312.pyc b/app/modules/rag_session/__pycache__/indexing_service.cpython-312.pyc similarity index 54% rename from app/modules/rag/__pycache__/indexing_service.cpython-312.pyc rename to app/modules/rag_session/__pycache__/indexing_service.cpython-312.pyc index f4f1f64..2272b9f 100644 Binary files a/app/modules/rag/__pycache__/indexing_service.cpython-312.pyc and b/app/modules/rag_session/__pycache__/indexing_service.cpython-312.pyc differ diff --git a/app/modules/rag_session/__pycache__/job_store.cpython-312.pyc b/app/modules/rag_session/__pycache__/job_store.cpython-312.pyc new file mode 100644 index 0000000..571b827 Binary files /dev/null and b/app/modules/rag_session/__pycache__/job_store.cpython-312.pyc differ diff --git a/app/modules/rag/__pycache__/module.cpython-312.pyc b/app/modules/rag_session/__pycache__/module.cpython-312.pyc similarity index 58% rename from app/modules/rag/__pycache__/module.cpython-312.pyc rename to app/modules/rag_session/__pycache__/module.cpython-312.pyc index aeff80b..79725b1 100644 Binary files a/app/modules/rag/__pycache__/module.cpython-312.pyc and 
b/app/modules/rag_session/__pycache__/module.cpython-312.pyc differ diff --git a/app/modules/rag_session/__pycache__/repository.cpython-312.pyc b/app/modules/rag_session/__pycache__/repository.cpython-312.pyc new file mode 100644 index 0000000..8e0b172 Binary files /dev/null and b/app/modules/rag_session/__pycache__/repository.cpython-312.pyc differ diff --git a/app/modules/rag_session/__pycache__/service.cpython-312.pyc b/app/modules/rag_session/__pycache__/service.cpython-312.pyc new file mode 100644 index 0000000..e20651a Binary files /dev/null and b/app/modules/rag_session/__pycache__/service.cpython-312.pyc differ diff --git a/app/modules/rag/__pycache__/session_store.cpython-312.pyc b/app/modules/rag_session/__pycache__/session_store.cpython-312.pyc similarity index 76% rename from app/modules/rag/__pycache__/session_store.cpython-312.pyc rename to app/modules/rag_session/__pycache__/session_store.cpython-312.pyc index fd56ff5..101d300 100644 Binary files a/app/modules/rag/__pycache__/session_store.cpython-312.pyc and b/app/modules/rag_session/__pycache__/session_store.cpython-312.pyc differ diff --git a/app/modules/rag/embedding/__init__.py b/app/modules/rag_session/embedding/__init__.py similarity index 100% rename from app/modules/rag/embedding/__init__.py rename to app/modules/rag_session/embedding/__init__.py diff --git a/app/modules/rag/embedding/__pycache__/__init__.cpython-312.pyc b/app/modules/rag_session/embedding/__pycache__/__init__.cpython-312.pyc similarity index 100% rename from app/modules/rag/embedding/__pycache__/__init__.cpython-312.pyc rename to app/modules/rag_session/embedding/__pycache__/__init__.cpython-312.pyc diff --git a/app/modules/rag/embedding/__pycache__/gigachat_embedder.cpython-312.pyc b/app/modules/rag_session/embedding/__pycache__/gigachat_embedder.cpython-312.pyc similarity index 100% rename from app/modules/rag/embedding/__pycache__/gigachat_embedder.cpython-312.pyc rename to 
app/modules/rag_session/embedding/__pycache__/gigachat_embedder.cpython-312.pyc diff --git a/app/modules/rag/embedding/gigachat_embedder.py b/app/modules/rag_session/embedding/gigachat_embedder.py similarity index 100% rename from app/modules/rag/embedding/gigachat_embedder.py rename to app/modules/rag_session/embedding/gigachat_embedder.py diff --git a/app/modules/rag/indexing_service.py b/app/modules/rag_session/indexing_service.py similarity index 91% rename from app/modules/rag/indexing_service.py rename to app/modules/rag_session/indexing_service.py index 7b956e3..8e3c0fe 100644 --- a/app/modules/rag/indexing_service.py +++ b/app/modules/rag_session/indexing_service.py @@ -4,7 +4,7 @@ from collections import defaultdict from app.schemas.common import ErrorPayload, ModuleName from app.schemas.indexing import IndexJobStatus from app.modules.contracts import RagIndexer -from app.modules.rag.job_store import IndexJob, IndexJobStore +from app.modules.rag_session.job_store import IndexJob, IndexJobStore from app.modules.shared.event_bus import EventBus from app.modules.shared.retry_executor import RetryExecutor @@ -85,10 +85,12 @@ class IndexingOrchestrator: }, ) - indexed, failed = await self._retry.run(lambda: operation(progress_cb)) + indexed, failed, cache_hits, cache_misses = await self._retry.run(lambda: operation(progress_cb)) job.status = IndexJobStatus.DONE job.indexed_files = indexed job.failed_files = failed + job.cache_hit_files = cache_hits + job.cache_miss_files = cache_misses self._store.save(job) await self._events.publish( job_id, @@ -98,6 +100,8 @@ class IndexingOrchestrator: "status": job.status.value, "indexed_files": indexed, "failed_files": failed, + "cache_hit_files": cache_hits, + "cache_miss_files": cache_misses, "total_files": total_files, }, ) @@ -109,6 +113,8 @@ class IndexingOrchestrator: "status": "done", "indexed_files": indexed, "failed_files": failed, + "cache_hit_files": cache_hits, + "cache_miss_files": cache_misses, "total_files": 
total_files, }, ) diff --git a/app/modules/rag/job_store.py b/app/modules/rag_session/job_store.py similarity index 86% rename from app/modules/rag/job_store.py rename to app/modules/rag_session/job_store.py index 089e9a8..a741dbe 100644 --- a/app/modules/rag/job_store.py +++ b/app/modules/rag_session/job_store.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from uuid import uuid4 -from app.modules.rag.repository import RagRepository +from app.modules.rag_session.repository import RagRepository from app.schemas.common import ErrorPayload, ModuleName from app.schemas.indexing import IndexJobStatus @@ -13,6 +13,8 @@ class IndexJob: status: IndexJobStatus = IndexJobStatus.QUEUED indexed_files: int = 0 failed_files: int = 0 + cache_hit_files: int = 0 + cache_miss_files: int = 0 error: ErrorPayload | None = None @@ -48,6 +50,8 @@ class IndexJobStore: status=IndexJobStatus(row.status), indexed_files=row.indexed_files, failed_files=row.failed_files, + cache_hit_files=row.cache_hit_files, + cache_miss_files=row.cache_miss_files, error=payload, ) @@ -60,6 +64,8 @@ class IndexJobStore: status=job.status.value, indexed_files=job.indexed_files, failed_files=job.failed_files, + cache_hit_files=job.cache_hit_files, + cache_miss_files=job.cache_miss_files, error_code=error_code, error_desc=error_desc, error_module=error_module, diff --git a/app/modules/rag/module.py b/app/modules/rag_session/module.py similarity index 88% rename from app/modules/rag/module.py rename to app/modules/rag_session/module.py index 84dcb72..b44f953 100644 --- a/app/modules/rag/module.py +++ b/app/modules/rag_session/module.py @@ -2,13 +2,13 @@ from fastapi import APIRouter from fastapi.responses import StreamingResponse from app.core.exceptions import AppError -from app.modules.rag.embedding.gigachat_embedder import GigaChatEmbedder -from app.modules.rag.indexing_service import IndexingOrchestrator -from app.modules.rag.job_store import IndexJobStore -from app.modules.rag.repository import 
RagRepository -from app.modules.rag.retrieval.chunker import TextChunker -from app.modules.rag.session_store import RagSessionStore -from app.modules.rag.service import RagService +from app.modules.rag_session.embedding.gigachat_embedder import GigaChatEmbedder +from app.modules.rag_session.indexing_service import IndexingOrchestrator +from app.modules.rag_session.job_store import IndexJobStore +from app.modules.rag_session.repository import RagRepository +from app.modules.rag_session.retrieval.chunker import TextChunker +from app.modules.rag_session.session_store import RagSessionStore +from app.modules.rag_session.service import RagService from app.modules.shared.event_bus import EventBus from app.modules.shared.gigachat.client import GigaChatClient from app.modules.shared.gigachat.settings import GigaChatSettings @@ -88,6 +88,8 @@ class RagModule: status=job.status, indexed_files=job.indexed_files, failed_files=job.failed_files, + cache_hit_files=job.cache_hit_files, + cache_miss_files=job.cache_miss_files, error=job.error.model_dump(mode="json") if job.error else None, ) @@ -160,6 +162,8 @@ class RagModule: status=job.status, indexed_files=job.indexed_files, failed_files=job.failed_files, + cache_hit_files=job.cache_hit_files, + cache_miss_files=job.cache_miss_files, error=job.error, ) @@ -207,20 +211,30 @@ class RagModule: rag_session_id = request.project_id if not self.sessions.get(rag_session_id): self.sessions.put(rag_session_id=rag_session_id, project_id=rag_session_id) - indexed, failed = await self.rag.index_snapshot( + indexed, failed, cache_hits, cache_misses = await self.rag.index_snapshot( rag_session_id=rag_session_id, files=[x.model_dump() for x in request.files], ) - return {"indexed_files": indexed, "failed_files": failed} + return { + "indexed_files": indexed, + "failed_files": failed, + "cache_hit_files": cache_hits, + "cache_miss_files": cache_misses, + } @router.post("/index/changes") async def index_changes(request: IndexChangesRequest) -> 
dict: rag_session_id = request.project_id - indexed, failed = await self.rag.index_changes( + indexed, failed, cache_hits, cache_misses = await self.rag.index_changes( rag_session_id=rag_session_id, changed_files=[x.model_dump() for x in request.changed_files], ) - return {"indexed_files": indexed, "failed_files": failed} + return { + "indexed_files": indexed, + "failed_files": failed, + "cache_hit_files": cache_hits, + "cache_miss_files": cache_misses, + } @router.get("/index/jobs/{index_job_id}") async def get_job(index_job_id: str) -> dict: @@ -232,6 +246,8 @@ class RagModule: "status": job.status.value, "indexed_files": job.indexed_files, "failed_files": job.failed_files, + "cache_hit_files": job.cache_hit_files, + "cache_miss_files": job.cache_miss_files, "error": job.error.model_dump(mode="json") if job.error else None, } diff --git a/app/modules/rag_session/repository.py b/app/modules/rag_session/repository.py new file mode 100644 index 0000000..bdeca25 --- /dev/null +++ b/app/modules/rag_session/repository.py @@ -0,0 +1,660 @@ +from __future__ import annotations + +from dataclasses import dataclass +import hashlib +from sqlalchemy import text + +from app.modules.shared.db import get_engine + + +@dataclass +class RagJobRow: + index_job_id: str + rag_session_id: str + status: str + indexed_files: int + failed_files: int + cache_hit_files: int + cache_miss_files: int + error_code: str | None + error_desc: str | None + error_module: str | None + + +class RagRepository: + def ensure_tables(self) -> None: + engine = get_engine() + with engine.connect() as conn: + conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) + conn.execute( + text( + """ + CREATE TABLE IF NOT EXISTS rag_sessions ( + rag_session_id VARCHAR(64) PRIMARY KEY, + project_id VARCHAR(512) NOT NULL, + created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP + ) + """ + ) + ) + conn.execute( + text( + """ + CREATE TABLE IF NOT EXISTS rag_index_jobs ( + index_job_id VARCHAR(64) PRIMARY KEY, + 
rag_session_id VARCHAR(64) NOT NULL, + status VARCHAR(16) NOT NULL, + indexed_files INTEGER NOT NULL DEFAULT 0, + failed_files INTEGER NOT NULL DEFAULT 0, + cache_hit_files INTEGER NOT NULL DEFAULT 0, + cache_miss_files INTEGER NOT NULL DEFAULT 0, + error_code VARCHAR(128) NULL, + error_desc TEXT NULL, + error_module VARCHAR(64) NULL, + created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP + ) + """ + ) + ) + conn.execute( + text( + """ + CREATE TABLE IF NOT EXISTS rag_chunks ( + id BIGSERIAL PRIMARY KEY, + rag_session_id VARCHAR(64) NOT NULL, + path TEXT NOT NULL, + chunk_index INTEGER NOT NULL, + content TEXT NOT NULL, + embedding vector NULL, + created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP + ) + """ + ) + ) + conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS artifact_type VARCHAR(16) NULL")) + conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS section TEXT NULL")) + conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS doc_id TEXT NULL")) + conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS doc_version TEXT NULL")) + conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS owner TEXT NULL")) + conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS system_component TEXT NULL")) + conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS last_modified TIMESTAMPTZ NULL")) + conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS staleness_score DOUBLE PRECISION NULL")) + conn.execute( + text( + """ + ALTER TABLE rag_chunks + ADD COLUMN IF NOT EXISTS created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP + """ + ) + ) + conn.execute( + text( + """ + ALTER TABLE rag_chunks + ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP + """ + ) + ) + conn.execute( + text( + """ + CREATE TABLE IF NOT EXISTS rag_blob_cache ( + id BIGSERIAL PRIMARY KEY, + 
repo_id VARCHAR(512) NOT NULL, + blob_sha VARCHAR(128) NOT NULL, + path TEXT NOT NULL, + artifact_type VARCHAR(16) NULL, + section TEXT NULL, + doc_id TEXT NULL, + doc_version TEXT NULL, + owner TEXT NULL, + system_component TEXT NULL, + last_modified TIMESTAMPTZ NULL, + staleness_score DOUBLE PRECISION NULL, + created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, + CONSTRAINT uq_rag_blob_cache UNIQUE (repo_id, blob_sha, path) + ) + """ + ) + ) + conn.execute( + text( + """ + CREATE TABLE IF NOT EXISTS rag_chunk_cache ( + id BIGSERIAL PRIMARY KEY, + repo_id VARCHAR(512) NOT NULL, + blob_sha VARCHAR(128) NOT NULL, + chunk_index INTEGER NOT NULL, + content TEXT NOT NULL, + embedding vector NULL, + section TEXT NULL, + created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, + CONSTRAINT uq_rag_chunk_cache UNIQUE (repo_id, blob_sha, chunk_index) + ) + """ + ) + ) + conn.execute( + text( + """ + CREATE TABLE IF NOT EXISTS rag_session_chunk_map ( + id BIGSERIAL PRIMARY KEY, + rag_session_id VARCHAR(64) NOT NULL, + repo_id VARCHAR(512) NOT NULL, + blob_sha VARCHAR(128) NOT NULL, + chunk_index INTEGER NOT NULL, + path TEXT NOT NULL, + created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP + ) + """ + ) + ) + conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_chunks_session ON rag_chunks (rag_session_id)")) + conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_chunks_artifact_type ON rag_chunks (artifact_type)")) + conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_chunks_doc ON rag_chunks (doc_id, doc_version)")) + conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_chunks_component ON rag_chunks (system_component)")) + conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_chunks_path ON rag_chunks (path)")) + conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_blob_cache_repo_blob ON rag_blob_cache (repo_id, blob_sha)")) + conn.execute(text("CREATE INDEX IF NOT EXISTS 
idx_rag_chunk_cache_repo_blob ON rag_chunk_cache (repo_id, blob_sha, chunk_index)")) + conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_session_chunk_map_session ON rag_session_chunk_map (rag_session_id, created_at DESC)")) + conn.execute(text("ALTER TABLE rag_index_jobs ADD COLUMN IF NOT EXISTS cache_hit_files INTEGER NOT NULL DEFAULT 0")) + conn.execute(text("ALTER TABLE rag_index_jobs ADD COLUMN IF NOT EXISTS cache_miss_files INTEGER NOT NULL DEFAULT 0")) + conn.commit() + + def upsert_session(self, rag_session_id: str, project_id: str) -> None: + with get_engine().connect() as conn: + conn.execute( + text( + """ + INSERT INTO rag_sessions (rag_session_id, project_id) + VALUES (:sid, :pid) + ON CONFLICT (rag_session_id) DO UPDATE SET project_id = EXCLUDED.project_id + """ + ), + {"sid": rag_session_id, "pid": project_id}, + ) + conn.commit() + + def session_exists(self, rag_session_id: str) -> bool: + with get_engine().connect() as conn: + row = conn.execute( + text("SELECT 1 FROM rag_sessions WHERE rag_session_id = :sid"), + {"sid": rag_session_id}, + ).fetchone() + return bool(row) + + def get_session(self, rag_session_id: str) -> dict | None: + with get_engine().connect() as conn: + row = conn.execute( + text("SELECT rag_session_id, project_id FROM rag_sessions WHERE rag_session_id = :sid"), + {"sid": rag_session_id}, + ).mappings().fetchone() + return dict(row) if row else None + + def create_job(self, index_job_id: str, rag_session_id: str, status: str) -> None: + with get_engine().connect() as conn: + conn.execute( + text( + """ + INSERT INTO rag_index_jobs (index_job_id, rag_session_id, status) + VALUES (:jid, :sid, :status) + """ + ), + {"jid": index_job_id, "sid": rag_session_id, "status": status}, + ) + conn.commit() + + def update_job( + self, + index_job_id: str, + *, + status: str, + indexed_files: int, + failed_files: int, + cache_hit_files: int = 0, + cache_miss_files: int = 0, + error_code: str | None = None, + error_desc: str | None = None, 
+ error_module: str | None = None, + ) -> None: + with get_engine().connect() as conn: + conn.execute( + text( + """ + UPDATE rag_index_jobs + SET status = :status, + indexed_files = :indexed, + failed_files = :failed, + cache_hit_files = :cache_hit_files, + cache_miss_files = :cache_miss_files, + error_code = :ecode, + error_desc = :edesc, + error_module = :emodule, + updated_at = CURRENT_TIMESTAMP + WHERE index_job_id = :jid + """ + ), + { + "jid": index_job_id, + "status": status, + "indexed": indexed_files, + "failed": failed_files, + "cache_hit_files": cache_hit_files, + "cache_miss_files": cache_miss_files, + "ecode": error_code, + "edesc": error_desc, + "emodule": error_module, + }, + ) + conn.commit() + + def get_job(self, index_job_id: str) -> RagJobRow | None: + with get_engine().connect() as conn: + row = conn.execute( + text( + """ + SELECT index_job_id, rag_session_id, status, indexed_files, failed_files, + cache_hit_files, cache_miss_files, error_code, error_desc, error_module + FROM rag_index_jobs + WHERE index_job_id = :jid + """ + ), + {"jid": index_job_id}, + ).mappings().fetchone() + if not row: + return None + return RagJobRow(**dict(row)) + + def replace_chunks(self, rag_session_id: str, items: list[dict]) -> None: + with get_engine().connect() as conn: + conn.execute(text("DELETE FROM rag_chunks WHERE rag_session_id = :sid"), {"sid": rag_session_id}) + conn.execute(text("DELETE FROM rag_session_chunk_map WHERE rag_session_id = :sid"), {"sid": rag_session_id}) + self._insert_chunks(conn, rag_session_id, items) + conn.commit() + + def apply_changes(self, rag_session_id: str, delete_paths: list[str], upserts: list[dict]) -> None: + with get_engine().connect() as conn: + if delete_paths: + conn.execute( + text("DELETE FROM rag_chunks WHERE rag_session_id = :sid AND path = ANY(:paths)"), + {"sid": rag_session_id, "paths": delete_paths}, + ) + conn.execute( + text("DELETE FROM rag_session_chunk_map WHERE rag_session_id = :sid AND path = 
ANY(:paths)"), + {"sid": rag_session_id, "paths": delete_paths}, + ) + if upserts: + paths = sorted({str(x["path"]) for x in upserts}) + conn.execute( + text("DELETE FROM rag_chunks WHERE rag_session_id = :sid AND path = ANY(:paths)"), + {"sid": rag_session_id, "paths": paths}, + ) + conn.execute( + text("DELETE FROM rag_session_chunk_map WHERE rag_session_id = :sid AND path = ANY(:paths)"), + {"sid": rag_session_id, "paths": paths}, + ) + self._insert_chunks(conn, rag_session_id, upserts) + conn.commit() + + def get_cached_chunks(self, repo_id: str, blob_sha: str) -> list[dict]: + with get_engine().connect() as conn: + rows = conn.execute( + text( + """ + SELECT chunk_index, content, embedding::text AS embedding_txt, section + FROM rag_chunk_cache + WHERE repo_id = :repo_id AND blob_sha = :blob_sha + ORDER BY chunk_index ASC + """ + ), + {"repo_id": repo_id, "blob_sha": blob_sha}, + ).mappings().fetchall() + output: list[dict] = [] + for row in rows: + output.append( + { + "chunk_index": int(row["chunk_index"]), + "content": str(row["content"] or ""), + "embedding": self._parse_vector(str(row["embedding_txt"] or "")), + "section": row.get("section"), + } + ) + return output + + def record_repo_cache( + self, + *, + project_id: str, + commit_sha: str | None, + changed_files: list[str], + summary: str, + ) -> None: + repo_session_id = f"repo:{project_id}" + with get_engine().connect() as conn: + for path in changed_files: + key = f"{commit_sha or 'no-commit'}:{path}" + blob_sha = hashlib.sha256(key.encode("utf-8")).hexdigest() + conn.execute( + text( + """ + INSERT INTO rag_blob_cache ( + repo_id, + blob_sha, + path, + artifact_type, + section + ) + VALUES ( + :repo_id, + :blob_sha, + :path, + :artifact_type, + :section + ) + ON CONFLICT (repo_id, blob_sha, path) DO UPDATE SET + updated_at = CURRENT_TIMESTAMP + """ + ), + { + "repo_id": project_id, + "blob_sha": blob_sha, + "path": path, + "artifact_type": "CODE", + "section": "repo_webhook", + }, + ) + 
conn.execute( + text( + """ + INSERT INTO rag_chunk_cache ( + repo_id, + blob_sha, + chunk_index, + content, + embedding, + section + ) + VALUES ( + :repo_id, + :blob_sha, + 0, + :content, + NULL, + :section + ) + ON CONFLICT (repo_id, blob_sha, chunk_index) DO UPDATE SET + content = EXCLUDED.content, + section = EXCLUDED.section, + updated_at = CURRENT_TIMESTAMP + """ + ), + { + "repo_id": project_id, + "blob_sha": blob_sha, + "content": f"repo_webhook:{path}:{summary[:300]}", + "section": "repo_webhook", + }, + ) + conn.execute( + text( + """ + INSERT INTO rag_session_chunk_map ( + rag_session_id, + repo_id, + blob_sha, + chunk_index, + path + ) + VALUES ( + :rag_session_id, + :repo_id, + :blob_sha, + 0, + :path + ) + """ + ), + { + "rag_session_id": repo_session_id, + "repo_id": project_id, + "blob_sha": blob_sha, + "path": path, + }, + ) + conn.commit() + + def cache_file_chunks(self, repo_id: str, path: str, blob_sha: str, items: list[dict]) -> None: + if not items: + return + meta = items[0] + with get_engine().connect() as conn: + conn.execute( + text( + """ + INSERT INTO rag_blob_cache ( + repo_id, + blob_sha, + path, + artifact_type, + section, + doc_id, + doc_version, + owner, + system_component, + last_modified, + staleness_score + ) + VALUES ( + :repo_id, + :blob_sha, + :path, + :artifact_type, + :section, + :doc_id, + :doc_version, + :owner, + :system_component, + :last_modified, + :staleness_score + ) + ON CONFLICT (repo_id, blob_sha, path) DO UPDATE SET + artifact_type = EXCLUDED.artifact_type, + section = EXCLUDED.section, + doc_id = EXCLUDED.doc_id, + doc_version = EXCLUDED.doc_version, + owner = EXCLUDED.owner, + system_component = EXCLUDED.system_component, + last_modified = EXCLUDED.last_modified, + staleness_score = EXCLUDED.staleness_score, + updated_at = CURRENT_TIMESTAMP + """ + ), + { + "repo_id": repo_id, + "blob_sha": blob_sha, + "path": path, + "artifact_type": meta.get("artifact_type"), + "section": meta.get("section"), + "doc_id": 
meta.get("doc_id"), + "doc_version": meta.get("doc_version"), + "owner": meta.get("owner"), + "system_component": meta.get("system_component"), + "last_modified": meta.get("last_modified"), + "staleness_score": meta.get("staleness_score"), + }, + ) + for item in items: + emb = item.get("embedding") or [] + emb_str = "[" + ",".join(str(x) for x in emb) + "]" if emb else None + conn.execute( + text( + """ + INSERT INTO rag_chunk_cache ( + repo_id, + blob_sha, + chunk_index, + content, + embedding, + section + ) + VALUES ( + :repo_id, + :blob_sha, + :chunk_index, + :content, + CAST(:embedding AS vector), + :section + ) + ON CONFLICT (repo_id, blob_sha, chunk_index) DO UPDATE SET + content = EXCLUDED.content, + embedding = EXCLUDED.embedding, + section = EXCLUDED.section, + updated_at = CURRENT_TIMESTAMP + """ + ), + { + "repo_id": repo_id, + "blob_sha": blob_sha, + "chunk_index": int(item["chunk_index"]), + "content": item["content"], + "embedding": emb_str, + "section": item.get("section"), + }, + ) + conn.commit() + + def retrieve(self, rag_session_id: str, query_embedding: list[float], limit: int = 5) -> list[dict]: + emb = "[" + ",".join(str(x) for x in query_embedding) + "]" + with get_engine().connect() as conn: + rows = conn.execute( + text( + """ + SELECT path, content + FROM rag_chunks + WHERE rag_session_id = :sid + ORDER BY embedding <=> CAST(:emb AS vector) + LIMIT :lim + """ + ), + {"sid": rag_session_id, "emb": emb, "lim": limit}, + ).mappings().fetchall() + return [dict(x) for x in rows] + + def fallback_chunks(self, rag_session_id: str, limit: int = 5) -> list[dict]: + with get_engine().connect() as conn: + rows = conn.execute( + text( + """ + SELECT path, content + FROM rag_chunks + WHERE rag_session_id = :sid + ORDER BY id DESC + LIMIT :lim + """ + ), + {"sid": rag_session_id, "lim": limit}, + ).mappings().fetchall() + return [dict(x) for x in rows] + + def _insert_chunks(self, conn, rag_session_id: str, items: list[dict]) -> None: + for item in 
items: + emb = item.get("embedding") or [] + emb_str = "[" + ",".join(str(x) for x in emb) + "]" if emb else None + conn.execute( + text( + """ + INSERT INTO rag_chunks ( + rag_session_id, + path, + chunk_index, + content, + embedding, + artifact_type, + section, + doc_id, + doc_version, + owner, + system_component, + last_modified, + staleness_score, + created_at, + updated_at + ) + VALUES ( + :sid, + :path, + :idx, + :content, + CAST(:emb AS vector), + :artifact_type, + :section, + :doc_id, + :doc_version, + :owner, + :system_component, + :last_modified, + :staleness_score, + CURRENT_TIMESTAMP, + CURRENT_TIMESTAMP + ) + """ + ), + { + "sid": rag_session_id, + "path": item["path"], + "idx": int(item["chunk_index"]), + "content": item["content"], + "emb": emb_str, + "artifact_type": item.get("artifact_type"), + "section": item.get("section"), + "doc_id": item.get("doc_id"), + "doc_version": item.get("doc_version"), + "owner": item.get("owner"), + "system_component": item.get("system_component"), + "last_modified": item.get("last_modified"), + "staleness_score": item.get("staleness_score"), + }, + ) + repo_id = str(item.get("repo_id") or "").strip() + blob_sha = str(item.get("blob_sha") or "").strip() + if repo_id and blob_sha: + conn.execute( + text( + """ + INSERT INTO rag_session_chunk_map ( + rag_session_id, + repo_id, + blob_sha, + chunk_index, + path + ) VALUES ( + :sid, + :repo_id, + :blob_sha, + :chunk_index, + :path + ) + """ + ), + { + "sid": rag_session_id, + "repo_id": repo_id, + "blob_sha": blob_sha, + "chunk_index": int(item["chunk_index"]), + "path": item["path"], + }, + ) + + def _parse_vector(self, value: str) -> list[float]: + text_value = value.strip() + if not text_value: + return [] + if text_value.startswith("[") and text_value.endswith("]"): + text_value = text_value[1:-1] + if not text_value: + return [] + return [float(part.strip()) for part in text_value.split(",") if part.strip()] diff --git a/app/modules/rag/retrieval/__init__.py 
b/app/modules/rag_session/retrieval/__init__.py similarity index 100% rename from app/modules/rag/retrieval/__init__.py rename to app/modules/rag_session/retrieval/__init__.py diff --git a/app/modules/rag/retrieval/__pycache__/__init__.cpython-312.pyc b/app/modules/rag_session/retrieval/__pycache__/__init__.cpython-312.pyc similarity index 100% rename from app/modules/rag/retrieval/__pycache__/__init__.cpython-312.pyc rename to app/modules/rag_session/retrieval/__pycache__/__init__.cpython-312.pyc diff --git a/app/modules/rag/retrieval/__pycache__/chunker.cpython-312.pyc b/app/modules/rag_session/retrieval/__pycache__/chunker.cpython-312.pyc similarity index 100% rename from app/modules/rag/retrieval/__pycache__/chunker.cpython-312.pyc rename to app/modules/rag_session/retrieval/__pycache__/chunker.cpython-312.pyc diff --git a/app/modules/rag/retrieval/__pycache__/scoring.cpython-312.pyc b/app/modules/rag_session/retrieval/__pycache__/scoring.cpython-312.pyc similarity index 100% rename from app/modules/rag/retrieval/__pycache__/scoring.cpython-312.pyc rename to app/modules/rag_session/retrieval/__pycache__/scoring.cpython-312.pyc diff --git a/app/modules/rag/retrieval/chunker.py b/app/modules/rag_session/retrieval/chunker.py similarity index 100% rename from app/modules/rag/retrieval/chunker.py rename to app/modules/rag_session/retrieval/chunker.py diff --git a/app/modules/rag/retrieval/scoring.py b/app/modules/rag_session/retrieval/scoring.py similarity index 100% rename from app/modules/rag/retrieval/scoring.py rename to app/modules/rag_session/retrieval/scoring.py diff --git a/app/modules/rag/service.py b/app/modules/rag_session/service.py similarity index 52% rename from app/modules/rag/service.py rename to app/modules/rag_session/service.py index 51753c3..89bcf84 100644 --- a/app/modules/rag/service.py +++ b/app/modules/rag_session/service.py @@ -1,11 +1,12 @@ import asyncio +import hashlib import os from collections.abc import Awaitable, Callable from 
inspect import isawaitable -from app.modules.rag.embedding.gigachat_embedder import GigaChatEmbedder -from app.modules.rag.repository import RagRepository -from app.modules.rag.retrieval.chunker import TextChunker +from app.modules.rag_session.embedding.gigachat_embedder import GigaChatEmbedder +from app.modules.rag_session.repository import RagRepository +from app.modules.rag_session.retrieval.chunker import TextChunker class RagService: @@ -24,35 +25,49 @@ class RagService: rag_session_id: str, files: list[dict], progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None = None, - ) -> tuple[int, int]: + ) -> tuple[int, int, int, int]: total_files = len(files) indexed_files = 0 failed_files = 0 + cache_hit_files = 0 + cache_miss_files = 0 all_chunks: list[dict] = [] + repo_id = self._resolve_repo_id(rag_session_id) for index, file in enumerate(files, start=1): path = str(file.get("path", "")) try: - chunks = self._build_chunks_for_file(file) - embedded_chunks = await asyncio.to_thread(self._embed_chunks, chunks) - all_chunks.extend(embedded_chunks) + blob_sha = self._blob_sha(file) + cached = await asyncio.to_thread(self._repo.get_cached_chunks, repo_id, blob_sha) + if cached: + all_chunks.extend(self._build_cached_items(path, file, repo_id, blob_sha, cached)) + cache_hit_files += 1 + else: + chunks = self._build_chunks_for_file(file) + embedded_chunks = await asyncio.to_thread(self._embed_chunks, chunks, file, repo_id, blob_sha) + all_chunks.extend(embedded_chunks) + await asyncio.to_thread(self._repo.cache_file_chunks, repo_id, path, blob_sha, embedded_chunks) + cache_miss_files += 1 indexed_files += 1 except Exception: failed_files += 1 await self._notify_progress(progress_cb, index, total_files, path) await asyncio.to_thread(self._repo.replace_chunks, rag_session_id, all_chunks) - return indexed_files, failed_files + return indexed_files, failed_files, cache_hit_files, cache_miss_files async def index_changes( self, rag_session_id: str, 
changed_files: list[dict], progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None = None, - ) -> tuple[int, int]: + ) -> tuple[int, int, int, int]: total_files = len(changed_files) indexed_files = 0 failed_files = 0 + cache_hit_files = 0 + cache_miss_files = 0 delete_paths: list[str] = [] upsert_chunks: list[dict] = [] + repo_id = self._resolve_repo_id(rag_session_id) for index, file in enumerate(changed_files, start=1): path = str(file.get("path", "")) @@ -64,9 +79,17 @@ class RagService: await self._notify_progress(progress_cb, index, total_files, path) continue if op == "upsert" and file.get("content") is not None: - chunks = self._build_chunks_for_file(file) - embedded_chunks = await asyncio.to_thread(self._embed_chunks, chunks) - upsert_chunks.extend(embedded_chunks) + blob_sha = self._blob_sha(file) + cached = await asyncio.to_thread(self._repo.get_cached_chunks, repo_id, blob_sha) + if cached: + upsert_chunks.extend(self._build_cached_items(path, file, repo_id, blob_sha, cached)) + cache_hit_files += 1 + else: + chunks = self._build_chunks_for_file(file) + embedded_chunks = await asyncio.to_thread(self._embed_chunks, chunks, file, repo_id, blob_sha) + upsert_chunks.extend(embedded_chunks) + await asyncio.to_thread(self._repo.cache_file_chunks, repo_id, path, blob_sha, embedded_chunks) + cache_miss_files += 1 indexed_files += 1 await self._notify_progress(progress_cb, index, total_files, path) continue @@ -81,7 +104,7 @@ class RagService: delete_paths, upsert_chunks, ) - return indexed_files, failed_files + return indexed_files, failed_files, cache_hit_files, cache_miss_files async def retrieve(self, rag_session_id: str, query: str) -> list[dict]: try: @@ -99,10 +122,11 @@ class RagService: output.append((path, idx, chunk)) return output - def _embed_chunks(self, raw_chunks: list[tuple[str, int, str]]) -> list[dict]: + def _embed_chunks(self, raw_chunks: list[tuple[str, int, str]], file: dict, repo_id: str, blob_sha: str) -> list[dict]: if not 
raw_chunks: return [] batch_size = max(1, int(os.getenv("RAG_EMBED_BATCH_SIZE", "16"))) + metadata = self._chunk_metadata(file) indexed: list[dict] = [] for i in range(0, len(raw_chunks), batch_size): @@ -116,10 +140,63 @@ class RagService: "chunk_index": chunk_index, "content": content, "embedding": vector, + "repo_id": repo_id, + "blob_sha": blob_sha, + **metadata, } ) return indexed + def _build_cached_items( + self, + path: str, + file: dict, + repo_id: str, + blob_sha: str, + cached: list[dict], + ) -> list[dict]: + metadata = self._chunk_metadata(file) + output: list[dict] = [] + for item in cached: + output.append( + { + "path": path, + "chunk_index": int(item["chunk_index"]), + "content": str(item["content"]), + "embedding": item.get("embedding") or [], + "repo_id": repo_id, + "blob_sha": blob_sha, + **metadata, + "section": item.get("section") or metadata.get("section"), + } + ) + return output + + def _resolve_repo_id(self, rag_session_id: str) -> str: + session = self._repo.get_session(rag_session_id) + if not session: + return rag_session_id + return str(session.get("project_id") or rag_session_id) + + def _blob_sha(self, file: dict) -> str: + raw = str(file.get("content_hash") or "").strip() + if raw: + return raw + content = str(file.get("content") or "") + return hashlib.sha256(content.encode("utf-8")).hexdigest() + + def _chunk_metadata(self, file: dict) -> dict: + return { + "artifact_type": file.get("artifact_type"), + "section": file.get("section"), + "doc_id": file.get("doc_id"), + "doc_version": file.get("doc_version"), + "owner": file.get("owner"), + "system_component": file.get("system_component"), + "last_modified": file.get("last_modified"), + "staleness_score": file.get("staleness_score"), + } + async def _notify_progress( self, progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None, diff --git a/app/modules/rag/session_store.py b/app/modules/rag_session/session_store.py similarity index 94% rename from 
app/modules/rag/session_store.py rename to app/modules/rag_session/session_store.py index e513598..6e353b4 100644 --- a/app/modules/rag/session_store.py +++ b/app/modules/rag_session/session_store.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from uuid import uuid4 -from app.modules.rag.repository import RagRepository +from app.modules.rag_session.repository import RagRepository @dataclass diff --git a/app/modules/shared/README.md b/app/modules/shared/README.md new file mode 100644 index 0000000..0f49eea --- /dev/null +++ b/app/modules/shared/README.md @@ -0,0 +1,40 @@ +# Модуль shared + +## 1. Функции модуля +- Общие инфраструктурные компоненты для всех модулей: + - подключение к БД (`db.py`, `bootstrap.py`), + - шина событий (`event_bus.py`), + - retry и idempotency (`retry_executor.py`, `idempotency_store.py`), + - checkpointer, + - клиент и настройки GigaChat (`gigachat/*`). + +## 2. Диаграмма классов и взаимосвязей +```mermaid +classDiagram + class EventBus + class RetryExecutor + class IdempotencyStore + class GigaChatClient + class GigaChatSettings + class GigaChatTokenProvider + + GigaChatClient --> GigaChatSettings + GigaChatClient --> GigaChatTokenProvider +``` + +## 3. Описание классов +- `EventBus`: общий асинхронный event-bus для публикации и SSE-стриминга. + Методы: `subscribe` — подписка на канал; `unsubscribe` — удаление подписки; `publish` — отправка события; `as_sse` — сериализация события в SSE-формат. +- `RetryExecutor`: общий механизм повторных попыток для временных ошибок. + Методы: `run` — выполняет async-операцию с retry по временным исключениям. +- `IdempotencyStore`: in-memory хранилище идемпотентных ключей. + Методы: `get_task_id` — находит ранее созданную задачу по ключу; `put` — сохраняет ключ и task_id. +- `GigaChatSettings`: конфигурация доступа к GigaChat. + Методы: `from_env` — собирает настройки из переменных окружения. +- `GigaChatTokenProvider`: управление access token для GigaChat API. 
+ Методы: `get_access_token` — возвращает валидный токен (с обновлением при необходимости). +- `GigaChatClient`: HTTP-клиент для completion и embeddings. + Методы: `complete` — выполняет генерацию ответа модели; `embed` — получает векторные представления для списка текстов. + +## 4. Сиквенс-диаграммы API +В модуле нет HTTP endpoint'ов: он предоставляет только инфраструктурные классы для других модулей. diff --git a/app/modules/shared/__pycache__/bootstrap.cpython-312.pyc b/app/modules/shared/__pycache__/bootstrap.cpython-312.pyc index 52f9c1c..b0a5382 100644 Binary files a/app/modules/shared/__pycache__/bootstrap.cpython-312.pyc and b/app/modules/shared/__pycache__/bootstrap.cpython-312.pyc differ diff --git a/app/modules/shared/bootstrap.py b/app/modules/shared/bootstrap.py index f13f70b..d62259d 100644 --- a/app/modules/shared/bootstrap.py +++ b/app/modules/shared/bootstrap.py @@ -3,13 +3,14 @@ import time from app.modules.shared.checkpointer import get_checkpointer -def bootstrap_database(rag_repository, chat_repository, agent_repository) -> None: +def bootstrap_database(rag_repository, chat_repository, agent_repository, story_context_repository) -> None: last_error: Exception | None = None for attempt in range(1, 16): try: rag_repository.ensure_tables() chat_repository.ensure_tables() agent_repository.ensure_tables() + story_context_repository.ensure_tables() get_checkpointer() return except Exception as exc: # noqa: BLE001 diff --git a/app/schemas/__pycache__/changeset.cpython-312.pyc b/app/schemas/__pycache__/changeset.cpython-312.pyc index 83f1947..5517a90 100644 Binary files a/app/schemas/__pycache__/changeset.cpython-312.pyc and b/app/schemas/__pycache__/changeset.cpython-312.pyc differ diff --git a/app/schemas/__pycache__/indexing.cpython-312.pyc b/app/schemas/__pycache__/indexing.cpython-312.pyc index 5486f51..04be6ba 100644 Binary files a/app/schemas/__pycache__/indexing.cpython-312.pyc and b/app/schemas/__pycache__/indexing.cpython-312.pyc differ diff 
--git a/app/schemas/__pycache__/rag_sessions.cpython-312.pyc b/app/schemas/__pycache__/rag_sessions.cpython-312.pyc index 11fa637..5ad9da4 100644 Binary files a/app/schemas/__pycache__/rag_sessions.cpython-312.pyc and b/app/schemas/__pycache__/rag_sessions.cpython-312.pyc differ diff --git a/app/schemas/changeset.py b/app/schemas/changeset.py index 112835b..5c19b10 100644 --- a/app/schemas/changeset.py +++ b/app/schemas/changeset.py @@ -1,4 +1,5 @@ from enum import Enum +from typing import Literal from typing import Optional from pydantic import BaseModel, Field, model_validator @@ -10,12 +11,39 @@ class ChangeOp(str, Enum): DELETE = "delete" +class PatchHunk(BaseModel): + type: Literal["append_end", "replace_between", "replace_line_equals"] + new_text: str = Field(default="") + start_anchor: Optional[str] = None + end_anchor: Optional[str] = None + old_line: Optional[str] = None + + @model_validator(mode="after") + def validate_hunk(self) -> "PatchHunk": + if self.type == "append_end": + if not self.new_text.strip(): + raise ValueError("append_end requires non-empty new_text") + return self + if self.type == "replace_between": + if not (self.start_anchor and self.end_anchor): + raise ValueError("replace_between requires start_anchor and end_anchor") + return self + if self.type == "replace_line_equals": + if not self.old_line: + raise ValueError("replace_line_equals requires old_line") + if not self.new_text: + raise ValueError("replace_line_equals requires new_text") + return self + return self + + class ChangeItem(BaseModel): op: ChangeOp path: str = Field(min_length=1) base_hash: Optional[str] = None proposed_content: Optional[str] = None reason: str = Field(min_length=1, max_length=500) + hunks: list[PatchHunk] = Field(default_factory=list) @model_validator(mode="after") def validate_op_fields(self) -> "ChangeItem": diff --git a/app/schemas/indexing.py b/app/schemas/indexing.py index 26e6a4e..cc56187 100644 --- a/app/schemas/indexing.py +++ 
b/app/schemas/indexing.py @@ -51,4 +51,6 @@ class IndexJobResponse(BaseModel): status: IndexJobStatus indexed_files: int = 0 failed_files: int = 0 + cache_hit_files: int = 0 + cache_miss_files: int = 0 error: Optional[ErrorPayload] = None diff --git a/app/schemas/rag_sessions.py b/app/schemas/rag_sessions.py index 01a3bf7..3643150 100644 --- a/app/schemas/rag_sessions.py +++ b/app/schemas/rag_sessions.py @@ -24,4 +24,6 @@ class RagSessionJobResponse(BaseModel): status: IndexJobStatus indexed_files: int = 0 failed_files: int = 0 + cache_hit_files: int = 0 + cache_miss_files: int = 0 error: dict | None = None diff --git a/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..84b1b6e Binary files /dev/null and b/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/agent/__pycache__/test_repo_webhook_service.cpython-312-pytest-9.0.2.pyc b/tests/agent/__pycache__/test_repo_webhook_service.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..c713378 Binary files /dev/null and b/tests/agent/__pycache__/test_repo_webhook_service.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/agent/__pycache__/test_story_session_recorder.cpython-312-pytest-9.0.2.pyc b/tests/agent/__pycache__/test_story_session_recorder.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..a960848 Binary files /dev/null and b/tests/agent/__pycache__/test_story_session_recorder.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/agent/orchestrator/__pycache__/test_edit_actions_case_insensitive_path.cpython-312-pytest-9.0.2.pyc b/tests/agent/orchestrator/__pycache__/test_edit_actions_case_insensitive_path.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..aa75ae2 Binary files /dev/null and b/tests/agent/orchestrator/__pycache__/test_edit_actions_case_insensitive_path.cpython-312-pytest-9.0.2.pyc differ diff --git 
a/tests/agent/orchestrator/__pycache__/test_eval_suite.cpython-312-pytest-9.0.2.pyc b/tests/agent/orchestrator/__pycache__/test_eval_suite.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..c5b213d Binary files /dev/null and b/tests/agent/orchestrator/__pycache__/test_eval_suite.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/agent/orchestrator/__pycache__/test_eval_suite.cpython-312.pyc b/tests/agent/orchestrator/__pycache__/test_eval_suite.cpython-312.pyc new file mode 100644 index 0000000..728c2aa Binary files /dev/null and b/tests/agent/orchestrator/__pycache__/test_eval_suite.cpython-312.pyc differ diff --git a/tests/agent/orchestrator/__pycache__/test_orchestrator_service.cpython-312-pytest-9.0.2.pyc b/tests/agent/orchestrator/__pycache__/test_orchestrator_service.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..b9b202a Binary files /dev/null and b/tests/agent/orchestrator/__pycache__/test_orchestrator_service.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/agent/orchestrator/__pycache__/test_orchestrator_service.cpython-312.pyc b/tests/agent/orchestrator/__pycache__/test_orchestrator_service.cpython-312.pyc new file mode 100644 index 0000000..f19bce8 Binary files /dev/null and b/tests/agent/orchestrator/__pycache__/test_orchestrator_service.cpython-312.pyc differ diff --git a/tests/agent/orchestrator/__pycache__/test_plan_validator.cpython-312-pytest-9.0.2.pyc b/tests/agent/orchestrator/__pycache__/test_plan_validator.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..83f2e6f Binary files /dev/null and b/tests/agent/orchestrator/__pycache__/test_plan_validator.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/agent/orchestrator/__pycache__/test_plan_validator.cpython-312.pyc b/tests/agent/orchestrator/__pycache__/test_plan_validator.cpython-312.pyc new file mode 100644 index 0000000..b003007 Binary files /dev/null and b/tests/agent/orchestrator/__pycache__/test_plan_validator.cpython-312.pyc 
differ diff --git a/tests/agent/orchestrator/__pycache__/test_quality_metrics.cpython-312-pytest-9.0.2.pyc b/tests/agent/orchestrator/__pycache__/test_quality_metrics.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..746fd1c Binary files /dev/null and b/tests/agent/orchestrator/__pycache__/test_quality_metrics.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/agent/orchestrator/__pycache__/test_quality_metrics_gate_expectations.cpython-312-pytest-9.0.2.pyc b/tests/agent/orchestrator/__pycache__/test_quality_metrics_gate_expectations.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..3b2e4dc Binary files /dev/null and b/tests/agent/orchestrator/__pycache__/test_quality_metrics_gate_expectations.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/agent/orchestrator/__pycache__/test_template_registry.cpython-312-pytest-9.0.2.pyc b/tests/agent/orchestrator/__pycache__/test_template_registry.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..fb434ec Binary files /dev/null and b/tests/agent/orchestrator/__pycache__/test_template_registry.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/agent/orchestrator/__pycache__/test_template_registry.cpython-312.pyc b/tests/agent/orchestrator/__pycache__/test_template_registry.cpython-312.pyc new file mode 100644 index 0000000..049efd0 Binary files /dev/null and b/tests/agent/orchestrator/__pycache__/test_template_registry.cpython-312.pyc differ diff --git a/tests/agent/orchestrator/test_edit_actions_case_insensitive_path.py b/tests/agent/orchestrator/test_edit_actions_case_insensitive_path.py new file mode 100644 index 0000000..a46fe5e --- /dev/null +++ b/tests/agent/orchestrator/test_edit_actions_case_insensitive_path.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from app.modules.agent.engine.orchestrator.actions.edit_actions import EditActions +from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext +from 
app.modules.agent.engine.orchestrator.models import ( + ExecutionPlan, + OutputContract, + RoutingMeta, + Scenario, + TaskConstraints, + TaskSpec, +) + + +def _ctx() -> ExecutionContext: + task = TaskSpec( + task_id="task-1", + dialog_session_id="dialog-1", + rag_session_id="rag-1", + mode="auto", + user_message="Добавь в readme.md в конце строку про автора", + scenario=Scenario.TARGETED_EDIT, + routing=RoutingMeta(domain_id="project", process_id="edits", confidence=0.95, reason="test"), + constraints=TaskConstraints(allow_writes=True), + output_contract=OutputContract(result_type="changeset"), + metadata={ + "files_map": { + "README.md": { + "path": "README.md", + "content": "# Title\n", + "content_hash": "hash123", + } + } + }, + ) + plan = ExecutionPlan( + plan_id="plan-1", + task_id="task-1", + scenario=Scenario.TARGETED_EDIT, + template_id="targeted_edit_v1", + template_version="1.0", + steps=[], + ) + return ExecutionContext(task=task, plan=plan, graph_resolver=lambda *_: None, graph_invoker=lambda *_: {}) + + +def test_edit_actions_resolve_path_case_insensitive_and_keep_update() -> None: + actions = EditActions() + ctx = _ctx() + + actions.resolve_target(ctx) + actions.load_target_context(ctx) + actions.plan_minimal_patch(ctx) + actions.generate_patch(ctx) + + target = ctx.artifacts.get_content("target_context", {}) + changeset = ctx.artifacts.get_content("raw_changeset", []) + + assert target["path"] == "README.md" + assert changeset[0]["path"] == "README.md" + assert changeset[0]["op"] == "update" diff --git a/tests/agent/orchestrator/test_eval_suite.py b/tests/agent/orchestrator/test_eval_suite.py new file mode 100644 index 0000000..62dd548 --- /dev/null +++ b/tests/agent/orchestrator/test_eval_suite.py @@ -0,0 +1,56 @@ +import asyncio + +import pytest + +from app.modules.agent.engine.orchestrator.models import OutputContract, RoutingMeta, Scenario, TaskConstraints, TaskSpec +from app.modules.agent.engine.orchestrator.service import OrchestratorService + 
+ +@pytest.mark.parametrize( + "scenario,expect_changeset", + [ + (Scenario.EXPLAIN_PART, False), + (Scenario.ANALYTICS_REVIEW, False), + (Scenario.DOCS_FROM_ANALYTICS, True), + (Scenario.TARGETED_EDIT, True), + (Scenario.GHERKIN_MODEL, True), + ], +) +def test_eval_suite_scenarios_run(scenario: Scenario, expect_changeset: bool) -> None: + service = OrchestratorService() + + task = TaskSpec( + task_id=f"task-{scenario.value}", + dialog_session_id="dialog-1", + rag_session_id="rag-1", + mode="auto", + user_message="Please process this scenario using project docs and requirements.", + scenario=scenario, + routing=RoutingMeta(domain_id="project", process_id="qa", confidence=0.95, reason="eval"), + constraints=TaskConstraints( + allow_writes=scenario in {Scenario.DOCS_FROM_ANALYTICS, Scenario.TARGETED_EDIT, Scenario.GHERKIN_MODEL}, + max_steps=20, + max_retries_per_step=2, + step_timeout_sec=90, + ), + output_contract=OutputContract(result_type="answer"), + attachments=[{"type": "http_url", "value": "https://example.com/doc"}], + metadata={ + "rag_context": "Requirements context is available.", + "confluence_context": "", + "files_map": {"docs/api/increment.md": {"content": "old", "content_hash": "h1"}}, + }, + ) + + result = asyncio.run( + service.run( + task=task, + graph_resolver=lambda _domain, _process: object(), + graph_invoker=lambda _graph, _state, _dialog: {"answer": "fallback", "changeset": []}, + ) + ) + + assert result.meta["plan"]["status"] in {"completed", "partial"} + assert bool(result.changeset) is expect_changeset + if not expect_changeset: + assert result.answer diff --git a/tests/agent/orchestrator/test_orchestrator_service.py b/tests/agent/orchestrator/test_orchestrator_service.py new file mode 100644 index 0000000..d1cdce2 --- /dev/null +++ b/tests/agent/orchestrator/test_orchestrator_service.py @@ -0,0 +1,72 @@ +import asyncio + +from app.modules.agent.engine.orchestrator.models import ( + OutputContract, + RoutingMeta, + Scenario, + 
TaskConstraints, + TaskSpec, +) +from app.modules.agent.engine.orchestrator.service import OrchestratorService + + +class DummyGraph: + pass + + +def _task(scenario: Scenario) -> TaskSpec: + allow_writes = scenario in {Scenario.DOCS_FROM_ANALYTICS, Scenario.TARGETED_EDIT, Scenario.GHERKIN_MODEL} + return TaskSpec( + task_id="task-1", + dialog_session_id="dialog-1", + rag_session_id="rag-1", + mode="auto", + user_message="Explain this module", + scenario=scenario, + routing=RoutingMeta(domain_id="project", process_id="qa", confidence=0.95, reason="unit-test"), + constraints=TaskConstraints(allow_writes=allow_writes, max_steps=16, max_retries_per_step=2, step_timeout_sec=90), + output_contract=OutputContract(result_type="answer"), + metadata={ + "rag_context": "RAG", + "confluence_context": "", + "files_map": {}, + }, + ) + + +def test_orchestrator_service_returns_answer() -> None: + service = OrchestratorService() + + def graph_resolver(domain_id: str, process_id: str): + assert domain_id == "project" + assert process_id == "qa" + return DummyGraph() + + def graph_invoker(_graph, state: dict, dialog_session_id: str): + assert state["message"] == "Explain this module" + assert dialog_session_id == "dialog-1" + return {"answer": "It works.", "changeset": []} + + result = asyncio.run(service.run(task=_task(Scenario.GENERAL_QA), graph_resolver=graph_resolver, graph_invoker=graph_invoker)) + assert result.answer == "It works." 
+ assert result.meta["plan"]["status"] == "completed" + + +def test_orchestrator_service_generates_changeset_for_docs_scenario() -> None: + service = OrchestratorService() + + def graph_resolver(_domain_id: str, _process_id: str): + return DummyGraph() + + def graph_invoker(_graph, _state: dict, _dialog_session_id: str): + return {"answer": "unused", "changeset": []} + + result = asyncio.run( + service.run( + task=_task(Scenario.DOCS_FROM_ANALYTICS), + graph_resolver=graph_resolver, + graph_invoker=graph_invoker, + ) + ) + assert result.meta["plan"]["status"] == "completed" + assert len(result.changeset) > 0 diff --git a/tests/agent/orchestrator/test_plan_validator.py b/tests/agent/orchestrator/test_plan_validator.py new file mode 100644 index 0000000..d8d893d --- /dev/null +++ b/tests/agent/orchestrator/test_plan_validator.py @@ -0,0 +1,49 @@ +from app.modules.agent.engine.orchestrator.models import ( + ExecutionPlan, + OutputContract, + PlanStep, + RetryPolicy, + RoutingMeta, + Scenario, + TaskConstraints, + TaskSpec, +) +from app.modules.agent.engine.orchestrator.plan_validator import PlanValidator + + +def _task(*, allow_writes: bool) -> TaskSpec: + return TaskSpec( + task_id="t1", + dialog_session_id="d1", + rag_session_id="r1", + mode="auto", + user_message="hello", + scenario=Scenario.GENERAL_QA, + routing=RoutingMeta(domain_id="default", process_id="general", confidence=0.9, reason="test"), + constraints=TaskConstraints(allow_writes=allow_writes, max_steps=10, max_retries_per_step=2, step_timeout_sec=60), + output_contract=OutputContract(result_type="answer"), + ) + + +def test_plan_validator_rejects_write_step_when_not_allowed() -> None: + plan = ExecutionPlan( + plan_id="p1", + task_id="t1", + scenario=Scenario.GENERAL_QA, + template_id="tmp", + template_version="1.0", + steps=[ + PlanStep( + step_id="s1", + title="write", + action_id="collect_state", + executor="function", + side_effect="write", + retry=RetryPolicy(max_attempts=1), + ) + ], + ) + + 
errors = PlanValidator().validate(plan, _task(allow_writes=False)) + + assert "write_step_not_allowed:s1" in errors diff --git a/tests/agent/orchestrator/test_quality_metrics.py b/tests/agent/orchestrator/test_quality_metrics.py new file mode 100644 index 0000000..53b7b46 --- /dev/null +++ b/tests/agent/orchestrator/test_quality_metrics.py @@ -0,0 +1,42 @@ +import asyncio + +from app.modules.agent.engine.orchestrator.models import OutputContract, OutputSection, RoutingMeta, Scenario, TaskConstraints, TaskSpec +from app.modules.agent.engine.orchestrator.service import OrchestratorService + + +def test_quality_metrics_present_and_scored() -> None: + service = OrchestratorService() + task = TaskSpec( + task_id="quality-1", + dialog_session_id="dialog-1", + rag_session_id="rag-1", + mode="auto", + user_message="Explain architecture", + scenario=Scenario.EXPLAIN_PART, + routing=RoutingMeta(domain_id="project", process_id="qa", confidence=0.9, reason="test"), + constraints=TaskConstraints(allow_writes=False), + output_contract=OutputContract( + result_type="answer", + sections=[ + OutputSection(name="sequence_diagram", format="mermaid"), + OutputSection(name="use_cases", format="markdown"), + OutputSection(name="summary", format="markdown"), + ], + ), + metadata={"rag_context": "A\nB", "confluence_context": "", "files_map": {}}, + ) + + result = asyncio.run( + service.run( + task=task, + graph_resolver=lambda _d, _p: object(), + graph_invoker=lambda _g, _s, _id: {"answer": "unused", "changeset": []}, + ) + ) + + quality = result.meta.get("quality", {}) + assert quality + assert quality.get("faithfulness", {}).get("score") is not None + assert quality.get("coverage", {}).get("score") is not None + assert quality.get("status") in {"ok", "needs_review", "fail"} + assert quality.get("coverage", {}).get("covered_count", 0) >= 1 diff --git a/tests/agent/orchestrator/test_quality_metrics_gate_expectations.py b/tests/agent/orchestrator/test_quality_metrics_gate_expectations.py 
new file mode 100644 index 0000000..a9daf61 --- /dev/null +++ b/tests/agent/orchestrator/test_quality_metrics_gate_expectations.py @@ -0,0 +1,50 @@ +from app.modules.agent.engine.orchestrator.models import ( + ArtifactType, + OutputContract, + OutputSection, + RoutingMeta, + Scenario, + TaskConstraints, + TaskSpec, +) +from app.modules.agent.engine.orchestrator.quality_metrics import QualityMetricsCalculator +from app.modules.agent.engine.orchestrator.template_registry import ScenarioTemplateRegistry +from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext +from app.modules.agent.engine.orchestrator.models import PlanStatus + + +def test_quality_metrics_coverage_reflects_missing_required_sections() -> None: + task = TaskSpec( + task_id="quality-2", + dialog_session_id="dialog-1", + rag_session_id="rag-1", + mode="auto", + user_message="Explain architecture", + scenario=Scenario.EXPLAIN_PART, + routing=RoutingMeta(domain_id="project", process_id="qa", confidence=0.9, reason="test"), + constraints=TaskConstraints(allow_writes=False), + output_contract=OutputContract( + result_type="answer", + sections=[ + OutputSection(name="sequence_diagram", format="mermaid"), + OutputSection(name="use_cases", format="markdown"), + OutputSection(name="summary", format="markdown"), + ], + ), + metadata={"rag_context": "A", "confluence_context": "", "files_map": {}}, + ) + + plan = ScenarioTemplateRegistry().build(task) + plan.status = PlanStatus.COMPLETED + ctx = ExecutionContext( + task=task, + plan=plan, + graph_resolver=lambda _d, _p: object(), + graph_invoker=lambda _g, _s, _id: {}, + ) + ctx.artifacts.put(key="final_answer", artifact_type=ArtifactType.TEXT, content="Only summary text") + + metrics = QualityMetricsCalculator().build(ctx, step_results=[]) + + assert metrics["coverage"]["score"] < 1.0 + assert "sequence_diagram" in metrics["coverage"]["missing_items"] diff --git a/tests/agent/orchestrator/test_template_registry.py 
b/tests/agent/orchestrator/test_template_registry.py new file mode 100644 index 0000000..30878fd --- /dev/null +++ b/tests/agent/orchestrator/test_template_registry.py @@ -0,0 +1,38 @@ +from app.modules.agent.engine.orchestrator.models import OutputContract, RoutingMeta, Scenario, TaskConstraints, TaskSpec +from app.modules.agent.engine.orchestrator.template_registry import ScenarioTemplateRegistry + + +def _task(scenario: Scenario) -> TaskSpec: + return TaskSpec( + task_id="t1", + dialog_session_id="d1", + rag_session_id="r1", + mode="auto", + user_message="run scenario", + scenario=scenario, + routing=RoutingMeta(domain_id="project", process_id="qa", confidence=0.9, reason="test"), + constraints=TaskConstraints( + allow_writes=scenario in {Scenario.DOCS_FROM_ANALYTICS, Scenario.TARGETED_EDIT, Scenario.GHERKIN_MODEL} + ), + output_contract=OutputContract(result_type="answer"), + metadata={"rag_context": "ctx", "confluence_context": "", "files_map": {}}, + ) + + +def test_template_registry_has_multi_step_review_docs_edit_gherkin() -> None: + registry = ScenarioTemplateRegistry() + + review_steps = [step.step_id for step in registry.build(_task(Scenario.ANALYTICS_REVIEW)).steps] + docs_steps = [step.step_id for step in registry.build(_task(Scenario.DOCS_FROM_ANALYTICS)).steps] + edit_steps = [step.step_id for step in registry.build(_task(Scenario.TARGETED_EDIT)).steps] + gherkin_steps = [step.step_id for step in registry.build(_task(Scenario.GHERKIN_MODEL)).steps] + + assert "structural_check" in review_steps and "compose_review_report" in review_steps + assert "extract_change_intents" in docs_steps and "build_changeset" in docs_steps + assert "resolve_target" in edit_steps and "finalize_changeset" in edit_steps + assert "generate_gherkin_bundle" in gherkin_steps and "validate_coverage" in gherkin_steps + + assert len(review_steps) >= 7 + assert len(docs_steps) >= 9 + assert len(edit_steps) >= 7 + assert len(gherkin_steps) >= 8 diff --git 
a/tests/agent/test_repo_webhook_service.py b/tests/agent/test_repo_webhook_service.py
new file mode 100644
index 0000000..dac713f
--- /dev/null
+++ b/tests/agent/test_repo_webhook_service.py
@@ -0,0 +1,105 @@
+from __future__ import annotations
+
+from app.modules.rag_repo.webhook_service import RepoWebhookService
+
+
+class FakeStoryWriter:
+    """Story-writer double that stores the kwargs of every record_story_commit call."""
+    def __init__(self) -> None:
+        self.calls: list[dict] = []
+
+    def record_story_commit(self, **kwargs) -> None:
+        self.calls.append(kwargs)
+
+
+class FakeCacheWriter:
+    """Cache-writer double that stores the kwargs of every record_repo_cache call."""
+    def __init__(self) -> None:
+        self.calls: list[dict] = []
+
+    def record_repo_cache(self, **kwargs) -> None:
+        self.calls.append(kwargs)
+
+
+def test_gitea_webhook_binds_story() -> None:
+    """Gitea push with a story id in the commit message is accepted, bound and cached."""
+    writer = FakeStoryWriter()
+    cache = FakeCacheWriter()
+    service = RepoWebhookService(writer, cache)
+
+    result = service.process(
+        provider="gitea",
+        payload={
+            "repository": {"full_name": "acme/proj"},
+            "ref": "refs/heads/feature/AAAA-1234",
+            "pusher": {"username": "alice"},
+            "commits": [
+                {
+                    "id": "abc123",
+                    "message": "FEAT-1 update docs",
+                    "added": ["docs/new.md"],
+                    "modified": ["docs/api.md"],
+                    "removed": [],
+                }
+            ],
+        },
+    )
+
+    assert result["accepted"] is True
+    assert result["story_bound"] is True
+    # Branch says AAAA-1234, commit message says FEAT-1; the expected id is the one from the message.
+    assert result["story_id"] == "FEAT-1"
+    assert result["cache_recorded"] is True
+    assert len(writer.calls) == 1
+    assert len(cache.calls) == 1
+    assert writer.calls[0]["project_id"] == "acme/proj"
+
+
+def test_webhook_without_story_id_is_non_fatal() -> None:
+    """Bitbucket push without a story id is accepted and cached, but no story is recorded."""
+    writer = FakeStoryWriter()
+    cache = FakeCacheWriter()
+    service = RepoWebhookService(writer, cache)
+
+    result = service.process(
+        provider="bitbucket",
+        payload={
+            "repository": {"full_name": "acme/proj"},
+            "push": {
+                "changes": [
+                    {
+                        "new": {
+                            "name": "feature/no-story",
+                            "target": {"hash": "abc123", "message": "update docs"},
+                        }
+                    }
+                ]
+            },
+        },
+    )
+
+    assert result["accepted"] is True
+    assert result["story_bound"] is False
+    assert result["cache_recorded"] is True
+    assert len(cache.calls) == 1
+    assert writer.calls == []
+
+
+def test_provider_autodetect_by_headers() -> None:
+    """With no explicit provider, a push carrying an X-Gitea-Event header is accepted and bound."""
+    writer = FakeStoryWriter()
+    service = RepoWebhookService(writer)
+
+    # No provider keyword is passed; the call supplies an X-Gitea-Event header instead.
+    result = service.process(
+        headers={"X-Gitea-Event": "push"},
+        payload={
+            "repository": {"full_name": "acme/proj"},
+            "ref": "refs/heads/feature/AAAA-1234",
+            "commits": [{"id": "abc123", "message": "AAAA-1234 update"}],
+        },
+    )
+
+    assert result["accepted"] is True
+    assert result["story_bound"] is True
+    assert result["story_id"] == "AAAA-1234"
diff --git a/tests/agent/test_story_session_recorder.py b/tests/agent/test_story_session_recorder.py
new file mode 100644
index 0000000..4d00f92
--- /dev/null
+++ b/tests/agent/test_story_session_recorder.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from app.modules.agent.story_session_recorder import StorySessionRecorder
+from app.schemas.changeset import ChangeItem, ChangeOp
+
+
+class FakeStoryRepo:
+    """Repository double that stores the kwargs of every add_session_artifact call."""
+    def __init__(self) -> None:
+        self.calls: list[dict] = []
+
+    def add_session_artifact(self, **kwargs) -> None:
+        self.calls.append(kwargs)
+
+
+def test_record_run_stores_attachment_and_changeset_artifacts() -> None:
+    """record_run on one Confluence URL, one file ref, an answer and one change yields 3 artifacts."""
+    repo = FakeStoryRepo()
+    recorder = StorySessionRecorder(repo)
+
+    recorder.record_run(
+        dialog_session_id="dialog-1",
+        rag_session_id="rag-1",
+        scenario="docs_from_analytics",
+        attachments=[
+            {"type": "confluence_url", "value": "https://example.org/doc"},
+            {"type": "file_ref", "value": "local.md"},
+        ],
+        answer="Generated docs update summary",
+        changeset=[
+            ChangeItem(
+                op=ChangeOp.UPDATE,
+                path="docs/api.md",
+                base_hash="abc",
+                proposed_content="new",
+                reason="sync endpoint section",
+            )
+        ],
+    )
+
+    # The three calls checked below: the Confluence URL, the answer summary, the changed path.
+    assert len(repo.calls) == 3
+    assert repo.calls[0]["artifact_role"] == "analysis"
+    assert repo.calls[0]["source_ref"] == "https://example.org/doc"
+
+    assert repo.calls[1]["artifact_role"] == "doc_change"
+    assert repo.calls[1]["summary"] == "Generated docs update summary"
+
+    assert repo.calls[2]["artifact_role"] == "doc_change"
+    assert repo.calls[2]["path"] == "docs/api.md"
+    assert repo.calls[2]["change_type"] == "updated"
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..f96b4d8
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,10 @@
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+# parents[1] of tests/conftest.py is the directory that contains tests/; it is put first on
+# sys.path (once) - presumably so the `app` package imports used by the tests resolve.
+ROOT = Path(__file__).resolve().parents[1]
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))