Перенес workflow
This commit is contained in:
617
README.md
617
README.md
@@ -1,481 +1,206 @@
|
|||||||
# PLBA
|
# PLBA
|
||||||
|
|
||||||
## 1. Общее описание сервиса и его назначение
|
`PLBA` (`Platform Runtime for Business Applications`) - runtime для бизнес-приложений.
|
||||||
|
|
||||||
`PLBA` (`Platform Runtime for Business Applications`) - это платформенный runtime для бизнес-приложений. Библиотека выносит из прикладного кода типовые инфраструктурные задачи: жизненный цикл приложения, запуск и остановку фоновых воркеров, загрузку конфигурации, health-check, tracing, логирование и control plane.
|
Платформа берет на себя инфраструктурные обязанности:
|
||||||
|
- lifecycle приложения
|
||||||
|
- запуск и остановку воркеров
|
||||||
|
- health/status
|
||||||
|
- tracing
|
||||||
|
- logging
|
||||||
|
- control plane
|
||||||
|
- загрузку конфигурации
|
||||||
|
|
||||||
Назначение сервиса:
|
Бизнес-приложение на базе `plba` собирается вокруг трех уровней:
|
||||||
- дать единый каркас для запуска бизнес-модулей;
|
- `ApplicationModule` собирает приложение и регистрирует воркеры
|
||||||
- отделить платформенные обязанности от предметной логики;
|
- `Worker` управляет исполнением и lifecycle
|
||||||
- упростить разработку сервисов с очередями, polling-обработчиками и фоновыми процессами;
|
- `Routine` реализует бизнес-функцию
|
||||||
- обеспечить наблюдаемость и управляемость runtime через health/status и HTTP control endpoints.
|
|
||||||
|
|
||||||
Текущая модель работы выглядит так:
|
`Routine` не является контрактом `plba`. Это рекомендуемый архитектурный паттерн для прикладного кода.
|
||||||
1. приложение создаёт `RuntimeManager`;
|
|
||||||
2. runtime загружает конфигурацию;
|
|
||||||
3. применяется logging-конфигурация;
|
|
||||||
4. модуль приложения регистрирует очереди, обработчики, воркеры и health contributors;
|
|
||||||
5. `WorkerSupervisor` запускает все рабочие компоненты;
|
|
||||||
6. runtime агрегирует состояние, health и tracing;
|
|
||||||
7. control plane предоставляет снимок состояния и команды запуска/остановки.
|
|
||||||
|
|
||||||
`PLBA` особенно полезен для:
|
Правила построения приложений на платформе собраны отдельно в [application_guidelines.md](/Users/alex/Dev_projects_v2/apps/plba/requirements/application_guidelines.md).
|
||||||
- почтовых ботов и интеграционных сервисов;
|
|
||||||
- event-driven и queue-driven приложений;
|
|
||||||
- фоновых бизнес-процессов;
|
|
||||||
- внутренних платформенных сервисов с единым operational-контуром.
|
|
||||||
|
|
||||||
## 2. Архитектура - диаграмма классов
|
## Runtime model
|
||||||
|
|
||||||
```mermaid
|
1. приложение объявляет `ApplicationModule`
|
||||||
classDiagram
|
2. модуль регистрирует один или несколько `Worker`
|
||||||
class RuntimeManager {
|
3. `RuntimeManager` запускает все воркеры
|
||||||
+register_module(module)
|
4. каждый `Worker` запускает свою бизнес-активность
|
||||||
+add_config_file(path)
|
5. runtime агрегирует health и status
|
||||||
+start()
|
6. runtime останавливает воркеры graceful или forcefully
|
||||||
+stop(timeout, force, stop_control_plane)
|
|
||||||
+status()
|
|
||||||
+current_health()
|
|
||||||
}
|
|
||||||
|
|
||||||
class ConfigurationManager {
|
## Main contracts
|
||||||
+add_provider(provider)
|
|
||||||
+load()
|
|
||||||
+reload()
|
|
||||||
+get()
|
|
||||||
+section(name, default)
|
|
||||||
}
|
|
||||||
|
|
||||||
class ServiceContainer {
|
### `ApplicationModule`
|
||||||
+register(name, service)
|
|
||||||
+get(name)
|
|
||||||
+require(name, expected_type)
|
|
||||||
+snapshot()
|
|
||||||
}
|
|
||||||
|
|
||||||
class ModuleRegistry {
|
Описывает, из чего состоит приложение.
|
||||||
+register_module(name)
|
|
||||||
+add_queue(name, queue)
|
|
||||||
+add_handler(name, handler)
|
|
||||||
+add_worker(worker)
|
|
||||||
+add_health_contributor(contributor)
|
|
||||||
}
|
|
||||||
|
|
||||||
class ApplicationModule {
|
Ответственность:
|
||||||
<<abstract>>
|
- дать имя модуля
|
||||||
+name
|
- зарегистрировать воркеры
|
||||||
+register(registry)
|
- зарегистрировать health contributors при необходимости
|
||||||
}
|
- собрать прикладные зависимости
|
||||||
|
|
||||||
class WorkerSupervisor {
|
### `Worker`
|
||||||
+register(worker)
|
|
||||||
+start()
|
|
||||||
+stop(timeout, force)
|
|
||||||
+snapshot()
|
|
||||||
+healths()
|
|
||||||
+statuses()
|
|
||||||
}
|
|
||||||
|
|
||||||
class Worker {
|
Главный runtime-контракт платформы.
|
||||||
<<abstract>>
|
|
||||||
+start()
|
|
||||||
+stop(force)
|
|
||||||
+health()
|
|
||||||
+status()
|
|
||||||
}
|
|
||||||
|
|
||||||
class QueueWorker {
|
Контракт:
|
||||||
+name
|
- `name`
|
||||||
+critical
|
- `critical`
|
||||||
+start()
|
- `start()`
|
||||||
+stop(force)
|
- `stop(force=False)`
|
||||||
+health()
|
- `health()`
|
||||||
+status()
|
- `status()`
|
||||||
}
|
|
||||||
|
|
||||||
class TaskQueue {
|
`Worker` отвечает только за runtime-поведение:
|
||||||
<<abstract>>
|
- как запускается бизнес-активность
|
||||||
+publish(task)
|
- в одном потоке или нескольких
|
||||||
+consume(timeout)
|
- single-run или loop
|
||||||
+ack(task)
|
- graceful shutdown
|
||||||
+nack(task, retry_delay)
|
- интерпретацию ошибок в `health/status`
|
||||||
+stats()
|
|
||||||
}
|
|
||||||
|
|
||||||
class InMemoryTaskQueue {
|
### `Routine`
|
||||||
+publish(task)
|
|
||||||
+consume(timeout)
|
|
||||||
+ack(task)
|
|
||||||
+nack(task, retry_delay)
|
|
||||||
+stats()
|
|
||||||
}
|
|
||||||
|
|
||||||
class TaskHandler {
|
Рекомендуемый application-level паттерн.
|
||||||
<<abstract>>
|
|
||||||
+handle(task)
|
|
||||||
}
|
|
||||||
|
|
||||||
class TraceService {
|
`Routine` описывает бизнес-функцию:
|
||||||
+create_context(...)
|
- что читать
|
||||||
+open_context(...)
|
- какие сервисы вызывать
|
||||||
+resume(task_metadata, operation)
|
- какие бизнес-решения принимать
|
||||||
+attach(task_metadata, context)
|
- что сохранять или отправлять наружу
|
||||||
+info(message, status, attrs)
|
|
||||||
+warning(message, status, attrs)
|
|
||||||
+error(message, status, attrs)
|
|
||||||
}
|
|
||||||
|
|
||||||
class HealthRegistry {
|
Обычно воркер получает одну routine через конструктор и вызывает ее в `start()` или во внутренних helper-методах.
|
||||||
+register(contributor)
|
|
||||||
+snapshot(worker_healths)
|
|
||||||
+payload(state, worker_healths)
|
|
||||||
}
|
|
||||||
|
|
||||||
class ControlPlaneService {
|
## Minimal example
|
||||||
+register_channel(channel)
|
|
||||||
+start(runtime)
|
|
||||||
+stop()
|
|
||||||
+snapshot(runtime)
|
|
||||||
}
|
|
||||||
|
|
||||||
class LogManager {
|
|
||||||
+apply_config(config)
|
|
||||||
}
|
|
||||||
|
|
||||||
class FileConfigProvider {
|
|
||||||
+load()
|
|
||||||
}
|
|
||||||
|
|
||||||
RuntimeManager --> ConfigurationManager
|
|
||||||
RuntimeManager --> ServiceContainer
|
|
||||||
RuntimeManager --> ModuleRegistry
|
|
||||||
RuntimeManager --> WorkerSupervisor
|
|
||||||
RuntimeManager --> TraceService
|
|
||||||
RuntimeManager --> HealthRegistry
|
|
||||||
RuntimeManager --> ControlPlaneService
|
|
||||||
RuntimeManager --> LogManager
|
|
||||||
RuntimeManager --> ApplicationModule
|
|
||||||
ModuleRegistry --> ServiceContainer
|
|
||||||
ModuleRegistry --> Worker
|
|
||||||
ModuleRegistry --> TaskQueue
|
|
||||||
ModuleRegistry --> TaskHandler
|
|
||||||
WorkerSupervisor --> Worker
|
|
||||||
QueueWorker --|> Worker
|
|
||||||
QueueWorker --> TaskQueue
|
|
||||||
QueueWorker --> TaskHandler
|
|
||||||
QueueWorker --> TraceService
|
|
||||||
InMemoryTaskQueue --|> TaskQueue
|
|
||||||
ConfigurationManager --> FileConfigProvider
|
|
||||||
```
|
|
||||||
|
|
||||||
### Архитектурные слои
|
|
||||||
|
|
||||||
- `app_runtime.core` - оркестрация runtime, контейнер сервисов, регистрация модулей, типы состояния.
|
|
||||||
- `app_runtime.contracts` - абстракции для интеграции бизнес-приложений.
|
|
||||||
- `app_runtime.workers`, `queue`, `config`, `logging`, `health`, `tracing`, `control` - инфраструктурные адаптеры и платформенные сервисы.
|
|
||||||
- `plba` - публичный фасад, который реэкспортирует ключевые классы как API пакета.
|
|
||||||
|
|
||||||
## 3. Описание доступных модулей, их назначение, краткое устройство, примеры применения в бизнес приложениях
|
|
||||||
|
|
||||||
### `plba`
|
|
||||||
|
|
||||||
Публичный API пакета. Реэкспортирует `RuntimeManager`, `ApplicationModule`, `QueueWorker`, `InMemoryTaskQueue`, `TraceService`, `HealthRegistry`, `ControlPlaneService` и другие классы.
|
|
||||||
|
|
||||||
Краткое устройство:
|
|
||||||
- служит фасадом над `app_runtime`;
|
|
||||||
- упрощает импорт для прикладного кода;
|
|
||||||
- позволяет использовать пакет как библиотеку без знания внутренней структуры.
|
|
||||||
|
|
||||||
Пример применения:
|
|
||||||
- бизнес-сервис импортирует `create_runtime` и `ApplicationModule`, собирает свой модуль и запускает runtime.
|
|
||||||
|
|
||||||
### `app_runtime.core`
|
|
||||||
|
|
||||||
Основной orchestration-слой.
|
|
||||||
|
|
||||||
Ключевые классы:
|
|
||||||
- `RuntimeManager` - центральная точка запуска и остановки;
|
|
||||||
- `ConfigurationManager` - загрузка и merge конфигурации;
|
|
||||||
- `ServiceContainer` - DI-like контейнер платформенных сервисов;
|
|
||||||
- `ModuleRegistry` - регистрация очередей, обработчиков, воркеров и health contributors.
|
|
||||||
|
|
||||||
Краткое устройство:
|
|
||||||
- `RuntimeManager` создаёт и связывает инфраструктурные сервисы;
|
|
||||||
- при старте регистрирует health contributors, воркеры и поднимает control plane;
|
|
||||||
- `ModuleRegistry` связывает бизнес-модуль с runtime без жёсткой зависимости на конкретные реализации.
|
|
||||||
|
|
||||||
Примеры применения в бизнес-приложениях:
|
|
||||||
- CRM-интеграция с несколькими фоновых воркерами;
|
|
||||||
- сервис обработки заявок, где один модуль регистрирует очередь, handler и worker pool;
|
|
||||||
- back-office процесс с управляемым graceful shutdown.
|
|
||||||
|
|
||||||
### `app_runtime.contracts`
|
|
||||||
|
|
||||||
Набор абстракций для расширения платформы.
|
|
||||||
|
|
||||||
Ключевые контракты:
|
|
||||||
- `ApplicationModule`;
|
|
||||||
- `Worker`;
|
|
||||||
- `TaskQueue`;
|
|
||||||
- `TaskHandler`;
|
|
||||||
- `ConfigProvider`;
|
|
||||||
- `HealthContributor`;
|
|
||||||
- trace-related контракты.
|
|
||||||
|
|
||||||
Краткое устройство:
|
|
||||||
- бизнес-код реализует интерфейсы, а runtime работает только через контракты;
|
|
||||||
- это позволяет менять инфраструктуру без переписывания прикладной логики.
|
|
||||||
|
|
||||||
Примеры применения в бизнес-приложениях:
|
|
||||||
- реализовать свой `ApplicationModule` для почтового бота;
|
|
||||||
- подключить собственный `ConfigProvider` для БД или secrets storage;
|
|
||||||
- реализовать кастомный `Worker` для long-running polling процесса.
|
|
||||||
|
|
||||||
### `app_runtime.workers`
|
|
||||||
|
|
||||||
Модуль управления рабочими процессами.
|
|
||||||
|
|
||||||
Ключевые классы:
|
|
||||||
- `WorkerSupervisor` - запускает и останавливает набор воркеров;
|
|
||||||
- `QueueWorker` - стандартный worker для обработки задач из очереди.
|
|
||||||
|
|
||||||
Краткое устройство:
|
|
||||||
- `WorkerSupervisor` агрегирует health/status всех воркеров;
|
|
||||||
- `QueueWorker` поднимает нужное число потоков, читает задачи из `TaskQueue`, вызывает `TaskHandler`, делает `ack/nack` и обновляет operational-метрики.
|
|
||||||
|
|
||||||
Примеры применения в бизнес-приложениях:
|
|
||||||
- параллельная обработка входящих писем;
|
|
||||||
- обработка очереди заказов;
|
|
||||||
- фоновая генерация документов или актов.
|
|
||||||
|
|
||||||
### `app_runtime.queue`
|
|
||||||
|
|
||||||
Очереди задач.
|
|
||||||
|
|
||||||
Ключевой класс:
|
|
||||||
- `InMemoryTaskQueue`.
|
|
||||||
|
|
||||||
Краткое устройство:
|
|
||||||
- использует стандартный `Queue` из Python;
|
|
||||||
- хранит счётчики `published`, `acked`, `nacked`, `queued`;
|
|
||||||
- подходит как базовая реализация для разработки, тестов и простых сценариев.
|
|
||||||
|
|
||||||
Примеры применения в бизнес-приложениях:
|
|
||||||
- локальная очередь задач в небольшом внутреннем сервисе;
|
|
||||||
- тестовая среда без внешнего брокера;
|
|
||||||
- staging-сценарии для отладки worker pipeline.
|
|
||||||
|
|
||||||
### `app_runtime.config`
|
|
||||||
|
|
||||||
Подсистема загрузки конфигурации.
|
|
||||||
|
|
||||||
Ключевые классы:
|
|
||||||
- `FileConfigProvider`;
|
|
||||||
- `ConfigFileLoader`.
|
|
||||||
|
|
||||||
Краткое устройство:
|
|
||||||
- `ConfigurationManager` собирает данные из провайдеров;
|
|
||||||
- текущая штатная реализация читает YAML-файл;
|
|
||||||
- поддерживается глубокое слияние секций конфигурации.
|
|
||||||
|
|
||||||
Примеры применения в бизнес-приложениях:
|
|
||||||
- конфигурация платформы и прикладных модулей из `config.yml`;
|
|
||||||
- раздельное хранение `platform`, `log` и app-specific секций;
|
|
||||||
- подключение нескольких источников конфигурации с последующим merge.
|
|
||||||
|
|
||||||
### `app_runtime.logging`
|
|
||||||
|
|
||||||
Управление логированием.
|
|
||||||
|
|
||||||
Ключевой класс:
|
|
||||||
- `LogManager`.
|
|
||||||
|
|
||||||
Краткое устройство:
|
|
||||||
- применяет `dictConfig` из секции `log`;
|
|
||||||
- хранит последнюю валидную конфигурацию и пытается восстановиться при ошибке.
|
|
||||||
|
|
||||||
Примеры применения в бизнес-приложениях:
|
|
||||||
- единообразная настройка JSON-логов;
|
|
||||||
- переключение уровней логирования между окружениями;
|
|
||||||
- централизованная logging-конфигурация для нескольких модулей.
|
|
||||||
|
|
||||||
### `app_runtime.health`
|
|
||||||
|
|
||||||
Подсистема health aggregation.
|
|
||||||
|
|
||||||
Ключевой класс:
|
|
||||||
- `HealthRegistry`.
|
|
||||||
|
|
||||||
Краткое устройство:
|
|
||||||
- собирает health от воркеров и дополнительных contributors;
|
|
||||||
- агрегирует статус в `ok`, `degraded`, `unhealthy`;
|
|
||||||
- формирует payload для readiness/liveness и operational snapshot.
|
|
||||||
|
|
||||||
Примеры применения в бизнес-приложениях:
|
|
||||||
- показывать degraded, если обработка идёт с ошибками;
|
|
||||||
- маркировать сервис unhealthy при падении критичного worker;
|
|
||||||
- добавлять health внешней зависимости, например IMAP или ERP API.
|
|
||||||
|
|
||||||
### `app_runtime.tracing`
|
|
||||||
|
|
||||||
Подсистема трассировки выполнения.
|
|
||||||
|
|
||||||
Ключевые классы:
|
|
||||||
- `TraceService`;
|
|
||||||
- `TraceContextStore`;
|
|
||||||
- `NoOpTraceTransport`.
|
|
||||||
|
|
||||||
Краткое устройство:
|
|
||||||
- создаёт trace contexts;
|
|
||||||
- связывает source/queue/worker/handler через metadata;
|
|
||||||
- пишет контексты и сообщения через транспортный слой.
|
|
||||||
|
|
||||||
Примеры применения в бизнес-приложениях:
|
|
||||||
- трассировка обработки письма от polling до бизнес-handler;
|
|
||||||
- аудит прохождения заказа по pipeline;
|
|
||||||
- отладка проблемных задач в фоне.
|
|
||||||
|
|
||||||
### `app_runtime.control`
|
|
||||||
|
|
||||||
Control plane и HTTP-канал управления.
|
|
||||||
|
|
||||||
Ключевые классы:
|
|
||||||
- `ControlPlaneService`;
|
|
||||||
- `HttpControlChannel`;
|
|
||||||
- `ControlActionSet`.
|
|
||||||
|
|
||||||
Краткое устройство:
|
|
||||||
- публикует health/status и команды управления runtime;
|
|
||||||
- может поднимать HTTP endpoints для start/stop/status;
|
|
||||||
- строит snapshot состояния на основе `RuntimeManager`.
|
|
||||||
|
|
||||||
Примеры применения в бизнес-приложениях:
|
|
||||||
- административный endpoint для оператора;
|
|
||||||
- health endpoint для Kubernetes/nomad;
|
|
||||||
- runtime status для monitoring dashboard.
|
|
||||||
|
|
||||||
## 4. Установка - `git@git.lesha.spb.ru:alex/plba.git`
|
|
||||||
|
|
||||||
### Требования
|
|
||||||
|
|
||||||
- Python `3.12+`
|
|
||||||
- `pip`
|
|
||||||
- SSH-доступ к `git.lesha.spb.ru`
|
|
||||||
|
|
||||||
### Установка напрямую через `pip` из Git-репозитория
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install "plba @ git+ssh://git@git.lesha.spb.ru/alex/plba.git"
|
|
||||||
```
|
|
||||||
|
|
||||||
При такой установке `pip` ставит не только сам пакет `plba`, но и все его зависимости, объявленные в [pyproject.toml](/Users/alex/Dev_projects_v2/apps/plba/pyproject.toml), например `fastapi`, `uvicorn` и `PyYAML`.
|
|
||||||
|
|
||||||
Если нужна установка из конкретной ветки:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install "plba @ git+ssh://git@git.lesha.spb.ru/alex/plba.git@main"
|
|
||||||
```
|
|
||||||
|
|
||||||
Если нужна установка из конкретного тега или commit hash:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install "plba @ git+ssh://git@git.lesha.spb.ru/alex/plba.git@v0.1.0"
|
|
||||||
```
|
|
||||||
|
|
||||||
или
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install "plba @ git+ssh://git@git.lesha.spb.ru/alex/plba.git@<commit-hash>"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Установка в виртуальное окружение
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m venv .venv
|
|
||||||
source .venv/bin/activate
|
|
||||||
pip install --upgrade pip
|
|
||||||
pip install "plba @ git+ssh://git@git.lesha.spb.ru/alex/plba.git"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Подключение `plba` в бизнес-приложении
|
|
||||||
|
|
||||||
Чтобы при установке бизнес-приложения автоматически подтягивались зависимости `plba`, нужно добавить `plba` в зависимости самого бизнес-приложения как Git dependency.
|
|
||||||
|
|
||||||
Пример для `requirements.txt`:
|
|
||||||
|
|
||||||
```txt
|
|
||||||
plba @ git+ssh://git@git.lesha.spb.ru/alex/plba.git
|
|
||||||
```
|
|
||||||
|
|
||||||
Пример для `pyproject.toml`:
|
|
||||||
|
|
||||||
```toml
|
|
||||||
[project]
|
|
||||||
dependencies = [
|
|
||||||
"plba @ git+ssh://git@git.lesha.spb.ru/alex/plba.git",
|
|
||||||
]
|
|
||||||
```
|
|
||||||
|
|
||||||
Если бизнес-приложение собирается в Docker, достаточно чтобы на этапе сборки выполнялся обычный `pip install`, например:
|
|
||||||
|
|
||||||
```dockerfile
|
|
||||||
COPY pyproject.toml .
|
|
||||||
RUN pip install .
|
|
||||||
```
|
|
||||||
|
|
||||||
или при использовании `requirements.txt`:
|
|
||||||
|
|
||||||
```dockerfile
|
|
||||||
COPY requirements.txt .
|
|
||||||
RUN pip install -r requirements.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
В обоих случаях `pip` установит `plba` из Git и автоматически подтянет его транзитивные зависимости.
|
|
||||||
|
|
||||||
### Локальная разработка
|
|
||||||
|
|
||||||
Если пакет нужно не только использовать, но и разрабатывать:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone git@git.lesha.spb.ru:alex/plba.git
|
|
||||||
cd plba
|
|
||||||
python -m venv .venv
|
|
||||||
source .venv/bin/activate
|
|
||||||
pip install -e .
|
|
||||||
```
|
|
||||||
|
|
||||||
### Быстрая проверка
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -c "import plba; print(plba.__all__[:5])"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Минимальный пример использования
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from plba import ApplicationModule, InMemoryTaskQueue, QueueWorker, RuntimeManager, Task, TaskHandler
|
from threading import Event, Lock, Thread
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
|
from plba import (
|
||||||
|
ApplicationModule,
|
||||||
|
Worker,
|
||||||
|
WorkerHealth,
|
||||||
|
WorkerStatus,
|
||||||
|
create_runtime,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class PrintHandler(TaskHandler):
|
class OrdersRoutine:
|
||||||
def handle(self, task: Task) -> None:
|
def __init__(self, service) -> None:
|
||||||
print(task.payload)
|
self._service = service
|
||||||
|
|
||||||
|
def run(self) -> None:
|
||||||
|
self._service.process_new_orders()
|
||||||
|
|
||||||
|
|
||||||
class DemoModule(ApplicationModule):
|
class OrdersWorker(Worker):
|
||||||
|
def __init__(self, routine: OrdersRoutine, interval: float = 1.0) -> None:
|
||||||
|
self._routine = routine
|
||||||
|
self._interval = interval
|
||||||
|
self._thread: Thread | None = None
|
||||||
|
self._stop_requested = Event()
|
||||||
|
self._lock = Lock()
|
||||||
|
self._in_flight = 0
|
||||||
|
self._failures = 0
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self) -> str:
|
def name(self) -> str:
|
||||||
return "demo"
|
return "orders-worker"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def critical(self) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
def start(self) -> None:
|
||||||
|
if self._thread and self._thread.is_alive():
|
||||||
|
return
|
||||||
|
self._stop_requested.clear()
|
||||||
|
self._thread = Thread(target=self._run_loop, daemon=True)
|
||||||
|
self._thread.start()
|
||||||
|
|
||||||
|
def stop(self, force: bool = False) -> None:
|
||||||
|
del force
|
||||||
|
self._stop_requested.set()
|
||||||
|
|
||||||
|
def health(self) -> WorkerHealth:
|
||||||
|
if self._failures > 0:
|
||||||
|
return WorkerHealth(self.name, "degraded", self.critical)
|
||||||
|
return WorkerHealth(self.name, "ok", self.critical)
|
||||||
|
|
||||||
|
def status(self) -> WorkerStatus:
|
||||||
|
alive = self._thread is not None and self._thread.is_alive()
|
||||||
|
state = "busy" if self._in_flight else "idle"
|
||||||
|
if not alive:
|
||||||
|
state = "stopped"
|
||||||
|
elif self._stop_requested.is_set():
|
||||||
|
state = "stopping"
|
||||||
|
return WorkerStatus(name=self.name, state=state, in_flight=self._in_flight)
|
||||||
|
|
||||||
|
def _run_loop(self) -> None:
|
||||||
|
while not self._stop_requested.is_set():
|
||||||
|
with self._lock:
|
||||||
|
self._in_flight += 1
|
||||||
|
try:
|
||||||
|
self._routine.run()
|
||||||
|
except Exception:
|
||||||
|
self._failures += 1
|
||||||
|
finally:
|
||||||
|
with self._lock:
|
||||||
|
self._in_flight -= 1
|
||||||
|
sleep(self._interval)
|
||||||
|
|
||||||
|
|
||||||
|
class OrdersModule(ApplicationModule):
|
||||||
|
@property
|
||||||
|
def name(self) -> str:
|
||||||
|
return "orders"
|
||||||
|
|
||||||
def register(self, registry) -> None:
|
def register(self, registry) -> None:
|
||||||
queue = InMemoryTaskQueue()
|
service = OrderService()
|
||||||
traces = registry.services.get("traces")
|
routine = OrdersRoutine(service)
|
||||||
handler = PrintHandler()
|
registry.add_worker(OrdersWorker(routine))
|
||||||
queue.publish(Task(name="demo-task", payload={"id": 1}, metadata={}))
|
|
||||||
registry.add_worker(QueueWorker("demo-worker", queue, handler, traces))
|
|
||||||
|
|
||||||
|
|
||||||
runtime = RuntimeManager()
|
runtime = create_runtime(OrdersModule(), config_path="config.yml")
|
||||||
runtime.register_module(DemoModule())
|
|
||||||
runtime.start()
|
runtime.start()
|
||||||
runtime.stop()
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Health and status
|
||||||
|
|
||||||
|
Практика такая:
|
||||||
|
- `Routine` выполняет бизнес-работу
|
||||||
|
- `Worker` ловит ее ошибки
|
||||||
|
- `Worker` интерпретирует outcome в `health()` и `status()`
|
||||||
|
|
||||||
|
То есть routine не выставляет health напрямую.
|
||||||
|
|
||||||
|
## In-memory queue
|
||||||
|
|
||||||
|
`InMemoryTaskQueue` остается в платформе как простой in-memory utility.
|
||||||
|
|
||||||
|
Это не базовый платформенный контракт и не обязательный паттерн архитектуры.
|
||||||
|
Ее можно использовать в прикладном коде как локальный буфер между компонентами, если это действительно помогает.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from plba import InMemoryTaskQueue
|
||||||
|
|
||||||
|
queue = InMemoryTaskQueue[str]()
|
||||||
|
queue.put("payload")
|
||||||
|
item = queue.get(timeout=0.1)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Public API
|
||||||
|
|
||||||
|
Основные публичные сущности:
|
||||||
|
- `ApplicationModule`
|
||||||
|
- `Worker`
|
||||||
|
- `WorkerHealth`
|
||||||
|
- `WorkerStatus`
|
||||||
|
- `RuntimeManager`
|
||||||
|
- `WorkerSupervisor`
|
||||||
|
- `TraceService`
|
||||||
|
- `HealthRegistry`
|
||||||
|
- `InMemoryTaskQueue`
|
||||||
|
- `create_runtime(...)`
|
||||||
|
|||||||
@@ -1,111 +1,50 @@
|
|||||||
|
# Architectural Constraints
|
||||||
|
|
||||||
**`docs/adr/0001-new-runtime.md`**
|
## Main constraints
|
||||||
```md
|
|
||||||
# ADR 0001: Create a new runtime project instead of evolving the legacy ConfigManager model
|
|
||||||
|
|
||||||
## Status
|
1. `Worker` is the primary runtime abstraction.
|
||||||
|
2. `ApplicationModule` assembles the application and registers workers.
|
||||||
|
3. Business behavior should live outside worker lifecycle code.
|
||||||
|
4. The platform should avoid queue-centric abstractions as first-class architecture.
|
||||||
|
5. Utility components are allowed only when they remain optional and transparent.
|
||||||
|
|
||||||
Accepted
|
## Worker constraints
|
||||||
|
|
||||||
## Context
|
Worker should own:
|
||||||
|
- thread/process lifecycle
|
||||||
|
- execution strategy
|
||||||
|
- graceful stop
|
||||||
|
- runtime state
|
||||||
|
- health state
|
||||||
|
|
||||||
The previous generation of the application runtime was centered around a timer-driven execution model:
|
Worker should not own:
|
||||||
- a manager loaded configuration
|
- large business workflows
|
||||||
- a worker loop periodically called `execute()`
|
- domain rules
|
||||||
- applications placed most operational logic behind that entry point
|
- parsing plus persistence plus integration logic in one class
|
||||||
|
|
||||||
That model is adequate for simple periodic jobs, but it does not match the direction of the new platform.
|
## Business-code constraints
|
||||||
|
|
||||||
The new platform must support:
|
Business routines and services should own:
|
||||||
- task sources
|
- business decisions
|
||||||
- queue-driven processing
|
- external integration calls
|
||||||
- multiple parallel workers
|
- persistence
|
||||||
- trace propagation across producer/consumer boundaries
|
- domain validation
|
||||||
- richer lifecycle and status management
|
|
||||||
- health aggregation
|
|
||||||
- future admin web interface
|
|
||||||
- future authentication and user management
|
|
||||||
|
|
||||||
These are platform concerns, not business concerns.
|
They should not own:
|
||||||
|
- platform lifecycle
|
||||||
|
- worker supervision
|
||||||
|
- runtime status management
|
||||||
|
|
||||||
We also want business applications to describe only business functionality and rely on the runtime for infrastructure behavior.
|
## API constraints
|
||||||
|
|
||||||
## Decision
|
Public API should stay small and clear.
|
||||||
|
|
||||||
We will create a new runtime project instead of implementing a `V3` directly inside the current legacy ConfigManager codebase.
|
Prefer:
|
||||||
|
|
||||||
The new runtime will be built around a platform-oriented model with explicit concepts such as:
|
|
||||||
- `RuntimeManager`
|
|
||||||
- `ApplicationModule`
|
- `ApplicationModule`
|
||||||
- `TaskSource`
|
- `Worker`
|
||||||
- `TaskQueue`
|
- runtime services
|
||||||
- `WorkerSupervisor`
|
- utility queue
|
||||||
- `TaskHandler`
|
|
||||||
- `TraceService`
|
|
||||||
- `HealthRegistry`
|
|
||||||
|
|
||||||
The old execute-centered model is treated as a previous-generation design and is not the architectural basis of the new runtime.
|
Avoid:
|
||||||
|
- legacy abstractions preserved only for compatibility
|
||||||
## Rationale
|
- specialized platform roles without strong need
|
||||||
|
|
||||||
Creating a new runtime project gives us:
|
|
||||||
- freedom to design the correct abstractions from the start
|
|
||||||
- no pressure to preserve legacy contracts
|
|
||||||
- cleaner boundaries between platform and business logic
|
|
||||||
- simpler documentation and tests
|
|
||||||
- lower long-term complexity than mixing old and new models in one codebase
|
|
||||||
|
|
||||||
If we built this as `V3` inside the old project, we would likely inherit:
|
|
||||||
- compatibility constraints
|
|
||||||
- mixed abstractions
|
|
||||||
- transitional adapters
|
|
||||||
- conceptual confusion between periodic execution and event/queue processing
|
|
||||||
|
|
||||||
The expected long-term cost of such coupling is higher than creating a clean new runtime.
|
|
||||||
|
|
||||||
## Consequences
|
|
||||||
|
|
||||||
### Positive
|
|
||||||
|
|
||||||
- the platform can be modeled cleanly
|
|
||||||
- business applications can integrate through explicit contracts
|
|
||||||
- new runtime capabilities can be added without legacy pressure
|
|
||||||
- mail_order_bot can become the first pilot application on the new runtime
|
|
||||||
|
|
||||||
### Negative
|
|
||||||
|
|
||||||
- some existing capabilities will need to be reintroduced in the new project
|
|
||||||
- there will be a temporary period with both legacy and new runtime lines
|
|
||||||
- migration requires explicit planning
|
|
||||||
|
|
||||||
## Initial migration target
|
|
||||||
|
|
||||||
The first application on the new runtime will be `mail_order_bot`.
|
|
||||||
|
|
||||||
Initial runtime design for that application:
|
|
||||||
- IMAP polling source
|
|
||||||
- in-memory queue
|
|
||||||
- parallel workers
|
|
||||||
- business handler for email processing
|
|
||||||
- message marked as read only after successful processing
|
|
||||||
|
|
||||||
Later:
|
|
||||||
- swap IMAP polling source for IMAP IDLE source
|
|
||||||
- keep the queue and worker model unchanged
|
|
||||||
|
|
||||||
## What we intentionally do not carry over
|
|
||||||
|
|
||||||
We do not keep the old architecture as the central organizing principle:
|
|
||||||
- no `execute()`-centric application model
|
|
||||||
- no timer-loop as the main abstraction
|
|
||||||
- no implicit mixing of lifecycle and business processing
|
|
||||||
|
|
||||||
## Follow-up
|
|
||||||
|
|
||||||
Next design work should define:
|
|
||||||
- core platform contracts
|
|
||||||
- package structure
|
|
||||||
- runtime lifecycle
|
|
||||||
- queue and worker interfaces
|
|
||||||
- config model split between platform and application
|
|
||||||
- pilot integration for `mail_order_bot`
|
|
||||||
|
|||||||
@@ -1,116 +1,62 @@
|
|||||||
# Mail Order Bot Migration Plan
|
# Mail Order Bot Migration Plan
|
||||||
|
|
||||||
## Purpose
|
## Goal
|
||||||
|
|
||||||
This document describes how `mail_order_bot` will be adapted to the new runtime as the first pilot business application.
|
Migrate `mail_order_bot` to the new PLBA model:
|
||||||
|
- application assembled by `ApplicationModule`
|
||||||
|
- runtime execution owned by `Worker`
|
||||||
|
- business behavior implemented in routines/services
|
||||||
|
|
||||||
## Scope
|
## Target structure
|
||||||
|
|
||||||
This is not a full migration specification for all future applications.
|
### Application module
|
||||||
It is a practical first use case to validate the runtime architecture.
|
|
||||||
|
|
||||||
## Current model
|
`MailOrderBotModule` should:
|
||||||
|
- build domain services
|
||||||
The current application flow is tightly coupled:
|
- build routines
|
||||||
- the manager checks IMAP
|
- register workers
|
||||||
- unread emails are fetched
|
- register optional health contributors
|
||||||
- emails are processed synchronously
|
|
||||||
- messages are marked as read after processing
|
|
||||||
|
|
||||||
Polling and processing happen in one execution path.
|
|
||||||
|
|
||||||
## Target model
|
|
||||||
|
|
||||||
The new runtime-based flow should be:
|
|
||||||
|
|
||||||
1. a mail source detects new tasks
|
|
||||||
2. tasks are published to a queue
|
|
||||||
3. workers consume tasks in parallel
|
|
||||||
4. a domain handler processes each email
|
|
||||||
5. successful tasks lead to `mark_as_read`
|
|
||||||
6. failed tasks remain retriable
|
|
||||||
|
|
||||||
## Phase 1
|
|
||||||
|
|
||||||
### Source
|
|
||||||
- IMAP polling source
|
|
||||||
|
|
||||||
### Queue
|
|
||||||
- in-memory task queue
|
|
||||||
|
|
||||||
### Workers
|
### Workers
|
||||||
- 2 to 4 parallel workers initially
|
|
||||||
|
|
||||||
### Handler
|
Initial workers:
|
||||||
- domain email processing handler built around the current processing logic
|
- `MailPollingWorker`
|
||||||
|
- `EmailProcessingWorker`
|
||||||
|
- optional `ReconciliationWorker`
|
||||||
|
|
||||||
### Delivery semantics
|
Each worker should own only runtime behavior:
|
||||||
- email is marked as read only after successful processing
|
- start/stop
|
||||||
- unread state acts as the first safety mechanism against message loss
|
- execution loop
|
||||||
|
- thread model
|
||||||
|
- health/status
|
||||||
|
|
||||||
## Why in-memory queue is acceptable at first
|
### Routines
|
||||||
|
|
||||||
For the first phase:
|
Initial routines:
|
||||||
- infrastructure complexity stays low
|
- `MailPollingRoutine`
|
||||||
- the runtime contracts can be tested quickly
|
- `EmailProcessingRoutine`
|
||||||
- unread emails in IMAP provide a simple recovery path after crashes
|
- `ReconciliationRoutine`
|
||||||
|
|
||||||
This allows us to validate the runtime architecture before adopting an external broker.
|
Each routine should own business behavior:
|
||||||
|
- fetch messages
|
||||||
|
- parse message payload
|
||||||
|
- call domain services
|
||||||
|
- persist changes
|
||||||
|
- report business failures upward
|
||||||
|
|
||||||
## Phase 2
|
## Migration steps
|
||||||
|
|
||||||
Replace:
|
1. Extract business logic from current worker-like components into routines/services.
|
||||||
- IMAP polling source
|
2. Implement thin workers that call those routines.
|
||||||
|
3. Register workers from `MailOrderBotModule`.
|
||||||
|
4. Use runtime health and status only through worker state.
|
||||||
|
5. Keep any local queue only as an implementation detail if it still helps.
|
||||||
|
|
||||||
With:
|
## Optional queue usage
|
||||||
- IMAP IDLE source
|
|
||||||
|
|
||||||
The queue, workers, and handler should remain unchanged.
|
If email processing still benefits from buffering, `InMemoryTaskQueue` may be used inside the application.
|
||||||
|
|
||||||
This is an explicit architectural goal:
|
Important:
|
||||||
source replacement without redesigning the processing pipeline.
|
- it should remain an app-level detail
|
||||||
|
- it should not define the platform contract
|
||||||
## Domain responsibilities that remain inside mail_order_bot
|
- the main architecture should still be described through workers and routines
|
||||||
|
|
||||||
The runtime should not own:
|
|
||||||
- email parsing rules
|
|
||||||
- client resolution logic
|
|
||||||
- attachment processing rules
|
|
||||||
- order creation logic
|
|
||||||
- client-specific behavior
|
|
||||||
|
|
||||||
These remain in the business application.
|
|
||||||
|
|
||||||
## Platform responsibilities used by mail_order_bot
|
|
||||||
|
|
||||||
The new runtime should provide:
|
|
||||||
- lifecycle
|
|
||||||
- configuration
|
|
||||||
- queue abstraction
|
|
||||||
- worker orchestration
|
|
||||||
- tracing
|
|
||||||
- health checks
|
|
||||||
- status/control APIs
|
|
||||||
|
|
||||||
## Migration boundaries
|
|
||||||
|
|
||||||
### Move into runtime
|
|
||||||
- source orchestration
|
|
||||||
- worker supervision
|
|
||||||
- queue management
|
|
||||||
- trace provisioning
|
|
||||||
- health aggregation
|
|
||||||
|
|
||||||
### Keep in mail_order_bot
|
|
||||||
- email business handler
|
|
||||||
- mail domain services
|
|
||||||
- business pipeline
|
|
||||||
- business-specific config validation beyond platform-level schema
|
|
||||||
|
|
||||||
## Success criteria
|
|
||||||
|
|
||||||
The migration is successful when:
|
|
||||||
- mail polling is no longer tightly coupled to processing
|
|
||||||
- workers can process emails in parallel
|
|
||||||
- business logic is not moved into the runtime
|
|
||||||
- replacing polling with IDLE is localized to the source layer
|
|
||||||
|
|||||||
@@ -1,469 +1,134 @@
|
|||||||
# PLBA
|
# PLBA Requirements
|
||||||
|
|
||||||
`PLBA` is a reusable platform runtime for business applications.
|
## Goal
|
||||||
|
|
||||||
It solves platform concerns that should not live inside domain code:
|
`PLBA` is a reusable runtime for business applications.
|
||||||
- application lifecycle
|
|
||||||
|
The platform owns:
|
||||||
|
- lifecycle
|
||||||
- worker orchestration
|
- worker orchestration
|
||||||
- configuration loading from YAML
|
- configuration loading
|
||||||
- tracing
|
|
||||||
- health aggregation
|
|
||||||
- runtime status reporting
|
|
||||||
- HTTP control endpoints
|
|
||||||
- logging configuration
|
- logging configuration
|
||||||
|
- health aggregation
|
||||||
|
- tracing
|
||||||
|
- control endpoints
|
||||||
|
|
||||||
Business applications depend on `plba` as a package and implement only their own business behavior.
|
Business applications own:
|
||||||
|
- business workflows
|
||||||
|
- domain services
|
||||||
|
- app-specific configuration schema
|
||||||
|
- business error semantics
|
||||||
|
|
||||||
## Architecture
|
## Core runtime model
|
||||||
|
|
||||||
Current PLBA architecture is built around one core idea:
|
The platform is built around workers.
|
||||||
- the runtime manages a set of application workers
|
|
||||||
|
|
||||||
A worker is any runtime-managed active component with a unified lifecycle:
|
1. application defines an `ApplicationModule`
|
||||||
|
2. module registers workers
|
||||||
|
3. runtime starts workers
|
||||||
|
4. workers execute business activity
|
||||||
|
5. runtime aggregates status and health
|
||||||
|
6. runtime stops workers gracefully
|
||||||
|
|
||||||
|
## Contracts
|
||||||
|
|
||||||
|
### `ApplicationModule`
|
||||||
|
|
||||||
|
Responsibilities:
|
||||||
|
- provide module name
|
||||||
|
- assemble application components
|
||||||
|
- register workers
|
||||||
|
- register optional health contributors
|
||||||
|
|
||||||
|
### `Worker`
|
||||||
|
|
||||||
|
Main runtime contract.
|
||||||
|
|
||||||
|
Responsibilities:
|
||||||
- `start()`
|
- `start()`
|
||||||
- `stop(force=False)`
|
- `stop(force=False)`
|
||||||
- `health()`
|
- `health()`
|
||||||
- `status()`
|
- `status()`
|
||||||
|
|
||||||
This means PLBA does not require separate platform categories like `source` and `consumer`.
|
The worker owns execution mechanics:
|
||||||
If an application needs polling, queue processing, listening, scheduled work, or another active loop, it is implemented as a worker.
|
- single-run or loop
|
||||||
|
- thread model
|
||||||
|
- stop conditions
|
||||||
|
- runtime status
|
||||||
|
- health interpretation
|
||||||
|
|
||||||
### Main runtime model
|
### `Routine`
|
||||||
|
|
||||||
1. application creates `RuntimeManager`
|
`Routine` is an application pattern, not a PLBA contract.
|
||||||
2. runtime loads configuration
|
|
||||||
3. runtime applies logging configuration
|
|
||||||
4. application module registers workers and supporting services
|
|
||||||
5. runtime starts all workers
|
|
||||||
6. workers execute business-related loops or processing
|
|
||||||
7. runtime aggregates health and status
|
|
||||||
8. runtime stops workers gracefully or forcefully
|
|
||||||
|
|
||||||
## Core concepts
|
|
||||||
|
|
||||||
### `ApplicationModule`
|
|
||||||
|
|
||||||
File: [application.py](/Users/alex/Dev_projects_v2/apps/plba/src/app_runtime/contracts/application.py)
|
|
||||||
|
|
||||||
Describes a business application to the runtime.
|
|
||||||
|
|
||||||
Responsibilities:
|
Responsibilities:
|
||||||
- provide module name
|
- execute business behavior
|
||||||
- register workers
|
- call domain services
|
||||||
- register queues if needed
|
- apply business rules
|
||||||
- register handlers if needed
|
- talk to external integrations
|
||||||
- register health contributors
|
|
||||||
- compose application-specific objects
|
|
||||||
|
|
||||||
`ApplicationModule` does not run the application itself.
|
Recommended rule:
|
||||||
It only declares how the application is assembled.
|
- worker orchestrates
|
||||||
|
- routine executes business behavior
|
||||||
|
|
||||||
### `Worker`
|
## Architectural rules
|
||||||
|
|
||||||
File: [worker.py](/Users/alex/Dev_projects_v2/apps/plba/src/app_runtime/contracts/worker.py)
|
### 1. Keep lifecycle and business behavior separate
|
||||||
|
|
||||||
The main runtime-managed contract.
|
|
||||||
|
|
||||||
Responsibilities:
|
|
||||||
- start its own execution
|
|
||||||
- stop gracefully or forcefully
|
|
||||||
- report health
|
|
||||||
- report runtime status
|
|
||||||
|
|
||||||
This is the main extension point for business applications.
|
|
||||||
|
|
||||||
### `TaskQueue`
|
|
||||||
|
|
||||||
File: [queue.py](/Users/alex/Dev_projects_v2/apps/plba/src/app_runtime/contracts/queue.py)
|
|
||||||
|
|
||||||
Optional queue abstraction.
|
|
||||||
|
|
||||||
Use it when application workers need buffered or decoupled processing.
|
|
||||||
|
|
||||||
PLBA does not force every application to use a queue.
|
|
||||||
Queue is one supported pattern, not the foundation of the whole platform.
|
|
||||||
|
|
||||||
### `TaskHandler`
|
|
||||||
|
|
||||||
File: [tasks.py](/Users/alex/Dev_projects_v2/apps/plba/src/app_runtime/contracts/tasks.py)
|
|
||||||
|
|
||||||
Optional unit of business processing for one task.
|
|
||||||
|
|
||||||
Useful when a worker follows queue-driven logic:
|
|
||||||
- worker takes a task
|
|
||||||
- handler executes business logic
|
|
||||||
|
|
||||||
### `TraceService`
|
|
||||||
|
|
||||||
File: [service.py](/Users/alex/Dev_projects_v2/apps/plba/src/app_runtime/tracing/service.py)
|
|
||||||
|
|
||||||
Platform trace service.
|
|
||||||
|
|
||||||
Responsibilities:
|
|
||||||
- create trace contexts
|
|
||||||
- resume trace from task metadata
|
|
||||||
- write context records
|
|
||||||
- write trace messages
|
|
||||||
|
|
||||||
Business code should use it as a platform service and should not implement its own tracing infrastructure.
|
|
||||||
|
|
||||||
### `HealthRegistry`
|
|
||||||
|
|
||||||
File: [registry.py](/Users/alex/Dev_projects_v2/apps/plba/src/app_runtime/health/registry.py)
|
|
||||||
|
|
||||||
Aggregates application health.
|
|
||||||
|
|
||||||
PLBA uses three health states:
|
|
||||||
- `ok` — all critical parts work
|
|
||||||
- `degraded` — application still works, but there is a problem
|
|
||||||
- `unhealthy` — application should not be considered operational
|
|
||||||
|
|
||||||
### Runtime status
|
|
||||||
|
|
||||||
File: [types.py](/Users/alex/Dev_projects_v2/apps/plba/src/app_runtime/core/types.py)
|
|
||||||
|
|
||||||
Status is separate from health.
|
|
||||||
|
|
||||||
Current runtime states:
|
|
||||||
- `starting`
|
|
||||||
- `idle`
|
|
||||||
- `busy`
|
|
||||||
- `stopping`
|
|
||||||
- `stopped`
|
|
||||||
|
|
||||||
Status is used for operational lifecycle decisions such as graceful shutdown.
|
|
||||||
|
|
||||||
### `ControlPlaneService`
|
|
||||||
|
|
||||||
Files:
|
|
||||||
- [service.py](/Users/alex/Dev_projects_v2/apps/plba/src/app_runtime/control/service.py)
|
|
||||||
- [http_channel.py](/Users/alex/Dev_projects_v2/apps/plba/src/app_runtime/control/http_channel.py)
|
|
||||||
|
|
||||||
Provides control and observability endpoints.
|
|
||||||
|
|
||||||
Currently supported:
|
|
||||||
- health access
|
|
||||||
- runtime start action
|
|
||||||
- runtime stop action
|
|
||||||
- runtime status action
|
|
||||||
|
|
||||||
### `ConfigurationManager`
|
|
||||||
|
|
||||||
Files:
|
|
||||||
- [configuration.py](/Users/alex/Dev_projects_v2/apps/plba/src/app_runtime/core/configuration.py)
|
|
||||||
- [file_loader.py](/Users/alex/Dev_projects_v2/apps/plba/src/app_runtime/config/file_loader.py)
|
|
||||||
- [providers.py](/Users/alex/Dev_projects_v2/apps/plba/src/app_runtime/config/providers.py)
|
|
||||||
|
|
||||||
Loads and merges configuration.
|
|
||||||
|
|
||||||
Current built-in source:
|
|
||||||
- YAML file provider
|
|
||||||
|
|
||||||
### `LogManager`
|
|
||||||
|
|
||||||
File: [manager.py](/Users/alex/Dev_projects_v2/apps/plba/src/app_runtime/logging/manager.py)
|
|
||||||
|
|
||||||
Applies logging configuration from config.
|
|
||||||
|
|
||||||
Current expectation:
|
|
||||||
- logging config lives in the `log` section of YAML
|
|
||||||
|
|
||||||
## Available platform services
|
|
||||||
|
|
||||||
PLBA currently provides these reusable services.
|
|
||||||
|
|
||||||
### 1. Runtime lifecycle
|
|
||||||
|
|
||||||
Service:
|
|
||||||
- `RuntimeManager`
|
|
||||||
|
|
||||||
What it gives:
|
|
||||||
- startup orchestration
|
|
||||||
- worker registration and startup
|
|
||||||
- graceful stop with timeout
|
|
||||||
- force stop
|
|
||||||
- status snapshot
|
|
||||||
|
|
||||||
Example use:
|
|
||||||
- start `mail_order_bot`
|
|
||||||
- stop it after active email processing is drained
|
|
||||||
|
|
||||||
### 2. Worker supervision
|
|
||||||
|
|
||||||
Service:
|
|
||||||
- `WorkerSupervisor`
|
|
||||||
|
|
||||||
What it gives:
|
|
||||||
- unified worker orchestration
|
|
||||||
- aggregated worker statuses
|
|
||||||
- aggregated worker health
|
|
||||||
- stop coordination
|
|
||||||
|
|
||||||
Example use:
|
|
||||||
- run one polling worker and three processing workers in the same application
|
|
||||||
|
|
||||||
### 3. Queue support
|
|
||||||
|
|
||||||
Services:
|
|
||||||
- `TaskQueue`
|
|
||||||
- `InMemoryTaskQueue`
|
|
||||||
- `QueueWorker`
|
|
||||||
|
|
||||||
What it gives:
|
|
||||||
- buffered processing
|
|
||||||
- decoupling between task production and task consumption
|
|
||||||
- worker concurrency for task handling
|
|
||||||
|
|
||||||
Example use:
|
|
||||||
- worker A polls IMAP and pushes tasks to queue
|
|
||||||
- worker B processes queued email tasks with concurrency `3`
|
|
||||||
|
|
||||||
### 4. Configuration
|
|
||||||
|
|
||||||
Services:
|
|
||||||
- `ConfigurationManager`
|
|
||||||
- `FileConfigProvider`
|
|
||||||
- `ConfigFileLoader`
|
|
||||||
|
|
||||||
What it gives:
|
|
||||||
- YAML config loading
|
|
||||||
- config merging
|
|
||||||
- access to platform and application config
|
|
||||||
|
|
||||||
Example use:
|
|
||||||
- load `platform` section for runtime
|
|
||||||
- load `mail_order_bot` section for app-specific config
|
|
||||||
|
|
||||||
### 5. Tracing
|
|
||||||
|
|
||||||
Services:
|
|
||||||
- `TraceService`
|
|
||||||
- `TraceTransport`
|
|
||||||
- `NoOpTraceTransport`
|
|
||||||
|
|
||||||
What it gives:
|
|
||||||
- trace context creation
|
|
||||||
- trace propagation through task metadata
|
|
||||||
- trace messages for processing steps
|
|
||||||
|
|
||||||
Example use:
|
|
||||||
- polling worker creates trace when it discovers a mail
|
|
||||||
- processing worker resumes trace and writes business steps
|
|
||||||
|
|
||||||
### 6. Health
|
|
||||||
|
|
||||||
Services:
|
|
||||||
- `HealthRegistry`
|
|
||||||
- `WorkerHealth`
|
|
||||||
|
|
||||||
What it gives:
|
|
||||||
- per-worker health
|
|
||||||
- aggregated application health
|
|
||||||
- critical vs non-critical component handling
|
|
||||||
|
|
||||||
Example use:
|
|
||||||
- email processing workers are critical
|
|
||||||
- optional diagnostic worker may be non-critical
|
|
||||||
|
|
||||||
### 7. Status
|
|
||||||
|
|
||||||
Services:
|
|
||||||
- `WorkerStatus`
|
|
||||||
- runtime aggregated state
|
|
||||||
|
|
||||||
What it gives:
|
|
||||||
- current activity visibility
|
|
||||||
- ability to stop application only after in-flight work is completed
|
|
||||||
|
|
||||||
Example use:
|
|
||||||
- stop application only after processing workers become `idle` or `stopped`
|
|
||||||
|
|
||||||
### 8. HTTP control
|
|
||||||
|
|
||||||
Services:
|
|
||||||
- `ControlPlaneService`
|
|
||||||
- `HttpControlChannel`
|
|
||||||
|
|
||||||
What it gives:
|
|
||||||
- HTTP health/status/actions
|
|
||||||
- operational integration point
|
|
||||||
|
|
||||||
Example use:
|
|
||||||
- inspect current health from orchestration
|
|
||||||
- request graceful stop remotely
|
|
||||||
|
|
||||||
## Public package API
|
|
||||||
|
|
||||||
Public namespace is `plba`.
|
|
||||||
|
|
||||||
Main imports for external applications:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from plba import ApplicationModule, QueueWorker, RuntimeManager, create_runtime
|
|
||||||
from plba.contracts import Task, TaskHandler, TaskQueue, Worker, WorkerHealth, WorkerStatus
|
|
||||||
from plba.queue import InMemoryTaskQueue
|
|
||||||
from plba.tracing import TraceService
|
|
||||||
```
|
|
||||||
|
|
||||||
## Example application pattern
|
|
||||||
|
|
||||||
Minimal queue-based application:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from plba import ApplicationModule, QueueWorker, Task, TaskHandler, create_runtime
|
|
||||||
from plba.queue import InMemoryTaskQueue
|
|
||||||
|
|
||||||
|
|
||||||
class ExampleHandler(TaskHandler):
|
|
||||||
def handle(self, task: Task) -> None:
|
|
||||||
print(task.payload)
|
|
||||||
|
|
||||||
|
|
||||||
class ExampleModule(ApplicationModule):
|
|
||||||
@property
|
|
||||||
def name(self) -> str:
|
|
||||||
return "example"
|
|
||||||
|
|
||||||
def register(self, registry) -> None:
|
|
||||||
queue = InMemoryTaskQueue()
|
|
||||||
traces = registry.services.get("traces")
|
|
||||||
|
|
||||||
queue.publish(Task(name="incoming", payload={"hello": "world"}))
|
|
||||||
|
|
||||||
registry.add_queue("incoming", queue)
|
|
||||||
registry.add_worker(QueueWorker("example-worker", queue, ExampleHandler(), traces))
|
|
||||||
|
|
||||||
|
|
||||||
runtime = create_runtime(
|
|
||||||
ExampleModule(),
|
|
||||||
config_path="config.yml",
|
|
||||||
enable_http_control=False,
|
|
||||||
)
|
|
||||||
runtime.start()
|
|
||||||
```
|
|
||||||
|
|
||||||
## Building business applications on PLBA
|
|
||||||
|
|
||||||
These are the current rules for building business applications correctly.
|
|
||||||
|
|
||||||
### 1. Keep platform and business concerns separate
|
|
||||||
|
|
||||||
PLBA owns:
|
|
||||||
- lifecycle
|
|
||||||
- worker management
|
|
||||||
- logging
|
|
||||||
- trace infrastructure
|
|
||||||
- health aggregation
|
|
||||||
- HTTP control
|
|
||||||
- config loading
|
|
||||||
|
|
||||||
Business application owns:
|
|
||||||
- business workflows
|
|
||||||
- domain services
|
|
||||||
- application-specific config schema
|
|
||||||
- business task payloads
|
|
||||||
- business error semantics
|
|
||||||
|
|
||||||
### 2. Build app behavior from workers
|
|
||||||
|
|
||||||
A business application should be described as a small set of workers.
|
|
||||||
|
|
||||||
Typical examples:
|
|
||||||
- polling worker
|
|
||||||
- processing worker
|
|
||||||
- reconciliation worker
|
|
||||||
|
|
||||||
Do not introduce new worker types at platform level unless there is clear need for custom runtime behavior.
|
|
||||||
|
|
||||||
### 3. Use queues only when they help
|
|
||||||
|
|
||||||
Queue is optional.
|
|
||||||
|
|
||||||
Use queue when:
|
|
||||||
- one worker discovers work
|
|
||||||
- another worker processes it
|
|
||||||
- buffering or decoupling helps
|
|
||||||
- concurrency is needed
|
|
||||||
|
|
||||||
Do not force queue into applications that do not need it.
|
|
||||||
|
|
||||||
### 4. Keep business logic out of worker lifecycle code
|
|
||||||
|
|
||||||
Worker should orchestrate execution.
|
|
||||||
Business rules should live in dedicated services and handlers.
|
|
||||||
|
|
||||||
Good:
|
Good:
|
||||||
- worker gets config
|
- worker manages threading, loop, stop flags, health, status
|
||||||
- worker calls domain service
|
- routine contains business logic
|
||||||
- worker reports trace and status
|
|
||||||
|
|
||||||
Bad:
|
Bad:
|
||||||
- worker contains all parsing, decision logic, integration rules, and persistence rules in one class
|
- worker mixes thread management, retry policy, parsing, persistence, integration rules, and domain decisions in one class
|
||||||
|
|
||||||
### 5. Use trace as a platform service
|
### 2. Treat `Worker` as the main extension point
|
||||||
|
|
||||||
Business application should:
|
Do not center the platform around queues, handlers, sources, or other specialized runtime categories.
|
||||||
- create meaningful trace steps
|
|
||||||
- propagate trace through task metadata if queue is used
|
|
||||||
- record business-relevant processing milestones
|
|
||||||
|
|
||||||
Business application should not:
|
### 3. Keep `Routine` out of the platform contract set
|
||||||
- implement its own trace store
|
|
||||||
- control trace transport directly unless explicitly needed
|
|
||||||
|
|
||||||
### 6. Read config through PLBA
|
At the current stage PLBA should not force a universal `Routine` interface.
|
||||||
|
|
||||||
Business application should not read YAML directly.
|
Applications may use:
|
||||||
|
- `run()`
|
||||||
|
- `run_once()`
|
||||||
|
- `poll()`
|
||||||
|
- `sync_window()`
|
||||||
|
|
||||||
Recommended flow:
|
The exact business API belongs to the application.
|
||||||
- PLBA loads config
|
|
||||||
- application reads only its own config section
|
|
||||||
- application converts it to typed app config object
|
|
||||||
- services receive typed config object
|
|
||||||
|
|
||||||
### 7. Distinguish health from status
|
### 4. Health is computed by the worker
|
||||||
|
|
||||||
Use `health` for:
|
Routine should not directly mutate platform health state.
|
||||||
- is application operational?
|
|
||||||
|
|
||||||
Use `status` for:
|
Instead:
|
||||||
- what is application doing right now?
|
- routine succeeds
|
||||||
|
- routine returns outcome
|
||||||
|
- or routine raises typed exceptions
|
||||||
|
|
||||||
This is important for graceful stop:
|
Worker interprets the outcome into:
|
||||||
- health may still be `ok`
|
- `ok`
|
||||||
- status may be `busy`
|
- `degraded`
|
||||||
|
- `unhealthy`
|
||||||
|
|
||||||
### 8. Design workers for graceful stop
|
### 5. `InMemoryTaskQueue` is utility-only
|
||||||
|
|
||||||
Workers should support:
|
`InMemoryTaskQueue` may stay as a reusable component for business applications.
|
||||||
- stop accepting new work
|
|
||||||
- finish current in-flight work when possible
|
|
||||||
- report `busy`, `idle`, `stopping`, `stopped`
|
|
||||||
|
|
||||||
This allows runtime to stop application safely.
|
It is:
|
||||||
|
- optional
|
||||||
|
- local
|
||||||
|
- not part of the main runtime contract model
|
||||||
|
|
||||||
## Recommended repository model
|
## Public package direction
|
||||||
|
|
||||||
PLBA is intended to live in its own repository as a reusable package.
|
Public namespace `plba` should expose:
|
||||||
|
- application/runtime contracts
|
||||||
|
- tracing
|
||||||
|
- health
|
||||||
|
- config
|
||||||
|
- control plane
|
||||||
|
- utility queue
|
||||||
|
|
||||||
Recommended setup:
|
It should not expose queue-centric runtime abstractions as primary architecture.
|
||||||
- repository `plba`: platform package only
|
|
||||||
- repository `mail_order_bot`: business application depending on `plba`
|
|
||||||
- repository `service_b`: business application depending on `plba`
|
|
||||||
|
|
||||||
## Example: `mail_order_bot`
|
|
||||||
|
|
||||||
Simple first version of `mail_order_bot` on PLBA:
|
|
||||||
- `MailPollingWorker`, concurrency `1`
|
|
||||||
- `EmailProcessingWorker`, concurrency `3`
|
|
||||||
- shared `InMemoryTaskQueue`
|
|
||||||
- domain services for mail parsing and order processing
|
|
||||||
|
|
||||||
Flow:
|
|
||||||
1. polling worker checks IMAP
|
|
||||||
2. polling worker pushes email tasks into queue
|
|
||||||
3. processing workers consume tasks
|
|
||||||
4. processing workers execute domain logic
|
|
||||||
5. runtime aggregates health and status
|
|
||||||
|
|
||||||
This keeps `mail_order_bot` small, explicit, and aligned with current PLBA architecture.
|
|
||||||
|
|||||||
148
requirements/application_guidelines.md
Normal file
148
requirements/application_guidelines.md
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
# Application Guidelines
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
This document defines the default rules for building business applications on top of `plba`.
|
||||||
|
|
||||||
|
The goal is to keep applications:
|
||||||
|
- explicit
|
||||||
|
- small
|
||||||
|
- easy to debug
|
||||||
|
- free from platform legacy artifacts
|
||||||
|
|
||||||
|
## Main model
|
||||||
|
|
||||||
|
Build every application around this chain:
|
||||||
|
|
||||||
|
`ApplicationModule` -> `Worker` -> business `Routine`
|
||||||
|
|
||||||
|
Meaning:
|
||||||
|
- `ApplicationModule` assembles the application
|
||||||
|
- `Worker` owns runtime execution and lifecycle
|
||||||
|
- `Routine` owns business behavior
|
||||||
|
|
||||||
|
`Routine` is an application pattern, not a mandatory platform contract.
|
||||||
|
|
||||||
|
## Rules
|
||||||
|
|
||||||
|
### 1. Assemble the app in `ApplicationModule`
|
||||||
|
|
||||||
|
`ApplicationModule` should:
|
||||||
|
- create application services
|
||||||
|
- create routines
|
||||||
|
- create workers
|
||||||
|
- register workers
|
||||||
|
- register optional health contributors
|
||||||
|
|
||||||
|
`ApplicationModule` should not:
|
||||||
|
- execute business logic itself
|
||||||
|
- contain runtime loops
|
||||||
|
|
||||||
|
### 2. Treat `Worker` as the only primary runtime abstraction
|
||||||
|
|
||||||
|
`Worker` is the core runtime contract of the platform.
|
||||||
|
|
||||||
|
Worker should own:
|
||||||
|
- `start()`
|
||||||
|
- `stop(force=False)`
|
||||||
|
- `health()`
|
||||||
|
- `status()`
|
||||||
|
- thread ownership
|
||||||
|
- execution strategy
|
||||||
|
- graceful shutdown
|
||||||
|
|
||||||
|
Worker should not own:
|
||||||
|
- large business flows
|
||||||
|
- domain decisions
|
||||||
|
- parsing, persistence, and integration rules all mixed together
|
||||||
|
|
||||||
|
### 3. Keep one worker focused on one business activity
|
||||||
|
|
||||||
|
Default recommendation:
|
||||||
|
- one worker -> one routine
|
||||||
|
|
||||||
|
If a process has multiple distinct behaviors:
|
||||||
|
- split it into multiple workers
|
||||||
|
- or compose several services behind one focused routine
|
||||||
|
|
||||||
|
Do not make a worker a container for unrelated business scenarios.
|
||||||
|
|
||||||
|
### 4. Put business logic into routines and services
|
||||||
|
|
||||||
|
Routine should contain:
|
||||||
|
- business flow steps
|
||||||
|
- domain service calls
|
||||||
|
- business validation
|
||||||
|
- integration calls
|
||||||
|
- persistence orchestration
|
||||||
|
|
||||||
|
If the routine becomes too large:
|
||||||
|
- split business logic into dedicated services
|
||||||
|
- keep routine as a thin application-level orchestrator
|
||||||
|
|
||||||
|
### 5. Let the worker define the run model
|
||||||
|
|
||||||
|
The worker decides:
|
||||||
|
- single-run or loop
|
||||||
|
- one thread or multiple threads
|
||||||
|
- interval between iterations
|
||||||
|
- batch or long-running mode
|
||||||
|
- stop conditions
|
||||||
|
|
||||||
|
The routine does not decide lifecycle strategy.
|
||||||
|
|
||||||
|
### 6. Let the worker compute health
|
||||||
|
|
||||||
|
Routine should not directly set platform health state.
|
||||||
|
|
||||||
|
Instead:
|
||||||
|
- routine completes successfully
|
||||||
|
- or returns outcome information
|
||||||
|
- or raises typed exceptions
|
||||||
|
|
||||||
|
Then worker interprets that into:
|
||||||
|
- `ok`
|
||||||
|
- `degraded`
|
||||||
|
- `unhealthy`
|
||||||
|
|
||||||
|
### 7. Use queues only as optional app-level utilities
|
||||||
|
|
||||||
|
`InMemoryTaskQueue` may be used inside an application when buffering helps.
|
||||||
|
|
||||||
|
But:
|
||||||
|
- queue is not a core platform concept
|
||||||
|
- queue usage should stay a local implementation choice
|
||||||
|
- the app should still be described through workers and routines
|
||||||
|
|
||||||
|
### 8. Keep tracing vocabulary neutral
|
||||||
|
|
||||||
|
Use tracing to describe operations and execution context, not legacy architectural roles.
|
||||||
|
|
||||||
|
Prefer terms like:
|
||||||
|
- operation
|
||||||
|
- worker
|
||||||
|
- routine
|
||||||
|
- metadata
|
||||||
|
- step
|
||||||
|
|
||||||
|
Avoid making trace terminology define the application architecture.
|
||||||
|
|
||||||
|
### 9. Keep classes small and responsibilities clear
|
||||||
|
|
||||||
|
Preferred shape:
|
||||||
|
- thin `ApplicationModule`
|
||||||
|
- thin `Worker`
|
||||||
|
- focused `Routine`
|
||||||
|
- dedicated domain services
|
||||||
|
|
||||||
|
If a class grows too much, split it by responsibility instead of adding more platform abstractions.
|
||||||
|
|
||||||
|
## Checklist
|
||||||
|
|
||||||
|
Before adding a new application component, check:
|
||||||
|
|
||||||
|
1. Is this runtime behavior or business behavior?
|
||||||
|
2. If runtime behavior, should it live in a `Worker`?
|
||||||
|
3. If business behavior, should it live in a `Routine` or service?
|
||||||
|
4. Does this component stay small and single-purpose?
|
||||||
|
5. Am I adding a queue because it is useful, or because of old mental models?
|
||||||
@@ -2,247 +2,130 @@
|
|||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
The runtime is built as a platform layer for business applications.
|
PLBA consists of four logical layers:
|
||||||
|
|
||||||
It consists of four logical layers:
|
|
||||||
- platform core
|
- platform core
|
||||||
- platform contracts
|
- platform contracts
|
||||||
- infrastructure adapters
|
- infrastructure services
|
||||||
- business applications
|
- business applications
|
||||||
|
|
||||||
|
The runtime is centered on `Worker`.
|
||||||
|
|
||||||
## Layers
|
## Layers
|
||||||
|
|
||||||
### Platform core
|
### Platform core
|
||||||
|
|
||||||
The core contains long-lived runtime services:
|
Core services:
|
||||||
- `RuntimeManager`
|
- `RuntimeManager`
|
||||||
- `ConfigurationManager`
|
- `ConfigurationManager`
|
||||||
- `WorkerSupervisor`
|
- `WorkerSupervisor`
|
||||||
- `TraceService`
|
|
||||||
- `HealthRegistry`
|
|
||||||
- `ControlPlaneService`
|
|
||||||
- `ServiceContainer`
|
- `ServiceContainer`
|
||||||
|
|
||||||
The core is responsible for orchestration, not domain behavior.
|
|
||||||
|
|
||||||
### Platform contracts
|
|
||||||
|
|
||||||
Contracts define how business applications integrate with the runtime.
|
|
||||||
|
|
||||||
Main contracts:
|
|
||||||
- `ApplicationModule`
|
|
||||||
- `TaskSource`
|
|
||||||
- `TaskQueue`
|
|
||||||
- `Worker`
|
|
||||||
- `TaskHandler`
|
|
||||||
- `ConfigProvider`
|
|
||||||
- `HealthContributor`
|
|
||||||
- `TraceFactory`
|
|
||||||
|
|
||||||
These contracts must remain domain-agnostic.
|
|
||||||
|
|
||||||
### Infrastructure adapters
|
|
||||||
|
|
||||||
Adapters implement concrete runtime capabilities:
|
|
||||||
- in-memory queue
|
|
||||||
- Redis queue
|
|
||||||
- file config loader
|
|
||||||
- database config loader
|
|
||||||
- polling source
|
|
||||||
- IMAP IDLE source
|
|
||||||
- HTTP control plane
|
|
||||||
- trace transport adapters
|
|
||||||
|
|
||||||
Adapters may change between applications and deployments.
|
|
||||||
|
|
||||||
### Business applications
|
|
||||||
|
|
||||||
Applications are built on top of the contracts and adapters.
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
- `mail_order_bot`
|
|
||||||
- future event-driven business services
|
|
||||||
|
|
||||||
Applications contain:
|
|
||||||
- domain models
|
|
||||||
- domain handlers
|
|
||||||
- application-specific configuration schema
|
|
||||||
- source/handler composition
|
|
||||||
|
|
||||||
## Core runtime components
|
|
||||||
|
|
||||||
### RuntimeManager
|
|
||||||
|
|
||||||
The main platform facade.
|
|
||||||
|
|
||||||
Responsibilities:
|
Responsibilities:
|
||||||
- bootstrap runtime
|
- bootstrap runtime
|
||||||
- initialize services
|
- wire core services
|
||||||
- register application modules
|
- register modules
|
||||||
- start and stop all runtime-managed components
|
- start and stop workers
|
||||||
- expose status
|
- expose runtime snapshot
|
||||||
- coordinate graceful shutdown
|
|
||||||
|
|
||||||
### ConfigurationManager
|
### Platform contracts
|
||||||
|
|
||||||
Responsibilities:
|
Main contracts:
|
||||||
- load configuration
|
- `ApplicationModule`
|
||||||
- validate configuration
|
- `Worker`
|
||||||
- publish config updates
|
- `ConfigProvider`
|
||||||
- provide typed config access
|
- `HealthContributor`
|
||||||
- notify subscribers on reload
|
- tracing contracts
|
||||||
|
|
||||||
Configuration should be divided into:
|
These contracts must remain domain-agnostic.
|
||||||
- platform config
|
|
||||||
- application config
|
|
||||||
- environment/runtime overrides
|
|
||||||
|
|
||||||
### WorkerSupervisor
|
### Infrastructure services
|
||||||
|
|
||||||
Responsibilities:
|
Platform services include:
|
||||||
- register worker definitions
|
- tracing
|
||||||
- start worker pools
|
- health registry
|
||||||
- monitor worker health
|
- logging manager
|
||||||
- restart failed workers when appropriate
|
- control plane
|
||||||
- manage parallelism and backpressure
|
- config providers
|
||||||
- expose worker-level status
|
- `InMemoryTaskQueue` as optional utility
|
||||||
|
|
||||||
### TraceService
|
### Business applications
|
||||||
|
|
||||||
Responsibilities:
|
Applications define:
|
||||||
- create traces for operations
|
- routines
|
||||||
- propagate trace context across source -> queue -> worker -> handler boundaries
|
- domain services
|
||||||
- provide trace factories to applications
|
- custom worker implementations
|
||||||
- remain transport-agnostic
|
- typed app config
|
||||||
|
|
||||||
### HealthRegistry
|
## Runtime flow
|
||||||
|
|
||||||
Responsibilities:
|
|
||||||
- collect health from registered contributors
|
|
||||||
- aggregate health into liveness/readiness/status views
|
|
||||||
- expose structured runtime health
|
|
||||||
|
|
||||||
### ControlPlaneService
|
|
||||||
|
|
||||||
Responsibilities:
|
|
||||||
- control endpoints
|
|
||||||
- runtime state visibility
|
|
||||||
- administrative actions
|
|
||||||
- later authentication and user/session-aware access
|
|
||||||
|
|
||||||
## Main runtime model
|
|
||||||
|
|
||||||
The runtime should operate on this conceptual flow:
|
|
||||||
|
|
||||||
1. runtime starts
|
1. runtime starts
|
||||||
2. configuration is loaded
|
2. configuration is loaded
|
||||||
3. services are initialized
|
3. core services become available
|
||||||
4. application modules register sources, queues, handlers, and workers
|
4. application modules register workers
|
||||||
5. task sources start producing tasks
|
5. workers start execution
|
||||||
6. tasks are published into queues
|
6. workers call business routines
|
||||||
7. workers consume tasks
|
7. runtime aggregates health and status
|
||||||
8. handlers execute business logic
|
8. runtime stops workers on request
|
||||||
9. traces and health are updated throughout the flow
|
|
||||||
10. runtime stops gracefully on request
|
|
||||||
|
|
||||||
## Contracts
|
## Worker model
|
||||||
|
|
||||||
### ApplicationModule
|
Worker is responsible for runtime behavior:
|
||||||
|
- execution strategy
|
||||||
|
- thread ownership
|
||||||
|
- graceful shutdown
|
||||||
|
- runtime status
|
||||||
|
- health interpretation
|
||||||
|
|
||||||
Describes a business application to the runtime.
|
Routine is responsible for business behavior:
|
||||||
|
- business decisions
|
||||||
|
- domain orchestration
|
||||||
|
- persistence and integrations
|
||||||
|
|
||||||
Responsibilities:
|
Recommended shape:
|
||||||
- register domain services
|
|
||||||
- register task sources
|
|
||||||
- register queues
|
|
||||||
- register worker pools
|
|
||||||
- register handlers
|
|
||||||
- declare config requirements
|
|
||||||
- optionally register health contributors
|
|
||||||
|
|
||||||
### TaskSource
|
```python
|
||||||
|
class SomeWorker(Worker):
|
||||||
|
def __init__(self, routine) -> None:
|
||||||
|
self._routine = routine
|
||||||
|
|
||||||
Produces tasks into queues.
|
def start(self) -> None:
|
||||||
|
...
|
||||||
|
|
||||||
Examples:
|
def stop(self, force: bool = False) -> None:
|
||||||
- IMAP polling source
|
...
|
||||||
- IMAP IDLE source
|
|
||||||
- webhook source
|
|
||||||
- scheduled source
|
|
||||||
|
|
||||||
Responsibilities:
|
def health(self) -> WorkerHealth:
|
||||||
- start
|
...
|
||||||
- stop
|
|
||||||
- publish tasks
|
|
||||||
- expose source status
|
|
||||||
|
|
||||||
### TaskQueue
|
def status(self) -> WorkerStatus:
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
A queue abstraction.
|
## Design rules
|
||||||
|
|
||||||
Expected operations:
|
### 1. Runtime should not know business semantics
|
||||||
- `publish(task)`
|
|
||||||
- `consume()`
|
|
||||||
- `ack(task)`
|
|
||||||
- `nack(task, retry_delay=None)`
|
|
||||||
- `stats()`
|
|
||||||
|
|
||||||
The first implementation may be in-memory, but the interface should support future backends.
|
PLBA knows:
|
||||||
|
- worker started
|
||||||
|
- worker stopped
|
||||||
|
- routine succeeded
|
||||||
|
- routine failed
|
||||||
|
|
||||||
### Worker
|
PLBA does not know:
|
||||||
|
- what the business operation means
|
||||||
|
- which domain decision was made
|
||||||
|
|
||||||
Consumes tasks from a queue and passes them to a handler.
|
### 2. Queue is not a core architecture primitive
|
||||||
|
|
||||||
Responsibilities:
|
Queues may exist inside applications as implementation details.
|
||||||
- obtain task from queue
|
|
||||||
- open or resume trace context
|
|
||||||
- call business handler
|
|
||||||
- ack or nack the task
|
|
||||||
- expose worker state
|
|
||||||
|
|
||||||
### TaskHandler
|
They must not define the platform mental model.
|
||||||
|
|
||||||
Executes business logic for one task.
|
### 3. Keep components small
|
||||||
|
|
||||||
The runtime should not know what the handler does.
|
Prefer:
|
||||||
It only knows that a task is processed.
|
- thin workers
|
||||||
|
- focused routines
|
||||||
|
- dedicated domain services
|
||||||
|
|
||||||
## Mail Order Bot as first application
|
Avoid large platform abstractions that exist only for hypothetical reuse.
|
||||||
|
|
||||||
### Phase 1
|
|
||||||
|
|
||||||
- source: IMAP polling
|
|
||||||
- queue: in-memory queue
|
|
||||||
- workers: parallel email processing workers
|
|
||||||
- handler: domain email processing handler
|
|
||||||
- mark message as read only after successful processing
|
|
||||||
|
|
||||||
### Phase 2
|
|
||||||
|
|
||||||
- source changes from polling to IMAP IDLE
|
|
||||||
- queue and workers remain the same
|
|
||||||
|
|
||||||
This demonstrates one of the architectural goals:
|
|
||||||
the source can change without redesigning the rest of the processing pipeline.
|
|
||||||
|
|
||||||
## Suggested package structure
|
|
||||||
|
|
||||||
```text
|
|
||||||
src/
|
|
||||||
app_runtime/
|
|
||||||
core/
|
|
||||||
contracts/
|
|
||||||
config/
|
|
||||||
workers/
|
|
||||||
queue/
|
|
||||||
tracing/
|
|
||||||
health/
|
|
||||||
control/
|
|
||||||
container/
|
|
||||||
adapters/
|
|
||||||
mail_order_bot_app/
|
|
||||||
module/
|
|
||||||
sources/
|
|
||||||
handlers/
|
|
||||||
services/
|
|
||||||
domain/
|
|
||||||
|
|||||||
@@ -1,119 +1,38 @@
|
|||||||
# Vision
|
# Vision
|
||||||
|
|
||||||
## Purpose
|
## Product vision
|
||||||
|
|
||||||
This project provides a reusable runtime platform for business applications.
|
PLBA should be a transparent runtime for business applications.
|
||||||
|
|
||||||
The runtime exists to solve service and infrastructure concerns that are needed by many applications but do not belong to business logic:
|
The desired feeling of the platform:
|
||||||
- start and stop
|
- simple to read
|
||||||
- status and lifecycle
|
- explicit in behavior
|
||||||
- configuration
|
- small number of core concepts
|
||||||
- worker execution
|
- easy to debug
|
||||||
- trace propagation
|
- no architectural legacy artifacts
|
||||||
- health checks
|
|
||||||
- control and administration
|
|
||||||
- later authentication and user management
|
|
||||||
|
|
||||||
The runtime should allow a business application to focus only on domain behavior.
|
## Core concepts
|
||||||
|
|
||||||
## Vision statement
|
The platform should be understandable through three ideas:
|
||||||
|
- `ApplicationModule` assembles the app
|
||||||
|
- `Worker` owns lifecycle and execution
|
||||||
|
- business behavior lives in application routines and services
|
||||||
|
|
||||||
We build a platform where business applications are assembled from small domain modules, while the runtime consistently provides lifecycle, workers, tracing, configuration, and control-plane capabilities.
|
## Non-goals
|
||||||
|
|
||||||
## Problem
|
PLBA should not become:
|
||||||
|
- a framework of many specialized runtime roles
|
||||||
|
- a queue-centric architecture by default
|
||||||
|
- a compatibility shell for legacy abstractions
|
||||||
|
- a place where business logic hides inside infrastructure classes
|
||||||
|
|
||||||
In the previous generation, the execution model was centered around a single `execute()` entry point called by a timer loop.
|
## Utility components
|
||||||
|
|
||||||
That model works for simple periodic jobs, but it becomes too narrow when we need:
|
Some utility components may still exist, for example `InMemoryTaskQueue`.
|
||||||
- queue-driven processing
|
|
||||||
- multiple concurrent workers
|
|
||||||
- independent task sources
|
|
||||||
- richer health semantics
|
|
||||||
- trace propagation across producer and consumer boundaries
|
|
||||||
- reusable runtime patterns across different applications
|
|
||||||
- future admin and authentication capabilities
|
|
||||||
|
|
||||||
The old model couples orchestration and execution too tightly.
|
They are acceptable when they stay:
|
||||||
|
- optional
|
||||||
|
- local
|
||||||
|
- implementation-oriented
|
||||||
|
|
||||||
## Desired future state
|
They should not redefine the main platform model.
|
||||||
|
|
||||||
The new runtime should provide:
|
|
||||||
- a reusable lifecycle and orchestration core
|
|
||||||
- a clean contract for business applications
|
|
||||||
- support for sources, queues, workers, and handlers
|
|
||||||
- explicit separation between platform and domain
|
|
||||||
- trace and health as first-class platform services
|
|
||||||
- the ability to evolve into an admin platform
|
|
||||||
|
|
||||||
## Design principles
|
|
||||||
|
|
||||||
### 1. Platform vs domain separation
|
|
||||||
|
|
||||||
The runtime owns platform concerns.
|
|
||||||
Applications own domain concerns.
|
|
||||||
|
|
||||||
The runtime must not contain business rules such as:
|
|
||||||
- email parsing policies
|
|
||||||
- order creation logic
|
|
||||||
- invoice decisions
|
|
||||||
- client-specific rules
|
|
||||||
|
|
||||||
### 2. Composition over inheritance
|
|
||||||
|
|
||||||
Applications should be composed by registering modules and services in the runtime, not by inheriting a timer-driven class and overriding one method.
|
|
||||||
|
|
||||||
### 3. Explicit task model
|
|
||||||
|
|
||||||
Applications should model processing as:
|
|
||||||
- task source
|
|
||||||
- task queue
|
|
||||||
- worker
|
|
||||||
- handler
|
|
||||||
|
|
||||||
This is more scalable than one monolithic execute loop.
|
|
||||||
|
|
||||||
### 4. Parallelism as a first-class concern
|
|
||||||
|
|
||||||
The runtime should supervise worker pools and concurrency safely instead of leaving this to ad hoc application code.
|
|
||||||
|
|
||||||
### 5. Observability by default
|
|
||||||
|
|
||||||
Trace, logs, metrics, and health should be available as platform services from the start.
|
|
||||||
|
|
||||||
### 6. Evolvability
|
|
||||||
|
|
||||||
The runtime should be able to support:
|
|
||||||
- different queue backends
|
|
||||||
- different task sources
|
|
||||||
- different control planes
|
|
||||||
- different admin capabilities
|
|
||||||
without forcing business applications to change their domain code.
|
|
||||||
|
|
||||||
## First target use case
|
|
||||||
|
|
||||||
The first business application is `mail_order_bot`.
|
|
||||||
|
|
||||||
Its business domain is:
|
|
||||||
- fetching incoming mail
|
|
||||||
- processing attachments
|
|
||||||
- executing order-related pipelines
|
|
||||||
|
|
||||||
Its platform/runtime needs are:
|
|
||||||
- lifecycle management
|
|
||||||
- polling or IMAP IDLE source
|
|
||||||
- queueing
|
|
||||||
- worker pools
|
|
||||||
- tracing
|
|
||||||
- health checks
|
|
||||||
- future admin API
|
|
||||||
|
|
||||||
This makes it a good pilot for the new runtime.
|
|
||||||
|
|
||||||
## Success criteria
|
|
||||||
|
|
||||||
We consider the runtime direction successful if:
|
|
||||||
- `mail_order_bot` business logic can run on top of it without leaking infrastructure details into domain code
|
|
||||||
- the runtime can manage concurrent workers
|
|
||||||
- the runtime can support a queue-based flow
|
|
||||||
- the runtime can expose status and health
|
|
||||||
- the runtime can later host admin/auth features without redesigning the core
|
|
||||||
|
|||||||
Binary file not shown.
@@ -13,4 +13,4 @@ class ApplicationModule(ABC):
|
|||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def register(self, registry: ModuleRegistry) -> None:
|
def register(self, registry: ModuleRegistry) -> None:
|
||||||
"""Register workers, queues, handlers, services, and health contributors."""
|
"""Register workers, services, and health contributors."""
|
||||||
|
|||||||
@@ -1,28 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from app_runtime.contracts.tasks import Task
|
|
||||||
|
|
||||||
|
|
||||||
class TaskQueue(ABC):
|
|
||||||
@abstractmethod
|
|
||||||
def publish(self, task: Task) -> None:
|
|
||||||
"""Push a task into the queue."""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def consume(self, timeout: float = 0.1) -> Task | None:
|
|
||||||
"""Return the next available task or None."""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def ack(self, task: Task) -> None:
|
|
||||||
"""Confirm successful task processing."""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def nack(self, task: Task, retry_delay: float | None = None) -> None:
|
|
||||||
"""Signal failed task processing."""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def stats(self) -> dict[str, Any]:
|
|
||||||
"""Return transport-level queue statistics."""
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(slots=True)
|
|
||||||
class Task:
|
|
||||||
name: str
|
|
||||||
payload: dict[str, Any]
|
|
||||||
metadata: dict[str, Any] = field(default_factory=dict)
|
|
||||||
|
|
||||||
|
|
||||||
class TaskHandler(ABC):
|
|
||||||
@abstractmethod
|
|
||||||
def handle(self, task: Task) -> None:
|
|
||||||
"""Execute domain logic for a task."""
|
|
||||||
Binary file not shown.
@@ -1,8 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from app_runtime.contracts.health import HealthContributor
|
from app_runtime.contracts.health import HealthContributor
|
||||||
from app_runtime.contracts.queue import TaskQueue
|
|
||||||
from app_runtime.contracts.tasks import TaskHandler
|
|
||||||
from app_runtime.contracts.worker import Worker
|
from app_runtime.contracts.worker import Worker
|
||||||
from app_runtime.core.service_container import ServiceContainer
|
from app_runtime.core.service_container import ServiceContainer
|
||||||
|
|
||||||
@@ -10,8 +8,6 @@ from app_runtime.core.service_container import ServiceContainer
|
|||||||
class ModuleRegistry:
|
class ModuleRegistry:
|
||||||
def __init__(self, services: ServiceContainer) -> None:
|
def __init__(self, services: ServiceContainer) -> None:
|
||||||
self.services = services
|
self.services = services
|
||||||
self.queues: dict[str, TaskQueue] = {}
|
|
||||||
self.handlers: dict[str, TaskHandler] = {}
|
|
||||||
self.workers: list[Worker] = []
|
self.workers: list[Worker] = []
|
||||||
self.health_contributors: list[HealthContributor] = []
|
self.health_contributors: list[HealthContributor] = []
|
||||||
self.modules: list[str] = []
|
self.modules: list[str] = []
|
||||||
@@ -19,12 +15,6 @@ class ModuleRegistry:
|
|||||||
def register_module(self, name: str) -> None:
|
def register_module(self, name: str) -> None:
|
||||||
self.modules.append(name)
|
self.modules.append(name)
|
||||||
|
|
||||||
def add_queue(self, name: str, queue: TaskQueue) -> None:
|
|
||||||
self.queues[name] = queue
|
|
||||||
|
|
||||||
def add_handler(self, name: str, handler: TaskHandler) -> None:
|
|
||||||
self.handlers[name] = handler
|
|
||||||
|
|
||||||
def add_worker(self, worker: Worker) -> None:
|
def add_worker(self, worker: Worker) -> None:
|
||||||
self.workers.append(worker)
|
self.workers.append(worker)
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
@@ -1,43 +1,38 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from queue import Empty, Queue
|
from queue import Empty, Queue
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
from app_runtime.contracts.queue import TaskQueue
|
T = TypeVar("T")
|
||||||
from app_runtime.contracts.tasks import Task
|
|
||||||
|
|
||||||
|
|
||||||
class InMemoryTaskQueue(TaskQueue):
|
class InMemoryTaskQueue(Generic[T]):
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self._queue: Queue[Task] = Queue()
|
self._queue: Queue[T] = Queue()
|
||||||
self._published = 0
|
self._put_count = 0
|
||||||
self._acked = 0
|
self._get_count = 0
|
||||||
self._nacked = 0
|
|
||||||
|
|
||||||
def publish(self, task: Task) -> None:
|
def put(self, item: T) -> None:
|
||||||
self._published += 1
|
self._put_count += 1
|
||||||
self._queue.put(task)
|
self._queue.put(item)
|
||||||
|
|
||||||
def consume(self, timeout: float = 0.1) -> Task | None:
|
def get(self, timeout: float = 0.1) -> T | None:
|
||||||
try:
|
try:
|
||||||
return self._queue.get(timeout=timeout)
|
item = self._queue.get(timeout=timeout)
|
||||||
except Empty:
|
except Empty:
|
||||||
return None
|
return None
|
||||||
|
self._get_count += 1
|
||||||
|
return item
|
||||||
|
|
||||||
def ack(self, task: Task) -> None:
|
def task_done(self) -> None:
|
||||||
del task
|
|
||||||
self._acked += 1
|
|
||||||
self._queue.task_done()
|
self._queue.task_done()
|
||||||
|
|
||||||
def nack(self, task: Task, retry_delay: float | None = None) -> None:
|
def qsize(self) -> int:
|
||||||
del retry_delay
|
return self._queue.qsize()
|
||||||
self._nacked += 1
|
|
||||||
self._queue.put(task)
|
|
||||||
self._queue.task_done()
|
|
||||||
|
|
||||||
def stats(self) -> dict[str, int]:
|
def stats(self) -> dict[str, int]:
|
||||||
return {
|
return {
|
||||||
"published": self._published,
|
"put": self._put_count,
|
||||||
"acked": self._acked,
|
"got": self._get_count,
|
||||||
"nacked": self._nacked,
|
|
||||||
"queued": self._queue.qsize(),
|
"queued": self._queue.qsize(),
|
||||||
}
|
}
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -99,7 +99,7 @@ class TraceService(TraceContextFactory):
|
|||||||
self._write_message("ERROR", message, status, attrs)
|
self._write_message("ERROR", message, status, attrs)
|
||||||
|
|
||||||
def new_root(self, operation: str) -> TraceContext:
|
def new_root(self, operation: str) -> TraceContext:
|
||||||
trace_id = self.create_context(alias=operation, kind="source", attrs={"operation": operation})
|
trace_id = self.create_context(alias=operation, kind="operation", attrs={"operation": operation})
|
||||||
return TraceContext(trace_id=trace_id, span_id=trace_id, attributes={"operation": operation})
|
return TraceContext(trace_id=trace_id, span_id=trace_id, attributes={"operation": operation})
|
||||||
|
|
||||||
def child_of(self, parent: TraceContext, operation: str) -> TraceContext:
|
def child_of(self, parent: TraceContext, operation: str) -> TraceContext:
|
||||||
@@ -116,22 +116,22 @@ class TraceService(TraceContextFactory):
|
|||||||
attributes={"operation": operation},
|
attributes={"operation": operation},
|
||||||
)
|
)
|
||||||
|
|
||||||
def attach(self, task_metadata: dict[str, object], context: TraceContext) -> dict[str, object]:
|
def attach(self, metadata: dict[str, object], context: TraceContext) -> dict[str, object]:
|
||||||
updated = dict(task_metadata)
|
updated = dict(metadata)
|
||||||
updated["trace_id"] = context.trace_id
|
updated["trace_id"] = context.trace_id
|
||||||
updated["span_id"] = context.span_id
|
updated["span_id"] = context.span_id
|
||||||
updated["parent_span_id"] = context.parent_span_id
|
updated["parent_span_id"] = context.parent_span_id
|
||||||
return updated
|
return updated
|
||||||
|
|
||||||
def resume(self, task_metadata: dict[str, object], operation: str) -> TraceContext:
|
def resume(self, metadata: dict[str, object], operation: str) -> TraceContext:
|
||||||
trace_id = str(task_metadata.get("trace_id") or uuid4().hex)
|
trace_id = str(metadata.get("trace_id") or uuid4().hex)
|
||||||
span_id = str(task_metadata.get("span_id") or trace_id)
|
span_id = str(metadata.get("span_id") or trace_id)
|
||||||
parent_id = task_metadata.get("parent_span_id")
|
parent_id = metadata.get("parent_span_id")
|
||||||
self.create_context(
|
self.create_context(
|
||||||
alias=operation,
|
alias=operation,
|
||||||
parent_id=str(parent_id) if parent_id else None,
|
parent_id=str(parent_id) if parent_id else None,
|
||||||
kind="handler",
|
kind="worker",
|
||||||
attrs=dict(task_metadata),
|
attrs=dict(metadata),
|
||||||
)
|
)
|
||||||
return TraceContext(
|
return TraceContext(
|
||||||
trace_id=trace_id,
|
trace_id=trace_id,
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
from app_runtime.workers.queue_worker import QueueWorker
|
|
||||||
from app_runtime.workers.supervisor import WorkerSupervisor
|
from app_runtime.workers.supervisor import WorkerSupervisor
|
||||||
|
|
||||||
__all__ = ["QueueWorker", "WorkerSupervisor"]
|
__all__ = ["WorkerSupervisor"]
|
||||||
|
|||||||
Binary file not shown.
@@ -1,125 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from threading import Event, Lock, Thread
|
|
||||||
|
|
||||||
from app_runtime.contracts.queue import TaskQueue
|
|
||||||
from app_runtime.contracts.tasks import TaskHandler
|
|
||||||
from app_runtime.contracts.worker import Worker, WorkerHealth, WorkerStatus
|
|
||||||
from app_runtime.tracing.service import TraceService
|
|
||||||
|
|
||||||
|
|
||||||
class QueueWorker(Worker):
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
name: str,
|
|
||||||
queue: TaskQueue,
|
|
||||||
handler: TaskHandler,
|
|
||||||
traces: TraceService,
|
|
||||||
*,
|
|
||||||
concurrency: int = 1,
|
|
||||||
critical: bool = True,
|
|
||||||
) -> None:
|
|
||||||
self._name = name
|
|
||||||
self._queue = queue
|
|
||||||
self._handler = handler
|
|
||||||
self._traces = traces
|
|
||||||
self._concurrency = concurrency
|
|
||||||
self._critical = critical
|
|
||||||
self._threads: list[Thread] = []
|
|
||||||
self._stop_requested = Event()
|
|
||||||
self._force_stop = Event()
|
|
||||||
self._lock = Lock()
|
|
||||||
self._started = False
|
|
||||||
self._in_flight = 0
|
|
||||||
self._processed = 0
|
|
||||||
self._failures = 0
|
|
||||||
|
|
||||||
@property
|
|
||||||
def name(self) -> str:
|
|
||||||
return self._name
|
|
||||||
|
|
||||||
@property
|
|
||||||
def critical(self) -> bool:
|
|
||||||
return self._critical
|
|
||||||
|
|
||||||
def start(self) -> None:
|
|
||||||
if any(thread.is_alive() for thread in self._threads):
|
|
||||||
return
|
|
||||||
self._threads.clear()
|
|
||||||
self._stop_requested.clear()
|
|
||||||
self._force_stop.clear()
|
|
||||||
self._started = True
|
|
||||||
for index in range(self._concurrency):
|
|
||||||
thread = Thread(target=self._run_loop, name=f"{self._name}-{index + 1}", daemon=True)
|
|
||||||
self._threads.append(thread)
|
|
||||||
thread.start()
|
|
||||||
|
|
||||||
def stop(self, force: bool = False) -> None:
|
|
||||||
self._stop_requested.set()
|
|
||||||
if force:
|
|
||||||
self._force_stop.set()
|
|
||||||
|
|
||||||
def health(self) -> WorkerHealth:
|
|
||||||
status = self.status()
|
|
||||||
if self._started and not self._stop_requested.is_set() and self._alive_threads() == 0:
|
|
||||||
return WorkerHealth(self.name, "unhealthy", self.critical, "worker threads are not running", status.meta)
|
|
||||||
if self._failures > 0:
|
|
||||||
return WorkerHealth(self.name, "degraded", self.critical, "worker has processing failures", status.meta)
|
|
||||||
return WorkerHealth(self.name, "ok", self.critical, meta=status.meta)
|
|
||||||
|
|
||||||
def status(self) -> WorkerStatus:
|
|
||||||
alive_threads = self._alive_threads()
|
|
||||||
with self._lock:
|
|
||||||
in_flight = self._in_flight
|
|
||||||
processed = self._processed
|
|
||||||
failures = self._failures
|
|
||||||
if self._started and alive_threads == 0:
|
|
||||||
state = "stopped"
|
|
||||||
elif self._stop_requested.is_set():
|
|
||||||
state = "stopping" if alive_threads > 0 else "stopped"
|
|
||||||
elif not self._started:
|
|
||||||
state = "stopped"
|
|
||||||
elif in_flight > 0:
|
|
||||||
state = "busy"
|
|
||||||
else:
|
|
||||||
state = "idle"
|
|
||||||
return WorkerStatus(
|
|
||||||
name=self.name,
|
|
||||||
state=state,
|
|
||||||
in_flight=in_flight,
|
|
||||||
meta={
|
|
||||||
"alive_threads": alive_threads,
|
|
||||||
"concurrency": self._concurrency,
|
|
||||||
"processed": processed,
|
|
||||||
"failures": failures,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
def _run_loop(self) -> None:
|
|
||||||
while True:
|
|
||||||
if self._force_stop.is_set() or self._stop_requested.is_set():
|
|
||||||
return
|
|
||||||
task = self._queue.consume(timeout=0.1)
|
|
||||||
if task is None:
|
|
||||||
continue
|
|
||||||
with self._lock:
|
|
||||||
self._in_flight += 1
|
|
||||||
self._traces.resume(task.metadata, f"worker:{self.name}")
|
|
||||||
try:
|
|
||||||
self._handler.handle(task)
|
|
||||||
except Exception:
|
|
||||||
with self._lock:
|
|
||||||
self._failures += 1
|
|
||||||
self._queue.nack(task)
|
|
||||||
else:
|
|
||||||
with self._lock:
|
|
||||||
self._processed += 1
|
|
||||||
self._queue.ack(task)
|
|
||||||
finally:
|
|
||||||
with self._lock:
|
|
||||||
self._in_flight -= 1
|
|
||||||
if self._stop_requested.is_set():
|
|
||||||
return
|
|
||||||
|
|
||||||
def _alive_threads(self) -> int:
|
|
||||||
return sum(1 for thread in self._threads if thread.is_alive())
|
|
||||||
1
src/app_runtime/workflow/__init__.py
Normal file
1
src/app_runtime/workflow/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
__all__: list[str] = []
|
||||||
BIN
src/app_runtime/workflow/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
src/app_runtime/workflow/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
1
src/app_runtime/workflow/contracts/__init__.py
Normal file
1
src/app_runtime/workflow/contracts/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
__all__: list[str] = []
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
16
src/app_runtime/workflow/contracts/context.py
Normal file
16
src/app_runtime/workflow/contracts/context.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class WorkflowContext:
|
||||||
|
payload: dict[str, Any]
|
||||||
|
state: dict[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
|
def snapshot(self) -> dict[str, Any]:
|
||||||
|
return {
|
||||||
|
"payload": dict(self.payload),
|
||||||
|
"state": dict(self.state),
|
||||||
|
}
|
||||||
11
src/app_runtime/workflow/contracts/result.py
Normal file
11
src/app_runtime/workflow/contracts/result.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class StepResult:
|
||||||
|
transition: str = "success"
|
||||||
|
updates: dict[str, Any] = field(default_factory=dict)
|
||||||
|
status: str = "completed"
|
||||||
12
src/app_runtime/workflow/contracts/step.py
Normal file
12
src/app_runtime/workflow/contracts/step.py
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
from app_runtime.workflow.contracts.context import WorkflowContext
|
||||||
|
from app_runtime.workflow.contracts.result import StepResult
|
||||||
|
|
||||||
|
|
||||||
|
class WorkflowStep(ABC):
|
||||||
|
@abstractmethod
|
||||||
|
def run(self, context: WorkflowContext) -> StepResult:
|
||||||
|
"""Run the step and return transition metadata."""
|
||||||
19
src/app_runtime/workflow/contracts/workflow.py
Normal file
19
src/app_runtime/workflow/contracts/workflow.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from app_runtime.workflow.contracts.step import WorkflowStep
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class WorkflowNode:
|
||||||
|
name: str
|
||||||
|
step: WorkflowStep
|
||||||
|
transitions: dict[str, str] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class WorkflowDefinition:
|
||||||
|
name: str
|
||||||
|
start_at: str
|
||||||
|
nodes: dict[str, WorkflowNode]
|
||||||
1
src/app_runtime/workflow/engine/__init__.py
Normal file
1
src/app_runtime/workflow/engine/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
__all__: list[str] = []
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
14
src/app_runtime/workflow/engine/hooks.py
Normal file
14
src/app_runtime/workflow/engine/hooks.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app_runtime.workflow.contracts.context import WorkflowContext
|
||||||
|
|
||||||
|
|
||||||
|
class WorkflowEngineHooks:
|
||||||
|
def on_step_started(self, context: WorkflowContext, step: str) -> None:
|
||||||
|
del context, step
|
||||||
|
|
||||||
|
def on_step_finished(self, context: WorkflowContext, step: str) -> None:
|
||||||
|
del context, step
|
||||||
|
|
||||||
|
def on_step_failed(self, context: WorkflowContext, step: str) -> None:
|
||||||
|
del context, step
|
||||||
9
src/app_runtime/workflow/engine/transition_resolver.py
Normal file
9
src/app_runtime/workflow/engine/transition_resolver.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app_runtime.workflow.contracts.result import StepResult
|
||||||
|
from app_runtime.workflow.contracts.workflow import WorkflowNode
|
||||||
|
|
||||||
|
|
||||||
|
class TransitionResolver:
|
||||||
|
def resolve(self, node: WorkflowNode, result: StepResult) -> str | None:
|
||||||
|
return node.transitions.get(result.transition)
|
||||||
68
src/app_runtime/workflow/engine/workflow_engine.py
Normal file
68
src/app_runtime/workflow/engine/workflow_engine.py
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from app_runtime.workflow.contracts.context import WorkflowContext
|
||||||
|
from app_runtime.workflow.engine.hooks import WorkflowEngineHooks
|
||||||
|
from app_runtime.workflow.engine.transition_resolver import TransitionResolver
|
||||||
|
|
||||||
|
|
||||||
|
class WorkflowEngine:
|
||||||
|
def __init__(self, workflow, persistence, *, traces, hooks: WorkflowEngineHooks | None = None) -> None:
|
||||||
|
self._workflow = workflow
|
||||||
|
self._persistence = persistence
|
||||||
|
self._transition_resolver = TransitionResolver()
|
||||||
|
self._traces = traces
|
||||||
|
self._hooks = hooks or WorkflowEngineHooks()
|
||||||
|
self._logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def run(self, context: WorkflowContext) -> dict[str, object]:
|
||||||
|
run_id = self._persistence.start_run(
|
||||||
|
self._workflow.definition.name,
|
||||||
|
self._workflow.definition.start_at,
|
||||||
|
context.snapshot(),
|
||||||
|
)
|
||||||
|
context.state.setdefault("runtime", {})
|
||||||
|
context.state["runtime"]["workflow_run_id"] = run_id
|
||||||
|
self._traces.step("workflow")
|
||||||
|
self._traces.info("Workflow started.", status="started", attrs={"workflow_run_id": run_id})
|
||||||
|
current_name = self._workflow.definition.start_at
|
||||||
|
while current_name is not None:
|
||||||
|
node = self._workflow.definition.nodes[current_name]
|
||||||
|
self._logger.info("Workflow run %s: step '%s' started.", run_id, node.name)
|
||||||
|
self._hooks.on_step_started(context, node.name)
|
||||||
|
self._persistence.start_step(run_id, node.name, context.snapshot())
|
||||||
|
self._traces.step(node.name)
|
||||||
|
self._traces.info(f"Step '{node.name}' started.", status="started")
|
||||||
|
try:
|
||||||
|
result = node.step.run(context)
|
||||||
|
except Exception as error:
|
||||||
|
self._persistence.fail_step(run_id, node.name, context.snapshot(), error)
|
||||||
|
self._persistence.fail_run(run_id, context.snapshot())
|
||||||
|
self._traces.error(
|
||||||
|
f"Step '{node.name}' failed: {error}",
|
||||||
|
status="failed",
|
||||||
|
attrs={"exception_type": type(error).__name__},
|
||||||
|
)
|
||||||
|
self._logger.exception("Workflow run %s: step '%s' failed.", run_id, node.name)
|
||||||
|
self._hooks.on_step_failed(context, node.name)
|
||||||
|
raise
|
||||||
|
context.state.update(result.updates)
|
||||||
|
self._persistence.complete_step(run_id, node.name, result.status, result.transition, context.snapshot())
|
||||||
|
self._traces.info(
|
||||||
|
f"Step '{node.name}' completed with transition '{result.transition}'.",
|
||||||
|
status=result.status,
|
||||||
|
)
|
||||||
|
self._logger.info(
|
||||||
|
"Workflow run %s: step '%s' completed with transition '%s'.",
|
||||||
|
run_id,
|
||||||
|
node.name,
|
||||||
|
result.transition,
|
||||||
|
)
|
||||||
|
self._hooks.on_step_finished(context, node.name)
|
||||||
|
current_name = self._transition_resolver.resolve(node, result)
|
||||||
|
self._persistence.complete_run(run_id, context.snapshot())
|
||||||
|
self._traces.step("workflow")
|
||||||
|
self._traces.info("Workflow completed.", status="completed", attrs={"workflow_run_id": run_id})
|
||||||
|
self._logger.info("Workflow run %s completed.", run_id)
|
||||||
|
return {"run_id": run_id, "status": "completed", "context": context.snapshot()}
|
||||||
3
src/app_runtime/workflow/persistence/__init__.py
Normal file
3
src/app_runtime/workflow/persistence/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
from app_runtime.workflow.persistence.workflow_persistence import WorkflowPersistence
|
||||||
|
|
||||||
|
__all__ = ["WorkflowPersistence"]
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,46 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
class CheckpointRepository:
|
||||||
|
def __init__(self, connection_factory: object | None = None) -> None:
|
||||||
|
self._connection_factory = connection_factory
|
||||||
|
self._checkpoints: list[dict[str, Any]] = []
|
||||||
|
|
||||||
|
def save(
|
||||||
|
self,
|
||||||
|
workflow_run_id: int,
|
||||||
|
node_name: str,
|
||||||
|
checkpoint_kind: str,
|
||||||
|
snapshot: dict[str, Any],
|
||||||
|
) -> None:
|
||||||
|
if self._use_memory():
|
||||||
|
self._checkpoints.append(
|
||||||
|
{
|
||||||
|
"workflow_run_id": workflow_run_id,
|
||||||
|
"node_name": node_name,
|
||||||
|
"checkpoint_kind": checkpoint_kind,
|
||||||
|
"snapshot": snapshot,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return
|
||||||
|
query = """
|
||||||
|
INSERT INTO workflow_checkpoints (
|
||||||
|
workflow_run_id, node_name, checkpoint_kind, snapshot_json, created_at
|
||||||
|
) VALUES (%s, %s, %s, %s, UTC_TIMESTAMP(6))
|
||||||
|
"""
|
||||||
|
with self._connection_factory.connect() as connection:
|
||||||
|
with connection.cursor() as cursor:
|
||||||
|
cursor.execute(
|
||||||
|
query,
|
||||||
|
(
|
||||||
|
workflow_run_id,
|
||||||
|
node_name,
|
||||||
|
checkpoint_kind,
|
||||||
|
self._connection_factory.dumps(snapshot),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
def _use_memory(self) -> bool:
|
||||||
|
return self._connection_factory is None or not self._connection_factory.is_configured()
|
||||||
20
src/app_runtime/workflow/persistence/snapshot_sanitizer.py
Normal file
20
src/app_runtime/workflow/persistence/snapshot_sanitizer.py
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
class WorkflowSnapshotSanitizer:
|
||||||
|
def sanitize(self, snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
payload = dict(snapshot.get("payload", {}))
|
||||||
|
state = dict(snapshot.get("state", {}))
|
||||||
|
return {
|
||||||
|
"payload": self._sanitize_dict(payload),
|
||||||
|
"state": self._sanitize_dict(state),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _sanitize_dict(self, value: Any) -> Any:
|
||||||
|
if isinstance(value, dict):
|
||||||
|
return {str(key): self._sanitize_dict(item) for key, item in value.items()}
|
||||||
|
if isinstance(value, list):
|
||||||
|
return [self._sanitize_dict(item) for item in value]
|
||||||
|
return value
|
||||||
53
src/app_runtime/workflow/persistence/workflow_persistence.py
Normal file
53
src/app_runtime/workflow/persistence/workflow_persistence.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app_runtime.workflow.persistence.checkpoint_repository import CheckpointRepository
|
||||||
|
from app_runtime.workflow.persistence.snapshot_sanitizer import WorkflowSnapshotSanitizer
|
||||||
|
from app_runtime.workflow.persistence.workflow_repository import WorkflowRepository
|
||||||
|
|
||||||
|
|
||||||
|
class WorkflowPersistence:
|
||||||
|
def __init__(self, workflow_repository, checkpoint_repository) -> None:
|
||||||
|
self._workflow_repository = workflow_repository
|
||||||
|
self._checkpoint_repository = checkpoint_repository
|
||||||
|
self._snapshot_sanitizer = WorkflowSnapshotSanitizer()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def create_default(cls, connection_factory=None) -> "WorkflowPersistence":
|
||||||
|
return cls(
|
||||||
|
workflow_repository=WorkflowRepository(connection_factory),
|
||||||
|
checkpoint_repository=CheckpointRepository(connection_factory),
|
||||||
|
)
|
||||||
|
|
||||||
|
def start_run(self, workflow_name: str, start_at: str, snapshot: dict[str, object]) -> int:
|
||||||
|
sanitized = self._snapshot_sanitizer.sanitize(snapshot)
|
||||||
|
run_id = self._workflow_repository.create_run(workflow_name, sanitized)
|
||||||
|
self._checkpoint_repository.save(run_id, start_at, "workflow_started", sanitized)
|
||||||
|
return run_id
|
||||||
|
|
||||||
|
def start_step(self, run_id: int, node_name: str, snapshot: dict[str, object]) -> None:
|
||||||
|
self._checkpoint_repository.save(run_id, node_name, "step_started", self._snapshot_sanitizer.sanitize(snapshot))
|
||||||
|
|
||||||
|
def complete_step(
|
||||||
|
self,
|
||||||
|
run_id: int,
|
||||||
|
node_name: str,
|
||||||
|
status: str,
|
||||||
|
transition: str,
|
||||||
|
snapshot: dict[str, object],
|
||||||
|
) -> None:
|
||||||
|
sanitized = self._snapshot_sanitizer.sanitize(snapshot)
|
||||||
|
self._workflow_repository.record_step(run_id, node_name, status, transition, sanitized)
|
||||||
|
self._checkpoint_repository.save(run_id, node_name, "step_completed", sanitized)
|
||||||
|
|
||||||
|
def fail_step(self, run_id: int, node_name: str, snapshot: dict[str, object], error: Exception) -> None:
|
||||||
|
sanitized = self._snapshot_sanitizer.sanitize(snapshot)
|
||||||
|
self._workflow_repository.fail_step(run_id, node_name, sanitized, error)
|
||||||
|
self._checkpoint_repository.save(run_id, node_name, "step_failed", sanitized)
|
||||||
|
|
||||||
|
def complete_run(self, run_id: int, snapshot: dict[str, object]) -> None:
|
||||||
|
sanitized = self._snapshot_sanitizer.sanitize(snapshot)
|
||||||
|
self._workflow_repository.complete_run(run_id, sanitized)
|
||||||
|
self._checkpoint_repository.save(run_id, "workflow_done", "workflow_finished", sanitized)
|
||||||
|
|
||||||
|
def fail_run(self, run_id: int, snapshot: dict[str, object]) -> None:
|
||||||
|
self._workflow_repository.fail_run(run_id, self._snapshot_sanitizer.sanitize(snapshot))
|
||||||
140
src/app_runtime/workflow/persistence/workflow_repository.py
Normal file
140
src/app_runtime/workflow/persistence/workflow_repository.py
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from itertools import count
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
class WorkflowRepository:
|
||||||
|
def __init__(self, connection_factory: object | None = None) -> None:
|
||||||
|
self._connection_factory = connection_factory
|
||||||
|
self._counter = count(1)
|
||||||
|
self._runs: dict[int, dict[str, Any]] = {}
|
||||||
|
|
||||||
|
def create_run(self, workflow_name: str, snapshot: dict[str, Any]) -> int:
|
||||||
|
if self._use_memory():
|
||||||
|
run_id = next(self._counter)
|
||||||
|
self._runs[run_id] = {
|
||||||
|
"workflow_name": workflow_name,
|
||||||
|
"status": "running",
|
||||||
|
"snapshot": snapshot,
|
||||||
|
"steps": [],
|
||||||
|
}
|
||||||
|
return run_id
|
||||||
|
payload = self._build_run_payload(workflow_name, snapshot)
|
||||||
|
query = """
|
||||||
|
INSERT INTO workflow_runs (
|
||||||
|
workflow_name, workflow_version, business_key, queue_task_id, inbox_message_id,
|
||||||
|
current_node, status, context_json, trace_id, started_at, created_at, updated_at
|
||||||
|
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, UTC_TIMESTAMP(6), UTC_TIMESTAMP(6), UTC_TIMESTAMP(6))
|
||||||
|
"""
|
||||||
|
with self._connection_factory.connect() as connection:
|
||||||
|
with connection.cursor() as cursor:
|
||||||
|
cursor.execute(query, payload)
|
||||||
|
return int(cursor.lastrowid)
|
||||||
|
|
||||||
|
def record_step(
|
||||||
|
self,
|
||||||
|
run_id: int,
|
||||||
|
node_name: str,
|
||||||
|
status: str,
|
||||||
|
transition: str,
|
||||||
|
snapshot: dict[str, Any],
|
||||||
|
) -> None:
|
||||||
|
if self._use_memory():
|
||||||
|
self._runs[run_id]["steps"].append(
|
||||||
|
{"node_name": node_name, "status": status, "transition": transition, "snapshot": snapshot}
|
||||||
|
)
|
||||||
|
return
|
||||||
|
context_json = self._connection_factory.dumps(snapshot)
|
||||||
|
insert_query = """
|
||||||
|
INSERT INTO workflow_steps (
|
||||||
|
workflow_run_id, node_name, status, transition_name, input_json, output_json, created_at, started_at, finished_at
|
||||||
|
) VALUES (%s, %s, %s, %s, %s, %s, UTC_TIMESTAMP(6), UTC_TIMESTAMP(6), UTC_TIMESTAMP(6))
|
||||||
|
"""
|
||||||
|
update_query = """
|
||||||
|
UPDATE workflow_runs
|
||||||
|
SET current_node = %s, status = %s, context_json = %s, updated_at = UTC_TIMESTAMP(6)
|
||||||
|
WHERE id = %s
|
||||||
|
"""
|
||||||
|
with self._connection_factory.connect() as connection:
|
||||||
|
with connection.cursor() as cursor:
|
||||||
|
cursor.execute(insert_query, (run_id, node_name, status, transition, context_json, context_json))
|
||||||
|
cursor.execute(update_query, (node_name, "running", context_json, run_id))
|
||||||
|
|
||||||
|
def complete_run(self, run_id: int, snapshot: dict[str, Any]) -> None:
|
||||||
|
if self._use_memory():
|
||||||
|
self._runs[run_id]["status"] = "completed"
|
||||||
|
self._runs[run_id]["snapshot"] = snapshot
|
||||||
|
return
|
||||||
|
query = """
|
||||||
|
UPDATE workflow_runs
|
||||||
|
SET current_node = NULL, status = 'completed', context_json = %s, finished_at = UTC_TIMESTAMP(6), updated_at = UTC_TIMESTAMP(6)
|
||||||
|
WHERE id = %s
|
||||||
|
"""
|
||||||
|
with self._connection_factory.connect() as connection:
|
||||||
|
with connection.cursor() as cursor:
|
||||||
|
cursor.execute(query, (self._connection_factory.dumps(snapshot), run_id))
|
||||||
|
|
||||||
|
def fail_step(self, run_id: int, node_name: str, snapshot: dict[str, Any], error: Exception) -> None:
|
||||||
|
if self._use_memory():
|
||||||
|
self._runs[run_id]["steps"].append(
|
||||||
|
{
|
||||||
|
"node_name": node_name,
|
||||||
|
"status": "failed",
|
||||||
|
"transition": "error",
|
||||||
|
"snapshot": snapshot,
|
||||||
|
"error": str(error),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
self._runs[run_id]["status"] = "failed"
|
||||||
|
return
|
||||||
|
snapshot_json = self._connection_factory.dumps(snapshot)
|
||||||
|
error_json = self._connection_factory.dumps({"message": str(error), "exception_type": type(error).__name__})
|
||||||
|
insert_query = """
|
||||||
|
INSERT INTO workflow_steps (
|
||||||
|
workflow_run_id, node_name, status, transition_name, input_json, output_json, error_json, created_at, started_at, finished_at
|
||||||
|
) VALUES (%s, %s, 'failed', 'error', %s, %s, %s, UTC_TIMESTAMP(6), UTC_TIMESTAMP(6), UTC_TIMESTAMP(6))
|
||||||
|
"""
|
||||||
|
update_query = """
|
||||||
|
UPDATE workflow_runs
|
||||||
|
SET current_node = %s, status = 'failed', context_json = %s, updated_at = UTC_TIMESTAMP(6)
|
||||||
|
WHERE id = %s
|
||||||
|
"""
|
||||||
|
with self._connection_factory.connect() as connection:
|
||||||
|
with connection.cursor() as cursor:
|
||||||
|
cursor.execute(insert_query, (run_id, node_name, snapshot_json, snapshot_json, error_json))
|
||||||
|
cursor.execute(update_query, (node_name, snapshot_json, run_id))
|
||||||
|
|
||||||
|
def fail_run(self, run_id: int, snapshot: dict[str, Any]) -> None:
|
||||||
|
if self._use_memory():
|
||||||
|
self._runs[run_id]["status"] = "failed"
|
||||||
|
self._runs[run_id]["snapshot"] = snapshot
|
||||||
|
return
|
||||||
|
query = """
|
||||||
|
UPDATE workflow_runs
|
||||||
|
SET status = 'failed', context_json = %s, finished_at = UTC_TIMESTAMP(6), updated_at = UTC_TIMESTAMP(6)
|
||||||
|
WHERE id = %s
|
||||||
|
"""
|
||||||
|
with self._connection_factory.connect() as connection:
|
||||||
|
with connection.cursor() as cursor:
|
||||||
|
cursor.execute(query, (self._connection_factory.dumps(snapshot), run_id))
|
||||||
|
|
||||||
|
def _build_run_payload(self, workflow_name: str, snapshot: dict[str, Any]) -> tuple:
|
||||||
|
payload = snapshot.get("payload", {})
|
||||||
|
state = snapshot.get("state", {})
|
||||||
|
runtime = state.get("runtime", {})
|
||||||
|
business_key = payload.get("inbox_message", {}).get("external_message_id") or str(next(self._counter))
|
||||||
|
return (
|
||||||
|
workflow_name,
|
||||||
|
"v1",
|
||||||
|
business_key,
|
||||||
|
runtime.get("queue_task_id"),
|
||||||
|
payload.get("inbox_message", {}).get("id"),
|
||||||
|
None,
|
||||||
|
"running",
|
||||||
|
self._connection_factory.dumps(snapshot),
|
||||||
|
runtime.get("email_trace_id"),
|
||||||
|
)
|
||||||
|
|
||||||
|
def _use_memory(self) -> bool:
|
||||||
|
return self._connection_factory is None or not self._connection_factory.is_configured()
|
||||||
15
src/app_runtime/workflow/runtime_factory.py
Normal file
15
src/app_runtime/workflow/runtime_factory.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app_runtime.workflow.engine.workflow_engine import WorkflowEngine
|
||||||
|
from app_runtime.workflow.persistence import WorkflowPersistence
|
||||||
|
|
||||||
|
|
||||||
|
class WorkflowRuntimeFactory:
|
||||||
|
def __init__(self, connection_factory=None, *, traces, hooks=None) -> None:
|
||||||
|
self._connection_factory = connection_factory
|
||||||
|
self._traces = traces
|
||||||
|
self._hooks = hooks
|
||||||
|
|
||||||
|
def create_engine(self, workflow) -> WorkflowEngine:
|
||||||
|
persistence = WorkflowPersistence.create_default(self._connection_factory)
|
||||||
|
return WorkflowEngine(workflow, persistence, traces=self._traces, hooks=self._hooks)
|
||||||
@@ -5,9 +5,6 @@ from plba.contracts import (
|
|||||||
ApplicationModule,
|
ApplicationModule,
|
||||||
ConfigProvider,
|
ConfigProvider,
|
||||||
HealthContributor,
|
HealthContributor,
|
||||||
Task,
|
|
||||||
TaskHandler,
|
|
||||||
TaskQueue,
|
|
||||||
TraceContext,
|
TraceContext,
|
||||||
TraceContextRecord,
|
TraceContextRecord,
|
||||||
TraceLogMessage,
|
TraceLogMessage,
|
||||||
@@ -21,7 +18,17 @@ from plba.health import HealthRegistry
|
|||||||
from plba.logging import LogManager
|
from plba.logging import LogManager
|
||||||
from plba.queue import InMemoryTaskQueue
|
from plba.queue import InMemoryTaskQueue
|
||||||
from plba.tracing import MySqlTraceTransport, NoOpTraceTransport, TraceService
|
from plba.tracing import MySqlTraceTransport, NoOpTraceTransport, TraceService
|
||||||
from plba.workers import QueueWorker, WorkerSupervisor
|
from plba.workflow import (
|
||||||
|
StepResult,
|
||||||
|
WorkflowContext,
|
||||||
|
WorkflowDefinition,
|
||||||
|
WorkflowEngine,
|
||||||
|
WorkflowEngineHooks,
|
||||||
|
WorkflowNode,
|
||||||
|
WorkflowRuntimeFactory,
|
||||||
|
WorkflowStep,
|
||||||
|
)
|
||||||
|
from plba.workers import WorkerSupervisor
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"ApplicationModule",
|
"ApplicationModule",
|
||||||
@@ -40,19 +47,23 @@ __all__ = [
|
|||||||
"LogManager",
|
"LogManager",
|
||||||
"MySqlTraceTransport",
|
"MySqlTraceTransport",
|
||||||
"NoOpTraceTransport",
|
"NoOpTraceTransport",
|
||||||
"QueueWorker",
|
|
||||||
"RuntimeManager",
|
"RuntimeManager",
|
||||||
"ServiceContainer",
|
"ServiceContainer",
|
||||||
"Task",
|
|
||||||
"TaskHandler",
|
|
||||||
"TaskQueue",
|
|
||||||
"TraceContext",
|
"TraceContext",
|
||||||
"TraceContextRecord",
|
"TraceContextRecord",
|
||||||
"TraceLogMessage",
|
"TraceLogMessage",
|
||||||
"TraceService",
|
"TraceService",
|
||||||
"TraceTransport",
|
"TraceTransport",
|
||||||
|
"StepResult",
|
||||||
"Worker",
|
"Worker",
|
||||||
"WorkerHealth",
|
"WorkerHealth",
|
||||||
"WorkerStatus",
|
"WorkerStatus",
|
||||||
|
"WorkflowContext",
|
||||||
|
"WorkflowDefinition",
|
||||||
|
"WorkflowEngine",
|
||||||
|
"WorkflowEngineHooks",
|
||||||
|
"WorkflowNode",
|
||||||
|
"WorkflowRuntimeFactory",
|
||||||
|
"WorkflowStep",
|
||||||
"WorkerSupervisor",
|
"WorkerSupervisor",
|
||||||
]
|
]
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
src/plba/__pycache__/workflow.cpython-312.pyc
Normal file
BIN
src/plba/__pycache__/workflow.cpython-312.pyc
Normal file
Binary file not shown.
@@ -1,8 +1,6 @@
|
|||||||
from app_runtime.contracts.application import ApplicationModule
|
from app_runtime.contracts.application import ApplicationModule
|
||||||
from app_runtime.contracts.config import ConfigProvider
|
from app_runtime.contracts.config import ConfigProvider
|
||||||
from app_runtime.contracts.health import HealthContributor
|
from app_runtime.contracts.health import HealthContributor
|
||||||
from app_runtime.contracts.queue import TaskQueue
|
|
||||||
from app_runtime.contracts.tasks import Task, TaskHandler
|
|
||||||
from app_runtime.contracts.trace import (
|
from app_runtime.contracts.trace import (
|
||||||
TraceContext,
|
TraceContext,
|
||||||
TraceContextRecord,
|
TraceContextRecord,
|
||||||
@@ -15,9 +13,6 @@ __all__ = [
|
|||||||
"ApplicationModule",
|
"ApplicationModule",
|
||||||
"ConfigProvider",
|
"ConfigProvider",
|
||||||
"HealthContributor",
|
"HealthContributor",
|
||||||
"Task",
|
|
||||||
"TaskHandler",
|
|
||||||
"TaskQueue",
|
|
||||||
"TraceContext",
|
"TraceContext",
|
||||||
"TraceContextRecord",
|
"TraceContextRecord",
|
||||||
"TraceLogMessage",
|
"TraceLogMessage",
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
from app_runtime.workers.queue_worker import QueueWorker
|
|
||||||
from app_runtime.workers.supervisor import WorkerSupervisor
|
from app_runtime.workers.supervisor import WorkerSupervisor
|
||||||
|
|
||||||
__all__ = ["QueueWorker", "WorkerSupervisor"]
|
__all__ = ["WorkerSupervisor"]
|
||||||
|
|||||||
18
src/plba/workflow.py
Normal file
18
src/plba/workflow.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
from app_runtime.workflow.contracts.context import WorkflowContext
|
||||||
|
from app_runtime.workflow.contracts.result import StepResult
|
||||||
|
from app_runtime.workflow.contracts.step import WorkflowStep
|
||||||
|
from app_runtime.workflow.contracts.workflow import WorkflowDefinition, WorkflowNode
|
||||||
|
from app_runtime.workflow.engine.hooks import WorkflowEngineHooks
|
||||||
|
from app_runtime.workflow.engine.workflow_engine import WorkflowEngine
|
||||||
|
from app_runtime.workflow.runtime_factory import WorkflowRuntimeFactory
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"StepResult",
|
||||||
|
"WorkflowContext",
|
||||||
|
"WorkflowDefinition",
|
||||||
|
"WorkflowEngine",
|
||||||
|
"WorkflowEngineHooks",
|
||||||
|
"WorkflowNode",
|
||||||
|
"WorkflowRuntimeFactory",
|
||||||
|
"WorkflowStep",
|
||||||
|
]
|
||||||
Binary file not shown.
@@ -2,35 +2,36 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from threading import Event, Thread
|
from threading import Event, Lock, Thread
|
||||||
from time import sleep
|
from time import sleep
|
||||||
|
|
||||||
from app_runtime.contracts.application import ApplicationModule
|
from app_runtime.contracts.application import ApplicationModule
|
||||||
from app_runtime.contracts.health import HealthContributor
|
from app_runtime.contracts.health import HealthContributor
|
||||||
from app_runtime.contracts.tasks import Task, TaskHandler
|
from app_runtime.contracts.worker import Worker, WorkerHealth, WorkerStatus
|
||||||
from app_runtime.contracts.worker import WorkerHealth
|
|
||||||
from app_runtime.core.registration import ModuleRegistry
|
from app_runtime.core.registration import ModuleRegistry
|
||||||
from app_runtime.core.runtime import RuntimeManager
|
from app_runtime.core.runtime import RuntimeManager
|
||||||
from app_runtime.queue.in_memory import InMemoryTaskQueue
|
from app_runtime.queue.in_memory import InMemoryTaskQueue
|
||||||
from app_runtime.tracing.transport import NoOpTraceTransport
|
from app_runtime.tracing.transport import NoOpTraceTransport
|
||||||
from app_runtime.workers.queue_worker import QueueWorker
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class CollectingHandler(TaskHandler):
|
class CollectingRoutine:
|
||||||
processed: list[dict[str, object]] = field(default_factory=list)
|
processed: list[dict[str, object]] = field(default_factory=list)
|
||||||
|
_done: bool = False
|
||||||
|
|
||||||
def handle(self, task: Task) -> None:
|
def run(self) -> None:
|
||||||
self.processed.append(task.payload)
|
if self._done:
|
||||||
|
return
|
||||||
|
self.processed.append({"id": 1})
|
||||||
|
self._done = True
|
||||||
|
|
||||||
|
|
||||||
class BlockingHandler(TaskHandler):
|
class BlockingRoutine:
|
||||||
def __init__(self, started: Event, release: Event) -> None:
|
def __init__(self, started: Event, release: Event) -> None:
|
||||||
self._started = started
|
self._started = started
|
||||||
self._release = release
|
self._release = release
|
||||||
|
|
||||||
def handle(self, task: Task) -> None:
|
def run(self) -> None:
|
||||||
del task
|
|
||||||
self._started.set()
|
self._started.set()
|
||||||
self._release.wait(timeout=2.0)
|
self._release.wait(timeout=2.0)
|
||||||
|
|
||||||
@@ -40,37 +41,139 @@ class StaticHealthContributor(HealthContributor):
|
|||||||
return WorkerHealth(name="example-module", status="ok", critical=False, meta={"kind": "test"})
|
return WorkerHealth(name="example-module", status="ok", critical=False, meta={"kind": "test"})
|
||||||
|
|
||||||
|
|
||||||
|
class RoutineWorker(Worker):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
name: str,
|
||||||
|
routine: object,
|
||||||
|
*,
|
||||||
|
interval: float = 0.01,
|
||||||
|
concurrency: int = 1,
|
||||||
|
critical: bool = True,
|
||||||
|
) -> None:
|
||||||
|
self._name = name
|
||||||
|
self._routine = routine
|
||||||
|
self._interval = interval
|
||||||
|
self._concurrency = concurrency
|
||||||
|
self._critical = critical
|
||||||
|
self._threads: list[Thread] = []
|
||||||
|
self._stop_requested = Event()
|
||||||
|
self._force_stop = Event()
|
||||||
|
self._lock = Lock()
|
||||||
|
self._started = False
|
||||||
|
self._in_flight = 0
|
||||||
|
self._runs = 0
|
||||||
|
self._failures = 0
|
||||||
|
self._last_error: str | None = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def name(self) -> str:
|
||||||
|
return self._name
|
||||||
|
|
||||||
|
@property
|
||||||
|
def critical(self) -> bool:
|
||||||
|
return self._critical
|
||||||
|
|
||||||
|
def start(self) -> None:
|
||||||
|
if any(thread.is_alive() for thread in self._threads):
|
||||||
|
return
|
||||||
|
self._threads.clear()
|
||||||
|
self._stop_requested.clear()
|
||||||
|
self._force_stop.clear()
|
||||||
|
self._started = True
|
||||||
|
for index in range(self._concurrency):
|
||||||
|
thread = Thread(target=self._run_loop, name=f"{self._name}-{index + 1}", daemon=True)
|
||||||
|
self._threads.append(thread)
|
||||||
|
thread.start()
|
||||||
|
|
||||||
|
def stop(self, force: bool = False) -> None:
|
||||||
|
self._stop_requested.set()
|
||||||
|
if force:
|
||||||
|
self._force_stop.set()
|
||||||
|
|
||||||
|
def health(self) -> WorkerHealth:
|
||||||
|
status = self.status()
|
||||||
|
if self._started and not self._stop_requested.is_set() and self._alive_threads() == 0:
|
||||||
|
return WorkerHealth(self.name, "unhealthy", self.critical, "worker threads are not running", status.meta)
|
||||||
|
if self._failures > 0:
|
||||||
|
return WorkerHealth(self.name, "degraded", self.critical, self._last_error, status.meta)
|
||||||
|
return WorkerHealth(self.name, "ok", self.critical, meta=status.meta)
|
||||||
|
|
||||||
|
def status(self) -> WorkerStatus:
|
||||||
|
alive_threads = self._alive_threads()
|
||||||
|
with self._lock:
|
||||||
|
in_flight = self._in_flight
|
||||||
|
runs = self._runs
|
||||||
|
failures = self._failures
|
||||||
|
detail = self._last_error
|
||||||
|
if self._started and alive_threads == 0:
|
||||||
|
state = "stopped"
|
||||||
|
elif self._stop_requested.is_set():
|
||||||
|
state = "stopping" if alive_threads > 0 else "stopped"
|
||||||
|
elif not self._started:
|
||||||
|
state = "stopped"
|
||||||
|
elif in_flight > 0:
|
||||||
|
state = "busy"
|
||||||
|
else:
|
||||||
|
state = "idle"
|
||||||
|
return WorkerStatus(
|
||||||
|
name=self.name,
|
||||||
|
state=state,
|
||||||
|
in_flight=in_flight,
|
||||||
|
detail=detail,
|
||||||
|
meta={"alive_threads": alive_threads, "concurrency": self._concurrency, "runs": runs, "failures": failures},
|
||||||
|
)
|
||||||
|
|
||||||
|
def _run_loop(self) -> None:
|
||||||
|
while True:
|
||||||
|
if self._force_stop.is_set() or self._stop_requested.is_set():
|
||||||
|
return
|
||||||
|
with self._lock:
|
||||||
|
self._in_flight += 1
|
||||||
|
try:
|
||||||
|
self._routine.run()
|
||||||
|
except Exception as exc:
|
||||||
|
with self._lock:
|
||||||
|
self._failures += 1
|
||||||
|
self._last_error = str(exc)
|
||||||
|
else:
|
||||||
|
with self._lock:
|
||||||
|
self._runs += 1
|
||||||
|
self._last_error = None
|
||||||
|
finally:
|
||||||
|
with self._lock:
|
||||||
|
self._in_flight -= 1
|
||||||
|
if self._stop_requested.is_set():
|
||||||
|
return
|
||||||
|
sleep(self._interval)
|
||||||
|
|
||||||
|
def _alive_threads(self) -> int:
|
||||||
|
return sum(1 for thread in self._threads if thread.is_alive())
|
||||||
|
|
||||||
|
|
||||||
class ExampleModule(ApplicationModule):
|
class ExampleModule(ApplicationModule):
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self.handler = CollectingHandler()
|
self.routine = CollectingRoutine()
|
||||||
self.queue = InMemoryTaskQueue()
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self) -> str:
|
def name(self) -> str:
|
||||||
return "example"
|
return "example"
|
||||||
|
|
||||||
def register(self, registry: ModuleRegistry) -> None:
|
def register(self, registry: ModuleRegistry) -> None:
|
||||||
traces = registry.services.get("traces")
|
registry.add_worker(RoutineWorker("collector", self.routine))
|
||||||
registry.add_queue("incoming", self.queue)
|
|
||||||
registry.add_handler("collect", self.handler)
|
|
||||||
self.queue.publish(Task(name="incoming", payload={"id": 1}, metadata={}))
|
|
||||||
registry.add_worker(QueueWorker("collector", self.queue, self.handler, traces, concurrency=1))
|
|
||||||
registry.add_health_contributor(StaticHealthContributor())
|
registry.add_health_contributor(StaticHealthContributor())
|
||||||
|
|
||||||
|
|
||||||
class BlockingModule(ApplicationModule):
|
class BlockingModule(ApplicationModule):
|
||||||
def __init__(self, started: Event, release: Event) -> None:
|
def __init__(self, started: Event, release: Event) -> None:
|
||||||
self.queue = InMemoryTaskQueue()
|
self.routine = BlockingRoutine(started, release)
|
||||||
self.handler = BlockingHandler(started, release)
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self) -> str:
|
def name(self) -> str:
|
||||||
return "blocking"
|
return "blocking"
|
||||||
|
|
||||||
def register(self, registry: ModuleRegistry) -> None:
|
def register(self, registry: ModuleRegistry) -> None:
|
||||||
traces = registry.services.get("traces")
|
registry.add_worker(RoutineWorker("blocking-worker", self.routine))
|
||||||
self.queue.publish(Task(name="incoming", payload={"id": 1}, metadata={}))
|
|
||||||
registry.add_worker(QueueWorker("blocking-worker", self.queue, self.handler, traces, concurrency=1))
|
|
||||||
|
|
||||||
|
|
||||||
class RecordingTransport(NoOpTraceTransport):
|
class RecordingTransport(NoOpTraceTransport):
|
||||||
@@ -85,7 +188,7 @@ class RecordingTransport(NoOpTraceTransport):
|
|||||||
self.messages.append(record)
|
self.messages.append(record)
|
||||||
|
|
||||||
|
|
||||||
def test_runtime_processes_tasks_and_exposes_status(tmp_path) -> None:
|
def test_runtime_runs_worker_routine_and_exposes_status(tmp_path) -> None:
|
||||||
config_path = tmp_path / "config.yml"
|
config_path = tmp_path / "config.yml"
|
||||||
config_path.write_text(
|
config_path.write_text(
|
||||||
"""
|
"""
|
||||||
@@ -113,7 +216,7 @@ log:
|
|||||||
status = runtime.status()
|
status = runtime.status()
|
||||||
runtime.stop()
|
runtime.stop()
|
||||||
|
|
||||||
assert module.handler.processed == [{"id": 1}]
|
assert module.routine.processed == [{"id": 1}]
|
||||||
assert status["modules"] == ["example"]
|
assert status["modules"] == ["example"]
|
||||||
assert status["runtime"]["state"] == "idle"
|
assert status["runtime"]["state"] == "idle"
|
||||||
assert status["health"]["status"] == "ok"
|
assert status["health"]["status"] == "ok"
|
||||||
@@ -146,7 +249,7 @@ def test_trace_service_writes_contexts_and_messages() -> None:
|
|||||||
transport = RecordingTransport()
|
transport = RecordingTransport()
|
||||||
manager = TraceService(transport=transport)
|
manager = TraceService(transport=transport)
|
||||||
|
|
||||||
with manager.open_context(alias="worker", kind="task", attrs={"task": "incoming"}):
|
with manager.open_context(alias="worker", kind="worker", attrs={"routine": "incoming"}):
|
||||||
manager.step("parse")
|
manager.step("parse")
|
||||||
manager.info("started", status="ok", attrs={"attempt": 1})
|
manager.info("started", status="ok", attrs={"attempt": 1})
|
||||||
|
|
||||||
@@ -202,29 +305,67 @@ def test_http_control_channel_exposes_health_and_actions() -> None:
|
|||||||
asyncio.run(scenario())
|
asyncio.run(scenario())
|
||||||
|
|
||||||
|
|
||||||
def test_public_plba_package_exports_runtime_builder(tmp_path) -> None:
|
def test_public_plba_package_exports_runtime_builder_and_worker_contract(tmp_path) -> None:
|
||||||
|
import plba
|
||||||
from plba import ApplicationModule as PublicApplicationModule
|
from plba import ApplicationModule as PublicApplicationModule
|
||||||
from plba import QueueWorker as PublicQueueWorker
|
from plba import InMemoryTaskQueue
|
||||||
|
from plba import Worker as PublicWorker
|
||||||
|
from plba import WorkerHealth as PublicWorkerHealth
|
||||||
|
from plba import WorkerStatus as PublicWorkerStatus
|
||||||
from plba import create_runtime
|
from plba import create_runtime
|
||||||
|
|
||||||
config_path = tmp_path / "config.yml"
|
config_path = tmp_path / "config.yml"
|
||||||
config_path.write_text("platform: {}\n", encoding="utf-8")
|
config_path.write_text("platform: {}\n", encoding="utf-8")
|
||||||
|
|
||||||
|
queue = InMemoryTaskQueue[int]()
|
||||||
|
queue.put(2)
|
||||||
|
assert queue.get(timeout=0.01) == 2
|
||||||
|
queue.task_done()
|
||||||
|
|
||||||
|
class PublicRoutine:
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.runs = 0
|
||||||
|
|
||||||
|
def run(self) -> None:
|
||||||
|
if self.runs == 0:
|
||||||
|
self.runs += 1
|
||||||
|
|
||||||
|
class PublicWorkerImpl(PublicWorker):
|
||||||
|
def __init__(self, routine: PublicRoutine) -> None:
|
||||||
|
self._inner = RoutineWorker("public-worker", routine)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def name(self) -> str:
|
||||||
|
return self._inner.name
|
||||||
|
|
||||||
|
@property
|
||||||
|
def critical(self) -> bool:
|
||||||
|
return self._inner.critical
|
||||||
|
|
||||||
|
def start(self) -> None:
|
||||||
|
self._inner.start()
|
||||||
|
|
||||||
|
def stop(self, force: bool = False) -> None:
|
||||||
|
self._inner.stop(force=force)
|
||||||
|
|
||||||
|
def health(self) -> PublicWorkerHealth:
|
||||||
|
return self._inner.health()
|
||||||
|
|
||||||
|
def status(self) -> PublicWorkerStatus:
|
||||||
|
return self._inner.status()
|
||||||
|
|
||||||
class PublicExampleModule(PublicApplicationModule):
|
class PublicExampleModule(PublicApplicationModule):
|
||||||
@property
|
@property
|
||||||
def name(self) -> str:
|
def name(self) -> str:
|
||||||
return "public-example"
|
return "public-example"
|
||||||
|
|
||||||
def register(self, registry: ModuleRegistry) -> None:
|
def register(self, registry: ModuleRegistry) -> None:
|
||||||
queue = InMemoryTaskQueue()
|
registry.add_worker(PublicWorkerImpl(PublicRoutine()))
|
||||||
traces = registry.services.get("traces")
|
|
||||||
handler = CollectingHandler()
|
|
||||||
queue.publish(Task(name="incoming", payload={"id": 2}, metadata={}))
|
|
||||||
registry.add_worker(PublicQueueWorker("public-worker", queue, handler, traces))
|
|
||||||
|
|
||||||
runtime = create_runtime(PublicExampleModule(), config_path=str(config_path))
|
runtime = create_runtime(PublicExampleModule(), config_path=str(config_path))
|
||||||
runtime.start()
|
runtime.start()
|
||||||
sleep(0.2)
|
sleep(0.2)
|
||||||
assert runtime.configuration.get() == {"platform": {}}
|
assert runtime.configuration.get() == {"platform": {}}
|
||||||
assert runtime.status()["workers"]["registered"] == 1
|
assert runtime.status()["workers"]["registered"] == 1
|
||||||
|
assert hasattr(plba, "QueueWorker") is False
|
||||||
runtime.stop()
|
runtime.stop()
|
||||||
|
|||||||
Reference in New Issue
Block a user