Фикс состояния

This commit is contained in:
2026-03-12 16:55:23 +03:00
parent 417b8b6f72
commit 6ba0a18ac9
1445 changed files with 620025 additions and 885 deletions
View File
View File
+5
View File
@@ -0,0 +1,5 @@
from datetime import timedelta
# Time window attached to idempotency handling.
# NOTE(review): the consumer is not visible in this file — confirm what expires after it.
IDEMPOTENCY_TTL = timedelta(minutes=10)
# Retry ceiling; the operation being retried is not visible in this file.
MAX_RETRIES = 5
# Schema version string the backend stamps on (and expects from) changeset payloads.
SUPPORTED_SCHEMA_VERSION = "1.0"
+37
View File
@@ -0,0 +1,37 @@
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from pydantic import ValidationError
from app.core.exceptions import AppError
from app.schemas.common import ModuleName
def register_error_handlers(app: FastAPI) -> None:
    """Attach the backend's JSON exception handlers to ``app``.

    Every handler answers with the same envelope: ``code``, ``desc`` and
    ``module``. ``AppError`` maps to 400, pydantic ``ValidationError`` to 422
    and any other exception to 500.
    """

    def _envelope(status: int, code: str, desc: str, module: ModuleName) -> JSONResponse:
        # Single place that shapes the error body shared by all handlers.
        body = {"code": code, "desc": desc, "module": module.value}
        return JSONResponse(status_code=status, content=body)

    @app.exception_handler(AppError)
    async def app_error_handler(_: Request, exc: AppError) -> JSONResponse:
        # Domain errors carry their own code, description and module.
        return _envelope(400, exc.code, exc.desc, exc.module)

    @app.exception_handler(ValidationError)
    async def validation_error_handler(_: Request, exc: ValidationError) -> JSONResponse:
        return _envelope(422, "validation_error", str(exc), ModuleName.BACKEND)

    @app.exception_handler(Exception)
    async def generic_error_handler(_: Request, exc: Exception) -> JSONResponse:
        # Catch-all. NOTE(review): the raw exception text is sent to the
        # client; confirm that this is acceptable outside of development.
        return _envelope(500, "internal_error", str(exc), ModuleName.BACKEND)
+9
View File
@@ -0,0 +1,9 @@
from app.schemas.common import ModuleName
class AppError(Exception):
    """Domain error carrying a machine-readable code, a description and a module tag."""

    def __init__(self, code: str, desc: str, module: ModuleName) -> None:
        """Store the error fields; ``desc`` also becomes the exception message."""
        super().__init__(desc)
        self.code, self.desc, self.module = code, desc, module
+49
View File
@@ -0,0 +1,49 @@
from __future__ import annotations
import logging
import re
class ScrubbingFormatter(logging.Formatter):
    """Log formatter that masks identifier-like values in the rendered line."""

    # ``<name>id=<value>`` and ``<name>_key=<value>`` pairs; the value runs up
    # to the next whitespace or comma.
    _KEY_VALUE_PATTERNS = (
        re.compile(r"\b([A-Za-z_][A-Za-z0-9_]*id)=([^\s,]+)"),
        re.compile(r"\b([A-Za-z_][A-Za-z0-9_]*_key)=([^\s,]+)"),
    )
    # Free-text mentions such as ``session id: 42`` (case-insensitive).
    _TEXT_PATTERNS = (
        re.compile(r"\b(index|task|dialog|rag|session|plan|artifact|evidence|symbol|edge|entry) id\b[:=]\s*([^\s,]+)", re.IGNORECASE),
    )

    def format(self, record: logging.LogRecord) -> str:
        """Render ``record``, scrub it and end the text with exactly one newline."""
        cleaned = self._scrub(super().format(record))
        return cleaned.rstrip("\n") + "\n"

    def _scrub(self, message: str) -> str:
        """Apply the key/value patterns first, then the free-text patterns."""
        passes = (
            (self._KEY_VALUE_PATTERNS, self._replace_key_value),
            (self._TEXT_PATTERNS, self._replace_text),
        )
        for patterns, replacer in passes:
            for regex in patterns:
                message = regex.sub(replacer, message)
        return message

    def _replace_key_value(self, match: re.Match[str]) -> str:
        """Keep the key, drop the value."""
        return match[1] + "=<redacted>"

    def _replace_text(self, match: re.Match[str]) -> str:
        """Keep the entity word as written and normalise the separator to ``=``."""
        return match[1] + " id=<redacted>"
def configure_logging() -> None:
    """Reset root logging to WARNING, install the scrubbing formatter and quiet uvicorn."""
    line_format = "%(levelname)s:%(name)s:%(message)s"
    # force=True makes basicConfig replace handlers installed earlier.
    logging.basicConfig(level=logging.WARNING, force=True, format=line_format)

    root = logging.getLogger()
    root.setLevel(logging.WARNING)

    # One formatter instance is shared by every root handler.
    scrubber = ScrubbingFormatter(line_format)
    for attached in root.handlers:
        attached.setFormatter(scrubber)

    for logger_name in ("uvicorn", "uvicorn.error", "uvicorn.access"):
        logging.getLogger(logger_name).setLevel(logging.WARNING)
+49
View File
@@ -0,0 +1,49 @@
import logging
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.core.logging_setup import configure_logging
from app.core.error_handlers import register_error_handlers
from app.modules.application import ModularApplication
def _configure_logging() -> None:
    """Apply the shared logging setup imported from ``app.core.logging_setup``."""
    configure_logging()


# Executed at import time, before the application object is built below.
_configure_logging()
def create_app() -> FastAPI:
    """Build the FastAPI application.

    Creates the module container, enables CORS, mounts the module routers,
    registers the error handlers, hooks module startup and adds ``/health``.
    """
    application = FastAPI(title="Agent Backend MVP", version="0.1.0")
    modules = ModularApplication()
    application.state.modules = modules

    # Any origin, method and header is allowed; credentials are not.
    cors_options = {
        "allow_origins": ["*"],
        "allow_credentials": False,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
    }
    application.add_middleware(CORSMiddleware, **cors_options)

    # Each router is created and mounted one at a time, in this order.
    router_factories = (
        modules.chat.public_router,
        modules.rag.public_router,
        modules.rag.internal_router,
        modules.rag_repo.internal_router,
        modules.agent.internal_router,
    )
    for make_router in router_factories:
        application.include_router(make_router())

    register_error_handlers(application)

    @application.on_event("startup")
    async def startup() -> None:
        modules.startup()

    @application.get("/health")
    async def health() -> dict:
        return {"status": "ok"}

    return application
# Application instance built at import time.
app = create_app()
View File
+91
View File
@@ -0,0 +1,91 @@
# Модуль agent
## 1. Функции модуля
- Оркестрация выполнения пользовательского запроса поверх роутера интентов и графов.
- Формирование `TaskSpec`, запуск оркестратора шагов и сборка финального результата.
- Реализация необходимых для агента tools и их интеграция с остальной логикой выполнения.
- Сохранение quality-метрик и session-артефактов для последующей привязки к Story.
## 2. Диаграмма классов и взаимосвязей
```mermaid
classDiagram
class AgentModule
class GraphAgentRuntime
class OrchestratorService
class TaskSpecBuilder
class StorySessionRecorder
class StoryContextRepository
class ConfluenceService
class AgentRepository
AgentModule --> GraphAgentRuntime
AgentModule --> ConfluenceService
AgentModule --> StorySessionRecorder
StorySessionRecorder --> StoryContextRepository
GraphAgentRuntime --> OrchestratorService
GraphAgentRuntime --> TaskSpecBuilder
GraphAgentRuntime --> AgentRepository
GraphAgentRuntime --> ConfluenceService
```
## 3. Описание классов
- `AgentModule`: собирает runtime и публикует внутренние tools-роуты.
Методы: `__init__` — связывает зависимости модуля; `internal_router` — регистрирует internal API tools.
- `GraphAgentRuntime`: основной исполнитель агентного запроса.
Методы: `run` — выполняет цикл route -> retrieval -> orchestration -> ответ/changeset.
- `OrchestratorService`: управляет планом шагов и выполнением quality gates.
Методы: `run` — строит, валидирует и исполняет execution plan.
- `TaskSpecBuilder`: формирует спецификацию задачи для оркестратора.
Методы: `build` — собирает `TaskSpec` из route, контекстов и ограничений.
- `ProjectQaConversationGraphFactory`, `ProjectQaClassificationGraphFactory`, `ProjectQaRetrievalGraphFactory`, `ProjectQaAnalysisGraphFactory`, `ProjectQaAnswerGraphFactory`: набор маленьких graph-исполнителей для `project/qa`.
Роли: нормализация запроса; классификация project-question; поздний retrieval из `RAG`; анализ code/docs контекста; сборка финального ответа.
- `StorySessionRecorder`: пишет session-scoped артефакты для последующего bind к Story.
Методы: `record_run` — сохраняет входные источники и выходные артефакты сессии.
- `StoryContextRepository`: репозиторий Story-контекста и его связей.
Методы: `record_story_commit` — фиксирует commit-контекст Story; `upsert_story` — создает/обновляет карточку Story; `add_session_artifact` — добавляет session-артефакт; `bind_session_to_story` — переносит артефакты сессии в Story; `add_artifact` — добавляет версионный Story-артефакт; `get_story_context` — возвращает агрегированный контекст Story.
- `ConfluenceService`: tool для загрузки страницы по URL.
Методы: `fetch_page` — валидирует URL и возвращает нормализованный payload страницы.
- `AgentRepository`: хранение router-контекста и quality-метрик.
Методы: `ensure_tables` — создает таблицы модуля; `get_router_context` — читает контекст маршрутизации; `update_router_context` — обновляет историю диалога и last-route; `save_quality_metrics` — сохраняет метрики качества; `get_quality_metrics` — читает историю метрик.
## 4. Сиквенс-диаграммы API
### POST /internal/tools/confluence/fetch
Назначение: загружает страницу Confluence по URL и возвращает ее контент для дальнейшего использования в сценариях агента.
```mermaid
sequenceDiagram
participant Router as AgentModule.APIRouter
participant Confluence as ConfluenceService
Router->>Confluence: fetch_page(url)
Confluence-->>Router: page(content_markdown, metadata)
```
### `project/qa` reasoning flow
Назначение: оркестратор планирует шаги, а каждый шаг исполняется отдельным graph. Retrieval вызывается поздно, внутри шага `context_retrieval`.
```mermaid
sequenceDiagram
participant Runtime as GraphAgentRuntime
participant Orch as OrchestratorService
participant G1 as conversation_understanding
participant G2 as question_classification
participant G3 as context_retrieval
participant Rag as RagService
participant G4 as context_analysis
participant G5 as answer_composition
Runtime->>Orch: run(task)
Orch->>G1: execute
G1-->>Orch: resolved_request
Orch->>G2: execute
G2-->>Orch: question_profile
Orch->>G3: execute
G3->>Rag: retrieve(query)
Rag-->>G3: rag_items
G3-->>Orch: source_bundle
Orch->>G4: execute
G4-->>Orch: analysis_brief
Orch->>G5: execute
G5-->>Orch: final_answer
Orch-->>Runtime: final_answer
```
View File
@@ -0,0 +1,20 @@
from app.core.constants import SUPPORTED_SCHEMA_VERSION
from app.core.exceptions import AppError
from app.schemas.changeset import ChangeItem, ChangeSetPayload
from app.schemas.common import ModuleName
class ChangeSetValidator:
    """Validates a changeset by building a ``ChangeSetPayload`` around it."""

    def validate(self, task_id: str, changeset: list[ChangeItem]) -> list[ChangeItem]:
        """Return the changeset as held by a freshly built payload.

        Args:
            task_id: Identifier stored on the payload.
            changeset: Items to wrap.

        Raises:
            AppError: ``unsupported_schema`` when the payload reports a schema
                version other than ``SUPPORTED_SCHEMA_VERSION``.
        """
        envelope = ChangeSetPayload(
            schema_version=SUPPORTED_SCHEMA_VERSION,
            task_id=task_id,
            changeset=changeset,
        )
        # NOTE(review): the payload is built with the supported version, so
        # this guard can only fire if ChangeSetPayload rewrites the field —
        # confirm whether the check is still needed.
        if envelope.schema_version == SUPPORTED_SCHEMA_VERSION:
            return envelope.changeset
        raise AppError(
            "unsupported_schema",
            f"Unsupported schema version: {envelope.schema_version}",
            ModuleName.AGENT,
        )
@@ -0,0 +1,15 @@
from app.modules.agent.code_qa_runtime.executor import CodeQaRuntimeExecutor
from app.modules.agent.code_qa_runtime.models import (
CodeQaDraftAnswer,
CodeQaExecutionState,
CodeQaFinalResult,
CodeQaValidationResult,
)
# Names re-exported from the executor and models modules imported above.
__all__ = [
"CodeQaDraftAnswer",
"CodeQaExecutionState",
"CodeQaFinalResult",
"CodeQaRuntimeExecutor",
"CodeQaValidationResult",
]
@@ -0,0 +1,62 @@
from __future__ import annotations
from dataclasses import dataclass
from app.modules.rag.code_qa_pipeline.evidence_gate import EvidenceGateDecision
from app.modules.rag.intent_router_v2.models import IntentRouterResult
@dataclass(slots=True, frozen=True)
class CodeQaPolicyDecision:
answer_mode: str
answer: str = ""
should_call_llm: bool = True
branch: str = "normal_answer"
reason: str = "evidence_sufficient"
class CodeQaAnswerPolicy:
    """Decides whether the LLM is called or a prepared answer is returned."""

    def decide(
        self,
        *,
        router_result: IntentRouterResult,
        gate_decision: EvidenceGateDecision,
    ) -> CodeQaPolicyDecision:
        """Map the router result and the evidence gate outcome to a decision.

        Checks run in this order: an OPEN_FILE request whose path scope
        matched nothing, an EXPLAIN request whose symbol was not resolved, a
        failed evidence gate, and finally the normal LLM answer.
        """
        intent_kind = router_result.query_plan.sub_intent.upper()
        resolution = router_result.symbol_resolution

        if intent_kind == "OPEN_FILE" and "path_scope_empty" in gate_decision.failure_reasons:
            scoped_paths = list(getattr(router_result.retrieval_spec.filters, "path_scope", []) or [])
            # Fall back to a generic phrase when no path was requested.
            missing = scoped_paths[0] if scoped_paths else "запрошенный файл"
            return CodeQaPolicyDecision(
                answer_mode="not_found",
                answer=f"Файл {missing} не найден.",
                should_call_llm=False,
                branch="open_file_not_found",
                reason="path_scope_empty",
            )

        if intent_kind == "EXPLAIN" and resolution.status in {"not_found", "ambiguous"}:
            return CodeQaPolicyDecision(
                answer_mode="degraded",
                answer=self._symbol_message(resolution.status, resolution.alternatives),
                should_call_llm=False,
                branch="explain_unresolved_symbol",
                reason=f"symbol_resolution_{resolution.status}",
            )

        if gate_decision.passed:
            return CodeQaPolicyDecision(answer_mode="normal", branch="normal_answer", reason="evidence_sufficient")

        # Gate failed: answer with the gate's own message instead of the LLM.
        failures = gate_decision.failure_reasons
        if "insufficient_evidence" in failures:
            mode = "insufficient"
        else:
            mode = "degraded"
        return CodeQaPolicyDecision(
            answer_mode=mode,
            answer=gate_decision.degraded_message,
            should_call_llm=False,
            branch="evidence_gate_short_circuit",
            reason=failures[0] if failures else "evidence_gate_failed",
        )

    def _symbol_message(self, status: str, alternatives: list[str]) -> str:
        """Build the user-facing text for an unresolved symbol (at most three alternatives)."""
        if not alternatives:
            return "Сущность не найдена в доступном коде."
        shortlist = ", ".join(alternatives[:3])
        if status == "ambiguous":
            return f"Сущность не удалось однозначно разрешить. Близкие варианты: {shortlist}."
        return f"Сущность не найдена в доступном коде. Ближайшие варианты: {shortlist}."
@@ -0,0 +1,309 @@
from __future__ import annotations
import logging
from time import perf_counter
from app.modules.agent.code_qa_runtime.answer_policy import CodeQaAnswerPolicy
from app.modules.agent.code_qa_runtime.models import CodeQaDraftAnswer, CodeQaExecutionState, CodeQaFinalResult
from app.modules.agent.code_qa_runtime.post_gate import CodeQaPostEvidenceGate
from app.modules.agent.code_qa_runtime.prompt_payload_builder import CodeQaPromptPayloadBuilder
from app.modules.agent.code_qa_runtime.prompt_selector import CodeQaPromptSelector
from app.modules.agent.code_qa_runtime.repair import CodeQaAnswerRepairService
from app.modules.agent.code_qa_runtime.repo_context import CodeQaRepoContextFactory
from app.modules.agent.code_qa_runtime.retrieval_adapter import CodeQaRetrievalAdapter
from app.modules.agent.llm import AgentLlmService
from app.modules.rag.code_qa_pipeline.answer_synthesis import build_answer_synthesis_input
from app.modules.rag.code_qa_pipeline.diagnostics import build_diagnostics_report
from app.modules.rag.code_qa_pipeline.evidence_bundle_builder import build_evidence_bundle
from app.modules.rag.code_qa_pipeline.evidence_gate import evaluate_evidence
from app.modules.rag.code_qa_pipeline.retrieval_request_builder import build_retrieval_request
from app.modules.rag.code_qa_pipeline.retrieval_result_builder import build_retrieval_result
from app.modules.rag.intent_router_v2 import ConversationState, IntentRouterV2
from app.modules.rag.intent_router_v2.models import SymbolResolution
# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)
class CodeQaRuntimeExecutor:
    """Runs a single code-QA request end to end.

    Stages: intent routing -> retrieval -> evidence bundle and pre-answer
    evidence gate -> answer policy -> optional LLM draft -> post-answer
    validation with an optional repair pass. Every stage appends an entry to
    the runtime trace that is returned with the final result.
    """

    def __init__(
        self,
        llm: AgentLlmService | None,
        *,
        router: IntentRouterV2 | None = None,
        retrieval: CodeQaRetrievalAdapter | None = None,
        repo_context_factory: CodeQaRepoContextFactory | None = None,
        prompt_selector: CodeQaPromptSelector | None = None,
        payload_builder: CodeQaPromptPayloadBuilder | None = None,
        answer_policy: CodeQaAnswerPolicy | None = None,
        post_gate: CodeQaPostEvidenceGate | None = None,
    ) -> None:
        """Wire collaborators, building a default for each one not supplied.

        Args:
            llm: LLM service. When ``None`` no draft is generated and no
                repair service is created.
            router: Intent router.
            retrieval: Retrieval adapter.
            repo_context_factory: Builds the repo context from ``files_map``.
            prompt_selector: Chooses the prompt name.
            payload_builder: Serializes the prompt payload.
            answer_policy: Decides whether the LLM is called.
            post_gate: Validates the produced answer.
        """
        self._llm = llm
        self._router = router or IntentRouterV2()
        self._retrieval = retrieval or CodeQaRetrievalAdapter()
        self._repo_context_factory = repo_context_factory or CodeQaRepoContextFactory()
        self._prompt_selector = prompt_selector or CodeQaPromptSelector()
        self._payload_builder = payload_builder or CodeQaPromptPayloadBuilder()
        self._answer_policy = answer_policy or CodeQaAnswerPolicy()
        self._post_gate = post_gate or CodeQaPostEvidenceGate()
        self._repair = CodeQaAnswerRepairService(llm) if llm is not None else None

    def execute(self, *, user_query: str, rag_session_id: str, files_map: dict[str, dict] | None = None) -> CodeQaFinalResult:
        """Answer ``user_query`` against the given RAG session.

        Args:
            user_query: The question as typed by the user.
            rag_session_id: Session whose index is queried.
            files_map: Optional path -> file-info mapping, used only to build
                the repo context.

        Returns:
            The final result with answer, diagnostics and runtime trace. The
            answer is empty when the policy asks for an LLM answer but no LLM
            is configured.
        """
        timings_ms: dict[str, int] = {}
        runtime_trace: list[dict] = []
        state = CodeQaExecutionState(
            user_query=user_query,
            rag_session_id=rag_session_id,
            conversation_state=ConversationState(),
            repo_context=self._repo_context_factory.build(files_map),
        )

        # --- routing -------------------------------------------------------
        started = perf_counter()
        state.router_result = self._router.route(user_query, state.conversation_state, state.repo_context)
        timings_ms["router"] = self._elapsed_ms(started)
        runtime_trace.append(
            {
                "step": "router",
                "status": "completed",
                "timings_ms": {"router": timings_ms["router"]},
                "output": {
                    "intent": state.router_result.intent,
                    "sub_intent": state.router_result.query_plan.sub_intent,
                    "graph_id": state.router_result.graph_id,
                    "conversation_mode": state.router_result.conversation_mode,
                },
            }
        )

        # --- retrieval -----------------------------------------------------
        state.retrieval_request = build_retrieval_request(state.router_result, rag_session_id)
        started = perf_counter()
        raw_rows = self._retrieve(state)
        timings_ms["retrieval"] = self._elapsed_ms(started)
        # A "pending" symbol resolution is settled against the retrieved rows
        # and written back onto a copy of the router result.
        symbol_resolution = self._resolve_symbol(state.router_result.symbol_resolution.model_dump(), raw_rows)
        state.router_result = state.router_result.model_copy(update={"symbol_resolution": SymbolResolution(**symbol_resolution)})
        retrieval_report = self._retrieval.consume_retrieval_report()
        state.retrieval_result = build_retrieval_result(raw_rows, retrieval_report, symbol_resolution)
        if self._is_unresolved_explain(state.retrieval_request.sub_intent, symbol_resolution):
            # An EXPLAIN without a resolved target must not answer from
            # unrelated rows, so the answer path gets an empty row set.
            state.retrieval_result = build_retrieval_result([], retrieval_report, symbol_resolution)
        runtime_trace.append(
            {
                "step": "retrieval",
                "status": "completed",
                "timings_ms": {"retrieval": timings_ms["retrieval"]},
                "output": {
                    "rag_count": len(raw_rows),
                    "answer_path_rag_count": len(state.retrieval_result.raw_rows),
                    "resolved_symbol_status": symbol_resolution.get("status"),
                    "resolved_symbol": symbol_resolution.get("resolved_symbol"),
                    "requested_layers": list(state.retrieval_request.requested_layers or []),
                },
                "diagnostics": retrieval_report or {},
            }
        )

        # --- pre-answer evidence gate -----------------------------------
        state.evidence_pack = build_evidence_bundle(state.retrieval_result, state.router_result)
        if self._is_unresolved_explain(state.retrieval_request.sub_intent, symbol_resolution):
            state.evidence_pack.sufficient = False
            state.evidence_pack.failure_reasons = ["target_not_resolved"]
        gate_decision = evaluate_evidence(state.evidence_pack)
        # The gate evaluation itself is not timed; a constant 0 is recorded.
        timings_ms["pre_evidence_gate"] = 0
        state.answer_mode = "normal" if gate_decision.passed else "degraded"
        state.degraded_message = gate_decision.degraded_message
        runtime_trace.append(
            {
                "step": "pre_evidence_gate",
                "status": "passed" if gate_decision.passed else "blocked",
                "timings_ms": {"pre_evidence_gate": timings_ms["pre_evidence_gate"]},
                "output": {
                    "passed": gate_decision.passed,
                    "failure_reasons": list(gate_decision.failure_reasons),
                    "degraded_message": gate_decision.degraded_message,
                },
            }
        )

        # --- answer policy: short-circuit without the LLM ----------------
        decision = self._answer_policy.decide(router_result=state.router_result, gate_decision=gate_decision)
        if not decision.should_call_llm:
            state.answer_mode = decision.answer_mode
            runtime_trace.append(
                {
                    "step": "llm",
                    "status": "skipped",
                    "output": {
                        "reason": "policy_short_circuit",
                        "answer_mode": decision.answer_mode,
                    },
                }
            )
            runtime_trace.append(
                {
                    "step": "post_evidence_gate",
                    "status": "skipped",
                    "output": {"reason": "no_draft_answer"},
                }
            )
            return self._finalize(
                state,
                draft=None,
                final_answer=decision.answer,
                repair_used=False,
                llm_used=False,
                timings_ms=timings_ms,
                runtime_trace=runtime_trace,
            )

        if self._llm is None:
            runtime_trace.append(
                {
                    "step": "llm",
                    "status": "skipped",
                    "output": {"reason": "llm_unavailable"},
                }
            )
            return self._finalize(
                state,
                draft=None,
                final_answer="",
                repair_used=False,
                llm_used=False,
                timings_ms=timings_ms,
                runtime_trace=runtime_trace,
            )

        # --- LLM draft -----------------------------------------------------
        state.synthesis_input = build_answer_synthesis_input(user_query, state.evidence_pack)
        prompt_name = self._prompt_selector.select(sub_intent=state.retrieval_request.sub_intent, answer_mode=state.answer_mode)
        prompt_payload = self._payload_builder.build(
            user_query=user_query,
            synthesis_input=state.synthesis_input,
            evidence_pack=state.evidence_pack,
            answer_mode=state.answer_mode,
        )
        started = perf_counter()
        draft = CodeQaDraftAnswer(
            prompt_name=prompt_name,
            prompt_payload=prompt_payload,
            answer=self._llm.generate(prompt_name, prompt_payload, log_context="graph.project_qa.code_qa.answer").strip(),
        )
        timings_ms["llm"] = self._elapsed_ms(started)
        runtime_trace.append(
            {
                "step": "llm",
                "status": "completed",
                "timings_ms": {"llm": timings_ms["llm"]},
                "output": {
                    "prompt_name": prompt_name,
                    "answer_preview": draft.answer[:300],
                },
            }
        )

        # --- post-answer validation with one repair attempt ---------------
        validation = self._post_gate.validate(answer=draft.answer, answer_mode=state.answer_mode, degraded_message=state.degraded_message)
        final_answer = draft.answer
        repair_used = False
        if not validation.passed and self._repair is not None:
            started = perf_counter()
            final_answer = self._repair.repair(draft_answer=draft.answer, validation=validation, prompt_payload=prompt_payload)
            repair_used = True
            timings_ms["repair"] = self._elapsed_ms(started)
            validation = self._post_gate.validate(answer=final_answer, answer_mode=state.answer_mode, degraded_message=state.degraded_message)
            # Last resort: fall back to the gate's own degraded message.
            if not validation.passed and state.degraded_message:
                final_answer = state.degraded_message
        runtime_trace.append(
            {
                "step": "post_evidence_gate",
                "status": "passed" if validation.passed else "failed",
                "timings_ms": {
                    "post_evidence_gate": 0,
                    "repair": timings_ms.get("repair", 0),
                },
                "output": {
                    "passed": validation.passed,
                    "reasons": list(validation.reasons),
                    "repair_used": repair_used,
                },
            }
        )
        return self._finalize(
            state,
            draft=draft,
            final_answer=final_answer,
            repair_used=repair_used,
            llm_used=True,
            validation=validation,
            timings_ms=timings_ms,
            runtime_trace=runtime_trace,
        )

    @staticmethod
    def _is_unresolved_explain(sub_intent: str, symbol_resolution: dict) -> bool:
        """Tell whether this is an EXPLAIN request whose target symbol was not resolved."""
        return sub_intent.upper() == "EXPLAIN" and symbol_resolution.get("status") in {"not_found", "ambiguous"}

    def _retrieve(self, state: CodeQaExecutionState) -> list[dict]:
        """Fetch rows for the request held in ``state``.

        OPEN_FILE requests that carry a path scope fetch those files exactly
        from the source-chunk layer; everything else follows the retrieval
        plan.
        """
        assert state.retrieval_request is not None
        request = state.retrieval_request
        # Compared case-insensitively, like every other sub-intent check in
        # this runtime; a lower-case "open_file" used to fall through to the
        # plan-based retrieval.
        if request.sub_intent.upper() == "OPEN_FILE" and request.path_scope:
            return self._retrieval.retrieve_exact_files(
                state.rag_session_id,
                paths=request.path_scope,
                layers=["C0_SOURCE_CHUNKS"],
                limit=200,
                query=request.query,
                ranking_profile=str(getattr(request.retrieval_spec, "rerank_profile", "") or ""),
            )
        return self._retrieval.retrieve_with_plan(
            state.rag_session_id,
            request.query,
            request.retrieval_spec,
            request.retrieval_constraints,
            query_plan=request.query_plan,
        )

    def _resolve_symbol(self, initial: dict, rag_rows: list[dict]) -> dict:
        """Settle a ``pending`` symbol resolution against the retrieved rows.

        Any other status is returned untouched. Titles of symbol-catalog rows
        are matched against the router's alternatives: an exact hit resolves
        the symbol, catalog rows without a hit make it ambiguous, and no
        catalog rows at all means not found.
        """
        if str(initial.get("status") or "") != "pending":
            return initial
        candidates = [str(item).strip() for item in initial.get("alternatives", []) if str(item).strip()]
        found = [
            str(row.get("title") or "").strip()
            for row in rag_rows
            if str(row.get("layer") or "") == "C1_SYMBOL_CATALOG" and str(row.get("title") or "").strip()
        ]
        exact = next((item for item in found if item in candidates), None)
        if exact:
            return {"status": "resolved", "resolved_symbol": exact, "alternatives": found[:5], "confidence": 0.99}
        if found:
            return {"status": "ambiguous", "resolved_symbol": None, "alternatives": found[:5], "confidence": 0.55}
        return {"status": "not_found", "resolved_symbol": None, "alternatives": [], "confidence": 0.0}

    def _finalize(
        self,
        state: CodeQaExecutionState,
        *,
        draft: CodeQaDraftAnswer | None,
        final_answer: str,
        repair_used: bool,
        llm_used: bool,
        validation=None,
        timings_ms: dict[str, int] | None = None,
        runtime_trace: list[dict] | None = None,
    ) -> CodeQaFinalResult:
        """Assemble the final result, build diagnostics and log a summary line.

        When ``validation`` is not supplied the post gate is run on
        ``final_answer`` so the result always carries a validation verdict.
        """
        diagnostics = build_diagnostics_report(
            router_result=state.router_result,
            retrieval_request=state.retrieval_request,
            retrieval_result=state.retrieval_result,
            evidence_bundle=state.evidence_pack,
            answer_mode=state.answer_mode,
            timings_ms=timings_ms or {},
        )
        result = CodeQaFinalResult(
            final_answer=final_answer.strip(),
            answer_mode=state.answer_mode,
            repair_used=repair_used,
            llm_used=llm_used,
            draft_answer=draft,
            validation=validation or self._post_gate.validate(answer=final_answer, answer_mode=state.answer_mode, degraded_message=state.degraded_message),
            router_result=state.router_result,
            retrieval_request=state.retrieval_request,
            retrieval_result=state.retrieval_result,
            evidence_pack=state.evidence_pack,
            diagnostics=diagnostics,
            runtime_trace=list(runtime_trace or []),
        )
        LOGGER.warning(
            "code qa runtime executed: intent=%s sub_intent=%s answer_mode=%s repair_used=%s llm_used=%s",
            state.router_result.intent,
            state.router_result.query_plan.sub_intent,
            result.answer_mode,
            result.repair_used,
            result.llm_used,
        )
        return result

    def _elapsed_ms(self, started: float) -> int:
        """Whole milliseconds elapsed since the ``perf_counter`` reading ``started``."""
        return int((perf_counter() - started) * 1000)
@@ -0,0 +1,73 @@
from __future__ import annotations
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from app.modules.rag.code_qa_pipeline.contracts import (
AnswerSynthesisInput as CodeQaAnswerSynthesisInput,
)
from app.modules.rag.code_qa_pipeline.contracts import (
DiagnosticsReport as CodeQaDiagnosticsReport,
)
from app.modules.rag.code_qa_pipeline.contracts import (
EvidenceBundle as CodeQaEvidencePack,
)
from app.modules.rag.code_qa_pipeline.contracts import (
RetrievalRequest as CodeQaRetrievalRequest,
)
from app.modules.rag.code_qa_pipeline.contracts import (
RetrievalResult as CodeQaRetrievalResult,
)
from app.modules.rag.intent_router_v2.models import ConversationState, IntentRouterResult, RepoContext
class CodeQaDraftAnswer(BaseModel):
    # LLM draft together with the prompt that produced it.
    # Unknown fields are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    # Name of the prompt that was used.
    prompt_name: str
    # Serialized payload string that was sent with the prompt.
    prompt_payload: str
    # Draft text; defaults to an empty string.
    answer: str = ""
class CodeQaValidationResult(BaseModel):
    # Verdict of the post-answer gate. Unknown fields are rejected.
    model_config = ConfigDict(extra="forbid")

    # True when the answer was accepted; the default is "not passed".
    passed: bool = False
    # Suggested follow-up for the caller; defaults to "return".
    action: str = "return"
    # Reason codes explaining a failed verdict.
    reasons: list[str] = Field(default_factory=list)
class CodeQaFinalResult(BaseModel):
    # Everything the runtime hands back for one request: the answer plus the
    # intermediate artefacts needed to inspect how it was produced.
    # Unknown fields are rejected.
    model_config = ConfigDict(extra="forbid")

    # --- outcome ---
    final_answer: str
    answer_mode: str = "normal"
    repair_used: bool = False
    llm_used: bool = False
    # None when no LLM draft was generated.
    draft_answer: CodeQaDraftAnswer | None = None
    validation: CodeQaValidationResult = Field(default_factory=CodeQaValidationResult)

    # --- pipeline artefacts ---
    router_result: IntentRouterResult | None = None
    retrieval_request: CodeQaRetrievalRequest | None = None
    retrieval_result: CodeQaRetrievalResult | None = None
    evidence_pack: CodeQaEvidencePack | None = None
    # Required: a diagnostics report must always be supplied.
    diagnostics: CodeQaDiagnosticsReport
    # One dict per executed or skipped stage.
    runtime_trace: list[dict[str, Any]] = Field(default_factory=list)
class CodeQaExecutionState(BaseModel):
    # Mutable working state of one request; the optional fields are filled in
    # as the stages run. Unknown fields are rejected.
    model_config = ConfigDict(extra="forbid")

    # --- request inputs ---
    user_query: str
    rag_session_id: str
    conversation_state: ConversationState = Field(default_factory=ConversationState)
    repo_context: RepoContext = Field(default_factory=RepoContext)

    # --- stage outputs (None until the stage has run) ---
    router_result: IntentRouterResult | None = None
    retrieval_request: CodeQaRetrievalRequest | None = None
    retrieval_result: CodeQaRetrievalResult | None = None
    evidence_pack: CodeQaEvidencePack | None = None
    synthesis_input: CodeQaAnswerSynthesisInput | None = None
    diagnostics: CodeQaDiagnosticsReport | None = None

    # --- answer bookkeeping ---
    answer_mode: str = "normal"
    degraded_message: str = ""
    final_result: CodeQaFinalResult | None = None
@@ -0,0 +1,23 @@
from __future__ import annotations
from app.modules.agent.code_qa_runtime.models import CodeQaValidationResult
class CodeQaPostEvidenceGate:
    """Checks that an answer is non-empty and carries the wording its mode requires."""

    def validate(
        self,
        *,
        answer: str,
        answer_mode: str,
        degraded_message: str,
    ) -> CodeQaValidationResult:
        """Validate ``answer`` for ``answer_mode``.

        The first failing rule wins: empty answer, missing "недостат" marker
        in degraded/insufficient mode, missing "не найден" phrase in
        not_found mode, or an answer shorter than 24 characters in a
        non-normal mode when a degraded message exists.
        """
        text = (answer or "").strip()
        lowered = text.lower()

        failure: str | None = None
        if not text:
            failure = "empty_answer"
        elif answer_mode in {"degraded", "insufficient"} and "недостат" not in lowered:
            failure = "degraded_answer_missing_guardrail"
        elif answer_mode == "not_found" and "не найден" not in lowered:
            failure = "not_found_answer_missing_phrase"
        elif degraded_message and answer_mode != "normal" and len(text) < 24:
            failure = "answer_too_short"

        if failure is None:
            return CodeQaValidationResult(passed=True, action="return")
        return CodeQaValidationResult(passed=False, action="repair", reasons=[failure])
@@ -0,0 +1,37 @@
from __future__ import annotations
import json
from app.modules.rag.code_qa_pipeline.contracts import AnswerSynthesisInput, EvidenceBundle
# Russian-language legend of the C0-C4 retrieval layers, one bullet per layer.
# It is embedded verbatim in the prompt payload under the "layer_guide" key.
_LAYER_GUIDE = (
"- C0_SOURCE_CHUNKS: фактический код, это основной источник деталей.\n"
"- C1_SYMBOL_CATALOG: объявления и сигнатуры символов.\n"
"- C2_DEPENDENCY_GRAPH: связи вызовов и зависимостей.\n"
"- C3_ENTRYPOINTS: подтвержденные точки входа.\n"
"- C4_SEMANTIC_ROLES: вспомогательная роль компонента, использовать осторожно."
)
class CodeQaPromptPayloadBuilder:
    """Serializes everything the answer prompt needs into one JSON string."""

    def build(
        self,
        *,
        user_query: str,
        synthesis_input: AnswerSynthesisInput,
        evidence_pack: EvidenceBundle,
        answer_mode: str,
    ) -> str:
        """Return the prompt payload as pretty-printed JSON (non-ASCII kept as is).

        Key order is fixed: the query, the resolved scenario and target, the
        answer mode, the context and summary sections, the retrieval summary
        and finally the layer guide.
        """
        payload: dict[str, object] = {"user_query": user_query}
        for field_name in ("resolved_scenario", "resolved_target"):
            payload[field_name] = getattr(synthesis_input, field_name)
        payload["answer_mode"] = answer_mode
        for field_name in ("fast_context", "deep_context", "evidence_summary", "diagnostic_hints"):
            payload[field_name] = getattr(synthesis_input, field_name)
        payload["retrieval_summary"] = evidence_pack.retrieval_summary
        payload["layer_guide"] = _LAYER_GUIDE
        return json.dumps(payload, ensure_ascii=False, indent=2)
@@ -0,0 +1,19 @@
from __future__ import annotations
class CodeQaPromptSelector:
    """Maps a sub-intent and answer mode to the name of the answer prompt."""

    # Prompt used when the sub-intent has no dedicated entry.
    _DEFAULT_PROMPT = "code_qa_explain_answer"
    # Answer modes that always use the degraded prompt.
    _NON_NORMAL_MODES = frozenset({"degraded", "not_found", "insufficient"})
    _PROMPTS = {
        "ARCHITECTURE": "code_qa_architecture_answer",
        "EXPLAIN": "code_qa_explain_answer",
        "EXPLAIN_LOCAL": "code_qa_explain_local_answer",
        "FIND_ENTRYPOINTS": "code_qa_find_entrypoints_answer",
        "FIND_TESTS": "code_qa_find_tests_answer",
        "GENERAL_QA": "code_qa_general_answer",
        "OPEN_FILE": "code_qa_open_file_answer",
        "TRACE_FLOW": "code_qa_trace_flow_answer",
    }

    def select(self, *, sub_intent: str, answer_mode: str) -> str:
        """Return the prompt name; the sub-intent is matched case-insensitively."""
        if answer_mode in self._NON_NORMAL_MODES:
            return "code_qa_degraded_answer"
        lookup_key = sub_intent.upper()
        if lookup_key in self._PROMPTS:
            return self._PROMPTS[lookup_key]
        return self._DEFAULT_PROMPT
@@ -0,0 +1,33 @@
from __future__ import annotations
import json
from app.modules.agent.code_qa_runtime.models import CodeQaValidationResult
from app.modules.agent.llm import AgentLlmService
class CodeQaAnswerRepairService:
    """Asks the LLM to rewrite a draft answer that failed validation."""

    def __init__(self, llm: AgentLlmService) -> None:
        """Keep the LLM service used for repair calls."""
        self._llm = llm

    def repair(
        self,
        *,
        draft_answer: str,
        validation: CodeQaValidationResult,
        prompt_payload: str,
    ) -> str:
        """Return the repaired answer with surrounding whitespace removed.

        The repair prompt receives the draft, the validation reason codes and
        the original prompt payload as one pretty-printed JSON document.
        """
        request_body = {
            "draft_answer": draft_answer,
            "validation_reasons": validation.reasons,
            "prompt_payload": prompt_payload,
        }
        serialized = json.dumps(request_body, ensure_ascii=False, indent=2)
        repaired = self._llm.generate(
            "code_qa_repair_answer",
            serialized,
            log_context="graph.project_qa.code_qa.repair",
        )
        return repaired.strip()
@@ -0,0 +1,36 @@
from __future__ import annotations
from app.modules.rag.contracts.enums import RagLayer
from app.modules.rag.intent_router_v2.models import RepoContext
class CodeQaRepoContextFactory:
    """Builds the ``RepoContext`` handed to the intent router."""

    # Layers advertised as available, code layers first, then docs layers.
    _KNOWN_LAYERS = [
        RagLayer.CODE_ENTRYPOINTS,
        RagLayer.CODE_SYMBOL_CATALOG,
        RagLayer.CODE_DEPENDENCY_GRAPH,
        RagLayer.CODE_SEMANTIC_ROLES,
        RagLayer.CODE_SOURCE_CHUNKS,
        RagLayer.DOCS_MODULE_CATALOG,
        RagLayer.DOCS_FACT_INDEX,
        RagLayer.DOCS_SECTION_INDEX,
        RagLayer.DOCS_POLICY_INDEX,
    ]

    def build(self, files_map: dict[str, dict] | None = None) -> RepoContext:
        """Create a context for ``files_map``; languages fall back to ``["python"]``."""
        detected = self._languages(files_map or {})
        if not detected:
            detected = ["python"]
        return RepoContext(
            languages=detected,
            available_domains=["CODE", "DOCS"],
            available_layers=list(self._KNOWN_LAYERS),
        )

    def _languages(self, files_map: dict[str, dict]) -> list[str]:
        """List languages seen among the paths, in first-seen order, by file suffix."""
        suffix_groups = (
            ((".py",), "python"),
            ((".md", ".rst", ".txt"), "markdown"),
        )
        seen: list[str] = []
        for path in files_map:
            lowered = path.lower()
            for suffixes, language in suffix_groups:
                if lowered.endswith(suffixes) and language not in seen:
                    seen.append(language)
        return seen
@@ -0,0 +1,241 @@
from __future__ import annotations
from time import perf_counter
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from app.modules.rag.persistence.repository import RagRepository
class SessionEmbeddingDimensions:
    """Looks up, and remembers, the embedding vector size used by a RAG session."""

    def __init__(self, repository: RagRepository | None = None) -> None:
        """Start with an empty per-session cache; the repository is only stored."""
        self._cache: dict[str, int] = {}
        self._repository = repository

    def resolve(self, rag_session_id: str) -> int:
        """Return the embedding dimension for the session, or 0 when none is found.

        The first lookup per session queries ``rag_chunks``; the answer,
        including 0, is cached for later calls.
        """
        try:
            return self._cache[rag_session_id]
        except KeyError:
            pass

        # Imported lazily so that importing this module does not need the DB layer.
        from app.modules.shared.db import get_engine
        from sqlalchemy import text

        statement = text(
            """
SELECT vector_dims(embedding) AS dim
FROM rag_chunks
WHERE rag_session_id = :sid AND embedding IS NOT NULL
LIMIT 1
"""
        )
        with get_engine().connect() as conn:
            row = conn.execute(statement, {"sid": rag_session_id}).mappings().first()

        dim = 0
        if row and row.get("dim"):
            dim = int(row["dim"])
        self._cache[rag_session_id] = dim
        return dim
class CodeQaRetrievalAdapter:
def __init__(self, repository: RagRepository | None = None) -> None:
    """Bind the adapter to a repository, creating a default one when none is given."""
    if repository is not None:
        self._repository = repository
    else:
        # Imported lazily: at module level the name exists only for type checking.
        from app.modules.rag.persistence.repository import RagRepository

        self._repository = RagRepository()
    self._dim_resolver = SessionEmbeddingDimensions(self._repository)
    # Report of the most recent retrieval call; None until one has run.
    self._last_report: dict[str, Any] | None = None
def retrieve_with_plan(
self,
rag_session_id: str,
query: str,
retrieval_spec,
retrieval_constraints=None,
*,
query_plan=None,
) -> list[dict]:
rows: list[dict] = []
executed_layers: list[str] = []
per_layer_ms: dict[str, int] = {}
for layer_query in list(getattr(retrieval_spec, "layer_queries", []) or []):
layer_id = str(layer_query.layer_id)
executed_layers.append(layer_id)
started = perf_counter()
layer_rows = self._retrieve_layer(
rag_session_id=rag_session_id,
query=query,
layer_id=layer_id,
limit=max(1, int(layer_query.top_k)),
path_scope=list(getattr(retrieval_spec.filters, "path_scope", []) or []),
retrieval_constraints=retrieval_constraints,
include_tests=str(getattr(retrieval_spec.filters, "test_policy", "EXCLUDE") or "EXCLUDE") == "INCLUDE",
)
per_layer_ms[layer_id] = int((perf_counter() - started) * 1000)
rows.extend(layer_rows)
self._last_report = {
"executed_layers": executed_layers,
"retrieval_mode_by_layer": {layer_id: "vector" for layer_id in executed_layers},
"top_k_by_layer": {str(item.layer_id): int(item.top_k) for item in list(getattr(retrieval_spec, "layer_queries", []) or [])},
"filters_by_layer": {
layer_id: {"path_scope": list(getattr(retrieval_spec.filters, "path_scope", []) or [])}
for layer_id in executed_layers
},
"fallback": {"used": False, "reason": None},
"retrieval_by_layer_ms": per_layer_ms,
}
return self._dedupe(rows)
def retrieve_exact_files(
self,
rag_session_id: str,
*,
repo_id: str | None = None,
paths: list[str],
layers: list[str] | None = None,
limit: int = 200,
query: str = "",
ranking_profile: str = "",
) -> list[dict]:
started = perf_counter()
rows = self._repository.retrieve_exact_files(rag_session_id, repo_id=repo_id, paths=paths, layers=layers, limit=limit)
layer_id = list(layers or ["C0_SOURCE_CHUNKS"])[0]
self._last_report = {
"executed_layers": [layer_id],
"retrieval_mode_by_layer": {layer_id: "exact_path_fetch"},
"top_k_by_layer": {layer_id: int(limit)},
"filters_by_layer": {layer_id: {"path_scope": list(paths)}},
"requests": [{"layer": layer_id, "query": query, "ranking_profile": ranking_profile}],
"fallback": {"used": False, "reason": None},
"retrieval_by_layer_ms": {layer_id: int((perf_counter() - started) * 1000)},
}
return self._dedupe(rows)
def hydrate_resolved_symbol_sources(
self,
rag_session_id: str,
base_query: str,
rag_rows: list[dict],
symbol_resolution: dict,
retrieval_spec,
retrieval_constraints=None,
) -> list[dict]:
if str(symbol_resolution.get("status") or "") != "resolved":
return rag_rows
target = str(symbol_resolution.get("resolved_symbol") or "").strip()
if not target:
return rag_rows
symbol_row = next(
(
row
for row in rag_rows
if str(row.get("layer") or "") == "C1_SYMBOL_CATALOG"
and target in {str(row.get("title") or "").strip(), str(dict(row.get("metadata") or {}).get("qname") or "").strip()}
),
None,
)
if symbol_row is None:
return rag_rows
extra = self.retrieve_exact_files(
rag_session_id,
paths=[str(symbol_row.get("path") or "")],
layers=["C0_SOURCE_CHUNKS"],
limit=6,
query=base_query,
ranking_profile=str(getattr(retrieval_spec, "rerank_profile", "") or ""),
)
return self._dedupe([*rag_rows, *extra])
def force_symbol_context_c0(
self,
rag_session_id: str,
*,
rag_rows: list[dict],
symbol_resolution: dict,
limit: int = 20,
) -> list[dict]:
if str(symbol_resolution.get("status") or "") != "resolved":
return rag_rows
target = str(symbol_resolution.get("resolved_symbol") or "").strip()
target_row = next((row for row in rag_rows if str(row.get("title") or "").strip() == target), None)
if target_row is None:
return rag_rows
c0_rows = self.retrieve_exact_files(
rag_session_id,
paths=[str(target_row.get("path") or "")],
layers=["C0_SOURCE_CHUNKS"],
limit=limit,
query=target,
ranking_profile="symbol_context",
)
preserved = [row for row in rag_rows if str(row.get("layer") or "") != "C0_SOURCE_CHUNKS"]
return self._dedupe([*preserved, *c0_rows])
def consume_retrieval_report(self) -> dict[str, Any] | None:
report = self._last_report
self._last_report = None
return report
def _retrieve_layer(
self,
*,
rag_session_id: str,
query: str,
layer_id: str,
limit: int,
path_scope: list[str],
retrieval_constraints,
include_tests: bool,
) -> list[dict]:
exclude_prefixes, exclude_like = self._test_filters(include_tests)
if retrieval_constraints is not None:
exclude_globs = [item.lower() for item in list(getattr(retrieval_constraints, "exclude_globs", []) or [])]
exclude_like.extend(self._glob_to_like(item) for item in exclude_globs if "*" in item)
dim = self._dim_resolver.resolve(rag_session_id)
if dim <= 0:
if layer_id != "C0_SOURCE_CHUNKS":
return []
return self._repository.retrieve_lexical_code(
rag_session_id,
query_text=query,
limit=limit,
path_prefixes=path_scope or None,
exclude_path_prefixes=exclude_prefixes or None,
exclude_like_patterns=exclude_like or None,
prefer_non_tests=not include_tests,
)
return self._repository.retrieve(
rag_session_id,
[0.0] * dim,
query_text=query,
limit=limit,
layers=[layer_id],
path_prefixes=path_scope or None,
exclude_path_prefixes=exclude_prefixes or None,
exclude_like_patterns=exclude_like or None,
prefer_non_tests=not include_tests,
)
def _test_filters(self, include_tests: bool) -> tuple[list[str], list[str]]:
if include_tests:
return [], []
from app.modules.rag.retrieval.test_filter import build_test_filters
filters = build_test_filters()
return list(filters.exclude_path_prefixes), list(filters.exclude_like_patterns)
def _glob_to_like(self, glob: str) -> str:
return glob.replace("**/", "%/").replace("**", "%").replace("*", "%").replace("?", "_")
def _dedupe(self, rows: list[dict]) -> list[dict]:
result: list[dict] = []
seen: set[tuple[str, str, str, int | None, int | None]] = set()
for row in rows:
path = str(row.get("path") or "")
layer = str(row.get("layer") or "")
title = str(row.get("title") or "")
key = (layer, path, title, row.get("span_start"), row.get("span_end"))
if key in seen:
continue
seen.add(key)
result.append(row)
return result
@@ -0,0 +1,20 @@
from datetime import datetime, timezone
from urllib.parse import urlparse
from uuid import uuid4
from app.core.exceptions import AppError
from app.schemas.common import ModuleName
class ConfluenceService:
    """Placeholder Confluence client: validates the URL and returns stub page data."""

    async def fetch_page(self, url: str) -> dict:
        """Return a page payload for ``url``.

        The payload is synthesized locally: a random ``page_id``, a fixed
        title and version, a content line that echoes ``url``, and the
        current UTC time in ISO format.

        Raises:
            AppError: With code "invalid_url" when ``url`` is not an absolute
                http(s) URL with a host.
        """
        parsed = urlparse(url)
        # Exact scheme match: a prefix test would also accept e.g. "httpx://".
        # A URL without a host cannot address a page either.
        if parsed.scheme not in {"http", "https"} or not parsed.netloc:
            raise AppError("invalid_url", "Invalid Confluence URL", ModuleName.CONFLUENCE)
        return {
            "page_id": str(uuid4()),
            "title": "Confluence page",
            "content_markdown": f"Fetched content from {url}",
            "version": 1,
            "fetched_at": datetime.now(timezone.utc).isoformat(),
        }
@@ -0,0 +1,56 @@
# Public names of this package. None of them is imported eagerly: each is
# resolved on attribute access by the module-level ``__getattr__`` in this file.
__all__ = [
"BaseGraphFactory",
"CodeQaGraphFactory",
"DocsGraphFactory",
"ProjectQaAnalysisGraphFactory",
"ProjectQaAnswerGraphFactory",
"ProjectQaClassificationGraphFactory",
"ProjectQaConversationGraphFactory",
"ProjectEditsGraphFactory",
"ProjectQaGraphFactory",
"ProjectQaRetrievalGraphFactory",
]
# Export name -> module that defines it; consulted by ``__getattr__`` below.
_LAZY_EXPORTS = {
    "BaseGraphFactory": "app.modules.agent.engine.graphs.base_graph",
    "CodeQaGraphFactory": "app.modules.agent.engine.graphs.code_qa_graph",
    "DocsGraphFactory": "app.modules.agent.engine.graphs.docs_graph",
    "ProjectQaConversationGraphFactory": "app.modules.agent.engine.graphs.project_qa_step_graphs",
    "ProjectQaClassificationGraphFactory": "app.modules.agent.engine.graphs.project_qa_step_graphs",
    "ProjectQaRetrievalGraphFactory": "app.modules.agent.engine.graphs.project_qa_step_graphs",
    "ProjectQaAnalysisGraphFactory": "app.modules.agent.engine.graphs.project_qa_step_graphs",
    "ProjectQaAnswerGraphFactory": "app.modules.agent.engine.graphs.project_qa_step_graphs",
    "ProjectEditsGraphFactory": "app.modules.agent.engine.graphs.project_edits_graph",
    "ProjectQaGraphFactory": "app.modules.agent.engine.graphs.project_qa_graph",
}


def __getattr__(name: str):
    """Resolve a public export on first access (module-level ``__getattr__``).

    The defining module is imported only when one of its names is requested.

    Args:
        name: Attribute requested on this module.

    Returns:
        The object called ``name`` from the module that defines it.

    Raises:
        AttributeError: ``name`` is not one of the lazily exported names.
    """
    # Local import: only this function needs it.
    import importlib

    module_path = _LAZY_EXPORTS.get(name)
    if module_path is None:
        # Same wording as the interpreter's own message for a missing attribute.
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return getattr(importlib.import_module(module_path), name)
@@ -0,0 +1,73 @@
import logging
from langgraph.graph import END, START, StateGraph
from app.modules.agent.engine.graphs.progress import emit_progress_sync
from app.modules.agent.llm import AgentLlmService
from app.modules.agent.engine.graphs.state import AgentGraphState
LOGGER = logging.getLogger(__name__)
class BaseGraphFactory:
    """Builds the default two-step graph: pass context through, then answer."""

    def __init__(self, llm: AgentLlmService) -> None:
        """Keep the LLM service used by the answer step."""
        self._llm = llm

    def build(self, checkpointer=None):
        """Compile the graph START -> context -> answer -> END."""
        workflow = StateGraph(AgentGraphState)
        for node_name, handler in (
            ("context", self._context_node),
            ("answer", self._answer_node),
        ):
            workflow.add_node(node_name, handler)
        for source, target in (
            (START, "context"),
            ("context", "answer"),
            ("answer", END),
        ):
            workflow.add_edge(source, target)
        return workflow.compile(checkpointer=checkpointer)

    def _context_node(self, state: AgentGraphState) -> dict:
        """Return the RAG and Confluence context read from the state.

        Progress is emitted before and after the read, and the lengths of
        both texts are logged.
        """
        emit_progress_sync(
            state,
            stage="graph.default.context",
            message="Готовлю контекст ответа по данным запроса.",
        )
        rag_text = state.get("rag_context", "")
        confluence_text = state.get("confluence_context", "")
        emit_progress_sync(
            state,
            stage="graph.default.context.done",
            message="Контекст собран, перехожу к формированию ответа.",
        )
        LOGGER.warning(
            "graph step result: graph=default step=context rag_len=%s confluence_len=%s",
            len(rag_text or ""),
            len(confluence_text or ""),
        )
        return {"rag_context": rag_text, "confluence_context": confluence_text}

    def _answer_node(self, state: AgentGraphState) -> dict:
        """Ask the LLM for an answer built from the request and both contexts."""
        emit_progress_sync(
            state,
            stage="graph.default.answer",
            message="Формирую текст ответа для пользователя.",
        )
        request_text = state.get("message", "")
        rag_text = state.get("rag_context", "")
        confluence_text = state.get("confluence_context", "")
        sections = (
            f"User request:\n{request_text}",
            f"RAG context:\n{rag_text}",
            f"Confluence context:\n{confluence_text}",
        )
        reply = self._llm.generate(
            "general_answer",
            "\n\n".join(sections),
            log_context="graph.default.answer",
        )
        emit_progress_sync(
            state,
            stage="graph.default.answer.done",
            message="Черновик ответа подготовлен.",
        )
        LOGGER.warning(
            "graph step result: graph=default step=answer answer_len=%s",
            len(reply or ""),
        )
        return {"answer": reply}
@@ -0,0 +1,45 @@
from __future__ import annotations
import logging
from langgraph.graph import END, START, StateGraph
from app.modules.agent.code_qa_runtime import CodeQaRuntimeExecutor
from app.modules.agent.engine.graphs.progress import emit_progress_sync
from app.modules.agent.engine.graphs.state import AgentGraphState
from app.modules.agent.llm import AgentLlmService
LOGGER = logging.getLogger(__name__)
class CodeQaGraphFactory:
    """Builds a one-node graph that delegates the request to the CODE_QA runtime executor."""

    def __init__(self, llm: AgentLlmService) -> None:
        """Create the runtime executor around ``llm``."""
        self._executor = CodeQaRuntimeExecutor(llm)

    def build(self, checkpointer=None):
        """Compile the graph START -> execute_code_qa -> END."""
        node_name = "execute_code_qa"
        workflow = StateGraph(AgentGraphState)
        workflow.add_node(node_name, self._execute_code_qa)
        workflow.add_edge(START, node_name)
        workflow.add_edge(node_name, END)
        return workflow.compile(checkpointer=checkpointer)

    def _execute_code_qa(self, state: AgentGraphState) -> dict:
        """Run the executor on the request and return its answer and JSON dump.

        The state's ``project_id`` is passed to the executor as the RAG
        session id.
        """
        emit_progress_sync(
            state,
            stage="graph.project_qa.code_qa",
            message="Исполняю CODE_QA runtime pipeline.",
        )
        question = str(state.get("message", "") or "")
        session_id = str(state.get("project_id", "") or "")
        files = dict(state.get("files_map", {}) or {})
        outcome = self._executor.execute(
            user_query=question,
            rag_session_id=session_id,
            files_map=files,
        )
        LOGGER.warning(
            "graph step result: graph=project_qa/code_qa_runtime answer_mode=%s repair_used=%s",
            outcome.answer_mode,
            outcome.repair_used,
        )
        answer_text = outcome.final_answer
        dumped = outcome.model_dump(mode="json")
        return {"final_answer": answer_text, "code_qa_result": dumped}
@@ -0,0 +1,26 @@
from pathlib import Path
import os
class DocsExamplesLoader:
def __init__(self, prompts_dir: Path | None = None) -> None:
base = prompts_dir or Path(__file__).resolve().parents[2] / "prompts"
env_override = os.getenv("AGENT_PROMPTS_DIR", "").strip()
root = Path(env_override) if env_override else base
self._examples_dir = root / "docs_examples"
def load_bundle(self, *, max_files: int = 6, max_chars_per_file: int = 1800) -> str:
if not self._examples_dir.is_dir():
return ""
files = sorted(
[p for p in self._examples_dir.iterdir() if p.is_file() and p.suffix.lower() in {".md", ".txt"}],
key=lambda p: p.name.lower(),
)[:max_files]
chunks: list[str] = []
for path in files:
content = path.read_text(encoding="utf-8", errors="ignore").strip()
if not content:
continue
excerpt = content[:max_chars_per_file].strip()
chunks.append(f"### Example: {path.name}\n{excerpt}")
return "\n\n".join(chunks).strip()
@@ -0,0 +1,128 @@
from langgraph.graph import END, START, StateGraph
import logging
from app.modules.agent.engine.graphs.file_targeting import FileTargeting
from app.modules.agent.engine.graphs.docs_graph_logic import DocsContentComposer, DocsContextAnalyzer
from app.modules.agent.engine.graphs.progress import emit_progress_sync
from app.modules.agent.engine.graphs.state import AgentGraphState
from app.modules.agent.llm import AgentLlmService
LOGGER = logging.getLogger(__name__)
class DocsGraphFactory:
    """Builds the documentation graph: analyse, plan, generate, self-check, emit changes."""

    # Self-check rounds after which the result is accepted even if it failed.
    _max_validation_attempts = 2

    def __init__(self, llm: AgentLlmService) -> None:
        """Create the analyzer and composer that implement the graph steps."""
        self._targeting = FileTargeting()
        self._analyzer = DocsContextAnalyzer(llm, self._targeting)
        self._composer = DocsContentComposer(llm, self._targeting)

    def build(self, checkpointer=None):
        """Compile the graph.

        After the rules are loaded the flow branches into incremental or
        from-scratch planning; after the self-check it either loops back to
        generation or proceeds to the changeset.
        """
        workflow = StateGraph(AgentGraphState)
        for node_name, handler in (
            ("collect_code_context", self._collect_code_context),
            ("detect_existing_docs", self._detect_existing_docs),
            ("decide_strategy", self._decide_strategy),
            ("load_rules_and_examples", self._load_rules_and_examples),
            ("plan_incremental_changes", self._plan_incremental_changes),
            ("plan_new_document", self._plan_new_document),
            ("generate_doc_content", self._generate_doc_content),
            ("self_check", self._self_check),
            ("build_changeset", self._build_changeset),
            ("summarize_result", self._summarize_result),
        ):
            workflow.add_node(node_name, handler)

        for source, target in (
            (START, "collect_code_context"),
            ("collect_code_context", "detect_existing_docs"),
            ("detect_existing_docs", "decide_strategy"),
            ("decide_strategy", "load_rules_and_examples"),
        ):
            workflow.add_edge(source, target)
        workflow.add_conditional_edges(
            "load_rules_and_examples",
            self._route_after_rules_loading,
            {
                "incremental": "plan_incremental_changes",
                "from_scratch": "plan_new_document",
            },
        )
        for source, target in (
            ("plan_incremental_changes", "generate_doc_content"),
            ("plan_new_document", "generate_doc_content"),
            ("generate_doc_content", "self_check"),
        ):
            workflow.add_edge(source, target)
        workflow.add_conditional_edges(
            "self_check",
            self._route_after_self_check,
            {"retry": "generate_doc_content", "ready": "build_changeset"},
        )
        for source, target in (
            ("build_changeset", "summarize_result"),
            ("summarize_result", END),
        ):
            workflow.add_edge(source, target)
        return workflow.compile(checkpointer=checkpointer)

    def _collect_code_context(self, state: AgentGraphState) -> dict:
        """Graph node: collect code and file context."""
        return self._run_node(
            state,
            "collect_code_context",
            "Собираю контекст кода и файлов.",
            self._analyzer.collect_code_context,
        )

    def _detect_existing_docs(self, state: AgentGraphState) -> dict:
        """Graph node: detect existing documentation."""
        step = self._analyzer.detect_existing_docs
        return self._run_node(state, "detect_existing_docs", "Определяю, есть ли существующая документация проекта.", step)

    def _decide_strategy(self, state: AgentGraphState) -> dict:
        """Graph node: choose between incremental update and from-scratch."""
        return self._run_node(
            state,
            "decide_strategy",
            "Выбираю стратегию: инкремент или генерация с нуля.",
            self._analyzer.decide_strategy,
        )

    def _load_rules_and_examples(self, state: AgentGraphState) -> dict:
        """Graph node: load the rules/examples bundle."""
        step = self._composer.load_rules_and_examples
        return self._run_node(state, "load_rules_and_examples", "Загружаю правила и примеры формата документации.", step)

    def _plan_incremental_changes(self, state: AgentGraphState) -> dict:
        """Graph node: plan targeted changes to existing documentation."""

        def plan(current_state):
            return self._composer.plan_incremental_changes(current_state, self._analyzer)

        return self._run_node(state, "plan_incremental_changes", "Планирую точечные изменения в существующей документации.", plan)

    def _plan_new_document(self, state: AgentGraphState) -> dict:
        """Graph node: plan the structure of new documentation."""
        return self._run_node(
            state,
            "plan_new_document",
            "Проектирую структуру новой документации.",
            self._composer.plan_new_document,
        )

    def _generate_doc_content(self, state: AgentGraphState) -> dict:
        """Graph node: generate the documentation content."""
        return self._run_node(
            state,
            "generate_doc_content",
            "Генерирую содержимое документации.",
            self._composer.generate_doc_content,
        )

    def _self_check(self, state: AgentGraphState) -> dict:
        """Graph node: validate the generated content."""
        return self._run_node(
            state,
            "self_check",
            "Проверяю соответствие результата правилам.",
            self._composer.self_check,
        )

    def _build_changeset(self, state: AgentGraphState) -> dict:
        """Graph node: build the final set of file changes."""
        return self._run_node(
            state,
            "build_changeset",
            "Формирую итоговый набор изменений файлов.",
            self._composer.build_changeset,
        )

    def _summarize_result(self, state: AgentGraphState) -> dict:
        """Graph node: summarise what was done."""
        step = self._composer.build_execution_summary
        return self._run_node(state, "summarize_result", "Формирую краткий обзор выполненных действий и измененных файлов.", step)

    def _route_after_rules_loading(self, state: AgentGraphState) -> str:
        """Return "incremental" for the incremental_update strategy, else "from_scratch"."""
        is_incremental = state.get("docs_strategy") == "incremental_update"
        return "incremental" if is_incremental else "from_scratch"

    def _route_after_self_check(self, state: AgentGraphState) -> str:
        """Return "retry" while validation fails and attempts remain, else "ready"."""
        if not state.get("validation_passed"):
            used_attempts = int(state.get("validation_attempts", 0) or 0)
            if used_attempts < self._max_validation_attempts:
                return "retry"
        return "ready"

    def _run_node(self, state: AgentGraphState, node_name: str, message: str, fn):
        """Run ``fn(state)`` with progress events and logging around it.

        A failure is logged with its traceback and re-raised.
        """
        emit_progress_sync(state, stage=f"graph.docs.{node_name}", message=message)
        try:
            outcome = fn(state)
            emit_progress_sync(state, stage=f"graph.docs.{node_name}.done", message=f"Шаг '{node_name}' завершен.")
            LOGGER.warning("docs graph node completed: node=%s keys=%s", node_name, sorted(outcome.keys()))
        except Exception:
            LOGGER.exception("docs graph node failed: node=%s", node_name)
            raise
        return outcome
@@ -0,0 +1,523 @@
import json
from difflib import SequenceMatcher
from app.modules.agent.engine.graphs.docs_examples_loader import DocsExamplesLoader
from app.modules.agent.engine.graphs.file_targeting import FileTargeting
from app.modules.agent.engine.graphs.state import AgentGraphState
from app.modules.agent.llm import AgentLlmService
from app.schemas.changeset import ChangeItem
import logging
LOGGER = logging.getLogger(__name__)
class DocsContextAnalyzer:
    """Reads graph state to find documentation files and pick a docs strategy."""

    def __init__(self, llm: AgentLlmService, targeting: FileTargeting) -> None:
        """Keep the LLM service and the file-targeting helper."""
        self._llm = llm
        self._targeting = targeting

    def collect_code_context(self, state: AgentGraphState) -> dict:
        """Collect doc candidates and the file the request points at.

        Returns:
            State update with ``docs_candidates``, ``target_path``, the
            target's content and hash ("" when no file matched), and
            ``validation_attempts`` reset to 0.
        """
        message = state.get("message", "")
        files_map = state.get("files_map", {}) or {}
        requested_path = self._targeting.extract_target_path(message)
        target_file = self._targeting.lookup_file(files_map, requested_path) if requested_path else None
        docs_candidates = self._collect_doc_candidates(files_map)
        # Prefer the path stored on the matched file, then the requested one.
        target_path = str((target_file or {}).get("path") or (requested_path or "")).strip() or ""
        return {
            "docs_candidates": docs_candidates,
            "target_path": target_path,
            "target_file_content": str((target_file or {}).get("content", "")),
            "target_file_hash": str((target_file or {}).get("content_hash", "")),
            "validation_attempts": 0,
        }

    def detect_existing_docs(self, state: AgentGraphState) -> dict:
        """Ask the LLM whether the candidates are existing documentation.

        Without candidates the LLM is not called. Otherwise snippets of at
        most eight candidates (500 characters each) are sent, and the reply
        is parsed for "exists:" and "summary:" markers.
        """
        docs_candidates = state.get("docs_candidates", []) or []
        if not docs_candidates:
            return {
                "existing_docs_detected": False,
                "existing_docs_summary": "No documentation files detected in current project context.",
            }
        snippets = "\n\n".join(
            [
                f"Path: {item.get('path', '')}\nSnippet:\n{self._shorten(item.get('content', ''), 500)}"
                for item in docs_candidates[:8]
            ]
        )
        user_input = "\n\n".join(
            [
                f"User request:\n{state.get('message', '')}",
                f"Requested target path:\n{state.get('target_path', '') or '(not specified)'}",
                f"Detected documentation candidates:\n{snippets}",
            ]
        )
        raw = self._llm.generate("docs_detect", user_input, log_context="graph.docs.detect_existing_docs")
        exists = self.parse_bool_marker(raw, "exists", default=True)
        summary = self.parse_text_marker(raw, "summary", default="Documentation files detected.")
        return {"existing_docs_detected": exists, "existing_docs_summary": summary}

    def decide_strategy(self, state: AgentGraphState) -> dict:
        """Choose "from_scratch" or "incremental_update".

        Explicit phrases in the request decide first. Otherwise the LLM is
        asked; an unrecognised reply falls back to whether existing docs
        were detected.
        """
        message = (state.get("message", "") or "").lower()
        if any(token in message for token in ("с нуля", "from scratch", "new documentation", "создай документацию")):
            return {"docs_strategy": "from_scratch"}
        if any(token in message for token in ("дополни", "обнови документацию", "extend docs", "update docs")):
            return {"docs_strategy": "incremental_update"}
        user_input = "\n\n".join(
            [
                f"User request:\n{state.get('message', '')}",
                f"Existing docs detected:\n{state.get('existing_docs_detected', False)}",
                f"Existing docs summary:\n{state.get('existing_docs_summary', '')}",
            ]
        )
        raw = self._llm.generate("docs_strategy", user_input, log_context="graph.docs.decide_strategy")
        strategy = self.parse_text_marker(raw, "strategy", default="").lower()
        if strategy not in {"incremental_update", "from_scratch"}:
            strategy = "incremental_update" if state.get("existing_docs_detected", False) else "from_scratch"
        return {"docs_strategy": strategy}

    def resolve_target_for_incremental(self, state: AgentGraphState) -> tuple[str, dict | None]:
        """Pick the file an incremental update should modify.

        Order of preference: the state's ``target_path`` if it resolves to a
        file, then the first doc candidate, then the bare path (or
        "docs/AGENT_DRAFT.md") with no file.

        Returns:
            ``(path, file_payload_or_None)``.
        """
        files_map = state.get("files_map", {}) or {}
        preferred_path = state.get("target_path", "")
        preferred = self._targeting.lookup_file(files_map, preferred_path)
        if preferred:
            return str(preferred.get("path") or preferred_path), preferred
        candidates = state.get("docs_candidates", []) or []
        if candidates:
            first_path = str(candidates[0].get("path", ""))
            resolved = self._targeting.lookup_file(files_map, first_path) or candidates[0]
            return first_path, resolved
        fallback = preferred_path.strip() or "docs/AGENT_DRAFT.md"
        return fallback, None

    def _collect_doc_candidates(self, files_map: dict[str, dict]) -> list[dict]:
        """Return the files that look like documentation.

        A path qualifies when it is under ``docs/``, ends with ``.md`` or
        ``.rst``, or names a readme (case-insensitive). Files under
        ``docs/`` sort first, then alphabetically by lower-cased path.
        """
        candidates: list[dict] = []
        for raw_path, payload in files_map.items():
            path = str(raw_path or "").replace("\\", "/").strip()
            if not path:
                continue
            low = path.lower()
            is_doc = low.startswith("docs/") or low.endswith(".md") or low.endswith(".rst") or "/readme" in low or low.startswith("readme")
            if not is_doc:
                continue
            candidates.append(
                {
                    "path": str(payload.get("path") or path),
                    "content": str(payload.get("content", "")),
                    "content_hash": str(payload.get("content_hash", "")),
                }
            )
        candidates.sort(key=lambda item: (0 if str(item.get("path", "")).lower().startswith("docs/") else 1, str(item.get("path", "")).lower()))
        return candidates

    def _shorten(self, text: str, max_chars: int) -> str:
        """Trim ``text`` and cut it to ``max_chars``, marking the cut."""
        value = (text or "").strip()
        if len(value) <= max_chars:
            return value
        return value[:max_chars].rstrip() + "\n...[truncated]"

    @staticmethod
    def parse_bool_marker(text: str, marker: str, *, default: bool) -> bool:
        """Read a yes/no flag from a ``marker: value`` line.

        Only the first word of the value is inspected, with surrounding
        punctuation and markdown emphasis removed.

        Returns:
            True for yes/true/1/да, False for no/false/0/нет, otherwise
            ``default`` (also when the marker is missing or empty).
        """
        value = DocsContextAnalyzer.parse_text_marker(text, marker, default="")
        if not value:
            return default
        # Models often decorate the flag ("Yes.", "**no**", "true,"); strip
        # that wrapping so such answers are not replaced by the default.
        token = value.split()[0].strip(".,;:!?\"'`*_()[]").lower()
        if token in {"yes", "true", "1", "да"}:
            return True
        if token in {"no", "false", "0", "нет"}:
            return False
        return default

    @staticmethod
    def parse_text_marker(text: str, marker: str, *, default: str) -> str:
        """Return the text after ``marker:`` on the first line that starts with it.

        The marker match is case-insensitive and ignores leading whitespace.
        ``default`` is returned when no line matches.
        """
        low_marker = f"{marker.lower()}:"
        for line in (text or "").splitlines():
            raw = line.strip()
            if raw.lower().startswith(low_marker):
                return raw.split(":", 1)[1].strip()
        return default
class DocsBundleFormatter:
    """Text helpers for generated documentation: parsing, trimming and diff metrics."""

    def shorten(self, text: str, max_chars: int) -> str:
        """Trim ``text`` and cut it to ``max_chars``, marking the cut."""
        value = (text or "").strip()
        if len(value) <= max_chars:
            return value
        return value[:max_chars].rstrip() + "\n...[truncated]"

    def normalize_file_output(self, text: str) -> str:
        """Strip one wrapping Markdown code fence, if the whole text is fenced."""
        value = (text or "").strip()
        if value.startswith("```") and value.endswith("```"):
            lines = value.splitlines()
            # Needs an opening line, a closing line and something in between.
            if len(lines) >= 3:
                return "\n".join(lines[1:-1]).strip()
        return value

    def parse_docs_bundle(self, raw_text: str) -> list[dict]:
        """Extract ``{"path", "content", "reason"}`` entries from model output.

        Accepts a JSON object with a ``files`` list or a bare JSON list,
        optionally fenced or surrounded by other text. Entries with a
        missing, null or blank path or content are skipped, and only the
        first entry per path is kept.

        Returns:
            The accepted entries in input order; [] when nothing parses.
        """
        text = (raw_text or "").strip()
        if not text:
            return []
        candidate = self.normalize_file_output(text)
        parsed = self._parse_json_candidate(candidate)
        if parsed is None:
            # Retry on the outermost {...} span when the model added prose.
            start = candidate.find("{")
            end = candidate.rfind("}")
            if start != -1 and end > start:
                parsed = self._parse_json_candidate(candidate[start : end + 1])
        if parsed is None:
            return []
        files: list[dict]
        if isinstance(parsed, dict):
            raw_files = parsed.get("files")
            files = raw_files if isinstance(raw_files, list) else []
        elif isinstance(parsed, list):
            files = parsed
        else:
            files = []
        out: list[dict] = []
        seen: set[str] = set()
        for item in files:
            if not isinstance(item, dict):
                continue
            # JSON null must read as "missing": str(None) would yield "None".
            path = self._as_text(item.get("path")).replace("\\", "/").strip()
            content = self._as_text(item.get("content"))
            if not path or not content.strip():
                continue
            if path in seen:
                continue
            seen.add(path)
            out.append(
                {
                    "path": path,
                    "content": content,
                    "reason": self._as_text(item.get("reason")).strip(),
                }
            )
        return out

    def bundle_has_required_structure(self, bundle: list[dict]) -> bool:
        """Return True when the bundle has files under both docs/api/ and docs/logic/."""
        if not bundle:
            return False
        has_api = any(str(item.get("path", "")).replace("\\", "/").startswith("docs/api/") for item in bundle)
        has_logic = any(str(item.get("path", "")).replace("\\", "/").startswith("docs/logic/") for item in bundle)
        return has_api and has_logic

    def similarity(self, original: str, updated: str) -> float:
        """Return the ``SequenceMatcher`` ratio of the two texts."""
        return SequenceMatcher(None, original or "", updated or "").ratio()

    def line_change_ratio(self, original: str, updated: str) -> float:
        """Return changed lines divided by the longer text's line count.

        For every non-equal opcode the larger side of the change is counted.
        Two empty texts give 0.0.
        """
        orig_lines = (original or "").splitlines()
        new_lines = (updated or "").splitlines()
        if not orig_lines and not new_lines:
            return 0.0
        matcher = SequenceMatcher(None, orig_lines, new_lines)
        changed = 0
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag == "equal":
                continue
            changed += max(i2 - i1, j2 - j1)
        total = max(len(orig_lines), len(new_lines), 1)
        return changed / total

    def added_headings(self, original: str, updated: str) -> int:
        """Count distinct "#"-prefixed lines present in ``updated`` but not in ``original``."""
        old_heads = {line.strip() for line in (original or "").splitlines() if line.strip().startswith("#")}
        new_heads = {line.strip() for line in (updated or "").splitlines() if line.strip().startswith("#")}
        return len(new_heads - old_heads)

    def collapse_whitespace(self, text: str) -> str:
        """Replace every whitespace run with a single space and trim the ends."""
        return " ".join((text or "").split())

    def _parse_json_candidate(self, text: str):
        """Return the decoded JSON value, or None when ``text`` does not parse."""
        try:
            return json.loads(text)
        except Exception:
            # Deliberately broad: any failure just means "not usable JSON".
            return None

    @staticmethod
    def _as_text(value) -> str:
        """Return "" for None and ``str(value)`` otherwise."""
        return "" if value is None else str(value)
class DocsContentComposer:
def __init__(self, llm: AgentLlmService, targeting: FileTargeting) -> None:
    """Store the collaborators and create the examples loader and bundle formatter."""
    self._llm, self._targeting = llm, targeting
    self._examples = DocsExamplesLoader()
    self._bundle = DocsBundleFormatter()
def load_rules_and_examples(self, _state: AgentGraphState) -> dict:
return {"rules_bundle": self._examples.load_bundle()}
def plan_incremental_changes(self, state: AgentGraphState, analyzer: DocsContextAnalyzer) -> dict:
target_path, target = analyzer.resolve_target_for_incremental(state)
user_input = "\n\n".join(
[
"Strategy: incremental_update",
f"User request:\n{state.get('message', '')}",
f"Target path:\n{target_path}",
f"Current target content:\n{self._bundle.shorten((target or {}).get('content', ''), 3000)}",
f"RAG context:\n{self._bundle.shorten(state.get('rag_context', ''), 6000)}",
f"Examples bundle:\n{state.get('rules_bundle', '')}",
]
)
plan = self._llm.generate("docs_plan_sections", user_input, log_context="graph.docs.plan_incremental_changes")
return {
"doc_plan": plan,
"target_path": target_path,
"target_file_content": str((target or {}).get("content", "")),
"target_file_hash": str((target or {}).get("content_hash", "")),
}
def plan_new_document(self, state: AgentGraphState) -> dict:
target_path = state.get("target_path", "").strip() or "docs/AGENT_DRAFT.md"
user_input = "\n\n".join(
[
"Strategy: from_scratch",
f"User request:\n{state.get('message', '')}",
f"Target path:\n{target_path}",
f"RAG context:\n{self._bundle.shorten(state.get('rag_context', ''), 6000)}",
f"Examples bundle:\n{state.get('rules_bundle', '')}",
]
)
plan = self._llm.generate("docs_plan_sections", user_input, log_context="graph.docs.plan_new_document")
return {"doc_plan": plan, "target_path": target_path, "target_file_content": "", "target_file_hash": ""}
def generate_doc_content(self, state: AgentGraphState) -> dict:
user_input = "\n\n".join(
[
f"Strategy:\n{state.get('docs_strategy', 'from_scratch')}",
f"User request:\n{state.get('message', '')}",
f"Target path:\n{state.get('target_path', '')}",
f"Document plan:\n{state.get('doc_plan', '')}",
f"Current target content:\n{self._bundle.shorten(state.get('target_file_content', ''), 3500)}",
f"RAG context:\n{self._bundle.shorten(state.get('rag_context', ''), 7000)}",
f"Examples bundle:\n{state.get('rules_bundle', '')}",
]
)
raw = self._llm.generate("docs_generation", user_input, log_context="graph.docs.generate_doc_content")
bundle = self._bundle.parse_docs_bundle(raw)
if bundle:
first_content = str(bundle[0].get("content", "")).strip()
return {"generated_docs_bundle": bundle, "generated_doc": first_content}
content = self._bundle.normalize_file_output(raw)
return {"generated_docs_bundle": [], "generated_doc": content}
def self_check(self, state: AgentGraphState) -> dict:
attempts = int(state.get("validation_attempts", 0) or 0) + 1
bundle = state.get("generated_docs_bundle", []) or []
generated = state.get("generated_doc", "")
if not generated.strip() and not bundle:
return {
"validation_attempts": attempts,
"validation_passed": False,
"validation_feedback": "Generated document is empty.",
}
strategy = state.get("docs_strategy", "from_scratch")
if strategy == "from_scratch" and not self._bundle.bundle_has_required_structure(bundle):
return {
"validation_attempts": attempts,
"validation_passed": False,
"validation_feedback": "Bundle must include both docs/api and docs/logic for from_scratch strategy.",
}
if strategy == "incremental_update":
if bundle and len(bundle) > 1 and not self._is_broad_rewrite_request(str(state.get("message", ""))):
return {
"validation_attempts": attempts,
"validation_passed": False,
"validation_feedback": "Incremental update should not touch multiple files without explicit broad rewrite request.",
}
original = str(state.get("target_file_content", ""))
broad = self._is_broad_rewrite_request(str(state.get("message", "")))
if original and generated:
if self._bundle.collapse_whitespace(original) == self._bundle.collapse_whitespace(generated):
return {
"validation_attempts": attempts,
"validation_passed": False,
"validation_feedback": "Only formatting/whitespace changes detected.",
}
similarity = self._bundle.similarity(original, generated)
change_ratio = self._bundle.line_change_ratio(original, generated)
added_headings = self._bundle.added_headings(original, generated)
min_similarity = 0.75 if broad else 0.9
max_change_ratio = 0.7 if broad else 0.35
if similarity < min_similarity:
return {
"validation_attempts": attempts,
"validation_passed": False,
"validation_feedback": f"Incremental update is too broad (similarity={similarity:.2f}).",
}
if change_ratio > max_change_ratio:
return {
"validation_attempts": attempts,
"validation_passed": False,
"validation_feedback": f"Incremental update changes too many lines (change_ratio={change_ratio:.2f}).",
}
if not broad and added_headings > 0:
return {
"validation_attempts": attempts,
"validation_passed": False,
"validation_feedback": "New section headings were added outside requested scope.",
}
bundle_text = "\n".join([f"- {item.get('path', '')}" for item in bundle[:30]])
user_input = "\n\n".join(
[
f"Strategy:\n{strategy}",
f"User request:\n{state.get('message', '')}",
f"Document plan:\n{state.get('doc_plan', '')}",
f"Generated file paths:\n{bundle_text or '(single-file mode)'}",
f"Generated document:\n{generated}",
]
)
raw = self._llm.generate("docs_self_check", user_input, log_context="graph.docs.self_check")
passed = DocsContextAnalyzer.parse_bool_marker(raw, "pass", default=False)
feedback = DocsContextAnalyzer.parse_text_marker(raw, "feedback", default="No validation feedback provided.")
return {"validation_attempts": attempts, "validation_passed": passed, "validation_feedback": feedback}
def build_changeset(self, state: AgentGraphState) -> dict:
    """Turn the generated documentation into a list of file changes.

    A from-scratch bundle that lacks the required structure is replaced by
    the fallback bundle. Each usable bundle entry becomes an "update" when
    the file exists with a content hash, otherwise a "create". If the
    bundle yields nothing, one change is built for the state's target path
    from ``generated_doc``.

    Returns:
        ``{"changeset": [ChangeItem, ...]}``.
    """

    def as_change(path, content, existing, update_reason, create_reason):
        # A file counts as existing only when it carries a content hash.
        if existing and existing.get("content_hash"):
            return ChangeItem(
                op="update",
                path=str(existing.get("path") or path),
                base_hash=str(existing.get("content_hash", "")),
                proposed_content=content,
                reason=update_reason,
            )
        return ChangeItem(
            op="create",
            path=path,
            proposed_content=content,
            reason=create_reason,
        )

    files_map = state.get("files_map", {}) or {}
    bundle = state.get("generated_docs_bundle", []) or []
    strategy = state.get("docs_strategy", "from_scratch")
    if strategy == "from_scratch" and not self._bundle.bundle_has_required_structure(bundle):
        LOGGER.info(
            "build_changeset fallback bundle used: strategy=%s bundle_items=%s",
            strategy,
            len(bundle),
        )
        bundle = self._build_fallback_bundle_from_text(state.get("generated_doc", ""))

    bundle_changes: list[ChangeItem] = []
    for entry in bundle:
        entry_path = str(entry.get("path", "")).replace("\\", "/").strip()
        entry_content = str(entry.get("content", ""))
        if not (entry_path and entry_content.strip()):
            continue
        existing = self._targeting.lookup_file(files_map, entry_path)
        entry_reason = str(entry.get("reason", "")).strip() or f"Documentation {strategy}: generated file from structured bundle."
        bundle_changes.append(as_change(entry_path, entry_content, existing, entry_reason, entry_reason))
    if bundle_changes:
        return {"changeset": bundle_changes}

    # Single-file mode: nothing usable came out of the bundle.
    single_path = (state.get("target_path", "") or "").strip() or "docs/AGENT_DRAFT.md"
    existing = self._targeting.lookup_file(files_map, single_path)
    single_change = as_change(
        single_path,
        state.get("generated_doc", ""),
        existing,
        f"Documentation {strategy}: update existing document increment.",
        f"Documentation {strategy}: create document from current project context.",
    )
    return {"changeset": [single_change]}
def build_execution_summary(self, state: AgentGraphState) -> dict:
    """Produce the final user-facing answer describing the documentation run.

    An empty changeset yields a fixed "nothing changed" answer. Otherwise the
    LLM is asked (prompt ``docs_execution_summary``) to summarize the request,
    strategy, plan, validation feedback and changed files. If the LLM call
    fails or returns blank text, a locally built fallback summary is used.

    Returns:
        ``{"answer": <summary text>}``.
    """
    changeset = state.get("changeset", []) or []
    if not changeset:
        return {"answer": "Документация не была изменена: итоговый changeset пуст."}
    file_lines = self._format_changed_files(changeset)
    user_input = "\n\n".join(
        [
            f"User request:\n{state.get('message', '')}",
            f"Documentation strategy:\n{state.get('docs_strategy', 'from_scratch')}",
            f"Document plan:\n{state.get('doc_plan', '')}",
            f"Validation feedback:\n{state.get('validation_feedback', '')}",
            f"Changed files:\n{file_lines}",
        ]
    )
    try:
        raw_summary = self._llm.generate(
            "docs_execution_summary",
            user_input,
            log_context="graph.docs.summarize_result",
        )
        # A None reply is treated like a blank one instead of raising.
        summary = (raw_summary or "").strip()
    except Exception:
        # Summarization is best-effort: keep going with the fallback text,
        # but leave a trace so the failure is not silently lost.
        LOGGER.warning(
            "build_execution_summary: LLM summary failed, using fallback summary",
            exc_info=True,
        )
        summary = ""
    if not summary:
        summary = self._build_fallback_summary(state, file_lines)
    return {"answer": summary}
def _build_fallback_bundle_from_text(self, text: str) -> list[dict]:
content = (text or "").strip()
if not content:
content = (
"# Project Documentation Draft\n\n"
"## Overview\n"
"Documentation draft was generated, but structured sections require уточнение.\n"
)
return [
{
"path": "docs/logic/project_overview.md",
"content": content,
"reason": "Fallback: generated structured logic document from non-JSON model output.",
},
{
"path": "docs/api/README.md",
"content": (
"# API Methods\n\n"
"This file is a fallback placeholder for API method documentation.\n\n"
"## Next Step\n"
"- Add one file per API method under `docs/api/`.\n"
),
"reason": "Fallback: ensure required docs/api structure exists.",
},
]
def _format_changed_files(self, changeset: list[ChangeItem]) -> str:
    """Render at most the first 30 changes as ``- <op> <path>: <reason>`` lines."""
    return "\n".join(
        f"- {change.op.value} {change.path}: {change.reason}"
        for change in changeset[:30]
    )
def _build_fallback_summary(self, state: AgentGraphState, file_lines: str) -> str:
    """Compose the summary text used when no LLM summary is available.

    The text lists the handled request, the documentation strategy and the
    already formatted ``file_lines`` (or a "no changes" marker when blank).
    """
    request_text = (state.get("message", "") or "").strip() or "(пустой запрос)"
    strategy = state.get("docs_strategy", "from_scratch")
    summary_lines = [
        "Выполненные действия:",
        f"- Обработан запрос: {request_text}",
        f"- Применена стратегия документации: {strategy}",
        "- Сформирован и проверен changeset для документации.",
        "",
        "Измененные файлы:",
    ]
    summary_lines.append(file_lines or "- (нет изменений)")
    return "\n".join(summary_lines)
def _is_broad_rewrite_request(self, message: str) -> bool:
low = (message or "").lower()
markers = (
"перепиши",
"полностью",
"целиком",
"с нуля",
"full rewrite",
"rewrite all",
"реорганизуй",
)
return any(marker in low for marker in markers)
@@ -0,0 +1,28 @@
import re
class FileTargeting:
_path_pattern = re.compile(r"([A-Za-z0-9_.\-/]+?\.[A-Za-z0-9_]+)")
def extract_target_path(self, message: str) -> str | None:
text = (message or "").replace("\\", "/")
candidates = self._path_pattern.findall(text)
if not candidates:
return None
for candidate in candidates:
cleaned = candidate.strip("`'\".,:;()[]{}")
if "/" in cleaned or cleaned.startswith("."):
return cleaned
return candidates[0].strip("`'\".,:;()[]{}")
def lookup_file(self, files_map: dict[str, dict], path: str | None) -> dict | None:
if not path:
return None
normalized = path.replace("\\", "/")
if normalized in files_map:
return files_map[normalized]
low = normalized.lower()
for key, value in files_map.items():
if key.lower() == low:
return value
return None
@@ -0,0 +1,44 @@
from collections.abc import Awaitable, Callable
import inspect
import asyncio
from app.modules.agent.engine.graphs.progress_registry import progress_registry
from app.modules.agent.engine.graphs.state import AgentGraphState
# Progress callback signature: three strings and an optional dict; the
# callback may return either None or an awaitable.
ProgressCallback = Callable[[str, str, str, dict | None], Awaitable[None] | None]
async def emit_progress(
    state: AgentGraphState,
    *,
    stage: str,
    message: str,
    kind: str = "task_progress",
    meta: dict | None = None,
) -> None:
    """Send a progress event to the callback registered for this run.

    The callback is looked up by ``state["progress_key"]``; nothing happens
    when none is registered. An awaitable callback result is awaited.
    """
    callback = progress_registry.get(state.get("progress_key"))
    if callback is not None:
        # A missing meta is passed on as an empty dict.
        outcome = callback(stage, message, kind, meta or {})
        if inspect.isawaitable(outcome):
            await outcome
# Strong references to scheduled progress tasks. The event loop keeps only
# weak references to tasks, so a task nobody holds may be garbage-collected
# before it completes.
_PENDING_PROGRESS_TASKS: set[asyncio.Future] = set()


def emit_progress_sync(
    state: AgentGraphState,
    *,
    stage: str,
    message: str,
    kind: str = "task_progress",
    meta: dict | None = None,
) -> None:
    """Send a progress event from synchronous code without blocking.

    The callback is looked up by ``state["progress_key"]``; nothing happens
    when none is registered. If the callback returns an awaitable, it is
    scheduled on the running event loop (fire-and-forget). When no loop is
    running in this thread the event is dropped.
    """
    callback = progress_registry.get(state.get("progress_key"))
    if callback is None:
        return
    result = callback(stage, message, kind, meta or {})
    if not inspect.isawaitable(result):
        return
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop: the event cannot be delivered. Close a pending
        # coroutine so it does not trigger a "never awaited" warning.
        if inspect.iscoroutine(result):
            result.close()
        return
    # ensure_future accepts any awaitable, not only coroutine objects.
    task = asyncio.ensure_future(result, loop=loop)
    _PENDING_PROGRESS_TASKS.add(task)
    task.add_done_callback(_PENDING_PROGRESS_TASKS.discard)
@@ -0,0 +1,27 @@
from collections.abc import Awaitable, Callable
from threading import Lock
ProgressCallback = Callable[[str, str, str, dict | None], Awaitable[None] | None]
class ProgressRegistry:
def __init__(self) -> None:
self._items: dict[str, ProgressCallback] = {}
self._lock = Lock()
def register(self, key: str, callback: ProgressCallback) -> None:
with self._lock:
self._items[key] = callback
def get(self, key: str | None) -> ProgressCallback | None:
if not key:
return None
with self._lock:
return self._items.get(key)
def unregister(self, key: str) -> None:
with self._lock:
self._items.pop(key, None)
progress_registry = ProgressRegistry()
@@ -0,0 +1,171 @@
import re
from dataclasses import dataclass, field
@dataclass
class BlockContract:
    """One editable region permitted inside a file edit contract."""

    type: str
    max_changed_lines: int = 6
    start_anchor: str = ""
    end_anchor: str = ""
    old_line: str = ""

    def as_dict(self) -> dict:
        """Return the contract as a plain dict, keys in field order."""
        field_names = ("type", "max_changed_lines", "start_anchor", "end_anchor", "old_line")
        return {name: getattr(self, name) for name in field_names}
@dataclass
class FileEditContract:
    """Edit contract for one file: limits plus the blocks that may be changed."""

    path: str
    reason: str
    intent: str = "update"
    max_hunks: int = 1
    max_changed_lines: int = 8
    allowed_blocks: list[BlockContract] = field(default_factory=list)

    def as_dict(self) -> dict:
        """Return the contract as a plain dict with nested blocks serialized too."""
        serialized = {
            name: getattr(self, name)
            for name in ("path", "reason", "intent", "max_hunks", "max_changed_lines")
        }
        serialized["allowed_blocks"] = [allowed.as_dict() for allowed in self.allowed_blocks]
        return serialized
class ContractParser:
    """Turn a planner JSON payload into validated file edit contracts.

    Malformed entries are dropped rather than raising. When no contract can be
    parsed, a contract may be inferred from the wording of the user request.
    """

    _supported_block_types = {"append_end", "replace_between", "replace_line_equals"}
    # Alternative spellings accepted for block types.
    _block_type_aliases = {
        "append": "append_end",
        "append_eof": "append_end",
        "end_append": "append_end",
        "replace_block": "replace_between",
        "replace_section": "replace_between",
        "replace_range": "replace_between",
        "replace_line": "replace_line_equals",
        "line_equals": "replace_line_equals",
    }
    _append_markers = ("в конец", "в самый конец", "append to end", "append at the end")
    # Tried in this order; the first pattern with a non-blank match wins.
    _quoted_patterns = tuple(
        re.compile(pattern)
        for pattern in (r"`([^`]+)`", r"\"([^\"]+)\"", r"'([^']+)'", r"«([^»]+)»")
    )

    def parse(self, payload: dict, *, request: str, requested_path: str) -> list[dict]:
        """Parse ``payload["files"]`` into a list of contract dicts.

        Args:
            payload: Decoded planner output; anything but a dict is treated as empty.
            request: Original user request, used only for the fallback contract.
            requested_path: Path the fallback contract is built for.

        Returns:
            Contract dicts as produced by ``FileEditContract.as_dict``; possibly empty.
        """
        files = payload.get("files", []) if isinstance(payload, dict) else []
        if not isinstance(files, list):
            files = []
        parsed: list[FileEditContract] = []
        for item in files:
            contract = self._parse_file_contract(item)
            if contract is not None:
                parsed.append(contract)
        if not parsed:
            fallback = self._fallback_contract(request=request, requested_path=requested_path)
            if fallback is not None:
                parsed.append(fallback)
        return [contract.as_dict() for contract in parsed]

    @staticmethod
    def _text(value: object) -> str:
        """Convert a JSON scalar to text; ``None`` (JSON null) becomes ``""``.

        Plain ``str(None)`` would yield the literal ``"None"``, which would
        then pass as a real path, anchor or line.
        """
        return "" if value is None else str(value)

    def _parse_file_contract(self, item: object) -> "FileEditContract | None":
        """Validate one ``files`` entry; return ``None`` when it is unusable.

        An entry needs a non-empty path and at least one valid allowed block.
        """
        if not isinstance(item, dict):
            return None
        path = self._text(item.get("path")).replace("\\", "/").strip()
        if not path:
            return None
        reason = self._text(item.get("reason")).strip() or "Requested user adjustment."
        intent = self._text(item.get("intent", "update")).strip().lower() or "update"
        if intent not in {"update", "create"}:
            intent = "update"
        max_hunks = self._clamp_int(item.get("max_hunks"), default=1, min_value=1, max_value=5)
        max_changed_lines = self._clamp_int(item.get("max_changed_lines"), default=8, min_value=1, max_value=120)
        raw_blocks = item.get("allowed_blocks", [])
        if not isinstance(raw_blocks, list):
            raw_blocks = []
        blocks = [block for block in map(self._parse_block, raw_blocks) if block is not None]
        if not blocks:
            return None
        return FileEditContract(
            path=path,
            reason=reason,
            intent=intent,
            max_hunks=max_hunks,
            max_changed_lines=max_changed_lines,
            allowed_blocks=blocks,
        )

    def _parse_block(self, raw: object) -> "BlockContract | None":
        """Validate one allowed block; return ``None`` when it is unusable.

        ``replace_between`` requires both anchors and ``replace_line_equals``
        requires ``old_line``.
        """
        if not isinstance(raw, dict):
            return None
        kind = self._normalize_block_type(self._text(raw.get("type")).strip().lower())
        if kind not in self._supported_block_types:
            return None
        block = BlockContract(
            type=kind,
            max_changed_lines=self._clamp_int(raw.get("max_changed_lines"), default=6, min_value=1, max_value=80),
            start_anchor=self._text(raw.get("start_anchor")).strip(),
            end_anchor=self._text(raw.get("end_anchor")).strip(),
            old_line=self._text(raw.get("old_line")).strip(),
        )
        if block.type == "replace_between" and (not block.start_anchor or not block.end_anchor):
            return None
        if block.type == "replace_line_equals" and not block.old_line:
            return None
        return block

    def _fallback_contract(self, *, request: str, requested_path: str) -> "FileEditContract | None":
        """Infer a contract from the request text when the planner gave none.

        An "append to end" phrase yields an ``append_end`` contract; otherwise
        a quoted fragment yields a ``replace_line_equals`` contract. Returns
        ``None`` without a path or when neither heuristic applies.
        """
        path = (requested_path or "").strip()
        if not path:
            return None
        lowered = (request or "").lower()
        if any(marker in lowered for marker in self._append_markers):
            return FileEditContract(
                path=path,
                reason="Append-only update inferred from user request.",
                intent="update",
                max_hunks=1,
                max_changed_lines=8,
                allowed_blocks=[BlockContract(type="append_end", max_changed_lines=8)],
            )
        quoted = self._extract_quoted_line(request)
        if quoted:
            return FileEditContract(
                path=path,
                reason="Single-line replacement inferred from quoted segment in user request.",
                intent="update",
                max_hunks=1,
                max_changed_lines=4,
                allowed_blocks=[BlockContract(type="replace_line_equals", old_line=quoted, max_changed_lines=4)],
            )
        return None

    def _extract_quoted_line(self, text: str) -> str:
        """Return the first non-blank quoted fragment of ``text``, or ``""``.

        Quote styles are tried in a fixed order (backticks, double quotes,
        single quotes, guillemets), not by position in the text.
        """
        value = (text or "").strip()
        for pattern in self._quoted_patterns:
            match = pattern.search(value)
            if match is None:
                continue
            candidate = match.group(1).strip()
            if candidate:
                return candidate
        return ""

    def _normalize_block_type(self, value: str) -> str:
        """Map an alias to its canonical block type; unknown values pass through."""
        return self._block_type_aliases.get(value, value)

    def _clamp_int(self, value: object, *, default: int, min_value: int, max_value: int) -> int:
        """Coerce ``value`` to int within ``[min_value, max_value]``.

        Values that cannot be converted fall back to ``default``.
        """
        try:
            numeric = int(value)  # type: ignore[arg-type]
        except (TypeError, ValueError, OverflowError):
            numeric = default
        return max(min_value, min(max_value, numeric))
@@ -0,0 +1,102 @@
import logging
from langgraph.graph import END, START, StateGraph
from app.modules.agent.engine.graphs.progress import emit_progress_sync
from app.modules.agent.engine.graphs.project_edits_logic import ProjectEditsLogic
from app.modules.agent.engine.graphs.state import AgentGraphState
from app.modules.agent.llm import AgentLlmService
# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)
class ProjectEditsGraphFactory:
    """Build the project-edits graph.

    Flow: collect_context -> plan_changes -> generate_changeset -> self_check,
    then either back to generate_changeset (retry) or on to build_result.
    """

    _max_validation_attempts = 2

    def __init__(self, llm: AgentLlmService) -> None:
        self._logic = ProjectEditsLogic(llm)

    def build(self, checkpointer=None):
        """Assemble and compile the state graph."""
        graph = StateGraph(AgentGraphState)
        node_handlers = (
            ("collect_context", self._collect_context),
            ("plan_changes", self._plan_changes),
            ("generate_changeset", self._generate_changeset),
            ("self_check", self._self_check),
            ("build_result", self._build_result),
        )
        for node_name, handler in node_handlers:
            graph.add_node(node_name, handler)
        graph.add_edge(START, "collect_context")
        linear_edges = (
            ("collect_context", "plan_changes"),
            ("plan_changes", "generate_changeset"),
            ("generate_changeset", "self_check"),
        )
        for source, destination in linear_edges:
            graph.add_edge(source, destination)
        graph.add_conditional_edges(
            "self_check",
            self._route_after_self_check,
            {"retry": "generate_changeset", "ready": "build_result"},
        )
        graph.add_edge("build_result", END)
        return graph.compile(checkpointer=checkpointer)

    def _run_step(self, state: AgentGraphState, step: str, message: str, action) -> dict:
        """Emit the progress event for ``step``, run ``action`` and log its result."""
        emit_progress_sync(
            state,
            stage=f"graph.project_edits.{step}",
            message=message,
        )
        outcome = action(state)
        self._log_step_result(step, outcome)
        return outcome

    def _collect_context(self, state: AgentGraphState) -> dict:
        return self._run_step(
            state,
            "collect_context",
            "Собираю контекст и релевантные файлы для правок.",
            self._logic.collect_context,
        )

    def _plan_changes(self, state: AgentGraphState) -> dict:
        return self._run_step(
            state,
            "plan_changes",
            "Определяю, что именно нужно изменить и в каких файлах.",
            self._logic.plan_changes,
        )

    def _generate_changeset(self, state: AgentGraphState) -> dict:
        return self._run_step(
            state,
            "generate_changeset",
            "Формирую предлагаемые правки по выбранным файлам.",
            self._logic.generate_changeset,
        )

    def _self_check(self, state: AgentGraphState) -> dict:
        return self._run_step(
            state,
            "self_check",
            "Проверяю, что правки соответствуют запросу и не трогают лишнее.",
            self._logic.self_check,
        )

    def _build_result(self, state: AgentGraphState) -> dict:
        return self._run_step(
            state,
            "build_result",
            "Формирую итоговый changeset и краткий обзор.",
            self._logic.build_result,
        )

    def _route_after_self_check(self, state: AgentGraphState) -> str:
        """Return "ready" once validation passed or attempts ran out, else "retry"."""
        if state.get("validation_passed"):
            return "ready"
        attempts_made = int(state.get("validation_attempts", 0) or 0)
        if attempts_made >= self._max_validation_attempts:
            return "ready"
        return "retry"

    def _log_step_result(self, step: str, result: dict) -> None:
        """Log the result keys, changeset size and answer length of a step."""
        changeset_size = len(result.get("changeset", []) or [])
        answer_length = len(str(result.get("answer", "") or ""))
        LOGGER.warning(
            "graph step result: graph=project_edits step=%s keys=%s changeset_items=%s answer_len=%s",
            step,
            sorted(result.keys()),
            changeset_size,
            answer_length,
        )
@@ -0,0 +1,240 @@
import json
from app.modules.agent.engine.graphs.project_edits_contract import ContractParser
from app.modules.agent.engine.graphs.project_edits_patcher import ContractPatcher
from app.modules.agent.engine.graphs.project_edits_support import ProjectEditsSupport
from app.modules.agent.engine.graphs.state import AgentGraphState
from app.modules.agent.llm import AgentLlmService
from app.schemas.changeset import ChangeItem
class ProjectEditsLogic:
    """Step implementations of the project-edits graph.

    Each public method reads the graph state and returns a partial state update.
    """

    def __init__(self, llm: AgentLlmService) -> None:
        self._llm = llm
        self._support = ProjectEditsSupport()
        self._contracts = ContractParser()
        self._patcher = ContractPatcher()

    def collect_context(self, state: AgentGraphState) -> dict:
        """Pick context files for the request and reset the validation counter."""
        message = state.get("message", "")
        files_map = state.get("files_map", {}) or {}
        path_hint = self._extract_path_hint(message)
        requested = self._support.lookup_file(files_map, path_hint)
        candidates = self._support.pick_relevant_files(message, files_map)
        if requested:
            # The explicitly requested file goes first when not already listed.
            wanted_path = requested.get("path")
            if not any(candidate["path"] == wanted_path for candidate in candidates):
                candidates.insert(0, self._support.as_candidate(requested))
        resolved_path = str((requested or {}).get("path", "")).strip() or path_hint
        return {
            "edits_requested_path": resolved_path,
            "edits_context_files": candidates[:12],
            "validation_attempts": 0,
        }

    def plan_changes(self, state: AgentGraphState) -> dict:
        """Ask the LLM for an edit plan and parse it into edit contracts."""
        context_files = [
            {
                "path": item.get("path", ""),
                "content_preview": self._support.shorten(str(item.get("content", "")), 2200),
            }
            for item in (state.get("edits_context_files", []) or [])
        ]
        plan_request = {
            "request": state.get("message", ""),
            "requested_path": state.get("edits_requested_path", ""),
            "context_files": context_files,
            "contract_requirements": {
                "must_define_allowed_blocks": True,
                "max_hunks_per_file": 5,
                "default_intent": "update",
            },
        }
        user_input = json.dumps(plan_request, ensure_ascii=False)
        raw_plan = self._llm.generate(
            "project_edits_plan",
            user_input,
            log_context="graph.project_edits.plan_changes",
        )
        contracts = self._contracts.parse(
            self._support.parse_json(raw_plan),
            request=str(state.get("message", "")),
            requested_path=str(state.get("edits_requested_path", "")),
        )
        plan = [
            {"path": contract.get("path", ""), "reason": contract.get("reason", "")}
            for contract in contracts
        ]
        return {"edits_contracts": contracts, "edits_plan": plan}

    def generate_changeset(self, state: AgentGraphState) -> dict:
        """Generate and apply hunks for every contract.

        Contracts that fail are skipped; their reasons are joined with
        ``" | "`` into ``edits_generation_feedback``.
        """
        files_map = state.get("files_map", {}) or {}
        changeset: list[ChangeItem] = []
        notes: list[str] = []
        for contract in state.get("edits_contracts", []) or []:
            change, note = self._change_for_contract(state, files_map, contract)
            if note is not None:
                notes.append(note)
            if change is not None:
                changeset.append(change)
        return {"changeset": changeset, "edits_generation_feedback": " | ".join(notes)}

    def _change_for_contract(
        self,
        state: AgentGraphState,
        files_map: dict,
        contract: object,
    ) -> "tuple[ChangeItem | None, str | None]":
        """Process one contract; return ``(change, feedback)``.

        Exactly one of the two is set, except for malformed contracts (not a
        dict or no path), which are skipped silently as ``(None, None)``.
        """
        if not isinstance(contract, dict):
            return None, None
        path = str(contract.get("path", "")).replace("\\", "/").strip()
        if not path:
            return None, None
        intent = str(contract.get("intent", "update")).strip().lower() or "update"
        is_update = intent == "update"
        source = self._support.lookup_file(files_map, path)
        if is_update and source is None:
            return None, f"{path}: update requested but source file was not provided."
        current_content = str((source or {}).get("content", ""))
        hunks, error = self._generate_hunks_for_contract(state, contract, current_content)
        if error:
            return None, f"{path}: {error}"
        proposed, apply_error = self._patcher.apply(current_content, contract, hunks)
        if apply_error:
            return None, f"{path}: {apply_error}"
        if proposed is None:
            return None, f"{path}: patch application returned empty result."
        if is_update:
            if proposed == current_content:
                return None, f"{path}: no-op update produced by model."
            collapse = self._support.collapse_whitespace
            if collapse(proposed) == collapse(current_content):
                return None, f"{path}: whitespace-only update is not allowed."
        reason = str(contract.get("reason", "")).strip() or "Requested user adjustment."
        if source and source.get("content_hash"):
            change = ChangeItem(
                op="update",
                path=str(source.get("path") or path),
                base_hash=str(source.get("content_hash", "")),
                proposed_content=proposed,
                reason=reason,
                hunks=hunks,
            )
        else:
            change = ChangeItem(
                op="create",
                path=path,
                proposed_content=proposed,
                reason=reason,
                hunks=hunks,
            )
        return change, None

    def self_check(self, state: AgentGraphState) -> dict:
        """Validate the changeset: local size checks first, then an LLM review.

        The attempt counter is incremented on every call.
        """
        attempts = int(state.get("validation_attempts", 0) or 0) + 1
        changeset = state.get("changeset", []) or []
        files_map = state.get("files_map", {}) or {}

        def verdict(passed: bool, feedback: str) -> dict:
            return {
                "validation_attempts": attempts,
                "validation_passed": passed,
                "validation_feedback": feedback,
            }

        if not changeset:
            empty_feedback = str(state.get("edits_generation_feedback", "")).strip() or "Generated changeset is empty."
            return verdict(False, empty_feedback)
        broad = self._support.is_broad_rewrite_request(str(state.get("message", "")))
        for item in changeset:
            violation = self._scope_violation(item, files_map, broad)
            if violation is not None:
                return verdict(False, violation)
        payload = {
            "request": state.get("message", ""),
            "contracts": state.get("edits_contracts", []),
            "changeset": [{"op": x.op.value, "path": x.path, "reason": x.reason} for x in changeset[:20]],
            "rule": "Changes must stay inside contract blocks and not affect unrelated sections.",
        }
        raw_review = self._llm.generate(
            "project_edits_self_check",
            json.dumps(payload, ensure_ascii=False),
            log_context="graph.project_edits.self_check",
        )
        review = self._support.parse_json(raw_review)
        if isinstance(review, dict):
            passed = bool(review.get("pass"))
            feedback = str(review.get("feedback", "")).strip()
        else:
            passed, feedback = False, ""
        return verdict(passed, feedback or "No validation feedback provided.")

    def _scope_violation(self, item: ChangeItem, files_map: dict, broad: bool) -> "str | None":
        """Return a feedback message when an update changes its file too much.

        Only ``update`` items whose source is present in ``files_map`` are
        checked; thresholds are looser for broad rewrite requests.
        """
        if item.op.value != "update":
            return None
        source = self._support.lookup_file(files_map, item.path)
        if not source:
            return None
        original = str(source.get("content", ""))
        proposed = item.proposed_content or ""
        similarity = self._support.similarity(original, proposed)
        change_ratio = self._support.line_change_ratio(original, proposed)
        added_headings = self._support.added_headings(original, proposed)
        min_similarity = 0.75 if broad else 0.9
        max_change_ratio = 0.7 if broad else 0.35
        if similarity < min_similarity:
            return f"File {item.path} changed too aggressively (similarity={similarity:.2f})."
        if change_ratio > max_change_ratio:
            return f"File {item.path} changed too broadly (change_ratio={change_ratio:.2f})."
        if not broad and added_headings > 0:
            return f"File {item.path} adds new sections outside requested scope."
        return None

    def build_result(self, state: AgentGraphState) -> dict:
        """Return the final changeset together with its summary answer."""
        changeset = state.get("changeset", []) or []
        answer = self._support.build_summary(state, changeset)
        return {"changeset": changeset, "answer": answer}

    def _generate_hunks_for_contract(
        self,
        state: AgentGraphState,
        contract: dict,
        current_content: str,
    ) -> tuple[list[dict], str | None]:
        """Ask the LLM for hunks for one contract.

        Returns ``(hunks, None)`` on success or ``([], error_message)`` when
        the reply has no usable hunks. Hunks of unsupported type are dropped.
        """
        prompt_payload = {
            "request": state.get("message", ""),
            "contract": contract,
            "current_content": self._support.shorten(current_content, 18000),
            "previous_validation_feedback": state.get("validation_feedback", ""),
            "rag_context": self._support.shorten(state.get("rag_context", ""), 5000),
            "confluence_context": self._support.shorten(state.get("confluence_context", ""), 5000),
        }
        raw = self._llm.generate(
            "project_edits_hunks",
            json.dumps(prompt_payload, ensure_ascii=False),
            log_context="graph.project_edits.generate_changeset",
        )
        parsed = self._support.parse_json(raw)
        hunks = parsed.get("hunks", []) if isinstance(parsed, dict) else []
        if not isinstance(hunks, list) or not hunks:
            return [], "Model did not return contract hunks."
        supported = {"append_end", "replace_between", "replace_line_equals"}
        normalized: list[dict] = []
        for hunk in hunks:
            if isinstance(hunk, dict):
                kind = str(hunk.get("type", "")).strip().lower()
                if kind in supported:
                    normalized.append(
                        {
                            "type": kind,
                            "start_anchor": str(hunk.get("start_anchor", "")),
                            "end_anchor": str(hunk.get("end_anchor", "")),
                            "old_line": str(hunk.get("old_line", "")),
                            "new_text": str(hunk.get("new_text", "")),
                        }
                    )
        if not normalized:
            return [], "Model hunks are empty or invalid."
        return normalized, None

    def _extract_path_hint(self, message: str) -> str:
        """Return the first path-like word of ``message``, or ``""``.

        A word containing both ``/`` and ``.`` is returned as is; a word
        starting with "readme" (any case) maps to ``README.md``.
        """
        for raw_word in (message or "").replace("\\", "/").split():
            word = raw_word.strip("`'\".,:;()[]{}")
            looks_like_path = "/" in word and "." in word
            if looks_like_path:
                return word
            if word.lower().startswith("readme"):
                return "README.md"
        return ""
@@ -0,0 +1,142 @@
from difflib import SequenceMatcher
class ContractPatcher:
def apply(self, current_content: str, contract: dict, hunks: list[dict]) -> tuple[str | None, str | None]:
if not hunks:
return None, "No hunks were generated."
max_hunks = int(contract.get("max_hunks", 1) or 1)
if len(hunks) > max_hunks:
return None, f"Too many hunks: got={len(hunks)} allowed={max_hunks}."
allowed_blocks = contract.get("allowed_blocks", [])
if not isinstance(allowed_blocks, list) or not allowed_blocks:
return None, "No allowed blocks in edit contract."
result = current_content
total_changed_lines = 0
for idx, hunk in enumerate(hunks, start=1):
applied, changed_lines, error = self._apply_hunk(result, hunk, allowed_blocks)
if error:
return None, f"Hunk {idx} rejected: {error}"
result = applied
total_changed_lines += changed_lines
max_changed_lines = int(contract.get("max_changed_lines", 8) or 8)
if total_changed_lines > max_changed_lines:
return (
None,
f"Changed lines exceed contract limit: changed={total_changed_lines} allowed={max_changed_lines}.",
)
return result, None
def _apply_hunk(
self,
content: str,
hunk: dict,
allowed_blocks: list[dict],
) -> tuple[str, int, str | None]:
if not isinstance(hunk, dict):
return content, 0, "Invalid hunk payload."
kind = str(hunk.get("type", "")).strip().lower()
if kind not in {"append_end", "replace_between", "replace_line_equals"}:
return content, 0, f"Unsupported hunk type: {kind or '(empty)'}."
block = self._find_matching_block(hunk, allowed_blocks)
if block is None:
return content, 0, "Hunk does not match allowed contract blocks."
if kind == "append_end":
return self._apply_append_end(content, hunk, block)
if kind == "replace_between":
return self._apply_replace_between(content, hunk, block)
return self._apply_replace_line_equals(content, hunk, block)
def _find_matching_block(self, hunk: dict, allowed_blocks: list[dict]) -> dict | None:
kind = str(hunk.get("type", "")).strip().lower()
for block in allowed_blocks:
if not isinstance(block, dict):
continue
block_type = str(block.get("type", "")).strip().lower()
if block_type != kind:
continue
if kind == "replace_between":
start = str(hunk.get("start_anchor", "")).strip()
end = str(hunk.get("end_anchor", "")).strip()
if start != str(block.get("start_anchor", "")).strip():
continue
if end != str(block.get("end_anchor", "")).strip():
continue
if kind == "replace_line_equals":
old_line = str(hunk.get("old_line", "")).strip()
if old_line != str(block.get("old_line", "")).strip():
continue
return block
return None
def _apply_append_end(self, content: str, hunk: dict, block: dict) -> tuple[str, int, str | None]:
new_text = str(hunk.get("new_text", ""))
if not new_text.strip():
return content, 0, "append_end new_text is empty."
changed_lines = self._changed_line_count("", new_text)
block_limit = int(block.get("max_changed_lines", 6) or 6)
if changed_lines > block_limit:
return content, 0, f"append_end is too large: changed={changed_lines} allowed={block_limit}."
base = content.rstrip("\n")
suffix = new_text.strip("\n")
if not suffix:
return content, 0, "append_end resolved to empty suffix."
merged = f"{base}\n\n{suffix}\n" if base else f"{suffix}\n"
return merged, changed_lines, None
def _apply_replace_between(self, content: str, hunk: dict, block: dict) -> tuple[str, int, str | None]:
start_anchor = str(hunk.get("start_anchor", "")).strip()
end_anchor = str(hunk.get("end_anchor", "")).strip()
new_text = str(hunk.get("new_text", ""))
if not start_anchor or not end_anchor:
return content, 0, "replace_between anchors are required."
start_pos = content.find(start_anchor)
if start_pos < 0:
return content, 0, "start_anchor not found in file."
middle_start = start_pos + len(start_anchor)
end_pos = content.find(end_anchor, middle_start)
if end_pos < 0:
return content, 0, "end_anchor not found after start_anchor."
old_segment = content[middle_start:end_pos]
changed_lines = self._changed_line_count(old_segment, new_text)
block_limit = int(block.get("max_changed_lines", 6) or 6)
if changed_lines > block_limit:
return content, 0, f"replace_between is too large: changed={changed_lines} allowed={block_limit}."
merged = content[:middle_start] + new_text + content[end_pos:]
return merged, changed_lines, None
def _apply_replace_line_equals(self, content: str, hunk: dict, block: dict) -> tuple[str, int, str | None]:
old_line = str(hunk.get("old_line", "")).strip()
new_text = str(hunk.get("new_text", ""))
if not old_line:
return content, 0, "replace_line_equals old_line is required."
lines = content.splitlines(keepends=True)
matches = [idx for idx, line in enumerate(lines) if line.rstrip("\n") == old_line]
if len(matches) != 1:
return content, 0, f"replace_line_equals expected exactly one match, got={len(matches)}."
replacement = new_text.rstrip("\n") + "\n"
changed_lines = self._changed_line_count(old_line + "\n", replacement)
block_limit = int(block.get("max_changed_lines", 6) or 6)
if changed_lines > block_limit:
return content, 0, f"replace_line_equals is too large: changed={changed_lines} allowed={block_limit}."
lines[matches[0] : matches[0] + 1] = [replacement]
return "".join(lines), changed_lines, None
def _changed_line_count(self, old_text: str, new_text: str) -> int:
old_lines = (old_text or "").splitlines()
new_lines = (new_text or "").splitlines()
if not old_lines and not new_lines:
return 0
matcher = SequenceMatcher(None, old_lines, new_lines)
changed = 0
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
if tag == "equal":
continue
changed += max(i2 - i1, j2 - j1)
return max(changed, 1)
@@ -0,0 +1,116 @@
import json
import re
from difflib import SequenceMatcher
from app.modules.agent.engine.graphs.file_targeting import FileTargeting
from app.modules.agent.engine.graphs.state import AgentGraphState
from app.schemas.changeset import ChangeItem
class ProjectEditsSupport:
    """Helpers for the project-edits graph: file picking, JSON parsing, diff metrics."""

    def __init__(self, max_context_files: int = 12, max_preview_chars: int = 2500) -> None:
        self._max_context_files = max_context_files
        self._max_preview_chars = max_preview_chars
        self._targeting = FileTargeting()

    def pick_relevant_files(self, message: str, files_map: dict[str, dict]) -> list[dict]:
        """Rank files by how often request words appear in their path and content.

        Words shorter than four characters are ignored. A path hit scores 3, a
        content hit 1. Ties are ordered by candidate path. At most
        ``max_context_files`` candidates are returned.
        """
        words = {
            word
            for word in (message or "").lower().replace("/", " ").split()
            if len(word) >= 4
        }

        def relevance(path: str, content: str) -> int:
            path_text = path.lower()
            content_text = content.lower()
            return sum(
                (3 if word in path_text else 0) + (1 if word in content_text else 0)
                for word in words
            )

        ranked: list[tuple[int, dict]] = [
            (relevance(path, str(payload.get("content", ""))), self.as_candidate(payload))
            for path, payload in files_map.items()
        ]
        ordered = sorted(ranked, key=lambda pair: (-pair[0], pair[1]["path"]))
        return [candidate for _, candidate in ordered[: self._max_context_files]]

    def as_candidate(self, payload: dict) -> dict:
        """Reduce a files-map entry to string ``path``, ``content`` and ``content_hash``."""
        normalized_path = str(payload.get("path", "")).replace("\\", "/")
        return {
            "path": normalized_path,
            "content": str(payload.get("content", "")),
            "content_hash": str(payload.get("content_hash", "")),
        }

    def normalize_file_output(self, text: str) -> str:
        """Strip the text and remove a surrounding Markdown code fence, if any.

        The fence is removed only when the text has at least three lines.
        """
        value = (text or "").strip()
        is_fenced = value.startswith("```") and value.endswith("```")
        if is_fenced:
            rows = value.splitlines()
            if len(rows) >= 3:
                return "\n".join(rows[1:-1]).strip()
        return value

    def parse_json(self, raw: str):
        """Decode JSON from model output; return ``{}`` when decoding fails."""
        cleaned = self.normalize_file_output(raw)
        try:
            return json.loads(cleaned)
        except Exception:
            return {}

    def shorten(self, text: str, max_chars: int | None = None) -> str:
        """Strip ``text`` and cut it to the limit, marking truncated output."""
        limit = max_chars or self._max_preview_chars
        value = (text or "").strip()
        if len(value) > limit:
            return value[:limit].rstrip() + "\n...[truncated]"
        return value

    def collapse_whitespace(self, text: str) -> str:
        """Strip the text and replace every whitespace run with a single space."""
        stripped = (text or "").strip()
        return re.sub(r"\s+", " ", stripped)

    def similarity(self, original: str, updated: str) -> float:
        """Return the ``SequenceMatcher`` ratio of the two texts."""
        matcher = SequenceMatcher(None, original or "", updated or "")
        return matcher.ratio()

    def line_change_ratio(self, original: str, updated: str) -> float:
        """Return changed lines divided by the longer text's line count."""
        before = (original or "").splitlines()
        after = (updated or "").splitlines()
        if not before and not after:
            return 0.0
        opcodes = SequenceMatcher(None, before, after).get_opcodes()
        changed = sum(
            max(i2 - i1, j2 - j1)
            for tag, i1, i2, j1, j2 in opcodes
            if tag != "equal"
        )
        return changed / max(len(before), len(after), 1)

    def added_headings(self, original: str, updated: str) -> int:
        """Count distinct ``#``-prefixed lines of ``updated`` absent from ``original``."""

        def headings(text: str) -> set[str]:
            stripped_lines = (line.strip() for line in (text or "").splitlines())
            return {line for line in stripped_lines if line.startswith("#")}

        return len(headings(updated) - headings(original))

    def build_summary(self, state: AgentGraphState, changeset: list[ChangeItem]) -> str:
        """Compose the user-facing summary; at most 30 changes are listed."""
        if not changeset:
            return "Правки не сформированы: changeset пуст."
        header = [
            "Выполненные действия:",
            f"- Проанализирован запрос: {state.get('message', '')}",
            "- Сформирован контракт правок с разрешенными блоками изменений.",
            f"- Проведен self-check: {state.get('validation_feedback', 'без замечаний')}",
            "",
            "Измененные файлы:",
        ]
        changed = [f"- {item.op.value} {item.path}: {item.reason}" for item in changeset[:30]]
        return "\n".join(header + changed)

    def is_broad_rewrite_request(self, message: str) -> bool:
        """Return True when the message contains a "rewrite everything" marker."""
        lowered = (message or "").lower()
        for phrase in (
            "перепиши",
            "полностью",
            "целиком",
            "с нуля",
            "full rewrite",
            "rewrite all",
            "реорганизуй документ",
        ):
            if phrase in lowered:
                return True
        return False

    def lookup_file(self, files_map: dict[str, dict], path: str) -> dict | None:
        """Delegate the path lookup to ``FileTargeting.lookup_file``."""
        return self._targeting.lookup_file(files_map, path)
@@ -0,0 +1,47 @@
import logging
from langgraph.graph import END, START, StateGraph
from app.modules.agent.engine.graphs.progress import emit_progress_sync
from app.modules.agent.engine.graphs.state import AgentGraphState
from app.modules.agent.llm import AgentLlmService
# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)
class ProjectQaGraphFactory:
    """Build a one-node graph that answers a question about the project."""

    def __init__(self, llm: AgentLlmService) -> None:
        self._llm = llm

    def build(self, checkpointer=None):
        """Assemble and compile the START -> answer -> END graph."""
        graph = StateGraph(AgentGraphState)
        graph.add_node("answer", self._answer_node)
        graph.add_edge(START, "answer")
        graph.add_edge("answer", END)
        return graph.compile(checkpointer=checkpointer)

    def _answer_node(self, state: AgentGraphState) -> dict:
        """Generate the answer from the request plus RAG and Confluence context.

        Progress events are emitted before and after the LLM call.
        """
        emit_progress_sync(
            state,
            stage="graph.project_qa.answer",
            message="Готовлю ответ по контексту текущего проекта.",
        )
        prompt_sections = (
            f"User request:\n{state.get('message', '')}",
            f"RAG context:\n{state.get('rag_context', '')}",
            f"Confluence context:\n{state.get('confluence_context', '')}",
        )
        answer = self._llm.generate(
            "project_answer",
            "\n\n".join(prompt_sections),
            log_context="graph.project_qa.answer",
        )
        emit_progress_sync(
            state,
            stage="graph.project_qa.answer.done",
            message="Ответ по проекту сформирован.",
        )
        LOGGER.warning(
            "graph step result: graph=project_qa step=answer answer_len=%s",
            len(answer or ""),
        )
        return {"answer": answer}
@@ -0,0 +1,172 @@
from __future__ import annotations
import logging
from langgraph.graph import END, START, StateGraph
from app.modules.agent.engine.graphs.progress import emit_progress_sync
from app.modules.agent.engine.graphs.state import AgentGraphState
from app.modules.agent.engine.orchestrator.actions.project_qa_analyzer import ProjectQaAnalyzer
from app.modules.agent.engine.orchestrator.actions.project_qa_support import ProjectQaSupport
from app.modules.agent.llm import AgentLlmService
from app.modules.contracts import RagRetriever
from app.modules.rag.explain import ExplainPack, PromptBudgeter
LOGGER = logging.getLogger(__name__)
class ProjectQaConversationGraphFactory:
    """Builds a one-node graph that normalises the raw user message."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        # `llm` is accepted but not used by this factory.
        self._support = ProjectQaSupport()

    def build(self, checkpointer=None):
        """Wire START -> resolve_request -> END and return the compiled graph."""
        builder = StateGraph(AgentGraphState)
        builder.add_node("resolve_request", self._resolve_request)
        builder.add_edge(START, "resolve_request")
        builder.add_edge("resolve_request", END)
        return builder.compile(checkpointer=checkpointer)

    def _resolve_request(self, state: AgentGraphState) -> dict:
        """Resolve the message via the support helper and store it as ``resolved_request``."""
        emit_progress_sync(
            state,
            stage="graph.project_qa.conversation_understanding",
            message="Нормализую пользовательский запрос.",
        )
        raw_message = str(state.get("message", "") or "")
        resolved = self._support.resolve_request(raw_message)
        LOGGER.warning(
            "graph step result: graph=project_qa/conversation_understanding normalized=%s",
            resolved.get("normalized_message", ""),
        )
        return {"resolved_request": resolved}
class ProjectQaClassificationGraphFactory:
    """Builds a one-node graph that derives a question profile from the message."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        # `llm` is accepted but not used by this factory.
        self._support = ProjectQaSupport()

    def build(self, checkpointer=None):
        """Wire START -> classify_question -> END and return the compiled graph."""
        builder = StateGraph(AgentGraphState)
        builder.add_node("classify_question", self._classify_question)
        builder.add_edge(START, "classify_question")
        builder.add_edge("classify_question", END)
        return builder.compile(checkpointer=checkpointer)

    def _classify_question(self, state: AgentGraphState) -> dict:
        """Profile the normalised message, falling back to the raw one."""
        resolved = state.get("resolved_request", {}) or {}
        # Prefer the normalised text; otherwise use the original message.
        text = resolved.get("normalized_message") or state.get("message", "") or ""
        profile = self._support.build_profile(str(text))
        LOGGER.warning(
            "graph step result: graph=project_qa/question_classification domain=%s intent=%s",
            profile.get("domain"),
            profile.get("intent"),
        )
        return {"question_profile": profile}
class ProjectQaRetrievalGraphFactory:
    """Builds a one-node graph that assembles the source bundle for a question."""

    def __init__(self, rag: RagRetriever, llm: AgentLlmService | None = None) -> None:
        # The retriever is stored, but this node does not query it: the bundle
        # is built with an empty RAG item list (logged as legacy_rag=False).
        self._rag = rag
        self._support = ProjectQaSupport()

    def build(self, checkpointer=None):
        """Wire START -> retrieve_context -> END and return the compiled graph."""
        graph = StateGraph(AgentGraphState)
        graph.add_node("retrieve_context", self._retrieve_context)
        graph.add_edge(START, "retrieve_context")
        graph.add_edge("retrieve_context", END)
        return graph.compile(checkpointer=checkpointer)

    def _retrieve_context(self, state: AgentGraphState) -> dict:
        """Build ``source_bundle`` from the question profile and the files map.

        Returns a state update with the bundle under ``source_bundle``.
        """
        emit_progress_sync(state, stage="graph.project_qa.context_retrieval", message="Собираю контекст по проекту.")
        profile = state.get("question_profile", {}) or {}
        files_map = dict(state.get("files_map", {}) or {})
        rag_items: list[dict] = []
        source_bundle = self._support.build_source_bundle(profile, list(rag_items), files_map)
        LOGGER.warning(
            "graph step result: graph=project_qa/context_retrieval mode=%s rag_items=%s file_candidates=%s legacy_rag=%s",
            profile.get("domain"),
            len(source_bundle.get("rag_items", []) or []),
            len(source_bundle.get("file_candidates", []) or []),
            False,
        )
        return {"source_bundle": source_bundle}
class ProjectQaAnalysisGraphFactory:
    """Builds a one-node graph that turns collected context into an analysis brief."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        self._support = ProjectQaSupport()
        self._analyzer = ProjectQaAnalyzer()

    def build(self, checkpointer=None):
        """Wire START -> analyze_context -> END and return the compiled graph."""
        builder = StateGraph(AgentGraphState)
        builder.add_node("analyze_context", self._analyze_context)
        builder.add_edge(START, "analyze_context")
        builder.add_edge("analyze_context", END)
        return builder.compile(checkpointer=checkpointer)

    def _analyze_context(self, state: AgentGraphState) -> dict:
        """Produce ``analysis_brief`` from the explain pack or the source bundle."""
        raw_pack = state.get("explain_pack")
        if raw_pack:
            brief = self._analysis_from_pack(raw_pack)
        else:
            brief = self._analysis_from_bundle(state)
        LOGGER.warning(
            "graph step result: graph=project_qa/context_analysis findings=%s evidence=%s",
            len(brief.get("findings", []) or []),
            len(brief.get("evidence", []) or []),
        )
        return {"analysis_brief": brief}

    def _analysis_from_bundle(self, state: AgentGraphState) -> dict:
        """Run the code or docs analyzer over the source bundle, chosen by domain."""
        bundle = state.get("source_bundle", {}) or {}
        profile = bundle.get("profile", {}) or state.get("question_profile", {}) or {}
        rag_items = list(bundle.get("rag_items", []) or [])
        file_candidates = list(bundle.get("file_candidates", []) or [])
        if str(profile.get("domain")) == "code":
            return self._analyzer.analyze_code(profile, rag_items, file_candidates)
        return self._analyzer.analyze_docs(profile, rag_items)

    def _analysis_from_pack(self, raw_pack) -> dict:
        """Summarise an explain pack as findings, evidence and gaps.

        Uses at most three entrypoints, three trace paths and four code excerpts.
        """
        pack = ExplainPack.model_validate(raw_pack)
        findings: list[str] = []
        evidence: list[str] = []
        for entry in pack.selected_entrypoints[:3]:
            handler = entry.metadata.get('handler_symbol_id', '')
            findings.append(f"Entrypoint `{entry.title}` maps to handler `{handler}`.")
            if entry.source:
                evidence.append(entry.source)
        findings.extend(
            f"Trace path: {' -> '.join(trace.symbol_ids)}"
            for trace in pack.trace_paths[:3]
            if trace.symbol_ids
        )
        evidence.extend(
            f"{excerpt.path}:{excerpt.start_line}-{excerpt.end_line} [{excerpt.evidence_id}]"
            for excerpt in pack.code_excerpts[:4]
        )
        if not findings:
            findings = ["No explain trace was built from the available code evidence."]
        return {
            "subject": pack.intent.normalized_query,
            "findings": findings,
            "evidence": evidence,
            "gaps": list(pack.missing),
            "answer_mode": "summary",
        }
class ProjectQaAnswerGraphFactory:
    """Builds a one-node graph that composes the final answer text."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        self._support = ProjectQaSupport()
        self._llm = llm
        self._budgeter = PromptBudgeter()

    def build(self, checkpointer=None):
        """Wire START -> compose_answer -> END and return the compiled graph."""
        graph = StateGraph(AgentGraphState)
        graph.add_node("compose_answer", self._compose_answer)
        graph.add_edge(START, "compose_answer")
        graph.add_edge("compose_answer", END)
        return graph.compile(checkpointer=checkpointer)

    def _compose_answer(self, state: AgentGraphState) -> dict:
        """Return ``answer_brief`` and ``final_answer`` for the current state.

        The LLM explain answer is preferred. When it is empty, the answer is
        composed from the brief by the support helper.
        """
        profile = state.get("question_profile", {}) or {}
        analysis = state.get("analysis_brief", {}) or {}
        brief = self._support.build_answer_brief(profile, analysis)
        explain_pack = state.get("explain_pack")
        answer = self._compose_explain_answer(state, explain_pack)
        if not answer:
            answer = self._support.compose_answer(brief)
        LOGGER.warning("graph step result: graph=project_qa/answer_composition answer_len=%s", len(answer or ""))
        return {"answer_brief": brief, "final_answer": answer}

    def _compose_explain_answer(self, state: AgentGraphState, raw_pack) -> str:
        """Ask the LLM for an answer grounded in the explain pack.

        Returns an empty string when there is no pack, no LLM, or no reply,
        so the caller can fall back to the brief-based answer.
        """
        # Truthiness check, matching how the analysis step tests the pack:
        # an empty pack is skipped rather than sent to validation.
        if not raw_pack or self._llm is None:
            return ""
        pack = ExplainPack.model_validate(raw_pack)
        prompt_input = self._budgeter.build_prompt_input(str(state.get("message", "") or ""), pack)
        generated = self._llm.generate(
            "code_explain_answer_v2",
            prompt_input,
            log_context="graph.project_qa.answer_v2",
        )
        # Guard against a missing reply before stripping.
        return (generated or "").strip()
@@ -0,0 +1,40 @@
from typing import TypedDict
from app.schemas.changeset import ChangeItem
class AgentGraphState(TypedDict, total=False):
    """Shared state passed between graph nodes; every key is optional."""

    task_id: str
    project_id: str
    message: str
    progress_key: str
    rag_context: str
    confluence_context: str
    files_map: dict[str, dict]
    docs_candidates: list[dict]
    target_path: str
    target_file_content: str
    target_file_hash: str
    existing_docs_detected: bool
    existing_docs_summary: str
    docs_strategy: str
    rules_bundle: str
    doc_plan: str
    generated_doc: str
    generated_docs_bundle: list[dict]
    validation_passed: bool
    validation_feedback: str
    validation_attempts: int
    resolved_request: dict
    question_profile: dict
    source_bundle: dict
    # Serialized explain pack; declared here so a value supplied by the caller
    # is not dropped as an unknown state key.
    explain_pack: dict
    analysis_brief: dict
    answer_brief: dict
    final_answer: str
    answer: str
    changeset: list[ChangeItem]
    edits_requested_path: str
    edits_context_files: list[dict]
    edits_plan: list[dict]
    edits_contracts: list[dict]
    edits_generation_feedback: str
@@ -0,0 +1,21 @@
from app.modules.agent.engine.orchestrator.models import (
ExecutionPlan,
OrchestratorResult,
PlanStep,
Scenario,
StepResult,
TaskSpec,
)
from app.modules.agent.engine.orchestrator.service import OrchestratorService
from app.modules.agent.engine.orchestrator.task_spec_builder import TaskSpecBuilder
# Public names exported on star-import; each one is imported above.
__all__ = [
"ExecutionPlan",
"OrchestratorResult",
"OrchestratorService",
"PlanStep",
"Scenario",
"StepResult",
"TaskSpec",
"TaskSpecBuilder",
]
@@ -0,0 +1,17 @@
from app.modules.agent.engine.orchestrator.actions.code_explain_actions import CodeExplainActions
from app.modules.agent.engine.orchestrator.actions.docs_actions import DocsActions
from app.modules.agent.engine.orchestrator.actions.edit_actions import EditActions
from app.modules.agent.engine.orchestrator.actions.explain_actions import ExplainActions
from app.modules.agent.engine.orchestrator.actions.gherkin_actions import GherkinActions
from app.modules.agent.engine.orchestrator.actions.project_qa_actions import ProjectQaActions
from app.modules.agent.engine.orchestrator.actions.review_actions import ReviewActions
# Public names exported on star-import: the action classes imported above.
__all__ = [
"CodeExplainActions",
"DocsActions",
"EditActions",
"ExplainActions",
"GherkinActions",
"ProjectQaActions",
"ReviewActions",
]
@@ -0,0 +1,46 @@
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType
from app.modules.rag.explain.intent_builder import ExplainIntentBuilder
from app.modules.rag.explain.models import ExplainPack
if TYPE_CHECKING:
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
LOGGER = logging.getLogger(__name__)
class CodeExplainActions(ActionSupport):
    """Orchestrator action that builds the explain pack for a code question."""

    def __init__(self, retriever: CodeExplainRetrieverV2 | None = None) -> None:
        self._retriever = retriever
        self._intent_builder = ExplainIntentBuilder()

    def build_code_explain_pack(self, ctx: ExecutionContext) -> list[str]:
        """Build the explain pack and store it as the ``explain_pack`` artifact.

        Without a retriever, the pack holds only the intent and a
        ``code_explain_retriever_unavailable`` marker in ``missing``.
        Returns a one-element list with the stored artifact id.
        """
        bundle = self.get(ctx, "source_bundle", {}) or {}
        file_candidates = list(bundle.get("file_candidates", []) or [])
        task = ctx.task
        if self._retriever is not None:
            pack = self._retriever.build_pack(
                task.rag_session_id,
                task.user_message,
                file_candidates=file_candidates,
            )
        else:
            pack = ExplainPack(
                intent=self._intent_builder.build(task.user_message),
                missing=["code_explain_retriever_unavailable"],
            )
        LOGGER.warning(
            "code explain action: task_id=%s entrypoints=%s seeds=%s paths=%s excerpts=%s missing=%s",
            ctx.task.task_id,
            len(pack.selected_entrypoints),
            len(pack.seed_symbols),
            len(pack.trace_paths),
            len(pack.code_excerpts),
            pack.missing,
        )
        serialized = pack.model_dump(mode="json")
        return [self.put(ctx, "explain_pack", ArtifactType.STRUCTURED_JSON, serialized)]
@@ -0,0 +1,26 @@
from __future__ import annotations
from uuid import uuid4
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType, EvidenceItem
class ActionSupport:
    """Helpers shared by action classes for artifacts and evidence on the context."""

    def put(self, ctx: ExecutionContext, key: str, artifact_type: ArtifactType, value, *, meta: dict | None = None) -> str:
        """Store ``value`` as an artifact under ``key`` and return its artifact id."""
        stored = ctx.artifacts.put(key=key, artifact_type=artifact_type, content=value, meta=meta)
        return stored.artifact_id

    def get(self, ctx: ExecutionContext, key: str, default=None):
        """Return the content of the artifact stored under ``key``, or ``default``."""
        return ctx.artifacts.get_content(key, default)

    def add_evidence(self, ctx: ExecutionContext, *, source_type: str, source_ref: str, snippet: str, score: float = 0.8) -> str:
        """Record one evidence item and return its generated id.

        The snippet is stripped and cut to 600 characters. The score is
        clamped into the range 0.0 to 1.0.
        """
        trimmed = (snippet or "").strip()[:600]
        clamped = max(0.0, min(1.0, float(score)))
        item = EvidenceItem(
            evidence_id=f"evidence_{uuid4().hex}",
            source_type=source_type,
            source_ref=source_ref,
            snippet=trimmed,
            score=clamped,
        )
        ctx.evidences.put_many([item])
        return item.evidence_id
@@ -0,0 +1,95 @@
from __future__ import annotations
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType
class DocsActions(ActionSupport):
    """Orchestrator actions for the documentation-update scenario.

    Every action stores its result as an artifact on the execution context
    and returns the ids of the artifacts it wrote.
    """

    def extract_change_intents(self, ctx: ExecutionContext) -> list[str]:
        """Store ``change_intents``: a text summary plus fixed per-area intents."""
        source_text = str(self.get(ctx, "source_doc_text", "") or ctx.task.user_message)
        payload = {
            "summary": source_text[:240],
            "api": ["Update endpoint behavior contract"],
            "logic": ["Adjust reusable business rules"],
            "db": ["Reflect schema/table notes if needed"],
            "ui": ["Adjust form behavior and validation"],
        }
        return [self.put(ctx, "change_intents", ArtifactType.STRUCTURED_JSON, payload)]

    def map_to_doc_tree(self, ctx: ExecutionContext) -> list[str]:
        """Store ``doc_targets``: the fixed list of documentation paths to update."""
        payload = {
            "targets": [
                "docs/api/increment.md",
                "docs/logic/increment.md",
                "docs/db/increment.md",
                "docs/ui/increment.md",
            ]
        }
        return [self.put(ctx, "doc_targets", ArtifactType.STRUCTURED_JSON, payload)]

    def load_current_docs_context(self, ctx: ExecutionContext) -> list[str]:
        """Store ``current_docs_context``: content and hash of each target file."""
        files_map = dict(ctx.task.metadata.get("files_map", {}) or {})
        targets = (self.get(ctx, "doc_targets", {}) or {}).get("targets", [])
        snapshot = []
        for path in targets:
            # Targets missing from the files map yield empty content and hash.
            known = files_map.get(path) or {}
            snapshot.append(
                {
                    "path": path,
                    "content": str(known.get("content", "")),
                    "content_hash": str(known.get("content_hash", "")),
                }
            )
        return [self.put(ctx, "current_docs_context", ArtifactType.STRUCTURED_JSON, {"files": snapshot})]

    def generate_doc_updates(self, ctx: ExecutionContext) -> list[str]:
        """Store ``generated_doc_bundle``: one templated document per target path."""
        intents = self.get(ctx, "change_intents", {}) or {}
        targets = (self.get(ctx, "doc_targets", {}) or {}).get("targets", [])
        generated = []
        for path in targets:
            body_lines = [
                f"# Increment Update: {path}",
                "",
                "## Scope",
                str(intents.get("summary", "")),
                "",
                "## Changes",
                "- Updated according to analytics increment.",
            ]
            generated.append(
                {
                    "path": path,
                    "content": "\n".join(body_lines),
                    "reason": "align docs with analytics increment",
                }
            )
        return [self.put(ctx, "generated_doc_bundle", ArtifactType.DOC_BUNDLE, generated)]

    def cross_file_validation(self, ctx: ExecutionContext) -> list[str]:
        """Store ``consistency_report``: whether API and logic docs are both present."""
        bundle = self.get(ctx, "generated_doc_bundle", []) or []
        paths = [str(entry.get("path", "")) for entry in bundle if isinstance(entry, dict)]
        has_api = any(path.startswith("docs/api/") for path in paths)
        has_required = has_api and any(path.startswith("docs/logic/") for path in paths)
        payload = {"paths": paths, "required_core_paths_present": has_required}
        return [self.put(ctx, "consistency_report", ArtifactType.STRUCTURED_JSON, payload)]

    def build_changeset(self, ctx: ExecutionContext) -> list[str]:
        """Store ``final_changeset``: one ``update`` item per generated document."""
        bundle = self.get(ctx, "generated_doc_bundle", []) or []
        # Entries that are not dicts are skipped.
        items = [
            {
                "op": "update",
                "path": str(entry.get("path", "")).strip(),
                "base_hash": "orchestrator-generated",
                "proposed_content": str(entry.get("content", "")),
                "reason": str(entry.get("reason", "documentation update")),
                "hunks": [],
            }
            for entry in bundle
            if isinstance(entry, dict)
        ]
        return [self.put(ctx, "final_changeset", ArtifactType.CHANGESET, items)]

    def compose_summary(self, ctx: ExecutionContext) -> list[str]:
        """Store ``final_answer``: a one-line summary with the changeset size."""
        changeset = self.get(ctx, "final_changeset", []) or []
        count = len(changeset)
        message = f"Prepared documentation changeset with {count} files updated."
        return [self.put(ctx, "final_answer", ArtifactType.TEXT, message)]
@@ -0,0 +1,101 @@
from __future__ import annotations
import re
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType
class EditActions(ActionSupport):
    """Orchestrator actions for the targeted single-file edit scenario.

    Every action stores its result as an artifact on the execution context
    and returns the ids of the artifacts it wrote.
    """

    # A path-like token ending in a supported text/config extension. The
    # character class accepts both "/" and "\" so Windows-style paths match
    # as a whole before they are normalised.
    _PATH_PATTERN = re.compile(
        r"\b[\w./\\-]+\.(md|txt|rst|yaml|yml|json|toml|ini|cfg)\b",
        flags=re.IGNORECASE,
    )

    def resolve_target(self, ctx: ExecutionContext) -> list[str]:
        """Store ``resolved_target``: the file path the request refers to.

        The path comes from the message. If the files map has a matching
        entry, that entry's path is used. If no path was found, the first
        key of the files map is the fallback.
        """
        message = ctx.task.user_message
        files_map = dict(ctx.task.metadata.get("files_map", {}) or {})
        requested = self._extract_path(message)
        matched = self._lookup_source(files_map, requested)
        if matched:
            requested = str(matched.get("path") or requested or "")
        if not requested and files_map:
            requested = next(iter(files_map))
        payload = {"path": requested or "", "allowed": bool(requested)}
        return [self.put(ctx, "resolved_target", ArtifactType.STRUCTURED_JSON, payload)]

    def load_target_context(self, ctx: ExecutionContext) -> list[str]:
        """Store ``target_context``: path, content and hash of the resolved file."""
        files_map = dict(ctx.task.metadata.get("files_map", {}) or {})
        resolved = self.get(ctx, "resolved_target", {}) or {}
        path = str(resolved.get("path", ""))
        source = dict(self._lookup_source(files_map, path) or {})
        current = {
            "path": str(source.get("path", "")) or path,
            "content": str(source.get("content", "")),
            "content_hash": str(source.get("content_hash", "")),
        }
        return [self.put(ctx, "target_context", ArtifactType.STRUCTURED_JSON, current)]

    def plan_minimal_patch(self, ctx: ExecutionContext) -> list[str]:
        """Store ``patch_plan``: target path plus the first 240 chars of the request."""
        target = self.get(ctx, "target_context", {}) or {}
        plan = {
            "path": target.get("path", ""),
            "intent": "minimal_update",
            "instruction": ctx.task.user_message[:240],
        }
        return [self.put(ctx, "patch_plan", ArtifactType.STRUCTURED_JSON, plan)]

    def generate_patch(self, ctx: ExecutionContext) -> list[str]:
        """Store ``raw_changeset``: one item appending an HTML-comment note.

        The item is an ``update`` when the target has content, otherwise a
        ``create`` with no base hash.
        """
        target = self.get(ctx, "target_context", {}) or {}
        plan = self.get(ctx, "patch_plan", {}) or {}
        path = str(target.get("path", ""))
        base = str(target.get("content_hash", "") or "orchestrator-generated")
        original = str(target.get("content", ""))
        note = f"\n\n<!-- orchestrator note: {plan.get('instruction', '')[:100]} -->\n"
        proposed = (original + note).strip() if original else note.strip()
        changeset = [
            {
                "op": "update" if original else "create",
                "path": path,
                "base_hash": base if original else None,
                "proposed_content": proposed,
                "reason": "targeted file update",
                "hunks": [],
            }
        ]
        return [self.put(ctx, "raw_changeset", ArtifactType.CHANGESET, changeset)]

    def validate_patch_safety(self, ctx: ExecutionContext) -> list[str]:
        """Store ``patch_validation_report``: safe only for exactly one item."""
        changeset = self.get(ctx, "raw_changeset", []) or []
        safe = len(changeset) == 1
        report = {"safe": safe, "items": len(changeset), "reason": "single-file patch expected"}
        return [self.put(ctx, "patch_validation_report", ArtifactType.STRUCTURED_JSON, report)]

    def finalize_changeset(self, ctx: ExecutionContext) -> list[str]:
        """Store ``final_changeset``: the raw changeset, or empty if unsafe."""
        report = self.get(ctx, "patch_validation_report", {}) or {}
        if not report.get("safe"):
            return [self.put(ctx, "final_changeset", ArtifactType.CHANGESET, [])]
        changeset = self.get(ctx, "raw_changeset", []) or []
        return [self.put(ctx, "final_changeset", ArtifactType.CHANGESET, changeset)]

    def compose_edit_summary(self, ctx: ExecutionContext) -> list[str]:
        """Store ``final_answer``: a one-line summary with the changeset size."""
        count = len(self.get(ctx, "final_changeset", []) or [])
        text = f"Prepared targeted edit changeset with {count} item(s)."
        return [self.put(ctx, "final_answer", ArtifactType.TEXT, text)]

    def _extract_path(self, text: str) -> str | None:
        """Return the first file path mentioned in ``text``, using "/" separators.

        Returns ``None`` when no token with a supported extension is found.
        """
        match = self._PATH_PATTERN.search(text or "")
        if not match:
            return None
        return match.group(0).replace("\\", "/").strip()

    def _lookup_source(self, files_map: dict[str, dict], path: str | None) -> dict | None:
        """Find the ``files_map`` entry for ``path``.

        An exact key match is tried first, then a case-insensitive match
        with backslashes treated as "/". Returns ``None`` if nothing matches.
        """
        if not path:
            return None
        normalized = str(path).replace("\\", "/").strip()
        if not normalized:
            return None
        source = files_map.get(normalized)
        if source:
            return source
        normalized_low = normalized.lower()
        for key, value in files_map.items():
            if str(key).replace("\\", "/").lower() == normalized_low:
                return value
        return None
@@ -0,0 +1,259 @@
from __future__ import annotations
from collections import Counter
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType
class ExplainActions(ActionSupport):
    """Orchestrator actions for the explain scenario.

    The actions gather retrieved items, record evidence, and compose a
    rule-based answer in Russian or English depending on the request text.
    """

    def collect_sources(self, ctx: ExecutionContext) -> list[str]:
        """Store ``sources`` and register evidence for up to five RAG items."""
        metadata = ctx.task.metadata
        rag_items = list(metadata.get("rag_items", []) or [])
        files_map = dict(metadata.get("files_map", {}) or {})
        payload = {
            "rag_items": rag_items,
            "rag_context": str(metadata.get("rag_context", "")),
            "confluence_context": str(metadata.get("confluence_context", "")),
            "files_count": len(files_map),
            "source_profile": self._source_profile(rag_items),
        }
        evidence_ids: list[str] = []
        for item in rag_items[:5]:
            snippet = str(item.get("content", "") or "").strip()
            if snippet:
                evidence_id = self.add_evidence(
                    ctx,
                    source_type="rag_chunk",
                    source_ref=str(item.get("source", ctx.task.rag_session_id)),
                    snippet=snippet,
                    score=0.9,
                )
                evidence_ids.append(evidence_id)
        return [
            self.put(
                ctx,
                "sources",
                ArtifactType.STRUCTURED_JSON,
                payload,
                meta={"evidence_ids": evidence_ids},
            )
        ]

    def extract_logic(self, ctx: ExecutionContext) -> list[str]:
        """Store ``logic_model``: the request, source profile and guidance notes."""
        sources = self.get(ctx, "sources", {}) or {}
        message = ctx.task.user_message
        profile = str(sources.get("source_profile", "docs"))
        ru = self._is_russian(message)
        if profile == "code" and ru:
            notes = "Используй код как основной источник и ссылайся на конкретные файлы и слои."
        elif profile == "code":
            notes = "Use code as the primary source and cite concrete files/layers."
        elif ru:
            notes = "Используй требования и документацию как основной источник."
        else:
            notes = "Use requirements/docs as primary source over code."
        logic = {
            "request": message,
            "assumptions": [f"{profile}-first"],
            "notes": notes,
            "source_summary": sources,
        }
        return [self.put(ctx, "logic_model", ArtifactType.STRUCTURED_JSON, logic)]

    def summarize(self, ctx: ExecutionContext) -> list[str]:
        """Store ``final_answer``: the code or docs answer, chosen by profile."""
        sources = self.get(ctx, "sources", {}) or {}
        profile = str(sources.get("source_profile", "docs"))
        items = list(sources.get("rag_items", []) or [])
        ru = self._is_russian(ctx.task.user_message)
        if profile == "code":
            answer = self._code_answer(items, russian=ru)
        else:
            answer = self._docs_answer(items, russian=ru)
        return [self.put(ctx, "final_answer", ArtifactType.TEXT, answer)]

    def _source_profile(self, items: list[dict]) -> str:
        """Return ``"code"`` if any item's layer starts with "C", else ``"docs"``."""
        layers = [str(item.get("layer", "") or "") for item in items]
        for layer in layers:
            if layer.startswith("C"):
                return "code"
        return "docs"

    def _is_russian(self, text: str) -> bool:
        """Return True if ``text`` contains at least one Russian letter."""
        for ch in text:
            low = ch.lower()
            if "а" <= low <= "я" or low == "ё":
                return True
        return False

    def _code_answer(self, items: list[dict], *, russian: bool) -> str:
        """Compose the markdown answer for code questions."""
        if not items:
            if russian:
                return "Не удалось найти релевантный кодовый контекст по этому запросу."
            return "No relevant code context was found for this request."
        details = self._code_details(items, russian=russian)
        refs = self._code_references(items, russian=russian)
        sections = ["## Кратко" if russian else "## Summary", details]
        if refs:
            sections.append(refs)
        return "\n\n".join(section for section in sections if section.strip())

    def _docs_answer(self, items: list[dict], *, russian: bool) -> str:
        """Return the fixed answer used for documentation-based questions."""
        if russian:
            return "Запрошенная часть проекта объяснена на основе требований и документации."
        return "The requested project part is explained from requirements/docs context."

    def _code_details(self, items: list[dict], *, russian: bool) -> str:
        """Build the bullet list describing what the retrieved code items show."""
        if not items:
            return ""

        def of_layer(layer_name: str) -> list[dict]:
            return [item for item in items if str(item.get("layer", "")) == layer_name]

        symbol_items = of_layer("C1_SYMBOL_CATALOG")
        edge_items = of_layer("C2_DEPENDENCY_GRAPH")
        source_items = of_layer("C0_SOURCE_CHUNKS")

        lines = ["### Что видно по коду" if russian else "### What the code shows"]

        alias = self._find_alias_symbol(symbol_items)
        if alias:
            imported_from = str(alias.get("metadata", {}).get("lang_payload", {}).get("imported_from", "")).strip()
            if russian:
                lines.append(f"- `ConfigManager` в проекте доступен как alias в `{alias.get('source', '')}` и указывает на `{imported_from}`.")
            else:
                lines.append(f"- `ConfigManager` is exposed as an alias in `{alias.get('source', '')}` and points to `{imported_from}`.")

        management_hint = self._management_summary(symbol_items, edge_items, source_items, russian=russian)
        if management_hint:
            lines.extend(management_hint)

        # At most two symbol bullets, taken from the first four symbol items.
        reported = 0
        for item in symbol_items[:4]:
            title = str(item.get("title", "") or "")
            source = str(item.get("source", "") or "")
            content = str(item.get("content", "") or "").strip()
            summary = content.splitlines()[-1].strip() if content else ""
            if not title or self._is_test_path(source) or self._is_control_symbol(title):
                continue
            if russian:
                lines.append(f"- Символ `{title}` из `{source}`: {summary}")
            else:
                lines.append(f"- Symbol `{title}` from `{source}`: {summary}")
            reported += 1
            if reported >= 2:
                break

        # Group dependency targets per source symbol, keeping first-seen order.
        edge_map: dict[str, list[str]] = {}
        for item in edge_items:
            meta = item.get("metadata", {}) or {}
            src_qname = str(meta.get("src_qname", "") or "").strip()
            dst_ref = str(meta.get("dst_ref", "") or "").strip()
            if not src_qname or not dst_ref:
                continue
            if self._is_test_path(str(item.get("source", "") or "")):
                continue
            known_targets = edge_map.setdefault(src_qname, [])
            if dst_ref not in known_targets:
                known_targets.append(dst_ref)
        for src_qname, targets in list(edge_map.items())[:3]:
            joined = ", ".join(targets[:4])
            if russian:
                lines.append(f"- `{src_qname}` вызывает или использует: {joined}.")
            else:
                lines.append(f"- `{src_qname}` calls or uses: {joined}.")

        for item in source_items[:2]:
            source = str(item.get("source", "") or "")
            content = str(item.get("content", "") or "")
            if self._is_test_path(source):
                continue
            lowered = content.lower()
            if "management" in lowered or "control" in lowered:
                snippet = " ".join(content.splitlines()[:4]).strip()
                if russian:
                    lines.append(f"- В `{source}` есть прямое указание на управление через конфиг/API: `{snippet[:220]}`")
                else:
                    lines.append(f"- `{source}` directly mentions config/API control: `{snippet[:220]}`")
        return "\n".join(lines)

    def _code_references(self, items: list[dict], *, russian: bool) -> str:
        """List the three most frequent non-test source paths among the items."""
        paths = []
        for item in items:
            if not item.get("source"):
                continue
            path = str(item.get("source", "") or "")
            if not self._is_test_path(path):
                paths.append(path)
        if not paths:
            return ""
        lines = ["### Где смотреть в проекте" if russian else "### Where to look in the project"]
        lines.extend(f"- `{path}`" for path, _count in Counter(paths).most_common(3))
        return "\n".join(lines)

    def _find_alias_symbol(self, items: list[dict]) -> dict | None:
        """Return the first item whose qname is ``ConfigManager`` and is an import alias."""
        for item in items:
            meta = item.get("metadata", {}) or {}
            payload = meta.get("lang_payload", {}) or {}
            if str(meta.get("qname", "") or "") == "ConfigManager" and payload.get("import_alias"):
                return item
        return None

    def _is_test_path(self, path: str) -> bool:
        """Return True if the path sits in a ``tests`` directory or names a ``test_`` file."""
        lowered = path.lower()
        if lowered.startswith(("tests/", "test_")):
            return True
        return "/tests/" in lowered or "/test_" in lowered

    def _is_control_symbol(self, title: str) -> bool:
        """Return True if the title contains one of the control-channel tokens."""
        lowered = title.lower()
        control_tokens = ("controlchannel", "controlchannelbridge", "on_start", "on_stop", "on_status")
        return any(token in lowered for token in control_tokens)

    def _management_summary(
        self,
        symbol_items: list[dict],
        edge_items: list[dict],
        source_items: list[dict],
        *,
        russian: bool,
    ) -> list[str]:
        """Build bullets about the management interface found in non-test items."""
        known_qnames = {
            str((item.get("metadata", {}) or {}).get("qname", "") or "")
            for item in symbol_items
            if not self._is_test_path(str(item.get("source", "") or ""))
        }
        source_texts = [
            str(item.get("content", "") or "")
            for item in source_items
            if not self._is_test_path(str(item.get("source", "") or ""))
        ]
        bullets: list[str] = []

        def mentions_management(text: str) -> bool:
            lowered = text.lower()
            return "управление через api" in lowered or "section management" in lowered or "секция management" in lowered

        if any(mentions_management(text) for text in source_texts):
            if russian:
                bullets.append("- Для `ConfigManager` в коде предусмотрен отдельный интерфейс управления через API/конфиг: это прямо указано в публичной точке входа модуля.")
            else:
                bullets.append("- `ConfigManager` has a dedicated API/config-based management interface; this is stated in the module's public entrypoint.")

        if "ControlChannel" in known_qnames:
            if russian:
                bullets.append("- Базовый контракт управления задает `ControlChannel`: он определяет команды `start` и `stop` для внешнего канала управления.")
            else:
                bullets.append("- The base management contract is `ControlChannel`, which defines external `start` and `stop` commands.")

        if "ControlChannelBridge" in known_qnames:
            if russian:
                bullets.append("- `ControlChannelBridge` связывает внешний канал управления с lifecycle-методами менеджера: `on_start`, `on_stop`, `on_status`.")
            else:
                bullets.append("- `ControlChannelBridge` maps the external control channel to manager lifecycle methods: `on_start`, `on_stop`, `on_status`.")

        runtime_targets = {"self._start_runtime", "self._stop_runtime", "self._get_status"}
        edge_refs = []
        for item in edge_items:
            if self._is_test_path(str(item.get("source", "") or "")):
                continue
            meta = item.get("metadata", {}) or {}
            src = str(meta.get("src_qname", "") or "")
            dst = str(meta.get("dst_ref", "") or "")
            if src.startswith("ControlChannelBridge.") and dst in runtime_targets:
                edge_refs.append((src, dst))
        if edge_refs:
            mappings = ", ".join(f"{src} -> {dst}" for src, dst in edge_refs[:3])
            if russian:
                bullets.append(f"- По связям в коде видно, что команды управления маршрутизируются так: {mappings}.")
            else:
                bullets.append(f"- The code relationships show the management command routing: {mappings}.")
        return bullets
@@ -0,0 +1,76 @@
from __future__ import annotations
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType
class GherkinActions(ActionSupport):
    """Orchestrator actions for the Gherkin test-model scenario.

    Every action stores its result as an artifact on the execution context
    and returns the ids of the artifacts it wrote.
    """

    def extract_increment_scope(self, ctx: ExecutionContext) -> list[str]:
        """Store ``increment_scope``: a 220-char summary and fixed entities."""
        text = str(self.get(ctx, "source_doc_text", "") or ctx.task.user_message)
        scope = {
            "title": "Increment scope",
            "summary": text[:220],
            "entities": ["User", "System"],
        }
        return [self.put(ctx, "increment_scope", ArtifactType.STRUCTURED_JSON, scope)]

    def partition_features(self, ctx: ExecutionContext) -> list[str]:
        """Store ``feature_groups``: a main-flow group and a validation group."""
        scope = self.get(ctx, "increment_scope", {}) or {}
        groups = [
            {"feature": "Main flow", "goal": scope.get("summary", "")},
            {"feature": "Validation", "goal": "Input validation and error behavior"},
        ]
        return [self.put(ctx, "feature_groups", ArtifactType.STRUCTURED_JSON, groups)]

    def generate_gherkin_bundle(self, ctx: ExecutionContext) -> list[str]:
        """Store ``gherkin_bundle``: one templated feature file per group."""
        groups = self.get(ctx, "feature_groups", []) or []
        files = []
        for idx, group in enumerate(groups, start=1):
            feature_name = str(group.get("feature", f"Feature {idx}"))
            content = "\n".join(
                [
                    f"Feature: {feature_name}",
                    "  Scenario: Happy path",
                    "    Given system is available",
                    "    When user performs increment action",
                    "    Then system applies expected increment behavior",
                ]
            )
            files.append({"path": f"tests/gherkin/feature_{idx}.feature", "content": content})
        return [self.put(ctx, "gherkin_bundle", ArtifactType.GHERKIN_BUNDLE, files)]

    def lint_gherkin(self, ctx: ExecutionContext) -> list[str]:
        """Store ``gherkin_lint_report``: files lacking ``Feature:`` or ``Scenario:``.

        A bundle entry that is not a dict counts as invalid and is reported
        under the path ``"unknown"``.
        """
        bundle = self.get(ctx, "gherkin_bundle", []) or []
        invalid = []
        for item in bundle:
            if isinstance(item, dict):
                content = str(item.get("content", ""))
                path = str(item.get("path", "unknown"))
            else:
                # Non-dict entries have no content or path to read.
                content = ""
                path = "unknown"
            if "Feature:" not in content or "Scenario:" not in content:
                invalid.append(path)
        report = {"valid": not invalid, "invalid_files": invalid}
        return [self.put(ctx, "gherkin_lint_report", ArtifactType.STRUCTURED_JSON, report)]

    def validate_coverage(self, ctx: ExecutionContext) -> list[str]:
        """Store ``coverage_report``: covered when the bundle has any file."""
        bundle = self.get(ctx, "gherkin_bundle", []) or []
        report = {"covered": len(bundle) > 0, "feature_files": len(bundle)}
        return [self.put(ctx, "coverage_report", ArtifactType.STRUCTURED_JSON, report)]

    def compose_test_model_summary(self, ctx: ExecutionContext) -> list[str]:
        """Store ``final_answer`` and a ``final_changeset`` creating each feature file."""
        bundle = self.get(ctx, "gherkin_bundle", []) or []
        summary = f"Prepared gherkin model with {len(bundle)} feature file(s)."
        changeset = [
            {
                "op": "create",
                "path": str(item.get("path", "")),
                "base_hash": None,
                "proposed_content": str(item.get("content", "")),
                "reason": "generated gherkin feature",
                "hunks": [],
            }
            for item in bundle
            if isinstance(item, dict)
        ]
        return [
            self.put(ctx, "final_answer", ArtifactType.TEXT, summary),
            self.put(ctx, "final_changeset", ArtifactType.CHANGESET, changeset),
        ]
@@ -0,0 +1,117 @@
from __future__ import annotations
from app.modules.agent.engine.orchestrator.actions.project_qa_analyzer import ProjectQaAnalyzer
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
from app.modules.agent.engine.orchestrator.actions.project_qa_support import ProjectQaSupport
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType
class ProjectQaActions(ActionSupport):
    """Orchestrator actions for answering questions about the project.

    Every action reads its inputs from the execution context, stores exactly
    one artifact and returns a one-element list with the value produced by
    ``self.put``. Source ranking, brief building and answer rendering are
    delegated to ``ProjectQaSupport`` so that logic is defined in one place.
    """

    def __init__(self) -> None:
        self._support = ProjectQaSupport()
        self._analyzer = ProjectQaAnalyzer()

    def classify_project_question(self, ctx: ExecutionContext) -> list[str]:
        """Build the question profile from the user message (``question_profile``)."""
        message = str(ctx.task.user_message or "")
        profile = self._support.build_profile(message)
        return [self.put(ctx, "question_profile", ArtifactType.STRUCTURED_JSON, profile)]

    def collect_project_sources(self, ctx: ExecutionContext) -> list[str]:
        """Rank RAG items and project files for the question (``source_bundle``).

        Inputs come from the ``question_profile`` artifact and from the
        ``rag_items`` / ``files_map`` entries of the task metadata.
        """
        profile = self.get(ctx, "question_profile", {}) or {}
        rag_items = list(ctx.task.metadata.get("rag_items", []) or [])
        files_map = dict(ctx.task.metadata.get("files_map", {}) or {})
        bundle = self._support.build_source_bundle(profile, rag_items, files_map)
        return [self.put(ctx, "source_bundle", ArtifactType.STRUCTURED_JSON, bundle)]

    def analyze_project_sources(self, ctx: ExecutionContext) -> list[str]:
        """Analyze the collected sources (``analysis_brief``).

        Profiles whose domain is ``"code"`` go through the code analysis;
        everything else is treated as a documentation question.
        """
        bundle = self.get(ctx, "source_bundle", {}) or {}
        profile = bundle.get("profile", {}) or {}
        rag_items = list(bundle.get("rag_items", []) or [])
        file_candidates = list(bundle.get("file_candidates", []) or [])
        if str(profile.get("domain")) == "code":
            analysis = self._analyzer.analyze_code(profile, rag_items, file_candidates)
        else:
            analysis = self._analyzer.analyze_docs(profile, rag_items)
        return [self.put(ctx, "analysis_brief", ArtifactType.STRUCTURED_JSON, analysis)]

    def build_project_answer_brief(self, ctx: ExecutionContext) -> list[str]:
        """Combine profile and analysis into the ``answer_brief`` artifact."""
        profile = self.get(ctx, "question_profile", {}) or {}
        analysis = self.get(ctx, "analysis_brief", {}) or {}
        brief = self._support.build_answer_brief(profile, analysis)
        return [self.put(ctx, "answer_brief", ArtifactType.STRUCTURED_JSON, brief)]

    def compose_project_answer(self, ctx: ExecutionContext) -> list[str]:
        """Render the answer brief as markdown text (``final_answer``)."""
        brief = self.get(ctx, "answer_brief", {}) or {}
        answer = self._support.compose_answer(brief)
        return [self.put(ctx, "final_answer", ArtifactType.TEXT, answer)]
@@ -0,0 +1,154 @@
from __future__ import annotations
class ProjectQaAnalyzer:
    """Turn ranked project sources into findings, evidence and open gaps.

    Both ``analyze_*`` methods return a dict with the keys ``subject``,
    ``findings``, ``evidence``, ``gaps`` and ``answer_mode``.
    """

    def analyze_code(self, profile: dict, rag_items: list[dict], file_candidates: list[dict]) -> dict:
        """Analyze a code-oriented question.

        Args:
            profile: Question profile; ``terms``, ``entities``, ``intent`` and
                ``russian`` are read, each may be missing or ``None``.
            rag_items: Retrieved items; ``title``, ``layer``, ``path`` and
                ``source`` are read.
            file_candidates: File dicts with ``path`` and ``content``.

        Returns:
            The analysis dict; message texts are Russian when
            ``profile["russian"]`` is truthy, English otherwise.
        """
        terms = list(profile.get("terms", []) or [])
        # ``entities`` may be absent or explicitly None; normalise it once so
        # membership tests and the final join cannot fail on None.
        entities = profile.get("entities") or []
        intent = str(profile.get("intent") or "lookup")
        russian = bool(profile.get("russian"))
        findings: list[str] = []
        evidence: list[str] = []
        gaps: list[str] = []

        # Only items whose layer starts with "C1" contribute symbol titles.
        symbol_set = {
            str(item.get("title", "") or "")
            for item in rag_items
            if str(item.get("layer", "")).startswith("C1")
        }
        file_paths = [str(item.get("path", "") or item.get("source", "") or "") for item in rag_items]
        file_paths.extend(str(item.get("path", "") or "") for item in file_candidates)

        if "ConfigManager" in entities or "configmanager" in terms or "config_manager" in terms:
            alias_file = self.find_path(file_paths, "src/config_manager/__init__.py")
            if alias_file:
                findings.append(
                    "Публичный `ConfigManager` экспортируется из `src/config_manager/__init__.py` как alias на `ConfigManagerV2`."
                    if russian
                    else "Public `ConfigManager` is exported from `src/config_manager/__init__.py` as an alias to `ConfigManagerV2`."
                )
                evidence.append("src/config_manager/__init__.py")

        if "controlchannel" in {name.lower() for name in symbol_set}:
            findings.append(
                "Базовый контракт управления задает `ControlChannel`: он определяет команды `start` и `stop` для внешнего канала управления."
                if russian
                else "`ControlChannel` defines the base management contract with `start` and `stop` commands."
            )
            evidence.append("src/config_manager/v2/control/base.py")

        if "ControlChannelBridge" in symbol_set:
            findings.append(
                "`ControlChannelBridge` связывает внешний канал управления с lifecycle-методами менеджера: `on_start`, `on_stop`, `on_status`."
                if russian
                else "`ControlChannelBridge` connects the external control channel to manager lifecycle methods: `on_start`, `on_stop`, `on_status`."
            )
            evidence.append("src/config_manager/v2/core/control_bridge.py")

        implementation_files = self.find_management_implementations(file_candidates)
        if implementation_files:
            labels = ", ".join(f"`{path}`" for path in implementation_files)
            channel_names = self.implementation_names(implementation_files)
            findings.append(
                f"В коде найдены конкретные реализации каналов управления: {', '.join(channel_names)} ({labels})."
                if russian
                else f"Concrete management channel implementations were found in code: {', '.join(channel_names)} ({labels})."
            )
            evidence.extend(implementation_files)
        elif intent == "inventory":
            # An inventory question without concrete files is an explicit gap.
            gaps.append(
                "В текущем контексте не удалось уверенно подтвердить конкретные файлы-реализации каналов, кроме базового контракта и bridge-слоя."
                if russian
                else "The current context does not yet confirm concrete channel implementation files beyond the base contract and bridge layer."
            )

        package_doc = self.find_management_doc(file_candidates)
        if package_doc:
            findings.append(
                f"Пакет управления прямо описывает внешние каналы через `{package_doc}`."
                if russian
                else f"The control package directly describes external channels in `{package_doc}`."
            )
            evidence.append(package_doc)

        subject = ", ".join(entities) if entities else "management channels"
        return {
            "subject": subject,
            "findings": self.dedupe(findings),
            "evidence": self.dedupe(evidence),
            "gaps": gaps,
            "answer_mode": "inventory" if intent == "inventory" else "summary",
        }

    def analyze_docs(self, profile: dict, rag_items: list[dict]) -> dict:
        """Analyze a documentation question using the first five RAG items.

        The first content line of each item (cut to 220 characters) becomes a
        finding; the item's source, or its title when no source is present,
        becomes evidence.
        """
        findings: list[str] = []
        evidence: list[str] = []
        for item in rag_items[:5]:
            title = str(item.get("title", "") or "")
            source = str(item.get("source", "") or "")
            content = str(item.get("content", "") or "").strip()
            if content:
                findings.append(content.splitlines()[0][:220])
            if source:
                evidence.append(source)
            elif title:
                evidence.append(title)
        return {
            "subject": "docs",
            "findings": self.dedupe(findings),
            "evidence": self.dedupe(evidence),
            "gaps": [] if findings else ["Недостаточно данных в документации." if profile.get("russian") else "Not enough data in documentation."],
            "answer_mode": "summary",
        }

    def find_management_implementations(self, file_candidates: list[dict]) -> list[str]:
        """Return up to four non-test paths that look like control channel code.

        A file qualifies by a known file name, by defining a class while
        mentioning ``controlchannel``, or by a channel/control path whose
        content mentions http, telegram or bot.
        """
        found: list[str] = []
        for item in file_candidates:
            path = str(item.get("path", "") or "")
            lowered = path.lower()
            if self.is_test_path(path):
                continue
            if any(token in lowered for token in ("http_channel.py", "telegram.py", "telegram_channel.py", "http.py")):
                found.append(path)
                continue
            content = str(item.get("content", "") or "").lower()
            if "controlchannel" in content and "class " in content:
                found.append(path)
                continue
            if ("channel" in lowered or "control" in lowered) and any(token in content for token in ("http", "telegram", "bot")):
                found.append(path)
        return self.dedupe(found)[:4]

    def implementation_names(self, paths: list[str]) -> list[str]:
        """Derive readable, unique labels from file names (``http_channel.py`` -> ``http channel``)."""
        labels = [
            path.rsplit("/", 1)[-1].rsplit(".", 1)[0].replace("_", " ").strip()
            for path in paths
        ]
        return self.dedupe(labels)

    def find_management_doc(self, file_candidates: list[dict]) -> str | None:
        """Return the first non-test path whose content mentions external control channels."""
        for item in file_candidates:
            path = str(item.get("path", "") or "")
            if self.is_test_path(path):
                continue
            content = str(item.get("content", "") or "").lower()
            if any(token in content for token in ("каналы внешнего управления", "external control channels", "http api", "telegram")):
                return path
        return None

    def find_path(self, paths: list[str], target: str) -> str | None:
        """Return ``target`` when it occurs in ``paths`` exactly, otherwise ``None``."""
        return target if target in paths else None

    def dedupe(self, items: list[str]) -> list[str]:
        """Drop empty values and duplicates, keeping first-seen order."""
        # dict keys keep insertion order, giving an order-preserving unique
        # filter in linear time.
        return list(dict.fromkeys(item for item in items if item))

    def is_test_path(self, path: str) -> bool:
        """Tell whether ``path`` is inside a ``tests`` directory or names a ``test_`` file."""
        lowered = path.lower()
        return lowered.startswith(("tests/", "test_")) or "/tests/" in lowered or "/test_" in lowered
@@ -0,0 +1,166 @@
from __future__ import annotations
import re
from app.modules.rag.retrieval.query_terms import extract_query_terms
class ProjectQaSupport:
    """Shared helpers of the project Q&A flow: profiling, ranking, rendering."""

    # ASCII word with an uppercase first letter and at least three characters.
    _ENTITY_RE = re.compile(r"\b[A-Z][A-Za-z0-9_]{2,}\b")
    # Query terms that show the user is explicitly asking about tests.
    _TEST_TERMS = frozenset({"test", "tests", "тест", "тесты"})

    def resolve_request(self, message: str) -> dict:
        """Summarise the raw message: normalised text, subject and source hints."""
        profile = self.build_profile(message)
        subject = profile["entities"][0] if profile.get("entities") else ""
        return {
            "original_message": message,
            "normalized_message": " ".join((message or "").split()),
            "subject_hint": subject,
            "source_hint": profile["domain"],
            "russian": profile["russian"],
        }

    def build_profile(self, message: str) -> dict:
        """Build the question profile (domain, intent, terms, entities, language).

        A missing message is treated as an empty string, matching how
        ``resolve_request`` normalises it.
        """
        text = message or ""
        lowered = text.lower()
        return {
            "domain": "code" if self.looks_like_code_question(lowered, text) else "docs",
            "intent": self.detect_intent(lowered),
            "terms": extract_query_terms(text),
            "entities": self.extract_entities(text),
            "russian": self.is_russian(text),
        }

    def build_retrieval_query(self, resolved_request: dict, profile: dict) -> str:
        """Return the retrieval query, prefixed with "по коду" for code questions."""
        normalized = str(resolved_request.get("normalized_message") or resolved_request.get("original_message") or "").strip()
        if profile.get("domain") == "code" and "по коду" not in normalized.lower():
            return f"по коду {normalized}".strip()
        return normalized

    def build_source_bundle(self, profile: dict, rag_items: list[dict], files_map: dict[str, dict]) -> dict:
        """Score and rank RAG items and files for the profiled question.

        Test paths lose 3 points unless the question terms mention tests;
        only positive scores are kept. The bundle holds the top 12 RAG items
        and top 10 files plus the totals before truncation.
        """
        terms = list(profile.get("terms", []) or [])
        entities = list(profile.get("entities", []) or [])
        explicit_test = any(term in self._TEST_TERMS for term in terms)

        ranked_rag: list[tuple[int, dict]] = []
        for item in rag_items:
            score = self.rag_score(item, terms, entities)
            source = str(item.get("source", "") or "")
            if not explicit_test and self.is_test_path(source):
                score -= 3
            if score > 0:
                ranked_rag.append((score, item))
        # Stable sort: equally scored entries keep their input order.
        ranked_rag.sort(key=lambda pair: pair[0], reverse=True)

        ranked_files: list[tuple[int, dict]] = []
        for path, payload in files_map.items():
            score = self.file_score(path, payload, terms, entities)
            if not explicit_test and self.is_test_path(path):
                score -= 3
            if score > 0:
                candidate = {
                    "path": path,
                    "content": str(payload.get("content", "")),
                    "content_hash": str(payload.get("content_hash", "")),
                }
                ranked_files.append((score, candidate))
        ranked_files.sort(key=lambda pair: pair[0], reverse=True)

        return {
            "profile": profile,
            "rag_items": [item for _, item in ranked_rag[:12]],
            "file_candidates": [item for _, item in ranked_files[:10]],
            "rag_total": len(ranked_rag),
            "files_total": len(ranked_files),
        }

    def build_answer_brief(self, profile: dict, analysis: dict) -> dict:
        """Map an analysis dict onto the answer brief structure."""
        return {
            "question_profile": profile,
            "resolved_subject": analysis.get("subject"),
            "key_findings": analysis.get("findings", []),
            "supporting_evidence": analysis.get("evidence", []),
            "missing_evidence": analysis.get("gaps", []),
            "answer_mode": analysis.get("answer_mode", "summary"),
        }

    def compose_answer(self, brief: dict) -> str:
        """Render the brief as markdown, in Russian or English per the profile.

        At most five evidence entries and three gaps are listed.
        """
        profile = brief.get("question_profile", {}) or {}
        russian = bool(profile.get("russian"))
        answer_mode = str(brief.get("answer_mode") or "summary")
        findings = list(brief.get("key_findings", []) or [])
        evidence = list(brief.get("supporting_evidence", []) or [])
        gaps = list(brief.get("missing_evidence", []) or [])

        if answer_mode == "inventory":
            heading = "### Что реализовано" if russian else "### Implemented items"
        else:
            heading = "### Что видно по проекту" if russian else "### What the project shows"
        lines = ["## Кратко" if russian else "## Summary", heading]

        if findings:
            lines.extend(f"- {item}" for item in findings)
        else:
            lines.append("Не удалось собрать подтвержденные выводы по доступным данным." if russian else "No supported findings could be assembled from the available data.")
        if evidence:
            lines.append("")
            lines.append("### Где смотреть в проекте" if russian else "### Where to look in the project")
            lines.extend(f"- `{item}`" for item in evidence[:5])
        if gaps:
            lines.append("")
            lines.append("### Что пока не подтверждено кодом" if russian else "### What is not yet confirmed in code")
            lines.extend(f"- {item}" for item in gaps[:3])
        return "\n".join(lines)

    def detect_intent(self, lowered: str) -> str:
        """Classify lowercased text as inventory, lookup, compare or explain."""
        if any(token in lowered for token in ("какие", "что уже реализ", "список", "перечень", "какие есть")):
            return "inventory"
        if any(token in lowered for token in ("где", "find", "where")):
            return "lookup"
        if any(token in lowered for token in ("сравни", "compare")):
            return "compare"
        return "explain"

    def looks_like_code_question(self, lowered: str, original: str | None = None) -> bool:
        """Tell whether the question is about code.

        Args:
            lowered: Lowercased message, checked for code marker substrings.
            original: Message in its original casing. The identifier pattern
                needs an uppercase first letter, so it can only match when
                this is supplied; without it the pattern is applied to
                ``lowered`` as before.

        Returns:
            True when a code marker is present or the text contains a
            capitalised ASCII word of three or more characters.
        """
        code_markers = ("по коду", "код", "реализ", "имплементац", "класс", "метод", "модул", "файл", "канал", "handler", "endpoint")
        if any(marker in lowered for marker in code_markers):
            return True
        probe = lowered if original is None else original
        return bool(self._ENTITY_RE.search(probe))

    def extract_entities(self, message: str) -> list[str]:
        """Return up to five capitalised ASCII words found in the message."""
        return self._ENTITY_RE.findall(message)[:5]

    def rag_score(self, item: dict, terms: list[str], entities: list[str]) -> int:
        """Score a RAG item: +3 per matching term, +5 per matching entity.

        Matching is a substring test against the lowercased source, title,
        content and ``metadata.qname`` of the item.
        """
        haystacks = [
            str(item.get("source", "") or "").lower(),
            str(item.get("title", "") or "").lower(),
            str(item.get("content", "") or "").lower(),
            str((item.get("metadata", {}) or {}).get("qname", "") or "").lower(),
        ]
        score = 0
        for term in terms:
            if any(term in hay for hay in haystacks):
                score += 3
        for entity in entities:
            if any(entity.lower() in hay for hay in haystacks):
                score += 5
        return score

    def file_score(self, path: str, payload: dict, terms: list[str], entities: list[str]) -> int:
        """Score a file: path hits weigh more than content hits.

        Terms give +4 (path) or +2 (content); entities give +5 or +3.
        """
        content = str(payload.get("content", "") or "").lower()
        path_lower = path.lower()
        score = 0
        for term in terms:
            if term in path_lower:
                score += 4
            elif term in content:
                score += 2
        for entity in entities:
            entity_lower = entity.lower()
            if entity_lower in path_lower:
                score += 5
            elif entity_lower in content:
                score += 3
        return score

    def is_test_path(self, path: str) -> bool:
        """Tell whether ``path`` is inside a ``tests`` directory or names a ``test_`` file."""
        lowered = path.lower()
        return lowered.startswith(("tests/", "test_")) or "/tests/" in lowered or "/test_" in lowered

    def is_russian(self, text: str) -> bool:
        """Tell whether the text contains at least one Russian letter."""
        return any("а" <= ch.lower() <= "я" or ch.lower() == "ё" for ch in text)
@@ -0,0 +1,102 @@
from __future__ import annotations
from urllib.parse import urlparse
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType
class ReviewActions(ActionSupport):
    """Orchestrator actions of the document review scenario.

    Each action stores its result as an artifact and returns the values
    produced by ``self.put``.
    """

    def fetch_source_doc(self, ctx: ExecutionContext) -> list[str]:
        """Produce the raw review text (``source_doc_raw``) and record evidence.

        Without a valued attachment the user message itself is the text. With
        one, the text is built from the attachment URL parts and the user
        message; nothing is downloaded here.
        """
        first_attachment = next((a for a in ctx.task.attachments if a.value), None)
        if first_attachment is not None:
            url = urlparse(first_attachment.value)
            source_ref = first_attachment.value
            text = f"Source: {url.netloc}\nPath: {url.path}\nRequest: {ctx.task.user_message}"
        else:
            text = ctx.task.user_message
            source_ref = "inline:message"

        evidence_id = self.add_evidence(
            ctx,
            source_type="external_doc",
            source_ref=source_ref,
            snippet=text,
            score=0.75,
        )
        artifact_meta = {"source_ref": source_ref, "evidence_ids": [evidence_id]}
        return [self.put(ctx, "source_doc_raw", ArtifactType.TEXT, text, meta=artifact_meta)]

    def normalize_document(self, ctx: ExecutionContext) -> list[str]:
        """Strip trailing whitespace per line and surrounding blank space (``source_doc_text``)."""
        raw = str(self.get(ctx, "source_doc_raw", "") or "")
        cleaned_lines = [line.rstrip() for line in raw.splitlines()]
        return [self.put(ctx, "source_doc_text", ArtifactType.TEXT, "\n".join(cleaned_lines).strip())]

    def structural_check(self, ctx: ExecutionContext) -> list[str]:
        """Report which required section markers occur in the document text."""
        lowered = str(self.get(ctx, "source_doc_text", "") or "").lower()
        required = ["цель", "границ", "риски", "api", "данные"]
        found = []
        missing = []
        for marker in required:
            (found if marker in lowered else missing).append(marker)
        findings = {
            "required_sections": required,
            "found_markers": found,
            "missing_markers": missing,
        }
        return [self.put(ctx, "structural_findings", ArtifactType.STRUCTURED_JSON, findings)]

    def semantic_consistency_check(self, ctx: ExecutionContext) -> list[str]:
        """Flag documents that contain both "no changes" and "new" markers."""
        lowered = str(self.get(ctx, "source_doc_text", "") or "").lower()
        contradictions = []
        if "без изменений" in lowered and "новый" in lowered:
            contradictions.append("Contains both 'no changes' and 'new behavior' markers.")
        status = "needs_attention" if contradictions else "ok"
        payload = {"contradictions": contradictions, "status": status}
        return [self.put(ctx, "semantic_findings", ArtifactType.STRUCTURED_JSON, payload)]

    def architecture_fit_check(self, ctx: ExecutionContext) -> list[str]:
        """Rate architecture fit from the presence of project files in the task metadata."""
        lowered = str(self.get(ctx, "source_doc_text", "") or "").lower()
        files_count = len(dict(ctx.task.metadata.get("files_map", {}) or {}))
        payload = {
            "architecture_fit": "high" if files_count != 0 else "medium",
            "notes": "Evaluate fit against existing docs and interfaces.",
            "markers": ["integration"] if "integr" in lowered else [],
        }
        return [self.put(ctx, "architecture_findings", ArtifactType.STRUCTURED_JSON, payload)]

    def optimization_check(self, ctx: ExecutionContext) -> list[str]:
        """Check whether the document mentions performance-related terms."""
        lowered = str(self.get(ctx, "source_doc_text", "") or "").lower()
        has_perf = any(token in lowered for token in ("latency", "performance", "оптим"))
        if has_perf:
            recommendation = "Optimization criteria present."
        else:
            recommendation = "Add explicit non-functional targets."
        payload = {"optimization_considered": has_perf, "recommendation": recommendation}
        return [self.put(ctx, "optimization_findings", ArtifactType.STRUCTURED_JSON, payload)]

    def compose_review_report(self, ctx: ExecutionContext) -> list[str]:
        """Combine all findings into one report stored as report and final answer."""
        structural = self.get(ctx, "structural_findings", {}) or {}
        semantic = self.get(ctx, "semantic_findings", {}) or {}
        architecture = self.get(ctx, "architecture_findings", {}) or {}
        optimization = self.get(ctx, "optimization_findings", {}) or {}

        missing = ", ".join(structural.get("missing_markers", [])) or "none"
        contradiction_count = len(semantic.get("contradictions", []))
        fit = architecture.get("architecture_fit", "unknown")
        recommendation = optimization.get("recommendation", "n/a")
        report_lines = [
            "## Findings",
            f"- Missing structure markers: {missing}",
            f"- Contradictions: {contradiction_count}",
            f"- Architecture fit: {fit}",
            f"- Optimization: {recommendation}",
            "",
            "## Recommendations",
            "- Clarify boundaries and data contracts.",
            "- Add explicit error and rollback behavior.",
            "- Add measurable non-functional requirements.",
        ]
        report = "\n".join(report_lines)
        return [
            self.put(ctx, "review_report", ArtifactType.REVIEW_REPORT, report),
            self.put(ctx, "final_answer", ArtifactType.TEXT, report),
        ]
@@ -0,0 +1,50 @@
from __future__ import annotations
from uuid import uuid4
from app.modules.agent.engine.orchestrator.models import ArtifactItem, ArtifactType
class ArtifactStore:
    """In-memory artifact registry indexed by artifact id and by key.

    A later ``put`` with the same key replaces the key lookup, while the
    earlier item stays reachable through ``all_items``.
    """

    def __init__(self) -> None:
        self._by_id: dict[str, ArtifactItem] = {}
        self._by_key: dict[str, ArtifactItem] = {}

    def put(self, *, key: str, artifact_type: ArtifactType, content=None, meta: dict | None = None) -> ArtifactItem:
        """Create and register an artifact.

        String content is stored in ``content``. Any other non-None content
        is kept under ``meta["value"]`` unless the caller's meta already has
        that key.
        """
        stored_meta = dict(meta or {})
        is_structured = content is not None and not isinstance(content, str)
        if is_structured:
            stored_meta.setdefault("value", content)
        artifact = ArtifactItem(
            artifact_id=f"artifact_{uuid4().hex}",
            key=key,
            type=artifact_type,
            content=self._as_content(content),
            meta=stored_meta,
        )
        self._by_id[artifact.artifact_id] = artifact
        self._by_key[key] = artifact
        return artifact

    def get(self, key: str) -> ArtifactItem | None:
        """Return the latest artifact stored under ``key``, if any."""
        return self._by_key.get(key)

    def get_content(self, key: str, default=None):
        """Return the text content, else ``meta["value"]``, else ``default``."""
        artifact = self.get(key)
        if artifact is None:
            return default
        text = artifact.content
        return text if text is not None else artifact.meta.get("value", default)

    def has(self, key: str) -> bool:
        """Tell whether an artifact was stored under ``key``."""
        return key in self._by_key

    def all_items(self) -> list[ArtifactItem]:
        """Return a new list with every artifact ever stored."""
        return [*self._by_id.values()]

    def _as_content(self, value):
        """Return ``value`` when it is a string, otherwise ``None``."""
        return value if isinstance(value, str) else None
@@ -0,0 +1,14 @@
from __future__ import annotations
from app.modules.agent.engine.orchestrator.models import EvidenceItem
class EvidenceStore:
    """Append-only, in-memory list of evidence items."""

    def __init__(self) -> None:
        self._items: list[EvidenceItem] = []

    def put_many(self, items: list[EvidenceItem]) -> None:
        """Append all given items, keeping their order."""
        self._items += items

    def all_items(self) -> list[EvidenceItem]:
        """Return a copy of the stored items."""
        return [*self._items]
@@ -0,0 +1,30 @@
from __future__ import annotations
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import Any
from app.modules.agent.engine.orchestrator.artifact_store import ArtifactStore
from app.modules.agent.engine.orchestrator.evidence_store import EvidenceStore
from app.modules.agent.engine.orchestrator.models import ExecutionPlan, TaskSpec
# Progress hook. The execution engine calls it as
# (stage, message, "task_progress", {"layer": "orchestrator"}) and awaits the
# result when it is awaitable.
ProgressCallback = Callable[[str, str, str, dict | None], Awaitable[None] | None]
# Callable taking two strings and returning a graph object.
# NOTE(review): the meaning of the two strings is not visible here — confirm with callers.
GraphResolver = Callable[[str, str], Any]
# Callable taking (graph, dict, str) and returning a dict.
# NOTE(review): argument semantics are not visible here — confirm with callers.
GraphInvoker = Callable[[Any, dict, str], dict]
@dataclass
class ExecutionContext:
    """State shared by all steps of one plan execution.

    ``artifacts`` and ``evidences`` receive fresh empty stores when they are
    left as ``None``.
    """

    task: TaskSpec
    plan: ExecutionPlan
    graph_resolver: GraphResolver
    graph_invoker: GraphInvoker
    progress_cb: ProgressCallback | None = None
    artifacts: ArtifactStore | None = None
    evidences: EvidenceStore | None = None

    def __post_init__(self) -> None:
        # Stores are created per context so executions never share state.
        self.artifacts = ArtifactStore() if self.artifacts is None else self.artifacts
        self.evidences = EvidenceStore() if self.evidences is None else self.evidences
@@ -0,0 +1,146 @@
from __future__ import annotations
import asyncio
import inspect
import logging
import time
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import PlanStatus, PlanStep, StepResult, StepStatus
from app.modules.agent.engine.orchestrator.quality_gates import QualityGateRunner
from app.modules.agent.engine.orchestrator.step_registry import StepRegistry
LOGGER = logging.getLogger(__name__)
class ExecutionEngine:
    """Run the steps of an execution plan in order and set the plan status."""

    def __init__(self, step_registry: StepRegistry, gates: QualityGateRunner) -> None:
        self._steps = step_registry
        self._gates = gates

    async def run(self, ctx: ExecutionContext) -> list[StepResult]:
        """Execute every plan step and return one result per processed step.

        Steps whose dependencies did not succeed are skipped. A failed step
        with ``on_failure == "fail"`` stops the plan at once. After all steps
        the global quality gates run; when they fail a synthetic
        ``global_gates`` result is appended. ``ctx.plan.status`` ends as
        FAILED, PARTIAL (something was skipped) or COMPLETED.
        """
        failure_statuses = {StepStatus.FAILED, StepStatus.RETRY_EXHAUSTED}
        ctx.plan.status = PlanStatus.RUNNING
        step_results: list[StepResult] = []

        for step in ctx.plan.steps:
            dep_issue = self._dependency_issue(step, step_results)
            if dep_issue:
                result = StepResult(
                    step_id=step.step_id,
                    status=StepStatus.SKIPPED,
                    warnings=[dep_issue],
                )
            else:
                result = await self._run_with_retry(step, ctx)
            step_results.append(result)
            self._log_step_result(ctx, step, result)
            if result.status in failure_statuses and step.on_failure == "fail":
                ctx.plan.status = PlanStatus.FAILED
                return step_results

        passed, global_messages = self._gates.check_global(ctx.plan.global_gates, ctx)
        if not passed:
            step_results.append(
                StepResult(
                    step_id="global_gates",
                    status=StepStatus.FAILED,
                    warnings=global_messages,
                )
            )
            ctx.plan.status = PlanStatus.FAILED
            return step_results

        if any(item.status in failure_statuses for item in step_results):
            ctx.plan.status = PlanStatus.FAILED
        elif any(item.status == StepStatus.SKIPPED for item in step_results):
            ctx.plan.status = PlanStatus.PARTIAL
        else:
            ctx.plan.status = PlanStatus.COMPLETED
        return step_results

    async def _run_with_retry(self, step: PlanStep, ctx: ExecutionContext) -> StepResult:
        """Run one step up to ``step.retry.max_attempts`` times (at least once).

        A failing step quality gate counts as a failed attempt. The reported
        duration covers the last attempt only.
        """
        max_attempts = max(1, int(step.retry.max_attempts or 1))
        last_error: Exception | None = None
        started_at = time.monotonic()
        for attempt in range(1, max_attempts + 1):
            started_at = time.monotonic()
            LOGGER.warning(
                "orchestrator step start: task_id=%s step_id=%s action_id=%s executor=%s attempt=%s graph_id=%s",
                ctx.task.task_id,
                step.step_id,
                step.action_id,
                step.executor,
                attempt,
                step.graph_id or "",
            )
            await self._emit_progress(ctx, f"orchestrator.step.{step.step_id}", step.title)
            try:
                artifact_ids = await self._steps.execute(step, ctx)
                passed, gate_messages = self._gates.check_step(step, ctx)
                if not passed:
                    raise RuntimeError(";".join(gate_messages) or "step_quality_gate_failed")
                elapsed = int((time.monotonic() - started_at) * 1000)
                return StepResult(
                    step_id=step.step_id,
                    status=StepStatus.SUCCESS,
                    produced_artifact_ids=artifact_ids,
                    warnings=gate_messages,
                    duration_ms=elapsed,
                )
            except Exception as exc:
                last_error = exc
                if attempt < max_attempts:
                    # Only the last error reaches the StepResult, so record
                    # earlier failures here before retrying.
                    LOGGER.warning(
                        "orchestrator step retry: task_id=%s step_id=%s attempt=%s error=%s",
                        ctx.task.task_id,
                        step.step_id,
                        attempt,
                        exc,
                    )
                    if step.retry.backoff_sec > 0:
                        await asyncio.sleep(step.retry.backoff_sec)

        elapsed = int((time.monotonic() - started_at) * 1000)
        return StepResult(
            step_id=step.step_id,
            status=StepStatus.RETRY_EXHAUSTED if max_attempts > 1 else StepStatus.FAILED,
            error_code="step_execution_failed",
            error_message=str(last_error) if last_error else "step_execution_failed",
            duration_ms=elapsed,
        )

    def _dependency_issue(self, step: PlanStep, results: list[StepResult]) -> str | None:
        """Return a reason string when a dependency is missing or unsuccessful, else ``None``."""
        if not step.depends_on:
            return None
        by_step = {item.step_id: item for item in results}
        for dep in step.depends_on:
            dep_result = by_step.get(dep)
            if dep_result is None:
                return f"dependency_not_executed:{dep}"
            if dep_result.status != StepStatus.SUCCESS:
                return f"dependency_not_success:{dep}:{dep_result.status.value}"
        return None

    async def _emit_progress(self, ctx: ExecutionContext, stage: str, message: str) -> None:
        """Call the progress callback, awaiting it when it returns an awaitable."""
        if ctx.progress_cb is None:
            return
        result = ctx.progress_cb(stage, message, "task_progress", {"layer": "orchestrator"})
        if inspect.isawaitable(result):
            await result

    def _log_step_result(self, ctx: ExecutionContext, step: PlanStep, result: StepResult) -> None:
        """Log status, duration, produced artifact keys, warnings and error of a step."""
        artifact_keys = []
        if result.produced_artifact_ids:
            # Build the id -> key index once instead of scanning all
            # artifacts for every produced id.
            keys_by_id = {artifact.artifact_id: artifact.key for artifact in ctx.artifacts.all_items()}
            artifact_keys = [
                keys_by_id[artifact_id]
                for artifact_id in result.produced_artifact_ids
                if artifact_id in keys_by_id
            ]
        LOGGER.warning(
            "orchestrator step result: task_id=%s step_id=%s action_id=%s status=%s duration_ms=%s artifact_keys=%s warnings=%s error=%s",
            ctx.task.task_id,
            step.step_id,
            step.action_id,
            result.status.value,
            result.duration_ms,
            artifact_keys,
            result.warnings,
            result.error_message or "",
        )
@@ -0,0 +1,36 @@
from __future__ import annotations
import logging
from app.modules.agent.repository import AgentRepository
LOGGER = logging.getLogger(__name__)
class MetricsPersister:
    """Best-effort writer of quality metrics to the agent repository."""

    def __init__(self, repository: AgentRepository) -> None:
        self._repository = repository

    def save(
        self,
        *,
        task_id: str,
        dialog_session_id: str,
        rag_session_id: str,
        scenario: str,
        domain_id: str,
        process_id: str,
        quality: dict,
    ) -> None:
        """Persist quality metrics; failures are logged and never re-raised."""
        record = {
            "task_id": task_id,
            "dialog_session_id": dialog_session_id,
            "rag_session_id": rag_session_id,
            "scenario": scenario,
            "domain_id": domain_id,
            "process_id": process_id,
            "quality": quality,
        }
        try:
            self._repository.save_quality_metrics(**record)
        except Exception:
            # Metrics must not break the main flow; keep the traceback in the log.
            LOGGER.exception("Failed to persist quality metrics: task_id=%s", task_id)
@@ -0,0 +1,51 @@
from app.modules.agent.engine.orchestrator.models.plan import (
ArtifactSpec,
ArtifactType,
ExecutionPlan,
PlanStatus,
PlanStep,
QualityGateRef,
RetryPolicy,
)
from app.modules.agent.engine.orchestrator.models.result import (
ArtifactItem,
EvidenceItem,
OrchestratorResult,
StepResult,
StepStatus,
)
from app.modules.agent.engine.orchestrator.models.task_spec import (
AttachmentRef,
FileRef,
OutputContract,
OutputSection,
RoutingMeta,
Scenario,
SourcePolicy,
TaskConstraints,
TaskSpec,
)
# Names re-exported by this package; one entry per name imported above,
# kept in alphabetical order.
__all__ = [
"ArtifactItem",
"ArtifactSpec",
"ArtifactType",
"AttachmentRef",
"EvidenceItem",
"ExecutionPlan",
"FileRef",
"OrchestratorResult",
"OutputContract",
"OutputSection",
"PlanStatus",
"PlanStep",
"QualityGateRef",
"RetryPolicy",
"RoutingMeta",
"Scenario",
"SourcePolicy",
"StepResult",
"StepStatus",
"TaskConstraints",
"TaskSpec",
]
@@ -0,0 +1,88 @@
from __future__ import annotations
from enum import Enum
from typing import Any, Literal
from pydantic import BaseModel, ConfigDict, Field
from app.modules.agent.engine.orchestrator.models.task_spec import Scenario
# Kinds of artifacts a plan step can produce. Members are also plain strings.
class ArtifactType(str, Enum):
    TEXT = "text"
    REVIEW_REPORT = "review_report"
    CHANGESET = "changeset"
    DOC_BUNDLE = "doc_bundle"
    GHERKIN_BUNDLE = "gherkin_bundle"
    STRUCTURED_JSON = "structured_json"
# Lifecycle states of an execution plan. The execution engine sets RUNNING,
# then FAILED, PARTIAL or COMPLETED.
class PlanStatus(str, Enum):
    DRAFT = "draft"
    VALIDATED = "validated"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    PARTIAL = "partial"
# Binds a named step input to a source key; unknown fields are rejected.
class InputBinding(BaseModel):
    model_config = ConfigDict(extra="forbid")

    name: str
    from_key: str
    required: bool = True
# Declares an artifact (key and type) a step is expected to output.
class ArtifactSpec(BaseModel):
    model_config = ConfigDict(extra="forbid")

    key: str
    type: ArtifactType
    required: bool = True
# Retry settings of a step.
class RetryPolicy(BaseModel):
    model_config = ConfigDict(extra="forbid")

    # Total number of attempts; the execution engine treats values below 1 as 1.
    max_attempts: int = 1
    # Pause between attempts in seconds; the engine sleeps only when it is > 0.
    backoff_sec: int = 0
# Reference to a quality gate by id, with a flag marking it as blocking.
class QualityGateRef(BaseModel):
    model_config = ConfigDict(extra="forbid")

    gate_id: str
    blocking: bool = True
# One step of an execution plan; unknown fields are rejected.
class PlanStep(BaseModel):
    model_config = ConfigDict(extra="forbid")

    step_id: str
    title: str
    action_id: str
    executor: Literal["function", "graph"]
    graph_id: str | None = None
    # Ids of steps that must have finished successfully before this one runs;
    # otherwise the execution engine skips this step.
    depends_on: list[str] = Field(default_factory=list)
    inputs: list[InputBinding] = Field(default_factory=list)
    outputs: list[ArtifactSpec] = Field(default_factory=list)
    side_effect: Literal["read", "write", "external"] = "read"
    retry: RetryPolicy = Field(default_factory=RetryPolicy)
    timeout_sec: int = 120
    # With "fail" the execution engine stops the whole plan when this step fails.
    on_failure: Literal["fail", "skip", "replan"] = "fail"
    quality_gates: list[QualityGateRef] = Field(default_factory=list)
# Ordered list of steps for one task, plus plan-wide gates and variables.
class ExecutionPlan(BaseModel):
    model_config = ConfigDict(extra="forbid")

    plan_id: str
    task_id: str
    scenario: Scenario
    template_id: str
    template_version: str
    # Updated in place by the execution engine while the plan runs.
    status: PlanStatus = PlanStatus.DRAFT
    steps: list[PlanStep]
    variables: dict[str, Any] = Field(default_factory=dict)
    # Checked by the execution engine after all steps have been processed.
    global_gates: list[QualityGateRef] = Field(default_factory=list)
@@ -0,0 +1,62 @@
from __future__ import annotations
from enum import Enum
from typing import Any, Literal
from pydantic import BaseModel, ConfigDict, Field
from app.modules.agent.engine.orchestrator.models.plan import ArtifactType
from app.schemas.changeset import ChangeItem
# Outcome of a single plan step. RETRY_EXHAUSTED is reported instead of
# FAILED when the step was allowed more than one attempt.
class StepStatus(str, Enum):
    SUCCESS = "success"
    FAILED = "failed"
    SKIPPED = "skipped"
    RETRY_EXHAUSTED = "retry_exhausted"
# A piece of supporting evidence with its origin and a relevance score.
class EvidenceItem(BaseModel):
    model_config = ConfigDict(extra="forbid")

    evidence_id: str
    source_type: Literal["rag_chunk", "project_file", "external_doc", "confluence"]
    source_ref: str
    snippet: str
    # Validated to lie within [0.0, 1.0].
    score: float = Field(ge=0.0, le=1.0)
# A stored artifact. The artifact store keeps string payloads in ``content``
# and other payloads under ``meta["value"]``.
class ArtifactItem(BaseModel):
    model_config = ConfigDict(extra="forbid")

    artifact_id: str
    key: str
    type: ArtifactType
    content: str | None = None
    path: str | None = None
    content_hash: str | None = None
    meta: dict[str, Any] = Field(default_factory=dict)
class StepResult(BaseModel):
    # Outcome record for one executed plan step.
    model_config = ConfigDict(extra="forbid")
    step_id: str
    status: StepStatus
    produced_artifact_ids: list[str] = Field(default_factory=list)
    evidence_ids: list[str] = Field(default_factory=list)
    # Used as the fallback failure description when error_message is empty
    # (see OrchestratorService.run).
    warnings: list[str] = Field(default_factory=list)
    error_code: str | None = None
    error_message: str | None = None
    duration_ms: int = 0
    token_usage: int | None = None
    # NOTE(review): presumably consumed when on_failure == "replan" - confirm in the engine.
    replan_hint: str | None = None
class OrchestratorResult(BaseModel):
    # Final result of an orchestrated run, built by ResultAssembler.assemble.
    model_config = ConfigDict(extra="forbid")
    # Final answer text, or None when no non-blank answer was produced.
    answer: str | None = None
    changeset: list[ChangeItem] = Field(default_factory=list)
    # Scenario, plan, route, orchestrator counters and quality metrics.
    meta: dict[str, Any] = Field(default_factory=dict)
    steps: list[StepResult] = Field(default_factory=list)
@@ -0,0 +1,93 @@
from __future__ import annotations
from enum import Enum
from typing import Any, Literal
from pydantic import BaseModel, ConfigDict, Field
class Scenario(str, Enum):
    # Task scenarios the orchestrator has templates for; chosen by
    # TaskSpecBuilder._detect_scenario, with GENERAL_QA as the fallback.
    EXPLAIN_PART = "explain_part"
    ANALYTICS_REVIEW = "analytics_review"
    DOCS_FROM_ANALYTICS = "docs_from_analytics"
    TARGETED_EDIT = "targeted_edit"
    GHERKIN_MODEL = "gherkin_model"
    GENERAL_QA = "general_qa"
class AttachmentRef(BaseModel):
    # Reference to an attachment supplied with the task.
    model_config = ConfigDict(extra="forbid")
    # TaskSpecBuilder maps any unrecognised type to "http_url".
    type: Literal["confluence_url", "http_url", "file_ref"]
    # The URL or reference string itself (never blank when built by TaskSpecBuilder).
    value: str
class FileRef(BaseModel):
    # A file passed along with the task.
    model_config = ConfigDict(extra="forbid")
    # TaskSpecBuilder normalises backslashes to "/" before storing the path.
    path: str
    # Both default to an empty string when the caller supplies nothing.
    content: str = ""
    content_hash: str = ""
class RoutingMeta(BaseModel):
    # Routing decision for a task.
    model_config = ConfigDict(extra="forbid")
    # domain_id / process_id select the graph for "route" graph steps and
    # influence scenario detection and template choice.
    domain_id: str
    process_id: str
    # Validated to lie within [0.0, 1.0].
    confidence: float = Field(ge=0.0, le=1.0)
    reason: str = ""
    fallback_used: bool = False
class SourcePolicy(BaseModel):
    # Rules about which sources may back an answer.
    model_config = ConfigDict(extra="forbid")
    # Ordered source kinds; the default omits "external_doc". A factory is used
    # so every instance gets its own list.
    priority: list[Literal["requirements", "tech_docs", "code", "external_doc"]] = Field(
        default_factory=lambda: ["requirements", "tech_docs", "code"]
    )
    # When True, the "evidence_required" gate fails if no evidence was collected.
    require_evidence: bool = True
    max_sources_per_step: int = 12
class TaskConstraints(BaseModel):
    # Execution limits for a task.
    model_config = ConfigDict(extra="forbid")
    # When False, PlanValidator rejects steps whose side_effect is "write".
    allow_writes: bool = False
    # PlanCompiler truncates plans to this many steps; PlanValidator also checks it.
    max_steps: int = 20
    # Upper bounds PlanCompiler applies to each step's retry.max_attempts and timeout_sec.
    max_retries_per_step: int = 2
    step_timeout_sec: int = 120
    target_paths: list[str] = Field(default_factory=list)
class OutputSection(BaseModel):
    # One named section of the expected output.
    model_config = ConfigDict(extra="forbid")
    # Section name; QualityMetricsCalculator uses it to decide whether the
    # section is covered.
    name: str
    format: Literal["markdown", "mermaid", "gherkin", "json", "changeset"]
    # Only required sections count towards the coverage metric.
    required: bool = True
class OutputContract(BaseModel):
    # Declares what kind of result a task must produce and its sections.
    model_config = ConfigDict(extra="forbid")
    result_type: Literal["answer", "changeset", "review_report", "doc_bundle", "gherkin_bundle"]
    # May be empty; coverage then falls back to a single "final_output" item.
    sections: list[OutputSection] = Field(default_factory=list)
class TaskSpec(BaseModel):
    # Full description of a task handed to the orchestrator.
    model_config = ConfigDict(extra="forbid")
    # Also used as the progress key and as the prefix of the compiled plan_id.
    task_id: str
    # Passed to the graph invoker for graph steps.
    dialog_session_id: str
    # Exposed to graphs as "project_id" in the collected agent state.
    rag_session_id: str
    mode: str = "auto"
    user_message: str
    scenario: Scenario
    routing: RoutingMeta
    attachments: list[AttachmentRef] = Field(default_factory=list)
    files: list[FileRef] = Field(default_factory=list)
    source_policy: SourcePolicy = Field(default_factory=SourcePolicy)
    constraints: TaskConstraints = Field(default_factory=TaskConstraints)
    output_contract: OutputContract
    # TaskSpecBuilder stores rag_items, rag_context, confluence_context and
    # files_map here.
    metadata: dict[str, Any] = Field(default_factory=dict)
@@ -0,0 +1,30 @@
from __future__ import annotations
from app.modules.agent.engine.orchestrator.models import ExecutionPlan, PlanStatus, TaskSpec
class PlanCompiler:
    """Specialise a scenario template into a plan bound to one task."""

    def compile(self, template: ExecutionPlan, task: TaskSpec) -> ExecutionPlan:
        """Return a deep copy of *template* adapted to *task*.

        The copy receives a task-scoped ``plan_id``, DRAFT status, routing
        variables, and per-step timeout/retry values clamped to the task
        constraints. The template object itself is not modified.
        """
        limits = task.constraints
        routing = task.routing

        compiled = template.model_copy(deep=True)
        compiled.plan_id = f"{task.task_id}:{template.template_id}"
        compiled.task_id = task.task_id
        compiled.status = PlanStatus.DRAFT
        compiled.variables = {
            "scenario": task.scenario.value,
            "route": {
                "domain_id": routing.domain_id,
                "process_id": routing.process_id,
                "confidence": routing.confidence,
            },
        }

        for current in compiled.steps:
            # Respect the task-level ceilings, but never drop below 1.
            capped_timeout = min(current.timeout_sec, limits.step_timeout_sec)
            current.timeout_sec = max(1, capped_timeout)
            capped_attempts = min(current.retry.max_attempts, limits.max_retries_per_step)
            current.retry.max_attempts = max(1, capped_attempts)
            # A write step in a read-only task must fail hard rather than be skipped.
            writes_forbidden = not limits.allow_writes
            if writes_forbidden and current.side_effect == "write":
                current.on_failure = "fail"

        # NOTE: plans longer than the budget are silently cut to the first
        # max_steps steps, so trailing steps of a long template are dropped.
        step_budget = limits.max_steps
        if len(compiled.steps) > step_budget:
            compiled.steps = compiled.steps[:step_budget]
        return compiled
@@ -0,0 +1,79 @@
from __future__ import annotations
from app.modules.agent.engine.orchestrator.models import ExecutionPlan, TaskSpec
class PlanValidator:
    """Static checks for a compiled execution plan.

    Every check returns a list of machine-readable error codes; an empty
    list means the plan passed.
    """

    def validate(self, plan: ExecutionPlan, task: TaskSpec) -> list[str]:
        """Run all checks and return the collected error codes in check order."""
        if not plan.steps:
            # Nothing else can be checked on an empty plan.
            return ["execution_plan_has_no_steps"]

        problems: list[str] = []
        if len(plan.steps) > task.constraints.max_steps:
            problems.append("execution_plan_exceeds_max_steps")
        problems += self._validate_step_ids(plan)
        problems += self._validate_dependencies(plan)
        problems += self._validate_side_effects(plan, task)
        problems += self._validate_step_shape(plan)
        return problems

    def _validate_step_ids(self, plan: ExecutionPlan) -> list[str]:
        """Report every repeated occurrence of a step id."""
        known: set[str] = set()
        duplicates: list[str] = []
        for current in plan.steps:
            ident = current.step_id
            if ident not in known:
                known.add(ident)
            else:
                duplicates.append(f"duplicate_step_id:{ident}")
        return duplicates

    def _validate_dependencies(self, plan: ExecutionPlan) -> list[str]:
        """Report dependencies on unknown steps and dependency cycles."""
        known_ids = {current.step_id for current in plan.steps}
        issues = [
            f"unknown_dependency:{current.step_id}->{dep}"
            for current in plan.steps
            for dep in current.depends_on
            if dep not in known_ids
        ]

        # Depth-first search: a node met again while still on the current
        # path closes a cycle. Unknown ids are treated as leaves.
        edges = {current.step_id: list(current.depends_on) for current in plan.steps}
        on_path: set[str] = set()
        finished: set[str] = set()

        def closes_cycle(node: str) -> bool:
            if node in on_path:
                return True
            if node in finished:
                return False
            on_path.add(node)
            if any(closes_cycle(successor) for successor in edges.get(node, [])):
                return True
            on_path.remove(node)
            finished.add(node)
            return False

        for start in edges:
            if closes_cycle(start):
                issues.append("dependency_cycle_detected")
                break
        return issues

    def _validate_side_effects(self, plan: ExecutionPlan, task: TaskSpec) -> list[str]:
        """Report write steps when the task does not allow writes."""
        if task.constraints.allow_writes:
            return []
        return [
            f"write_step_not_allowed:{current.step_id}"
            for current in plan.steps
            if current.side_effect == "write"
        ]

    def _validate_step_shape(self, plan: ExecutionPlan) -> list[str]:
        """Report per-step structural problems (graph id, retries, timeout)."""
        issues: list[str] = []
        for current in plan.steps:
            ident = current.step_id
            graph_without_id = current.executor == "graph" and not current.graph_id
            if graph_without_id:
                issues.append(f"graph_step_missing_graph_id:{ident}")
            if current.retry.max_attempts < 1:
                issues.append(f"invalid_retry_attempts:{ident}")
            if current.timeout_sec < 1:
                issues.append(f"invalid_step_timeout:{ident}")
        return issues
@@ -0,0 +1,116 @@
from __future__ import annotations
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import PlanStep, QualityGateRef
class QualityGateRunner:
    """Evaluate named quality gates against an execution context.

    Each gate check returns ``(ok, details)`` where ``details`` is a list of
    machine-readable codes explaining a failure. Gates read their input from
    ``ctx.artifacts``, ``ctx.evidences`` and ``ctx.task``.
    """

    def check_step(self, step: PlanStep, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Run the gates attached to *step*."""
        return self._run(step.quality_gates, step=step, ctx=ctx)

    def check_global(self, gates: list[QualityGateRef], ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Run plan-level *gates* that are not tied to a single step."""
        return self._run(gates, step=None, ctx=ctx)

    def _run(self, gates: list[QualityGateRef], *, step: PlanStep | None, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Run *gates* and return ``(passed, details)``.

        Only blocking gates affect ``passed``. ``details`` lists the codes of
        failed blocking gates first, followed by those of non-blocking gates.
        """
        failures: list[str] = []
        warnings: list[str] = []
        for gate in gates:
            ok, details = self._check(gate.gate_id, step=step, ctx=ctx)
            if ok:
                continue
            (failures if gate.blocking else warnings).extend(details)
        return not failures, failures + warnings

    def _check(self, gate_id: str, *, step: PlanStep | None, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Dispatch *gate_id* to its check; unknown ids are treated as passing."""
        checks = {
            "required_outputs": lambda: self._required_outputs(step, ctx),
            "non_empty_answer_or_changeset": lambda: self._non_empty_output(ctx),
            "changeset_required_for_write": lambda: self._changeset_required(ctx),
            "changeset_schema": lambda: self._changeset_schema(ctx),
            "evidence_required": lambda: self._evidence_required(ctx),
            "review_report_schema": lambda: self._review_schema(ctx),
            "cross_file_consistency": lambda: self._cross_file_consistency(ctx),
            "target_path_must_exist_or_be_allowed": lambda: self._target_path_gate(ctx),
            "minimal_patch_policy": lambda: self._minimal_patch_policy(ctx),
            "gherkin_syntax_lint": lambda: self._gherkin_lint(ctx),
            "coverage_of_change_intents": lambda: self._coverage_gate(ctx),
            "explain_format_hint": lambda: self._explain_hint(ctx),
        }
        fn = checks.get(gate_id)
        if fn is None:
            return True, []
        return fn()

    @staticmethod
    def _report(ctx: ExecutionContext, key: str) -> dict:
        """Return the dict stored under artifact *key*, or ``{}``.

        Missing, empty or non-dict content yields an empty dict so that a
        malformed report fails its gate instead of raising AttributeError.
        """
        content = ctx.artifacts.get_content(key, {}) or {}
        return content if isinstance(content, dict) else {}

    def _report_flag(self, ctx: ExecutionContext, key: str, flag: str, failure: str) -> tuple[bool, list[str]]:
        """Pass when the report stored under *key* has a truthy *flag*."""
        ok = bool(self._report(ctx, key).get(flag))
        return ok, [] if ok else [failure]

    def _required_outputs(self, step: PlanStep | None, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Check that every required output of *step* exists as an artifact."""
        if step is None:
            return True, []
        missing = [
            f"missing_required_artifact:{spec.key}"
            for spec in step.outputs
            if spec.required and not ctx.artifacts.has(spec.key)
        ]
        return not missing, missing

    def _non_empty_output(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Pass when there is a non-blank final answer or a non-empty changeset."""
        answer = str(ctx.artifacts.get_content("final_answer", "") or "").strip()
        changeset = ctx.artifacts.get_content("final_changeset", []) or []
        ok = bool(answer) or (isinstance(changeset, list) and len(changeset) > 0)
        return ok, [] if ok else ["empty_final_output"]

    def _changeset_required(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """When the task allows writes, require a non-empty changeset."""
        if not ctx.task.constraints.allow_writes:
            return True, []
        changeset = ctx.artifacts.get_content("final_changeset", []) or []
        ok = isinstance(changeset, list) and len(changeset) > 0
        return ok, [] if ok else ["changeset_required_for_write"]

    def _changeset_schema(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Check that every changeset item carries a non-empty ``op`` and ``path``.

        Items may be dicts or objects exposing ``op`` / ``path`` attributes;
        other consumers of ``final_changeset`` accept both forms, so the gate
        does too. Stops at the first offending item.
        """
        changeset = ctx.artifacts.get_content("final_changeset", []) or []
        if not isinstance(changeset, list):
            return False, ["changeset_not_list"]
        for idx, item in enumerate(changeset):
            if isinstance(item, dict):
                op, path = item.get("op"), item.get("path")
            elif hasattr(item, "op") and hasattr(item, "path"):
                op, path = item.op, item.path
            else:
                return False, [f"changeset_item_not_object:{idx}"]
            if not op or not path:
                return False, [f"changeset_item_missing_fields:{idx}"]
        return True, []

    def _evidence_required(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """When the source policy requires evidence, require at least one item."""
        if not ctx.task.source_policy.require_evidence:
            return True, []
        evidences = ctx.evidences.all_items()
        return len(evidences) > 0, ([] if evidences else ["no_evidence_collected"])

    def _review_schema(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Require both the Findings and Recommendations headings in the report."""
        report = str(ctx.artifacts.get_content("review_report", "") or "")
        ok = "## Findings" in report and "## Recommendations" in report
        return ok, [] if ok else ["review_report_missing_sections"]

    def _cross_file_consistency(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Pass when the consistency report flags the core paths as present."""
        return self._report_flag(
            ctx, "consistency_report", "required_core_paths_present", "cross_file_consistency_failed"
        )

    def _target_path_gate(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Pass when the resolved target has a non-blank path."""
        target = self._report(ctx, "resolved_target")
        ok = bool(str(target.get("path", "")).strip())
        return ok, [] if ok else ["target_path_not_resolved"]

    def _minimal_patch_policy(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Pass when the patch validation report is marked safe."""
        return self._report_flag(ctx, "patch_validation_report", "safe", "patch_validation_failed")

    def _gherkin_lint(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Pass when the Gherkin lint report is marked valid."""
        return self._report_flag(ctx, "gherkin_lint_report", "valid", "gherkin_lint_failed")

    def _coverage_gate(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Pass when the coverage report is marked covered."""
        return self._report_flag(ctx, "coverage_report", "covered", "coverage_check_failed")

    def _explain_hint(self, ctx: ExecutionContext) -> tuple[bool, list[str]]:
        """Pass when the final answer contains a mermaid block or sequence diagram."""
        answer = str(ctx.artifacts.get_content("final_answer", "") or "")
        ok = "```mermaid" in answer or "sequenceDiagram" in answer
        return ok, [] if ok else ["hint:explain_answer_missing_mermaid_block"]
@@ -0,0 +1,116 @@
from __future__ import annotations
import re
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import StepResult
class QualityMetricsCalculator:
    """Heuristic faithfulness / coverage metrics for a finished run."""

    # Sections that count as covered either by an artifact of the same name
    # or by their name (underscores as spaces) appearing in the answer text.
    _TEXT_MATCHED_SECTIONS = frozenset(
        {"summary", "findings", "recommendations", "gherkin_bundle", "review_report"}
    )

    def build(self, ctx: ExecutionContext, step_results: list[StepResult]) -> dict:
        """Return the metrics dict: faithfulness, coverage, status and step counts."""
        artifacts = ctx.artifacts
        answer = str(artifacts.get_content("final_answer", "") or "")
        changeset = artifacts.get_content("final_changeset", []) or []
        evidence_count = len(ctx.evidences.all_items())

        faithfulness = self._faithfulness(answer=answer, changeset=changeset, evidence_count=evidence_count)
        coverage = self._coverage(ctx=ctx, answer=answer, changeset=changeset)
        verdict = self._status(faithfulness["score"], coverage["score"])
        succeeded = sum(1 for outcome in step_results if outcome.status.value == "success")
        return {
            "faithfulness": faithfulness,
            "coverage": coverage,
            "status": verdict,
            "steps": {
                "total": len(ctx.plan.steps),
                "completed": succeeded,
            },
        }

    def _faithfulness(self, *, answer: str, changeset, evidence_count: int) -> dict:
        """Estimate how many claims the collected evidence can support.

        Each evidence item is assumed to support up to three claims.
        """
        total = max(self._estimate_claims(answer, changeset), 1)
        supported = min(total, evidence_count * 3) if evidence_count > 0 else 0
        return {
            "score": round(supported / total, 4),
            "claims_total": total,
            "claims_supported": supported,
            "claims_unsupported": max(0, total - supported),
            "evidence_items": evidence_count,
        }

    def _coverage(self, *, ctx: ExecutionContext, answer: str, changeset) -> dict:
        """Measure which required output sections are present."""
        required = [section.name for section in ctx.task.output_contract.sections if section.required]
        if not required:
            # No explicit contract: fall back to "is there any output at all".
            required = ["final_output"]
        covered = [
            entry
            for entry in required
            if self._is_item_covered(item=entry, ctx=ctx, answer=answer, changeset=changeset)
        ]
        missing = [entry for entry in required if entry not in covered]
        ratio = len(covered) / len(required)
        return {
            "score": round(ratio, 4),
            "required_items": required,
            "covered_items": covered,
            "missing_items": missing,
            "required_count": len(required),
            "covered_count": len(covered),
        }

    def _status(self, faithfulness: float, coverage: float) -> str:
        """Map the two scores to "ok", "needs_review" or "fail"."""
        tiers = (
            ("ok", 0.75, 0.85),
            ("needs_review", 0.55, 0.6),
        )
        for label, min_faithfulness, min_coverage in tiers:
            if faithfulness >= min_faithfulness and coverage >= min_coverage:
                return label
        return "fail"

    def _estimate_claims(self, answer: str, changeset) -> int:
        """Roughly count claims: bullets, sentences (capped at 12) or change reasons."""
        stripped_lines = (raw.strip() for raw in answer.splitlines())
        bullets = sum(1 for text in stripped_lines if text and text.startswith(("-", "*")))
        sentences = sum(1 for chunk in re.split(r"[.!?]\s+", answer) if chunk.strip())

        reasons = 0
        if isinstance(changeset, list):
            for entry in changeset:
                raw_reason = entry.get("reason", "") if isinstance(entry, dict) else getattr(entry, "reason", "")
                if str(raw_reason).strip():
                    reasons += 1
        return max(bullets, min(sentences, 12), reasons)

    def _is_item_covered(self, *, item: str, ctx: ExecutionContext, answer: str, changeset) -> bool:
        """Decide whether the required section *item* is present in the output."""
        name = (item or "").strip().lower()
        has_changes = isinstance(changeset, list) and len(changeset) > 0

        if name == "final_output":
            return bool(answer.strip()) or has_changes
        if name in {"changeset", "final_changeset"}:
            return has_changes
        if name in {"sequence_diagram", "mermaid"}:
            diagram = str(ctx.artifacts.get_content("sequence_diagram", "") or "").strip()
            return "```mermaid" in answer or bool(diagram)
        if name == "use_cases":
            if ctx.artifacts.has("use_cases"):
                return True
            lowered = answer.lower()
            return "use case" in lowered or "сценар" in lowered
        if name in self._TEXT_MATCHED_SECTIONS:
            if ctx.artifacts.has(name):
                return True
            if name == "gherkin_bundle":
                bundle = ctx.artifacts.get_content("gherkin_bundle", []) or []
                return isinstance(bundle, list) and len(bundle) > 0
            return name.replace("_", " ") in answer.lower()
        # Any other section is covered only by an artifact of the same name.
        return ctx.artifacts.has(name)
@@ -0,0 +1,55 @@
from __future__ import annotations
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import OrchestratorResult, StepResult
from app.modules.agent.engine.orchestrator.quality_metrics import QualityMetricsCalculator
from app.schemas.changeset import ChangeItem
class ResultAssembler:
    """Build the final OrchestratorResult from the execution context."""

    def __init__(self, quality: QualityMetricsCalculator | None = None) -> None:
        """Use *quality* for metrics, or a default calculator when omitted."""
        self._quality = quality or QualityMetricsCalculator()

    def assemble(self, ctx: ExecutionContext, step_results: list[StepResult]) -> OrchestratorResult:
        """Collect the answer, changeset, metadata and step results into one object."""
        artifacts = ctx.artifacts
        task = ctx.task
        plan = ctx.plan

        answer_text = str(artifacts.get_content("final_answer", "") or "").strip()
        changeset = self._normalize_changeset(artifacts.get_content("final_changeset", []) or [])
        quality = self._quality.build(ctx, step_results)

        plan_meta = {
            "plan_id": plan.plan_id,
            "template_id": plan.template_id,
            "template_version": plan.template_version,
            "status": plan.status.value,
        }
        route_meta = {
            "domain_id": task.routing.domain_id,
            "process_id": task.routing.process_id,
            "confidence": task.routing.confidence,
            "reason": task.routing.reason,
            "fallback_used": task.routing.fallback_used,
        }
        run_meta = {
            "steps_total": len(plan.steps),
            "steps_success": sum(1 for outcome in step_results if outcome.status.value == "success"),
        }
        meta = {
            "scenario": task.scenario.value,
            "plan": plan_meta,
            "route": route_meta,
            "orchestrator": run_meta,
            "quality": quality,
        }
        # A blank answer is reported as None rather than an empty string.
        return OrchestratorResult(answer=answer_text or None, changeset=changeset, meta=meta, steps=step_results)

    def _normalize_changeset(self, value) -> list[ChangeItem]:
        """Convert raw changeset entries to ChangeItem objects.

        Best effort: non-list input yields an empty list; entries that are
        neither ChangeItem nor dict, and dicts that fail validation, are dropped.
        """
        if not isinstance(value, list):
            return []
        normalized: list[ChangeItem] = []
        for entry in value:
            if isinstance(entry, ChangeItem):
                normalized.append(entry)
            elif isinstance(entry, dict):
                try:
                    parsed = ChangeItem.model_validate(entry)
                except Exception:
                    continue
                normalized.append(parsed)
        return normalized
@@ -0,0 +1,102 @@
from __future__ import annotations
import inspect
import logging
from app.core.exceptions import AppError
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext, GraphInvoker, GraphResolver, ProgressCallback
from app.modules.agent.engine.orchestrator.execution_engine import ExecutionEngine
from app.modules.agent.engine.orchestrator.models import OrchestratorResult, PlanStatus, TaskSpec
from app.modules.agent.engine.orchestrator.plan_compiler import PlanCompiler
from app.modules.agent.engine.orchestrator.plan_validator import PlanValidator
from app.modules.agent.engine.orchestrator.quality_gates import QualityGateRunner
from app.modules.agent.engine.orchestrator.result_assembler import ResultAssembler
from app.modules.agent.engine.orchestrator.step_registry import StepRegistry
from app.modules.agent.engine.orchestrator.template_registry import ScenarioTemplateRegistry
from app.schemas.common import ModuleName
LOGGER = logging.getLogger(__name__)
class OrchestratorService:
    """Plan, validate, execute and assemble the result for a single task."""

    def __init__(
        self,
        templates: ScenarioTemplateRegistry | None = None,
        compiler: PlanCompiler | None = None,
        validator: PlanValidator | None = None,
        step_registry: StepRegistry | None = None,
        gates: QualityGateRunner | None = None,
        engine: ExecutionEngine | None = None,
        assembler: ResultAssembler | None = None,
    ) -> None:
        """Wire collaborators, creating default implementations for any omitted."""
        self._templates = templates or ScenarioTemplateRegistry()
        self._compiler = compiler or PlanCompiler()
        self._validator = validator or PlanValidator()
        self._registry = step_registry or StepRegistry()
        self._gates = gates or QualityGateRunner()
        # The default engine shares this service's registry and gate runner.
        self._engine = engine or ExecutionEngine(self._registry, self._gates)
        self._assembler = assembler or ResultAssembler()

    async def run(
        self,
        *,
        task: TaskSpec,
        graph_resolver: GraphResolver,
        graph_invoker: GraphInvoker,
        progress_cb: ProgressCallback | None = None,
    ) -> OrchestratorResult:
        """Execute *task* end to end and return the assembled result.

        Raises:
            AppError: with code "invalid_execution_plan" when the compiled plan
                fails validation, or "execution_plan_failed" when the plan
                status is FAILED after execution.
        """
        await self._emit_progress(progress_cb, "orchestrator.plan", "Building execution plan.")
        plan = self._compiler.compile(self._templates.build(task), task)
        problems = self._validator.validate(plan, task)
        if problems:
            joined = "; ".join(problems)
            raise AppError(
                code="invalid_execution_plan",
                desc=f"Execution plan validation failed: {joined}",
                module=ModuleName.AGENT,
            )
        plan.status = PlanStatus.VALIDATED

        ctx = ExecutionContext(
            task=task,
            plan=plan,
            graph_resolver=graph_resolver,
            graph_invoker=graph_invoker,
            progress_cb=progress_cb,
        )
        await self._emit_progress(progress_cb, "orchestrator.run", "Executing plan steps.")
        step_results = await self._engine.run(ctx)
        # Failure is read back from the plan status after the engine has run.
        if plan.status == PlanStatus.FAILED:
            raise AppError(
                code="execution_plan_failed",
                desc=f"Execution plan failed: {self._describe_failures(step_results)}",
                module=ModuleName.AGENT,
            )

        result = self._assembler.assemble(ctx, step_results)
        await self._emit_progress(progress_cb, "orchestrator.done", "Execution plan completed.")
        step_overview = [
            {
                "step_id": outcome.step_id,
                "status": outcome.status.value,
            }
            for outcome in result.steps
        ]
        LOGGER.warning(
            "orchestrator decision: task_id=%s scenario=%s plan_status=%s steps=%s changeset_items=%s answer_len=%s",
            task.task_id,
            task.scenario.value,
            result.meta.get("plan", {}).get("status", ""),
            step_overview,
            len(result.changeset),
            len(result.answer or ""),
        )
        return result

    @staticmethod
    def _describe_failures(step_results) -> str:
        """Join "<step_id>:<error or warnings>" for every non-successful step."""
        parts = []
        for outcome in step_results:
            if outcome.status.value == "success":
                continue
            reason = outcome.error_message or ",".join(outcome.warnings)
            parts.append(f"{outcome.step_id}:{reason}")
        return "; ".join(parts)

    async def _emit_progress(self, progress_cb: ProgressCallback | None, stage: str, message: str) -> None:
        """Report progress through *progress_cb*, which may be sync or async."""
        if progress_cb is not None:
            outcome = progress_cb(stage, message, "task_progress", {"layer": "orchestrator"})
            if inspect.isawaitable(outcome):
                await outcome
@@ -0,0 +1,164 @@
from __future__ import annotations
import asyncio
from collections.abc import Callable
from typing import TYPE_CHECKING
from app.modules.agent.engine.graphs.progress_registry import progress_registry
from app.modules.agent.engine.orchestrator.actions import (
CodeExplainActions,
DocsActions,
EditActions,
ExplainActions,
GherkinActions,
ProjectQaActions,
ReviewActions,
)
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType, PlanStep
if TYPE_CHECKING:
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
StepFn = Callable[[ExecutionContext], list[str]]
class StepRegistry:
    """Map plan step action ids to callables and execute function/graph steps."""

    def __init__(self, code_explain_retriever: CodeExplainRetrieverV2 | None = None) -> None:
        """Instantiate the action groups and build the action-id lookup table."""
        code_explain = CodeExplainActions(code_explain_retriever)
        explain = ExplainActions()
        review = ReviewActions()
        docs = DocsActions()
        edits = EditActions()
        gherkin = GherkinActions()
        project_qa = ProjectQaActions()

        table: dict[str, StepFn] = {}
        # Built-in steps implemented by the registry itself.
        table.update(
            {
                "collect_state": self._collect_state,
                "finalize_graph_output": self._finalize_graph_output,
                "execute_project_qa_graph": self._collect_state,
            }
        )
        # Code explanation.
        table.update(
            {
                "build_code_explain_pack": code_explain.build_code_explain_pack,
                "collect_sources": explain.collect_sources,
                "extract_logic": explain.extract_logic,
                "summarize": explain.summarize,
            }
        )
        # Project Q&A.
        table.update(
            {
                "classify_project_question": project_qa.classify_project_question,
                "collect_project_sources": project_qa.collect_project_sources,
                "analyze_project_sources": project_qa.analyze_project_sources,
                "build_project_answer_brief": project_qa.build_project_answer_brief,
                "compose_project_answer": project_qa.compose_project_answer,
            }
        )
        # Analytics review.
        table.update(
            {
                "fetch_source_doc": review.fetch_source_doc,
                "normalize_document": review.normalize_document,
                "structural_check": review.structural_check,
                "semantic_consistency_check": review.semantic_consistency_check,
                "architecture_fit_check": review.architecture_fit_check,
                "optimization_check": review.optimization_check,
                "compose_review_report": review.compose_review_report,
            }
        )
        # Documentation generation.
        table.update(
            {
                "extract_change_intents": docs.extract_change_intents,
                "map_to_doc_tree": docs.map_to_doc_tree,
                "load_current_docs_context": docs.load_current_docs_context,
                "generate_doc_updates": docs.generate_doc_updates,
                "cross_file_validation": docs.cross_file_validation,
                "build_changeset": docs.build_changeset,
                "compose_summary": docs.compose_summary,
            }
        )
        # Targeted edits.
        table.update(
            {
                "resolve_target": edits.resolve_target,
                "load_target_context": edits.load_target_context,
                "plan_minimal_patch": edits.plan_minimal_patch,
                "generate_patch": edits.generate_patch,
                "validate_patch_safety": edits.validate_patch_safety,
                "finalize_changeset": edits.finalize_changeset,
                "compose_edit_summary": edits.compose_edit_summary,
            }
        )
        # Gherkin test model.
        table.update(
            {
                "extract_increment_scope": gherkin.extract_increment_scope,
                "partition_features": gherkin.partition_features,
                "generate_gherkin_bundle": gherkin.generate_gherkin_bundle,
                "lint_gherkin": gherkin.lint_gherkin,
                "validate_coverage": gherkin.validate_coverage,
                "compose_test_model_summary": gherkin.compose_test_model_summary,
            }
        )
        self._functions: dict[str, StepFn] = table

    async def execute(self, step: PlanStep, ctx: ExecutionContext) -> list[str]:
        """Run *step* and return the ids of the artifacts it produced.

        Raises:
            RuntimeError: when a function step names an unregistered action id.
        """
        if step.executor == "graph":
            return await self._execute_graph_step(step, ctx)
        handler = self._functions.get(step.action_id)
        if handler is not None:
            return handler(ctx)
        raise RuntimeError(f"Unknown function action_id: {step.action_id}")

    def _collect_state(self, ctx: ExecutionContext) -> list[str]:
        """Store the initial graph state as the "agent_state" artifact."""
        task = ctx.task
        meta = task.metadata
        snapshot = {
            "task_id": task.task_id,
            "project_id": task.rag_session_id,
            "scenario": task.scenario.value,
            "message": task.user_message,
            "progress_key": task.task_id,
            "rag_context": str(meta.get("rag_context", "")),
            "confluence_context": str(meta.get("confluence_context", "")),
            "files_map": dict(meta.get("files_map", {}) or {}),
        }
        stored = ctx.artifacts.put(key="agent_state", artifact_type=ArtifactType.STRUCTURED_JSON, content=snapshot)
        return [stored.artifact_id]

    async def _execute_graph_step(self, step: PlanStep, ctx: ExecutionContext) -> list[str]:
        """Resolve the graph for *step*, invoke it in a worker thread and store its outputs.

        Raises:
            RuntimeError: when graph_id is neither "route" nor "<domain>/<process>".
        """
        graph_key = step.graph_id or "route"
        if graph_key == "route":
            domain_id, process_id = ctx.task.routing.domain_id, ctx.task.routing.process_id
        elif "/" in graph_key:
            domain_id, process_id = graph_key.split("/", 1)
        else:
            raise RuntimeError(f"Unsupported graph_id: {graph_key}")

        graph = ctx.graph_resolver(domain_id, process_id)
        state = self._build_graph_state(ctx)
        task_id = ctx.task.task_id
        # The progress callback is registered only for the duration of the call.
        if ctx.progress_cb is not None:
            progress_registry.register(task_id, ctx.progress_cb)
        try:
            result = await asyncio.to_thread(ctx.graph_invoker, graph, state, ctx.task.dialog_session_id)
        finally:
            if ctx.progress_cb is not None:
                progress_registry.unregister(task_id)
        return self._store_graph_outputs(step, ctx, result)

    def _build_graph_state(self, ctx: ExecutionContext) -> dict:
        """Merge the "agent_state" content with every stored artifact, keyed by artifact key."""
        base = ctx.artifacts.get_content("agent_state", {}) or {}
        merged = dict(base)
        merged.update((entry.key, ctx.artifacts.get_content(entry.key)) for entry in ctx.artifacts.all_items())
        return merged

    def _store_graph_outputs(self, step: PlanStep, ctx: ExecutionContext, result: dict) -> list[str]:
        """Persist the graph result as artifacts according to the step's declared outputs.

        Raises:
            RuntimeError: when *result* is not a dict or a required output is missing.
        """
        if not isinstance(result, dict):
            raise RuntimeError("graph_result must be an object")

        declared = step.outputs
        # A single "graph_result" output stores the whole result unchanged.
        stores_whole_result = len(declared) == 1 and declared[0].key == "graph_result"
        if stores_whole_result:
            stored = ctx.artifacts.put(key="graph_result", artifact_type=ArtifactType.STRUCTURED_JSON, content=result)
            return [stored.artifact_id]

        produced: list[str] = []
        for spec in declared:
            payload = result.get(spec.key)
            if payload is None and spec.required:
                raise RuntimeError(f"graph_output_missing:{step.step_id}:{spec.key}")
            stored = ctx.artifacts.put(key=spec.key, artifact_type=spec.type, content=payload)
            produced.append(stored.artifact_id)
        return produced

    def _finalize_graph_output(self, ctx: ExecutionContext) -> list[str]:
        """Split "graph_result" into the "final_answer" and "final_changeset" artifacts.

        Raises:
            RuntimeError: when the stored graph result is not a dict.
        """
        raw = ctx.artifacts.get_content("graph_result", {}) or {}
        if not isinstance(raw, dict):
            raise RuntimeError("graph_result must be an object")

        answer = raw.get("answer")
        answer_text = "" if answer is None else str(answer)
        changeset = raw.get("changeset") or []

        answer_item = ctx.artifacts.put(key="final_answer", artifact_type=ArtifactType.TEXT, content=answer_text)
        changeset_item = ctx.artifacts.put(
            key="final_changeset",
            artifact_type=ArtifactType.CHANGESET,
            content=changeset,
        )
        return [answer_item.artifact_id, changeset_item.artifact_id]
@@ -0,0 +1,145 @@
from __future__ import annotations
from app.modules.agent.engine.orchestrator.models import (
AttachmentRef,
FileRef,
OutputContract,
OutputSection,
RoutingMeta,
Scenario,
TaskConstraints,
TaskSpec,
)
class TaskSpecBuilder:
    """Turn raw request data into a validated TaskSpec."""

    def build(
        self,
        *,
        task_id: str,
        dialog_session_id: str,
        rag_session_id: str,
        mode: str,
        message: str,
        route: RoutingMeta,
        attachments: list[dict],
        files: list[dict],
        rag_items: list[dict],
        rag_context: str,
        confluence_context: str,
        files_map: dict[str, dict],
    ) -> TaskSpec:
        """Detect the scenario and assemble the TaskSpec for one request.

        The RAG / Confluence context and the files map are carried in
        ``TaskSpec.metadata``; constraints and the output contract are derived
        from the detected scenario.
        """
        scenario = self._detect_scenario(mode=mode, message=message, route=route)
        contract = self._output_contract(scenario)
        limits = self._constraints_for(scenario)
        extras = {
            "rag_items": rag_items,
            "rag_context": rag_context,
            "confluence_context": confluence_context,
            "files_map": files_map,
        }
        mapped_attachments = self._map_attachments(attachments)
        mapped_files = self._map_files(files)
        return TaskSpec(
            task_id=task_id,
            dialog_session_id=dialog_session_id,
            rag_session_id=rag_session_id,
            mode=mode,
            user_message=message,
            scenario=scenario,
            routing=route,
            attachments=mapped_attachments,
            files=mapped_files,
            constraints=limits,
            output_contract=contract,
            metadata=extras,
        )

    def _detect_scenario(self, *, mode: str, message: str, route: RoutingMeta) -> Scenario:
        """Pick a scenario from the explicit mode, message keywords, then the route.

        Checks run in priority order; the first match wins and GENERAL_QA is
        the fallback.
        """
        mode_key = (mode or "").strip().lower()
        text = (message or "").strip().lower()

        def mentions(*tokens: str) -> bool:
            return any(token in text for token in tokens)

        if mode_key == "analytics_review":
            return Scenario.ANALYTICS_REVIEW
        if mentions("gherkin", "cucumber"):
            return Scenario.GHERKIN_MODEL
        if mentions("review analytics", "ревью аналитики", "проведи ревью"):
            return Scenario.ANALYTICS_REVIEW
        if mentions("сформируй документацию", "документацию из аналитики", "generate docs"):
            return Scenario.DOCS_FROM_ANALYTICS
        if mentions("точечн", "измени файл", "targeted edit", "patch file"):
            return Scenario.TARGETED_EDIT

        destination = (route.domain_id, route.process_id)
        if destination == ("project", "edits"):
            return Scenario.TARGETED_EDIT
        if destination == ("docs", "generation"):
            return Scenario.DOCS_FROM_ANALYTICS
        if destination == ("project", "qa"):
            if self._looks_like_explain_request(text):
                return Scenario.EXPLAIN_PART
            if "review" in text:
                return Scenario.ANALYTICS_REVIEW
        return Scenario.GENERAL_QA

    def _looks_like_explain_request(self, text: str) -> bool:
        """Return True when *text* contains any explain-style marker.

        Markers cover English, transliterated Russian and Russian phrasings.
        """
        for marker in (
            "explain",
            "how it works",
            "sequence",
            "diagram",
            "obiasni",
            "kak rabotaet",
            "kak ustroeno",
            "объясни",
            "как работает",
            "как устроен",
            "диаграм",
        ):
            if marker in text:
                return True
        return False

    def _map_attachments(self, attachments: list[dict]) -> list[AttachmentRef]:
        """Convert raw attachment dicts, skipping entries without a URL/value."""
        allowed_types = {"confluence_url", "http_url", "file_ref"}
        refs: list[AttachmentRef] = []
        for raw in attachments:
            value = str(raw.get("url") or raw.get("value") or "").strip()
            if value:
                declared = str(raw.get("type") or "http_url").strip().lower()
                # Unknown attachment types are treated as plain HTTP URLs.
                kind = declared if declared in allowed_types else "http_url"
                refs.append(AttachmentRef(type=kind, value=value))
        return refs

    def _map_files(self, files: list[dict]) -> list[FileRef]:
        """Convert raw file dicts, normalising path separators and skipping blank paths."""
        refs: list[FileRef] = []
        for raw in files:
            normalized = str(raw.get("path") or "").replace("\\", "/").strip()
            if normalized:
                refs.append(
                    FileRef(
                        path=normalized,
                        content=str(raw.get("content") or ""),
                        content_hash=str(raw.get("content_hash") or ""),
                    )
                )
        return refs

    def _constraints_for(self, scenario: Scenario) -> TaskConstraints:
        """Return execution limits; write-capable scenarios get a larger budget."""
        writable = scenario in {Scenario.DOCS_FROM_ANALYTICS, Scenario.TARGETED_EDIT, Scenario.GHERKIN_MODEL}
        return TaskConstraints(
            allow_writes=writable,
            max_steps=16 if writable else 12,
            max_retries_per_step=2,
            step_timeout_sec=120 if writable else 90,
        )

    def _output_contract(self, scenario: Scenario) -> OutputContract:
        """Return the expected result type and sections for *scenario*."""
        if scenario == Scenario.ANALYTICS_REVIEW:
            review_sections = [
                OutputSection(name="findings", format="markdown"),
                OutputSection(name="recommendations", format="markdown"),
            ]
            return OutputContract(result_type="review_report", sections=review_sections)
        if scenario in {Scenario.DOCS_FROM_ANALYTICS, Scenario.TARGETED_EDIT}:
            return OutputContract(result_type="changeset", sections=[OutputSection(name="changeset", format="changeset")])
        if scenario == Scenario.GHERKIN_MODEL:
            return OutputContract(
                result_type="gherkin_bundle",
                sections=[OutputSection(name="gherkin_bundle", format="gherkin")],
            )
        # EXPLAIN_PART and every remaining scenario share the plain answer contract.
        return OutputContract(result_type="answer", sections=[OutputSection(name="summary", format="markdown")])
@@ -0,0 +1,171 @@
from __future__ import annotations
from app.modules.agent.engine.orchestrator.models import ArtifactSpec, ArtifactType, ExecutionPlan, PlanStep, QualityGateRef, Scenario, TaskSpec
class ScenarioTemplateRegistry:
def build(self, task: TaskSpec) -> ExecutionPlan:
    """Build the execution plan for ``task`` from the template of its scenario.

    Scenarios without a registered template use the general template.
    """
    template_by_scenario = {
        Scenario.EXPLAIN_PART: self._explain,
        Scenario.ANALYTICS_REVIEW: self._review,
        Scenario.DOCS_FROM_ANALYTICS: self._docs,
        Scenario.TARGETED_EDIT: self._edit,
        Scenario.GHERKIN_MODEL: self._gherkin,
        Scenario.GENERAL_QA: self._general,
    }
    builder = template_by_scenario.get(task.scenario)
    if builder is None:
        builder = self._general
    return builder(task)
def _general(self, task: TaskSpec) -> ExecutionPlan:
    """Plan for general Q&A: collect state, run the routed graph, finalize.

    Tasks routed to project/qa are delegated to the project QA template.
    """
    routing = task.routing
    if routing.domain_id == "project" and routing.process_id == "qa":
        return self._project_qa(task)

    structured = ArtifactType.STRUCTURED_JSON
    collect_state = self._step(
        "collect_state",
        "Collect state",
        "collect_state",
        outputs=[self._out("agent_state", structured)],
    )
    run_graph = self._step(
        "execute_route_graph",
        "Execute selected graph",
        "execute_route_graph",
        executor="graph",
        graph_id="route",
        depends_on=["collect_state"],
        outputs=[self._out("graph_result", structured)],
        gates=[self._gate("required_outputs")],
    )
    finalize = self._step(
        "finalize_graph_output",
        "Finalize graph output",
        "finalize_graph_output",
        depends_on=["execute_route_graph"],
        # The answer is optional here; the gate accepts an answer or a changeset.
        outputs=[self._out("final_answer", ArtifactType.TEXT, required=False)],
        gates=[self._gate("non_empty_answer_or_changeset")],
    )
    return self._plan(
        task,
        "general_qa_v1",
        [collect_state, run_graph, finalize],
        [self._gate("non_empty_answer_or_changeset")],
    )
def _project_qa(self, task: TaskSpec) -> ExecutionPlan:
    """Plan for project Q&A: collect state, then run the CODE_QA runtime graph."""
    answer_gate_id = "non_empty_answer_or_changeset"
    collect_state = self._step(
        "collect_state",
        "Collect state",
        "collect_state",
        outputs=[self._out("agent_state", ArtifactType.STRUCTURED_JSON)],
    )
    run_code_qa = self._step(
        "execute_code_qa_runtime",
        "Execute CODE_QA runtime",
        "execute_code_qa_runtime",
        executor="graph",
        graph_id="project_qa/code_qa_runtime",
        depends_on=["collect_state"],
        outputs=[
            self._out("code_qa_result", ArtifactType.STRUCTURED_JSON),
            self._out("final_answer", ArtifactType.TEXT),
        ],
        gates=[self._gate(answer_gate_id)],
    )
    return self._plan(
        task,
        "project_qa_reasoning_v1",
        [collect_state, run_code_qa],
        [self._gate(answer_gate_id)],
    )
def _explain(self, task: TaskSpec) -> ExecutionPlan:
    """Plan for EXPLAIN_PART: collect sources, extract logic, summarize.

    Tasks routed to project/qa are delegated to the project QA template.
    """
    routing = task.routing
    if routing.domain_id == "project" and routing.process_id == "qa":
        return self._project_qa(task)

    structured = ArtifactType.STRUCTURED_JSON
    collect = self._step(
        "collect_sources",
        "Collect sources",
        "collect_sources",
        outputs=[self._out("sources", structured)],
    )
    extract = self._step(
        "extract_logic",
        "Extract logic",
        "extract_logic",
        depends_on=["collect_sources"],
        outputs=[self._out("logic_model", structured)],
    )
    summarize = self._step(
        "summarize",
        "Summarize",
        "summarize",
        depends_on=["extract_logic"],
        outputs=[self._out("final_answer", ArtifactType.TEXT)],
    )
    global_gates = [
        self._gate("evidence_required"),
        self._gate("non_empty_answer_or_changeset"),
    ]
    return self._plan(task, "explain_part_v1", [collect, extract, summarize], global_gates)
def _review(self, task: TaskSpec) -> ExecutionPlan:
    """Plan for ANALYTICS_REVIEW.

    Fetches and normalizes the source document, runs four checks that each
    depend only on the normalized text, then composes the review report
    from all four sets of findings.
    """
    step, out, gate = self._step, self._out, self._gate
    text, structured = ArtifactType.TEXT, ArtifactType.STRUCTURED_JSON

    steps = [
        step(
            "fetch_source_doc",
            "Fetch source doc",
            "fetch_source_doc",
            outputs=[out("source_doc_raw", text)],
            side_effect="external",
        ),
        step(
            "normalize_document",
            "Normalize document",
            "normalize_document",
            depends_on=["fetch_source_doc"],
            outputs=[out("source_doc_text", text)],
        ),
    ]

    # (step/action id, title, findings artifact key) for each check.
    checks = (
        ("structural_check", "Structural check", "structural_findings"),
        ("semantic_consistency_check", "Semantic check", "semantic_findings"),
        ("architecture_fit_check", "Architecture fit", "architecture_findings"),
        ("optimization_check", "Optimization check", "optimization_findings"),
    )
    for check_id, title, findings_key in checks:
        steps.append(
            step(
                check_id,
                title,
                check_id,
                depends_on=["normalize_document"],
                outputs=[out(findings_key, structured)],
            )
        )

    steps.append(
        step(
            "compose_review_report",
            "Compose review report",
            "compose_review_report",
            depends_on=[check_id for check_id, _, _ in checks],
            outputs=[
                out("review_report", ArtifactType.REVIEW_REPORT),
                out("final_answer", text),
            ],
            gates=[gate("review_report_schema")],
        )
    )
    return self._plan(
        task,
        "analytics_review_v1",
        steps,
        [gate("evidence_required"), gate("non_empty_answer_or_changeset")],
    )
def _docs(self, task: TaskSpec) -> ExecutionPlan:
    """Plan for DOCS_FROM_ANALYTICS: a linear nine-step chain.

    Each step depends on the previous one: fetch, normalize, extract change
    intents, map to the doc tree, load current docs, generate updates,
    validate across files, build the changeset, compose the summary.
    """
    step, out, gate = self._step, self._out, self._gate
    text, structured = ArtifactType.TEXT, ArtifactType.STRUCTURED_JSON

    steps = [
        step(
            "fetch_source_doc",
            "Fetch source doc",
            "fetch_source_doc",
            outputs=[out("source_doc_raw", text)],
            side_effect="external",
        ),
        step(
            "normalize_document",
            "Normalize document",
            "normalize_document",
            depends_on=["fetch_source_doc"],
            outputs=[out("source_doc_text", text)],
        ),
        step(
            "extract_change_intents",
            "Extract intents",
            "extract_change_intents",
            depends_on=["normalize_document"],
            outputs=[out("change_intents", structured)],
        ),
        step(
            "map_to_doc_tree",
            "Map to doc tree",
            "map_to_doc_tree",
            depends_on=["extract_change_intents"],
            outputs=[out("doc_targets", structured)],
        ),
        step(
            "load_current_docs_context",
            "Load current docs",
            "load_current_docs_context",
            depends_on=["map_to_doc_tree"],
            outputs=[out("current_docs_context", structured)],
        ),
        step(
            "generate_doc_updates",
            "Generate doc updates",
            "generate_doc_updates",
            depends_on=["load_current_docs_context"],
            outputs=[out("generated_doc_bundle", ArtifactType.DOC_BUNDLE)],
            side_effect="write",
        ),
        step(
            "cross_file_validation",
            "Cross-file validation",
            "cross_file_validation",
            depends_on=["generate_doc_updates"],
            outputs=[out("consistency_report", structured)],
            gates=[gate("cross_file_consistency")],
        ),
        step(
            "build_changeset",
            "Build changeset",
            "build_changeset",
            depends_on=["cross_file_validation"],
            outputs=[out("final_changeset", ArtifactType.CHANGESET)],
            side_effect="write",
        ),
        step(
            "compose_summary",
            "Compose summary",
            "compose_summary",
            depends_on=["build_changeset"],
            outputs=[out("final_answer", text)],
        ),
    ]
    return self._plan(
        task,
        "docs_from_analytics_v1",
        steps,
        [gate("changeset_required_for_write"), gate("changeset_schema")],
    )
def _edit(self, task: TaskSpec) -> ExecutionPlan:
    """Plan for TARGETED_EDIT: a linear seven-step chain.

    Resolve the target, load its context, plan a minimal patch, generate
    it, validate it, finalize the changeset, then compose the summary.
    """
    step, out, gate = self._step, self._out, self._gate
    structured, changeset = ArtifactType.STRUCTURED_JSON, ArtifactType.CHANGESET

    steps = [
        step(
            "resolve_target",
            "Resolve target",
            "resolve_target",
            outputs=[out("resolved_target", structured)],
            gates=[gate("target_path_must_exist_or_be_allowed")],
        ),
        step(
            "load_target_context",
            "Load target context",
            "load_target_context",
            depends_on=["resolve_target"],
            outputs=[out("target_context", structured)],
        ),
        step(
            "plan_minimal_patch",
            "Plan minimal patch",
            "plan_minimal_patch",
            depends_on=["load_target_context"],
            outputs=[out("patch_plan", structured)],
        ),
        step(
            "generate_patch",
            "Generate patch",
            "generate_patch",
            depends_on=["plan_minimal_patch"],
            outputs=[out("raw_changeset", changeset)],
            side_effect="write",
        ),
        step(
            "validate_patch_safety",
            "Validate patch",
            "validate_patch_safety",
            depends_on=["generate_patch"],
            outputs=[out("patch_validation_report", structured)],
            gates=[gate("minimal_patch_policy")],
        ),
        step(
            "finalize_changeset",
            "Finalize changeset",
            "finalize_changeset",
            depends_on=["validate_patch_safety"],
            outputs=[out("final_changeset", changeset)],
            side_effect="write",
        ),
        step(
            "compose_edit_summary",
            "Compose summary",
            "compose_edit_summary",
            depends_on=["finalize_changeset"],
            outputs=[out("final_answer", ArtifactType.TEXT)],
        ),
    ]
    return self._plan(
        task,
        "targeted_edit_v1",
        steps,
        [gate("changeset_required_for_write"), gate("changeset_schema")],
    )
def _gherkin(self, task: TaskSpec) -> ExecutionPlan:
    """Plan for GHERKIN_MODEL.

    Fetch and normalize the document, extract the increment scope,
    partition it into features and generate the gherkin bundle. Linting and
    coverage validation both depend on the generated bundle, and the final
    summary step depends on both of them.
    """
    step, out, gate = self._step, self._out, self._gate
    text, structured = ArtifactType.TEXT, ArtifactType.STRUCTURED_JSON

    steps = [
        step(
            "fetch_source_doc",
            "Fetch source doc",
            "fetch_source_doc",
            outputs=[out("source_doc_raw", text)],
            side_effect="external",
        ),
        step(
            "normalize_document",
            "Normalize document",
            "normalize_document",
            depends_on=["fetch_source_doc"],
            outputs=[out("source_doc_text", text)],
        ),
        step(
            "extract_increment_scope",
            "Extract increment scope",
            "extract_increment_scope",
            depends_on=["normalize_document"],
            outputs=[out("increment_scope", structured)],
        ),
        step(
            "partition_features",
            "Partition features",
            "partition_features",
            depends_on=["extract_increment_scope"],
            outputs=[out("feature_groups", structured)],
        ),
        step(
            "generate_gherkin_bundle",
            "Generate gherkin",
            "generate_gherkin_bundle",
            depends_on=["partition_features"],
            outputs=[out("gherkin_bundle", ArtifactType.GHERKIN_BUNDLE)],
            side_effect="write",
        ),
        step(
            "lint_gherkin",
            "Lint gherkin",
            "lint_gherkin",
            depends_on=["generate_gherkin_bundle"],
            outputs=[out("gherkin_lint_report", structured)],
            gates=[gate("gherkin_syntax_lint")],
        ),
        step(
            "validate_coverage",
            "Validate coverage",
            "validate_coverage",
            depends_on=["generate_gherkin_bundle"],
            outputs=[out("coverage_report", structured)],
            gates=[gate("coverage_of_change_intents")],
        ),
        step(
            "compose_test_model_summary",
            "Compose summary",
            "compose_test_model_summary",
            depends_on=["lint_gherkin", "validate_coverage"],
            outputs=[
                out("final_answer", text),
                out("final_changeset", ArtifactType.CHANGESET),
            ],
            side_effect="write",
        ),
    ]
    return self._plan(
        task,
        "gherkin_model_v1",
        steps,
        [gate("changeset_schema"), gate("non_empty_answer_or_changeset")],
    )
def _plan(self, task: TaskSpec, template_id: str, steps: list[PlanStep], gates: list[QualityGateRef]) -> ExecutionPlan:
    """Wrap ``steps`` and global ``gates`` into an ``ExecutionPlan`` for ``task``.

    The plan id is ``"<task_id>:<template_id>"`` and the template version is
    fixed at ``"1.0"``.
    """
    owner_id = task.task_id
    plan_fields = {
        "plan_id": f"{owner_id}:{template_id}",
        "task_id": owner_id,
        "scenario": task.scenario,
        "template_id": template_id,
        "template_version": "1.0",
        "steps": steps,
        "global_gates": gates,
    }
    return ExecutionPlan(**plan_fields)
def _step(
    self,
    step_id: str,
    title: str,
    action_id: str,
    *,
    executor: str = "function",
    graph_id: str | None = None,
    depends_on: list[str] | None = None,
    outputs: list[ArtifactSpec] | None = None,
    gates: list[QualityGateRef] | None = None,
    side_effect: str = "read",
) -> PlanStep:
    """Build a ``PlanStep``, replacing omitted list arguments with empty lists.

    ``gates`` is passed to ``PlanStep`` as ``quality_gates``.
    """
    dependencies = depends_on if depends_on else []
    produced = outputs if outputs else []
    step_gates = gates if gates else []
    return PlanStep(
        step_id=step_id,
        title=title,
        action_id=action_id,
        executor=executor,
        graph_id=graph_id,
        depends_on=dependencies,
        outputs=produced,
        quality_gates=step_gates,
        side_effect=side_effect,
    )
def _out(self, key: str, artifact_type: ArtifactType, *, required: bool = True) -> ArtifactSpec:
    """Shorthand for an ``ArtifactSpec``; artifacts are required unless stated otherwise."""
    spec = ArtifactSpec(key=key, type=artifact_type, required=required)
    return spec
def _gate(self, gate_id: str, *, blocking: bool = True) -> QualityGateRef:
    """Shorthand for a ``QualityGateRef``; gates are blocking unless stated otherwise."""
    gate_ref = QualityGateRef(gate_id=gate_id, blocking=blocking)
    return gate_ref
@@ -0,0 +1,55 @@
from pathlib import Path
from typing import TYPE_CHECKING
from app.modules.agent.llm import AgentLlmService
from app.modules.contracts import RagRetriever
if TYPE_CHECKING:
from app.modules.agent.repository import AgentRepository
from app.modules.agent.engine.router.router_service import RouterService
def build_router_service(llm: AgentLlmService, agent_repository: "AgentRepository", rag: RagRetriever) -> "RouterService":
    """Assemble a ``RouterService`` with every graph factory registered.

    Router and graph modules are imported inside the function rather than at
    module import time. The intent registry file is expected next to this
    module as ``intents_registry.yaml``.
    """
    from app.modules.agent.engine.graphs import (
        BaseGraphFactory,
        CodeQaGraphFactory,
        DocsGraphFactory,
        ProjectEditsGraphFactory,
        ProjectQaAnalysisGraphFactory,
        ProjectQaAnswerGraphFactory,
        ProjectQaClassificationGraphFactory,
        ProjectQaConversationGraphFactory,
        ProjectQaGraphFactory,
        ProjectQaRetrievalGraphFactory,
    )
    from app.modules.agent.engine.router.context_store import RouterContextStore
    from app.modules.agent.engine.router.intent_classifier import IntentClassifier
    from app.modules.agent.engine.router.intent_switch_detector import IntentSwitchDetector
    from app.modules.agent.engine.router.registry import IntentRegistry
    from app.modules.agent.engine.router.router_service import RouterService

    intents_file = Path(__file__).resolve().parent / "intents_registry.yaml"
    registry = IntentRegistry(registry_path=intents_file)

    # (domain_id, process_id, factory) in registration order.
    routes = (
        ("default", "general", BaseGraphFactory(llm)),
        ("project", "qa", ProjectQaGraphFactory(llm)),
        ("project_qa", "code_qa_runtime", CodeQaGraphFactory(llm)),
        ("project", "edits", ProjectEditsGraphFactory(llm)),
        ("docs", "generation", DocsGraphFactory(llm)),
        ("project_qa", "conversation_understanding", ProjectQaConversationGraphFactory(llm)),
        ("project_qa", "question_classification", ProjectQaClassificationGraphFactory(llm)),
        ("project_qa", "context_retrieval", ProjectQaRetrievalGraphFactory(rag, llm)),
        ("project_qa", "context_analysis", ProjectQaAnalysisGraphFactory(llm)),
        ("project_qa", "answer_composition", ProjectQaAnswerGraphFactory(llm)),
    )
    for domain_id, process_id, factory in routes:
        registry.register(domain_id, process_id, factory.build)

    return RouterService(
        registry=registry,
        classifier=IntentClassifier(llm),
        switch_detector=IntentSwitchDetector(),
        context_store=RouterContextStore(agent_repository),
    )
__all__ = ["build_router_service"]
@@ -0,0 +1,31 @@
from app.modules.agent.repository import AgentRepository
from app.modules.agent.engine.router.schemas import RouterContext
class RouterContextStore:
    """Facade over ``AgentRepository`` for reading and saving router context."""

    def __init__(self, repository: AgentRepository) -> None:
        self._repo = repository

    def get(self, conversation_key: str) -> RouterContext:
        """Return the router context the repository holds for ``conversation_key``."""
        stored = self._repo.get_router_context(conversation_key)
        return stored

    def update(
        self,
        conversation_key: str,
        *,
        domain_id: str,
        process_id: str,
        user_message: str,
        assistant_message: str,
        decision_type: str = "start",
        max_history: int = 10,
    ) -> None:
        """Forward one routed turn to the repository for ``conversation_key``.

        All keyword arguments are passed through unchanged.
        """
        turn_fields = {
            "domain_id": domain_id,
            "process_id": process_id,
            "user_message": user_message,
            "assistant_message": assistant_message,
            "decision_type": decision_type,
            "max_history": max_history,
        }
        self._repo.update_router_context(conversation_key, **turn_fields)
@@ -0,0 +1,196 @@
import json
import re
from app.modules.agent.engine.router.schemas import RouteDecision, RouterContext
from app.modules.agent.llm import AgentLlmService
class IntentClassifier:
_short_confirmations = {"да", "ок", "делай", "поехали", "запускай"}
_route_mapping = {
"default/general": ("default", "general"),
"project/qa": ("project", "qa"),
"project/edits": ("project", "edits"),
"docs/generation": ("docs", "generation"),
}
def __init__(self, llm: AgentLlmService) -> None:
    """Keep the LLM service that ``_classify_with_llm`` uses for the ``router_intent`` prompt."""
    self._llm = llm
def classify_new_intent(self, user_message: str, context: RouterContext) -> RouteDecision:
    """Pick a route for ``user_message``.

    Resolution order:
    1. a short confirmation ("да", "ок", ...) reuses ``context.last_routing``;
    2. deterministic keyword rules;
    3. the LLM router;
    4. the default/general route with confidence 0.8.
    """
    normalized = (user_message or "").strip().lower()

    if normalized in self._short_confirmations and context.last_routing:
        previous = context.last_routing
        return RouteDecision(
            domain_id=previous["domain_id"],
            process_id=previous["process_id"],
            confidence=1.0,
            reason="short_confirmation",
            use_previous=True,
            decision_type="continue",
        )

    # Rules see the normalized text; the LLM gets the original message.
    decision = self._deterministic_route(normalized) or self._classify_with_llm(user_message, context)
    if decision:
        return decision

    return RouteDecision(
        domain_id="default",
        process_id="general",
        confidence=0.8,
        reason="default",
        decision_type="start",
    )
def from_mode(self, mode: str) -> RouteDecision | None:
    """Translate an explicit API ``mode`` into a forced route.

    Returns ``None`` for unknown modes (including ``"auto"`` and empty
    values), which lets the caller classify the message normally.
    """
    routes_by_mode = {
        "project_qa": ("project", "qa"),
        "project_edits": ("project", "edits"),
        "docs_generation": ("docs", "generation"),
        # Legacy aliases kept for API compatibility.
        "analytics_review": ("project", "qa"),
        "code_change": ("project", "edits"),
        "qa": ("default", "general"),
    }
    mode_key = (mode or "auto").strip().lower()
    if mode_key not in routes_by_mode:
        return None

    domain_id, process_id = routes_by_mode[mode_key]
    return RouteDecision(
        domain_id=domain_id,
        process_id=process_id,
        confidence=1.0,
        # The reason carries the mode exactly as passed in, not the normalized key.
        reason=f"mode_override:{mode}",
        decision_type="switch",
        explicit_switch=True,
    )
def _classify_with_llm(self, user_message: str, context: RouterContext) -> RouteDecision | None:
    """Ask the LLM (prompt ``router_intent``) to choose one of the allowed routes.

    The request is a JSON object with the message, the last 8 history
    entries and the allowed route names. Returns ``None`` when the call
    fails, the reply cannot be parsed, or the named route is not allowed.
    """
    request = {
        "message": user_message,
        "history": context.message_history[-8:],
        "allowed_routes": list(self._route_mapping.keys()),
    }
    serialized = json.dumps(request, ensure_ascii=False)

    try:
        raw = self._llm.generate("router_intent", serialized).strip()
    except Exception:
        # Best effort: any LLM failure means "no decision" and the caller falls back.
        return None

    payload = self._parse_llm_payload(raw)
    route = self._route_mapping.get(payload["route"]) if payload else None
    if not route:
        return None

    domain_id, process_id = route
    return RouteDecision(
        domain_id=domain_id,
        process_id=process_id,
        confidence=self._normalize_confidence(payload.get("confidence")),
        reason=f"llm_router:{payload.get('reason', 'ok')}",
        decision_type="start",
    )
def _parse_llm_payload(self, raw: str) -> dict[str, str | float] | None:
candidate = self._strip_code_fence(raw.strip())
if not candidate:
return None
try:
parsed = json.loads(candidate)
except json.JSONDecodeError:
return None
if not isinstance(parsed, dict):
return None
route = str(parsed.get("route", "")).strip().lower()
if not route:
return None
return {
"route": route,
"confidence": parsed.get("confidence"),
"reason": str(parsed.get("reason", "ok")).strip().lower(),
}
def _normalize_confidence(self, value: object) -> float:
if isinstance(value, (float, int)):
return max(0.0, min(1.0, float(value)))
return 0.75
def _strip_code_fence(self, text: str) -> str:
if not text.startswith("```"):
return text
lines = text.splitlines()
if len(lines) < 3:
return text
if lines[-1].strip() != "```":
return text
return "\n".join(lines[1:-1]).strip()
def _deterministic_route(self, text: str) -> RouteDecision | None:
    """Route by keyword rules, without calling the LLM.

    A targeted file edit wins over a broad documentation request. Returns
    ``None`` when neither rule matches.
    """
    if self._is_targeted_file_edit_request(text):
        target = ("project", "edits", 0.97, "deterministic_targeted_file_edit")
    elif self._is_broad_docs_request(text):
        target = ("docs", "generation", 0.95, "deterministic_docs_generation")
    else:
        return None

    domain_id, process_id, confidence, reason = target
    return RouteDecision(
        domain_id=domain_id,
        process_id=process_id,
        confidence=confidence,
        reason=reason,
        decision_type="switch",
        explicit_switch=True,
    )
def _is_targeted_file_edit_request(self, text: str) -> bool:
if not text:
return False
edit_markers = (
"добавь",
"добавить",
"измени",
"исправь",
"обнови",
"удали",
"замени",
"вставь",
"в конец",
"в начале",
"append",
"update",
"edit",
"remove",
"replace",
)
has_edit_marker = any(marker in text for marker in edit_markers)
has_file_marker = (
"readme" in text
or bool(re.search(r"\b[\w.\-/]+\.(md|txt|rst|yaml|yml|json|toml|ini|cfg)\b", text))
)
return has_edit_marker and has_file_marker
def _is_broad_docs_request(self, text: str) -> bool:
if not text:
return False
docs_markers = (
"подготовь документац",
"сгенерируй документац",
"создай документац",
"опиши документац",
"generate documentation",
"write documentation",
"docs/",
)
return any(marker in text for marker in docs_markers)
@@ -0,0 +1,81 @@
from __future__ import annotations
import re
from app.modules.agent.engine.router.schemas import RouterContext
class IntentSwitchDetector:
    """Heuristic that decides whether a message in a running dialog starts a new intent.

    All checks are plain substring tests on the lower-cased message with
    whitespace runs collapsed to single spaces.
    """

    # Phrases that announce a change of task.
    _EXPLICIT_SWITCH_MARKERS = (
        "теперь",
        "а теперь",
        "давай теперь",
        "переключись",
        "переключаемся",
        "сейчас другое",
        "новая задача",
        "new task",
        "switch to",
        "now do",
        "instead",
    )
    # Phrases that continue the current topic; they veto any switch.
    _FOLLOW_UP_MARKERS = (
        "а еще",
        "а ещё",
        "подробнее",
        "почему",
        "зачем",
        "что если",
        "и еще",
        "и ещё",
        "покажи подробнее",
        "можешь подробнее",
    )

    @staticmethod
    def _contains_any(text: str, markers: tuple[str, ...]) -> bool:
        """Return True when at least one marker occurs in ``text``."""
        for marker in markers:
            if marker in text:
                return True
        return False

    def should_switch(self, user_message: str, context: RouterContext) -> bool:
        """Return True when ``user_message`` should be re-classified as a new intent.

        Never switches before a dialog has an active intent, on an empty
        message, or on a follow-up phrase. Otherwise switches on an explicit
        marker, a targeted file-edit request or a documentation request.
        """
        if not context.dialog_started or context.active_intent is None:
            return False

        text = " ".join((user_message or "").strip().lower().split())
        if not text or self._is_follow_up(text):
            return False
        if self._contains_any(text, self._EXPLICIT_SWITCH_MARKERS):
            return True
        if self._is_strong_targeted_edit_request(text):
            return True
        return self._is_strong_docs_request(text)

    def _is_follow_up(self, text: str) -> bool:
        """Return True when ``text`` contains a follow-up phrase."""
        return self._contains_any(text, self._FOLLOW_UP_MARKERS)

    def _is_strong_targeted_edit_request(self, text: str) -> bool:
        """Return True when ``text`` has an edit marker and names a file.

        The file marker is the word "readme" or a file name with one of the
        listed extensions.
        """
        edit_verbs = (
            "добавь",
            "добавить",
            "измени",
            "исправь",
            "обнови",
            "удали",
            "замени",
            "append",
            "update",
            "edit",
            "remove",
            "replace",
        )
        if not self._contains_any(text, edit_verbs):
            return False
        if "readme" in text:
            return True
        file_match = re.search(r"\b[\w.\-/]+\.(md|txt|rst|yaml|yml|json|toml|ini|cfg|py)\b", text)
        return file_match is not None

    def _is_strong_docs_request(self, text: str) -> bool:
        """Return True when ``text`` contains a documentation-generation marker."""
        docs_markers = (
            "подготовь документац",
            "сгенерируй документац",
            "создай документац",
            "опиши документац",
            "generate documentation",
            "write documentation",
        )
        return self._contains_any(text, docs_markers)
@@ -0,0 +1,17 @@
intents:
- domain_id: "default"
process_id: "general"
description: "General Q&A"
priority: 1
- domain_id: "project"
process_id: "qa"
description: "Project-specific Q&A with RAG and confluence context"
priority: 2
- domain_id: "project"
process_id: "edits"
description: "Project file edits from user request with conservative changeset generation"
priority: 3
- domain_id: "docs"
process_id: "generation"
description: "Documentation generation as changeset"
priority: 2
@@ -0,0 +1,46 @@
from collections.abc import Callable
from pathlib import Path
from typing import Any
import yaml
class IntentRegistry:
def __init__(self, registry_path: Path) -> None:
self._registry_path = registry_path
self._factories: dict[tuple[str, str], Callable[..., Any]] = {}
def register(self, domain_id: str, process_id: str, factory: Callable[..., Any]) -> None:
self._factories[(domain_id, process_id)] = factory
def get_factory(self, domain_id: str, process_id: str) -> Callable[..., Any] | None:
return self._factories.get((domain_id, process_id))
def is_valid(self, domain_id: str, process_id: str) -> bool:
return self.get_factory(domain_id, process_id) is not None
def load_intents(self) -> list[dict[str, Any]]:
if not self._registry_path.is_file():
return []
with self._registry_path.open("r", encoding="utf-8") as fh:
payload = yaml.safe_load(fh) or {}
intents = payload.get("intents")
if not isinstance(intents, list):
return []
output: list[dict[str, Any]] = []
for item in intents:
if not isinstance(item, dict):
continue
domain_id = item.get("domain_id")
process_id = item.get("process_id")
if not isinstance(domain_id, str) or not isinstance(process_id, str):
continue
output.append(
{
"domain_id": domain_id,
"process_id": process_id,
"description": str(item.get("description") or ""),
"priority": int(item.get("priority") or 0),
}
)
return output
@@ -0,0 +1,114 @@
from app.modules.agent.engine.router.context_store import RouterContextStore
from app.modules.agent.engine.router.intent_classifier import IntentClassifier
from app.modules.agent.engine.router.intent_switch_detector import IntentSwitchDetector
from app.modules.agent.engine.router.registry import IntentRegistry
from app.modules.agent.engine.router.schemas import RouteDecision, RouteResolution
class RouterService:
    """Resolve each user message to a ``(domain_id, process_id)`` route.

    Combines the intent classifier, the switch detector and the stored
    conversation context. A classified decision is accepted only when its
    confidence reaches ``min_confidence`` and its route has a registered
    factory.
    """

    def __init__(
        self,
        registry: IntentRegistry,
        classifier: IntentClassifier,
        context_store: RouterContextStore,
        switch_detector: IntentSwitchDetector | None = None,
        min_confidence: float = 0.7,
    ) -> None:
        self._registry = registry
        self._classifier = classifier
        self._ctx = context_store
        self._switch_detector = switch_detector or IntentSwitchDetector()
        self._min_confidence = min_confidence

    def resolve(self, user_message: str, conversation_key: str, mode: str = "auto") -> RouteResolution:
        """Decide which route handles ``user_message``.

        Order of precedence:
        1. an explicit ``mode`` override, returned as-is;
        2. no active intent yet: classify, or fall back to default/general
           when the decision is not acceptable;
        3. the switch detector fires: classify and switch if acceptable,
           otherwise keep the current intent;
        4. otherwise continue the current intent.
        """
        context = self._ctx.get(conversation_key)

        forced = self._classifier.from_mode(mode)
        if forced:
            return self._resolution(forced)

        is_new_dialog = not context.dialog_started or context.active_intent is None
        if is_new_dialog:
            decision = self._classifier.classify_new_intent(user_message, context)
            if self._is_acceptable(decision):
                started = decision.model_copy(update={"decision_type": "start", "explicit_switch": False})
                return self._resolution(started)
            return self._fallback("low_confidence")

        if not self._switch_detector.should_switch(user_message, context):
            return self._continue_current(context, "continue_current_intent")

        decision = self._classifier.classify_new_intent(user_message, context)
        if not self._is_acceptable(decision):
            return self._continue_current(context, "explicit_switch_unresolved_keep_current")
        switched = decision.model_copy(update={"decision_type": "switch", "explicit_switch": True})
        return self._resolution(switched)

    def persist_context(
        self,
        conversation_key: str,
        *,
        domain_id: str,
        process_id: str,
        user_message: str,
        assistant_message: str,
        decision_type: str = "start",
    ) -> None:
        """Save the routed turn in the context store for ``conversation_key``."""
        turn_fields = {
            "domain_id": domain_id,
            "process_id": process_id,
            "user_message": user_message,
            "assistant_message": assistant_message,
            "decision_type": decision_type,
        }
        self._ctx.update(conversation_key, **turn_fields)

    def graph_factory(self, domain_id: str, process_id: str):
        """Return the graph factory registered for the route, or ``None``."""
        factory = self._registry.get_factory(domain_id, process_id)
        return factory

    def _fallback(self, reason: str) -> RouteResolution:
        """Build the default/general resolution, marked as a fallback with zero confidence."""
        return RouteResolution(
            domain_id="default",
            process_id="general",
            confidence=0.0,
            reason=reason,
            fallback_used=True,
            decision_type="start",
            explicit_switch=False,
        )

    def _continue_current(self, context, reason: str) -> RouteResolution:
        """Build a "continue" resolution for the active intent.

        Uses ``last_routing`` when there is no active intent, and
        default/general when there is neither.
        """
        default_route = {"domain_id": "default", "process_id": "general"}
        active = context.active_intent or context.last_routing or default_route
        return RouteResolution(
            domain_id=str(active["domain_id"]),
            process_id=str(active["process_id"]),
            confidence=1.0,
            reason=reason,
            fallback_used=False,
            decision_type="continue",
            explicit_switch=False,
        )

    def _is_acceptable(self, decision: RouteDecision) -> bool:
        """Return True when the decision is confident enough and its route is registered."""
        if decision.confidence < self._min_confidence:
            return False
        return self._registry.is_valid(decision.domain_id, decision.process_id)

    def _resolution(self, decision: RouteDecision) -> RouteResolution:
        """Copy a classifier decision into a non-fallback ``RouteResolution``."""
        copied_fields = (
            "domain_id",
            "process_id",
            "confidence",
            "reason",
            "decision_type",
            "explicit_switch",
        )
        values = {name: getattr(decision, name) for name in copied_fields}
        return RouteResolution(fallback_used=False, **values)
@@ -0,0 +1,34 @@
from pydantic import BaseModel, Field, field_validator
# Routing decision produced by IntentClassifier before RouterService accepts or rejects it.
# Documented with comments rather than a docstring so the model's JSON schema is unchanged.
class RouteDecision(BaseModel):
    # Route key: domain and process within that domain.
    domain_id: str = "default"
    process_id: str = "general"
    # Classifier confidence, always stored within [0.0, 1.0] (see clamp_confidence).
    confidence: float = 0.0
    # Short tag describing which rule or source produced the decision.
    reason: str = ""
    # True when the decision reuses the previous routing (short confirmations).
    use_previous: bool = False
    # One of the values used by the router: "start", "continue" or "switch".
    decision_type: str = "start"
    # True when the decision represents a deliberate change of intent.
    explicit_switch: bool = False

    @field_validator("confidence")
    @classmethod
    def clamp_confidence(cls, value: float) -> float:
        # Clamp into [0.0, 1.0]. NaN needs its own branch: every comparison
        # with NaN is false, so min(1.0, nan) returns 1.0 and a NaN
        # confidence would be stored as full confidence. Treat it as none.
        number = float(value)
        if number != number:
            return 0.0
        return max(0.0, min(1.0, number))
# Final routing outcome returned by RouterService.resolve.
# RouterService builds it in _resolution, _fallback and _continue_current.
class RouteResolution(BaseModel):
# Route key: domain and process within that domain.
domain_id: str
process_id: str
# Confidence attached to the outcome; RouterService uses 0.0 for fallbacks and 1.0 when continuing.
confidence: float
# Short tag explaining why this route was chosen.
reason: str
# True only for the default/general fallback built by RouterService._fallback.
fallback_used: bool = False
# Values set by RouterService: "start", "continue" or "switch".
decision_type: str = "start"
# True when the outcome represents a deliberate change of intent.
explicit_switch: bool = False
# Per-conversation routing state, read through RouterContextStore.get.
class RouterContext(BaseModel):
# Previous route ("domain_id"/"process_id" keys); reused for short confirmations and as a fallback when continuing.
last_routing: dict[str, str] | None = None
# Conversation history; IntentClassifier sends the last 8 entries to the LLM router.
message_history: list[dict[str, str]] = Field(default_factory=list)
# Currently active route; None is treated by the router as "no dialog in progress".
active_intent: dict[str, str] | None = None
# False until a dialog has been started; checked together with active_intent.
dialog_started: bool = False
# NOTE(review): not read anywhere in the router code visible here; presumably a turn counter maintained by the repository. Confirm.
turn_index: int = 0
+3
View File
@@ -0,0 +1,3 @@
from app.modules.agent.llm.service import AgentLlmService
__all__ = ["AgentLlmService"]
+40
View File
@@ -0,0 +1,40 @@
import logging
from app.modules.agent.prompt_loader import PromptLoader
from app.modules.shared.gigachat.client import GigaChatClient
LOGGER = logging.getLogger(__name__)
def _truncate_for_log(text: str, max_chars: int = 1500) -> str:
value = (text or "").replace("\n", "\\n").strip()
if len(value) <= max_chars:
return value
return value[:max_chars].rstrip() + "...[truncated]"
class AgentLlmService:
    """Runs a named system prompt plus user input through the GigaChat client."""

    def __init__(self, client: GigaChatClient, prompts: PromptLoader) -> None:
        self._client = client
        self._prompts = prompts

    def generate(self, prompt_name: str, user_input: str, *, log_context: str | None = None) -> str:
        """Complete ``user_input`` under the system prompt called ``prompt_name``.

        A generic assistant prompt is used when the named prompt is missing
        or empty. When ``log_context`` is given, the truncated input and
        output are logged at WARNING level with that context.
        """
        system_prompt = self._prompts.load(prompt_name) or "You are a helpful assistant."
        should_log = bool(log_context)

        if should_log:
            LOGGER.warning(
                "graph llm input: context=%s prompt=%s user_input=%s",
                log_context,
                prompt_name,
                _truncate_for_log(user_input),
            )

        completion = self._client.complete(system_prompt=system_prompt, user_prompt=user_input)

        if should_log:
            LOGGER.warning(
                "graph llm output: context=%s prompt=%s output=%s",
                log_context,
                prompt_name,
                _truncate_for_log(completion),
            )
        return completion
+64
View File
@@ -0,0 +1,64 @@
from __future__ import annotations
from fastapi import APIRouter
from pydantic import BaseModel, HttpUrl
from typing import TYPE_CHECKING
from app.modules.agent.changeset_validator import ChangeSetValidator
from app.modules.agent.confluence_service import ConfluenceService
from app.modules.agent.llm import AgentLlmService
from app.modules.agent.prompt_loader import PromptLoader
from app.modules.agent.story_context_repository import StoryContextRepository
from app.modules.agent.story_session_recorder import StorySessionRecorder
from app.modules.agent.service import GraphAgentRuntime
from app.modules.agent.repository import AgentRepository
from app.modules.contracts import RagRetriever
from app.modules.shared.gigachat.client import GigaChatClient
from app.modules.shared.gigachat.settings import GigaChatSettings
from app.modules.shared.gigachat.token_provider import GigaChatTokenProvider
# Request body of POST /internal/tools/confluence/fetch (see AgentModule.internal_router).
class ConfluenceFetchRequest(BaseModel):
# Page address; pydantic validates it as an HttpUrl, and the handler converts it to str.
url: HttpUrl
if TYPE_CHECKING:
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
class AgentModule:
    """Composition root of the agent: builds the LLM stack and the graph runtime."""

    def __init__(
        self,
        rag_retriever: RagRetriever,
        agent_repository: AgentRepository,
        story_context_repository: StoryContextRepository,
        code_explain_retriever: CodeExplainRetrieverV2 | None = None,
    ) -> None:
        self.confluence = ConfluenceService()
        self.changeset_validator = ChangeSetValidator()
        self.story_context_repository = story_context_repository

        # LLM stack: settings from the environment -> token provider -> client -> service.
        gigachat_settings = GigaChatSettings.from_env()
        self.llm = AgentLlmService(
            client=GigaChatClient(gigachat_settings, GigaChatTokenProvider(gigachat_settings)),
            prompts=PromptLoader(),
        )

        self.runtime = GraphAgentRuntime(
            rag=rag_retriever,
            confluence=self.confluence,
            changeset_validator=self.changeset_validator,
            llm=self.llm,
            agent_repository=agent_repository,
            story_recorder=StorySessionRecorder(story_context_repository),
            code_explain_retriever=code_explain_retriever,
        )

    def internal_router(self) -> APIRouter:
        """Return the ``/internal/tools`` router with the Confluence fetch endpoint."""
        tools_router = APIRouter(prefix="/internal/tools", tags=["internal-tools"])

        # Left without a docstring so the generated endpoint description stays unchanged.
        @tools_router.post("/confluence/fetch")
        async def fetch_page(request: ConfluenceFetchRequest) -> dict:
            page_url = str(request.url)
            return await self.confluence.fetch_page(page_url)

        return tools_router
+15
View File
@@ -0,0 +1,15 @@
from pathlib import Path
import os
class PromptLoader:
def __init__(self, prompts_dir: Path | None = None) -> None:
base = prompts_dir or Path(__file__).resolve().parent / "prompts"
env_override = os.getenv("AGENT_PROMPTS_DIR", "").strip()
self._dir = Path(env_override) if env_override else base
def load(self, name: str) -> str:
path = self._dir / f"{name}.txt"
if not path.is_file():
return ""
return path.read_text(encoding="utf-8").strip()
@@ -0,0 +1,17 @@
Объясняйте код, опираясь только на предоставленный ExplainPack.
Правила:
- В первую очередь опирайтесь на доказательства из пакета.
- Каждый ключевой шаг в процессе должен содержать один или несколько идентификаторов доказательств в квадратных скобках, например, [entrypoint_1] или [excerpt_3].
- Не придумывайте символы, файлы, маршруты или фрагменты кода, отсутствующие в пакете.
- Если доказательства неполные, укажите это явно.
- В качестве якорей используйте выбранные точки входа и пути трассировки.
Верните Markdown со следующей структурой:
1. Краткое описание
2. Пошаговый процесс
3. Данные и побочные эффекты
4. Ошибки и граничные случаи
5. Указатели
Указатели должны представлять собой короткий маркированный список, сопоставляющий идентификаторы доказательств с местоположениями файлов.
@@ -0,0 +1,25 @@
Ты инженер, который объясняет устройство подсистемы только по наблюдаемым компонентам и связям из кода.
Отвечай только по коду и структуре проекта, которые есть в контексте.
Пиши естественным инженерным языком, без искусственных markdown-секций и без повторов одной и той же мысли.
Если ответ можно дать в 1-3 фразах, не раздувай его.
Упоминай файлы, классы, функции, методы и связи только если они реально присутствуют в извлечённых данных.
Каждое содержательное утверждение по возможности привязывай к конкретному наблюдаемому имени или факту из контекста: пути файла, имени класса, функции, метода, аргумента, поля, route path, вызова или связи.
Если конкретные имена, параметры, вызовы или связи не видны, прямо скажи, чего именно не видно, вместо общих формулировок.
Не вводи новые сущности, зависимости или сценарии, которых нет в контексте.
Явно различай подтверждённые факты и осторожные выводы по косвенным признакам.
Если данных мало, честно скажи об этом вместо общего обзора.
Не используй жирные заголовки блоков, если пользователь их не просил.
Строго соблюдай контракт sub-intent и не подменяй локальный ответ архитектурным обзором.
Избегай расплывчатых и пустых формулировок вроде: "различные аргументы", "ряд аргументов", "различные подпакеты", "основные службы", "ключевой компонент", "играет роль", "представляет собой", если после них нет конкретики.
Не добавляй очевидные мета-комментарии о том, что ответ основан на контексте или на видимом фрагменте, если это ничего не добавляет по сути.
Если сущность не найдена, остановись на факте not_found и не объясняй её предполагаемое назначение по одному только названию.
Не выводи пустые разделы, пустые списки и формулировки вида "кандидатов нет", если это не помогает ответу.
Дай архитектурное объяснение без лишней теории.
Назови подтверждённые компоненты и конкретные связи между ними: создаёт, вызывает, регистрирует, читает, пишет, передаёт, оборачивает.
Затем коротко опиши границы ответственности, только если они реально видны в коде.
Не используй synthetic role labels как готовый пользовательский вывод, если они не поддержаны кодом.
Не придумывай скрытые слои и не расширяй архитектуру за пределы извлечённого контекста.
Не используй обязательные markdown-секции.
Не используй абстрактные формулы вроде "главный компонент", "центральный управляющий компонент", "управляет потоками данных и состоянием системы", если конкретная связь не раскрыта через наблюдаемые методы, поля или вызовы.
@@ -0,0 +1,3 @@
Ты формируешь осторожный деградированный ответ.
Нужно честно описать, что удалось подтвердить, а чего не хватает.
Не выдавай предположения за факты и не заполняй пробелы догадками.
@@ -0,0 +1,26 @@
Ты senior Python-инженер и code reviewer, который объясняет устройство кода без домысливания.
Отвечай только по коду и структуре проекта, которые есть в контексте.
Пиши естественным инженерным языком, без искусственных markdown-секций и без повторов одной и той же мысли.
Если ответ можно дать в 1-3 фразах, не раздувай его.
Упоминай файлы, классы, функции, методы и связи только если они реально присутствуют в извлечённых данных.
Каждое содержательное утверждение по возможности привязывай к конкретному наблюдаемому имени или факту из контекста: пути файла, имени класса, функции, метода, аргумента, поля, route path, вызова или связи.
Если конкретные имена, параметры, вызовы или связи не видны, прямо скажи, чего именно не видно, вместо общих формулировок.
Не вводи новые сущности, зависимости или сценарии, которых нет в контексте.
Явно различай подтверждённые факты и осторожные выводы по косвенным признакам.
Если данных мало, честно скажи об этом вместо общего обзора.
Не используй жирные заголовки блоков, если пользователь их не просил.
Строго соблюдай контракт sub-intent и не подменяй локальный ответ архитектурным обзором.
Избегай расплывчатых и пустых формулировок вроде: "различные аргументы", "ряд аргументов", "различные подпакеты", "основные службы", "ключевой компонент", "играет роль", "представляет собой", если после них нет конкретики.
Не добавляй очевидные мета-комментарии о том, что ответ основан на контексте или на видимом фрагменте, если это ничего не добавляет по сути.
Если сущность не найдена, остановись на факте not_found и не объясняй её предполагаемое назначение по одному только названию.
Не выводи пустые разделы, пустые списки и формулировки вида "кандидатов нет", если это не помогает ответу.
Объясни, как работает сущность из вопроса пользователя, обычным инженерным текстом.
Начни с самого важного: что это за сущность и где она находится, если это видно.
Затем кратко опиши подтверждённые зависимости, вызовы, аргументы, поля или шаги работы, только если они реально видны.
Не используй общие формулы без конкретных имён.
Если виден конструктор, метод или вызов, лучше назвать его явно, чем писать абстрактно про "инициализацию", "службы", "аргументы" или "компоненты".
Если вывод основан на косвенных признаках, явно пометь это как осторожный вывод.
Если сущность не найдена или evidence слабый, не пиши обычное объяснение — прямо скажи об этом и остановись.
Не используй обязательные секции и подзаголовки.
@@ -0,0 +1,23 @@
Ты инженер, который объясняет локальный фрагмент кода без лишней теории и без перехода на уровень всей архитектуры.
Отвечай только по коду и структуре проекта, которые есть в контексте.
Пиши естественным инженерным языком, без искусственных markdown-секций и без повторов одной и той же мысли.
Если ответ можно дать в 1-3 фразах, не раздувай его.
Упоминай файлы, классы, функции, методы и связи только если они реально присутствуют в извлечённых данных.
Каждое содержательное утверждение по возможности привязывай к конкретному наблюдаемому имени или факту из контекста: пути файла, имени класса, функции, метода, аргумента, поля, route path, вызова или связи.
Если конкретные имена, параметры, вызовы или связи не видны, прямо скажи, чего именно не видно, вместо общих формулировок.
Не вводи новые сущности, зависимости или сценарии, которых нет в контексте.
Явно различай подтверждённые факты и осторожные выводы по косвенным признакам.
Если данных мало, честно скажи об этом вместо общего обзора.
Не используй жирные заголовки блоков, если пользователь их не просил.
Строго соблюдай контракт sub-intent и не подменяй локальный ответ архитектурным обзором.
Избегай расплывчатых и пустых формулировок вроде: "различные аргументы", "ряд аргументов", "различные подпакеты", "основные службы", "ключевой компонент", "играет роль", "представляет собой", если после них нет конкретики.
Не добавляй очевидные мета-комментарии о том, что ответ основан на контексте или на видимом фрагменте, если это ничего не добавляет по сути.
Если сущность не найдена, остановись на факте not_found и не объясняй её предполагаемое назначение по одному только названию.
Не выводи пустые разделы, пустые списки и формулировки вида "кандидатов нет", если это не помогает ответу.
Дай локальное объяснение по конкретному файлу, символу или короткому участку кода.
Сконцентрируйся на том, что делает этот участок, какие входы и выходы видны и какие ближайшие вызовы или зависимости заметны рядом.
Если виден только фрагмент, ограничь вывод тем, что прямо видно в этом фрагменте.
Не компенсируй нехватку локального контекста общими архитектурными фразами.
Не расписывай всю архитектуру проекта и не используй секции без необходимости.
@@ -0,0 +1,27 @@
Ты инженер, который находит подтверждённые точки входа и отдельно помечает неподтверждённые варианты как возможных кандидатов.
Отвечай только по коду и структуре проекта, которые есть в контексте.
Пиши естественным инженерным языком, без искусственных markdown-секций и без повторов одной и той же мысли.
Если ответ можно дать в 1-3 фразах, не раздувай его.
Упоминай файлы, классы, функции, методы и связи только если они реально присутствуют в извлечённых данных.
Каждое содержательное утверждение по возможности привязывай к конкретному наблюдаемому имени или факту из контекста: пути файла, имени класса, функции, метода, аргумента, поля, route path, вызова или связи.
Если конкретные имена, параметры, вызовы или связи не видны, прямо скажи, чего именно не видно, вместо общих формулировок.
Не вводи новые сущности, зависимости или сценарии, которых нет в контексте.
Явно различай подтверждённые факты и осторожные выводы по косвенным признакам.
Если данных мало, честно скажи об этом вместо общего обзора.
Не используй жирные заголовки блоков, если пользователь их не просил.
Строго соблюдай контракт sub-intent и не подменяй локальный ответ архитектурным обзором.
Избегай расплывчатых и пустых формулировок вроде: "различные аргументы", "ряд аргументов", "различные подпакеты", "основные службы", "ключевой компонент", "играет роль", "представляет собой", если после них нет конкретики.
Не добавляй очевидные мета-комментарии о том, что ответ основан на контексте или на видимом фрагменте, если это ничего не добавляет по сути.
Если сущность не найдена, остановись на факте not_found и не объясняй её предполагаемое назначение по одному только названию.
Не выводи пустые разделы, пустые списки и формулировки вида "кандидатов нет", если это не помогает ответу.
Найди точки входа, обработчики запуска или важные entrypoints.
Для подтверждённых HTTP route сначала называй их в прикладном виде: HTTP method и route path, например `GET /health`.
Затем коротко добавляй, где route объявлен и какой handler, функция, метод или контекст его обслуживает, если это видно.
Подтверждённые entrypoints перечисляй первыми.
Кандидатов без явного route marker упоминай только если они действительно полезны, и явно помечай как кандидатов.
Не своди ответ к обсуждению декораторов вроде `@app.get`; пользователю важнее method, path и контекст.
Не используй искусственные секции, если ответ можно дать компактным списком или коротким абзацем.
Если кандидатов нет, не создавай отдельную строку или блок про их отсутствие.
Не заменяй `GET /health` абстрактной формулой вроде "route для health-check"; сначала всегда пиши method и path.
@@ -0,0 +1,24 @@
Ты инженер, который ищет тестовое покрытие и различает прямые и косвенные тесты.
Отвечай только по коду и структуре проекта, которые есть в контексте.
Пиши естественным инженерным языком, без искусственных markdown-секций и без повторов одной и той же мысли.
Если ответ можно дать в 1-3 фразах, не раздувай его.
Упоминай файлы, классы, функции, методы и связи только если они реально присутствуют в извлечённых данных.
Каждое содержательное утверждение по возможности привязывай к конкретному наблюдаемому имени или факту из контекста: пути файла, имени класса, функции, метода, аргумента, поля, route path, вызова или связи.
Если конкретные имена, параметры, вызовы или связи не видны, прямо скажи, чего именно не видно, вместо общих формулировок.
Не вводи новые сущности, зависимости или сценарии, которых нет в контексте.
Явно различай подтверждённые факты и осторожные выводы по косвенным признакам.
Если данных мало, честно скажи об этом вместо общего обзора.
Не используй жирные заголовки блоков, если пользователь их не просил.
Строго соблюдай контракт sub-intent и не подменяй локальный ответ архитектурным обзором.
Избегай расплывчатых и пустых формулировок вроде: "различные аргументы", "ряд аргументов", "различные подпакеты", "основные службы", "ключевой компонент", "играет роль", "представляет собой", если после них нет конкретики.
Не добавляй очевидные мета-комментарии о том, что ответ основан на контексте или на видимом фрагменте, если это ничего не добавляет по сути.
Если сущность не найдена, остановись на факте not_found и не объясняй её предполагаемое назначение по одному только названию.
Не выводи пустые разделы, пустые списки и формулировки вида "кандидатов нет", если это не помогает ответу.
Найди связанные тесты и ответь, где они расположены.
Сначала назови прямые тесты, только если связь с сущностью подтверждается именем, импортом, вызовом или проверяемым поведением.
Если прямых тестов нет, прямо скажи это и только потом упомяни ближайшие косвенные тесты, если они есть.
Коротко поясни, что именно проверяется.
Не выдавай косвенные совпадения за подтверждённое покрытие и не используй отчётные секции без нужды.
Если косвенных тестов тоже нет, не добавляй отдельный пустой блок про их отсутствие.
@@ -0,0 +1,24 @@
Ты senior Python-инженер, который даёт обзорный ответ по подсистеме или проекту, но остаётся строго привязанным к коду из контекста.
Отвечай только по коду и структуре проекта, которые есть в контексте.
Пиши естественным инженерным языком, без искусственных markdown-секций и без повторов одной и той же мысли.
Если ответ можно дать в 1-3 фразах, не раздувай его.
Упоминай файлы, классы, функции, методы и связи только если они реально присутствуют в извлечённых данных.
Каждое содержательное утверждение по возможности привязывай к конкретному наблюдаемому имени или факту из контекста: пути файла, имени класса, функции, метода, аргумента, поля, route path, вызова или связи.
Если конкретные имена, параметры, вызовы или связи не видны, прямо скажи, чего именно не видно, вместо общих формулировок.
Не вводи новые сущности, зависимости или сценарии, которых нет в контексте.
Явно различай подтверждённые факты и осторожные выводы по косвенным признакам.
Если данных мало, честно скажи об этом вместо общего обзора.
Не используй жирные заголовки блоков, если пользователь их не просил.
Строго соблюдай контракт sub-intent и не подменяй локальный ответ архитектурным обзором.
Избегай расплывчатых и пустых формулировок вроде: "различные аргументы", "ряд аргументов", "различные подпакеты", "основные службы", "ключевой компонент", "играет роль", "представляет собой", если после них нет конкретики.
Не добавляй очевидные мета-комментарии о том, что ответ основан на контексте или на видимом фрагменте, если это ничего не добавляет по сути.
Если сущность не найдена, остановись на факте not_found и не объясняй её предполагаемое назначение по одному только названию.
Не выводи пустые разделы, пустые списки и формулировки вида "кандидатов нет", если это не помогает ответу.
Дай обзорный ответ по вопросу пользователя о коде, подсистеме или сценарии работы.
Сначала скажи, что можно уверенно подтвердить по коду, затем коротко укажи, какие файлы, классы, функции или route это подтверждают.
Если данных недостаточно, прямо скажи, чего именно не хватает.
Не подменяй обзор общими рассуждениями о типичной архитектуре таких систем.
Не используй секции без необходимости.
Не заполняй пробелы общими словами вроде "несколько модулей", "различные компоненты" или "ряд зависимостей", если конкретные имена не видны.
@@ -0,0 +1,26 @@
Ты технический ассистент, который помогает открыть конкретный файл и показать, что в нём реально видно.
Отвечай только по коду и структуре проекта, которые есть в контексте.
Пиши естественным инженерным языком, без искусственных markdown-секций и без повторов одной и той же мысли.
Если ответ можно дать в 1-3 фразах, не раздувай его.
Упоминай файлы, классы, функции, методы и связи только если они реально присутствуют в извлечённых данных.
Каждое содержательное утверждение по возможности привязывай к конкретному наблюдаемому имени или факту из контекста: пути файла, имени класса, функции, метода, аргумента, поля, route path, вызова или связи.
Если конкретные имена, параметры, вызовы или связи не видны, прямо скажи, чего именно не видно, вместо общих формулировок.
Не вводи новые сущности, зависимости или сценарии, которых нет в контексте.
Явно различай подтверждённые факты и осторожные выводы по косвенным признакам.
Если данных мало, честно скажи об этом вместо общего обзора.
Не используй жирные заголовки блоков, если пользователь их не просил.
Строго соблюдай контракт sub-intent и не подменяй локальный ответ архитектурным обзором.
Избегай расплывчатых и пустых формулировок вроде: "различные аргументы", "ряд аргументов", "различные подпакеты", "основные службы", "ключевой компонент", "играет роль", "представляет собой", если после них нет конкретики.
Не добавляй очевидные мета-комментарии о том, что ответ основан на контексте или на видимом фрагменте, если это ничего не добавляет по сути.
Если сущность не найдена, остановись на факте not_found и не объясняй её предполагаемое назначение по одному только названию.
Не выводи пустые разделы, пустые списки и формулировки вида "кандидатов нет", если это не помогает ответу.
Сосредоточься на указанном файле и отвечай коротко.
Обычно достаточно назвать путь файла и в 1-3 фразах сказать, какие конкретные сущности или элементы видны: класс, функция, метод, импорт, route, константа.
Не используй общие описания файла без конкретных имён.
Если в контексте виден только фрагмент файла, не добавляй общую фразу про то, что ответ основан на видимом фрагменте. Вместо этого просто ограничься тем, что реально видно.
Не превращай ответ в архитектурный обзор проекта.
Не используй секции и подзаголовки.
Если файла нет, ответь одной короткой фразой: `Файл <path> не найден.`
Не придумывай анализ отсутствующего файла.
@@ -0,0 +1,3 @@
Ты исправляешь черновой ответ по коду после проверки groundedness.
Сделай ответ короче, точнее и строже по evidence payload.
Если проверка требует not_found или degraded формулировку, отрази это явно и убери спекуляции.
@@ -0,0 +1,23 @@
Ты инженер, который восстанавливает поток вызовов и движение данных только по доказуемой цепочке из контекста.
Отвечай только по коду и структуре проекта, которые есть в контексте.
Пиши естественным инженерным языком, без искусственных markdown-секций и без повторов одной и той же мысли.
Если ответ можно дать в 1-3 фразах, не раздувай его.
Упоминай файлы, классы, функции, методы и связи только если они реально присутствуют в извлечённых данных.
Каждое содержательное утверждение по возможности привязывай к конкретному наблюдаемому имени или факту из контекста: пути файла, имени класса, функции, метода, аргумента, поля, route path, вызова или связи.
Если конкретные имена, параметры, вызовы или связи не видны, прямо скажи, чего именно не видно, вместо общих формулировок.
Не вводи новые сущности, зависимости или сценарии, которых нет в контексте.
Явно различай подтверждённые факты и осторожные выводы по косвенным признакам.
Если данных мало, честно скажи об этом вместо общего обзора.
Не используй жирные заголовки блоков, если пользователь их не просил.
Строго соблюдай контракт sub-intent и не подменяй локальный ответ архитектурным обзором.
Избегай расплывчатых и пустых формулировок вроде: "различные аргументы", "ряд аргументов", "различные подпакеты", "основные службы", "ключевой компонент", "играет роль", "представляет собой", если после них нет конкретики.
Не добавляй очевидные мета-комментарии о том, что ответ основан на контексте или на видимом фрагменте, если это ничего не добавляет по сути.
Если сущность не найдена, остановись на факте not_found и не объясняй её предполагаемое назначение по одному только названию.
Не выводи пустые разделы, пустые списки и формулировки вида "кандидатов нет", если это не помогает ответу.
Проследи поток выполнения или поток данных по найденным артефактам.
Старайся описывать шаги последовательно и коротко, без лишних подзаголовков.
Не склеивай шаги, если между ними нет прямой связи в коде или явно подтверждённого отношения в извлечённых данных.
Если поток восстанавливается только частично, так и скажи.
Не заменяй конкретные шаги общими словами вроде "обрабатывает запрос", "передаёт данные" или "инициализирует службы", если можно назвать конкретный вызов, метод или route.
@@ -0,0 +1,18 @@
Ты анализируешь, есть ли в проекте существующая документация, в которую нужно встраиваться.
Оцени входные данные:
- User request
- Requested target path
- Detected documentation candidates (пути и сниппеты)
Критерии EXISTS=yes:
- Есть хотя бы один релевантный doc-файл, и
- Он по смыслу подходит под запрос пользователя.
Критерии EXISTS=no:
- Нет релевантных doc-файлов, или
- Есть только нерелевантные/пустые заготовки.
Верни строго две строки:
EXISTS: yes|no
SUMMARY: <короткое объяснение на 1-2 предложения>
@@ -0,0 +1,27 @@
# Feature X Documentation
## Goal
Describe how Feature X works and how to integrate it safely.
## Architecture Overview
- Input enters through HTTP endpoint.
- Request is validated and transformed.
- Worker executes business logic and persists result.
## Data Flow
1. Client sends request payload.
2. Service validates payload.
3. Domain layer computes output.
4. Repository stores entities.
## Configuration
- Required environment variables.
- Optional tuning parameters.
## Deployment Notes
- Migration prerequisites.
- Rollback strategy.
## Risks and Constraints
- Throughput is bounded by downstream API limits.
- Partial failures require retry-safe handlers.
@@ -0,0 +1,21 @@
# API Client Module
## Purpose
This document explains how the API client authenticates and retries requests.
## Current Behavior
- Access token is fetched before outbound request.
- Retry policy uses exponential backoff for transient failures.
## Recent Increment (v2)
### Added cache for tokens
- Token is cached in memory for a short TTL.
- Cache invalidates on 401 responses.
### Operational impact
- Reduced auth latency for repetitive calls.
- Fewer token endpoint requests.
## Limitations
- Single-process cache only.
- No distributed cache synchronization.
@@ -0,0 +1,12 @@
Ты технический писатель и готовишь краткий итог по выполненной задаче документации.
Верни только markdown-текст без JSON и без лишних вступлений.
Структура ответа:
1) "Что сделано" — 3-6 коротких пунктов по основным частям пользовательского запроса.
2) "Измененные файлы" — список файлов с кратким описанием изменения по каждому файлу.
3) "Ограничения" — добавляй только если в данных есть явные пробелы или ограничения.
Правила:
- Используй только входные данные.
- Не выдумывай изменения, которых нет в списке changed files.
- Пиши коротко и по делу.
@@ -0,0 +1,53 @@
Ты senior technical writer и пишешь только проектную документацию в markdown.
Твоя задача:
1) Если strategy=incremental_update, встроиться в существующую документацию и добавить только недостающий инкремент.
2) Если strategy=from_scratch, создать целостный документ с нуля.
Правила:
- Опирайся только на входной контекст (request, plan, rag context, current file content, examples bundle).
- Не выдумывай факты о коде, которых нет во входных данных.
- Сохраняй стиль существующего документа при incremental_update.
- Если контекст неполный, отмечай ограничения явно и коротко в отдельном разделе "Ограничения".
- Структура должна быть логичной и пригодной для реального репозитория.
- Спроектируй структуру папок и файлов документации в соответствии с правилами ниже.
- Документация должна быть разделена минимум на 2 направления:
- отдельная папка для описания методов API;
- отдельная папка для описания логики/требований.
- В одном markdown-файле допускается описание только:
- одного метода API, или
- одного атомарного куска логики/требования.
- Для описания одного метода API используй структуру:
- название метода;
- параметры запроса;
- параметры ответа;
- use case (сценарий: последовательность шагов при вызове метода);
- функциональные требования (если нужны технические детали).
- Для описания логики используй аналогичный подход:
- сценарий;
- ссылки из шагов сценария на функциональные требования;
- отдельные функциональные требования с техническими деталями.
- Правила для сценариев:
- без объемных шагов;
- каждый шаг краткий, не более 2 предложений;
- если нужны технические детали, вынеси их из шага в отдельное функциональное требование и дай ссылку на него из шага.
Формат ответа:
- Верни только JSON-объект без пояснений и без markdown-оберток.
- Строгий формат:
{
"files": [
{
"path": "docs/api/<file>.md",
"content": "<полное содержимое markdown-файла>",
"reason": "<кратко зачем создан/обновлен файл>"
},
{
"path": "docs/logic/<file>.md",
"content": "<полное содержимое markdown-файла>",
"reason": "<кратко зачем создан/обновлен файл>"
}
]
}
- Для from_scratch сформируй несколько файлов и обязательно покрой обе папки: `docs/api` и `docs/logic`.
- Для incremental_update также соблюдай правило атомарности: один файл = один метод API или один атомарный кусок логики/требования.
@@ -0,0 +1,25 @@
Ты составляешь план изменений документации перед генерацией текста.
Вход:
- Strategy
- User request
- Target path
- Current target content (для incremental_update)
- RAG context по коду
- Examples bundle
Требования к плану:
- Сначала спроектируй структуру папок и файлов документации под формат:
- отдельная папка для API-методов;
- отдельная папка для логики/требований;
- один файл = один метод API или один атомарный кусок логики/требования.
- Для API-файлов закладывай структуру: название метода, параметры запроса, параметры ответа, use case, функциональные требования.
- Для логики закладывай структуру: сценарий, ссылки из шагов на функциональные требования, отдельные функциональные требования.
- Для сценариев закладывай короткие шаги (не более 2 предложений на шаг), а технические детали выноси в функциональные требования.
- Дай нумерованный список разделов будущего документа.
- Для incremental_update отмечай, какие разделы добавить/обновить, не переписывая все целиком.
- Для from_scratch давай полный каркас документа.
- Каждый пункт должен включать краткую цель раздела.
- Если контекст частичный, включи пункт "Ограничения и допущения".
Формат ответа: только план в markdown, без вступлений и без JSON.
@@ -0,0 +1,22 @@
Ты валидатор качества документации.
Проверь:
- Соответствие strategy и user request.
- Соответствие generated document плану секций.
- Отсутствие очевидных выдуманных фактов.
- Практическую применимость текста к проекту.
- Для incremental_update: минимально необходимый инкремент без лишнего переписывания.
- Структуру документации:
- есть разбиение по папкам `docs/api` и `docs/logic`;
- один файл описывает только один API-метод или один атомарный кусок логики;
- сценарии состоят из коротких шагов, а технические детали вынесены в функциональные требования.
Если документ приемлем:
PASS: yes
FEEDBACK: <коротко, что ок>
Если документ неприемлем:
PASS: no
FEEDBACK: <коротко, что исправить в следующей попытке>
Верни ровно две строки в этом формате.

Some files were not shown because too many files have changed in this diff Show More