Фиксирую рабочее состояние

This commit is contained in:
2026-04-15 14:20:27 +03:00
parent 7f22a00696
commit 77851e99a7
42 changed files with 1066 additions and 123 deletions
@@ -53,6 +53,18 @@ class _EndpointPathExtractor:
_PATH_RE = re.compile(r"`([^`]+)`|(/[A-Za-z0-9_./{}-]+)") _PATH_RE = re.compile(r"`([^`]+)`|(/[A-Za-z0-9_./{}-]+)")
_VALID_ENDPOINT_RE = re.compile(r"^/[a-z0-9._/-]+(?:/\{[a-z0-9_]+\})?$") _VALID_ENDPOINT_RE = re.compile(r"^/[a-z0-9._/-]+(?:/\{[a-z0-9_]+\})?$")
_DOC_EXTENSIONS = (".md", ".yaml", ".yml", ".json") _DOC_EXTENSIONS = (".md", ".yaml", ".yml", ".json")
_FILESYSTEM_PREFIXES = (
"/users/",
"/home/",
"/tmp/",
"/var/",
"/opt/",
"/etc/",
"/private/",
"/mnt/",
"/workspace/",
"/workspaces/",
)
def extract(self, query: str) -> list[str]: def extract(self, query: str) -> list[str]:
values: list[str] = [] values: list[str] = []
@@ -72,6 +84,8 @@ class _EndpointPathExtractor:
def _is_endpoint(self, token: str) -> bool: def _is_endpoint(self, token: str) -> bool:
if not token or not self._VALID_ENDPOINT_RE.fullmatch(token): if not token or not self._VALID_ENDPOINT_RE.fullmatch(token):
return False return False
if token.startswith(self._FILESYSTEM_PREFIXES):
return False
return not token.endswith(self._DOC_EXTENSIONS) return not token.endswith(self._DOC_EXTENSIONS)
def _append_unique(self, items: list[str], value: str) -> None: def _append_unique(self, items: list[str], value: str) -> None:
@@ -2,8 +2,10 @@
from __future__ import annotations from __future__ import annotations
import re
from collections.abc import Callable from collections.abc import Callable
from dataclasses import replace from dataclasses import replace
from typing import TYPE_CHECKING
from app.core.agent.processes.v2.intent_router.modules.anchors import V2AnchorExtractor from app.core.agent.processes.v2.intent_router.modules.anchors import V2AnchorExtractor
from app.core.agent.processes.v2.intent_router.modules.normalizer import V2QueryNormalizer from app.core.agent.processes.v2.intent_router.modules.normalizer import V2QueryNormalizer
@@ -22,7 +24,66 @@ from app.core.agent.processes.v2.intent_router.routers.route_catalog import V2Ro
from app.core.agent.processes.v2.intent_router.routers.validator import V2RouteValidator from app.core.agent.processes.v2.intent_router.routers.validator import V2RouteValidator
from app.core.agent.utils.process_v2.models import V2RouteResult, V2ScopeType from app.core.agent.utils.process_v2.models import V2RouteResult, V2ScopeType
from app.core.agent.utils.llm import AgentLlmService from app.core.agent.utils.llm import AgentLlmService
from app.core.rag.persistence.query_repository import RagQueryRepository
if TYPE_CHECKING:
from app.core.rag.persistence.query_repository import RagQueryRepository
class _ExplicitDocsUpdateResolver:
_UPDATE_MARKERS = (
"собери документац",
"сгенерир",
"построй документац",
"обнови документац",
"обновить документац",
"generate documentation",
"build documentation",
"update documentation",
)
_FEATURE_MARKERS = (
"/features/",
"\\features\\",
"feature",
"системной аналитик",
"confluence",
)
_PATH_PATTERN = re.compile(r"(/[^\n`]+?\.md)")
_URL_PATTERN = re.compile(r"https?://[^\s)]*confluence[^\s)]*")
def matches(self, user_query: str) -> bool:
query = str(user_query or "")
lowered = query.lower()
if not any(marker in lowered for marker in self._UPDATE_MARKERS):
return False
path = self._extract_path(query)
if path and self._is_feature_source(path):
return True
url = self._extract_confluence_url(query)
if url:
return True
return any(marker in lowered for marker in self._FEATURE_MARKERS)
def _extract_path(self, query: str) -> str:
if "`" in query:
for chunk in query.split("`"):
value = chunk.strip().strip('"').strip("'")
if value.endswith(".md") and value.startswith("/"):
return value
match = self._PATH_PATTERN.search(query)
return match.group(1).strip().strip('"').strip("'") if match else ""
def _extract_confluence_url(self, query: str) -> str:
match = self._URL_PATTERN.search(query)
return match.group(0).strip() if match else ""
def _is_feature_source(self, path: str) -> bool:
lowered = str(path or "").lower()
return "/feature" in lowered
class _ExplicitFileLookupResolver:
def matches(self, anchor_analysis) -> bool:
return bool(getattr(anchor_analysis.anchors, "file_names", []))
def _scope_candidate_dict(candidate) -> dict[str, object]: def _scope_candidate_dict(candidate) -> dict[str, object]:
@@ -56,6 +117,8 @@ class V2IntentRouter:
self._enable_llm_disambiguation = enable_llm_disambiguation self._enable_llm_disambiguation = enable_llm_disambiguation
self._llm_router = V2LlmRouter(llm, catalog=self._catalog) if llm is not None else None self._llm_router = V2LlmRouter(llm, catalog=self._catalog) if llm is not None else None
self._scope_rows_provider = scope_rows_provider self._scope_rows_provider = scope_rows_provider
self._explicit_docs_update_resolver = _ExplicitDocsUpdateResolver()
self._explicit_file_lookup_resolver = _ExplicitFileLookupResolver()
def route(self, user_query: str, *, rag_session_id: str | None = None) -> V2RouteResult: def route(self, user_query: str, *, rag_session_id: str | None = None) -> V2RouteResult:
normalized_query = self._normalizer.normalize(user_query) normalized_query = self._normalizer.normalize(user_query)
@@ -98,6 +161,36 @@ class V2IntentRouter:
endpoint_markers=list(anchor_analysis.endpoint_markers), endpoint_markers=list(anchor_analysis.endpoint_markers),
scope_type=resolution.scope_type, scope_type=resolution.scope_type,
) )
if self._explicit_docs_update_resolver.matches(user_query):
return V2RouteResult(
routing_domain="DOCS",
intent="DOC_UPDATE",
subintent="FROM_FEATURE",
user_query=user_query,
normalized_query=features.normalized_query,
target_terms=features.target_terms,
anchors=anchor_analysis.anchors,
confidence=1.0,
routing_mode="deterministic",
llm_router_used=False,
reason_short="explicit docs update from feature source",
scope_type=resolution.scope_type,
)
if self._explicit_file_lookup_resolver.matches(anchor_analysis):
return V2RouteResult(
routing_domain="DOCS",
intent="DOC_EXPLAIN",
subintent="FIND_FILES",
user_query=user_query,
normalized_query=features.normalized_query,
target_terms=features.target_terms,
anchors=anchor_analysis.anchors,
confidence=1.0,
routing_mode="deterministic",
llm_router_used=False,
reason_short="explicit file reference",
scope_type=resolution.scope_type,
)
llm_attempted = self._enable_llm_disambiguation and self._llm_router is not None llm_attempted = self._enable_llm_disambiguation and self._llm_router is not None
llm_candidate = self._route_with_llm( llm_candidate = self._route_with_llm(
features=features, features=features,
@@ -121,11 +214,12 @@ class V2IntentRouter:
scope_type=resolution.scope_type, scope_type=resolution.scope_type,
) )
if llm_attempted: if llm_attempted:
return self._fallback_router.route_without_deterministic_signals( return self._fallback_router.route(
user_query=user_query, user_query=user_query,
features=features, features=features,
anchors=anchor_analysis.anchors, anchors=anchor_analysis.anchors,
scope_type=resolution.scope_type, scope_type=resolution.scope_type,
llm_attempted=True,
) )
return self._fallback_router.route( return self._fallback_router.route(
user_query=user_query, user_query=user_query,
@@ -142,10 +236,15 @@ class V2IntentRouter:
if self._scope_rows_provider is not None: if self._scope_rows_provider is not None:
return self._scope_rows_provider(sid) return self._scope_rows_provider(sid)
try: try:
return RagQueryRepository().list_docs_scope_index_rows(sid) return self._build_query_repository().list_docs_scope_index_rows(sid)
except Exception: except Exception:
return [] return []
def _build_query_repository(self) -> "RagQueryRepository":
from app.core.rag.persistence.query_repository import RagQueryRepository
return RagQueryRepository()
def _apply_scope_to_anchors(self, anchors, resolution) -> None: def _apply_scope_to_anchors(self, anchors, resolution) -> None:
anchors.candidate_domains = list(resolution.candidate_domains) anchors.candidate_domains = list(resolution.candidate_domains)
anchors.candidate_subdomains = list(resolution.candidate_subdomains) anchors.candidate_subdomains = list(resolution.candidate_subdomains)
+26 -1
View File
@@ -2,6 +2,7 @@
from __future__ import annotations from __future__ import annotations
import asyncio
from typing import Any from typing import Any
from app.core.agent.processes.base import AgentProcess, ProcessResult from app.core.agent.processes.base import AgentProcess, ProcessResult
@@ -88,7 +89,21 @@ class V2Process(AgentProcess):
async def run(self, context) -> ProcessResult: async def run(self, context) -> ProcessResult:
rag_session_id = context.session.active_rag_session_id or "" rag_session_id = context.session.active_rag_session_id or ""
route = self._router.route(context.request.message, rag_session_id=rag_session_id or None) route = await asyncio.to_thread(
self._router.route,
context.request.message,
rag_session_id=rag_session_id or None,
)
await context.publisher.publish_status(
context.request.request_id,
"process.v2",
f"Запрос принял, переход в {self._subintent_label(route.intent, route.subintent)}.",
{
"routing_domain": route.routing_domain,
"intent": route.intent,
"subintent": route.subintent,
},
)
context.trace.module("process.v2").log( context.trace.module("process.v2").log(
"intent_routed", "intent_routed",
{ {
@@ -148,6 +163,16 @@ class V2Process(AgentProcess):
def _log_step(self, context, step: str, payload: dict[str, object]) -> None: def _log_step(self, context, step: str, payload: dict[str, object]) -> None:
context.trace.module("process.v2.pipeline").log(step, payload) context.trace.module("process.v2.pipeline").log(step, payload)
def _subintent_label(self, intent: str, subintent: str) -> str:
labels = {
(V2Intent.DOC_EXPLAIN, V2Subintent.SUMMARY): "объяснение документации",
(V2Intent.DOC_EXPLAIN, V2Subintent.FIND_FILES): "поиск файлов документации",
(V2Intent.DOC_EXPLAIN, V2Subintent.API_EXPOSED): "проверку экспонирования API",
(V2Intent.DOC_UPDATE, V2Subintent.FROM_FEATURE): "обновление документации по аналитике",
(V2Intent.GENERAL_QA, V2Subintent.SUMMARY): "общий ответ по проекту",
}
return labels.get((intent, subintent), str(subintent).lower())
async def _run_workflow(self, runtime_context, route, rag_session_id: str): async def _run_workflow(self, runtime_context, route, rag_session_id: str):
workflow = self._workflows.get((route.routing_domain, route.intent, route.subintent)) workflow = self._workflows.get((route.routing_domain, route.intent, route.subintent))
if workflow is None: if workflow is None:
@@ -17,15 +17,16 @@ class DocExplainApiExposedWorkflowGraph(WorkflowGraph[TContext]):
steps_buffer: list[dict[str, object]] = [] steps_buffer: list[dict[str, object]] = []
for step in self._steps: for step in self._steps:
inp = step.trace_input(context) inp = step.trace_input(context)
request_id = context.runtime.request.request_id await self._publish_step_status(context, step, phase="before", input_context=context)
await context.runtime.publisher.publish_status( next_context = await step.run(context)
request_id, out = step.trace_output(next_context)
self._source, await self._publish_step_status(
f"Шаг workflow: {step.title}.", next_context,
{"workflow_id": self._workflow_id, "step_id": step.step_id}, step,
phase="after",
input_context=context,
output_context=next_context,
) )
context = await step.run(context)
out = step.trace_output(context)
trace.log( trace.log(
"workflow_step_traced", "workflow_step_traced",
{ {
@@ -36,7 +37,7 @@ class DocExplainApiExposedWorkflowGraph(WorkflowGraph[TContext]):
}, },
) )
steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out}) steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out})
context = next_context
trace.log("workflow_trace_flushed", {"workflow_id": self._workflow_id, "steps": steps_buffer}) trace.log("workflow_trace_flushed", {"workflow_id": self._workflow_id, "steps": steps_buffer})
trace.log("workflow_completed", {"workflow_id": self._workflow_id}) trace.log("workflow_completed", {"workflow_id": self._workflow_id})
return context return context
@@ -19,15 +19,16 @@ class DocExplainFindFilesWorkflowGraph(WorkflowGraph[TContext]):
steps_buffer: list[dict[str, object]] = [] steps_buffer: list[dict[str, object]] = []
for step in self._steps: for step in self._steps:
inp = step.trace_input(context) inp = step.trace_input(context)
request_id = context.runtime.request.request_id await self._publish_step_status(context, step, phase="before", input_context=context)
await context.runtime.publisher.publish_status( next_context = await step.run(context)
request_id, out = step.trace_output(next_context)
self._source, await self._publish_step_status(
f"Шаг workflow: {step.title}.", next_context,
{"workflow_id": self._workflow_id, "step_id": step.step_id}, step,
phase="after",
input_context=context,
output_context=next_context,
) )
context = await step.run(context)
out = step.trace_output(context)
trace.log( trace.log(
"workflow_step_traced", "workflow_step_traced",
{ {
@@ -38,6 +39,7 @@ class DocExplainFindFilesWorkflowGraph(WorkflowGraph[TContext]):
}, },
) )
steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out}) steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out})
context = next_context
trace.log( trace.log(
"workflow_trace_flushed", "workflow_trace_flushed",
{"workflow_id": self._workflow_id, "steps": steps_buffer}, {"workflow_id": self._workflow_id, "steps": steps_buffer},
@@ -19,15 +19,16 @@ class DocExplainSummaryWorkflowGraph(WorkflowGraph[TContext]):
steps_buffer: list[dict[str, object]] = [] steps_buffer: list[dict[str, object]] = []
for step in self._steps: for step in self._steps:
inp = step.trace_input(context) inp = step.trace_input(context)
request_id = context.runtime.request.request_id await self._publish_step_status(context, step, phase="before", input_context=context)
await context.runtime.publisher.publish_status( next_context = await step.run(context)
request_id, out = step.trace_output(next_context)
self._source, await self._publish_step_status(
f"Шаг workflow: {step.title}.", next_context,
{"workflow_id": self._workflow_id, "step_id": step.step_id}, step,
phase="after",
input_context=context,
output_context=next_context,
) )
context = await step.run(context)
out = step.trace_output(context)
trace.log( trace.log(
"workflow_step_traced", "workflow_step_traced",
{ {
@@ -38,6 +39,7 @@ class DocExplainSummaryWorkflowGraph(WorkflowGraph[TContext]):
}, },
) )
steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out}) steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out})
context = next_context
trace.log( trace.log(
"workflow_trace_flushed", "workflow_trace_flushed",
{"workflow_id": self._workflow_id, "steps": steps_buffer}, {"workflow_id": self._workflow_id, "steps": steps_buffer},
@@ -19,17 +19,18 @@ class DocUpdateFromFeatureWorkflowGraph(WorkflowGraph[TContext]):
before = self._snapshot(context) before = self._snapshot(context)
raw_inp = step.trace_input(context) raw_inp = step.trace_input(context)
inp = self._merge_trace_payload(raw_inp, before) inp = self._merge_trace_payload(raw_inp, before)
request_id = context.runtime.request.request_id await self._publish_step_status(context, step, phase="before", input_context=context)
await context.runtime.publisher.publish_status( next_context = await step.run(context)
request_id, after = self._snapshot(next_context)
self._source, raw_out = step.trace_output(next_context)
f"Шаг workflow: {step.title}.",
{"workflow_id": self._workflow_id, "step_id": step.step_id},
)
context = await step.run(context)
after = self._snapshot(context)
raw_out = step.trace_output(context)
out = self._merge_trace_payload(raw_out, after) out = self._merge_trace_payload(raw_out, after)
await self._publish_step_status(
next_context,
step,
phase="after",
input_context=context,
output_context=next_context,
)
trace.log( trace.log(
"workflow_step_traced", "workflow_step_traced",
{ {
@@ -40,6 +41,7 @@ class DocUpdateFromFeatureWorkflowGraph(WorkflowGraph[TContext]):
}, },
) )
steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out}) steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out})
context = next_context
trace.log("workflow_trace_flushed", {"workflow_id": self._workflow_id, "steps": steps_buffer}) trace.log("workflow_trace_flushed", {"workflow_id": self._workflow_id, "steps": steps_buffer})
trace.log("workflow_completed", {"workflow_id": self._workflow_id}) trace.log("workflow_completed", {"workflow_id": self._workflow_id})
return context return context
@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import asyncio
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@@ -30,7 +31,7 @@ class LoadSourceContentStep(WorkflowStep[DocUpdateFromFeatureV2Context]):
context.issues.append(f"Файл системной аналитики не найден: {context.source_ref}") context.issues.append(f"Файл системной аналитики не найден: {context.source_ref}")
return context return context
try: try:
context.source_content = source_path.read_text(encoding="utf-8") context.source_content = await asyncio.to_thread(source_path.read_text, encoding="utf-8")
context.project_root = self._resolve_project_root(source_path).as_posix() context.project_root = self._resolve_project_root(source_path).as_posix()
except Exception as exc: except Exception as exc:
context.issues.append(f"Не удалось прочитать системную аналитику: {exc}") context.issues.append(f"Не удалось прочитать системную аналитику: {exc}")
@@ -42,6 +43,9 @@ class LoadSourceContentStep(WorkflowStep[DocUpdateFromFeatureV2Context]):
idx = parts.index("_incoming") idx = parts.index("_incoming")
if idx > 0: if idx > 0:
return Path(*parts[:idx]) return Path(*parts[:idx])
for parent in source_path.parents:
if (parent / "docs").is_dir():
return parent
return source_path.parent return source_path.parent
def trace_input(self, context: DocUpdateFromFeatureV2Context) -> dict[str, Any]: def trace_input(self, context: DocUpdateFromFeatureV2Context) -> dict[str, Any]:
@@ -52,3 +56,6 @@ class LoadSourceContentStep(WorkflowStep[DocUpdateFromFeatureV2Context]):
"project_root": context.project_root, "project_root": context.project_root,
"source_content_len": len(context.source_content or ""), "source_content_len": len(context.source_content or ""),
} }
def get_after_status_message(self):
return "Системная аналитика загружена"
@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import asyncio
from typing import Any from typing import Any
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.steps.step3_parse_requirements.parser import ( from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.steps.step3_parse_requirements.parser import (
@@ -21,7 +22,7 @@ class ParseRequirementsStep(WorkflowStep[DocUpdateFromFeatureV2Context]):
async def run(self, context: DocUpdateFromFeatureV2Context) -> DocUpdateFromFeatureV2Context: async def run(self, context: DocUpdateFromFeatureV2Context) -> DocUpdateFromFeatureV2Context:
if context.answer or not context.source_content: if context.answer or not context.source_content:
return context return context
meta, units = self._parser.parse(context.source_content) meta, units = await asyncio.to_thread(self._parser.parse, context.source_content)
context.analytics_meta = meta context.analytics_meta = meta
context.requirements = units context.requirements = units
@@ -54,3 +55,6 @@ class ParseRequirementsStep(WorkflowStep[DocUpdateFromFeatureV2Context]):
for item in context.requirements for item in context.requirements
], ],
} }
def get_after_status_message(self):
return "Функциональные требования прочитаны"
@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import asyncio
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@@ -23,7 +24,16 @@ class LoadRulesStep(WorkflowStep[DocUpdateFromFeatureV2Context]):
if not self._rules_root.exists(): if not self._rules_root.exists():
context.issues.append(f"Папка rules не найдена: {self._rules_root.as_posix()}") context.issues.append(f"Папка rules не найдена: {self._rules_root.as_posix()}")
return context return context
loaded, issues = await asyncio.to_thread(self._load_rules)
context.issues.extend(issues)
context.rules = loaded
if not context.rules:
context.issues.append("Rules v2 пустые: не найдено ни одного *.md файла.")
return context
def _load_rules(self) -> tuple[list[RuleDocument], list[str]]:
loaded: list[RuleDocument] = [] loaded: list[RuleDocument] = []
issues: list[str] = []
for item in sorted(self._rules_root.rglob("*.md")): for item in sorted(self._rules_root.rglob("*.md")):
try: try:
loaded.append( loaded.append(
@@ -33,11 +43,8 @@ class LoadRulesStep(WorkflowStep[DocUpdateFromFeatureV2Context]):
) )
) )
except Exception as exc: except Exception as exc:
context.issues.append(f"Не удалось прочитать rule {item.name}: {exc}") issues.append(f"Не удалось прочитать rule {item.name}: {exc}")
context.rules = loaded return loaded, issues
if not context.rules:
context.issues.append("Rules v2 пустые: не найдено ни одного *.md файла.")
return context
def _discover_rules_root(self) -> Path: def _discover_rules_root(self) -> Path:
current = Path(__file__).resolve() current = Path(__file__).resolve()
@@ -55,3 +62,6 @@ class LoadRulesStep(WorkflowStep[DocUpdateFromFeatureV2Context]):
"rules_count": len(context.rules), "rules_count": len(context.rules),
"rule_names": [item.name for item in context.rules], "rule_names": [item.name for item in context.rules],
} }
def get_after_status_message(self):
return "Загружены правила документации v3"
@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import asyncio
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from typing import Any from typing import Any
@@ -30,12 +31,15 @@ class PrepareRequirementTasksStep(WorkflowStep[DocUpdateFromFeatureV2Context]):
async def run(self, context: DocUpdateFromFeatureV2Context) -> DocUpdateFromFeatureV2Context: async def run(self, context: DocUpdateFromFeatureV2Context) -> DocUpdateFromFeatureV2Context:
if context.answer or not context.requirements: if context.answer or not context.requirements:
return context return context
self._catalog_loader.load(context) context.requirement_tasks = await asyncio.to_thread(self._build_tasks, context)
context.requirement_tasks = self._task_orderer.order(self._task_builder.build(context))
if not context.requirement_tasks: if not context.requirement_tasks:
context.issues.append("Не удалось подготовить задачи по разделу 6 аналитики.") context.issues.append("Не удалось подготовить задачи по разделу 6 аналитики.")
return context return context
def _build_tasks(self, context: DocUpdateFromFeatureV2Context):
self._catalog_loader.load(context)
return self._task_orderer.order(self._task_builder.build(context))
def trace_input(self, context: DocUpdateFromFeatureV2Context) -> dict[str, Any]: def trace_input(self, context: DocUpdateFromFeatureV2Context) -> dict[str, Any]:
return { return {
"requirements_count": len(context.requirements), "requirements_count": len(context.requirements),
@@ -64,3 +68,6 @@ class PrepareRequirementTasksStep(WorkflowStep[DocUpdateFromFeatureV2Context]):
for item in context.requirement_tasks for item in context.requirement_tasks
], ],
} }
def get_after_status_message(self):
return "Составялем план изменений"
@@ -2,12 +2,19 @@ from __future__ import annotations
import re import re
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocesses.common.doc_type_normalizer import (
DocTypeNormalizer,
)
class DocsPathResolver: class DocsPathResolver:
def __init__(self, normalizer: DocTypeNormalizer | None = None) -> None:
self._normalizer = normalizer or DocTypeNormalizer()
def resolve(self, *, application: str, platform: str, doc_type: str, doc_id: str, domain: str = "") -> str: def resolve(self, *, application: str, platform: str, doc_type: str, doc_id: str, domain: str = "") -> str:
root = self._clean(domain) or self._clean(application) or "common" root = self._clean(domain) or self._clean(application) or "common"
plat = self._clean(platform) or "web" plat = self._clean(platform) or "web"
dtype = self._clean(doc_type) or "misc" dtype = self._clean(self._normalizer.normalize(doc_type)) or "misc"
did = self._clean(doc_id) or "untitled" did = self._clean(doc_id) or "untitled"
return f"docs/{root}/{plat}/{dtype}/{did}.md" return f"docs/{root}/{plat}/{dtype}/{did}.md"
@@ -3,6 +3,7 @@ namespace: v2_docs_update_v2
prompts: prompts:
resolve_attributes_fallback: | resolve_attributes_fallback: |
Определи недостающие атрибуты страницы документации по секции аналитики и структуре docs catalog. Определи недостающие атрибуты страницы документации по секции аналитики и структуре docs catalog.
Используй только канонические значения `doc_type`: `api_method`, `logic_block`, `architecture_overview`, `db_table`, `ui_page`.
Верни только JSON-объект с полями: doc_type, id, application, platform, domain, sub_domain. Верни только JSON-объект с полями: doc_type, id, application, platform, domain, sub_domain.
Не добавляй пояснений. Не добавляй пояснений.
@@ -8,6 +8,9 @@ from pathlib import Path
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.steps.step5_execute_subprocesses.classifier import ( from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.steps.step5_execute_subprocesses.classifier import (
DeleteIntentHeuristic, DeleteIntentHeuristic,
) )
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocesses.common.doc_type_normalizer import (
DocTypeNormalizer,
)
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocesses.create_doc import ( from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocesses.create_doc import (
CreateDocSubprocess, CreateDocSubprocess,
) )
@@ -30,9 +33,10 @@ from app.schemas.changeset import ChangeItem, ChangeOp
class TaskAttributeResolver: class TaskAttributeResolver:
def __init__(self, llm: AgentLlmService) -> None: def __init__(self, llm: AgentLlmService) -> None:
self._llm = llm self._llm = llm
self._doc_type_normalizer = DocTypeNormalizer()
def resolve(self, context: DocUpdateFromFeatureV2Context, task: RequirementTaskContext, rules_text: str) -> None: def resolve(self, context: DocUpdateFromFeatureV2Context, task: RequirementTaskContext, rules_text: str) -> None:
task.doc_type = str(task.metadata.get("doc_type") or task.metadata.get("type") or "").strip() task.doc_type = self._normalize_doc_type(task.metadata.get("doc_type") or task.metadata.get("type") or "")
task.doc_id = str(task.metadata.get("id") or self._slug(task.heading)).strip() task.doc_id = str(task.metadata.get("id") or self._slug(task.heading)).strip()
task.application = str(task.metadata.get("application") or context.analytics_meta.application or "").strip() task.application = str(task.metadata.get("application") or context.analytics_meta.application or "").strip()
task.platform = str(task.metadata.get("platform") or context.analytics_meta.platform or "").strip().lower() task.platform = str(task.metadata.get("platform") or context.analytics_meta.platform or "").strip().lower()
@@ -67,7 +71,7 @@ class TaskAttributeResolver:
log_context="workflow.v2.docs_update.from_feature_v2.resolve_attributes", log_context="workflow.v2.docs_update.from_feature_v2.resolve_attributes",
) )
parsed = self._json_or_empty(raw) parsed = self._json_or_empty(raw)
task.doc_type = task.doc_type or str(parsed.get("doc_type") or parsed.get("type") or "").strip() task.doc_type = task.doc_type or self._normalize_doc_type(parsed.get("doc_type") or parsed.get("type") or "")
task.doc_id = task.doc_id or str(parsed.get("id") or self._slug(task.heading)).strip() task.doc_id = task.doc_id or str(parsed.get("id") or self._slug(task.heading)).strip()
task.application = task.application or str(parsed.get("application") or "").strip() task.application = task.application or str(parsed.get("application") or "").strip()
task.platform = task.platform or str(parsed.get("platform") or "web").strip().lower() task.platform = task.platform or str(parsed.get("platform") or "web").strip().lower()
@@ -77,6 +81,9 @@ class TaskAttributeResolver:
def _slug(self, value: str) -> str: def _slug(self, value: str) -> str:
return re.sub(r"[^a-z0-9._-]+", "-", (value or "").strip().lower()).strip(".-") or "untitled" return re.sub(r"[^a-z0-9._-]+", "-", (value or "").strip().lower()).strip(".-") or "untitled"
def _normalize_doc_type(self, value: object) -> str:
return self._doc_type_normalizer.normalize(str(value or ""))
def _json_or_empty(self, raw: str) -> dict[str, object]: def _json_or_empty(self, raw: str) -> dict[str, object]:
value = str(raw or "").strip() value = str(raw or "").strip()
if not value: if not value:
@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import asyncio
from typing import Any from typing import Any
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.steps.step5_execute_subprocesses.services import ( from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.steps.step5_execute_subprocesses.services import (
@@ -22,9 +23,12 @@ class ExecuteRequirementSubprocessesStep(WorkflowStep[DocUpdateFromFeatureV2Cont
async def run(self, context: DocUpdateFromFeatureV2Context) -> DocUpdateFromFeatureV2Context: async def run(self, context: DocUpdateFromFeatureV2Context) -> DocUpdateFromFeatureV2Context:
if context.answer or not context.requirement_tasks: if context.answer or not context.requirement_tasks:
return context return context
await asyncio.to_thread(self._execute_all, context)
return context
def _execute_all(self, context: DocUpdateFromFeatureV2Context) -> None:
for task in context.requirement_tasks: for task in context.requirement_tasks:
self._change_executor.execute(context, task) self._change_executor.execute(context, task)
return context
def trace_input(self, context: DocUpdateFromFeatureV2Context) -> dict[str, Any]: def trace_input(self, context: DocUpdateFromFeatureV2Context) -> dict[str, Any]:
return { return {
@@ -57,3 +61,6 @@ class ExecuteRequirementSubprocessesStep(WorkflowStep[DocUpdateFromFeatureV2Cont
for item in context.accumulated_pages for item in context.accumulated_pages
], ],
} }
def get_after_status_message(self):
return "Правки подготовлены"
@@ -0,0 +1,12 @@
from __future__ import annotations
class DocTypeNormalizer:
_ALIASES = {
"data_entity": "db_table",
"domain_entity": "db_table",
}
def normalize(self, doc_type: str) -> str:
value = str(doc_type or "").strip().lower()
return self._ALIASES.get(value, value)
@@ -3,6 +3,9 @@ from __future__ import annotations
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocesses.common.rules_catalog import ( from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocesses.common.rules_catalog import (
RulesCatalog, RulesCatalog,
) )
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocesses.common.doc_type_normalizer import (
DocTypeNormalizer,
)
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocesses.common.template_models import ( from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocesses.common.template_models import (
TemplateSpec, TemplateSpec,
) )
@@ -12,10 +15,16 @@ from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocess
class TemplateRegistry: class TemplateRegistry:
def __init__(self, parser: TemplateParser | None = None) -> None: def __init__(
self,
parser: TemplateParser | None = None,
normalizer: DocTypeNormalizer | None = None,
) -> None:
self._parser = parser or TemplateParser() self._parser = parser or TemplateParser()
self._normalizer = normalizer or DocTypeNormalizer()
def load(self, catalog: RulesCatalog, doc_type: str) -> TemplateSpec: def load(self, catalog: RulesCatalog, doc_type: str) -> TemplateSpec:
doc_type = self._normalizer.normalize(doc_type)
template_name = f"templates/{doc_type}.template.md" template_name = f"templates/{doc_type}.template.md"
template_text = catalog.template_text(doc_type) template_text = catalog.template_text(doc_type)
if not template_text.strip(): if not template_text.strip():
@@ -17,17 +17,18 @@ class DocUpdateFromFeatureV2WorkflowGraph(WorkflowGraph[TContext]):
before = self._snapshot(context) before = self._snapshot(context)
raw_inp = step.trace_input(context) raw_inp = step.trace_input(context)
inp = self._merge_trace_payload(raw_inp, before) inp = self._merge_trace_payload(raw_inp, before)
request_id = context.runtime.request.request_id await self._publish_step_status(context, step, phase="before", input_context=context)
await context.runtime.publisher.publish_status( next_context = await step.run(context)
request_id, after = self._snapshot(next_context)
self._source, raw_out = step.trace_output(next_context)
f"Шаг workflow: {step.title}.",
{"workflow_id": self._workflow_id, "step_id": step.step_id},
)
context = await step.run(context)
after = self._snapshot(context)
raw_out = step.trace_output(context)
out = self._merge_trace_payload(raw_out, after) out = self._merge_trace_payload(raw_out, after)
await self._publish_step_status(
next_context,
step,
phase="after",
input_context=context,
output_context=next_context,
)
trace.log( trace.log(
"workflow_step_traced", "workflow_step_traced",
{ {
@@ -38,6 +39,7 @@ class DocUpdateFromFeatureV2WorkflowGraph(WorkflowGraph[TContext]):
}, },
) )
steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out}) steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out})
context = next_context
trace.log("workflow_trace_flushed", {"workflow_id": self._workflow_id, "steps": steps_buffer}) trace.log("workflow_trace_flushed", {"workflow_id": self._workflow_id, "steps": steps_buffer})
trace.log("workflow_completed", {"workflow_id": self._workflow_id}) trace.log("workflow_completed", {"workflow_id": self._workflow_id})
return context return context
@@ -19,15 +19,16 @@ class GeneralQaSummaryWorkflowGraph(WorkflowGraph[TContext]):
steps_buffer: list[dict[str, object]] = [] steps_buffer: list[dict[str, object]] = []
for step in self._steps: for step in self._steps:
inp = step.trace_input(context) inp = step.trace_input(context)
request_id = context.runtime.request.request_id await self._publish_step_status(context, step, phase="before", input_context=context)
await context.runtime.publisher.publish_status( next_context = await step.run(context)
request_id, out = step.trace_output(next_context)
self._source, await self._publish_step_status(
f"Шаг workflow: {step.title}.", next_context,
{"workflow_id": self._workflow_id, "step_id": step.step_id}, step,
phase="after",
input_context=context,
output_context=next_context,
) )
context = await step.run(context)
out = step.trace_output(context)
trace.log( trace.log(
"workflow_step_traced", "workflow_step_traced",
{ {
@@ -38,6 +39,7 @@ class GeneralQaSummaryWorkflowGraph(WorkflowGraph[TContext]):
}, },
) )
steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out}) steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out})
context = next_context
trace.log( trace.log(
"workflow_trace_flushed", "workflow_trace_flushed",
{"workflow_id": self._workflow_id, "steps": steps_buffer}, {"workflow_id": self._workflow_id, "steps": steps_buffer},
+23 -4
View File
@@ -96,11 +96,21 @@ class AgentRuntime:
) )
async def _publish_result(self, request: AgentRequest) -> None: async def _publish_result(self, request: AgentRequest) -> None:
try:
await self._publisher.publish_user(request.request_id, "agent", request.answer or "")
except Exception:
LOGGER.exception("failed to publish user event: request_id=%s", request.request_id)
await self._safe_publish_status(request.request_id, "runtime", "Обработка запроса завершена.") await self._safe_publish_status(request.request_id, "runtime", "Обработка запроса завершена.")
try:
await self._publisher.publish_result(
request.request_id,
"agent",
request.answer or "",
{
"result_type": "changeset" if request.changeset else "answer",
"answer": request.answer or "",
"changeset": [item.model_dump(mode="json") for item in request.changeset],
"apply_changeset": request.apply_changeset,
},
)
except Exception:
LOGGER.exception("failed to publish result event: request_id=%s", request.request_id)
def _complete_request(self, request: AgentRequest, session: AgentSession) -> None: def _complete_request(self, request: AgentRequest, session: AgentSession) -> None:
session.append_turn(user_message=request.message, assistant_message=request.answer or "") session.append_turn(user_message=request.message, assistant_message=request.answer or "")
@@ -122,6 +132,15 @@ class AgentRuntime:
"Во время обработки запроса произошла ошибка.", "Во время обработки запроса произошла ошибка.",
{"code": request.error.code}, {"code": request.error.code},
) )
try:
await self._publisher.publish_error(
request.request_id,
"runtime",
request.error.desc,
{"error": request.error.model_dump(mode="json")},
)
except Exception:
LOGGER.exception("failed to publish error event: request_id=%s", request.request_id)
def _build_error_payload(self, exc: Exception) -> ErrorPayload: def _build_error_payload(self, exc: Exception) -> ErrorPayload:
if isinstance(exc, AppError): if isinstance(exc, AppError):
+22
View File
@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import asyncio
import logging import logging
from app.core.api.domain.events.client_event import ClientEventRecord from app.core.api.domain.events.client_event import ClientEventRecord
@@ -24,6 +25,7 @@ class RuntimeEventPublisher:
sorted(list((payload or {}).keys())), sorted(list((payload or {}).keys())),
) )
await self._publish(request_id, ClientEventType.STATUS, source, text, payload) await self._publish(request_id, ClientEventType.STATUS, source, text, payload)
await asyncio.sleep(0)
async def publish_user(self, request_id: str, source: str, text: str, payload: dict | None = None) -> None: async def publish_user(self, request_id: str, source: str, text: str, payload: dict | None = None) -> None:
LOGGER.warning( LOGGER.warning(
@@ -35,6 +37,26 @@ class RuntimeEventPublisher:
) )
await self._publish(request_id, ClientEventType.USER, source, text, payload) await self._publish(request_id, ClientEventType.USER, source, text, payload)
async def publish_result(self, request_id: str, source: str, text: str, payload: dict | None = None) -> None:
LOGGER.warning(
"publish result: request_id=%s source=%s text_len=%s payload_keys=%s",
request_id,
source,
len(text or ""),
sorted(list((payload or {}).keys())),
)
await self._publish(request_id, ClientEventType.RESULT, source, text, payload)
async def publish_error(self, request_id: str, source: str, text: str, payload: dict | None = None) -> None:
LOGGER.warning(
"publish error: request_id=%s source=%s text=%s payload_keys=%s",
request_id,
source,
text,
sorted(list((payload or {}).keys())),
)
await self._publish(request_id, ClientEventType.ERROR, source, text, payload)
async def _publish( async def _publish(
self, self,
request_id: str, request_id: str,
+54 -10
View File
@@ -1,6 +1,6 @@
from __future__ import annotations from __future__ import annotations
from typing import Generic, Sequence, TypeVar from typing import Callable, Generic, Sequence, TypeVar
from app.core.agent.utils.workflow.context import WorkflowContext from app.core.agent.utils.workflow.context import WorkflowContext
from app.core.agent.utils.workflow.step import WorkflowStep from app.core.agent.utils.workflow.step import WorkflowStep
@@ -24,21 +24,65 @@ class WorkflowGraph(Generic[TContext]):
return context return context
async def _run_step(self, context: TContext, step: WorkflowStep[TContext]) -> TContext: async def _run_step(self, context: TContext, step: WorkflowStep[TContext]) -> TContext:
request_id = context.runtime.request.request_id
trace = context.runtime.trace.module(self._source) trace = context.runtime.trace.module(self._source)
trace.log( trace.log(
"step_started", "step_started",
{"workflow_id": self._workflow_id, "step_id": step.step_id, "input": step.trace_input(context)}, {"workflow_id": self._workflow_id, "step_id": step.step_id, "input": step.trace_input(context)},
) )
await context.runtime.publisher.publish_status( await self._publish_step_status(context, step, phase="before", input_context=context)
request_id, next_context = await step.run(context)
self._source, await self._publish_step_status(
f"Шаг workflow: {step.title}.", next_context,
{"workflow_id": self._workflow_id, "step_id": step.step_id}, step,
phase="after",
input_context=context,
output_context=next_context,
) )
context = await step.run(context)
trace.log( trace.log(
"step_completed", "step_completed",
{"workflow_id": self._workflow_id, "step_id": step.step_id, "output": step.trace_output(context)}, {"workflow_id": self._workflow_id, "step_id": step.step_id, "output": step.trace_output(next_context)},
) )
return context return next_context
async def _publish_step_status(
self,
runtime_context: TContext,
step: WorkflowStep[TContext],
*,
phase: str,
input_context: TContext,
output_context: TContext | None = None,
) -> None:
message = self._resolve_step_status_message(step, phase, input_context, output_context)
if not message:
return
await runtime_context.runtime.publisher.publish_status(
runtime_context.runtime.request.request_id,
self._source,
message,
{"workflow_id": self._workflow_id, "step_id": step.step_id, "phase": phase},
)
def _resolve_step_status_message(
self,
step: WorkflowStep[TContext],
phase: str,
input_context: TContext,
output_context: TContext | None,
) -> str | None:
builder: Callable[[], str | None]
active_context = input_context
if phase == "after":
builder = step.get_after_status_message
active_context = output_context or input_context
else:
builder = step.get_before_status_message
tokens = step._push_status_state(
active_context=active_context,
input_context=input_context,
output_context=output_context,
)
try:
return builder()
finally:
step._pop_status_state(tokens)
+42 -1
View File
@@ -1,10 +1,14 @@
from __future__ import annotations from __future__ import annotations
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Any, Generic, TypeVar from contextvars import ContextVar, Token
from typing import Any, Generic, TypeVar, cast
TContext = TypeVar("TContext") TContext = TypeVar("TContext")
_ACTIVE_CONTEXT: ContextVar[Any | None] = ContextVar("workflow_step_active_context", default=None)
_INPUT_CONTEXT: ContextVar[Any | None] = ContextVar("workflow_step_input_context", default=None)
_OUTPUT_CONTEXT: ContextVar[Any | None] = ContextVar("workflow_step_output_context", default=None)
class WorkflowStep(ABC, Generic[TContext]): class WorkflowStep(ABC, Generic[TContext]):
@@ -15,8 +19,45 @@ class WorkflowStep(ABC, Generic[TContext]):
async def run(self, context: TContext) -> TContext: async def run(self, context: TContext) -> TContext:
raise NotImplementedError raise NotImplementedError
@property
def context(self) -> TContext | None:
return cast(TContext | None, _ACTIVE_CONTEXT.get())
@property
def input_context(self) -> TContext | None:
return cast(TContext | None, _INPUT_CONTEXT.get())
@property
def output_context(self) -> TContext | None:
return cast(TContext | None, _OUTPUT_CONTEXT.get())
def get_before_status_message(self) -> str | None:
return None
def get_after_status_message(self) -> str | None:
return None
def trace_input(self, context: TContext) -> dict[str, Any]: def trace_input(self, context: TContext) -> dict[str, Any]:
return {} return {}
def trace_output(self, context: TContext) -> dict[str, Any]: def trace_output(self, context: TContext) -> dict[str, Any]:
return {} return {}
def _push_status_state(
self,
*,
active_context: TContext,
input_context: TContext,
output_context: TContext | None = None,
) -> tuple[Token[Any | None], Token[Any | None], Token[Any | None]]:
return (
_ACTIVE_CONTEXT.set(active_context),
_INPUT_CONTEXT.set(input_context),
_OUTPUT_CONTEXT.set(output_context),
)
def _pop_status_state(self, tokens: tuple[Token[Any | None], Token[Any | None], Token[Any | None]]) -> None:
active_token, input_token, output_token = tokens
_ACTIVE_CONTEXT.reset(active_token)
_INPUT_CONTEXT.reset(input_token)
_OUTPUT_CONTEXT.reset(output_token)
@@ -3,6 +3,7 @@ from __future__ import annotations
import asyncio import asyncio
import logging import logging
from app.core.api.application.request_start_gate import RequestStartGate
from app.core.api.domain.models.agent_request import AgentRequest from app.core.api.domain.models.agent_request import AgentRequest
from app.core.api.infrastructure.ids.request_id_factory import RequestIdFactory from app.core.api.infrastructure.ids.request_id_factory import RequestIdFactory
from app.core.api.infrastructure.stores.in_memory_request_store import InMemoryRequestStore from app.core.api.infrastructure.stores.in_memory_request_store import InMemoryRequestStore
@@ -19,11 +20,13 @@ class RequestService:
request_ids: RequestIdFactory, request_ids: RequestIdFactory,
sessions: SessionService, sessions: SessionService,
runtime: AgentRuntime, runtime: AgentRuntime,
start_gate: RequestStartGate | None = None,
) -> None: ) -> None:
self._request_store = request_store self._request_store = request_store
self._request_ids = request_ids self._request_ids = request_ids
self._sessions = sessions self._sessions = sessions
self._runtime = runtime self._runtime = runtime
self._start_gate = start_gate or RequestStartGate()
async def create(self, session_id: str, message: str, process_version: str) -> AgentRequest: async def create(self, session_id: str, message: str, process_version: str) -> AgentRequest:
session = self._sessions.get(session_id) session = self._sessions.get(session_id)
@@ -41,13 +44,21 @@ class RequestService:
process_version, process_version,
(message or "").replace("\n", "\\n")[:500], (message or "").replace("\n", "\\n")[:500],
) )
task = asyncio.create_task(self._runtime.run(request, session), name=f"agent-runtime:{request.request_id}") self._start_gate.register(request.request_id)
task = asyncio.create_task(self._run_request(request, session), name=f"agent-runtime:{request.request_id}")
task.add_done_callback(self._log_task_result) task.add_done_callback(self._log_task_result)
return request return request
def get(self, request_id: str) -> AgentRequest | None: def get(self, request_id: str) -> AgentRequest | None:
return self._request_store.get(request_id) return self._request_store.get(request_id)
async def _run_request(self, request: AgentRequest, session) -> None:
try:
await self._start_gate.wait_until_ready(request.request_id)
await self._runtime.run(request, session)
finally:
self._start_gate.forget(request.request_id)
def _log_task_result(self, task: asyncio.Task) -> None: def _log_task_result(self, task: asyncio.Task) -> None:
try: try:
exc = task.exception() exc = task.exception()
@@ -0,0 +1,32 @@
from __future__ import annotations
import asyncio
from threading import Lock
class RequestStartGate:
def __init__(self, timeout_seconds: float = 0.5) -> None:
self._timeout_seconds = timeout_seconds
self._events: dict[str, asyncio.Event] = {}
self._lock = Lock()
def register(self, request_id: str) -> None:
with self._lock:
self._events.setdefault(request_id, asyncio.Event())
def mark_ready(self, request_id: str) -> None:
with self._lock:
event = self._events.setdefault(request_id, asyncio.Event())
event.set()
async def wait_until_ready(self, request_id: str) -> None:
with self._lock:
event = self._events.setdefault(request_id, asyncio.Event())
try:
await asyncio.wait_for(event.wait(), timeout=self._timeout_seconds)
except asyncio.TimeoutError:
return
def forget(self, request_id: str) -> None:
with self._lock:
self._events.pop(request_id, None)
+13 -2
View File
@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
from app.core.api.application.request_start_gate import RequestStartGate
from app.infra.exceptions import AppError from app.infra.exceptions import AppError
from app.core.api.infrastructure.streaming.sse_encoder import SseEncoder from app.core.api.infrastructure.streaming.sse_encoder import SseEncoder
from app.core.api.infrastructure.streaming.sse_event_channel import SseEventChannel from app.core.api.infrastructure.streaming.sse_event_channel import SseEventChannel
@@ -7,15 +8,25 @@ from app.schemas.common import ModuleName
class StreamService: class StreamService:
def __init__(self, channel: SseEventChannel, request_exists, encoder: SseEncoder | None = None) -> None: def __init__(
self,
channel: SseEventChannel,
request_exists,
encoder: SseEncoder | None = None,
start_gate: RequestStartGate | None = None,
) -> None:
self._channel = channel self._channel = channel
self._request_exists = request_exists self._request_exists = request_exists
self._encoder = encoder or SseEncoder() self._encoder = encoder or SseEncoder()
self._start_gate = start_gate
async def subscribe(self, request_id: str): async def subscribe(self, request_id: str):
if not self._request_exists(request_id): if not self._request_exists(request_id):
raise AppError("request_not_found", f"Agent request not found: {request_id}", ModuleName.BACKEND) raise AppError("request_not_found", f"Agent request not found: {request_id}", ModuleName.BACKEND)
return await self._channel.subscribe(request_id, replay=True) queue = await self._channel.subscribe(request_id, replay=True)
if self._start_gate is not None:
self._start_gate.mark_ready(request_id)
return queue
async def unsubscribe(self, request_id: str, queue) -> None: async def unsubscribe(self, request_id: str, queue) -> None:
await self._channel.unsubscribe(request_id, queue) await self._channel.unsubscribe(request_id, queue)
@@ -28,8 +28,11 @@ class RagPublicController:
rag_session_id=rag_session_id, rag_session_id=rag_session_id,
index_job_id=job.index_job_id, index_job_id=job.index_job_id,
status=job.status, status=job.status,
total_files=job.total_files,
indexed_files=job.indexed_files, indexed_files=job.indexed_files,
failed_files=job.failed_files, failed_files=job.failed_files,
skipped_files=job.skipped_files,
reuse_percent=job.reuse_percent,
cache_hit_files=job.cache_hit_files, cache_hit_files=job.cache_hit_files,
cache_miss_files=job.cache_miss_files, cache_miss_files=job.cache_miss_files,
error=job.error.model_dump(mode="json") if job.error else None, error=job.error.model_dump(mode="json") if job.error else None,
+8 -1
View File
@@ -13,6 +13,7 @@ from app.core.agent.utils.llm import AgentLlmService, PromptLoader
from app.core.api.module import ApiModule from app.core.api.module import ApiModule
from app.core.api.application.session_bootstrap_service import SessionBootstrapService from app.core.api.application.session_bootstrap_service import SessionBootstrapService
from app.core.api.application.request_service import RequestService from app.core.api.application.request_service import RequestService
from app.core.api.application.request_start_gate import RequestStartGate
from app.core.api.application.session_service import SessionService from app.core.api.application.session_service import SessionService
from app.core.api.application.stream_service import StreamService from app.core.api.application.stream_service import StreamService
from app.core.api.infrastructure.ids.request_id_factory import RequestIdFactory from app.core.api.infrastructure.ids.request_id_factory import RequestIdFactory
@@ -88,6 +89,7 @@ class ModularApplication:
self.agent_requests = InMemoryRequestStore() self.agent_requests = InMemoryRequestStore()
self.agent_events = SseEventChannel() self.agent_events = SseEventChannel()
self.agent_trace_logger = RequestTraceLogger(Path("runtime_traces/agent_requests")) self.agent_trace_logger = RequestTraceLogger(Path("runtime_traces/agent_requests"))
self.agent_request_start_gate = RequestStartGate()
_publisher = RuntimeEventPublisher(self.agent_events, self.agent_trace_logger) _publisher = RuntimeEventPublisher(self.agent_events, self.agent_trace_logger)
_session_service = SessionService( _session_service = SessionService(
store=self.agent_sessions, store=self.agent_sessions,
@@ -122,11 +124,16 @@ class ModularApplication:
request_ids=RequestIdFactory(), request_ids=RequestIdFactory(),
sessions=_session_service, sessions=_session_service,
runtime=_runtime, runtime=_runtime,
start_gate=self.agent_request_start_gate,
) )
self.api = ApiModule( self.api = ApiModule(
session_bootstrap=_session_bootstrap, session_bootstrap=_session_bootstrap,
requests=_request_service, requests=_request_service,
streams=StreamService(self.agent_events, request_exists=lambda request_id: self.agent_requests.get(request_id) is not None), streams=StreamService(
self.agent_events,
request_exists=lambda request_id: self.agent_requests.get(request_id) is not None,
start_gate=self.agent_request_start_gate,
),
rag=self.rag, rag=self.rag,
) )
+27 -3
View File
@@ -18,20 +18,40 @@ class IndexJob:
index_job_id: str index_job_id: str
rag_session_id: str rag_session_id: str
status: IndexJobStatus = IndexJobStatus.QUEUED status: IndexJobStatus = IndexJobStatus.QUEUED
total_files: int = 0
indexed_files: int = 0 indexed_files: int = 0
failed_files: int = 0 failed_files: int = 0
skipped_files: int = 0
cache_hit_files: int = 0 cache_hit_files: int = 0
cache_miss_files: int = 0 cache_miss_files: int = 0
error: ErrorPayload | None = None error: ErrorPayload | None = None
@property
def reuse_percent(self) -> int:
total = self.cache_hit_files + self.cache_miss_files
if total <= 0:
return 0
return round((self.cache_hit_files / total) * 100)
class IndexJobStore: class IndexJobStore:
def __init__(self, repository: RagRepository) -> None: def __init__(self, repository: RagRepository) -> None:
self._repo = repository self._repo = repository
def create(self, rag_session_id: str) -> IndexJob: def create(self, rag_session_id: str, *, total_files: int = 0, skipped_files: int = 0) -> IndexJob:
job = IndexJob(index_job_id=str(uuid4()), rag_session_id=rag_session_id) job = IndexJob(
self._repo.create_job(job.index_job_id, rag_session_id, job.status.value) index_job_id=str(uuid4()),
rag_session_id=rag_session_id,
total_files=total_files,
skipped_files=skipped_files,
)
self._repo.create_job(
job.index_job_id,
rag_session_id,
job.status.value,
total_files=job.total_files,
skipped_files=job.skipped_files,
)
return job return job
def get(self, index_job_id: str) -> IndexJob | None: def get(self, index_job_id: str) -> IndexJob | None:
@@ -55,8 +75,10 @@ class IndexJobStore:
index_job_id=row.index_job_id, index_job_id=row.index_job_id,
rag_session_id=row.rag_session_id, rag_session_id=row.rag_session_id,
status=IndexJobStatus(row.status), status=IndexJobStatus(row.status),
total_files=row.total_files,
indexed_files=row.indexed_files, indexed_files=row.indexed_files,
failed_files=row.failed_files, failed_files=row.failed_files,
skipped_files=row.skipped_files,
cache_hit_files=row.cache_hit_files, cache_hit_files=row.cache_hit_files,
cache_miss_files=row.cache_miss_files, cache_miss_files=row.cache_miss_files,
error=payload, error=payload,
@@ -88,8 +110,10 @@ class IndexJobStore:
self._repo.update_job( self._repo.update_job(
job.index_job_id, job.index_job_id,
status=job.status.value, status=job.status.value,
total_files=job.total_files,
indexed_files=job.indexed_files, indexed_files=job.indexed_files,
failed_files=job.failed_files, failed_files=job.failed_files,
skipped_files=job.skipped_files,
cache_hit_files=job.cache_hit_files, cache_hit_files=job.cache_hit_files,
cache_miss_files=job.cache_miss_files, cache_miss_files=job.cache_miss_files,
error_code=error_code, error_code=error_code,
+130 -23
View File
@@ -35,33 +35,75 @@ class IndexingOrchestrator:
self._locks: dict[str, asyncio.Lock] = defaultdict(asyncio.Lock) self._locks: dict[str, asyncio.Lock] = defaultdict(asyncio.Lock)
async def enqueue_snapshot(self, rag_session_id: str, files: list[dict]) -> IndexJob: async def enqueue_snapshot(self, rag_session_id: str, files: list[dict]) -> IndexJob:
job = self._store.create(rag_session_id) filtered_files = filter_snapshot_files(files)
total_files = len(files)
skipped_files = max(0, total_files - len(filtered_files))
job = self._store.create(
rag_session_id,
total_files=total_files,
skipped_files=skipped_files,
)
LOGGER.warning( LOGGER.warning(
"rag index snapshot queued: job_id=%s rag_session_id=%s files=%s", "rag index snapshot queued: job_id=%s rag_session_id=%s total=%s skipped=%s",
job.index_job_id, job.index_job_id,
rag_session_id, rag_session_id,
len(files), total_files,
skipped_files,
)
asyncio.create_task(
self._process_snapshot(
job.index_job_id,
rag_session_id,
filtered_files,
total_files=total_files,
skipped_files=skipped_files,
)
) )
asyncio.create_task(self._process_snapshot(job.index_job_id, rag_session_id, files))
return job return job
async def enqueue_changes(self, rag_session_id: str, changed_files: list[dict]) -> IndexJob: async def enqueue_changes(self, rag_session_id: str, changed_files: list[dict]) -> IndexJob:
job = self._store.create(rag_session_id) filtered_changes = filter_changes_for_indexing(changed_files)
total_files = len(changed_files)
indexable_total = count_indexable_change_upserts(filtered_changes)
skipped_files = max(0, total_files - indexable_total)
job = self._store.create(
rag_session_id,
total_files=total_files,
skipped_files=skipped_files,
)
LOGGER.warning( LOGGER.warning(
"rag index changes queued: job_id=%s rag_session_id=%s changes=%s", "rag index changes queued: job_id=%s rag_session_id=%s total=%s skipped=%s",
job.index_job_id, job.index_job_id,
rag_session_id, rag_session_id,
len(changed_files), total_files,
skipped_files,
)
asyncio.create_task(
self._process_changes(
job.index_job_id,
rag_session_id,
filtered_changes,
total_files=total_files,
skipped_files=skipped_files,
)
) )
asyncio.create_task(self._process_changes(job.index_job_id, rag_session_id, changed_files))
return job return job
async def _process_snapshot(self, job_id: str, rag_session_id: str, files: list[dict]) -> None: async def _process_snapshot(
filtered_files = filter_snapshot_files(files) self,
job_id: str,
rag_session_id: str,
filtered_files: list[dict],
*,
total_files: int,
skipped_files: int,
) -> None:
await self._run_with_project_lock( await self._run_with_project_lock(
job_id=job_id, job_id=job_id,
rag_session_id=rag_session_id, rag_session_id=rag_session_id,
total_files=len(filtered_files), total_files=total_files,
skipped_files=skipped_files,
progress_total=len(filtered_files),
operation=lambda progress_cb: self._rag.index_snapshot( operation=lambda progress_cb: self._rag.index_snapshot(
rag_session_id=rag_session_id, rag_session_id=rag_session_id,
files=filtered_files, files=filtered_files,
@@ -69,12 +111,21 @@ class IndexingOrchestrator:
), ),
) )
async def _process_changes(self, job_id: str, rag_session_id: str, changed_files: list[dict]) -> None: async def _process_changes(
filtered_changes = filter_changes_for_indexing(changed_files) self,
job_id: str,
rag_session_id: str,
filtered_changes: list[dict],
*,
total_files: int,
skipped_files: int,
) -> None:
await self._run_with_project_lock( await self._run_with_project_lock(
job_id=job_id, job_id=job_id,
rag_session_id=rag_session_id, rag_session_id=rag_session_id,
total_files=count_indexable_change_upserts(filtered_changes), total_files=total_files,
skipped_files=skipped_files,
progress_total=count_indexable_change_upserts(filtered_changes),
operation=lambda progress_cb: self._rag.index_changes( operation=lambda progress_cb: self._rag.index_changes(
rag_session_id=rag_session_id, rag_session_id=rag_session_id,
changed_files=filtered_changes, changed_files=filtered_changes,
@@ -82,7 +133,15 @@ class IndexingOrchestrator:
), ),
) )
async def _run_with_project_lock(self, job_id: str, rag_session_id: str, total_files: int, operation) -> None: async def _run_with_project_lock(
self,
job_id: str,
rag_session_id: str,
total_files: int,
skipped_files: int,
progress_total: int,
operation,
) -> None:
lock = self._locks[rag_session_id] lock = self._locks[rag_session_id]
async with lock: async with lock:
job = self._store.get(job_id) job = self._store.get(job_id)
@@ -90,17 +149,26 @@ class IndexingOrchestrator:
LOGGER.warning("rag index job missing in store before start: job_id=%s", job_id) LOGGER.warning("rag index job missing in store before start: job_id=%s", job_id)
return return
job.status = IndexJobStatus.RUNNING job.status = IndexJobStatus.RUNNING
job.total_files = total_files
job.skipped_files = skipped_files
self._store.save(job) self._store.save(job)
LOGGER.warning( LOGGER.warning(
"rag index job running: job_id=%s rag_session_id=%s total_files=%s", "rag index job running: job_id=%s rag_session_id=%s total_files=%s skipped_files=%s",
job_id, job_id,
rag_session_id, rag_session_id,
total_files, total_files,
skipped_files,
) )
await self._events.publish( await self._events.publish(
job_id, job_id,
"index_status", "index_status",
{"index_job_id": job_id, "status": job.status.value, "total_files": total_files}, {
"index_job_id": job_id,
"status": job.status.value,
"total_files": total_files,
"skipped_files": skipped_files,
"reuse_percent": job.reuse_percent,
},
) )
try: try:
async def progress_cb(current_file_index: int, total: int, current_file_name: str) -> None: async def progress_cb(current_file_index: int, total: int, current_file_name: str) -> None:
@@ -110,8 +178,11 @@ class IndexingOrchestrator:
{ {
"index_job_id": job_id, "index_job_id": job_id,
"current_file_index": current_file_index, "current_file_index": current_file_index,
"total_files": total, "total_files": total_files,
"indexable_files": total,
"processed_files": current_file_index, "processed_files": current_file_index,
"skipped_files": skipped_files,
"reuse_percent": job.reuse_percent,
"current_file_path": current_file_name, "current_file_path": current_file_name,
"current_file_name": current_file_name, "current_file_name": current_file_name,
}, },
@@ -123,8 +194,10 @@ class IndexingOrchestrator:
timeout=timeout_sec, timeout=timeout_sec,
) )
job.status = IndexJobStatus.DONE job.status = IndexJobStatus.DONE
job.total_files = total_files
job.indexed_files = indexed job.indexed_files = indexed
job.failed_files = failed job.failed_files = failed
job.skipped_files = skipped_files
job.cache_hit_files = cache_hits job.cache_hit_files = cache_hits
job.cache_miss_files = cache_misses job.cache_miss_files = cache_misses
self._store.save(job) self._store.save(job)
@@ -142,11 +215,13 @@ class IndexingOrchestrator:
{ {
"index_job_id": job_id, "index_job_id": job_id,
"status": job.status.value, "status": job.status.value,
"total_files": total_files,
"indexed_files": indexed, "indexed_files": indexed,
"failed_files": failed, "failed_files": failed,
"skipped_files": skipped_files,
"reuse_percent": job.reuse_percent,
"cache_hit_files": cache_hits, "cache_hit_files": cache_hits,
"cache_miss_files": cache_misses, "cache_miss_files": cache_misses,
"total_files": total_files,
}, },
) )
await self._events.publish( await self._events.publish(
@@ -155,15 +230,19 @@ class IndexingOrchestrator:
{ {
"index_job_id": job_id, "index_job_id": job_id,
"status": "done", "status": "done",
"total_files": total_files,
"indexed_files": indexed, "indexed_files": indexed,
"failed_files": failed, "failed_files": failed,
"skipped_files": skipped_files,
"reuse_percent": job.reuse_percent,
"cache_hit_files": cache_hits, "cache_hit_files": cache_hits,
"cache_miss_files": cache_misses, "cache_miss_files": cache_misses,
"total_files": total_files,
}, },
) )
except (TimeoutError, ConnectionError, OSError) as exc: except (TimeoutError, ConnectionError, OSError) as exc:
job.status = IndexJobStatus.ERROR job.status = IndexJobStatus.ERROR
job.total_files = total_files
job.skipped_files = skipped_files
job.failed_files = max(1, job.failed_files) job.failed_files = max(1, job.failed_files)
job.error = ErrorPayload( job.error = ErrorPayload(
code="index_runtime_error", code="index_runtime_error",
@@ -175,7 +254,13 @@ class IndexingOrchestrator:
await self._events.publish( await self._events.publish(
job_id, job_id,
"index_status", "index_status",
{"index_job_id": job_id, "status": job.status.value, "total_files": total_files}, {
"index_job_id": job_id,
"status": job.status.value,
"total_files": total_files,
"skipped_files": skipped_files,
"reuse_percent": job.reuse_percent,
},
) )
await self._events.publish( await self._events.publish(
job_id, job_id,
@@ -184,6 +269,8 @@ class IndexingOrchestrator:
"index_job_id": job_id, "index_job_id": job_id,
"status": "error", "status": "error",
"total_files": total_files, "total_files": total_files,
"skipped_files": skipped_files,
"reuse_percent": job.reuse_percent,
"error": { "error": {
"code": job.error.code, "code": job.error.code,
"desc": job.error.desc, "desc": job.error.desc,
@@ -193,6 +280,8 @@ class IndexingOrchestrator:
) )
except asyncio.TimeoutError as exc: except asyncio.TimeoutError as exc:
job.status = IndexJobStatus.ERROR job.status = IndexJobStatus.ERROR
job.total_files = total_files
job.skipped_files = skipped_files
job.failed_files = max(1, job.failed_files) job.failed_files = max(1, job.failed_files)
job.error = ErrorPayload( job.error = ErrorPayload(
code="index_timeout", code="index_timeout",
@@ -204,7 +293,13 @@ class IndexingOrchestrator:
await self._events.publish( await self._events.publish(
job_id, job_id,
"index_status", "index_status",
{"index_job_id": job_id, "status": job.status.value, "total_files": total_files}, {
"index_job_id": job_id,
"status": job.status.value,
"total_files": total_files,
"skipped_files": skipped_files,
"reuse_percent": job.reuse_percent,
},
) )
await self._events.publish( await self._events.publish(
job_id, job_id,
@@ -213,6 +308,8 @@ class IndexingOrchestrator:
"index_job_id": job_id, "index_job_id": job_id,
"status": "error", "status": "error",
"total_files": total_files, "total_files": total_files,
"skipped_files": skipped_files,
"reuse_percent": job.reuse_percent,
"error": { "error": {
"code": job.error.code, "code": job.error.code,
"desc": job.error.desc, "desc": job.error.desc,
@@ -222,6 +319,8 @@ class IndexingOrchestrator:
) )
except Exception as exc: except Exception as exc:
job.status = IndexJobStatus.ERROR job.status = IndexJobStatus.ERROR
job.total_files = total_files
job.skipped_files = skipped_files
job.failed_files = max(1, job.failed_files) job.failed_files = max(1, job.failed_files)
job.error = ErrorPayload( job.error = ErrorPayload(
code="index_unexpected_error", code="index_unexpected_error",
@@ -233,7 +332,13 @@ class IndexingOrchestrator:
await self._events.publish( await self._events.publish(
job_id, job_id,
"index_status", "index_status",
{"index_job_id": job_id, "status": job.status.value, "total_files": total_files}, {
"index_job_id": job_id,
"status": job.status.value,
"total_files": total_files,
"skipped_files": skipped_files,
"reuse_percent": job.reuse_percent,
},
) )
await self._events.publish( await self._events.publish(
job_id, job_id,
@@ -242,6 +347,8 @@ class IndexingOrchestrator:
"index_job_id": job_id, "index_job_id": job_id,
"status": "error", "status": "error",
"total_files": total_files, "total_files": total_files,
"skipped_files": skipped_files,
"reuse_percent": job.reuse_percent,
"error": { "error": {
"code": job.error.code, "code": job.error.code,
"desc": job.error.desc, "desc": job.error.desc,
+34 -5
View File
@@ -13,8 +13,10 @@ class RagJobRow:
index_job_id: str index_job_id: str
rag_session_id: str rag_session_id: str
status: str status: str
total_files: int
indexed_files: int indexed_files: int
failed_files: int failed_files: int
skipped_files: int
cache_hit_files: int cache_hit_files: int
cache_miss_files: int cache_miss_files: int
error_code: str | None error_code: str | None
@@ -24,16 +26,36 @@ class RagJobRow:
class RagJobRepository: class RagJobRepository:
def create_job(self, index_job_id: str, rag_session_id: str, status: str) -> None: def create_job(
self,
index_job_id: str,
rag_session_id: str,
status: str,
*,
total_files: int = 0,
skipped_files: int = 0,
) -> None:
with get_engine().connect() as conn: with get_engine().connect() as conn:
conn.execute( conn.execute(
text( text(
""" """
INSERT INTO rag_index_jobs (index_job_id, rag_session_id, status) INSERT INTO rag_index_jobs (
VALUES (:jid, :sid, :status) index_job_id,
rag_session_id,
status,
total_files,
skipped_files
)
VALUES (:jid, :sid, :status, :total_files, :skipped_files)
""" """
), ),
{"jid": index_job_id, "sid": rag_session_id, "status": status}, {
"jid": index_job_id,
"sid": rag_session_id,
"status": status,
"total_files": total_files,
"skipped_files": skipped_files,
},
) )
conn.commit() conn.commit()
@@ -42,8 +64,10 @@ class RagJobRepository:
index_job_id: str, index_job_id: str,
*, *,
status: str, status: str,
total_files: int,
indexed_files: int, indexed_files: int,
failed_files: int, failed_files: int,
skipped_files: int,
cache_hit_files: int = 0, cache_hit_files: int = 0,
cache_miss_files: int = 0, cache_miss_files: int = 0,
error_code: str | None = None, error_code: str | None = None,
@@ -56,8 +80,10 @@ class RagJobRepository:
""" """
UPDATE rag_index_jobs UPDATE rag_index_jobs
SET status = :status, SET status = :status,
total_files = :total_files,
indexed_files = :indexed, indexed_files = :indexed,
failed_files = :failed, failed_files = :failed,
skipped_files = :skipped_files,
cache_hit_files = :cache_hit_files, cache_hit_files = :cache_hit_files,
cache_miss_files = :cache_miss_files, cache_miss_files = :cache_miss_files,
error_code = :ecode, error_code = :ecode,
@@ -70,8 +96,10 @@ class RagJobRepository:
{ {
"jid": index_job_id, "jid": index_job_id,
"status": status, "status": status,
"total_files": total_files,
"indexed": indexed_files, "indexed": indexed_files,
"failed": failed_files, "failed": failed_files,
"skipped_files": skipped_files,
"cache_hit_files": cache_hit_files, "cache_hit_files": cache_hit_files,
"cache_miss_files": cache_miss_files, "cache_miss_files": cache_miss_files,
"ecode": error_code, "ecode": error_code,
@@ -86,7 +114,8 @@ class RagJobRepository:
row = conn.execute( row = conn.execute(
text( text(
""" """
SELECT index_job_id, rag_session_id, status, indexed_files, failed_files, SELECT index_job_id, rag_session_id, status, total_files, indexed_files, failed_files,
skipped_files,
cache_hit_files, cache_miss_files, error_code, error_desc, error_module, updated_at cache_hit_files, cache_miss_files, error_code, error_desc, error_module, updated_at
FROM rag_index_jobs FROM rag_index_jobs
WHERE index_job_id = :jid WHERE index_job_id = :jid
+2 -2
View File
@@ -31,8 +31,8 @@ class RagRepository:
def get_session(self, rag_session_id: str) -> dict | None: def get_session(self, rag_session_id: str) -> dict | None:
return self._sessions.get_session(rag_session_id) return self._sessions.get_session(rag_session_id)
def create_job(self, index_job_id: str, rag_session_id: str, status: str) -> None: def create_job(self, index_job_id: str, rag_session_id: str, status: str, **kwargs) -> None:
self._jobs.create_job(index_job_id, rag_session_id, status) self._jobs.create_job(index_job_id, rag_session_id, status, **kwargs)
def update_job(self, index_job_id: str, **kwargs) -> None: def update_job(self, index_job_id: str, **kwargs) -> None:
self._jobs.update_job(index_job_id, **kwargs) self._jobs.update_job(index_job_id, **kwargs)
@@ -15,6 +15,8 @@ class RagSchemaMigrator:
def _ensure_core_columns(self, conn) -> None: def _ensure_core_columns(self, conn) -> None:
for statement in ( for statement in (
"ALTER TABLE rag_index_jobs ADD COLUMN IF NOT EXISTS total_files INTEGER NOT NULL DEFAULT 0",
"ALTER TABLE rag_index_jobs ADD COLUMN IF NOT EXISTS skipped_files INTEGER NOT NULL DEFAULT 0",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS layer VARCHAR(64) NULL", "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS layer VARCHAR(64) NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS lang VARCHAR(32) NULL", "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS lang VARCHAR(32) NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS repo_id VARCHAR(512) NULL", "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS repo_id VARCHAR(512) NULL",
@@ -16,8 +16,10 @@ def base_table_statements() -> tuple[str, ...]:
index_job_id VARCHAR(64) PRIMARY KEY, index_job_id VARCHAR(64) PRIMARY KEY,
rag_session_id VARCHAR(64) NOT NULL, rag_session_id VARCHAR(64) NOT NULL,
status VARCHAR(16) NOT NULL, status VARCHAR(16) NOT NULL,
total_files INTEGER NOT NULL DEFAULT 0,
indexed_files INTEGER NOT NULL DEFAULT 0, indexed_files INTEGER NOT NULL DEFAULT 0,
failed_files INTEGER NOT NULL DEFAULT 0, failed_files INTEGER NOT NULL DEFAULT 0,
skipped_files INTEGER NOT NULL DEFAULT 0,
cache_hit_files INTEGER NOT NULL DEFAULT 0, cache_hit_files INTEGER NOT NULL DEFAULT 0,
cache_miss_files INTEGER NOT NULL DEFAULT 0, cache_miss_files INTEGER NOT NULL DEFAULT 0,
error_code VARCHAR(128) NULL, error_code VARCHAR(128) NULL,
+2
View File
@@ -10,6 +10,8 @@ class ClientEventType(str, Enum):
SYSTEM = "system" SYSTEM = "system"
STATUS = "status" STATUS = "status"
USER = "user" USER = "user"
RESULT = "result"
ERROR = "error"
class ClientEvent(BaseModel): class ClientEvent(BaseModel):
+3
View File
@@ -49,8 +49,11 @@ class IndexJobStatus(str, Enum):
class IndexJobResponse(BaseModel): class IndexJobResponse(BaseModel):
index_job_id: str index_job_id: str
status: IndexJobStatus status: IndexJobStatus
total_files: int = 0
indexed_files: int = 0 indexed_files: int = 0
failed_files: int = 0 failed_files: int = 0
skipped_files: int = 0
reuse_percent: int = 0
cache_hit_files: int = 0 cache_hit_files: int = 0
cache_miss_files: int = 0 cache_miss_files: int = 0
error: Optional[ErrorPayload] = None error: Optional[ErrorPayload] = None
+3
View File
@@ -7,8 +7,11 @@ class RagSessionJobResponse(BaseModel):
rag_session_id: str rag_session_id: str
index_job_id: str index_job_id: str
status: IndexJobStatus status: IndexJobStatus
total_files: int = 0
indexed_files: int = 0 indexed_files: int = 0
failed_files: int = 0 failed_files: int = 0
skipped_files: int = 0
reuse_percent: int = 0
cache_hit_files: int = 0 cache_hit_files: int = 0
cache_miss_files: int = 0 cache_miss_files: int = 0
error: dict | None = None error: dict | None = None
@@ -1,7 +1,15 @@
from __future__ import annotations from __future__ import annotations
import asyncio
from pathlib import Path
from types import SimpleNamespace from types import SimpleNamespace
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.steps.step2_load_source_content.step import (
LoadSourceContentStep,
)
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.steps.step4_load_rules.step import (
LoadRulesStep,
)
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.steps.step3_parse_requirements.parser import ( from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.steps.step3_parse_requirements.parser import (
FunctionalRequirementsParser, FunctionalRequirementsParser,
) )
@@ -15,9 +23,15 @@ from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.steps.step
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocesses.common.source_sections import ( from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocesses.common.source_sections import (
RequirementSourceSections, RequirementSourceSections,
) )
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocesses.common.rules_catalog import (
RulesCatalog,
)
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocesses.common.template_parser import ( from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocesses.common.template_parser import (
TemplateParser, TemplateParser,
) )
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.subprocesses.common.template_registry import (
TemplateRegistry,
)
from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.workflow_runtime.context import ( from app.core.agent.processes.v2.workflows.doc_update_from_feature_v2.workflow_runtime.context import (
DocUpdateFromFeatureV2Context, DocUpdateFromFeatureV2Context,
) )
@@ -178,6 +192,70 @@ def test_task_builder_uses_create_when_path_is_new_and_no_delete_markers() -> No
assert tasks[0].path == "docs/orders/web/ui_page/orders.ui.list.md" assert tasks[0].path == "docs/orders/web/ui_page/orders.ui.list.md"
def test_task_builder_normalizes_data_entity_to_db_table() -> None:
context = DocUpdateFromFeatureV2Context(
runtime=SimpleNamespace(),
route=SimpleNamespace(),
rag_session_id="",
analytics_meta=AnalyticsMeta(
application="orders_pprb",
domain="orders",
subdomain="lifecycle",
),
requirements=[
RequirementUnit(
section_key="6.5",
heading="Создание таблицы orders в БД",
body="Описание таблицы orders.",
metadata={
"id": "orders.db.table.orders",
"doc_type": "data_entity",
"application": "orders_pprb",
"platform": "pprb",
},
)
],
)
tasks = RequirementTaskBuilder(_UnexpectedLlmCall()).build(context)
assert len(tasks) == 1
assert tasks[0].doc_type == "db_table"
assert tasks[0].path == "docs/orders/pprb/db_table/orders.db.table.orders.md"
def test_task_builder_normalizes_domain_entity_to_db_table() -> None:
context = DocUpdateFromFeatureV2Context(
runtime=SimpleNamespace(),
route=SimpleNamespace(),
rag_session_id="",
analytics_meta=AnalyticsMeta(
application="orders_pprb",
domain="orders",
subdomain="lifecycle",
),
requirements=[
RequirementUnit(
section_key="6.5",
heading="Создание таблицы orders в БД",
body="Описание таблицы orders.",
metadata={
"id": "orders.db.table.orders",
"doc_type": "domain_entity",
"application": "orders_pprb",
"platform": "pprb",
},
)
],
)
tasks = RequirementTaskBuilder(_UnexpectedLlmCall()).build(context)
assert len(tasks) == 1
assert tasks[0].doc_type == "db_table"
assert tasks[0].path == "docs/orders/pprb/db_table/orders.db.table.orders.md"
def test_delete_heuristic_does_not_match_phrase_ne_udalos() -> None: def test_delete_heuristic_does_not_match_phrase_ne_udalos() -> None:
heuristic = DeleteIntentHeuristic() heuristic = DeleteIntentHeuristic()
@@ -185,6 +263,28 @@ def test_delete_heuristic_does_not_match_phrase_ne_udalos() -> None:
assert heuristic.is_delete("Нужно удалить существующую страницу документации.") is True assert heuristic.is_delete("Нужно удалить существующую страницу документации.") is True
def test_load_source_content_step_uses_repo_root_when_docs_dir_is_in_parent(tmp_path) -> None:
project_root = tmp_path / "test_doc"
features_dir = project_root / "features"
docs_dir = project_root / "docs"
features_dir.mkdir(parents=True)
docs_dir.mkdir(parents=True)
source_path = features_dir / "order_list.md"
source_path.write_text("# Feature", encoding="utf-8")
context = DocUpdateFromFeatureV2Context(
runtime=SimpleNamespace(),
route=SimpleNamespace(),
rag_session_id="",
source_ref=source_path.as_posix(),
source_kind="markdown_file",
)
result = asyncio.run(LoadSourceContentStep().run(context))
assert result.project_root == project_root.as_posix()
assert result.source_content == "# Feature"
def test_template_parser_extracts_ordered_sections_from_ui_template() -> None: def test_template_parser_extracts_ordered_sections_from_ui_template() -> None:
parser = TemplateParser() parser = TemplateParser()
template = """ template = """
@@ -216,6 +316,43 @@ doc_type: ui_page
assert spec.sections[1].has_children is True assert spec.sections[1].has_children is True
def test_template_registry_loads_db_table_template_by_data_entity_alias() -> None:
registry = TemplateRegistry()
catalog = RulesCatalog(
by_name={
"templates/db_table.template.md": """
---
doc_type: db_table
---
# <title>
## Summary
## Details
""".strip()
}
)
spec = registry.load(catalog, "data_entity")
assert spec.doc_type == "db_table"
def test_load_rules_step_includes_bundled_db_table_template() -> None:
step = LoadRulesStep(rules_root=Path("_process/doc_rules_v3"))
context = DocUpdateFromFeatureV2Context(
runtime=SimpleNamespace(),
route=SimpleNamespace(),
rag_session_id="",
)
result = asyncio.run(step.run(context))
names = {item.name for item in result.rules}
assert "templates/db_table.template.md" in names
def test_requirement_source_sections_match_template_titles() -> None: def test_requirement_source_sections_match_template_titles() -> None:
locator = RequirementSourceSections() locator = RequirementSourceSections()
body = """ body = """
@@ -3,6 +3,7 @@ from __future__ import annotations
import json import json
from app.core.agent.processes.v2 import V2IntentRouter from app.core.agent.processes.v2 import V2IntentRouter
from app.core.agent.processes.v2.intent_router.modules.target_terms import V2TargetTermsExtractor
class FakeLlm: class FakeLlm:
@@ -131,3 +132,26 @@ def test_router_keeps_short_api_like_token_as_strong_hint_without_explicit_path(
assert result.anchors.endpoint_paths == [] assert result.anchors.endpoint_paths == []
assert "health endpoint" in result.anchors.target_doc_hints assert "health endpoint" in result.anchors.target_doc_hints
assert "health" in result.target_terms assert "health" in result.target_terms
def test_router_routes_explicit_feature_doc_build_to_doc_update_without_llm() -> None:
llm = FakeLlm(_llm_response("DOCS", "DOC_EXPLAIN", "FIND_FILES"))
result = V2IntentRouter(llm=llm).route(
"Собери документацию по /Users/alex/Dev_projects_v2/ai driven app process/v2/test_doc/features/order_list.md"
)
assert result.intent == "DOC_UPDATE"
assert result.subintent == "FROM_FEATURE"
assert result.routing_mode == "deterministic"
assert result.llm_router_used is False
assert result.anchors.endpoint_paths == []
assert len(llm.calls) == 0
def test_target_terms_extractor_does_not_treat_absolute_filesystem_path_as_endpoint() -> None:
analysis = V2TargetTermsExtractor().extract(
"Собери документацию по /Users/alex/Dev_projects_v2/ai driven app process/v2/test_doc/features/order_list.md"
)
assert analysis.endpoint_paths == []
+32
View File
@@ -15,7 +15,20 @@ from app.schemas.orchestration import RequestExecutionStatus
class FakePublisher: class FakePublisher:
def __init__(self) -> None:
self.status_events: list[dict[str, object]] = []
async def publish_status(self, *_args, **_kwargs) -> None: async def publish_status(self, *_args, **_kwargs) -> None:
request_id, source, text = _args[:3]
payload = _args[3] if len(_args) > 3 else _kwargs.get("payload")
self.status_events.append(
{
"request_id": request_id,
"source": source,
"text": text,
"payload": payload or {},
}
)
return None return None
async def publish_user(self, *_args, **_kwargs) -> None: async def publish_user(self, *_args, **_kwargs) -> None:
@@ -242,6 +255,25 @@ def test_v2_process_logs_pipeline_steps() -> None:
assert "answer_generated" in pipeline_titles assert "answer_generated" in pipeline_titles
def test_v2_process_publishes_router_status_message() -> None:
llm = FakeLlm("Краткое объяснение по документации.")
adapter = FakeRagAdapter(summary_rows=_SUMMARY_ROWS, file_rows=[])
process = _v2_process(llm, adapter)
runtime = _context("Что делает endpoint /health?")
asyncio.run(process.run(runtime))
assert runtime.publisher.status_events
router_event = runtime.publisher.status_events[0]
assert router_event["source"] == "process.v2"
assert router_event["text"] == "Запрос принял, переход в объяснение документации."
assert router_event["payload"] == {
"routing_domain": "DOCS",
"intent": "DOC_EXPLAIN",
"subintent": "SUMMARY",
}
def test_v2_process_blocks_generic_docs_answer_without_target_doc() -> None: def test_v2_process_blocks_generic_docs_answer_without_target_doc() -> None:
llm = FakeLlm("галлюцинация") llm = FakeLlm("галлюцинация")
adapter = FakeRagAdapter( adapter = FakeRagAdapter(
@@ -0,0 +1,99 @@
from __future__ import annotations
import asyncio
from dataclasses import dataclass
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
from app.core.agent.utils.workflow.graph import WorkflowGraph
from app.core.agent.utils.workflow.step import WorkflowStep
from app.core.api.domain.models.agent_request import AgentRequest
from app.core.api.domain.models.agent_session import AgentSession
from app.schemas.orchestration import RequestExecutionStatus
class FakePublisher:
def __init__(self) -> None:
self.status_events: list[dict[str, object]] = []
async def publish_status(self, request_id: str, source: str, text: str, payload: dict | None = None) -> None:
self.status_events.append(
{"request_id": request_id, "source": source, "text": text, "payload": payload or {}}
)
async def publish_user(self, *_args, **_kwargs) -> None:
return None
class FakeTrace:
def module(self, _name: str) -> "FakeTrace":
return self
def log(self, *_args, **_kwargs) -> None:
return None
@dataclass(slots=True)
class FakeWorkflowContext:
runtime: RuntimeExecutionContext
value: int = 0
class HookedStep(WorkflowStep[FakeWorkflowContext]):
step_id = "hooked"
title = "Hooked"
async def run(self, context: FakeWorkflowContext) -> FakeWorkflowContext:
return FakeWorkflowContext(runtime=context.runtime, value=context.value + 1)
def get_before_status_message(self) -> str | None:
assert self.context is not None
assert self.input_context is not None
assert self.output_context is None
return f"before:{self.context.value}"
def get_after_status_message(self) -> str | None:
assert self.context is not None
assert self.input_context is not None
assert self.output_context is not None
return f"after:{self.input_context.value}->{self.output_context.value}"
def _runtime() -> RuntimeExecutionContext:
request = AgentRequest(
request_id="req-1",
session_id="sess-1",
message="x",
process_version="v2",
status=RequestExecutionStatus.RUNNING,
created_at=AgentRequest.create("req-x", "sess-x", "x", "v2").created_at,
)
session = AgentSession.create("sess-1", "rag-1")
return RuntimeExecutionContext(
request=request,
session=session,
publisher=FakePublisher(),
trace=FakeTrace(),
)
def test_workflow_graph_publishes_before_and_after_status_messages() -> None:
context = FakeWorkflowContext(runtime=_runtime(), value=2)
graph = WorkflowGraph("wf-1", "workflow.test", [HookedStep()])
result = asyncio.run(graph.run(context))
assert result.value == 3
assert context.runtime.publisher.status_events == [
{
"request_id": "req-1",
"source": "workflow.test",
"text": "before:2",
"payload": {"workflow_id": "wf-1", "step_id": "hooked", "phase": "before"},
},
{
"request_id": "req-1",
"source": "workflow.test",
"text": "after:2->3",
"payload": {"workflow_id": "wf-1", "step_id": "hooked", "phase": "after"},
},
]
@@ -0,0 +1,69 @@
from __future__ import annotations
import asyncio
from types import SimpleNamespace
from app.core.api.application.request_service import RequestService
from app.core.api.application.request_start_gate import RequestStartGate
from app.core.api.application.stream_service import StreamService
from app.core.api.infrastructure.ids.request_id_factory import RequestIdFactory
from app.core.api.infrastructure.stores.in_memory_request_store import InMemoryRequestStore
from app.core.api.infrastructure.streaming.sse_event_channel import SseEventChannel
class FakeRuntime:
def __init__(self) -> None:
self.started = asyncio.Event()
self.calls: list[tuple[object, object]] = []
async def run(self, request, session) -> None:
self.calls.append((request, session))
self.started.set()
class FakeSessions:
def get(self, _session_id: str):
return SimpleNamespace(session_id="sess-1")
async def _wait_briefly() -> None:
await asyncio.sleep(0.05)
def test_request_service_waits_for_stream_subscriber_before_runtime_start() -> None:
gate = RequestStartGate(timeout_seconds=1.0)
runtime = FakeRuntime()
service = RequestService(
request_store=InMemoryRequestStore(),
request_ids=RequestIdFactory(),
sessions=FakeSessions(),
runtime=runtime,
start_gate=gate,
)
async def scenario() -> None:
request = await service.create("sess-1", "hello", "v2")
await _wait_briefly()
assert runtime.calls == []
gate.mark_ready(request.request_id)
await asyncio.wait_for(runtime.started.wait(), timeout=1.0)
assert len(runtime.calls) == 1
asyncio.run(scenario())
def test_stream_service_subscribe_marks_request_ready() -> None:
gate = RequestStartGate(timeout_seconds=1.0)
gate.register("req-1")
service = StreamService(
channel=SseEventChannel(),
request_exists=lambda request_id: request_id == "req-1",
start_gate=gate,
)
async def scenario() -> None:
waiter = asyncio.create_task(gate.wait_until_ready("req-1"))
await service.subscribe("req-1")
await asyncio.wait_for(waiter, timeout=1.0)
asyncio.run(scenario())