Работает агент, поправлены пути
This commit is contained in:
+6
-17
@@ -89,28 +89,17 @@ RAG сейчас используется как общее ядро индек
|
|||||||
Хранит карточку документа как точку входа в документ и его краткое описание.
|
Хранит карточку документа как точку входа в документ и его краткое описание.
|
||||||
|
|
||||||
Формирование:
|
Формирование:
|
||||||
Источник данных - frontmatter, fallback title, summary и doc kind, вычисленный классификатором документации.
|
Источник данных - frontmatter `as is`, summary и doc kind, вычисленный классификатором документации.
|
||||||
Данные извлекаются структурированно по атрибутам.
|
В `metadata_json` копируются все `key-value` из frontmatter без нормализации и без fallback для frontmatter-атрибутов.
|
||||||
|
Дополнительно в `metadata_json` добавляются служебные поля `source_path`, `summary_text`, `doc_kind`.
|
||||||
|
Атрибут `document_id` добавляется только при наличии `frontmatter.id` (fallback до пути файла не применяется).
|
||||||
В `content` попадает summary документа, а не склейка всех частей документа в сплошной текст.
|
В `content` попадает summary документа, а не склейка всех частей документа в сплошной текст.
|
||||||
|
|
||||||
Фиксация в БД:
|
Фиксация в БД:
|
||||||
| Атрибут в `metadata_json` | Описание | Источник |
|
| Атрибут в `metadata_json` | Описание | Источник |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| `document_id` | идентификатор документа | `frontmatter.id`, иначе путь файла |
|
| `*` frontmatter fields | все поля frontmatter в исходном виде | frontmatter документа |
|
||||||
| `type` | тип документа из frontmatter | `frontmatter.type` |
|
| `document_id` | идентификатор документа, добавляется только если в frontmatter есть `id` | `frontmatter.id` |
|
||||||
| `name` | системное имя документа | `frontmatter.name` |
|
|
||||||
| `title` | человекочитаемый заголовок документа | `frontmatter.title`, иначе fallback title |
|
|
||||||
| `module` | модуль документа | `frontmatter.module` |
|
|
||||||
| `domain` | домен документа | `frontmatter.domain` |
|
|
||||||
| `subdomain` | поддомен документа | `frontmatter.subdomain` |
|
|
||||||
| `layer` | логический слой, указанный в frontmatter документа | `frontmatter.layer` |
|
|
||||||
| `status` | статус документа | `frontmatter.status` |
|
|
||||||
| `updated_at` | дата или отметка последнего обновления | `frontmatter.updated_at` |
|
|
||||||
| `tags` | теги документа | `frontmatter.tags` |
|
|
||||||
| `entities` | сущности, связанные с документом | `frontmatter.entities` |
|
|
||||||
| `parent` | родительский документ | `frontmatter.parent` |
|
|
||||||
| `children` | дочерние документы | `frontmatter.children` |
|
|
||||||
| `links` | ссылки на связанные материалы | `frontmatter.links` |
|
|
||||||
| `source_path` | исходный путь документа | путь файла |
|
| `source_path` | исходный путь документа | путь файла |
|
||||||
| `summary_text` | краткое содержание документа | секция `# Summary` |
|
| `summary_text` | краткое содержание документа | секция `# Summary` |
|
||||||
| `doc_kind` | классификация документа, например `readme`, `spec`, `runbook` | `DocsClassifier.classify(path)` |
|
| `doc_kind` | классификация документа, например `readme`, `spec`, `runbook` | `DocsClassifier.classify(path)` |
|
||||||
|
|||||||
@@ -0,0 +1,50 @@
|
|||||||
|
---
|
||||||
|
id: ui.example_page
|
||||||
|
type: ui_page
|
||||||
|
doc_type: ui_page
|
||||||
|
name: example_page
|
||||||
|
title: Пример UI-страницы
|
||||||
|
module: example_module
|
||||||
|
layer: presentation
|
||||||
|
domain: example_domain
|
||||||
|
sub_domain: example_subdomain
|
||||||
|
related_docs: []
|
||||||
|
status: draft
|
||||||
|
updated_at: 2026-03-20
|
||||||
|
source_of_truth: mixed
|
||||||
|
parent: null
|
||||||
|
children: []
|
||||||
|
tags: []
|
||||||
|
entities: []
|
||||||
|
links: {}
|
||||||
|
---
|
||||||
|
|
||||||
|
# Пример UI-страницы
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Краткое описание страницы и её назначения.
|
||||||
|
|
||||||
|
## Details
|
||||||
|
|
||||||
|
### Назначение страницы
|
||||||
|
|
||||||
|
### Пользовательский сценарий
|
||||||
|
|
||||||
|
### Основные блоки интерфейса
|
||||||
|
|
||||||
|
### Связанные API и сущности
|
||||||
|
|
||||||
|
### Функциональные требования
|
||||||
|
|
||||||
|
### Нефункциональные требования
|
||||||
|
|
||||||
|
### Ограничения и граничные случаи
|
||||||
|
|
||||||
|
### Ошибки и валидации
|
||||||
|
|
||||||
|
### Связанный код
|
||||||
|
|
||||||
|
### Связанные документы
|
||||||
|
|
||||||
|
### История изменений
|
||||||
-25
@@ -8,26 +8,14 @@ class ApiEndpointCollector:
|
|||||||
_ENDPOINT_VALUE_RE = re.compile(
|
_ENDPOINT_VALUE_RE = re.compile(
|
||||||
r"\b((?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)(?:\s*\|\s*(?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS))*)\s+(/[-a-zA-Z0-9_./{}]+)"
|
r"\b((?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)(?:\s*\|\s*(?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS))*)\s+(/[-a-zA-Z0-9_./{}]+)"
|
||||||
)
|
)
|
||||||
_METHOD_PATH_RE = re.compile(r"\b(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\s+(/[-a-zA-Z0-9_./{}]+)")
|
|
||||||
_PATH_RE = re.compile(r"(/[-a-zA-Z0-9_./{}]+)")
|
|
||||||
_DOC_EXTS = (".md", ".yaml", ".yml", ".json")
|
_DOC_EXTS = (".md", ".yaml", ".yml", ".json")
|
||||||
|
|
||||||
def collect(self, rows: list[dict]) -> list[str]:
|
def collect(self, rows: list[dict]) -> list[str]:
|
||||||
endpoints: list[str] = []
|
endpoints: list[str] = []
|
||||||
for row in rows:
|
for row in rows:
|
||||||
self._append_from_endpoint_metadata(endpoints, row)
|
self._append_from_endpoint_metadata(endpoints, row)
|
||||||
self._append_from_title_fallback(endpoints, row)
|
|
||||||
for raw in self._row_candidates(row):
|
|
||||||
self._append_from_text(endpoints, raw)
|
|
||||||
return sorted(set(endpoints))
|
return sorted(set(endpoints))
|
||||||
|
|
||||||
def _append_from_title_fallback(self, out: list[str], row: dict) -> None:
|
|
||||||
title = str(row.get("title") or "").strip()
|
|
||||||
if not title:
|
|
||||||
return
|
|
||||||
for match in self._PATH_RE.findall(title):
|
|
||||||
self._append_default(out, match)
|
|
||||||
|
|
||||||
def _append_from_endpoint_metadata(self, out: list[str], row: dict) -> None:
|
def _append_from_endpoint_metadata(self, out: list[str], row: dict) -> None:
|
||||||
metadata = dict(row.get("metadata") or {})
|
metadata = dict(row.get("metadata") or {})
|
||||||
endpoint_value = str(metadata.get("endpoint") or "").strip()
|
endpoint_value = str(metadata.get("endpoint") or "").strip()
|
||||||
@@ -36,19 +24,6 @@ class ApiEndpointCollector:
|
|||||||
for methods, path in self._ENDPOINT_VALUE_RE.findall(endpoint_value):
|
for methods, path in self._ENDPOINT_VALUE_RE.findall(endpoint_value):
|
||||||
self._append_methods_with_path(out, methods, path)
|
self._append_methods_with_path(out, methods, path)
|
||||||
|
|
||||||
def _row_candidates(self, row: dict) -> list[str]:
|
|
||||||
metadata = dict(row.get("metadata") or {})
|
|
||||||
values = [
|
|
||||||
metadata.get("name"),
|
|
||||||
metadata.get("summary_text"),
|
|
||||||
row.get("title"),
|
|
||||||
]
|
|
||||||
return [str(value or "") for value in values if str(value or "").strip()]
|
|
||||||
|
|
||||||
def _append_from_text(self, out: list[str], text: str) -> None:
|
|
||||||
for method, path in self._METHOD_PATH_RE.findall(text):
|
|
||||||
self._append_with_method(out, method, path)
|
|
||||||
|
|
||||||
def _append_methods_with_path(self, out: list[str], methods_raw: str, path_raw: str) -> None:
|
def _append_methods_with_path(self, out: list[str], methods_raw: str, path_raw: str) -> None:
|
||||||
methods = [
|
methods = [
|
||||||
part.strip().upper()
|
part.strip().upper()
|
||||||
|
|||||||
+66
-1
@@ -56,7 +56,17 @@ class DocRulesChangesetGenerator:
|
|||||||
)
|
)
|
||||||
payload = self._parse_json(raw)
|
payload = self._parse_json(raw)
|
||||||
if payload is None:
|
if payload is None:
|
||||||
return None, f"LLM вернул невалидный JSON changeset для {item.path}."
|
if trace is not None:
|
||||||
|
trace.log("changeset_json_parse_failed", {"path": item.path, "raw_chars": len(str(raw or ""))})
|
||||||
|
repaired_raw = self._llm.generate(
|
||||||
|
"v2_docs_update.repair_doc_changeset_json",
|
||||||
|
self._build_repair_input(raw=raw, item=item),
|
||||||
|
log_context="workflow.v2.docs_update.from_feature.changeset_repair",
|
||||||
|
trace=trace,
|
||||||
|
)
|
||||||
|
payload = self._parse_json(repaired_raw)
|
||||||
|
if payload is None:
|
||||||
|
return None, f"LLM вернул невалидный JSON changeset для {item.path} даже после repair."
|
||||||
payload["op"] = item.op
|
payload["op"] = item.op
|
||||||
payload["path"] = item.path
|
payload["path"] = item.path
|
||||||
payload["reason"] = str(payload.get("reason") or item.reason)[:500]
|
payload["reason"] = str(payload.get("reason") or item.reason)[:500]
|
||||||
@@ -79,8 +89,63 @@ class DocRulesChangesetGenerator:
|
|||||||
try:
|
try:
|
||||||
value = json.loads(text)
|
value = json.loads(text)
|
||||||
return value if isinstance(value, dict) else None
|
return value if isinstance(value, dict) else None
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
normalized = self._escape_control_chars_in_json_strings(text)
|
||||||
|
if normalized != text:
|
||||||
|
try:
|
||||||
|
value = json.loads(normalized)
|
||||||
|
return value if isinstance(value, dict) else None
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
return None
|
return None
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _build_repair_input(self, *, raw: str, item: PlannedChange) -> str:
|
||||||
|
payload = {
|
||||||
|
"expected_contract": {
|
||||||
|
"op": item.op,
|
||||||
|
"path": item.path,
|
||||||
|
"required_keys": ["op", "path", "reason"],
|
||||||
|
"proposed_content_required_for": ["create", "update"],
|
||||||
|
},
|
||||||
|
"raw_llm_output": str(raw or ""),
|
||||||
|
}
|
||||||
|
return json.dumps(payload, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
|
def _escape_control_chars_in_json_strings(self, text: str) -> str:
|
||||||
|
escaped: list[str] = []
|
||||||
|
in_string = False
|
||||||
|
backslash = False
|
||||||
|
for char in text:
|
||||||
|
if not in_string:
|
||||||
|
escaped.append(char)
|
||||||
|
if char == '"':
|
||||||
|
in_string = True
|
||||||
|
continue
|
||||||
|
if backslash:
|
||||||
|
escaped.append(char)
|
||||||
|
backslash = False
|
||||||
|
continue
|
||||||
|
if char == "\\":
|
||||||
|
escaped.append(char)
|
||||||
|
backslash = True
|
||||||
|
continue
|
||||||
|
if char == '"':
|
||||||
|
escaped.append(char)
|
||||||
|
in_string = False
|
||||||
|
continue
|
||||||
|
codepoint = ord(char)
|
||||||
|
if codepoint < 0x20:
|
||||||
|
if char == "\n":
|
||||||
|
escaped.append("\\n")
|
||||||
|
elif char == "\r":
|
||||||
|
escaped.append("\\r")
|
||||||
|
elif char == "\t":
|
||||||
|
escaped.append("\\t")
|
||||||
|
else:
|
||||||
|
escaped.append(f"\\u{codepoint:04x}")
|
||||||
|
continue
|
||||||
|
escaped.append(char)
|
||||||
|
return "".join(escaped)
|
||||||
|
|
||||||
def _resolve_base_hash(self, project_root: str, rel_path: str) -> str:
|
def _resolve_base_hash(self, project_root: str, rel_path: str) -> str:
|
||||||
root = Path(project_root or "").expanduser()
|
root = Path(project_root or "").expanduser()
|
||||||
|
|||||||
+45
-65
@@ -1,9 +1,13 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
|
||||||
|
|
||||||
from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.docs_state_loader import DocsState
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.docs_state_loader import DocsState
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.plan_hints import (
|
||||||
|
PlanUnitHint,
|
||||||
|
parse_plan_hints,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.plan_path_policy import PlanPathPolicy
|
||||||
from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import DocUpdateFromFeatureContext
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import DocUpdateFromFeatureContext
|
||||||
from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.models import PlannedChange
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.models import PlannedChange
|
||||||
from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.system_rules import (
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.system_rules import (
|
||||||
@@ -23,15 +27,16 @@ class BuildChangePlanStep(WorkflowStep[DocUpdateFromFeatureContext]):
|
|||||||
def __init__(self, llm: AgentLlmService, query_repository: RagQueryRepository | None = None) -> None:
|
def __init__(self, llm: AgentLlmService, query_repository: RagQueryRepository | None = None) -> None:
|
||||||
self._llm = llm
|
self._llm = llm
|
||||||
self._query_repository = query_repository or RagQueryRepository()
|
self._query_repository = query_repository or RagQueryRepository()
|
||||||
|
self._path_policy = PlanPathPolicy(DOC_TYPE_TO_FOLDER)
|
||||||
|
|
||||||
async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
|
async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
|
||||||
if context.answer or not context.units:
|
if context.answer or not context.units:
|
||||||
return context
|
return context
|
||||||
self._load_docs_state(context)
|
self._load_docs_state(context)
|
||||||
inferred_types = self._infer_missing_types(context)
|
inferred_hints = self._infer_plan_hints(context)
|
||||||
state = DocsState.from_rows(context.docs_catalog_rows)
|
state = DocsState.from_rows(context.docs_catalog_rows)
|
||||||
for index, unit in enumerate(context.units):
|
for index, unit in enumerate(context.units):
|
||||||
planned = self._build_unit_plan(context, unit, state, inferred_types.get(index, ""))
|
planned = self._build_unit_plan(context, unit, state, inferred_hints.get(index, PlanUnitHint()))
|
||||||
if planned is None:
|
if planned is None:
|
||||||
continue
|
continue
|
||||||
context.planned_changes.append(planned)
|
context.planned_changes.append(planned)
|
||||||
@@ -55,18 +60,26 @@ class BuildChangePlanStep(WorkflowStep[DocUpdateFromFeatureContext]):
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
context.issues.append(f"Не удалось загрузить состояние документации из RAG: {exc}")
|
context.issues.append(f"Не удалось загрузить состояние документации из RAG: {exc}")
|
||||||
|
|
||||||
def _infer_missing_types(self, context: DocUpdateFromFeatureContext) -> dict[int, str]:
|
def _infer_plan_hints(self, context: DocUpdateFromFeatureContext) -> dict[int, PlanUnitHint]:
|
||||||
missing: list[tuple[int, str, str]] = []
|
items = []
|
||||||
for idx, unit in enumerate(context.units):
|
for idx, unit in enumerate(context.units):
|
||||||
value = str(unit.metadata.get("type") or "").strip()
|
items.append(
|
||||||
if not value:
|
{
|
||||||
missing.append((idx, unit.heading, unit.body[:400]))
|
"index": idx,
|
||||||
if not missing:
|
"heading": unit.heading,
|
||||||
return {}
|
"snippet": unit.body[:400],
|
||||||
|
"known": {
|
||||||
|
"type": str(unit.metadata.get("type") or "").strip(),
|
||||||
|
"id": str(unit.metadata.get("id") or "").strip(),
|
||||||
|
"application": str(unit.metadata.get("application") or context.analytics_meta.application or "").strip(),
|
||||||
|
"platform": str(unit.metadata.get("platform") or context.analytics_meta.platform or "").strip(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
payload = {
|
payload = {
|
||||||
"system_rules": SYSTEM_RULES_TEXT,
|
"system_rules": SYSTEM_RULES_TEXT,
|
||||||
"allowed_doc_types": list(ALLOWED_DOC_TYPES),
|
"allowed_doc_types": list(ALLOWED_DOC_TYPES),
|
||||||
"items": [{"index": idx, "heading": h, "snippet": snippet} for idx, h, snippet in missing],
|
"items": items,
|
||||||
}
|
}
|
||||||
raw = self._llm.generate(
|
raw = self._llm.generate(
|
||||||
"v2_docs_update.plan_change_units",
|
"v2_docs_update.plan_change_units",
|
||||||
@@ -74,43 +87,38 @@ class BuildChangePlanStep(WorkflowStep[DocUpdateFromFeatureContext]):
|
|||||||
log_context="workflow.v2.docs_update.from_feature.plan",
|
log_context="workflow.v2.docs_update.from_feature.plan",
|
||||||
trace=context.runtime.trace.module("workflow.v2.docs_update.from_feature.llm"),
|
trace=context.runtime.trace.module("workflow.v2.docs_update.from_feature.llm"),
|
||||||
)
|
)
|
||||||
return self._parse_type_inference(raw)
|
return parse_plan_hints(raw, ALLOWED_DOC_TYPES)
|
||||||
|
|
||||||
def _parse_type_inference(self, raw: str) -> dict[int, str]:
|
|
||||||
try:
|
|
||||||
data = json.loads(str(raw or "").strip())
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
return {}
|
|
||||||
rows = data.get("items") if isinstance(data, dict) else []
|
|
||||||
if not isinstance(rows, list):
|
|
||||||
return {}
|
|
||||||
result: dict[int, str] = {}
|
|
||||||
for row in rows:
|
|
||||||
if not isinstance(row, dict):
|
|
||||||
continue
|
|
||||||
index = row.get("index")
|
|
||||||
doc_type = str(row.get("doc_type") or "").strip()
|
|
||||||
if not isinstance(index, int) or doc_type not in ALLOWED_DOC_TYPES:
|
|
||||||
continue
|
|
||||||
result[index] = doc_type
|
|
||||||
return result
|
|
||||||
|
|
||||||
def _build_unit_plan(
|
def _build_unit_plan(
|
||||||
self,
|
self,
|
||||||
context: DocUpdateFromFeatureContext,
|
context: DocUpdateFromFeatureContext,
|
||||||
unit,
|
unit,
|
||||||
state: DocsState,
|
state: DocsState,
|
||||||
inferred_doc_type: str,
|
hint: PlanUnitHint,
|
||||||
) -> PlannedChange | None:
|
) -> PlannedChange | None:
|
||||||
doc_type = str(unit.metadata.get("type") or inferred_doc_type).strip()
|
doc_type = str(unit.metadata.get("type") or hint.doc_type).strip()
|
||||||
if doc_type not in ALLOWED_DOC_TYPES:
|
if doc_type not in ALLOWED_DOC_TYPES:
|
||||||
context.issues.append(f"Unit '{unit.heading}': неизвестный или отсутствующий type '{doc_type}'.")
|
context.issues.append(f"Unit '{unit.heading}': неизвестный или отсутствующий type '{doc_type}'.")
|
||||||
return None
|
return None
|
||||||
unit_id = str(unit.metadata.get("id") or self._make_doc_id(doc_type, unit.heading)).strip()
|
unit_id = self._path_policy.make_doc_id(
|
||||||
|
doc_type=doc_type,
|
||||||
|
heading=unit.heading,
|
||||||
|
hinted_doc_id=str(unit.metadata.get("id") or hint.doc_id or "").strip(),
|
||||||
|
)
|
||||||
op_hint = str(unit.metadata.get("op") or "create_or_update").strip().lower()
|
op_hint = str(unit.metadata.get("op") or "create_or_update").strip().lower()
|
||||||
target_hint = str(unit.metadata.get("target_path_hint") or "").strip()
|
application = str(unit.metadata.get("application") or context.analytics_meta.application or hint.application).strip()
|
||||||
path = self._resolve_path(doc_type, unit_id, unit.heading, target_hint, state)
|
platform = str(unit.metadata.get("platform") or context.analytics_meta.platform or hint.platform).strip().lower()
|
||||||
op = self._resolve_op(op_hint, unit_id, path, state)
|
page_type = str(unit.metadata.get("page_type") or hint.page_type or self._path_policy.default_page_type(doc_type)).strip()
|
||||||
|
path = self._path_policy.resolve_path(
|
||||||
|
doc_type=doc_type,
|
||||||
|
unit_id=unit_id,
|
||||||
|
application=application,
|
||||||
|
platform=platform,
|
||||||
|
page_type=page_type,
|
||||||
|
inferred_path=hint.path,
|
||||||
|
state=state,
|
||||||
|
)
|
||||||
|
op = self._path_policy.resolve_op(op_hint=op_hint, unit_id=unit_id, path=path, state=state)
|
||||||
source_refs = self._as_list(unit.metadata.get("source_refs")) or ["section: 5. Функциональные требования"]
|
source_refs = self._as_list(unit.metadata.get("source_refs")) or ["section: 5. Функциональные требования"]
|
||||||
related_docs = self._as_list(unit.metadata.get("related_docs"))
|
related_docs = self._as_list(unit.metadata.get("related_docs"))
|
||||||
reason = f"Из unit '{unit.heading}' системной аналитики ({context.analytics_meta.analysis_id or 'analysis'})."
|
reason = f"Из unit '{unit.heading}' системной аналитики ({context.analytics_meta.analysis_id or 'analysis'})."
|
||||||
@@ -126,34 +134,6 @@ class BuildChangePlanStep(WorkflowStep[DocUpdateFromFeatureContext]):
|
|||||||
related_docs=related_docs,
|
related_docs=related_docs,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _resolve_path(self, doc_type: str, unit_id: str, heading: str, hint: str, state: DocsState) -> str:
|
|
||||||
if unit_id in state.by_doc_id:
|
|
||||||
return state.by_doc_id[unit_id]
|
|
||||||
if hint:
|
|
||||||
return hint
|
|
||||||
folder = DOC_TYPE_TO_FOLDER.get(doc_type, "docs")
|
|
||||||
slug = self._slugify(unit_id or heading)
|
|
||||||
return f"{folder}/{slug}.md"
|
|
||||||
|
|
||||||
def _resolve_op(self, op_hint: str, unit_id: str, path: str, state: DocsState) -> str:
|
|
||||||
if op_hint == "delete":
|
|
||||||
return "delete"
|
|
||||||
if op_hint == "create":
|
|
||||||
return "create"
|
|
||||||
if op_hint == "update":
|
|
||||||
return "update"
|
|
||||||
if path in state.by_path or unit_id in state.by_doc_id:
|
|
||||||
return "update"
|
|
||||||
return "create"
|
|
||||||
|
|
||||||
def _make_doc_id(self, doc_type: str, heading: str) -> str:
|
|
||||||
slug = self._slugify(heading).replace("-", "_")
|
|
||||||
return f"{doc_type}.{slug}".strip(".")
|
|
||||||
|
|
||||||
def _slugify(self, value: str) -> str:
|
|
||||||
cleaned = re.sub(r"[^a-zA-Z0-9а-яА-Я_-]+", "-", value.lower()).strip("-")
|
|
||||||
return re.sub(r"-+", "-", cleaned) or "doc"
|
|
||||||
|
|
||||||
def _as_list(self, value: object) -> list[str]:
|
def _as_list(self, value: object) -> list[str]:
|
||||||
if isinstance(value, list):
|
if isinstance(value, list):
|
||||||
return [str(item).strip() for item in value if str(item).strip()]
|
return [str(item).strip() for item in value if str(item).strip()]
|
||||||
|
|||||||
+8
-5
@@ -15,7 +15,7 @@ class ParsedFeatureSpec:
|
|||||||
|
|
||||||
|
|
||||||
class FeatureMarkdownParser:
|
class FeatureMarkdownParser:
|
||||||
_META_KEYS = {"analysis_id", "domains", "subdomains"}
|
_META_KEYS = {"analysis_id", "application", "platform", "domain", "sub_domain", "domains", "subdomains"}
|
||||||
|
|
||||||
def parse(self, content: str) -> ParsedFeatureSpec:
|
def parse(self, content: str) -> ParsedFeatureSpec:
|
||||||
lines = content.splitlines()
|
lines = content.splitlines()
|
||||||
@@ -54,8 +54,10 @@ class FeatureMarkdownParser:
|
|||||||
i = j
|
i = j
|
||||||
return AnalyticsMeta(
|
return AnalyticsMeta(
|
||||||
analysis_id=str(values.get("analysis_id") or "").strip(),
|
analysis_id=str(values.get("analysis_id") or "").strip(),
|
||||||
domains=self._as_list(values.get("domains")),
|
application=str(values.get("application") or "").strip(),
|
||||||
subdomains=self._as_list(values.get("subdomains")),
|
platform=str(values.get("platform") or "").strip(),
|
||||||
|
domains=self._as_list(values.get("domain") or values.get("domains")),
|
||||||
|
subdomains=self._as_list(values.get("sub_domain") or values.get("subdomains")),
|
||||||
)
|
)
|
||||||
|
|
||||||
def _extract_functional_section(self, lines: list[str]) -> list[str]:
|
def _extract_functional_section(self, lines: list[str]) -> list[str]:
|
||||||
@@ -109,9 +111,10 @@ class FeatureMarkdownParser:
|
|||||||
body_start = i + 1
|
body_start = i + 1
|
||||||
i += 1
|
i += 1
|
||||||
continue
|
continue
|
||||||
if ":" not in stripped:
|
line = stripped[2:].strip() if stripped.startswith("- ") else stripped
|
||||||
|
if ":" not in line:
|
||||||
break
|
break
|
||||||
key, value = [part.strip() for part in stripped.split(":", 1)]
|
key, value = [part.strip() for part in line.split(":", 1)]
|
||||||
if not key.isidentifier():
|
if not key.isidentifier():
|
||||||
break
|
break
|
||||||
if value:
|
if value:
|
||||||
|
|||||||
+4
-2
@@ -21,9 +21,9 @@ class ParseFeatureRequirementsStep(WorkflowStep[DocUpdateFromFeatureContext]):
|
|||||||
if not context.analytics_meta.analysis_id:
|
if not context.analytics_meta.analysis_id:
|
||||||
context.issues.append("Отсутствует analysis_id в metadata аналитики.")
|
context.issues.append("Отсутствует analysis_id в metadata аналитики.")
|
||||||
if not context.analytics_meta.domains:
|
if not context.analytics_meta.domains:
|
||||||
context.issues.append("Отсутствует domains в metadata аналитики.")
|
context.issues.append("Отсутствует domain в metadata аналитики.")
|
||||||
if not context.analytics_meta.subdomains:
|
if not context.analytics_meta.subdomains:
|
||||||
context.issues.append("Отсутствует subdomains в metadata аналитики.")
|
context.issues.append("Отсутствует sub_domain в metadata аналитики.")
|
||||||
if not context.units:
|
if not context.units:
|
||||||
context.issues.append(
|
context.issues.append(
|
||||||
"Не найдены units в разделе '## 5. Функциональные требования' с заголовками уровня '###'."
|
"Не найдены units в разделе '## 5. Функциональные требования' с заголовками уровня '###'."
|
||||||
@@ -33,6 +33,8 @@ class ParseFeatureRequirementsStep(WorkflowStep[DocUpdateFromFeatureContext]):
|
|||||||
def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
|
def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
|
||||||
return {
|
return {
|
||||||
"analysis_id": context.analytics_meta.analysis_id,
|
"analysis_id": context.analytics_meta.analysis_id,
|
||||||
|
"application": context.analytics_meta.application,
|
||||||
|
"platform": context.analytics_meta.platform,
|
||||||
"domains": context.analytics_meta.domains,
|
"domains": context.analytics_meta.domains,
|
||||||
"subdomains": context.analytics_meta.subdomains,
|
"subdomains": context.analytics_meta.subdomains,
|
||||||
"units": len(context.units),
|
"units": len(context.units),
|
||||||
|
|||||||
@@ -0,0 +1,64 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class PlanUnitHint:
|
||||||
|
doc_type: str = ""
|
||||||
|
doc_id: str = ""
|
||||||
|
application: str = ""
|
||||||
|
platform: str = ""
|
||||||
|
page_type: str = ""
|
||||||
|
path: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
def parse_plan_hints(raw: str, allowed_doc_types: tuple[str, ...]) -> dict[int, PlanUnitHint]:
|
||||||
|
try:
|
||||||
|
data = json.loads(str(raw or "").strip())
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
return {}
|
||||||
|
rows = data.get("items") if isinstance(data, dict) else []
|
||||||
|
if not isinstance(rows, list):
|
||||||
|
return {}
|
||||||
|
result: dict[int, PlanUnitHint] = {}
|
||||||
|
for row in rows:
|
||||||
|
if not isinstance(row, dict):
|
||||||
|
continue
|
||||||
|
index = row.get("index")
|
||||||
|
if not isinstance(index, int):
|
||||||
|
continue
|
||||||
|
doc_type = str(row.get("doc_type") or "").strip()
|
||||||
|
result[index] = PlanUnitHint(
|
||||||
|
doc_type=doc_type if doc_type in allowed_doc_types else "",
|
||||||
|
doc_id=str(row.get("id") or "").strip(),
|
||||||
|
application=str(row.get("application") or "").strip(),
|
||||||
|
platform=str(row.get("platform") or "").strip().lower(),
|
||||||
|
page_type=str(row.get("page_type") or "").strip(),
|
||||||
|
path=str(row.get("path") or "").strip(),
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def page_type_for_doc_type(doc_type: str, doc_type_to_folder: dict[str, str]) -> str:
|
||||||
|
if doc_type == "index_page":
|
||||||
|
return "index"
|
||||||
|
folder = doc_type_to_folder.get(doc_type, "docs")
|
||||||
|
parts = folder.split("/")
|
||||||
|
return parts[-1] if parts else "docs"
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_inferred_path(inferred_path: str, unit_id: str) -> str:
|
||||||
|
path = str(inferred_path or "").strip()
|
||||||
|
if not path or not path.startswith("docs/"):
|
||||||
|
return ""
|
||||||
|
if not path.endswith(f"/{unit_id}.md"):
|
||||||
|
return ""
|
||||||
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_path_segment(value: str) -> str:
|
||||||
|
cleaned = re.sub(r"[^a-zA-Z0-9._-]+", "-", str(value or "").strip().lower()).strip("-")
|
||||||
|
return re.sub(r"-+", "-", cleaned) or "unknown"
|
||||||
+59
@@ -0,0 +1,59 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.docs_state_loader import DocsState
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.plan_hints import (
|
||||||
|
normalize_inferred_path,
|
||||||
|
normalize_path_segment,
|
||||||
|
page_type_for_doc_type,
|
||||||
|
)
|
||||||
|
|
||||||
|
_VALID_PLATFORMS = {"web", "ufs", "pprb"}
|
||||||
|
|
||||||
|
|
||||||
|
class PlanPathPolicy:
|
||||||
|
def __init__(self, doc_type_to_folder: dict[str, str]) -> None:
|
||||||
|
self._doc_type_to_folder = dict(doc_type_to_folder)
|
||||||
|
|
||||||
|
def make_doc_id(self, *, doc_type: str, heading: str, hinted_doc_id: str) -> str:
|
||||||
|
return str(hinted_doc_id or f"{doc_type}.{self._slugify(heading).replace('-', '_')}").strip(".")
|
||||||
|
|
||||||
|
def resolve_path(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
doc_type: str,
|
||||||
|
unit_id: str,
|
||||||
|
application: str,
|
||||||
|
platform: str,
|
||||||
|
page_type: str,
|
||||||
|
inferred_path: str,
|
||||||
|
state: DocsState,
|
||||||
|
) -> str:
|
||||||
|
if unit_id in state.by_doc_id:
|
||||||
|
return state.by_doc_id[unit_id]
|
||||||
|
normalized_inferred = normalize_inferred_path(inferred_path, unit_id)
|
||||||
|
if normalized_inferred:
|
||||||
|
return normalized_inferred
|
||||||
|
page = normalize_path_segment(page_type or page_type_for_doc_type(doc_type, self._doc_type_to_folder))
|
||||||
|
app = normalize_path_segment(application or "unknown_app")
|
||||||
|
plat = platform if platform in _VALID_PLATFORMS else "unknown_platform"
|
||||||
|
return f"docs/{app}/{plat}/{page}/{unit_id}.md"
|
||||||
|
|
||||||
|
def resolve_op(self, *, op_hint: str, unit_id: str, path: str, state: DocsState) -> str:
|
||||||
|
if op_hint == "delete":
|
||||||
|
return "delete"
|
||||||
|
if op_hint == "create":
|
||||||
|
return "create"
|
||||||
|
if op_hint == "update":
|
||||||
|
return "update"
|
||||||
|
if path in state.by_path or unit_id in state.by_doc_id:
|
||||||
|
return "update"
|
||||||
|
return "create"
|
||||||
|
|
||||||
|
def default_page_type(self, doc_type: str) -> str:
|
||||||
|
return page_type_for_doc_type(doc_type, self._doc_type_to_folder)
|
||||||
|
|
||||||
|
def _slugify(self, value: str) -> str:
|
||||||
|
cleaned = re.sub(r"[^a-zA-Z0-9а-яА-Я_-]+", "-", value.lower()).strip("-")
|
||||||
|
return re.sub(r"-+", "-", cleaned) or "doc"
|
||||||
+49
-2
@@ -7,7 +7,16 @@ prompts:
|
|||||||
Верни только JSON:
|
Верни только JSON:
|
||||||
{
|
{
|
||||||
"items": [
|
"items": [
|
||||||
{"index": 0, "doc_type": "api_method", "reason": "..."}
|
{
|
||||||
|
"index": 0,
|
||||||
|
"doc_type": "api_method",
|
||||||
|
"id": "ufs.contacts_dgr.api.create",
|
||||||
|
"application": "coverage",
|
||||||
|
"platform": "ufs",
|
||||||
|
"page_type": "api",
|
||||||
|
"path": "docs/coverage/ufs/api/ufs.contacts_dgr.api.create.md",
|
||||||
|
"reason": "..."
|
||||||
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -15,12 +24,22 @@ prompts:
|
|||||||
- Используй только doc_type из allowed_doc_types.
|
- Используй только doc_type из allowed_doc_types.
|
||||||
- Не пропускай item, даже если не уверен: выбери наиболее близкий тип.
|
- Не пропускай item, даже если не уверен: выбери наиболее близкий тип.
|
||||||
- Ориентируйся на heading и snippet.
|
- Ориентируйся на heading и snippet.
|
||||||
|
- path — это служебное поле плана изменений, не поле frontmatter.
|
||||||
|
- id:
|
||||||
|
- брать из metadata unit, если задан;
|
||||||
|
- если id нет, сгенерировать стабильный id по смыслу unit и по аналогии с существующей документацией.
|
||||||
|
- имя файла всегда формировать строго как <id>.md.
|
||||||
|
- для существующего документа (если это видно из контекста и индекса) путь не менять.
|
||||||
|
- для нового документа путь формировать строго как docs/<application>/<platform>/<page_type>/<id>.md.
|
||||||
|
- platform использовать только из допустимых значений: web, ufs, pprb.
|
||||||
|
- page_type выбирать по doc_type (например ui_page -> ui, api_method -> api, logic_block -> logic).
|
||||||
|
- последний сегмент path обязан совпадать с <id>.md.
|
||||||
- Никакого markdown и текста вне JSON.
|
- Никакого markdown и текста вне JSON.
|
||||||
|
|
||||||
build_doc_changeset: |
|
build_doc_changeset: |
|
||||||
Ты формируешь один item changeset для документации на основе системной аналитики и правил doc_rules.
|
Ты формируешь один item changeset для документации на основе системной аналитики и правил doc_rules.
|
||||||
|
|
||||||
Верни только JSON-объект формата:
|
Верни только один JSON-объект (RFC8259) формата:
|
||||||
{
|
{
|
||||||
"op": "create|update|delete",
|
"op": "create|update|delete",
|
||||||
"path": "docs/...",
|
"path": "docs/...",
|
||||||
@@ -28,9 +47,37 @@ prompts:
|
|||||||
"proposed_content": "полный markdown документа для create/update"
|
"proposed_content": "полный markdown документа для create/update"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Схема и ограничения:
|
||||||
|
- Обязательные поля всегда: op, path, reason.
|
||||||
|
- Для op=create/update поле proposed_content обязательно и содержит полный markdown документа:
|
||||||
|
1) frontmatter между --- и ---,
|
||||||
|
2) затем body согласно doc_rules.
|
||||||
|
- Для op=delete поле proposed_content запрещено.
|
||||||
|
- В JSON используй двойные кавычки, без trailing commas.
|
||||||
|
- Никаких code fences (```), комментариев и текста до/после JSON.
|
||||||
|
|
||||||
Правила:
|
Правила:
|
||||||
- Строго соблюдай структуру и ограничения из doc_rules_context.
|
- Строго соблюдай структуру и ограничения из doc_rules_context.
|
||||||
- Для create/update верни полный итоговый markdown (frontmatter + body).
|
- Для create/update верни полный итоговый markdown (frontmatter + body).
|
||||||
- Для update не используй placeholder-тексты; возвращай пригодный к сохранению документ.
|
- Для update не используй placeholder-тексты; возвращай пригодный к сохранению документ.
|
||||||
- reason обязателен, короткий, по сути изменения.
|
- reason обязателен, короткий, по сути изменения.
|
||||||
- Никакого markdown и текста вне JSON.
|
- Никакого markdown и текста вне JSON.
|
||||||
|
|
||||||
|
repair_doc_changeset_json: |
|
||||||
|
Ты ремонтируешь невалидный ответ модели и должен вернуть строго валидный JSON changeset.
|
||||||
|
|
||||||
|
Вход содержит:
|
||||||
|
- expected_contract: ожидаемые поля и ограничения.
|
||||||
|
- raw_llm_output: исходный (возможно невалидный) ответ.
|
||||||
|
|
||||||
|
Задача:
|
||||||
|
- Извлеки максимально полный смысл из raw_llm_output.
|
||||||
|
- Верни ровно один JSON-объект, соответствующий expected_contract.
|
||||||
|
- Если часть данных отсутствует, используй безопасные значения по умолчанию:
|
||||||
|
- reason: "generated by repair"
|
||||||
|
- proposed_content: только если op=create/update, иначе не добавляй.
|
||||||
|
|
||||||
|
Ограничения вывода:
|
||||||
|
- Только JSON-объект, без markdown/code fences/комментариев.
|
||||||
|
- Двойные кавычки, без trailing commas.
|
||||||
|
- Внутри строк (особенно proposed_content) все переносы строк должны быть экранированы как \\n, не literal newline.
|
||||||
|
|||||||
+2
@@ -64,6 +64,8 @@ class DocUpdateFromFeatureWorkflowGraph(WorkflowGraph[TContext]):
|
|||||||
"project_root": str(getattr(context, "project_root", "") or ""),
|
"project_root": str(getattr(context, "project_root", "") or ""),
|
||||||
"feature_content_len": len(str(getattr(context, "feature_content", "") or "")),
|
"feature_content_len": len(str(getattr(context, "feature_content", "") or "")),
|
||||||
"analysis_id": str(getattr(analytics, "analysis_id", "") or ""),
|
"analysis_id": str(getattr(analytics, "analysis_id", "") or ""),
|
||||||
|
"application": str(getattr(analytics, "application", "") or ""),
|
||||||
|
"platform": str(getattr(analytics, "platform", "") or ""),
|
||||||
"domains": list(getattr(analytics, "domains", []) or []),
|
"domains": list(getattr(analytics, "domains", []) or []),
|
||||||
"subdomains": list(getattr(analytics, "subdomains", []) or []),
|
"subdomains": list(getattr(analytics, "subdomains", []) or []),
|
||||||
"units_count": len(units),
|
"units_count": len(units),
|
||||||
|
|||||||
+2
@@ -6,6 +6,8 @@ from dataclasses import dataclass, field
|
|||||||
@dataclass(slots=True)
|
@dataclass(slots=True)
|
||||||
class AnalyticsMeta:
|
class AnalyticsMeta:
|
||||||
analysis_id: str = ""
|
analysis_id: str = ""
|
||||||
|
application: str = ""
|
||||||
|
platform: str = ""
|
||||||
domains: list[str] = field(default_factory=list)
|
domains: list[str] = field(default_factory=list)
|
||||||
subdomains: list[str] = field(default_factory=list)
|
subdomains: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
from app.core.api.application.session_service import SessionService
|
from app.core.api.application.session_service import SessionService
|
||||||
@@ -16,6 +17,8 @@ from app.infra.observability.request_trace_logger import RequestTraceLogger
|
|||||||
from app.schemas.common import ErrorPayload, ModuleName
|
from app.schemas.common import ErrorPayload, ModuleName
|
||||||
from app.schemas.orchestration import RequestExecutionStatus
|
from app.schemas.orchestration import RequestExecutionStatus
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class AgentRuntime:
|
class AgentRuntime:
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -35,6 +38,12 @@ class AgentRuntime:
|
|||||||
self._trace_logger = trace_logger
|
self._trace_logger = trace_logger
|
||||||
|
|
||||||
async def run(self, request: AgentRequest, session: AgentSession) -> None:
|
async def run(self, request: AgentRequest, session: AgentSession) -> None:
|
||||||
|
LOGGER.warning(
|
||||||
|
"runtime run started: request_id=%s process_version=%s active_rag_session_id=%s",
|
||||||
|
request.request_id,
|
||||||
|
request.process_version,
|
||||||
|
session.active_rag_session_id,
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
process = self._resolve_process(request.process_version)
|
process = self._resolve_process(request.process_version)
|
||||||
self._start_request(request, session)
|
self._start_request(request, session)
|
||||||
@@ -51,7 +60,19 @@ class AgentRuntime:
|
|||||||
request.apply_changeset = bool(result.apply_changeset)
|
request.apply_changeset = bool(result.apply_changeset)
|
||||||
await self._publish_result(request)
|
await self._publish_result(request)
|
||||||
self._complete_request(request, session)
|
self._complete_request(request, session)
|
||||||
|
LOGGER.warning(
|
||||||
|
"runtime run completed: request_id=%s status=%s changeset_items=%s apply_changeset=%s",
|
||||||
|
request.request_id,
|
||||||
|
request.status,
|
||||||
|
len(request.changeset),
|
||||||
|
request.apply_changeset,
|
||||||
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
LOGGER.exception(
|
||||||
|
"runtime run failed: request_id=%s process_version=%s",
|
||||||
|
request.request_id,
|
||||||
|
request.process_version,
|
||||||
|
)
|
||||||
await self._fail_request(request, exc)
|
await self._fail_request(request, exc)
|
||||||
|
|
||||||
def _resolve_process(self, version: str):
|
def _resolve_process(self, version: str):
|
||||||
@@ -66,8 +87,8 @@ class AgentRuntime:
|
|||||||
self._trace_logger.start_request(request, session)
|
self._trace_logger.start_request(request, session)
|
||||||
|
|
||||||
async def _announce_start(self, request_id: str, process_version: str) -> None:
|
async def _announce_start(self, request_id: str, process_version: str) -> None:
|
||||||
await self._publisher.publish_status(request_id, "runtime", "Запрос принят и поставлен в обработку.")
|
await self._safe_publish_status(request_id, "runtime", "Запрос принят и поставлен в обработку.")
|
||||||
await self._publisher.publish_status(
|
await self._safe_publish_status(
|
||||||
request_id,
|
request_id,
|
||||||
"runtime",
|
"runtime",
|
||||||
f"Запускаю процесс {process_version}.",
|
f"Запускаю процесс {process_version}.",
|
||||||
@@ -75,8 +96,11 @@ class AgentRuntime:
|
|||||||
)
|
)
|
||||||
|
|
||||||
async def _publish_result(self, request: AgentRequest) -> None:
|
async def _publish_result(self, request: AgentRequest) -> None:
|
||||||
|
try:
|
||||||
await self._publisher.publish_user(request.request_id, "agent", request.answer or "")
|
await self._publisher.publish_user(request.request_id, "agent", request.answer or "")
|
||||||
await self._publisher.publish_status(request.request_id, "runtime", "Обработка запроса завершена.")
|
except Exception:
|
||||||
|
LOGGER.exception("failed to publish user event: request_id=%s", request.request_id)
|
||||||
|
await self._safe_publish_status(request.request_id, "runtime", "Обработка запроса завершена.")
|
||||||
|
|
||||||
def _complete_request(self, request: AgentRequest, session: AgentSession) -> None:
|
def _complete_request(self, request: AgentRequest, session: AgentSession) -> None:
|
||||||
session.append_turn(user_message=request.message, assistant_message=request.answer or "")
|
session.append_turn(user_message=request.message, assistant_message=request.answer or "")
|
||||||
@@ -92,7 +116,7 @@ class AgentRuntime:
|
|||||||
request.error = self._build_error_payload(exc)
|
request.error = self._build_error_payload(exc)
|
||||||
self._request_store.save(request)
|
self._request_store.save(request)
|
||||||
self._trace_logger.fail_request(request)
|
self._trace_logger.fail_request(request)
|
||||||
await self._publisher.publish_status(
|
await self._safe_publish_status(
|
||||||
request.request_id,
|
request.request_id,
|
||||||
"runtime",
|
"runtime",
|
||||||
"Во время обработки запроса произошла ошибка.",
|
"Во время обработки запроса произошла ошибка.",
|
||||||
@@ -107,3 +131,14 @@ class AgentRuntime:
|
|||||||
desc="Agent request failed unexpectedly.",
|
desc="Agent request failed unexpectedly.",
|
||||||
module=ModuleName.AGENT,
|
module=ModuleName.AGENT,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def _safe_publish_status(self, request_id: str, source: str, text: str, payload: dict | None = None) -> None:
|
||||||
|
try:
|
||||||
|
await self._publisher.publish_status(request_id, source, text, payload)
|
||||||
|
except Exception:
|
||||||
|
LOGGER.exception(
|
||||||
|
"failed to publish status event: request_id=%s source=%s text=%s",
|
||||||
|
request_id,
|
||||||
|
source,
|
||||||
|
text,
|
||||||
|
)
|
||||||
|
|||||||
@@ -1,10 +1,14 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
from app.core.api.domain.events.client_event import ClientEventRecord
|
from app.core.api.domain.events.client_event import ClientEventRecord
|
||||||
from app.core.api.infrastructure.streaming.sse_event_channel import SseEventChannel
|
from app.core.api.infrastructure.streaming.sse_event_channel import SseEventChannel
|
||||||
from app.infra.observability.request_trace_logger import RequestTraceLogger
|
from app.infra.observability.request_trace_logger import RequestTraceLogger
|
||||||
from app.schemas.client_events import ClientEventType
|
from app.schemas.client_events import ClientEventType
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class RuntimeEventPublisher:
|
class RuntimeEventPublisher:
|
||||||
def __init__(self, channel: SseEventChannel, trace_logger: RequestTraceLogger) -> None:
|
def __init__(self, channel: SseEventChannel, trace_logger: RequestTraceLogger) -> None:
|
||||||
@@ -12,9 +16,23 @@ class RuntimeEventPublisher:
|
|||||||
self._trace_logger = trace_logger
|
self._trace_logger = trace_logger
|
||||||
|
|
||||||
async def publish_status(self, request_id: str, source: str, text: str, payload: dict | None = None) -> None:
|
async def publish_status(self, request_id: str, source: str, text: str, payload: dict | None = None) -> None:
|
||||||
|
LOGGER.warning(
|
||||||
|
"publish status: request_id=%s source=%s text=%s payload_keys=%s",
|
||||||
|
request_id,
|
||||||
|
source,
|
||||||
|
text,
|
||||||
|
sorted(list((payload or {}).keys())),
|
||||||
|
)
|
||||||
await self._publish(request_id, ClientEventType.STATUS, source, text, payload)
|
await self._publish(request_id, ClientEventType.STATUS, source, text, payload)
|
||||||
|
|
||||||
async def publish_user(self, request_id: str, source: str, text: str, payload: dict | None = None) -> None:
|
async def publish_user(self, request_id: str, source: str, text: str, payload: dict | None = None) -> None:
|
||||||
|
LOGGER.warning(
|
||||||
|
"publish user: request_id=%s source=%s text_len=%s payload_keys=%s",
|
||||||
|
request_id,
|
||||||
|
source,
|
||||||
|
len(text or ""),
|
||||||
|
sorted(list((payload or {}).keys())),
|
||||||
|
)
|
||||||
await self._publish(request_id, ClientEventType.USER, source, text, payload)
|
await self._publish(request_id, ClientEventType.USER, source, text, payload)
|
||||||
|
|
||||||
async def _publish(
|
async def _publish(
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import logging
|
||||||
|
|
||||||
from app.core.api.domain.models.agent_request import AgentRequest
|
from app.core.api.domain.models.agent_request import AgentRequest
|
||||||
from app.core.api.infrastructure.ids.request_id_factory import RequestIdFactory
|
from app.core.api.infrastructure.ids.request_id_factory import RequestIdFactory
|
||||||
@@ -8,6 +9,8 @@ from app.core.api.infrastructure.stores.in_memory_request_store import InMemoryR
|
|||||||
from app.core.api.application.session_service import SessionService
|
from app.core.api.application.session_service import SessionService
|
||||||
from app.core.agent.runtime import AgentRuntime
|
from app.core.agent.runtime import AgentRuntime
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class RequestService:
|
class RequestService:
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -31,8 +34,28 @@ class RequestService:
|
|||||||
process_version=process_version,
|
process_version=process_version,
|
||||||
)
|
)
|
||||||
self._request_store.save(request)
|
self._request_store.save(request)
|
||||||
asyncio.create_task(self._runtime.run(request, session))
|
LOGGER.warning(
|
||||||
|
"plugin request accepted: request_id=%s session_id=%s process_version=%s message=%s",
|
||||||
|
request.request_id,
|
||||||
|
session_id,
|
||||||
|
process_version,
|
||||||
|
(message or "").replace("\n", "\\n")[:500],
|
||||||
|
)
|
||||||
|
task = asyncio.create_task(self._runtime.run(request, session), name=f"agent-runtime:{request.request_id}")
|
||||||
|
task.add_done_callback(self._log_task_result)
|
||||||
return request
|
return request
|
||||||
|
|
||||||
def get(self, request_id: str) -> AgentRequest | None:
|
def get(self, request_id: str) -> AgentRequest | None:
|
||||||
return self._request_store.get(request_id)
|
return self._request_store.get(request_id)
|
||||||
|
|
||||||
|
def _log_task_result(self, task: asyncio.Task) -> None:
|
||||||
|
try:
|
||||||
|
exc = task.exception()
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
LOGGER.warning("agent runtime task cancelled: task=%s", task.get_name())
|
||||||
|
return
|
||||||
|
except Exception:
|
||||||
|
LOGGER.exception("failed to inspect agent runtime task result: task=%s", task.get_name())
|
||||||
|
return
|
||||||
|
if exc is not None:
|
||||||
|
LOGGER.exception("agent runtime task crashed: task=%s", task.get_name(), exc_info=exc)
|
||||||
|
|||||||
@@ -1,10 +1,14 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
from app.core.api.infrastructure.streaming.sse_response_builder import build_sse_response
|
from app.core.api.infrastructure.streaming.sse_response_builder import build_sse_response
|
||||||
from app.core.rag.module import RagModule
|
from app.core.rag.module import RagModule
|
||||||
from app.core.shared.messaging import EventBus
|
from app.core.shared.messaging import EventBus
|
||||||
from app.schemas.rag_sessions import RagSessionJobResponse
|
from app.schemas.rag_sessions import RagSessionJobResponse
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class RagPublicController:
|
class RagPublicController:
|
||||||
def __init__(self, rag: RagModule) -> None:
|
def __init__(self, rag: RagModule) -> None:
|
||||||
@@ -12,6 +16,14 @@ class RagPublicController:
|
|||||||
|
|
||||||
def get_job(self, rag_session_id: str, index_job_id: str) -> RagSessionJobResponse:
|
def get_job(self, rag_session_id: str, index_job_id: str) -> RagSessionJobResponse:
|
||||||
job = self._rag.get_session_job(rag_session_id, index_job_id)
|
job = self._rag.get_session_job(rag_session_id, index_job_id)
|
||||||
|
LOGGER.warning(
|
||||||
|
"rag job polled: rag_session_id=%s job_id=%s status=%s indexed=%s failed=%s",
|
||||||
|
rag_session_id,
|
||||||
|
index_job_id,
|
||||||
|
job.status.value if hasattr(job.status, "value") else str(job.status),
|
||||||
|
job.indexed_files,
|
||||||
|
job.failed_files,
|
||||||
|
)
|
||||||
return RagSessionJobResponse(
|
return RagSessionJobResponse(
|
||||||
rag_session_id=rag_session_id,
|
rag_session_id=rag_session_id,
|
||||||
index_job_id=job.index_job_id,
|
index_job_id=job.index_job_id,
|
||||||
@@ -25,6 +37,7 @@ class RagPublicController:
|
|||||||
|
|
||||||
async def stream_job_events(self, rag_session_id: str, index_job_id: str):
|
async def stream_job_events(self, rag_session_id: str, index_job_id: str):
|
||||||
channel_id, queue = await self._rag.subscribe_session_job_events(rag_session_id, index_job_id)
|
channel_id, queue = await self._rag.subscribe_session_job_events(rag_session_id, index_job_id)
|
||||||
|
LOGGER.warning("rag job events subscribed: rag_session_id=%s job_id=%s", rag_session_id, index_job_id)
|
||||||
return build_sse_response(
|
return build_sse_response(
|
||||||
queue,
|
queue,
|
||||||
encoder=EventBus.as_sse,
|
encoder=EventBus.as_sse,
|
||||||
|
|||||||
@@ -7,5 +7,11 @@ class GigaChatEmbedder:
|
|||||||
def __init__(self, client: GigaChatClient) -> None:
|
def __init__(self, client: GigaChatClient) -> None:
|
||||||
self._client = client
|
self._client = client
|
||||||
|
|
||||||
def embed(self, texts: list[str]) -> list[list[float]]:
|
def embed(
|
||||||
return self._client.embed(texts)
|
self,
|
||||||
|
texts: list[str],
|
||||||
|
*,
|
||||||
|
timeout_sec: int | None = None,
|
||||||
|
max_retries: int | None = None,
|
||||||
|
) -> list[list[float]]:
|
||||||
|
return self._client.embed(texts, timeout_sec=timeout_sec, max_retries=max_retries)
|
||||||
|
|||||||
@@ -2,46 +2,33 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from app.core.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource
|
from app.core.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource
|
||||||
from app.core.rag.indexing.docs.chunkers.markdown_chunker import SectionChunk
|
from app.core.rag.indexing.docs.chunkers.markdown_chunker import SectionChunk
|
||||||
|
from app.core.rag.indexing.docs.frontmatter_metadata import merge_frontmatter_metadata
|
||||||
from app.core.rag.indexing.docs.frontmatter_view import DocsFrontmatterView
|
from app.core.rag.indexing.docs.frontmatter_view import DocsFrontmatterView
|
||||||
|
|
||||||
|
|
||||||
class DocsDocumentBuilder:
|
class DocsDocumentBuilder:
|
||||||
def build_document_catalog(self, source: RagSource, frontmatter: dict, summary_text: str, doc_kind: str, *, fallback_title: str) -> RagDocument:
|
def build_document_catalog(self, source: RagSource, frontmatter: dict, summary_text: str, doc_kind: str, *, fallback_title: str) -> RagDocument:
|
||||||
view = DocsFrontmatterView(frontmatter)
|
view = DocsFrontmatterView(frontmatter)
|
||||||
document_id = view.document_id or source.path
|
metadata = merge_frontmatter_metadata({}, frontmatter)
|
||||||
metadata = {
|
if view.document_id:
|
||||||
"document_id": document_id,
|
metadata["document_id"] = view.document_id
|
||||||
"type": view.doc_type,
|
metadata["source_path"] = source.path
|
||||||
"name": view.name,
|
metadata["summary_text"] = summary_text[:4000]
|
||||||
"title": view.title(fallback_title),
|
metadata["doc_kind"] = doc_kind
|
||||||
"module": view.module,
|
row_title = str(frontmatter.get("title") or "").strip() or fallback_title or source.path
|
||||||
"domain": view.domain,
|
|
||||||
"subdomain": view.subdomain,
|
|
||||||
"layer": view.layer,
|
|
||||||
"status": view.status,
|
|
||||||
"updated_at": view.updated_at,
|
|
||||||
"tags": view.tags,
|
|
||||||
"entities": view.entities,
|
|
||||||
"parent": view.parent,
|
|
||||||
"children": view.children,
|
|
||||||
"links": view.links,
|
|
||||||
"source_path": source.path,
|
|
||||||
"summary_text": summary_text[:4000],
|
|
||||||
"doc_kind": doc_kind,
|
|
||||||
"artifact_type": "DOCS",
|
|
||||||
}
|
|
||||||
return RagDocument(
|
return RagDocument(
|
||||||
layer=RagLayer.DOCS_DOCUMENT_CATALOG,
|
layer=RagLayer.DOCS_DOCUMENT_CATALOG,
|
||||||
source=source,
|
source=source,
|
||||||
title=metadata["title"] or document_id,
|
title=row_title,
|
||||||
text=summary_text[:4000] or metadata["title"] or document_id,
|
text=summary_text[:4000] or row_title,
|
||||||
metadata=metadata,
|
metadata=metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
def build_doc_chunk(self, source: RagSource, chunk: SectionChunk, frontmatter: dict, doc_kind: str) -> RagDocument:
|
def build_doc_chunk(self, source: RagSource, chunk: SectionChunk, frontmatter: dict, doc_kind: str) -> RagDocument:
|
||||||
view = DocsFrontmatterView(frontmatter)
|
view = DocsFrontmatterView(frontmatter)
|
||||||
document_id = view.document_id or source.path
|
document_id = view.document_id or source.path
|
||||||
metadata = {
|
metadata = merge_frontmatter_metadata(
|
||||||
|
{
|
||||||
"document_id": document_id,
|
"document_id": document_id,
|
||||||
"type": view.doc_type,
|
"type": view.doc_type,
|
||||||
"module": view.module,
|
"module": view.module,
|
||||||
@@ -54,7 +41,9 @@ class DocsDocumentBuilder:
|
|||||||
"doc_kind": doc_kind,
|
"doc_kind": doc_kind,
|
||||||
"source_path": source.path,
|
"source_path": source.path,
|
||||||
"artifact_type": "DOCS",
|
"artifact_type": "DOCS",
|
||||||
}
|
},
|
||||||
|
frontmatter,
|
||||||
|
)
|
||||||
return RagDocument(
|
return RagDocument(
|
||||||
layer=RagLayer.DOCS_DOC_CHUNKS,
|
layer=RagLayer.DOCS_DOC_CHUNKS,
|
||||||
source=source,
|
source=source,
|
||||||
@@ -67,7 +56,8 @@ class DocsDocumentBuilder:
|
|||||||
def build_entity_record(self, source: RagSource, frontmatter: dict, entity: str) -> RagDocument:
|
def build_entity_record(self, source: RagSource, frontmatter: dict, entity: str) -> RagDocument:
|
||||||
view = DocsFrontmatterView(frontmatter)
|
view = DocsFrontmatterView(frontmatter)
|
||||||
document_id = view.document_id or source.path
|
document_id = view.document_id or source.path
|
||||||
metadata = {
|
metadata = merge_frontmatter_metadata(
|
||||||
|
{
|
||||||
"entity_name": entity,
|
"entity_name": entity,
|
||||||
"document_id": document_id,
|
"document_id": document_id,
|
||||||
"document_type": view.doc_type,
|
"document_type": view.doc_type,
|
||||||
@@ -77,7 +67,9 @@ class DocsDocumentBuilder:
|
|||||||
"tags": view.tags,
|
"tags": view.tags,
|
||||||
"source_path": source.path,
|
"source_path": source.path,
|
||||||
"artifact_type": "DOCS",
|
"artifact_type": "DOCS",
|
||||||
}
|
},
|
||||||
|
frontmatter,
|
||||||
|
)
|
||||||
return RagDocument(
|
return RagDocument(
|
||||||
layer=RagLayer.DOCS_ENTITY_CATALOG,
|
layer=RagLayer.DOCS_ENTITY_CATALOG,
|
||||||
source=source,
|
source=source,
|
||||||
|
|||||||
@@ -0,0 +1,13 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
def merge_frontmatter_metadata(base: dict[str, object], frontmatter: dict) -> dict[str, object]:
|
||||||
|
merged = dict(base)
|
||||||
|
if not isinstance(frontmatter, dict):
|
||||||
|
return merged
|
||||||
|
for raw_key, value in frontmatter.items():
|
||||||
|
key = str(raw_key or "").strip()
|
||||||
|
if not key or key == "__frontmatter_parse_error__" or key in merged:
|
||||||
|
continue
|
||||||
|
merged[key] = value
|
||||||
|
return merged
|
||||||
@@ -1,12 +1,17 @@
|
|||||||
"""Хранилище задач индексации RAG (in-memory + persistence)."""
|
"""Хранилище задач индексации RAG (in-memory + persistence)."""
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
from app.core.rag.persistence.repository import RagRepository
|
from app.core.rag.persistence.repository import RagRepository
|
||||||
from app.schemas.common import ErrorPayload, ModuleName
|
from app.schemas.common import ErrorPayload, ModuleName
|
||||||
from app.schemas.indexing import IndexJobStatus
|
from app.schemas.indexing import IndexJobStatus
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class IndexJob:
|
class IndexJob:
|
||||||
@@ -46,7 +51,7 @@ class IndexJobStore:
|
|||||||
desc=row.error_desc or "",
|
desc=row.error_desc or "",
|
||||||
module=module,
|
module=module,
|
||||||
)
|
)
|
||||||
return IndexJob(
|
job = IndexJob(
|
||||||
index_job_id=row.index_job_id,
|
index_job_id=row.index_job_id,
|
||||||
rag_session_id=row.rag_session_id,
|
rag_session_id=row.rag_session_id,
|
||||||
status=IndexJobStatus(row.status),
|
status=IndexJobStatus(row.status),
|
||||||
@@ -56,6 +61,25 @@ class IndexJobStore:
|
|||||||
cache_miss_files=row.cache_miss_files,
|
cache_miss_files=row.cache_miss_files,
|
||||||
error=payload,
|
error=payload,
|
||||||
)
|
)
|
||||||
|
stale_timeout_sec = max(1, int(os.getenv("RAG_RUNNING_STALE_TIMEOUT_SEC", "8")))
|
||||||
|
if job.status == IndexJobStatus.RUNNING and self._is_stale(row.updated_at, stale_timeout_sec):
|
||||||
|
payload = ErrorPayload(
|
||||||
|
code="index_stalled",
|
||||||
|
desc="Indexing stalled in running state; likely blocked network call during embedding/auth.",
|
||||||
|
module=ModuleName.RAG,
|
||||||
|
)
|
||||||
|
job.status = IndexJobStatus.ERROR
|
||||||
|
job.error = payload
|
||||||
|
self.save(job)
|
||||||
|
LOGGER.error("rag index job marked stale->error: job_id=%s timeout_sec=%s", job.index_job_id, stale_timeout_sec)
|
||||||
|
return job
|
||||||
|
|
||||||
|
def _is_stale(self, updated_at: datetime | None, stale_timeout_sec: int) -> bool:
|
||||||
|
if updated_at is None:
|
||||||
|
return False
|
||||||
|
ts = updated_at if updated_at.tzinfo else updated_at.replace(tzinfo=UTC)
|
||||||
|
age = (datetime.now(UTC) - ts).total_seconds()
|
||||||
|
return age >= stale_timeout_sec
|
||||||
|
|
||||||
def save(self, job: IndexJob) -> None:
|
def save(self, job: IndexJob) -> None:
|
||||||
error_code = job.error.code if job.error else None
|
error_code = job.error.code if job.error else None
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
"""Оркестрация индексации RAG (очередь задач, события)."""
|
"""Оркестрация индексации RAG (очередь задач, события)."""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
from app.schemas.common import ErrorPayload, ModuleName
|
from app.schemas.common import ErrorPayload, ModuleName
|
||||||
@@ -15,6 +17,8 @@ from app.core.rag.indexing.job_store import IndexJob, IndexJobStore
|
|||||||
from app.core.shared.messaging import EventBus
|
from app.core.shared.messaging import EventBus
|
||||||
from app.core.shared.resilience import RetryExecutor
|
from app.core.shared.resilience import RetryExecutor
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class IndexingOrchestrator:
|
class IndexingOrchestrator:
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -32,11 +36,23 @@ class IndexingOrchestrator:
|
|||||||
|
|
||||||
async def enqueue_snapshot(self, rag_session_id: str, files: list[dict]) -> IndexJob:
|
async def enqueue_snapshot(self, rag_session_id: str, files: list[dict]) -> IndexJob:
|
||||||
job = self._store.create(rag_session_id)
|
job = self._store.create(rag_session_id)
|
||||||
|
LOGGER.warning(
|
||||||
|
"rag index snapshot queued: job_id=%s rag_session_id=%s files=%s",
|
||||||
|
job.index_job_id,
|
||||||
|
rag_session_id,
|
||||||
|
len(files),
|
||||||
|
)
|
||||||
asyncio.create_task(self._process_snapshot(job.index_job_id, rag_session_id, files))
|
asyncio.create_task(self._process_snapshot(job.index_job_id, rag_session_id, files))
|
||||||
return job
|
return job
|
||||||
|
|
||||||
async def enqueue_changes(self, rag_session_id: str, changed_files: list[dict]) -> IndexJob:
|
async def enqueue_changes(self, rag_session_id: str, changed_files: list[dict]) -> IndexJob:
|
||||||
job = self._store.create(rag_session_id)
|
job = self._store.create(rag_session_id)
|
||||||
|
LOGGER.warning(
|
||||||
|
"rag index changes queued: job_id=%s rag_session_id=%s changes=%s",
|
||||||
|
job.index_job_id,
|
||||||
|
rag_session_id,
|
||||||
|
len(changed_files),
|
||||||
|
)
|
||||||
asyncio.create_task(self._process_changes(job.index_job_id, rag_session_id, changed_files))
|
asyncio.create_task(self._process_changes(job.index_job_id, rag_session_id, changed_files))
|
||||||
return job
|
return job
|
||||||
|
|
||||||
@@ -71,9 +87,16 @@ class IndexingOrchestrator:
|
|||||||
async with lock:
|
async with lock:
|
||||||
job = self._store.get(job_id)
|
job = self._store.get(job_id)
|
||||||
if not job:
|
if not job:
|
||||||
|
LOGGER.warning("rag index job missing in store before start: job_id=%s", job_id)
|
||||||
return
|
return
|
||||||
job.status = IndexJobStatus.RUNNING
|
job.status = IndexJobStatus.RUNNING
|
||||||
self._store.save(job)
|
self._store.save(job)
|
||||||
|
LOGGER.warning(
|
||||||
|
"rag index job running: job_id=%s rag_session_id=%s total_files=%s",
|
||||||
|
job_id,
|
||||||
|
rag_session_id,
|
||||||
|
total_files,
|
||||||
|
)
|
||||||
await self._events.publish(
|
await self._events.publish(
|
||||||
job_id,
|
job_id,
|
||||||
"index_status",
|
"index_status",
|
||||||
@@ -94,13 +117,25 @@ class IndexingOrchestrator:
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
indexed, failed, cache_hits, cache_misses = await self._retry.run(lambda: operation(progress_cb))
|
timeout_sec = max(1, int(os.getenv("RAG_INDEX_JOB_TIMEOUT_SEC", "15")))
|
||||||
|
indexed, failed, cache_hits, cache_misses = await asyncio.wait_for(
|
||||||
|
operation(progress_cb),
|
||||||
|
timeout=timeout_sec,
|
||||||
|
)
|
||||||
job.status = IndexJobStatus.DONE
|
job.status = IndexJobStatus.DONE
|
||||||
job.indexed_files = indexed
|
job.indexed_files = indexed
|
||||||
job.failed_files = failed
|
job.failed_files = failed
|
||||||
job.cache_hit_files = cache_hits
|
job.cache_hit_files = cache_hits
|
||||||
job.cache_miss_files = cache_misses
|
job.cache_miss_files = cache_misses
|
||||||
self._store.save(job)
|
self._store.save(job)
|
||||||
|
LOGGER.warning(
|
||||||
|
"rag index job done: job_id=%s indexed=%s failed=%s cache_hits=%s cache_misses=%s",
|
||||||
|
job_id,
|
||||||
|
indexed,
|
||||||
|
failed,
|
||||||
|
cache_hits,
|
||||||
|
cache_misses,
|
||||||
|
)
|
||||||
await self._events.publish(
|
await self._events.publish(
|
||||||
job_id,
|
job_id,
|
||||||
"index_status",
|
"index_status",
|
||||||
@@ -129,12 +164,72 @@ class IndexingOrchestrator:
|
|||||||
)
|
)
|
||||||
except (TimeoutError, ConnectionError, OSError) as exc:
|
except (TimeoutError, ConnectionError, OSError) as exc:
|
||||||
job.status = IndexJobStatus.ERROR
|
job.status = IndexJobStatus.ERROR
|
||||||
|
job.failed_files = max(1, job.failed_files)
|
||||||
job.error = ErrorPayload(
|
job.error = ErrorPayload(
|
||||||
code="index_retry_exhausted",
|
code="index_runtime_error",
|
||||||
desc=f"Temporary indexing failure after retries: {exc}",
|
desc=f"Indexing failed: {exc}",
|
||||||
module=ModuleName.RAG,
|
module=ModuleName.RAG,
|
||||||
)
|
)
|
||||||
self._store.save(job)
|
self._store.save(job)
|
||||||
|
LOGGER.exception("rag index job runtime-error: job_id=%s", job_id)
|
||||||
|
await self._events.publish(
|
||||||
|
job_id,
|
||||||
|
"index_status",
|
||||||
|
{"index_job_id": job_id, "status": job.status.value, "total_files": total_files},
|
||||||
|
)
|
||||||
|
await self._events.publish(
|
||||||
|
job_id,
|
||||||
|
"terminal",
|
||||||
|
{
|
||||||
|
"index_job_id": job_id,
|
||||||
|
"status": "error",
|
||||||
|
"total_files": total_files,
|
||||||
|
"error": {
|
||||||
|
"code": job.error.code,
|
||||||
|
"desc": job.error.desc,
|
||||||
|
"module": job.error.module.value,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
except asyncio.TimeoutError as exc:
|
||||||
|
job.status = IndexJobStatus.ERROR
|
||||||
|
job.failed_files = max(1, job.failed_files)
|
||||||
|
job.error = ErrorPayload(
|
||||||
|
code="index_timeout",
|
||||||
|
desc=f"Indexing timed out while processing snapshot/changes: {exc}",
|
||||||
|
module=ModuleName.RAG,
|
||||||
|
)
|
||||||
|
self._store.save(job)
|
||||||
|
LOGGER.exception("rag index job timed out: job_id=%s", job_id)
|
||||||
|
await self._events.publish(
|
||||||
|
job_id,
|
||||||
|
"index_status",
|
||||||
|
{"index_job_id": job_id, "status": job.status.value, "total_files": total_files},
|
||||||
|
)
|
||||||
|
await self._events.publish(
|
||||||
|
job_id,
|
||||||
|
"terminal",
|
||||||
|
{
|
||||||
|
"index_job_id": job_id,
|
||||||
|
"status": "error",
|
||||||
|
"total_files": total_files,
|
||||||
|
"error": {
|
||||||
|
"code": job.error.code,
|
||||||
|
"desc": job.error.desc,
|
||||||
|
"module": job.error.module.value,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
job.status = IndexJobStatus.ERROR
|
||||||
|
job.failed_files = max(1, job.failed_files)
|
||||||
|
job.error = ErrorPayload(
|
||||||
|
code="index_unexpected_error",
|
||||||
|
desc=f"Unexpected indexing failure: {exc}",
|
||||||
|
module=ModuleName.RAG,
|
||||||
|
)
|
||||||
|
self._store.save(job)
|
||||||
|
LOGGER.exception("rag index job failed unexpectedly: job_id=%s", job_id)
|
||||||
await self._events.publish(
|
await self._events.publish(
|
||||||
job_id,
|
job_id,
|
||||||
"index_status",
|
"index_status",
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ class RagService:
|
|||||||
self._repo = repository
|
self._repo = repository
|
||||||
self._docs = DocsIndexingPipeline()
|
self._docs = DocsIndexingPipeline()
|
||||||
self._code = CodeIndexingPipeline()
|
self._code = CodeIndexingPipeline()
|
||||||
|
self._cache_enabled = os.getenv("RAG_DOCUMENT_CACHE_ENABLED", "false").strip().lower() in {"1", "true", "yes", "on"}
|
||||||
|
|
||||||
async def index_snapshot(
|
async def index_snapshot(
|
||||||
self,
|
self,
|
||||||
@@ -39,8 +40,16 @@ class RagService:
|
|||||||
files: list[dict],
|
files: list[dict],
|
||||||
progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None = None,
|
progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None = None,
|
||||||
) -> tuple[int, int, int, int]:
|
) -> tuple[int, int, int, int]:
|
||||||
|
LOGGER.warning("rag index snapshot started: rag_session_id=%s files=%s", rag_session_id, len(files))
|
||||||
report = await self._index_files(rag_session_id, files, progress_cb=progress_cb)
|
report = await self._index_files(rag_session_id, files, progress_cb=progress_cb)
|
||||||
self._repo.replace_documents(rag_session_id, report.documents_list)
|
self._repo.replace_documents(rag_session_id, report.documents_list)
|
||||||
|
LOGGER.warning(
|
||||||
|
"rag index snapshot persisted: rag_session_id=%s indexed=%s failed=%s docs=%s",
|
||||||
|
rag_session_id,
|
||||||
|
report.indexed_files,
|
||||||
|
report.failed_files,
|
||||||
|
len(report.documents_list),
|
||||||
|
)
|
||||||
return report.as_tuple()
|
return report.as_tuple()
|
||||||
|
|
||||||
async def index_changes(
|
async def index_changes(
|
||||||
@@ -49,6 +58,7 @@ class RagService:
|
|||||||
changed_files: list[dict],
|
changed_files: list[dict],
|
||||||
progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None = None,
|
progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None = None,
|
||||||
) -> tuple[int, int, int, int]:
|
) -> tuple[int, int, int, int]:
|
||||||
|
LOGGER.warning("rag index changes started: rag_session_id=%s changes=%s", rag_session_id, len(changed_files))
|
||||||
delete_paths: list[str] = []
|
delete_paths: list[str] = []
|
||||||
upserts: list[dict] = []
|
upserts: list[dict] = []
|
||||||
for item in changed_files:
|
for item in changed_files:
|
||||||
@@ -58,6 +68,14 @@ class RagService:
|
|||||||
upserts.append(item)
|
upserts.append(item)
|
||||||
report = await self._index_files(rag_session_id, upserts, progress_cb=progress_cb)
|
report = await self._index_files(rag_session_id, upserts, progress_cb=progress_cb)
|
||||||
self._repo.apply_document_changes(rag_session_id, delete_paths, report.documents_list)
|
self._repo.apply_document_changes(rag_session_id, delete_paths, report.documents_list)
|
||||||
|
LOGGER.warning(
|
||||||
|
"rag index changes persisted: rag_session_id=%s indexed=%s failed=%s docs=%s delete_paths=%s",
|
||||||
|
rag_session_id,
|
||||||
|
report.indexed_files,
|
||||||
|
report.failed_files,
|
||||||
|
len(report.documents_list),
|
||||||
|
len(delete_paths),
|
||||||
|
)
|
||||||
return report.as_tuple()
|
return report.as_tuple()
|
||||||
|
|
||||||
async def _index_files(
|
async def _index_files(
|
||||||
@@ -80,9 +98,18 @@ class RagService:
|
|||||||
for index, file in enumerate(indexable_files, start=1):
|
for index, file in enumerate(indexable_files, start=1):
|
||||||
path = str(file.get("path", ""))
|
path = str(file.get("path", ""))
|
||||||
try:
|
try:
|
||||||
|
LOGGER.warning(
|
||||||
|
"rag index file started: rag_session_id=%s file=%s/%s path=%s",
|
||||||
|
rag_session_id,
|
||||||
|
index,
|
||||||
|
total_files,
|
||||||
|
path,
|
||||||
|
)
|
||||||
blob_sha = self._blob_sha(file)
|
blob_sha = self._blob_sha(file)
|
||||||
cached = await asyncio.to_thread(self._repo.get_cached_documents, repo_id, blob_sha)
|
|
||||||
pipelines = self._resolve_pipeline_names(path)
|
pipelines = self._resolve_pipeline_names(path)
|
||||||
|
cached = []
|
||||||
|
if self._cache_enabled:
|
||||||
|
cached = await asyncio.to_thread(self._repo.get_cached_documents, repo_id, blob_sha)
|
||||||
if cached:
|
if cached:
|
||||||
self._report_missing_or_partial_docs(path, cached)
|
self._report_missing_or_partial_docs(path, cached)
|
||||||
report.documents_list.extend(self._with_file_metadata(cached, file, repo_id, blob_sha))
|
report.documents_list.extend(self._with_file_metadata(cached, file, repo_id, blob_sha))
|
||||||
@@ -95,9 +122,32 @@ class RagService:
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
built = self._build_documents(repo_id, path, file)
|
built = self._build_documents(repo_id, path, file)
|
||||||
|
LOGGER.warning(
|
||||||
|
"rag index file built docs: rag_session_id=%s path=%s docs=%s",
|
||||||
|
rag_session_id,
|
||||||
|
path,
|
||||||
|
len(built),
|
||||||
|
)
|
||||||
self._report_missing_or_partial_docs(path, built)
|
self._report_missing_or_partial_docs(path, built)
|
||||||
embedded = await asyncio.to_thread(self._embed_documents, built, file, repo_id, blob_sha)
|
embed_timeout_sec = max(1, int(os.getenv("RAG_EMBED_FILE_TIMEOUT_SEC", "8")))
|
||||||
|
LOGGER.warning(
|
||||||
|
"rag index file embedding started: rag_session_id=%s path=%s timeout_sec=%s",
|
||||||
|
rag_session_id,
|
||||||
|
path,
|
||||||
|
embed_timeout_sec,
|
||||||
|
)
|
||||||
|
embedded = await asyncio.wait_for(
|
||||||
|
asyncio.to_thread(self._embed_documents, built, file, repo_id, blob_sha),
|
||||||
|
timeout=embed_timeout_sec,
|
||||||
|
)
|
||||||
|
LOGGER.warning(
|
||||||
|
"rag index file embedded docs: rag_session_id=%s path=%s docs=%s",
|
||||||
|
rag_session_id,
|
||||||
|
path,
|
||||||
|
len(embedded),
|
||||||
|
)
|
||||||
report.documents_list.extend(embedded)
|
report.documents_list.extend(embedded)
|
||||||
|
if self._cache_enabled:
|
||||||
await asyncio.to_thread(self._repo.cache_documents, repo_id, path, blob_sha, embedded)
|
await asyncio.to_thread(self._repo.cache_documents, repo_id, path, blob_sha, embedded)
|
||||||
report.cache_miss_files += 1
|
report.cache_miss_files += 1
|
||||||
LOGGER.warning(
|
LOGGER.warning(
|
||||||
@@ -107,6 +157,13 @@ class RagService:
|
|||||||
",".join(pipelines),
|
",".join(pipelines),
|
||||||
)
|
)
|
||||||
report.indexed_files += 1
|
report.indexed_files += 1
|
||||||
|
LOGGER.warning(
|
||||||
|
"rag index file completed: rag_session_id=%s file=%s/%s path=%s",
|
||||||
|
rag_session_id,
|
||||||
|
index,
|
||||||
|
total_files,
|
||||||
|
path,
|
||||||
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
report.failed_files += 1
|
report.failed_files += 1
|
||||||
report.warnings.append(f"{path}: {exc}")
|
report.warnings.append(f"{path}: {exc}")
|
||||||
@@ -116,6 +173,8 @@ class RagService:
|
|||||||
path,
|
path,
|
||||||
exc,
|
exc,
|
||||||
)
|
)
|
||||||
|
# Fail-fast: stop indexing immediately so caller can expose the exact error to plugin.
|
||||||
|
raise RuntimeError(f"RAG indexing failed for '{path}': {exc}") from exc
|
||||||
await self._notify_progress(progress_cb, index, total_files, path)
|
await self._notify_progress(progress_cb, index, total_files, path)
|
||||||
report.documents = len(report.documents_list)
|
report.documents = len(report.documents_list)
|
||||||
return report
|
return report
|
||||||
@@ -156,12 +215,32 @@ class RagService:
|
|||||||
if not docs:
|
if not docs:
|
||||||
return []
|
return []
|
||||||
batch_size = max(1, int(os.getenv("RAG_EMBED_BATCH_SIZE", "16")))
|
batch_size = max(1, int(os.getenv("RAG_EMBED_BATCH_SIZE", "16")))
|
||||||
|
request_timeout_sec = max(1, int(os.getenv("RAG_EMBED_REQUEST_TIMEOUT_SEC", "5")))
|
||||||
|
request_retries = max(1, int(os.getenv("RAG_EMBED_REQUEST_MAX_RETRIES", "1")))
|
||||||
metadata = self._document_metadata(file, repo_id, blob_sha)
|
metadata = self._document_metadata(file, repo_id, blob_sha)
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
doc.metadata.update(metadata)
|
doc.metadata.update(metadata)
|
||||||
for start in range(0, len(docs), batch_size):
|
for start in range(0, len(docs), batch_size):
|
||||||
batch = docs[start : start + batch_size]
|
batch = docs[start : start + batch_size]
|
||||||
vectors = self._embedder.embed([doc.text for doc in batch])
|
LOGGER.warning(
|
||||||
|
"rag embed batch start: path=%s batch_start=%s batch_size=%s timeout_sec=%s retries=%s",
|
||||||
|
file.get("path", ""),
|
||||||
|
start,
|
||||||
|
len(batch),
|
||||||
|
request_timeout_sec,
|
||||||
|
request_retries,
|
||||||
|
)
|
||||||
|
vectors = self._embedder.embed(
|
||||||
|
[doc.text for doc in batch],
|
||||||
|
timeout_sec=request_timeout_sec,
|
||||||
|
max_retries=request_retries,
|
||||||
|
)
|
||||||
|
LOGGER.warning(
|
||||||
|
"rag embed batch done: path=%s batch_start=%s vectors=%s",
|
||||||
|
file.get("path", ""),
|
||||||
|
start,
|
||||||
|
len(vectors),
|
||||||
|
)
|
||||||
for doc, vector in zip(batch, vectors):
|
for doc, vector in zip(batch, vectors):
|
||||||
doc.embedding = vector
|
doc.embedding = vector
|
||||||
return docs
|
return docs
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from sqlalchemy import text
|
from sqlalchemy import text
|
||||||
|
|
||||||
@@ -19,6 +20,7 @@ class RagJobRow:
|
|||||||
error_code: str | None
|
error_code: str | None
|
||||||
error_desc: str | None
|
error_desc: str | None
|
||||||
error_module: str | None
|
error_module: str | None
|
||||||
|
updated_at: datetime | None
|
||||||
|
|
||||||
|
|
||||||
class RagJobRepository:
|
class RagJobRepository:
|
||||||
@@ -85,7 +87,7 @@ class RagJobRepository:
|
|||||||
text(
|
text(
|
||||||
"""
|
"""
|
||||||
SELECT index_job_id, rag_session_id, status, indexed_files, failed_files,
|
SELECT index_job_id, rag_session_id, status, indexed_files, failed_files,
|
||||||
cache_hit_files, cache_miss_files, error_code, error_desc, error_module
|
cache_hit_files, cache_miss_files, error_code, error_desc, error_module, updated_at
|
||||||
FROM rag_index_jobs
|
FROM rag_index_jobs
|
||||||
WHERE index_job_id = :jid
|
WHERE index_job_id = :jid
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import time
|
import time
|
||||||
|
import logging
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
@@ -6,6 +7,9 @@ from app.infra.constants import MAX_RETRIES
|
|||||||
from app.core.shared.gigachat.errors import GigaChatError
|
from app.core.shared.gigachat.errors import GigaChatError
|
||||||
from app.core.shared.gigachat.settings import GigaChatSettings
|
from app.core.shared.gigachat.settings import GigaChatSettings
|
||||||
from app.core.shared.gigachat.token_provider import GigaChatTokenProvider
|
from app.core.shared.gigachat.token_provider import GigaChatTokenProvider
|
||||||
|
from app.core.shared.network.hard_timeout import run_with_hard_timeout
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class GigaChatClient:
|
class GigaChatClient:
|
||||||
@@ -30,13 +34,26 @@ class GigaChatClient:
|
|||||||
message = choices[0].get("message") or {}
|
message = choices[0].get("message") or {}
|
||||||
return str(message.get("content") or "")
|
return str(message.get("content") or "")
|
||||||
|
|
||||||
def embed(self, texts: list[str]) -> list[list[float]]:
|
def embed(
|
||||||
|
self,
|
||||||
|
texts: list[str],
|
||||||
|
*,
|
||||||
|
timeout_sec: int | None = None,
|
||||||
|
max_retries: int | None = None,
|
||||||
|
) -> list[list[float]]:
|
||||||
token = self._tokens.get_access_token()
|
token = self._tokens.get_access_token()
|
||||||
payload = {
|
payload = {
|
||||||
"model": self._settings.embedding_model,
|
"model": self._settings.embedding_model,
|
||||||
"input": texts,
|
"input": texts,
|
||||||
}
|
}
|
||||||
response = self._post_with_retry("/embeddings", payload, token=token, timeout=90, operation_name="embeddings")
|
response = self._post_with_retry(
|
||||||
|
"/embeddings",
|
||||||
|
payload,
|
||||||
|
token=token,
|
||||||
|
timeout=timeout_sec or 90,
|
||||||
|
operation_name="embeddings",
|
||||||
|
max_retries=max_retries,
|
||||||
|
)
|
||||||
data = response.json()
|
data = response.json()
|
||||||
items = data.get("data")
|
items = data.get("data")
|
||||||
if not isinstance(items, list):
|
if not isinstance(items, list):
|
||||||
@@ -51,11 +68,22 @@ class GigaChatClient:
|
|||||||
token: str,
|
token: str,
|
||||||
timeout: int,
|
timeout: int,
|
||||||
operation_name: str,
|
operation_name: str,
|
||||||
|
max_retries: int | None = None,
|
||||||
):
|
):
|
||||||
last_error: Exception | None = None
|
last_error: Exception | None = None
|
||||||
for attempt in range(1, MAX_RETRIES + 1):
|
retries = max(1, int(max_retries or MAX_RETRIES))
|
||||||
|
for attempt in range(1, retries + 1):
|
||||||
try:
|
try:
|
||||||
response = requests.post(
|
LOGGER.warning(
|
||||||
|
"gigachat request start: operation=%s path=%s attempt=%s/%s timeout_sec=%s",
|
||||||
|
operation_name,
|
||||||
|
path,
|
||||||
|
attempt,
|
||||||
|
retries,
|
||||||
|
timeout,
|
||||||
|
)
|
||||||
|
response = run_with_hard_timeout(
|
||||||
|
lambda: requests.post(
|
||||||
f"{self._settings.api_url.rstrip('/')}{path}",
|
f"{self._settings.api_url.rstrip('/')}{path}",
|
||||||
json=payload,
|
json=payload,
|
||||||
headers={
|
headers={
|
||||||
@@ -64,8 +92,26 @@ class GigaChatClient:
|
|||||||
},
|
},
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
verify=self._settings.ssl_verify,
|
verify=self._settings.ssl_verify,
|
||||||
|
),
|
||||||
|
timeout_sec=timeout,
|
||||||
|
operation_name=f"gigachat_{operation_name}",
|
||||||
|
)
|
||||||
|
LOGGER.warning(
|
||||||
|
"gigachat request done: operation=%s path=%s attempt=%s/%s status=%s",
|
||||||
|
operation_name,
|
||||||
|
path,
|
||||||
|
attempt,
|
||||||
|
retries,
|
||||||
|
response.status_code,
|
||||||
)
|
)
|
||||||
except requests.RequestException as exc:
|
except requests.RequestException as exc:
|
||||||
|
LOGGER.exception(
|
||||||
|
"gigachat request failed: operation=%s path=%s attempt=%s/%s",
|
||||||
|
operation_name,
|
||||||
|
path,
|
||||||
|
attempt,
|
||||||
|
retries,
|
||||||
|
)
|
||||||
last_error = GigaChatError(f"GigaChat {operation_name} request failed: {exc}")
|
last_error = GigaChatError(f"GigaChat {operation_name} request failed: {exc}")
|
||||||
else:
|
else:
|
||||||
if response.status_code < 400:
|
if response.status_code < 400:
|
||||||
@@ -73,7 +119,7 @@ class GigaChatClient:
|
|||||||
last_error = GigaChatError(f"GigaChat {operation_name} error {response.status_code}: {response.text}")
|
last_error = GigaChatError(f"GigaChat {operation_name} error {response.status_code}: {response.text}")
|
||||||
if not self._is_retryable_status(response.status_code):
|
if not self._is_retryable_status(response.status_code):
|
||||||
raise last_error
|
raise last_error
|
||||||
if attempt == MAX_RETRIES:
|
if attempt == retries:
|
||||||
break
|
break
|
||||||
time.sleep(0.1 * attempt)
|
time.sleep(0.1 * attempt)
|
||||||
if last_error is None:
|
if last_error is None:
|
||||||
|
|||||||
@@ -1,11 +1,16 @@
|
|||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from app.core.shared.gigachat.errors import GigaChatError
|
from app.core.shared.gigachat.errors import GigaChatError
|
||||||
from app.core.shared.gigachat.settings import GigaChatSettings
|
from app.core.shared.gigachat.settings import GigaChatSettings
|
||||||
|
from app.core.shared.network.hard_timeout import run_with_hard_timeout
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class GigaChatTokenProvider:
|
class GigaChatTokenProvider:
|
||||||
@@ -30,6 +35,7 @@ class GigaChatTokenProvider:
|
|||||||
def _fetch_token(self) -> tuple[str, float]:
|
def _fetch_token(self) -> tuple[str, float]:
|
||||||
if not self._settings.credentials:
|
if not self._settings.credentials:
|
||||||
raise GigaChatError("GIGACHAT_TOKEN is not set")
|
raise GigaChatError("GIGACHAT_TOKEN is not set")
|
||||||
|
timeout_sec = max(1, int(os.getenv("GIGACHAT_AUTH_TIMEOUT_SEC", "5")))
|
||||||
headers = {
|
headers = {
|
||||||
"Content-Type": "application/x-www-form-urlencoded",
|
"Content-Type": "application/x-www-form-urlencoded",
|
||||||
"Accept": "application/json",
|
"Accept": "application/json",
|
||||||
@@ -37,14 +43,21 @@ class GigaChatTokenProvider:
|
|||||||
"RqUID": str(uuid.uuid4()),
|
"RqUID": str(uuid.uuid4()),
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
response = requests.post(
|
LOGGER.warning("gigachat auth start: url=%s timeout_sec=%s", self._settings.auth_url, timeout_sec)
|
||||||
|
response = run_with_hard_timeout(
|
||||||
|
lambda: requests.post(
|
||||||
self._settings.auth_url,
|
self._settings.auth_url,
|
||||||
headers=headers,
|
headers=headers,
|
||||||
data=f"scope={self._settings.scope}",
|
data=f"scope={self._settings.scope}",
|
||||||
timeout=30,
|
timeout=timeout_sec,
|
||||||
verify=self._settings.ssl_verify,
|
verify=self._settings.ssl_verify,
|
||||||
|
),
|
||||||
|
timeout_sec=timeout_sec,
|
||||||
|
operation_name="gigachat_auth",
|
||||||
)
|
)
|
||||||
|
LOGGER.warning("gigachat auth done: status=%s", response.status_code)
|
||||||
except requests.RequestException as exc:
|
except requests.RequestException as exc:
|
||||||
|
LOGGER.exception("gigachat auth failed")
|
||||||
raise GigaChatError(f"GigaChat auth request failed: {exc}") from exc
|
raise GigaChatError(f"GigaChat auth request failed: {exc}") from exc
|
||||||
|
|
||||||
if response.status_code >= 400:
|
if response.status_code >= 400:
|
||||||
|
|||||||
@@ -0,0 +1,31 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import queue
|
||||||
|
import threading
|
||||||
|
from collections.abc import Callable
|
||||||
|
from typing import TypeVar
|
||||||
|
|
||||||
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
|
def run_with_hard_timeout(operation: Callable[[], T], *, timeout_sec: int, operation_name: str) -> T:
|
||||||
|
result_queue: queue.Queue[tuple[bool, object]] = queue.Queue(maxsize=1)
|
||||||
|
|
||||||
|
def _runner() -> None:
|
||||||
|
try:
|
||||||
|
result_queue.put((True, operation()))
|
||||||
|
except BaseException as exc: # noqa: BLE001
|
||||||
|
result_queue.put((False, exc))
|
||||||
|
|
||||||
|
thread = threading.Thread(target=_runner, name=f"hard-timeout:{operation_name}", daemon=True)
|
||||||
|
thread.start()
|
||||||
|
thread.join(timeout=max(1, int(timeout_sec)))
|
||||||
|
if thread.is_alive():
|
||||||
|
raise TimeoutError(f"{operation_name} exceeded hard timeout ({timeout_sec}s)")
|
||||||
|
if result_queue.empty():
|
||||||
|
raise TimeoutError(f"{operation_name} finished without a result")
|
||||||
|
ok, value = result_queue.get_nowait()
|
||||||
|
if ok:
|
||||||
|
return value # type: ignore[return-value]
|
||||||
|
raise value # type: ignore[misc]
|
||||||
|
|
||||||
@@ -2,6 +2,7 @@ import logging
|
|||||||
|
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from starlette.requests import Request
|
||||||
|
|
||||||
from app.infra.logging_setup import configure_logging
|
from app.infra.logging_setup import configure_logging
|
||||||
from app.infra.error_handlers import register_error_handlers
|
from app.infra.error_handlers import register_error_handlers
|
||||||
@@ -19,6 +20,7 @@ def create_app() -> FastAPI:
|
|||||||
app = FastAPI(title="Agent Backend MVP", version="0.1.0")
|
app = FastAPI(title="Agent Backend MVP", version="0.1.0")
|
||||||
modules = ModularApplication()
|
modules = ModularApplication()
|
||||||
app.state.modules = modules
|
app.state.modules = modules
|
||||||
|
logger = logging.getLogger("app.http")
|
||||||
app.add_middleware(
|
app.add_middleware(
|
||||||
CORSMiddleware,
|
CORSMiddleware,
|
||||||
allow_origins=["*"],
|
allow_origins=["*"],
|
||||||
@@ -30,6 +32,22 @@ def create_app() -> FastAPI:
|
|||||||
app.include_router(modules.api.public_router())
|
app.include_router(modules.api.public_router())
|
||||||
register_error_handlers(app)
|
register_error_handlers(app)
|
||||||
|
|
||||||
|
@app.middleware("http")
|
||||||
|
async def log_http_requests(request: Request, call_next):
|
||||||
|
logger.warning("http request: method=%s path=%s query=%s", request.method, request.url.path, request.url.query)
|
||||||
|
try:
|
||||||
|
response = await call_next(request)
|
||||||
|
logger.warning(
|
||||||
|
"http response: method=%s path=%s status=%s",
|
||||||
|
request.method,
|
||||||
|
request.url.path,
|
||||||
|
response.status_code,
|
||||||
|
)
|
||||||
|
return response
|
||||||
|
except Exception:
|
||||||
|
logger.exception("http request failed: method=%s path=%s", request.method, request.url.path)
|
||||||
|
raise
|
||||||
|
|
||||||
@app.on_event("startup")
|
@app.on_event("startup")
|
||||||
async def startup() -> None:
|
async def startup() -> None:
|
||||||
modules.startup()
|
modules.startup()
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ def test_collector_ignores_file_paths_from_content() -> None:
|
|||||||
assert endpoints == ["GET /health"]
|
assert endpoints == ["GET /health"]
|
||||||
|
|
||||||
|
|
||||||
def test_collector_uses_title_path_fallback_when_endpoint_metadata_missing() -> None:
|
def test_collector_ignores_title_when_endpoint_metadata_missing() -> None:
|
||||||
rows = [
|
rows = [
|
||||||
{
|
{
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@@ -65,4 +65,4 @@ def test_collector_uses_title_path_fallback_when_endpoint_metadata_missing() ->
|
|||||||
|
|
||||||
endpoints = ApiEndpointCollector().collect(rows)
|
endpoints = ApiEndpointCollector().collect(rows)
|
||||||
|
|
||||||
assert endpoints == ["GET /actions/{action}"]
|
assert endpoints == []
|
||||||
|
|||||||
@@ -20,6 +20,8 @@ sub_domain: invoices
|
|||||||
layer: application
|
layer: application
|
||||||
status: draft
|
status: draft
|
||||||
updated_at: 2026-03-23
|
updated_at: 2026-03-23
|
||||||
|
endpoint: POST /billing/invoices
|
||||||
|
source_of_truth: analytics
|
||||||
tags: [billing, api]
|
tags: [billing, api]
|
||||||
entities: [Invoice]
|
entities: [Invoice]
|
||||||
parent: billing_api
|
parent: billing_api
|
||||||
@@ -125,9 +127,13 @@ Create invoice
|
|||||||
|
|
||||||
catalog_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOCUMENT_CATALOG)
|
catalog_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOCUMENT_CATALOG)
|
||||||
assert catalog_doc.metadata["document_id"] == "api.billing.create_invoice"
|
assert catalog_doc.metadata["document_id"] == "api.billing.create_invoice"
|
||||||
|
assert catalog_doc.metadata["id"] == "api.billing.create_invoice"
|
||||||
assert catalog_doc.metadata["module"] == "billing"
|
assert catalog_doc.metadata["module"] == "billing"
|
||||||
assert catalog_doc.metadata["domain"] == "billing"
|
assert catalog_doc.metadata["domain"] == "billing"
|
||||||
assert catalog_doc.metadata["subdomain"] == "invoices"
|
assert catalog_doc.metadata["sub_domain"] == "invoices"
|
||||||
|
assert "subdomain" not in catalog_doc.metadata
|
||||||
|
assert catalog_doc.metadata["endpoint"] == "POST /billing/invoices"
|
||||||
|
assert catalog_doc.metadata["source_of_truth"] == "analytics"
|
||||||
assert catalog_doc.metadata["summary_text"] == "Creates an invoice in billing."
|
assert catalog_doc.metadata["summary_text"] == "Creates an invoice in billing."
|
||||||
|
|
||||||
fact_texts = [doc.text for doc in docs if doc.layer == RagLayer.DOCS_FACT_INDEX]
|
fact_texts = [doc.text for doc in docs if doc.layer == RagLayer.DOCS_FACT_INDEX]
|
||||||
@@ -335,3 +341,4 @@ Control actions endpoint.
|
|||||||
catalog = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOCUMENT_CATALOG)
|
catalog = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOCUMENT_CATALOG)
|
||||||
assert catalog.metadata["type"] == "api_method"
|
assert catalog.metadata["type"] == "api_method"
|
||||||
assert catalog.metadata["title"] == "HTTP API /actions/{action}"
|
assert catalog.metadata["title"] == "HTTP API /actions/{action}"
|
||||||
|
assert catalog.metadata["endpoint"] == "GET|POST /actions/{action}"
|
||||||
|
|||||||
Reference in New Issue
Block a user