Работает агент, поправлены пути
This commit is contained in:
+6
-17
@@ -89,28 +89,17 @@ RAG сейчас используется как общее ядро индек
|
||||
Хранит карточку документа как точку входа в документ и его краткое описание.
|
||||
|
||||
Формирование:
|
||||
Источник данных - frontmatter, fallback title, summary и doc kind, вычисленный классификатором документации.
|
||||
Данные извлекаются структурированно по атрибутам.
|
||||
Источник данных - frontmatter `as is`, summary и doc kind, вычисленный классификатором документации.
|
||||
В `metadata_json` копируются все `key-value` из frontmatter без нормализации и без fallback для frontmatter-атрибутов.
|
||||
Дополнительно в `metadata_json` добавляются служебные поля `source_path`, `summary_text`, `doc_kind`.
|
||||
Атрибут `document_id` добавляется только при наличии `frontmatter.id` (fallback до пути файла не применяется).
|
||||
В `content` попадает summary документа, а не склейка всех частей документа в сплошной текст.
|
||||
|
||||
Фиксация в БД:
|
||||
| Атрибут в `metadata_json` | Описание | Источник |
|
||||
|---|---|---|
|
||||
| `document_id` | идентификатор документа | `frontmatter.id`, иначе путь файла |
|
||||
| `type` | тип документа из frontmatter | `frontmatter.type` |
|
||||
| `name` | системное имя документа | `frontmatter.name` |
|
||||
| `title` | человекочитаемый заголовок документа | `frontmatter.title`, иначе fallback title |
|
||||
| `module` | модуль документа | `frontmatter.module` |
|
||||
| `domain` | домен документа | `frontmatter.domain` |
|
||||
| `subdomain` | поддомен документа | `frontmatter.subdomain` |
|
||||
| `layer` | логический слой, указанный в frontmatter документа | `frontmatter.layer` |
|
||||
| `status` | статус документа | `frontmatter.status` |
|
||||
| `updated_at` | дата или отметка последнего обновления | `frontmatter.updated_at` |
|
||||
| `tags` | теги документа | `frontmatter.tags` |
|
||||
| `entities` | сущности, связанные с документом | `frontmatter.entities` |
|
||||
| `parent` | родительский документ | `frontmatter.parent` |
|
||||
| `children` | дочерние документы | `frontmatter.children` |
|
||||
| `links` | ссылки на связанные материалы | `frontmatter.links` |
|
||||
| `*` frontmatter fields | все поля frontmatter в исходном виде | frontmatter документа |
|
||||
| `document_id` | идентификатор документа, добавляется только если в frontmatter есть `id` | `frontmatter.id` |
|
||||
| `source_path` | исходный путь документа | путь файла |
|
||||
| `summary_text` | краткое содержание документа | секция `# Summary` |
|
||||
| `doc_kind` | классификация документа, например `readme`, `spec`, `runbook` | `DocsClassifier.classify(path)` |
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
---
|
||||
id: ui.example_page
|
||||
type: ui_page
|
||||
doc_type: ui_page
|
||||
name: example_page
|
||||
title: Пример UI-страницы
|
||||
module: example_module
|
||||
layer: presentation
|
||||
domain: example_domain
|
||||
sub_domain: example_subdomain
|
||||
related_docs: []
|
||||
status: draft
|
||||
updated_at: 2026-03-20
|
||||
source_of_truth: mixed
|
||||
parent: null
|
||||
children: []
|
||||
tags: []
|
||||
entities: []
|
||||
links: {}
|
||||
---
|
||||
|
||||
# Пример UI-страницы
|
||||
|
||||
## Summary
|
||||
|
||||
Краткое описание страницы и её назначения.
|
||||
|
||||
## Details
|
||||
|
||||
### Назначение страницы
|
||||
|
||||
### Пользовательский сценарий
|
||||
|
||||
### Основные блоки интерфейса
|
||||
|
||||
### Связанные API и сущности
|
||||
|
||||
### Функциональные требования
|
||||
|
||||
### Нефункциональные требования
|
||||
|
||||
### Ограничения и граничные случаи
|
||||
|
||||
### Ошибки и валидации
|
||||
|
||||
### Связанный код
|
||||
|
||||
### Связанные документы
|
||||
|
||||
### История изменений
|
||||
-25
@@ -8,26 +8,14 @@ class ApiEndpointCollector:
|
||||
_ENDPOINT_VALUE_RE = re.compile(
|
||||
r"\b((?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)(?:\s*\|\s*(?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS))*)\s+(/[-a-zA-Z0-9_./{}]+)"
|
||||
)
|
||||
_METHOD_PATH_RE = re.compile(r"\b(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\s+(/[-a-zA-Z0-9_./{}]+)")
|
||||
_PATH_RE = re.compile(r"(/[-a-zA-Z0-9_./{}]+)")
|
||||
_DOC_EXTS = (".md", ".yaml", ".yml", ".json")
|
||||
|
||||
def collect(self, rows: list[dict]) -> list[str]:
|
||||
endpoints: list[str] = []
|
||||
for row in rows:
|
||||
self._append_from_endpoint_metadata(endpoints, row)
|
||||
self._append_from_title_fallback(endpoints, row)
|
||||
for raw in self._row_candidates(row):
|
||||
self._append_from_text(endpoints, raw)
|
||||
return sorted(set(endpoints))
|
||||
|
||||
def _append_from_title_fallback(self, out: list[str], row: dict) -> None:
|
||||
title = str(row.get("title") or "").strip()
|
||||
if not title:
|
||||
return
|
||||
for match in self._PATH_RE.findall(title):
|
||||
self._append_default(out, match)
|
||||
|
||||
def _append_from_endpoint_metadata(self, out: list[str], row: dict) -> None:
|
||||
metadata = dict(row.get("metadata") or {})
|
||||
endpoint_value = str(metadata.get("endpoint") or "").strip()
|
||||
@@ -36,19 +24,6 @@ class ApiEndpointCollector:
|
||||
for methods, path in self._ENDPOINT_VALUE_RE.findall(endpoint_value):
|
||||
self._append_methods_with_path(out, methods, path)
|
||||
|
||||
def _row_candidates(self, row: dict) -> list[str]:
|
||||
metadata = dict(row.get("metadata") or {})
|
||||
values = [
|
||||
metadata.get("name"),
|
||||
metadata.get("summary_text"),
|
||||
row.get("title"),
|
||||
]
|
||||
return [str(value or "") for value in values if str(value or "").strip()]
|
||||
|
||||
def _append_from_text(self, out: list[str], text: str) -> None:
|
||||
for method, path in self._METHOD_PATH_RE.findall(text):
|
||||
self._append_with_method(out, method, path)
|
||||
|
||||
def _append_methods_with_path(self, out: list[str], methods_raw: str, path_raw: str) -> None:
|
||||
methods = [
|
||||
part.strip().upper()
|
||||
|
||||
+66
-1
@@ -56,7 +56,17 @@ class DocRulesChangesetGenerator:
|
||||
)
|
||||
payload = self._parse_json(raw)
|
||||
if payload is None:
|
||||
return None, f"LLM вернул невалидный JSON changeset для {item.path}."
|
||||
if trace is not None:
|
||||
trace.log("changeset_json_parse_failed", {"path": item.path, "raw_chars": len(str(raw or ""))})
|
||||
repaired_raw = self._llm.generate(
|
||||
"v2_docs_update.repair_doc_changeset_json",
|
||||
self._build_repair_input(raw=raw, item=item),
|
||||
log_context="workflow.v2.docs_update.from_feature.changeset_repair",
|
||||
trace=trace,
|
||||
)
|
||||
payload = self._parse_json(repaired_raw)
|
||||
if payload is None:
|
||||
return None, f"LLM вернул невалидный JSON changeset для {item.path} даже после repair."
|
||||
payload["op"] = item.op
|
||||
payload["path"] = item.path
|
||||
payload["reason"] = str(payload.get("reason") or item.reason)[:500]
|
||||
@@ -79,8 +89,63 @@ class DocRulesChangesetGenerator:
|
||||
try:
|
||||
value = json.loads(text)
|
||||
return value if isinstance(value, dict) else None
|
||||
except json.JSONDecodeError:
|
||||
normalized = self._escape_control_chars_in_json_strings(text)
|
||||
if normalized != text:
|
||||
try:
|
||||
value = json.loads(normalized)
|
||||
return value if isinstance(value, dict) else None
|
||||
except json.JSONDecodeError:
|
||||
return None
|
||||
return None
|
||||
|
||||
def _build_repair_input(self, *, raw: str, item: PlannedChange) -> str:
|
||||
payload = {
|
||||
"expected_contract": {
|
||||
"op": item.op,
|
||||
"path": item.path,
|
||||
"required_keys": ["op", "path", "reason"],
|
||||
"proposed_content_required_for": ["create", "update"],
|
||||
},
|
||||
"raw_llm_output": str(raw or ""),
|
||||
}
|
||||
return json.dumps(payload, ensure_ascii=False, indent=2)
|
||||
|
||||
def _escape_control_chars_in_json_strings(self, text: str) -> str:
|
||||
escaped: list[str] = []
|
||||
in_string = False
|
||||
backslash = False
|
||||
for char in text:
|
||||
if not in_string:
|
||||
escaped.append(char)
|
||||
if char == '"':
|
||||
in_string = True
|
||||
continue
|
||||
if backslash:
|
||||
escaped.append(char)
|
||||
backslash = False
|
||||
continue
|
||||
if char == "\\":
|
||||
escaped.append(char)
|
||||
backslash = True
|
||||
continue
|
||||
if char == '"':
|
||||
escaped.append(char)
|
||||
in_string = False
|
||||
continue
|
||||
codepoint = ord(char)
|
||||
if codepoint < 0x20:
|
||||
if char == "\n":
|
||||
escaped.append("\\n")
|
||||
elif char == "\r":
|
||||
escaped.append("\\r")
|
||||
elif char == "\t":
|
||||
escaped.append("\\t")
|
||||
else:
|
||||
escaped.append(f"\\u{codepoint:04x}")
|
||||
continue
|
||||
escaped.append(char)
|
||||
return "".join(escaped)
|
||||
|
||||
def _resolve_base_hash(self, project_root: str, rel_path: str) -> str:
|
||||
root = Path(project_root or "").expanduser()
|
||||
|
||||
+45
-65
@@ -1,9 +1,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.docs_state_loader import DocsState
|
||||
from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.plan_hints import (
|
||||
PlanUnitHint,
|
||||
parse_plan_hints,
|
||||
)
|
||||
from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.plan_path_policy import PlanPathPolicy
|
||||
from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import DocUpdateFromFeatureContext
|
||||
from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.models import PlannedChange
|
||||
from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.system_rules import (
|
||||
@@ -23,15 +27,16 @@ class BuildChangePlanStep(WorkflowStep[DocUpdateFromFeatureContext]):
|
||||
def __init__(self, llm: AgentLlmService, query_repository: RagQueryRepository | None = None) -> None:
|
||||
self._llm = llm
|
||||
self._query_repository = query_repository or RagQueryRepository()
|
||||
self._path_policy = PlanPathPolicy(DOC_TYPE_TO_FOLDER)
|
||||
|
||||
async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
|
||||
if context.answer or not context.units:
|
||||
return context
|
||||
self._load_docs_state(context)
|
||||
inferred_types = self._infer_missing_types(context)
|
||||
inferred_hints = self._infer_plan_hints(context)
|
||||
state = DocsState.from_rows(context.docs_catalog_rows)
|
||||
for index, unit in enumerate(context.units):
|
||||
planned = self._build_unit_plan(context, unit, state, inferred_types.get(index, ""))
|
||||
planned = self._build_unit_plan(context, unit, state, inferred_hints.get(index, PlanUnitHint()))
|
||||
if planned is None:
|
||||
continue
|
||||
context.planned_changes.append(planned)
|
||||
@@ -55,18 +60,26 @@ class BuildChangePlanStep(WorkflowStep[DocUpdateFromFeatureContext]):
|
||||
except Exception as exc:
|
||||
context.issues.append(f"Не удалось загрузить состояние документации из RAG: {exc}")
|
||||
|
||||
def _infer_missing_types(self, context: DocUpdateFromFeatureContext) -> dict[int, str]:
|
||||
missing: list[tuple[int, str, str]] = []
|
||||
def _infer_plan_hints(self, context: DocUpdateFromFeatureContext) -> dict[int, PlanUnitHint]:
|
||||
items = []
|
||||
for idx, unit in enumerate(context.units):
|
||||
value = str(unit.metadata.get("type") or "").strip()
|
||||
if not value:
|
||||
missing.append((idx, unit.heading, unit.body[:400]))
|
||||
if not missing:
|
||||
return {}
|
||||
items.append(
|
||||
{
|
||||
"index": idx,
|
||||
"heading": unit.heading,
|
||||
"snippet": unit.body[:400],
|
||||
"known": {
|
||||
"type": str(unit.metadata.get("type") or "").strip(),
|
||||
"id": str(unit.metadata.get("id") or "").strip(),
|
||||
"application": str(unit.metadata.get("application") or context.analytics_meta.application or "").strip(),
|
||||
"platform": str(unit.metadata.get("platform") or context.analytics_meta.platform or "").strip(),
|
||||
},
|
||||
}
|
||||
)
|
||||
payload = {
|
||||
"system_rules": SYSTEM_RULES_TEXT,
|
||||
"allowed_doc_types": list(ALLOWED_DOC_TYPES),
|
||||
"items": [{"index": idx, "heading": h, "snippet": snippet} for idx, h, snippet in missing],
|
||||
"items": items,
|
||||
}
|
||||
raw = self._llm.generate(
|
||||
"v2_docs_update.plan_change_units",
|
||||
@@ -74,43 +87,38 @@ class BuildChangePlanStep(WorkflowStep[DocUpdateFromFeatureContext]):
|
||||
log_context="workflow.v2.docs_update.from_feature.plan",
|
||||
trace=context.runtime.trace.module("workflow.v2.docs_update.from_feature.llm"),
|
||||
)
|
||||
return self._parse_type_inference(raw)
|
||||
|
||||
def _parse_type_inference(self, raw: str) -> dict[int, str]:
|
||||
try:
|
||||
data = json.loads(str(raw or "").strip())
|
||||
except json.JSONDecodeError:
|
||||
return {}
|
||||
rows = data.get("items") if isinstance(data, dict) else []
|
||||
if not isinstance(rows, list):
|
||||
return {}
|
||||
result: dict[int, str] = {}
|
||||
for row in rows:
|
||||
if not isinstance(row, dict):
|
||||
continue
|
||||
index = row.get("index")
|
||||
doc_type = str(row.get("doc_type") or "").strip()
|
||||
if not isinstance(index, int) or doc_type not in ALLOWED_DOC_TYPES:
|
||||
continue
|
||||
result[index] = doc_type
|
||||
return result
|
||||
return parse_plan_hints(raw, ALLOWED_DOC_TYPES)
|
||||
|
||||
def _build_unit_plan(
|
||||
self,
|
||||
context: DocUpdateFromFeatureContext,
|
||||
unit,
|
||||
state: DocsState,
|
||||
inferred_doc_type: str,
|
||||
hint: PlanUnitHint,
|
||||
) -> PlannedChange | None:
|
||||
doc_type = str(unit.metadata.get("type") or inferred_doc_type).strip()
|
||||
doc_type = str(unit.metadata.get("type") or hint.doc_type).strip()
|
||||
if doc_type not in ALLOWED_DOC_TYPES:
|
||||
context.issues.append(f"Unit '{unit.heading}': неизвестный или отсутствующий type '{doc_type}'.")
|
||||
return None
|
||||
unit_id = str(unit.metadata.get("id") or self._make_doc_id(doc_type, unit.heading)).strip()
|
||||
unit_id = self._path_policy.make_doc_id(
|
||||
doc_type=doc_type,
|
||||
heading=unit.heading,
|
||||
hinted_doc_id=str(unit.metadata.get("id") or hint.doc_id or "").strip(),
|
||||
)
|
||||
op_hint = str(unit.metadata.get("op") or "create_or_update").strip().lower()
|
||||
target_hint = str(unit.metadata.get("target_path_hint") or "").strip()
|
||||
path = self._resolve_path(doc_type, unit_id, unit.heading, target_hint, state)
|
||||
op = self._resolve_op(op_hint, unit_id, path, state)
|
||||
application = str(unit.metadata.get("application") or context.analytics_meta.application or hint.application).strip()
|
||||
platform = str(unit.metadata.get("platform") or context.analytics_meta.platform or hint.platform).strip().lower()
|
||||
page_type = str(unit.metadata.get("page_type") or hint.page_type or self._path_policy.default_page_type(doc_type)).strip()
|
||||
path = self._path_policy.resolve_path(
|
||||
doc_type=doc_type,
|
||||
unit_id=unit_id,
|
||||
application=application,
|
||||
platform=platform,
|
||||
page_type=page_type,
|
||||
inferred_path=hint.path,
|
||||
state=state,
|
||||
)
|
||||
op = self._path_policy.resolve_op(op_hint=op_hint, unit_id=unit_id, path=path, state=state)
|
||||
source_refs = self._as_list(unit.metadata.get("source_refs")) or ["section: 5. Функциональные требования"]
|
||||
related_docs = self._as_list(unit.metadata.get("related_docs"))
|
||||
reason = f"Из unit '{unit.heading}' системной аналитики ({context.analytics_meta.analysis_id or 'analysis'})."
|
||||
@@ -126,34 +134,6 @@ class BuildChangePlanStep(WorkflowStep[DocUpdateFromFeatureContext]):
|
||||
related_docs=related_docs,
|
||||
)
|
||||
|
||||
def _resolve_path(self, doc_type: str, unit_id: str, heading: str, hint: str, state: DocsState) -> str:
|
||||
if unit_id in state.by_doc_id:
|
||||
return state.by_doc_id[unit_id]
|
||||
if hint:
|
||||
return hint
|
||||
folder = DOC_TYPE_TO_FOLDER.get(doc_type, "docs")
|
||||
slug = self._slugify(unit_id or heading)
|
||||
return f"{folder}/{slug}.md"
|
||||
|
||||
def _resolve_op(self, op_hint: str, unit_id: str, path: str, state: DocsState) -> str:
|
||||
if op_hint == "delete":
|
||||
return "delete"
|
||||
if op_hint == "create":
|
||||
return "create"
|
||||
if op_hint == "update":
|
||||
return "update"
|
||||
if path in state.by_path or unit_id in state.by_doc_id:
|
||||
return "update"
|
||||
return "create"
|
||||
|
||||
def _make_doc_id(self, doc_type: str, heading: str) -> str:
|
||||
slug = self._slugify(heading).replace("-", "_")
|
||||
return f"{doc_type}.{slug}".strip(".")
|
||||
|
||||
def _slugify(self, value: str) -> str:
|
||||
cleaned = re.sub(r"[^a-zA-Z0-9а-яА-Я_-]+", "-", value.lower()).strip("-")
|
||||
return re.sub(r"-+", "-", cleaned) or "doc"
|
||||
|
||||
def _as_list(self, value: object) -> list[str]:
|
||||
if isinstance(value, list):
|
||||
return [str(item).strip() for item in value if str(item).strip()]
|
||||
|
||||
+8
-5
@@ -15,7 +15,7 @@ class ParsedFeatureSpec:
|
||||
|
||||
|
||||
class FeatureMarkdownParser:
|
||||
_META_KEYS = {"analysis_id", "domains", "subdomains"}
|
||||
_META_KEYS = {"analysis_id", "application", "platform", "domain", "sub_domain", "domains", "subdomains"}
|
||||
|
||||
def parse(self, content: str) -> ParsedFeatureSpec:
|
||||
lines = content.splitlines()
|
||||
@@ -54,8 +54,10 @@ class FeatureMarkdownParser:
|
||||
i = j
|
||||
return AnalyticsMeta(
|
||||
analysis_id=str(values.get("analysis_id") or "").strip(),
|
||||
domains=self._as_list(values.get("domains")),
|
||||
subdomains=self._as_list(values.get("subdomains")),
|
||||
application=str(values.get("application") or "").strip(),
|
||||
platform=str(values.get("platform") or "").strip(),
|
||||
domains=self._as_list(values.get("domain") or values.get("domains")),
|
||||
subdomains=self._as_list(values.get("sub_domain") or values.get("subdomains")),
|
||||
)
|
||||
|
||||
def _extract_functional_section(self, lines: list[str]) -> list[str]:
|
||||
@@ -109,9 +111,10 @@ class FeatureMarkdownParser:
|
||||
body_start = i + 1
|
||||
i += 1
|
||||
continue
|
||||
if ":" not in stripped:
|
||||
line = stripped[2:].strip() if stripped.startswith("- ") else stripped
|
||||
if ":" not in line:
|
||||
break
|
||||
key, value = [part.strip() for part in stripped.split(":", 1)]
|
||||
key, value = [part.strip() for part in line.split(":", 1)]
|
||||
if not key.isidentifier():
|
||||
break
|
||||
if value:
|
||||
|
||||
+4
-2
@@ -21,9 +21,9 @@ class ParseFeatureRequirementsStep(WorkflowStep[DocUpdateFromFeatureContext]):
|
||||
if not context.analytics_meta.analysis_id:
|
||||
context.issues.append("Отсутствует analysis_id в metadata аналитики.")
|
||||
if not context.analytics_meta.domains:
|
||||
context.issues.append("Отсутствует domains в metadata аналитики.")
|
||||
context.issues.append("Отсутствует domain в metadata аналитики.")
|
||||
if not context.analytics_meta.subdomains:
|
||||
context.issues.append("Отсутствует subdomains в metadata аналитики.")
|
||||
context.issues.append("Отсутствует sub_domain в metadata аналитики.")
|
||||
if not context.units:
|
||||
context.issues.append(
|
||||
"Не найдены units в разделе '## 5. Функциональные требования' с заголовками уровня '###'."
|
||||
@@ -33,6 +33,8 @@ class ParseFeatureRequirementsStep(WorkflowStep[DocUpdateFromFeatureContext]):
|
||||
def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
|
||||
return {
|
||||
"analysis_id": context.analytics_meta.analysis_id,
|
||||
"application": context.analytics_meta.application,
|
||||
"platform": context.analytics_meta.platform,
|
||||
"domains": context.analytics_meta.domains,
|
||||
"subdomains": context.analytics_meta.subdomains,
|
||||
"units": len(context.units),
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class PlanUnitHint:
|
||||
doc_type: str = ""
|
||||
doc_id: str = ""
|
||||
application: str = ""
|
||||
platform: str = ""
|
||||
page_type: str = ""
|
||||
path: str = ""
|
||||
|
||||
|
||||
def parse_plan_hints(raw: str, allowed_doc_types: tuple[str, ...]) -> dict[int, PlanUnitHint]:
|
||||
try:
|
||||
data = json.loads(str(raw or "").strip())
|
||||
except json.JSONDecodeError:
|
||||
return {}
|
||||
rows = data.get("items") if isinstance(data, dict) else []
|
||||
if not isinstance(rows, list):
|
||||
return {}
|
||||
result: dict[int, PlanUnitHint] = {}
|
||||
for row in rows:
|
||||
if not isinstance(row, dict):
|
||||
continue
|
||||
index = row.get("index")
|
||||
if not isinstance(index, int):
|
||||
continue
|
||||
doc_type = str(row.get("doc_type") or "").strip()
|
||||
result[index] = PlanUnitHint(
|
||||
doc_type=doc_type if doc_type in allowed_doc_types else "",
|
||||
doc_id=str(row.get("id") or "").strip(),
|
||||
application=str(row.get("application") or "").strip(),
|
||||
platform=str(row.get("platform") or "").strip().lower(),
|
||||
page_type=str(row.get("page_type") or "").strip(),
|
||||
path=str(row.get("path") or "").strip(),
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def page_type_for_doc_type(doc_type: str, doc_type_to_folder: dict[str, str]) -> str:
|
||||
if doc_type == "index_page":
|
||||
return "index"
|
||||
folder = doc_type_to_folder.get(doc_type, "docs")
|
||||
parts = folder.split("/")
|
||||
return parts[-1] if parts else "docs"
|
||||
|
||||
|
||||
def normalize_inferred_path(inferred_path: str, unit_id: str) -> str:
|
||||
path = str(inferred_path or "").strip()
|
||||
if not path or not path.startswith("docs/"):
|
||||
return ""
|
||||
if not path.endswith(f"/{unit_id}.md"):
|
||||
return ""
|
||||
return path
|
||||
|
||||
|
||||
def normalize_path_segment(value: str) -> str:
|
||||
cleaned = re.sub(r"[^a-zA-Z0-9._-]+", "-", str(value or "").strip().lower()).strip("-")
|
||||
return re.sub(r"-+", "-", cleaned) or "unknown"
|
||||
+59
@@ -0,0 +1,59 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.docs_state_loader import DocsState
|
||||
from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.plan_hints import (
|
||||
normalize_inferred_path,
|
||||
normalize_path_segment,
|
||||
page_type_for_doc_type,
|
||||
)
|
||||
|
||||
_VALID_PLATFORMS = {"web", "ufs", "pprb"}
|
||||
|
||||
|
||||
class PlanPathPolicy:
|
||||
def __init__(self, doc_type_to_folder: dict[str, str]) -> None:
|
||||
self._doc_type_to_folder = dict(doc_type_to_folder)
|
||||
|
||||
def make_doc_id(self, *, doc_type: str, heading: str, hinted_doc_id: str) -> str:
|
||||
return str(hinted_doc_id or f"{doc_type}.{self._slugify(heading).replace('-', '_')}").strip(".")
|
||||
|
||||
def resolve_path(
|
||||
self,
|
||||
*,
|
||||
doc_type: str,
|
||||
unit_id: str,
|
||||
application: str,
|
||||
platform: str,
|
||||
page_type: str,
|
||||
inferred_path: str,
|
||||
state: DocsState,
|
||||
) -> str:
|
||||
if unit_id in state.by_doc_id:
|
||||
return state.by_doc_id[unit_id]
|
||||
normalized_inferred = normalize_inferred_path(inferred_path, unit_id)
|
||||
if normalized_inferred:
|
||||
return normalized_inferred
|
||||
page = normalize_path_segment(page_type or page_type_for_doc_type(doc_type, self._doc_type_to_folder))
|
||||
app = normalize_path_segment(application or "unknown_app")
|
||||
plat = platform if platform in _VALID_PLATFORMS else "unknown_platform"
|
||||
return f"docs/{app}/{plat}/{page}/{unit_id}.md"
|
||||
|
||||
def resolve_op(self, *, op_hint: str, unit_id: str, path: str, state: DocsState) -> str:
|
||||
if op_hint == "delete":
|
||||
return "delete"
|
||||
if op_hint == "create":
|
||||
return "create"
|
||||
if op_hint == "update":
|
||||
return "update"
|
||||
if path in state.by_path or unit_id in state.by_doc_id:
|
||||
return "update"
|
||||
return "create"
|
||||
|
||||
def default_page_type(self, doc_type: str) -> str:
|
||||
return page_type_for_doc_type(doc_type, self._doc_type_to_folder)
|
||||
|
||||
def _slugify(self, value: str) -> str:
|
||||
cleaned = re.sub(r"[^a-zA-Z0-9а-яА-Я_-]+", "-", value.lower()).strip("-")
|
||||
return re.sub(r"-+", "-", cleaned) or "doc"
|
||||
+49
-2
@@ -7,7 +7,16 @@ prompts:
|
||||
Верни только JSON:
|
||||
{
|
||||
"items": [
|
||||
{"index": 0, "doc_type": "api_method", "reason": "..."}
|
||||
{
|
||||
"index": 0,
|
||||
"doc_type": "api_method",
|
||||
"id": "ufs.contacts_dgr.api.create",
|
||||
"application": "coverage",
|
||||
"platform": "ufs",
|
||||
"page_type": "api",
|
||||
"path": "docs/coverage/ufs/api/ufs.contacts_dgr.api.create.md",
|
||||
"reason": "..."
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -15,12 +24,22 @@ prompts:
|
||||
- Используй только doc_type из allowed_doc_types.
|
||||
- Не пропускай item, даже если не уверен: выбери наиболее близкий тип.
|
||||
- Ориентируйся на heading и snippet.
|
||||
- path — это служебное поле плана изменений, не поле frontmatter.
|
||||
- id:
|
||||
- брать из metadata unit, если задан;
|
||||
- если id нет, сгенерировать стабильный id по смыслу unit и по аналогии с существующей документацией.
|
||||
- имя файла всегда формировать строго как <id>.md.
|
||||
- для существующего документа (если это видно из контекста и индекса) путь не менять.
|
||||
- для нового документа путь формировать строго как docs/<application>/<platform>/<page_type>/<id>.md.
|
||||
- platform использовать только из допустимых значений: web, ufs, pprb.
|
||||
- page_type выбирать по doc_type (например ui_page -> ui, api_method -> api, logic_block -> logic).
|
||||
- последний сегмент path обязан совпадать с <id>.md.
|
||||
- Никакого markdown и текста вне JSON.
|
||||
|
||||
build_doc_changeset: |
|
||||
Ты формируешь один item changeset для документации на основе системной аналитики и правил doc_rules.
|
||||
|
||||
Верни только JSON-объект формата:
|
||||
Верни только один JSON-объект (RFC8259) формата:
|
||||
{
|
||||
"op": "create|update|delete",
|
||||
"path": "docs/...",
|
||||
@@ -28,9 +47,37 @@ prompts:
|
||||
"proposed_content": "полный markdown документа для create/update"
|
||||
}
|
||||
|
||||
Схема и ограничения:
|
||||
- Обязательные поля всегда: op, path, reason.
|
||||
- Для op=create/update поле proposed_content обязательно и содержит полный markdown документа:
|
||||
1) frontmatter между --- и ---,
|
||||
2) затем body согласно doc_rules.
|
||||
- Для op=delete поле proposed_content запрещено.
|
||||
- В JSON используй двойные кавычки, без trailing commas.
|
||||
- Никаких code fences (```), комментариев и текста до/после JSON.
|
||||
|
||||
Правила:
|
||||
- Строго соблюдай структуру и ограничения из doc_rules_context.
|
||||
- Для create/update верни полный итоговый markdown (frontmatter + body).
|
||||
- Для update не используй placeholder-тексты; возвращай пригодный к сохранению документ.
|
||||
- reason обязателен, короткий, по сути изменения.
|
||||
- Никакого markdown и текста вне JSON.
|
||||
|
||||
repair_doc_changeset_json: |
|
||||
Ты ремонтируешь невалидный ответ модели и должен вернуть строго валидный JSON changeset.
|
||||
|
||||
Вход содержит:
|
||||
- expected_contract: ожидаемые поля и ограничения.
|
||||
- raw_llm_output: исходный (возможно невалидный) ответ.
|
||||
|
||||
Задача:
|
||||
- Извлеки максимально полный смысл из raw_llm_output.
|
||||
- Верни ровно один JSON-объект, соответствующий expected_contract.
|
||||
- Если часть данных отсутствует, используй безопасные значения по умолчанию:
|
||||
- reason: "generated by repair"
|
||||
- proposed_content: только если op=create/update, иначе не добавляй.
|
||||
|
||||
Ограничения вывода:
|
||||
- Только JSON-объект, без markdown/code fences/комментариев.
|
||||
- Двойные кавычки, без trailing commas.
|
||||
- Внутри строк (особенно proposed_content) все переносы строк должны быть экранированы как \\n, не literal newline.
|
||||
|
||||
+2
@@ -64,6 +64,8 @@ class DocUpdateFromFeatureWorkflowGraph(WorkflowGraph[TContext]):
|
||||
"project_root": str(getattr(context, "project_root", "") or ""),
|
||||
"feature_content_len": len(str(getattr(context, "feature_content", "") or "")),
|
||||
"analysis_id": str(getattr(analytics, "analysis_id", "") or ""),
|
||||
"application": str(getattr(analytics, "application", "") or ""),
|
||||
"platform": str(getattr(analytics, "platform", "") or ""),
|
||||
"domains": list(getattr(analytics, "domains", []) or []),
|
||||
"subdomains": list(getattr(analytics, "subdomains", []) or []),
|
||||
"units_count": len(units),
|
||||
|
||||
+2
@@ -6,6 +6,8 @@ from dataclasses import dataclass, field
|
||||
@dataclass(slots=True)
|
||||
class AnalyticsMeta:
|
||||
analysis_id: str = ""
|
||||
application: str = ""
|
||||
platform: str = ""
|
||||
domains: list[str] = field(default_factory=list)
|
||||
subdomains: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from app.core.api.application.session_service import SessionService
|
||||
@@ -16,6 +17,8 @@ from app.infra.observability.request_trace_logger import RequestTraceLogger
|
||||
from app.schemas.common import ErrorPayload, ModuleName
|
||||
from app.schemas.orchestration import RequestExecutionStatus
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AgentRuntime:
|
||||
def __init__(
|
||||
@@ -35,6 +38,12 @@ class AgentRuntime:
|
||||
self._trace_logger = trace_logger
|
||||
|
||||
async def run(self, request: AgentRequest, session: AgentSession) -> None:
|
||||
LOGGER.warning(
|
||||
"runtime run started: request_id=%s process_version=%s active_rag_session_id=%s",
|
||||
request.request_id,
|
||||
request.process_version,
|
||||
session.active_rag_session_id,
|
||||
)
|
||||
try:
|
||||
process = self._resolve_process(request.process_version)
|
||||
self._start_request(request, session)
|
||||
@@ -51,7 +60,19 @@ class AgentRuntime:
|
||||
request.apply_changeset = bool(result.apply_changeset)
|
||||
await self._publish_result(request)
|
||||
self._complete_request(request, session)
|
||||
LOGGER.warning(
|
||||
"runtime run completed: request_id=%s status=%s changeset_items=%s apply_changeset=%s",
|
||||
request.request_id,
|
||||
request.status,
|
||||
len(request.changeset),
|
||||
request.apply_changeset,
|
||||
)
|
||||
except Exception as exc:
|
||||
LOGGER.exception(
|
||||
"runtime run failed: request_id=%s process_version=%s",
|
||||
request.request_id,
|
||||
request.process_version,
|
||||
)
|
||||
await self._fail_request(request, exc)
|
||||
|
||||
def _resolve_process(self, version: str):
|
||||
@@ -66,8 +87,8 @@ class AgentRuntime:
|
||||
self._trace_logger.start_request(request, session)
|
||||
|
||||
async def _announce_start(self, request_id: str, process_version: str) -> None:
|
||||
await self._publisher.publish_status(request_id, "runtime", "Запрос принят и поставлен в обработку.")
|
||||
await self._publisher.publish_status(
|
||||
await self._safe_publish_status(request_id, "runtime", "Запрос принят и поставлен в обработку.")
|
||||
await self._safe_publish_status(
|
||||
request_id,
|
||||
"runtime",
|
||||
f"Запускаю процесс {process_version}.",
|
||||
@@ -75,8 +96,11 @@ class AgentRuntime:
|
||||
)
|
||||
|
||||
async def _publish_result(self, request: AgentRequest) -> None:
|
||||
try:
|
||||
await self._publisher.publish_user(request.request_id, "agent", request.answer or "")
|
||||
await self._publisher.publish_status(request.request_id, "runtime", "Обработка запроса завершена.")
|
||||
except Exception:
|
||||
LOGGER.exception("failed to publish user event: request_id=%s", request.request_id)
|
||||
await self._safe_publish_status(request.request_id, "runtime", "Обработка запроса завершена.")
|
||||
|
||||
def _complete_request(self, request: AgentRequest, session: AgentSession) -> None:
|
||||
session.append_turn(user_message=request.message, assistant_message=request.answer or "")
|
||||
@@ -92,7 +116,7 @@ class AgentRuntime:
|
||||
request.error = self._build_error_payload(exc)
|
||||
self._request_store.save(request)
|
||||
self._trace_logger.fail_request(request)
|
||||
await self._publisher.publish_status(
|
||||
await self._safe_publish_status(
|
||||
request.request_id,
|
||||
"runtime",
|
||||
"Во время обработки запроса произошла ошибка.",
|
||||
@@ -107,3 +131,14 @@ class AgentRuntime:
|
||||
desc="Agent request failed unexpectedly.",
|
||||
module=ModuleName.AGENT,
|
||||
)
|
||||
|
||||
async def _safe_publish_status(self, request_id: str, source: str, text: str, payload: dict | None = None) -> None:
|
||||
try:
|
||||
await self._publisher.publish_status(request_id, source, text, payload)
|
||||
except Exception:
|
||||
LOGGER.exception(
|
||||
"failed to publish status event: request_id=%s source=%s text=%s",
|
||||
request_id,
|
||||
source,
|
||||
text,
|
||||
)
|
||||
|
||||
@@ -1,10 +1,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from app.core.api.domain.events.client_event import ClientEventRecord
|
||||
from app.core.api.infrastructure.streaming.sse_event_channel import SseEventChannel
|
||||
from app.infra.observability.request_trace_logger import RequestTraceLogger
|
||||
from app.schemas.client_events import ClientEventType
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RuntimeEventPublisher:
|
||||
def __init__(self, channel: SseEventChannel, trace_logger: RequestTraceLogger) -> None:
|
||||
@@ -12,9 +16,23 @@ class RuntimeEventPublisher:
|
||||
self._trace_logger = trace_logger
|
||||
|
||||
async def publish_status(self, request_id: str, source: str, text: str, payload: dict | None = None) -> None:
|
||||
LOGGER.warning(
|
||||
"publish status: request_id=%s source=%s text=%s payload_keys=%s",
|
||||
request_id,
|
||||
source,
|
||||
text,
|
||||
sorted(list((payload or {}).keys())),
|
||||
)
|
||||
await self._publish(request_id, ClientEventType.STATUS, source, text, payload)
|
||||
|
||||
async def publish_user(self, request_id: str, source: str, text: str, payload: dict | None = None) -> None:
|
||||
LOGGER.warning(
|
||||
"publish user: request_id=%s source=%s text_len=%s payload_keys=%s",
|
||||
request_id,
|
||||
source,
|
||||
len(text or ""),
|
||||
sorted(list((payload or {}).keys())),
|
||||
)
|
||||
await self._publish(request_id, ClientEventType.USER, source, text, payload)
|
||||
|
||||
async def _publish(
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
from app.core.api.domain.models.agent_request import AgentRequest
|
||||
from app.core.api.infrastructure.ids.request_id_factory import RequestIdFactory
|
||||
@@ -8,6 +9,8 @@ from app.core.api.infrastructure.stores.in_memory_request_store import InMemoryR
|
||||
from app.core.api.application.session_service import SessionService
|
||||
from app.core.agent.runtime import AgentRuntime
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RequestService:
|
||||
def __init__(
|
||||
@@ -31,8 +34,28 @@ class RequestService:
|
||||
process_version=process_version,
|
||||
)
|
||||
self._request_store.save(request)
|
||||
asyncio.create_task(self._runtime.run(request, session))
|
||||
LOGGER.warning(
|
||||
"plugin request accepted: request_id=%s session_id=%s process_version=%s message=%s",
|
||||
request.request_id,
|
||||
session_id,
|
||||
process_version,
|
||||
(message or "").replace("\n", "\\n")[:500],
|
||||
)
|
||||
task = asyncio.create_task(self._runtime.run(request, session), name=f"agent-runtime:{request.request_id}")
|
||||
task.add_done_callback(self._log_task_result)
|
||||
return request
|
||||
|
||||
def get(self, request_id: str) -> AgentRequest | None:
|
||||
return self._request_store.get(request_id)
|
||||
|
||||
def _log_task_result(self, task: asyncio.Task) -> None:
|
||||
try:
|
||||
exc = task.exception()
|
||||
except asyncio.CancelledError:
|
||||
LOGGER.warning("agent runtime task cancelled: task=%s", task.get_name())
|
||||
return
|
||||
except Exception:
|
||||
LOGGER.exception("failed to inspect agent runtime task result: task=%s", task.get_name())
|
||||
return
|
||||
if exc is not None:
|
||||
LOGGER.exception("agent runtime task crashed: task=%s", task.get_name(), exc_info=exc)
|
||||
|
||||
@@ -1,10 +1,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from app.core.api.infrastructure.streaming.sse_response_builder import build_sse_response
|
||||
from app.core.rag.module import RagModule
|
||||
from app.core.shared.messaging import EventBus
|
||||
from app.schemas.rag_sessions import RagSessionJobResponse
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RagPublicController:
|
||||
def __init__(self, rag: RagModule) -> None:
|
||||
@@ -12,6 +16,14 @@ class RagPublicController:
|
||||
|
||||
def get_job(self, rag_session_id: str, index_job_id: str) -> RagSessionJobResponse:
|
||||
job = self._rag.get_session_job(rag_session_id, index_job_id)
|
||||
LOGGER.warning(
|
||||
"rag job polled: rag_session_id=%s job_id=%s status=%s indexed=%s failed=%s",
|
||||
rag_session_id,
|
||||
index_job_id,
|
||||
job.status.value if hasattr(job.status, "value") else str(job.status),
|
||||
job.indexed_files,
|
||||
job.failed_files,
|
||||
)
|
||||
return RagSessionJobResponse(
|
||||
rag_session_id=rag_session_id,
|
||||
index_job_id=job.index_job_id,
|
||||
@@ -25,6 +37,7 @@ class RagPublicController:
|
||||
|
||||
async def stream_job_events(self, rag_session_id: str, index_job_id: str):
|
||||
channel_id, queue = await self._rag.subscribe_session_job_events(rag_session_id, index_job_id)
|
||||
LOGGER.warning("rag job events subscribed: rag_session_id=%s job_id=%s", rag_session_id, index_job_id)
|
||||
return build_sse_response(
|
||||
queue,
|
||||
encoder=EventBus.as_sse,
|
||||
|
||||
@@ -7,5 +7,11 @@ class GigaChatEmbedder:
|
||||
def __init__(self, client: GigaChatClient) -> None:
|
||||
self._client = client
|
||||
|
||||
def embed(self, texts: list[str]) -> list[list[float]]:
|
||||
return self._client.embed(texts)
|
||||
def embed(
|
||||
self,
|
||||
texts: list[str],
|
||||
*,
|
||||
timeout_sec: int | None = None,
|
||||
max_retries: int | None = None,
|
||||
) -> list[list[float]]:
|
||||
return self._client.embed(texts, timeout_sec=timeout_sec, max_retries=max_retries)
|
||||
|
||||
@@ -2,46 +2,33 @@ from __future__ import annotations
|
||||
|
||||
from app.core.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource
|
||||
from app.core.rag.indexing.docs.chunkers.markdown_chunker import SectionChunk
|
||||
from app.core.rag.indexing.docs.frontmatter_metadata import merge_frontmatter_metadata
|
||||
from app.core.rag.indexing.docs.frontmatter_view import DocsFrontmatterView
|
||||
|
||||
|
||||
class DocsDocumentBuilder:
|
||||
def build_document_catalog(self, source: RagSource, frontmatter: dict, summary_text: str, doc_kind: str, *, fallback_title: str) -> RagDocument:
|
||||
view = DocsFrontmatterView(frontmatter)
|
||||
document_id = view.document_id or source.path
|
||||
metadata = {
|
||||
"document_id": document_id,
|
||||
"type": view.doc_type,
|
||||
"name": view.name,
|
||||
"title": view.title(fallback_title),
|
||||
"module": view.module,
|
||||
"domain": view.domain,
|
||||
"subdomain": view.subdomain,
|
||||
"layer": view.layer,
|
||||
"status": view.status,
|
||||
"updated_at": view.updated_at,
|
||||
"tags": view.tags,
|
||||
"entities": view.entities,
|
||||
"parent": view.parent,
|
||||
"children": view.children,
|
||||
"links": view.links,
|
||||
"source_path": source.path,
|
||||
"summary_text": summary_text[:4000],
|
||||
"doc_kind": doc_kind,
|
||||
"artifact_type": "DOCS",
|
||||
}
|
||||
metadata = merge_frontmatter_metadata({}, frontmatter)
|
||||
if view.document_id:
|
||||
metadata["document_id"] = view.document_id
|
||||
metadata["source_path"] = source.path
|
||||
metadata["summary_text"] = summary_text[:4000]
|
||||
metadata["doc_kind"] = doc_kind
|
||||
row_title = str(frontmatter.get("title") or "").strip() or fallback_title or source.path
|
||||
return RagDocument(
|
||||
layer=RagLayer.DOCS_DOCUMENT_CATALOG,
|
||||
source=source,
|
||||
title=metadata["title"] or document_id,
|
||||
text=summary_text[:4000] or metadata["title"] or document_id,
|
||||
title=row_title,
|
||||
text=summary_text[:4000] or row_title,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
def build_doc_chunk(self, source: RagSource, chunk: SectionChunk, frontmatter: dict, doc_kind: str) -> RagDocument:
|
||||
view = DocsFrontmatterView(frontmatter)
|
||||
document_id = view.document_id or source.path
|
||||
metadata = {
|
||||
metadata = merge_frontmatter_metadata(
|
||||
{
|
||||
"document_id": document_id,
|
||||
"type": view.doc_type,
|
||||
"module": view.module,
|
||||
@@ -54,7 +41,9 @@ class DocsDocumentBuilder:
|
||||
"doc_kind": doc_kind,
|
||||
"source_path": source.path,
|
||||
"artifact_type": "DOCS",
|
||||
}
|
||||
},
|
||||
frontmatter,
|
||||
)
|
||||
return RagDocument(
|
||||
layer=RagLayer.DOCS_DOC_CHUNKS,
|
||||
source=source,
|
||||
@@ -67,7 +56,8 @@ class DocsDocumentBuilder:
|
||||
def build_entity_record(self, source: RagSource, frontmatter: dict, entity: str) -> RagDocument:
|
||||
view = DocsFrontmatterView(frontmatter)
|
||||
document_id = view.document_id or source.path
|
||||
metadata = {
|
||||
metadata = merge_frontmatter_metadata(
|
||||
{
|
||||
"entity_name": entity,
|
||||
"document_id": document_id,
|
||||
"document_type": view.doc_type,
|
||||
@@ -77,7 +67,9 @@ class DocsDocumentBuilder:
|
||||
"tags": view.tags,
|
||||
"source_path": source.path,
|
||||
"artifact_type": "DOCS",
|
||||
}
|
||||
},
|
||||
frontmatter,
|
||||
)
|
||||
return RagDocument(
|
||||
layer=RagLayer.DOCS_ENTITY_CATALOG,
|
||||
source=source,
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
def merge_frontmatter_metadata(base: dict[str, object], frontmatter: dict) -> dict[str, object]:
|
||||
merged = dict(base)
|
||||
if not isinstance(frontmatter, dict):
|
||||
return merged
|
||||
for raw_key, value in frontmatter.items():
|
||||
key = str(raw_key or "").strip()
|
||||
if not key or key == "__frontmatter_parse_error__" or key in merged:
|
||||
continue
|
||||
merged[key] = value
|
||||
return merged
|
||||
@@ -1,12 +1,17 @@
|
||||
"""Хранилище задач индексации RAG (in-memory + persistence)."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
import logging
|
||||
import os
|
||||
from uuid import uuid4
|
||||
|
||||
from app.core.rag.persistence.repository import RagRepository
|
||||
from app.schemas.common import ErrorPayload, ModuleName
|
||||
from app.schemas.indexing import IndexJobStatus
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class IndexJob:
|
||||
@@ -46,7 +51,7 @@ class IndexJobStore:
|
||||
desc=row.error_desc or "",
|
||||
module=module,
|
||||
)
|
||||
return IndexJob(
|
||||
job = IndexJob(
|
||||
index_job_id=row.index_job_id,
|
||||
rag_session_id=row.rag_session_id,
|
||||
status=IndexJobStatus(row.status),
|
||||
@@ -56,6 +61,25 @@ class IndexJobStore:
|
||||
cache_miss_files=row.cache_miss_files,
|
||||
error=payload,
|
||||
)
|
||||
stale_timeout_sec = max(1, int(os.getenv("RAG_RUNNING_STALE_TIMEOUT_SEC", "8")))
|
||||
if job.status == IndexJobStatus.RUNNING and self._is_stale(row.updated_at, stale_timeout_sec):
|
||||
payload = ErrorPayload(
|
||||
code="index_stalled",
|
||||
desc="Indexing stalled in running state; likely blocked network call during embedding/auth.",
|
||||
module=ModuleName.RAG,
|
||||
)
|
||||
job.status = IndexJobStatus.ERROR
|
||||
job.error = payload
|
||||
self.save(job)
|
||||
LOGGER.error("rag index job marked stale->error: job_id=%s timeout_sec=%s", job.index_job_id, stale_timeout_sec)
|
||||
return job
|
||||
|
||||
def _is_stale(self, updated_at: datetime | None, stale_timeout_sec: int) -> bool:
|
||||
if updated_at is None:
|
||||
return False
|
||||
ts = updated_at if updated_at.tzinfo else updated_at.replace(tzinfo=UTC)
|
||||
age = (datetime.now(UTC) - ts).total_seconds()
|
||||
return age >= stale_timeout_sec
|
||||
|
||||
def save(self, job: IndexJob) -> None:
|
||||
error_code = job.error.code if job.error else None
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
"""Оркестрация индексации RAG (очередь задач, события)."""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from collections import defaultdict
|
||||
|
||||
from app.schemas.common import ErrorPayload, ModuleName
|
||||
@@ -15,6 +17,8 @@ from app.core.rag.indexing.job_store import IndexJob, IndexJobStore
|
||||
from app.core.shared.messaging import EventBus
|
||||
from app.core.shared.resilience import RetryExecutor
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IndexingOrchestrator:
|
||||
def __init__(
|
||||
@@ -32,11 +36,23 @@ class IndexingOrchestrator:
|
||||
|
||||
async def enqueue_snapshot(self, rag_session_id: str, files: list[dict]) -> IndexJob:
|
||||
job = self._store.create(rag_session_id)
|
||||
LOGGER.warning(
|
||||
"rag index snapshot queued: job_id=%s rag_session_id=%s files=%s",
|
||||
job.index_job_id,
|
||||
rag_session_id,
|
||||
len(files),
|
||||
)
|
||||
asyncio.create_task(self._process_snapshot(job.index_job_id, rag_session_id, files))
|
||||
return job
|
||||
|
||||
async def enqueue_changes(self, rag_session_id: str, changed_files: list[dict]) -> IndexJob:
|
||||
job = self._store.create(rag_session_id)
|
||||
LOGGER.warning(
|
||||
"rag index changes queued: job_id=%s rag_session_id=%s changes=%s",
|
||||
job.index_job_id,
|
||||
rag_session_id,
|
||||
len(changed_files),
|
||||
)
|
||||
asyncio.create_task(self._process_changes(job.index_job_id, rag_session_id, changed_files))
|
||||
return job
|
||||
|
||||
@@ -71,9 +87,16 @@ class IndexingOrchestrator:
|
||||
async with lock:
|
||||
job = self._store.get(job_id)
|
||||
if not job:
|
||||
LOGGER.warning("rag index job missing in store before start: job_id=%s", job_id)
|
||||
return
|
||||
job.status = IndexJobStatus.RUNNING
|
||||
self._store.save(job)
|
||||
LOGGER.warning(
|
||||
"rag index job running: job_id=%s rag_session_id=%s total_files=%s",
|
||||
job_id,
|
||||
rag_session_id,
|
||||
total_files,
|
||||
)
|
||||
await self._events.publish(
|
||||
job_id,
|
||||
"index_status",
|
||||
@@ -94,13 +117,25 @@ class IndexingOrchestrator:
|
||||
},
|
||||
)
|
||||
|
||||
indexed, failed, cache_hits, cache_misses = await self._retry.run(lambda: operation(progress_cb))
|
||||
timeout_sec = max(1, int(os.getenv("RAG_INDEX_JOB_TIMEOUT_SEC", "15")))
|
||||
indexed, failed, cache_hits, cache_misses = await asyncio.wait_for(
|
||||
operation(progress_cb),
|
||||
timeout=timeout_sec,
|
||||
)
|
||||
job.status = IndexJobStatus.DONE
|
||||
job.indexed_files = indexed
|
||||
job.failed_files = failed
|
||||
job.cache_hit_files = cache_hits
|
||||
job.cache_miss_files = cache_misses
|
||||
self._store.save(job)
|
||||
LOGGER.warning(
|
||||
"rag index job done: job_id=%s indexed=%s failed=%s cache_hits=%s cache_misses=%s",
|
||||
job_id,
|
||||
indexed,
|
||||
failed,
|
||||
cache_hits,
|
||||
cache_misses,
|
||||
)
|
||||
await self._events.publish(
|
||||
job_id,
|
||||
"index_status",
|
||||
@@ -129,12 +164,72 @@ class IndexingOrchestrator:
|
||||
)
|
||||
except (TimeoutError, ConnectionError, OSError) as exc:
|
||||
job.status = IndexJobStatus.ERROR
|
||||
job.failed_files = max(1, job.failed_files)
|
||||
job.error = ErrorPayload(
|
||||
code="index_retry_exhausted",
|
||||
desc=f"Temporary indexing failure after retries: {exc}",
|
||||
code="index_runtime_error",
|
||||
desc=f"Indexing failed: {exc}",
|
||||
module=ModuleName.RAG,
|
||||
)
|
||||
self._store.save(job)
|
||||
LOGGER.exception("rag index job runtime-error: job_id=%s", job_id)
|
||||
await self._events.publish(
|
||||
job_id,
|
||||
"index_status",
|
||||
{"index_job_id": job_id, "status": job.status.value, "total_files": total_files},
|
||||
)
|
||||
await self._events.publish(
|
||||
job_id,
|
||||
"terminal",
|
||||
{
|
||||
"index_job_id": job_id,
|
||||
"status": "error",
|
||||
"total_files": total_files,
|
||||
"error": {
|
||||
"code": job.error.code,
|
||||
"desc": job.error.desc,
|
||||
"module": job.error.module.value,
|
||||
},
|
||||
},
|
||||
)
|
||||
except asyncio.TimeoutError as exc:
|
||||
job.status = IndexJobStatus.ERROR
|
||||
job.failed_files = max(1, job.failed_files)
|
||||
job.error = ErrorPayload(
|
||||
code="index_timeout",
|
||||
desc=f"Indexing timed out while processing snapshot/changes: {exc}",
|
||||
module=ModuleName.RAG,
|
||||
)
|
||||
self._store.save(job)
|
||||
LOGGER.exception("rag index job timed out: job_id=%s", job_id)
|
||||
await self._events.publish(
|
||||
job_id,
|
||||
"index_status",
|
||||
{"index_job_id": job_id, "status": job.status.value, "total_files": total_files},
|
||||
)
|
||||
await self._events.publish(
|
||||
job_id,
|
||||
"terminal",
|
||||
{
|
||||
"index_job_id": job_id,
|
||||
"status": "error",
|
||||
"total_files": total_files,
|
||||
"error": {
|
||||
"code": job.error.code,
|
||||
"desc": job.error.desc,
|
||||
"module": job.error.module.value,
|
||||
},
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
job.status = IndexJobStatus.ERROR
|
||||
job.failed_files = max(1, job.failed_files)
|
||||
job.error = ErrorPayload(
|
||||
code="index_unexpected_error",
|
||||
desc=f"Unexpected indexing failure: {exc}",
|
||||
module=ModuleName.RAG,
|
||||
)
|
||||
self._store.save(job)
|
||||
LOGGER.exception("rag index job failed unexpectedly: job_id=%s", job_id)
|
||||
await self._events.publish(
|
||||
job_id,
|
||||
"index_status",
|
||||
|
||||
@@ -32,6 +32,7 @@ class RagService:
|
||||
self._repo = repository
|
||||
self._docs = DocsIndexingPipeline()
|
||||
self._code = CodeIndexingPipeline()
|
||||
self._cache_enabled = os.getenv("RAG_DOCUMENT_CACHE_ENABLED", "false").strip().lower() in {"1", "true", "yes", "on"}
|
||||
|
||||
async def index_snapshot(
|
||||
self,
|
||||
@@ -39,8 +40,16 @@ class RagService:
|
||||
files: list[dict],
|
||||
progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None = None,
|
||||
) -> tuple[int, int, int, int]:
|
||||
LOGGER.warning("rag index snapshot started: rag_session_id=%s files=%s", rag_session_id, len(files))
|
||||
report = await self._index_files(rag_session_id, files, progress_cb=progress_cb)
|
||||
self._repo.replace_documents(rag_session_id, report.documents_list)
|
||||
LOGGER.warning(
|
||||
"rag index snapshot persisted: rag_session_id=%s indexed=%s failed=%s docs=%s",
|
||||
rag_session_id,
|
||||
report.indexed_files,
|
||||
report.failed_files,
|
||||
len(report.documents_list),
|
||||
)
|
||||
return report.as_tuple()
|
||||
|
||||
async def index_changes(
|
||||
@@ -49,6 +58,7 @@ class RagService:
|
||||
changed_files: list[dict],
|
||||
progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None = None,
|
||||
) -> tuple[int, int, int, int]:
|
||||
LOGGER.warning("rag index changes started: rag_session_id=%s changes=%s", rag_session_id, len(changed_files))
|
||||
delete_paths: list[str] = []
|
||||
upserts: list[dict] = []
|
||||
for item in changed_files:
|
||||
@@ -58,6 +68,14 @@ class RagService:
|
||||
upserts.append(item)
|
||||
report = await self._index_files(rag_session_id, upserts, progress_cb=progress_cb)
|
||||
self._repo.apply_document_changes(rag_session_id, delete_paths, report.documents_list)
|
||||
LOGGER.warning(
|
||||
"rag index changes persisted: rag_session_id=%s indexed=%s failed=%s docs=%s delete_paths=%s",
|
||||
rag_session_id,
|
||||
report.indexed_files,
|
||||
report.failed_files,
|
||||
len(report.documents_list),
|
||||
len(delete_paths),
|
||||
)
|
||||
return report.as_tuple()
|
||||
|
||||
async def _index_files(
|
||||
@@ -80,9 +98,18 @@ class RagService:
|
||||
for index, file in enumerate(indexable_files, start=1):
|
||||
path = str(file.get("path", ""))
|
||||
try:
|
||||
LOGGER.warning(
|
||||
"rag index file started: rag_session_id=%s file=%s/%s path=%s",
|
||||
rag_session_id,
|
||||
index,
|
||||
total_files,
|
||||
path,
|
||||
)
|
||||
blob_sha = self._blob_sha(file)
|
||||
cached = await asyncio.to_thread(self._repo.get_cached_documents, repo_id, blob_sha)
|
||||
pipelines = self._resolve_pipeline_names(path)
|
||||
cached = []
|
||||
if self._cache_enabled:
|
||||
cached = await asyncio.to_thread(self._repo.get_cached_documents, repo_id, blob_sha)
|
||||
if cached:
|
||||
self._report_missing_or_partial_docs(path, cached)
|
||||
report.documents_list.extend(self._with_file_metadata(cached, file, repo_id, blob_sha))
|
||||
@@ -95,9 +122,32 @@ class RagService:
|
||||
)
|
||||
else:
|
||||
built = self._build_documents(repo_id, path, file)
|
||||
LOGGER.warning(
|
||||
"rag index file built docs: rag_session_id=%s path=%s docs=%s",
|
||||
rag_session_id,
|
||||
path,
|
||||
len(built),
|
||||
)
|
||||
self._report_missing_or_partial_docs(path, built)
|
||||
embedded = await asyncio.to_thread(self._embed_documents, built, file, repo_id, blob_sha)
|
||||
embed_timeout_sec = max(1, int(os.getenv("RAG_EMBED_FILE_TIMEOUT_SEC", "8")))
|
||||
LOGGER.warning(
|
||||
"rag index file embedding started: rag_session_id=%s path=%s timeout_sec=%s",
|
||||
rag_session_id,
|
||||
path,
|
||||
embed_timeout_sec,
|
||||
)
|
||||
embedded = await asyncio.wait_for(
|
||||
asyncio.to_thread(self._embed_documents, built, file, repo_id, blob_sha),
|
||||
timeout=embed_timeout_sec,
|
||||
)
|
||||
LOGGER.warning(
|
||||
"rag index file embedded docs: rag_session_id=%s path=%s docs=%s",
|
||||
rag_session_id,
|
||||
path,
|
||||
len(embedded),
|
||||
)
|
||||
report.documents_list.extend(embedded)
|
||||
if self._cache_enabled:
|
||||
await asyncio.to_thread(self._repo.cache_documents, repo_id, path, blob_sha, embedded)
|
||||
report.cache_miss_files += 1
|
||||
LOGGER.warning(
|
||||
@@ -107,6 +157,13 @@ class RagService:
|
||||
",".join(pipelines),
|
||||
)
|
||||
report.indexed_files += 1
|
||||
LOGGER.warning(
|
||||
"rag index file completed: rag_session_id=%s file=%s/%s path=%s",
|
||||
rag_session_id,
|
||||
index,
|
||||
total_files,
|
||||
path,
|
||||
)
|
||||
except Exception as exc:
|
||||
report.failed_files += 1
|
||||
report.warnings.append(f"{path}: {exc}")
|
||||
@@ -116,6 +173,8 @@ class RagService:
|
||||
path,
|
||||
exc,
|
||||
)
|
||||
# Fail-fast: stop indexing immediately so caller can expose the exact error to plugin.
|
||||
raise RuntimeError(f"RAG indexing failed for '{path}': {exc}") from exc
|
||||
await self._notify_progress(progress_cb, index, total_files, path)
|
||||
report.documents = len(report.documents_list)
|
||||
return report
|
||||
@@ -156,12 +215,32 @@ class RagService:
|
||||
if not docs:
|
||||
return []
|
||||
batch_size = max(1, int(os.getenv("RAG_EMBED_BATCH_SIZE", "16")))
|
||||
request_timeout_sec = max(1, int(os.getenv("RAG_EMBED_REQUEST_TIMEOUT_SEC", "5")))
|
||||
request_retries = max(1, int(os.getenv("RAG_EMBED_REQUEST_MAX_RETRIES", "1")))
|
||||
metadata = self._document_metadata(file, repo_id, blob_sha)
|
||||
for doc in docs:
|
||||
doc.metadata.update(metadata)
|
||||
for start in range(0, len(docs), batch_size):
|
||||
batch = docs[start : start + batch_size]
|
||||
vectors = self._embedder.embed([doc.text for doc in batch])
|
||||
LOGGER.warning(
|
||||
"rag embed batch start: path=%s batch_start=%s batch_size=%s timeout_sec=%s retries=%s",
|
||||
file.get("path", ""),
|
||||
start,
|
||||
len(batch),
|
||||
request_timeout_sec,
|
||||
request_retries,
|
||||
)
|
||||
vectors = self._embedder.embed(
|
||||
[doc.text for doc in batch],
|
||||
timeout_sec=request_timeout_sec,
|
||||
max_retries=request_retries,
|
||||
)
|
||||
LOGGER.warning(
|
||||
"rag embed batch done: path=%s batch_start=%s vectors=%s",
|
||||
file.get("path", ""),
|
||||
start,
|
||||
len(vectors),
|
||||
)
|
||||
for doc, vector in zip(batch, vectors):
|
||||
doc.embedding = vector
|
||||
return docs
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
@@ -19,6 +20,7 @@ class RagJobRow:
|
||||
error_code: str | None
|
||||
error_desc: str | None
|
||||
error_module: str | None
|
||||
updated_at: datetime | None
|
||||
|
||||
|
||||
class RagJobRepository:
|
||||
@@ -85,7 +87,7 @@ class RagJobRepository:
|
||||
text(
|
||||
"""
|
||||
SELECT index_job_id, rag_session_id, status, indexed_files, failed_files,
|
||||
cache_hit_files, cache_miss_files, error_code, error_desc, error_module
|
||||
cache_hit_files, cache_miss_files, error_code, error_desc, error_module, updated_at
|
||||
FROM rag_index_jobs
|
||||
WHERE index_job_id = :jid
|
||||
"""
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import time
|
||||
import logging
|
||||
|
||||
import requests
|
||||
|
||||
@@ -6,6 +7,9 @@ from app.infra.constants import MAX_RETRIES
|
||||
from app.core.shared.gigachat.errors import GigaChatError
|
||||
from app.core.shared.gigachat.settings import GigaChatSettings
|
||||
from app.core.shared.gigachat.token_provider import GigaChatTokenProvider
|
||||
from app.core.shared.network.hard_timeout import run_with_hard_timeout
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GigaChatClient:
|
||||
@@ -30,13 +34,26 @@ class GigaChatClient:
|
||||
message = choices[0].get("message") or {}
|
||||
return str(message.get("content") or "")
|
||||
|
||||
def embed(self, texts: list[str]) -> list[list[float]]:
|
||||
def embed(
|
||||
self,
|
||||
texts: list[str],
|
||||
*,
|
||||
timeout_sec: int | None = None,
|
||||
max_retries: int | None = None,
|
||||
) -> list[list[float]]:
|
||||
token = self._tokens.get_access_token()
|
||||
payload = {
|
||||
"model": self._settings.embedding_model,
|
||||
"input": texts,
|
||||
}
|
||||
response = self._post_with_retry("/embeddings", payload, token=token, timeout=90, operation_name="embeddings")
|
||||
response = self._post_with_retry(
|
||||
"/embeddings",
|
||||
payload,
|
||||
token=token,
|
||||
timeout=timeout_sec or 90,
|
||||
operation_name="embeddings",
|
||||
max_retries=max_retries,
|
||||
)
|
||||
data = response.json()
|
||||
items = data.get("data")
|
||||
if not isinstance(items, list):
|
||||
@@ -51,11 +68,22 @@ class GigaChatClient:
|
||||
token: str,
|
||||
timeout: int,
|
||||
operation_name: str,
|
||||
max_retries: int | None = None,
|
||||
):
|
||||
last_error: Exception | None = None
|
||||
for attempt in range(1, MAX_RETRIES + 1):
|
||||
retries = max(1, int(max_retries or MAX_RETRIES))
|
||||
for attempt in range(1, retries + 1):
|
||||
try:
|
||||
response = requests.post(
|
||||
LOGGER.warning(
|
||||
"gigachat request start: operation=%s path=%s attempt=%s/%s timeout_sec=%s",
|
||||
operation_name,
|
||||
path,
|
||||
attempt,
|
||||
retries,
|
||||
timeout,
|
||||
)
|
||||
response = run_with_hard_timeout(
|
||||
lambda: requests.post(
|
||||
f"{self._settings.api_url.rstrip('/')}{path}",
|
||||
json=payload,
|
||||
headers={
|
||||
@@ -64,8 +92,26 @@ class GigaChatClient:
|
||||
},
|
||||
timeout=timeout,
|
||||
verify=self._settings.ssl_verify,
|
||||
),
|
||||
timeout_sec=timeout,
|
||||
operation_name=f"gigachat_{operation_name}",
|
||||
)
|
||||
LOGGER.warning(
|
||||
"gigachat request done: operation=%s path=%s attempt=%s/%s status=%s",
|
||||
operation_name,
|
||||
path,
|
||||
attempt,
|
||||
retries,
|
||||
response.status_code,
|
||||
)
|
||||
except requests.RequestException as exc:
|
||||
LOGGER.exception(
|
||||
"gigachat request failed: operation=%s path=%s attempt=%s/%s",
|
||||
operation_name,
|
||||
path,
|
||||
attempt,
|
||||
retries,
|
||||
)
|
||||
last_error = GigaChatError(f"GigaChat {operation_name} request failed: {exc}")
|
||||
else:
|
||||
if response.status_code < 400:
|
||||
@@ -73,7 +119,7 @@ class GigaChatClient:
|
||||
last_error = GigaChatError(f"GigaChat {operation_name} error {response.status_code}: {response.text}")
|
||||
if not self._is_retryable_status(response.status_code):
|
||||
raise last_error
|
||||
if attempt == MAX_RETRIES:
|
||||
if attempt == retries:
|
||||
break
|
||||
time.sleep(0.1 * attempt)
|
||||
if last_error is None:
|
||||
|
||||
@@ -1,11 +1,16 @@
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
import logging
|
||||
import os
|
||||
|
||||
import requests
|
||||
|
||||
from app.core.shared.gigachat.errors import GigaChatError
|
||||
from app.core.shared.gigachat.settings import GigaChatSettings
|
||||
from app.core.shared.network.hard_timeout import run_with_hard_timeout
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GigaChatTokenProvider:
|
||||
@@ -30,6 +35,7 @@ class GigaChatTokenProvider:
|
||||
def _fetch_token(self) -> tuple[str, float]:
|
||||
if not self._settings.credentials:
|
||||
raise GigaChatError("GIGACHAT_TOKEN is not set")
|
||||
timeout_sec = max(1, int(os.getenv("GIGACHAT_AUTH_TIMEOUT_SEC", "5")))
|
||||
headers = {
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
"Accept": "application/json",
|
||||
@@ -37,14 +43,21 @@ class GigaChatTokenProvider:
|
||||
"RqUID": str(uuid.uuid4()),
|
||||
}
|
||||
try:
|
||||
response = requests.post(
|
||||
LOGGER.warning("gigachat auth start: url=%s timeout_sec=%s", self._settings.auth_url, timeout_sec)
|
||||
response = run_with_hard_timeout(
|
||||
lambda: requests.post(
|
||||
self._settings.auth_url,
|
||||
headers=headers,
|
||||
data=f"scope={self._settings.scope}",
|
||||
timeout=30,
|
||||
timeout=timeout_sec,
|
||||
verify=self._settings.ssl_verify,
|
||||
),
|
||||
timeout_sec=timeout_sec,
|
||||
operation_name="gigachat_auth",
|
||||
)
|
||||
LOGGER.warning("gigachat auth done: status=%s", response.status_code)
|
||||
except requests.RequestException as exc:
|
||||
LOGGER.exception("gigachat auth failed")
|
||||
raise GigaChatError(f"GigaChat auth request failed: {exc}") from exc
|
||||
|
||||
if response.status_code >= 400:
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import queue
|
||||
import threading
|
||||
from collections.abc import Callable
|
||||
from typing import TypeVar
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def run_with_hard_timeout(operation: Callable[[], T], *, timeout_sec: int, operation_name: str) -> T:
|
||||
result_queue: queue.Queue[tuple[bool, object]] = queue.Queue(maxsize=1)
|
||||
|
||||
def _runner() -> None:
|
||||
try:
|
||||
result_queue.put((True, operation()))
|
||||
except BaseException as exc: # noqa: BLE001
|
||||
result_queue.put((False, exc))
|
||||
|
||||
thread = threading.Thread(target=_runner, name=f"hard-timeout:{operation_name}", daemon=True)
|
||||
thread.start()
|
||||
thread.join(timeout=max(1, int(timeout_sec)))
|
||||
if thread.is_alive():
|
||||
raise TimeoutError(f"{operation_name} exceeded hard timeout ({timeout_sec}s)")
|
||||
if result_queue.empty():
|
||||
raise TimeoutError(f"{operation_name} finished without a result")
|
||||
ok, value = result_queue.get_nowait()
|
||||
if ok:
|
||||
return value # type: ignore[return-value]
|
||||
raise value # type: ignore[misc]
|
||||
|
||||
@@ -2,6 +2,7 @@ import logging
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from starlette.requests import Request
|
||||
|
||||
from app.infra.logging_setup import configure_logging
|
||||
from app.infra.error_handlers import register_error_handlers
|
||||
@@ -19,6 +20,7 @@ def create_app() -> FastAPI:
|
||||
app = FastAPI(title="Agent Backend MVP", version="0.1.0")
|
||||
modules = ModularApplication()
|
||||
app.state.modules = modules
|
||||
logger = logging.getLogger("app.http")
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
@@ -30,6 +32,22 @@ def create_app() -> FastAPI:
|
||||
app.include_router(modules.api.public_router())
|
||||
register_error_handlers(app)
|
||||
|
||||
@app.middleware("http")
|
||||
async def log_http_requests(request: Request, call_next):
|
||||
logger.warning("http request: method=%s path=%s query=%s", request.method, request.url.path, request.url.query)
|
||||
try:
|
||||
response = await call_next(request)
|
||||
logger.warning(
|
||||
"http response: method=%s path=%s status=%s",
|
||||
request.method,
|
||||
request.url.path,
|
||||
response.status_code,
|
||||
)
|
||||
return response
|
||||
except Exception:
|
||||
logger.exception("http request failed: method=%s path=%s", request.method, request.url.path)
|
||||
raise
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup() -> None:
|
||||
modules.startup()
|
||||
|
||||
@@ -51,7 +51,7 @@ def test_collector_ignores_file_paths_from_content() -> None:
|
||||
assert endpoints == ["GET /health"]
|
||||
|
||||
|
||||
def test_collector_uses_title_path_fallback_when_endpoint_metadata_missing() -> None:
|
||||
def test_collector_ignores_title_when_endpoint_metadata_missing() -> None:
|
||||
rows = [
|
||||
{
|
||||
"metadata": {
|
||||
@@ -65,4 +65,4 @@ def test_collector_uses_title_path_fallback_when_endpoint_metadata_missing() ->
|
||||
|
||||
endpoints = ApiEndpointCollector().collect(rows)
|
||||
|
||||
assert endpoints == ["GET /actions/{action}"]
|
||||
assert endpoints == []
|
||||
|
||||
@@ -20,6 +20,8 @@ sub_domain: invoices
|
||||
layer: application
|
||||
status: draft
|
||||
updated_at: 2026-03-23
|
||||
endpoint: POST /billing/invoices
|
||||
source_of_truth: analytics
|
||||
tags: [billing, api]
|
||||
entities: [Invoice]
|
||||
parent: billing_api
|
||||
@@ -125,9 +127,13 @@ Create invoice
|
||||
|
||||
catalog_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOCUMENT_CATALOG)
|
||||
assert catalog_doc.metadata["document_id"] == "api.billing.create_invoice"
|
||||
assert catalog_doc.metadata["id"] == "api.billing.create_invoice"
|
||||
assert catalog_doc.metadata["module"] == "billing"
|
||||
assert catalog_doc.metadata["domain"] == "billing"
|
||||
assert catalog_doc.metadata["subdomain"] == "invoices"
|
||||
assert catalog_doc.metadata["sub_domain"] == "invoices"
|
||||
assert "subdomain" not in catalog_doc.metadata
|
||||
assert catalog_doc.metadata["endpoint"] == "POST /billing/invoices"
|
||||
assert catalog_doc.metadata["source_of_truth"] == "analytics"
|
||||
assert catalog_doc.metadata["summary_text"] == "Creates an invoice in billing."
|
||||
|
||||
fact_texts = [doc.text for doc in docs if doc.layer == RagLayer.DOCS_FACT_INDEX]
|
||||
@@ -335,3 +341,4 @@ Control actions endpoint.
|
||||
catalog = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOCUMENT_CATALOG)
|
||||
assert catalog.metadata["type"] == "api_method"
|
||||
assert catalog.metadata["title"] == "HTTP API /actions/{action}"
|
||||
assert catalog.metadata["endpoint"] == "GET|POST /actions/{action}"
|
||||
|
||||
Reference in New Issue
Block a user