"""Documentation-generation graph helpers: context analysis, LLM-output bundle parsing, and changeset composition."""
import json
|
||
from difflib import SequenceMatcher
|
||
|
||
from app.modules.agent.engine.graphs.docs_examples_loader import DocsExamplesLoader
|
||
from app.modules.agent.engine.graphs.file_targeting import FileTargeting
|
||
from app.modules.agent.engine.graphs.state import AgentGraphState
|
||
from app.modules.agent.llm import AgentLlmService
|
||
from app.schemas.changeset import ChangeItem
|
||
import logging
|
||
|
||
# Module-level logger; used to warn when build_changeset falls back to a synthesized bundle.
LOGGER = logging.getLogger(__name__)
|
||
|
||
|
||
class DocsContextAnalyzer:
    """Collects documentation context from the agent graph state.

    Resolves the user's requested target file, gathers documentation-like
    candidates from the project files map, and consults the LLM to decide
    whether docs already exist and which update strategy to apply.
    """

    def __init__(self, llm: AgentLlmService, targeting: FileTargeting) -> None:
        self._llm = llm
        self._targeting = targeting

    def collect_code_context(self, state: AgentGraphState) -> dict:
        """Build the initial docs context: candidates, target path/content/hash."""
        message = state.get("message", "")
        files_map = state.get("files_map", {}) or {}
        requested_path = self._targeting.extract_target_path(message)
        target_file = None
        if requested_path:
            target_file = self._targeting.lookup_file(files_map, requested_path)
        resolved = target_file or {}
        target_path = str(resolved.get("path") or (requested_path or "")).strip()
        return {
            "docs_candidates": self._collect_doc_candidates(files_map),
            "target_path": target_path,
            "target_file_content": str(resolved.get("content", "")),
            "target_file_hash": str(resolved.get("content_hash", "")),
            "validation_attempts": 0,
        }

    def detect_existing_docs(self, state: AgentGraphState) -> dict:
        """Ask the LLM whether the candidate files constitute existing documentation."""
        candidates = state.get("docs_candidates", []) or []
        if not candidates:
            return {
                "existing_docs_detected": False,
                "existing_docs_summary": "No documentation files detected in current project context.",
            }

        # Only the first 8 candidates are shown, each truncated to 500 chars.
        snippet_blocks = []
        for item in candidates[:8]:
            body = self._shorten(item.get("content", ""), 500)
            snippet_blocks.append(f"Path: {item.get('path', '')}\nSnippet:\n{body}")
        snippets = "\n\n".join(snippet_blocks)

        prompt_parts = [
            f"User request:\n{state.get('message', '')}",
            f"Requested target path:\n{state.get('target_path', '') or '(not specified)'}",
            f"Detected documentation candidates:\n{snippets}",
        ]
        raw = self._llm.generate("docs_detect", "\n\n".join(prompt_parts))
        return {
            "existing_docs_detected": self.parse_bool_marker(raw, "exists", default=True),
            "existing_docs_summary": self.parse_text_marker(raw, "summary", default="Documentation files detected."),
        }

    def decide_strategy(self, state: AgentGraphState) -> dict:
        """Choose 'from_scratch' or 'incremental_update' — keyword shortcuts first, LLM otherwise."""
        message = (state.get("message", "") or "").lower()
        scratch_tokens = ("с нуля", "from scratch", "new documentation", "создай документацию")
        update_tokens = ("дополни", "обнови документацию", "extend docs", "update docs")
        if any(token in message for token in scratch_tokens):
            return {"docs_strategy": "from_scratch"}
        if any(token in message for token in update_tokens):
            return {"docs_strategy": "incremental_update"}

        prompt = "\n\n".join(
            [
                f"User request:\n{state.get('message', '')}",
                f"Existing docs detected:\n{state.get('existing_docs_detected', False)}",
                f"Existing docs summary:\n{state.get('existing_docs_summary', '')}",
            ]
        )
        raw = self._llm.generate("docs_strategy", prompt)
        strategy = self.parse_text_marker(raw, "strategy", default="").lower()
        if strategy not in {"incremental_update", "from_scratch"}:
            # Unparseable LLM answer: fall back on the detection result.
            detected = state.get("existing_docs_detected", False)
            strategy = "incremental_update" if detected else "from_scratch"
        return {"docs_strategy": strategy}

    def resolve_target_for_incremental(self, state: AgentGraphState) -> tuple[str, dict | None]:
        """Pick the file to update: explicit target, else first candidate, else a draft path."""
        files_map = state.get("files_map", {}) or {}
        preferred_path = state.get("target_path", "")
        preferred = self._targeting.lookup_file(files_map, preferred_path)
        if preferred:
            return str(preferred.get("path") or preferred_path), preferred
        candidates = state.get("docs_candidates", []) or []
        if candidates:
            head = candidates[0]
            head_path = str(head.get("path", ""))
            resolved = self._targeting.lookup_file(files_map, head_path) or head
            return head_path, resolved
        return preferred_path.strip() or "docs/AGENT_DRAFT.md", None

    def _collect_doc_candidates(self, files_map: dict[str, dict]) -> list[dict]:
        """Return normalized doc-like files, docs/ entries first, then by path."""
        found: list[dict] = []
        for raw_path, payload in files_map.items():
            path = str(raw_path or "").replace("\\", "/").strip()
            if not path:
                continue
            low = path.lower()
            looks_like_doc = (
                low.startswith("docs/")
                or low.endswith((".md", ".rst"))
                or "/readme" in low
                or low.startswith("readme")
            )
            if not looks_like_doc:
                continue
            found.append(
                {
                    "path": str(payload.get("path") or path),
                    "content": str(payload.get("content", "")),
                    "content_hash": str(payload.get("content_hash", "")),
                }
            )

        def sort_key(item: dict) -> tuple[int, str]:
            low_path = str(item.get("path", "")).lower()
            return (0 if low_path.startswith("docs/") else 1, low_path)

        found.sort(key=sort_key)
        return found

    def _shorten(self, text: str, max_chars: int) -> str:
        """Truncate stripped *text* to *max_chars*, appending a truncation marker."""
        value = (text or "").strip()
        if len(value) > max_chars:
            return value[:max_chars].rstrip() + "\n...[truncated]"
        return value

    @staticmethod
    def parse_bool_marker(text: str, marker: str, *, default: bool) -> bool:
        """Read a 'marker: yes/no' line (en/ru variants) from LLM output."""
        value = DocsContextAnalyzer.parse_text_marker(text, marker, default="")
        if not value:
            return default
        token = value.split()[0].strip().lower()
        truthy = {"yes", "true", "1", "да"}
        falsy = {"no", "false", "0", "нет"}
        if token in truthy:
            return True
        if token in falsy:
            return False
        return default

    @staticmethod
    def parse_text_marker(text: str, marker: str, *, default: str) -> str:
        """Return the value after the first case-insensitive 'marker:' line."""
        prefix = f"{marker.lower()}:"
        for raw_line in (text or "").splitlines():
            stripped = raw_line.strip()
            if stripped.lower().startswith(prefix):
                return stripped.split(":", 1)[1].strip()
        return default
|
||
|
||
|
||
class DocsBundleFormatter:
|
||
def shorten(self, text: str, max_chars: int) -> str:
|
||
value = (text or "").strip()
|
||
if len(value) <= max_chars:
|
||
return value
|
||
return value[:max_chars].rstrip() + "\n...[truncated]"
|
||
|
||
def normalize_file_output(self, text: str) -> str:
|
||
value = (text or "").strip()
|
||
if value.startswith("```") and value.endswith("```"):
|
||
lines = value.splitlines()
|
||
if len(lines) >= 3:
|
||
return "\n".join(lines[1:-1]).strip()
|
||
return value
|
||
|
||
def parse_docs_bundle(self, raw_text: str) -> list[dict]:
|
||
text = (raw_text or "").strip()
|
||
if not text:
|
||
return []
|
||
|
||
candidate = self.normalize_file_output(text)
|
||
parsed = self._parse_json_candidate(candidate)
|
||
if parsed is None:
|
||
start = candidate.find("{")
|
||
end = candidate.rfind("}")
|
||
if start != -1 and end > start:
|
||
parsed = self._parse_json_candidate(candidate[start : end + 1])
|
||
if parsed is None:
|
||
return []
|
||
|
||
files: list[dict]
|
||
if isinstance(parsed, dict):
|
||
raw_files = parsed.get("files")
|
||
files = raw_files if isinstance(raw_files, list) else []
|
||
elif isinstance(parsed, list):
|
||
files = parsed
|
||
else:
|
||
files = []
|
||
|
||
out: list[dict] = []
|
||
seen: set[str] = set()
|
||
for item in files:
|
||
if not isinstance(item, dict):
|
||
continue
|
||
path = str(item.get("path", "")).replace("\\", "/").strip()
|
||
content = str(item.get("content", ""))
|
||
if not path or not content.strip():
|
||
continue
|
||
if path in seen:
|
||
continue
|
||
seen.add(path)
|
||
out.append(
|
||
{
|
||
"path": path,
|
||
"content": content,
|
||
"reason": str(item.get("reason", "")).strip(),
|
||
}
|
||
)
|
||
return out
|
||
|
||
def bundle_has_required_structure(self, bundle: list[dict]) -> bool:
|
||
if not bundle:
|
||
return False
|
||
has_api = any(str(item.get("path", "")).replace("\\", "/").startswith("docs/api/") for item in bundle)
|
||
has_logic = any(str(item.get("path", "")).replace("\\", "/").startswith("docs/logic/") for item in bundle)
|
||
return has_api and has_logic
|
||
|
||
def similarity(self, original: str, updated: str) -> float:
|
||
return SequenceMatcher(None, original or "", updated or "").ratio()
|
||
|
||
def line_change_ratio(self, original: str, updated: str) -> float:
|
||
orig_lines = (original or "").splitlines()
|
||
new_lines = (updated or "").splitlines()
|
||
if not orig_lines and not new_lines:
|
||
return 0.0
|
||
matcher = SequenceMatcher(None, orig_lines, new_lines)
|
||
changed = 0
|
||
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
|
||
if tag == "equal":
|
||
continue
|
||
changed += max(i2 - i1, j2 - j1)
|
||
total = max(len(orig_lines), len(new_lines), 1)
|
||
return changed / total
|
||
|
||
def added_headings(self, original: str, updated: str) -> int:
|
||
old_heads = {line.strip() for line in (original or "").splitlines() if line.strip().startswith("#")}
|
||
new_heads = {line.strip() for line in (updated or "").splitlines() if line.strip().startswith("#")}
|
||
return len(new_heads - old_heads)
|
||
|
||
def collapse_whitespace(self, text: str) -> str:
|
||
return " ".join((text or "").split())
|
||
|
||
def _parse_json_candidate(self, text: str):
|
||
try:
|
||
return json.loads(text)
|
||
except Exception:
|
||
return None
|
||
|
||
|
||
class DocsContentComposer:
    """Plans, generates, validates, and packages documentation output.

    Drives the docs flow: loads the rules/examples bundle, asks the LLM for
    a section plan and document content, runs heuristic plus LLM self-checks,
    and converts the result into a ChangeItem changeset and a user-facing
    execution summary.
    """

    def __init__(self, llm: AgentLlmService, targeting: FileTargeting) -> None:
        """Store collaborators and build stateless helper objects."""
        self._llm = llm
        self._targeting = targeting
        self._examples = DocsExamplesLoader()
        self._bundle = DocsBundleFormatter()

    def load_rules_and_examples(self, _state: AgentGraphState) -> dict:
        """Expose the examples/rules bundle to the graph state."""
        return {"rules_bundle": self._examples.load_bundle()}

    def plan_incremental_changes(self, state: AgentGraphState, analyzer: DocsContextAnalyzer) -> dict:
        """Plan sections for an incremental update of an existing document.

        Resolves the target file via *analyzer*, then prompts the LLM with
        the request, truncated current content, RAG context, and examples.
        Returns the plan along with the resolved target path/content/hash.
        """
        target_path, target = analyzer.resolve_target_for_incremental(state)
        user_input = "\n\n".join(
            [
                "Strategy: incremental_update",
                f"User request:\n{state.get('message', '')}",
                f"Target path:\n{target_path}",
                f"Current target content:\n{self._bundle.shorten((target or {}).get('content', ''), 3000)}",
                f"RAG context:\n{self._bundle.shorten(state.get('rag_context', ''), 6000)}",
                f"Examples bundle:\n{state.get('rules_bundle', '')}",
            ]
        )
        plan = self._llm.generate("docs_plan_sections", user_input)
        return {
            "doc_plan": plan,
            "target_path": target_path,
            "target_file_content": str((target or {}).get("content", "")),
            "target_file_hash": str((target or {}).get("content_hash", "")),
        }

    def plan_new_document(self, state: AgentGraphState) -> dict:
        """Plan sections for a brand-new document (from_scratch strategy)."""
        target_path = state.get("target_path", "").strip() or "docs/AGENT_DRAFT.md"
        user_input = "\n\n".join(
            [
                "Strategy: from_scratch",
                f"User request:\n{state.get('message', '')}",
                f"Target path:\n{target_path}",
                f"RAG context:\n{self._bundle.shorten(state.get('rag_context', ''), 6000)}",
                f"Examples bundle:\n{state.get('rules_bundle', '')}",
            ]
        )
        plan = self._llm.generate("docs_plan_sections", user_input)
        return {"doc_plan": plan, "target_path": target_path, "target_file_content": "", "target_file_hash": ""}

    def generate_doc_content(self, state: AgentGraphState) -> dict:
        """Generate document content from the plan.

        Prefers a structured multi-file JSON bundle from the model; falls
        back to a single fence-stripped document when no bundle parses.
        """
        user_input = "\n\n".join(
            [
                f"Strategy:\n{state.get('docs_strategy', 'from_scratch')}",
                f"User request:\n{state.get('message', '')}",
                f"Target path:\n{state.get('target_path', '')}",
                f"Document plan:\n{state.get('doc_plan', '')}",
                f"Current target content:\n{self._bundle.shorten(state.get('target_file_content', ''), 3500)}",
                f"RAG context:\n{self._bundle.shorten(state.get('rag_context', ''), 7000)}",
                f"Examples bundle:\n{state.get('rules_bundle', '')}",
            ]
        )
        raw = self._llm.generate("docs_generation", user_input)
        bundle = self._bundle.parse_docs_bundle(raw)
        if bundle:
            # Mirror the first bundle file into generated_doc for single-file consumers.
            first_content = str(bundle[0].get("content", "")).strip()
            return {"generated_docs_bundle": bundle, "generated_doc": first_content}
        content = self._bundle.normalize_file_output(raw)
        return {"generated_docs_bundle": [], "generated_doc": content}

    def self_check(self, state: AgentGraphState) -> dict:
        """Validate the generated docs before a changeset is built.

        Applies cheap heuristic gates first (empty output, missing required
        structure, overly broad incremental edits), then defers to an LLM
        review. Always increments and returns validation_attempts.
        """
        attempts = int(state.get("validation_attempts", 0) or 0) + 1
        bundle = state.get("generated_docs_bundle", []) or []
        generated = state.get("generated_doc", "")
        # Gate 1: nothing was generated at all.
        if not generated.strip() and not bundle:
            return {
                "validation_attempts": attempts,
                "validation_passed": False,
                "validation_feedback": "Generated document is empty.",
            }
        strategy = state.get("docs_strategy", "from_scratch")
        # Gate 2: from_scratch output must cover both docs/api/ and docs/logic/.
        if strategy == "from_scratch" and not self._bundle.bundle_has_required_structure(bundle):
            return {
                "validation_attempts": attempts,
                "validation_passed": False,
                "validation_feedback": "Bundle must include both docs/api and docs/logic for from_scratch strategy.",
            }
        if strategy == "incremental_update":
            # Gate 3: incremental edits stay single-file unless a broad rewrite was requested.
            if bundle and len(bundle) > 1 and not self._is_broad_rewrite_request(str(state.get("message", ""))):
                return {
                    "validation_attempts": attempts,
                    "validation_passed": False,
                    "validation_feedback": "Incremental update should not touch multiple files without explicit broad rewrite request.",
                }
            original = str(state.get("target_file_content", ""))
            broad = self._is_broad_rewrite_request(str(state.get("message", "")))
            if original and generated:
                # Gate 4: reject edits that only reshuffle whitespace.
                if self._bundle.collapse_whitespace(original) == self._bundle.collapse_whitespace(generated):
                    return {
                        "validation_attempts": attempts,
                        "validation_passed": False,
                        "validation_feedback": "Only formatting/whitespace changes detected.",
                    }
                similarity = self._bundle.similarity(original, generated)
                change_ratio = self._bundle.line_change_ratio(original, generated)
                added_headings = self._bundle.added_headings(original, generated)
                # Broad rewrite requests get looser diff limits.
                min_similarity = 0.75 if broad else 0.9
                max_change_ratio = 0.7 if broad else 0.35
                if similarity < min_similarity:
                    return {
                        "validation_attempts": attempts,
                        "validation_passed": False,
                        "validation_feedback": f"Incremental update is too broad (similarity={similarity:.2f}).",
                    }
                if change_ratio > max_change_ratio:
                    return {
                        "validation_attempts": attempts,
                        "validation_passed": False,
                        "validation_feedback": f"Incremental update changes too many lines (change_ratio={change_ratio:.2f}).",
                    }
                # Gate 5: non-broad updates must not introduce new section headings.
                if not broad and added_headings > 0:
                    return {
                        "validation_attempts": attempts,
                        "validation_passed": False,
                        "validation_feedback": "New section headings were added outside requested scope.",
                    }

        # Final gate: LLM review of the plan and generated content (first 30 paths shown).
        bundle_text = "\n".join([f"- {item.get('path', '')}" for item in bundle[:30]])
        user_input = "\n\n".join(
            [
                f"Strategy:\n{strategy}",
                f"User request:\n{state.get('message', '')}",
                f"Document plan:\n{state.get('doc_plan', '')}",
                f"Generated file paths:\n{bundle_text or '(single-file mode)'}",
                f"Generated document:\n{generated}",
            ]
        )
        raw = self._llm.generate("docs_self_check", user_input)
        passed = DocsContextAnalyzer.parse_bool_marker(raw, "pass", default=False)
        feedback = DocsContextAnalyzer.parse_text_marker(raw, "feedback", default="No validation feedback provided.")
        return {"validation_attempts": attempts, "validation_passed": passed, "validation_feedback": feedback}

    def build_changeset(self, state: AgentGraphState) -> dict:
        """Convert generated docs into ChangeItem operations.

        Bundle entries matching an existing file (by content_hash) become
        updates; others become creates. from_scratch output missing the
        required structure is replaced with a synthesized fallback bundle;
        if no bundle change survives, a single-file change at target_path
        is emitted instead.
        """
        files_map = state.get("files_map", {}) or {}
        bundle = state.get("generated_docs_bundle", []) or []
        strategy = state.get("docs_strategy", "from_scratch")
        if strategy == "from_scratch" and not self._bundle.bundle_has_required_structure(bundle):
            LOGGER.warning(
                "build_changeset fallback bundle used: strategy=%s bundle_items=%s",
                strategy,
                len(bundle),
            )
            bundle = self._build_fallback_bundle_from_text(state.get("generated_doc", ""))
        if bundle:
            changes: list[ChangeItem] = []
            for item in bundle:
                path = str(item.get("path", "")).replace("\\", "/").strip()
                content = str(item.get("content", ""))
                if not path or not content.strip():
                    continue
                target = self._targeting.lookup_file(files_map, path)
                reason = str(item.get("reason", "")).strip() or f"Documentation {strategy}: generated file from structured bundle."
                # A known content_hash means the file already exists -> update against that hash.
                if target and target.get("content_hash"):
                    changes.append(
                        ChangeItem(
                            op="update",
                            path=str(target.get("path") or path),
                            base_hash=str(target.get("content_hash", "")),
                            proposed_content=content,
                            reason=reason,
                        )
                    )
                else:
                    changes.append(
                        ChangeItem(
                            op="create",
                            path=path,
                            proposed_content=content,
                            reason=reason,
                        )
                    )
            if changes:
                return {"changeset": changes}

        # Single-file fallback: no bundle, or every bundle entry was empty/invalid.
        target_path = (state.get("target_path", "") or "").strip() or "docs/AGENT_DRAFT.md"
        target = self._targeting.lookup_file(files_map, target_path)
        content = state.get("generated_doc", "")
        if target and target.get("content_hash"):
            change = ChangeItem(
                op="update",
                path=str(target.get("path") or target_path),
                base_hash=str(target.get("content_hash", "")),
                proposed_content=content,
                reason=f"Documentation {strategy}: update existing document increment.",
            )
        else:
            change = ChangeItem(
                op="create",
                path=target_path,
                proposed_content=content,
                reason=f"Documentation {strategy}: create document from current project context.",
            )
        return {"changeset": [change]}

    def build_execution_summary(self, state: AgentGraphState) -> dict:
        """Produce the final user-facing answer describing the applied changes.

        Asks the LLM for a summary; on any LLM failure or empty output,
        falls back to a deterministic template.
        """
        changeset = state.get("changeset", []) or []
        if not changeset:
            return {"answer": "Документация не была изменена: итоговый changeset пуст."}

        file_lines = self._format_changed_files(changeset)
        user_input = "\n\n".join(
            [
                f"User request:\n{state.get('message', '')}",
                f"Documentation strategy:\n{state.get('docs_strategy', 'from_scratch')}",
                f"Document plan:\n{state.get('doc_plan', '')}",
                f"Validation feedback:\n{state.get('validation_feedback', '')}",
                f"Changed files:\n{file_lines}",
            ]
        )
        try:
            summary = self._llm.generate("docs_execution_summary", user_input).strip()
        except Exception:
            # Deliberate best-effort: a failed LLM call falls through to the template below.
            summary = ""
        if not summary:
            summary = self._build_fallback_summary(state, file_lines)
        return {"answer": summary}

    def _build_fallback_bundle_from_text(self, text: str) -> list[dict]:
        """Wrap free-form generated text into the required two-file bundle.

        Guarantees a docs/logic overview plus a docs/api placeholder so the
        from_scratch structure requirement is satisfiable downstream.
        """
        content = (text or "").strip()
        if not content:
            content = (
                "# Project Documentation Draft\n\n"
                "## Overview\n"
                "Documentation draft was generated, but structured sections require уточнение.\n"
            )
        return [
            {
                "path": "docs/logic/project_overview.md",
                "content": content,
                "reason": "Fallback: generated structured logic document from non-JSON model output.",
            },
            {
                "path": "docs/api/README.md",
                "content": (
                    "# API Methods\n\n"
                    "This file is a fallback placeholder for API method documentation.\n\n"
                    "## Next Step\n"
                    "- Add one file per API method under `docs/api/`.\n"
                ),
                "reason": "Fallback: ensure required docs/api structure exists.",
            },
        ]

    def _format_changed_files(self, changeset: list[ChangeItem]) -> str:
        """Render up to 30 changes as '- <op> <path>: <reason>' lines."""
        lines: list[str] = []
        for item in changeset[:30]:
            # NOTE(review): item.op is read via .value, i.e. treated as an enum —
            # confirm ChangeItem.op is an Enum and not a plain str.
            lines.append(f"- {item.op.value} {item.path}: {item.reason}")
        return "\n".join(lines)

    def _build_fallback_summary(self, state: AgentGraphState, file_lines: str) -> str:
        """Deterministic Russian-language summary used when the LLM returns none."""
        request = (state.get("message", "") or "").strip()
        return "\n".join(
            [
                "Выполненные действия:",
                f"- Обработан запрос: {request or '(пустой запрос)'}",
                f"- Применена стратегия документации: {state.get('docs_strategy', 'from_scratch')}",
                "- Сформирован и проверен changeset для документации.",
                "",
                "Измененные файлы:",
                file_lines or "- (нет изменений)",
            ]
        )

    def _is_broad_rewrite_request(self, message: str) -> bool:
        """True when the message contains a full-rewrite marker (ru/en)."""
        low = (message or "").lower()
        markers = (
            "перепиши",
            "полностью",
            "целиком",
            "с нуля",
            "full rewrite",
            "rewrite all",
            "реорганизуй",
        )
        return any(marker in low for marker in markers)
|