# agent/app/modules/agent/engine/graphs/docs_graph_logic.py

import json
from difflib import SequenceMatcher

from app.modules.agent.engine.graphs.docs_examples_loader import DocsExamplesLoader
from app.modules.agent.engine.graphs.file_targeting import FileTargeting
from app.modules.agent.engine.graphs.state import AgentGraphState
from app.modules.agent.llm import AgentLlmService
from app.schemas.changeset import ChangeItem
import logging

# Module-level logger, named after this module via ``__name__``.
LOGGER = logging.getLogger(__name__)


class DocsContextAnalyzer:
    def __init__(self, llm: AgentLlmService, targeting: FileTargeting) -> None:
        self._llm = llm
        self._targeting = targeting

    def collect_code_context(self, state: AgentGraphState) -> dict:
        message = state.get("message", "")
        files_map = state.get("files_map", {}) or {}
        requested_path = self._targeting.extract_target_path(message)
        target_file = self._targeting.lookup_file(files_map, requested_path) if requested_path else None
        docs_candidates = self._collect_doc_candidates(files_map)
        target_path = str((target_file or {}).get("path") or (requested_path or "")).strip() or ""
        return {
            "docs_candidates": docs_candidates,
            "target_path": target_path,
            "target_file_content": str((target_file or {}).get("content", "")),
            "target_file_hash": str((target_file or {}).get("content_hash", "")),
            "validation_attempts": 0,
        }

    def detect_existing_docs(self, state: AgentGraphState) -> dict:
        docs_candidates = state.get("docs_candidates", []) or []
        if not docs_candidates:
            return {
                "existing_docs_detected": False,
                "existing_docs_summary": "No documentation files detected in current project context.",
            }

        snippets = "\n\n".join(
            [
                f"Path: {item.get('path', '')}\nSnippet:\n{self._shorten(item.get('content', ''), 500)}"
                for item in docs_candidates[:8]
            ]
        )
        user_input = "\n\n".join(
            [
                f"User request:\n{state.get('message', '')}",
                f"Requested target path:\n{state.get('target_path', '') or '(not specified)'}",
                f"Detected documentation candidates:\n{snippets}",
            ]
        )
        raw = self._llm.generate("docs_detect", user_input)
        exists = self.parse_bool_marker(raw, "exists", default=True)
        summary = self.parse_text_marker(raw, "summary", default="Documentation files detected.")
        return {"existing_docs_detected": exists, "existing_docs_summary": summary}

    def decide_strategy(self, state: AgentGraphState) -> dict:
        message = (state.get("message", "") or "").lower()
        if any(token in message for token in ("с нуля", "from scratch", "new documentation", "создай документацию")):
            return {"docs_strategy": "from_scratch"}
        if any(token in message for token in ("дополни", "обнови документацию", "extend docs", "update docs")):
            return {"docs_strategy": "incremental_update"}

        user_input = "\n\n".join(
            [
                f"User request:\n{state.get('message', '')}",
                f"Existing docs detected:\n{state.get('existing_docs_detected', False)}",
                f"Existing docs summary:\n{state.get('existing_docs_summary', '')}",
            ]
        )
        raw = self._llm.generate("docs_strategy", user_input)
        strategy = self.parse_text_marker(raw, "strategy", default="").lower()
        if strategy not in {"incremental_update", "from_scratch"}:
            strategy = "incremental_update" if state.get("existing_docs_detected", False) else "from_scratch"
        return {"docs_strategy": strategy}

    def resolve_target_for_incremental(self, state: AgentGraphState) -> tuple[str, dict | None]:
        files_map = state.get("files_map", {}) or {}
        preferred_path = state.get("target_path", "")
        preferred = self._targeting.lookup_file(files_map, preferred_path)
        if preferred:
            return str(preferred.get("path") or preferred_path), preferred
        candidates = state.get("docs_candidates", []) or []
        if candidates:
            first_path = str(candidates[0].get("path", ""))
            resolved = self._targeting.lookup_file(files_map, first_path) or candidates[0]
            return first_path, resolved
        fallback = preferred_path.strip() or "docs/AGENT_DRAFT.md"
        return fallback, None

    def _collect_doc_candidates(self, files_map: dict[str, dict]) -> list[dict]:
        candidates: list[dict] = []
        for raw_path, payload in files_map.items():
            path = str(raw_path or "").replace("\\", "/").strip()
            if not path:
                continue
            low = path.lower()
            is_doc = low.startswith("docs/") or low.endswith(".md") or low.endswith(".rst") or "/readme" in low or low.startswith("readme")
            if not is_doc:
                continue
            candidates.append(
                {
                    "path": str(payload.get("path") or path),
                    "content": str(payload.get("content", "")),
                    "content_hash": str(payload.get("content_hash", "")),
                }
            )
        candidates.sort(key=lambda item: (0 if str(item.get("path", "")).lower().startswith("docs/") else 1, str(item.get("path", "")).lower()))
        return candidates

    def _shorten(self, text: str, max_chars: int) -> str:
        value = (text or "").strip()
        if len(value) <= max_chars:
            return value
        return value[:max_chars].rstrip() + "\n...[truncated]"

    @staticmethod
    def parse_bool_marker(text: str, marker: str, *, default: bool) -> bool:
        value = DocsContextAnalyzer.parse_text_marker(text, marker, default="")
        if not value:
            return default
        token = value.split()[0].strip().lower()
        if token in {"yes", "true", "1", "да"}:
            return True
        if token in {"no", "false", "0", "нет"}:
            return False
        return default

    @staticmethod
    def parse_text_marker(text: str, marker: str, *, default: str) -> str:
        low_marker = f"{marker.lower()}:"
        for line in (text or "").splitlines():
            raw = line.strip()
            if raw.lower().startswith(low_marker):
                return raw.split(":", 1)[1].strip()
        return default


class DocsBundleFormatter:
    """Stateless helpers for parsing and comparing generated documentation."""

    def shorten(self, text: str, max_chars: int) -> str:
        """Strip ``text`` and cut it to ``max_chars`` with a truncation marker."""
        value = (text or "").strip()
        if len(value) <= max_chars:
            return value
        return value[:max_chars].rstrip() + "\n...[truncated]"

    def normalize_file_output(self, text: str) -> str:
        """Remove a single Markdown code fence wrapped around the whole text.

        The opening fence line (including any language tag) is dropped and
        the closing fence is removed. Text sharing a line with the closing
        fence is kept, and an empty fenced block yields ``""``. Text that is
        not fenced, or is fenced on a single line, is returned stripped but
        otherwise unchanged.
        """
        value = (text or "").strip()
        if not (value.startswith("```") and value.endswith("```")):
            return value
        lines = value.splitlines()
        if len(lines) < 2:
            # A one-line value ("```" or "```x```") has no separate fence lines.
            return value
        body = lines[1:]
        # ``value`` is stripped and ends with the fence, so the last line
        # ends with "```"; cut only those three characters.
        body[-1] = body[-1][:-3]
        return "\n".join(body).strip()

    def parse_docs_bundle(self, raw_text: str) -> list[dict]:
        """Parse model output into a list of ``{path, content, reason}`` dicts.

        Accepted shapes: ``{"files": [...]}``, a bare list of file objects,
        or a single ``{"path": ..., "content": ...}`` object. When the text
        as a whole is not JSON, the outermost ``{...}`` span and then the
        outermost ``[...]`` span are tried. Entries with an empty path or
        blank content, and repeated paths, are dropped.

        Returns:
            The normalised entries, or ``[]`` when nothing usable is found.
        """
        text = (raw_text or "").strip()
        if not text:
            return []

        candidate = self.normalize_file_output(text)
        parsed = self._parse_json_candidate(candidate)
        if parsed is not None:
            return self._collect_bundle_entries(parsed)

        # The model wrapped JSON in prose: try the object span first, then
        # fall back to an array span if that gives no entries.
        for opener, closer in (("{", "}"), ("[", "]")):
            start = candidate.find(opener)
            end = candidate.rfind(closer)
            if start == -1 or end <= start:
                continue
            parsed = self._parse_json_candidate(candidate[start : end + 1])
            if parsed is None:
                continue
            entries = self._collect_bundle_entries(parsed)
            if entries:
                return entries
        return []

    def bundle_has_required_structure(self, bundle: list[dict]) -> bool:
        """Return True when the bundle has files under both ``docs/api/`` and ``docs/logic/``."""
        if not bundle:
            return False
        paths = [str(item.get("path", "") or "").replace("\\", "/") for item in bundle]
        has_api = any(path.startswith("docs/api/") for path in paths)
        has_logic = any(path.startswith("docs/logic/") for path in paths)
        return has_api and has_logic

    def similarity(self, original: str, updated: str) -> float:
        """Return the character-level ``SequenceMatcher`` ratio of the two texts.

        NOTE(review): ``SequenceMatcher`` is used with its default
        ``autojunk`` heuristic, which per the difflib documentation affects
        sequences of 200+ items; ratios for long documents may therefore be
        lower than expected. Confirm thresholds were tuned with this in mind.
        """
        return SequenceMatcher(None, original or "", updated or "").ratio()

    def line_change_ratio(self, original: str, updated: str) -> float:
        """Return the share of lines touched by non-equal diff opcodes.

        Each non-equal opcode contributes the larger of its two spans; the
        sum is divided by the longer of the two line counts. Two empty
        texts give ``0.0``.
        """
        orig_lines = (original or "").splitlines()
        new_lines = (updated or "").splitlines()
        if not orig_lines and not new_lines:
            return 0.0
        opcodes = SequenceMatcher(None, orig_lines, new_lines).get_opcodes()
        changed = sum(
            max(i2 - i1, j2 - j1)
            for tag, i1, i2, j1, j2 in opcodes
            if tag != "equal"
        )
        return changed / max(len(orig_lines), len(new_lines), 1)

    def added_headings(self, original: str, updated: str) -> int:
        """Count distinct ``#``-prefixed lines present in ``updated`` only."""
        return len(self._heading_lines(updated) - self._heading_lines(original))

    def collapse_whitespace(self, text: str) -> str:
        """Join all whitespace-separated tokens of ``text`` with single spaces."""
        return " ".join((text or "").split())

    def _parse_json_candidate(self, text: str):
        """Return the decoded JSON value, or ``None`` when ``text`` is not JSON."""
        try:
            return json.loads(text)
        except (ValueError, TypeError, RecursionError):
            # ValueError covers json.JSONDecodeError; parsing is best-effort.
            return None

    def _collect_bundle_entries(self, parsed) -> list[dict]:
        """Normalise a decoded JSON value into unique, non-empty file entries."""
        if isinstance(parsed, dict):
            raw_files = parsed.get("files")
            if isinstance(raw_files, list):
                files = raw_files
            elif "path" in parsed and "content" in parsed:
                # A single file object without the ``files`` wrapper.
                files = [parsed]
            else:
                files = []
        elif isinstance(parsed, list):
            files = parsed
        else:
            files = []

        out: list[dict] = []
        seen: set[str] = set()
        for item in files:
            if not isinstance(item, dict):
                continue
            path = self._text_field(item.get("path")).replace("\\", "/").strip()
            content = self._text_field(item.get("content"))
            if not path or not content.strip() or path in seen:
                continue
            seen.add(path)
            out.append(
                {
                    "path": path,
                    "content": content,
                    "reason": self._text_field(item.get("reason")).strip(),
                }
            )
        return out

    @staticmethod
    def _text_field(value) -> str:
        """Coerce a JSON field to text, mapping ``None`` (JSON null) to ``""``."""
        return "" if value is None else str(value)

    @staticmethod
    def _heading_lines(text: str) -> set[str]:
        """Return the set of stripped lines that start with ``#``."""
        return {
            line.strip()
            for line in (text or "").splitlines()
            if line.strip().startswith("#")
        }


class DocsContentComposer:
    def __init__(self, llm: AgentLlmService, targeting: FileTargeting) -> None:
        self._llm = llm
        self._targeting = targeting
        self._examples = DocsExamplesLoader()
        self._bundle = DocsBundleFormatter()

    def load_rules_and_examples(self, _state: AgentGraphState) -> dict:
        return {"rules_bundle": self._examples.load_bundle()}

    def plan_incremental_changes(self, state: AgentGraphState, analyzer: DocsContextAnalyzer) -> dict:
        target_path, target = analyzer.resolve_target_for_incremental(state)
        user_input = "\n\n".join(
            [
                "Strategy: incremental_update",
                f"User request:\n{state.get('message', '')}",
                f"Target path:\n{target_path}",
                f"Current target content:\n{self._bundle.shorten((target or {}).get('content', ''), 3000)}",
                f"RAG context:\n{self._bundle.shorten(state.get('rag_context', ''), 6000)}",
                f"Examples bundle:\n{state.get('rules_bundle', '')}",
            ]
        )
        plan = self._llm.generate("docs_plan_sections", user_input)
        return {
            "doc_plan": plan,
            "target_path": target_path,
            "target_file_content": str((target or {}).get("content", "")),
            "target_file_hash": str((target or {}).get("content_hash", "")),
        }

    def plan_new_document(self, state: AgentGraphState) -> dict:
        target_path = state.get("target_path", "").strip() or "docs/AGENT_DRAFT.md"
        user_input = "\n\n".join(
            [
                "Strategy: from_scratch",
                f"User request:\n{state.get('message', '')}",
                f"Target path:\n{target_path}",
                f"RAG context:\n{self._bundle.shorten(state.get('rag_context', ''), 6000)}",
                f"Examples bundle:\n{state.get('rules_bundle', '')}",
            ]
        )
        plan = self._llm.generate("docs_plan_sections", user_input)
        return {"doc_plan": plan, "target_path": target_path, "target_file_content": "", "target_file_hash": ""}

    def generate_doc_content(self, state: AgentGraphState) -> dict:
        user_input = "\n\n".join(
            [
                f"Strategy:\n{state.get('docs_strategy', 'from_scratch')}",
                f"User request:\n{state.get('message', '')}",
                f"Target path:\n{state.get('target_path', '')}",
                f"Document plan:\n{state.get('doc_plan', '')}",
                f"Current target content:\n{self._bundle.shorten(state.get('target_file_content', ''), 3500)}",
                f"RAG context:\n{self._bundle.shorten(state.get('rag_context', ''), 7000)}",
                f"Examples bundle:\n{state.get('rules_bundle', '')}",
            ]
        )
        raw = self._llm.generate("docs_generation", user_input)
        bundle = self._bundle.parse_docs_bundle(raw)
        if bundle:
            first_content = str(bundle[0].get("content", "")).strip()
            return {"generated_docs_bundle": bundle, "generated_doc": first_content}
        content = self._bundle.normalize_file_output(raw)
        return {"generated_docs_bundle": [], "generated_doc": content}

    def self_check(self, state: AgentGraphState) -> dict:
        attempts = int(state.get("validation_attempts", 0) or 0) + 1
        bundle = state.get("generated_docs_bundle", []) or []
        generated = state.get("generated_doc", "")
        if not generated.strip() and not bundle:
            return {
                "validation_attempts": attempts,
                "validation_passed": False,
                "validation_feedback": "Generated document is empty.",
            }
        strategy = state.get("docs_strategy", "from_scratch")
        if strategy == "from_scratch" and not self._bundle.bundle_has_required_structure(bundle):
            return {
                "validation_attempts": attempts,
                "validation_passed": False,
                "validation_feedback": "Bundle must include both docs/api and docs/logic for from_scratch strategy.",
            }
        if strategy == "incremental_update":
            if bundle and len(bundle) > 1 and not self._is_broad_rewrite_request(str(state.get("message", ""))):
                return {
                    "validation_attempts": attempts,
                    "validation_passed": False,
                    "validation_feedback": "Incremental update should not touch multiple files without explicit broad rewrite request.",
                }
            original = str(state.get("target_file_content", ""))
            broad = self._is_broad_rewrite_request(str(state.get("message", "")))
            if original and generated:
                if self._bundle.collapse_whitespace(original) == self._bundle.collapse_whitespace(generated):
                    return {
                        "validation_attempts": attempts,
                        "validation_passed": False,
                        "validation_feedback": "Only formatting/whitespace changes detected.",
                    }
                similarity = self._bundle.similarity(original, generated)
                change_ratio = self._bundle.line_change_ratio(original, generated)
                added_headings = self._bundle.added_headings(original, generated)
                min_similarity = 0.75 if broad else 0.9
                max_change_ratio = 0.7 if broad else 0.35
                if similarity < min_similarity:
                    return {
                        "validation_attempts": attempts,
                        "validation_passed": False,
                        "validation_feedback": f"Incremental update is too broad (similarity={similarity:.2f}).",
                    }
                if change_ratio > max_change_ratio:
                    return {
                        "validation_attempts": attempts,
                        "validation_passed": False,
                        "validation_feedback": f"Incremental update changes too many lines (change_ratio={change_ratio:.2f}).",
                    }
                if not broad and added_headings > 0:
                    return {
                        "validation_attempts": attempts,
                        "validation_passed": False,
                        "validation_feedback": "New section headings were added outside requested scope.",
                    }

        bundle_text = "\n".join([f"- {item.get('path', '')}" for item in bundle[:30]])
        user_input = "\n\n".join(
            [
                f"Strategy:\n{strategy}",
                f"User request:\n{state.get('message', '')}",
                f"Document plan:\n{state.get('doc_plan', '')}",
                f"Generated file paths:\n{bundle_text or '(single-file mode)'}",
                f"Generated document:\n{generated}",
            ]
        )
        raw = self._llm.generate("docs_self_check", user_input)
        passed = DocsContextAnalyzer.parse_bool_marker(raw, "pass", default=False)
        feedback = DocsContextAnalyzer.parse_text_marker(raw, "feedback", default="No validation feedback provided.")
        return {"validation_attempts": attempts, "validation_passed": passed, "validation_feedback": feedback}

    def build_changeset(self, state: AgentGraphState) -> dict:
        files_map = state.get("files_map", {}) or {}
        bundle = state.get("generated_docs_bundle", []) or []
        strategy = state.get("docs_strategy", "from_scratch")
        if strategy == "from_scratch" and not self._bundle.bundle_has_required_structure(bundle):
            LOGGER.warning(
                "build_changeset fallback bundle used: strategy=%s bundle_items=%s",
                strategy,
                len(bundle),
            )
            bundle = self._build_fallback_bundle_from_text(state.get("generated_doc", ""))
        if bundle:
            changes: list[ChangeItem] = []
            for item in bundle:
                path = str(item.get("path", "")).replace("\\", "/").strip()
                content = str(item.get("content", ""))
                if not path or not content.strip():
                    continue
                target = self._targeting.lookup_file(files_map, path)
                reason = str(item.get("reason", "")).strip() or f"Documentation {strategy}: generated file from structured bundle."
                if target and target.get("content_hash"):
                    changes.append(
                        ChangeItem(
                            op="update",
                            path=str(target.get("path") or path),
                            base_hash=str(target.get("content_hash", "")),
                            proposed_content=content,
                            reason=reason,
                        )
                    )
                else:
                    changes.append(
                        ChangeItem(
                            op="create",
                            path=path,
                            proposed_content=content,
                            reason=reason,
                        )
                    )
            if changes:
                return {"changeset": changes}

        target_path = (state.get("target_path", "") or "").strip() or "docs/AGENT_DRAFT.md"
        target = self._targeting.lookup_file(files_map, target_path)
        content = state.get("generated_doc", "")
        if target and target.get("content_hash"):
            change = ChangeItem(
                op="update",
                path=str(target.get("path") or target_path),
                base_hash=str(target.get("content_hash", "")),
                proposed_content=content,
                reason=f"Documentation {strategy}: update existing document increment.",
            )
        else:
            change = ChangeItem(
                op="create",
                path=target_path,
                proposed_content=content,
                reason=f"Documentation {strategy}: create document from current project context.",
            )
        return {"changeset": [change]}

    def build_execution_summary(self, state: AgentGraphState) -> dict:
        changeset = state.get("changeset", []) or []
        if not changeset:
            return {"answer": "Документация не была изменена: итоговый changeset пуст."}

        file_lines = self._format_changed_files(changeset)
        user_input = "\n\n".join(
            [
                f"User request:\n{state.get('message', '')}",
                f"Documentation strategy:\n{state.get('docs_strategy', 'from_scratch')}",
                f"Document plan:\n{state.get('doc_plan', '')}",
                f"Validation feedback:\n{state.get('validation_feedback', '')}",
                f"Changed files:\n{file_lines}",
            ]
        )
        try:
            summary = self._llm.generate("docs_execution_summary", user_input).strip()
        except Exception:
            summary = ""
        if not summary:
            summary = self._build_fallback_summary(state, file_lines)
        return {"answer": summary}

    def _build_fallback_bundle_from_text(self, text: str) -> list[dict]:
        content = (text or "").strip()
        if not content:
            content = (
                "# Project Documentation Draft\n\n"
                "## Overview\n"
                "Documentation draft was generated, but structured sections require уточнение.\n"
            )
        return [
            {
                "path": "docs/logic/project_overview.md",
                "content": content,
                "reason": "Fallback: generated structured logic document from non-JSON model output.",
            },
            {
                "path": "docs/api/README.md",
                "content": (
                    "# API Methods\n\n"
                    "This file is a fallback placeholder for API method documentation.\n\n"
                    "## Next Step\n"
                    "- Add one file per API method under `docs/api/`.\n"
                ),
                "reason": "Fallback: ensure required docs/api structure exists.",
            },
        ]

    def _format_changed_files(self, changeset: list[ChangeItem]) -> str:
        lines: list[str] = []
        for item in changeset[:30]:
            lines.append(f"- {item.op.value} {item.path}: {item.reason}")
        return "\n".join(lines)

    def _build_fallback_summary(self, state: AgentGraphState, file_lines: str) -> str:
        request = (state.get("message", "") or "").strip()
        return "\n".join(
            [
                "Выполненные действия:",
                f"- Обработан запрос: {request or '(пустой запрос)'}",
                f"- Применена стратегия документации: {state.get('docs_strategy', 'from_scratch')}",
                "- Сформирован и проверен changeset для документации.",
                "",
                "Измененные файлы:",
                file_lines or "- (нет изменений)",
            ]
        )

    def _is_broad_rewrite_request(self, message: str) -> bool:
        low = (message or "").lower()
        markers = (
            "перепиши",
            "полностью",
            "целиком",
            "с нуля",
            "full rewrite",
            "rewrite all",
            "реорганизуй",
        )
        return any(marker in low for marker in markers)