фиксирую состояние

2026-04-07 21:41:27 +03:00
parent bc29d51a29
commit 8fb76bb331
56 changed files with 7011 additions and 316 deletions
@@ -4,17 +4,17 @@ from app.core.agent.processes.v2.models import V2AnchorType, V2RouteAnchors, V2R


 def anchor_signal_types(route: V2RouteResult) -> set[str]:
-    hints = [str(item).strip().lower() for item in route.anchors.target_doc_hints if str(item or "").strip()]
+    texts = _signal_texts(route)
    signals: set[str] = set()
    if route.subintent == V2Subintent.FIND_FILES:
        signals.add(V2AnchorType.FIND_FILES)
-    if route.anchors.endpoint_paths or _has_hint(hints, "/api/"):
+    if route.anchors.endpoint_paths or _has_any(texts, ("/api/", "api", "endpoint")):
        signals.add(V2AnchorType.API_ENDPOINT)
-    if _has_hint(hints, "/architecture/"):
+    if _has_any(texts, ("/architecture/", "architecture", "arch")):
        signals.add(V2AnchorType.ARCHITECTURE)
-    if _has_hint(hints, "/logic/"):
+    if _has_any(texts, ("/logic/", "logic", "workflow", "flow", "process")):
        signals.add(V2AnchorType.LOGIC_FLOW)
-    if _has_hint(hints, "/domains/"):
+    if route.anchors.entity_names or _has_any(texts, ("/domains/", "domain", "entity", "component")):
        signals.add(V2AnchorType.DOMAIN_ENTITY)
    return signals

@@ -44,5 +44,14 @@ def anchors_have_signal(anchors: V2RouteAnchors, signal: str, *, subintent: str
    return signal in anchor_signal_types(route)


-def _has_hint(hints: list[str], marker: str) -> bool:
-    return any(marker in hint for hint in hints)
+def _signal_texts(route: V2RouteResult) -> list[str]:
+    items = [
+        *route.anchors.target_doc_hints,
+        *route.anchors.file_names,
+        *route.anchors.matched_aliases,
+    ]
+    return [str(item).strip().lower() for item in items if str(item or "").strip()]
+
+
+def _has_any(items: list[str], markers: tuple[str, ...]) -> bool:
+    return any(marker in item for item in items for marker in markers)
@@ -11,6 +11,8 @@ from app.core.rag.contracts.enums import RagLayer


 class DocsEvidenceAssembler:
+    _API_PATH_PREFIXES = ("docs/api/", "docs/endpoints/", "docs/methods/", "api/", "endpoints/", "methods/")
+    _GENERIC_DOC_MARKERS = ("readme", "overview", "index", "navigation", "related docs", "catalog")
    def assemble_summaries(self, rows: list[dict], route: V2RouteResult) -> list[RetrievedSummary]:
        items = self._rank_rows(rows, route, mode="summary")
        ranked = [
@@ -71,10 +73,12 @@ class DocsEvidenceAssembler:
                    "score": score,
                    "score_breakdown": breakdown,
                    "match_reason": self._match_reason(breakdown),
+                    "is_generic_doc": self._is_generic_doc(path, self._title(row, path), self._summary(row), row),
                }
            )
        ranked.sort(key=lambda item: (-item["score"], item["path"]))
-        return self._ensure_target_docs_in_top_k(ranked, route, k=4 if mode == "find_files" else 3)
+        ranked = self._ensure_target_docs_in_top_k(ranked, route, k=4 if mode == "find_files" else 3)
+        return self._promote_specific_primary(ranked, route)

    def _score_breakdown(self, row: dict, route: V2RouteResult, *, mode: str) -> dict[str, int]:
        path_raw = self._path(row)
@@ -93,6 +97,7 @@ class DocsEvidenceAssembler:
            "alias_match": 0,
            "anchor_boost": 0,
            "target_doc_boost": 0,
+            "specificity_boost": 0,
            "generic_penalty": 0,
        }
        if route.intent == "GENERAL_QA":
@@ -100,6 +105,7 @@ class DocsEvidenceAssembler:
        hint_norm_lower = {normalize_doc_path(h).lower() for h in route.anchors.target_doc_hints if str(h or "").strip()}
        if normalize_doc_path(path_raw).lower() in hint_norm_lower:
            breakdown["target_doc_boost"] += 1000
+        hint_texts = [str(hint or "").strip().lower() for hint in route.anchors.target_doc_hints if str(hint or "").strip()]
        if any(alias.lower() in " ".join([path, title, summary, entity]) for alias in route.anchors.matched_aliases):
            breakdown["alias_match"] += 500
        for token in query_tokens:
@@ -111,10 +117,25 @@ class DocsEvidenceAssembler:
                breakdown["semantic"] += 20
            if self._compact(token) in compact_haystack:
                breakdown["alias_match"] += 250
+        for hint in hint_texts:
+            compact_hint = self._compact(hint)
+            if compact_hint and compact_hint in compact_haystack:
+                breakdown["target_doc_boost"] += 180
+            elif hint and hint.strip("/") in " ".join([path, title, summary, entity]):
+                breakdown["semantic"] += 70
+        endpoint_text = self._summary(row).lower()
+        for endpoint in route.anchors.endpoint_paths:
+            normalized_endpoint = endpoint.strip().lower()
+            endpoint_slug = normalized_endpoint.strip("/")
+            if normalized_endpoint and normalized_endpoint in endpoint_text:
+                breakdown["target_doc_boost"] += 260
+            if endpoint_slug and endpoint_slug in filename:
+                breakdown["filename_match"] += 200
        if any(endpoint.strip("/").lower() in filename for endpoint in route.anchors.endpoint_paths):
            breakdown["filename_match"] += 200
        signals = anchor_signal_types(route)
        breakdown["anchor_boost"] += self._anchor_boost(path, signals)
+        breakdown["specificity_boost"] += self._specificity_boost(row, path, title, summary, route)
        breakdown["generic_penalty"] += self._generic_penalty(path, signals)
        if mode == "find_files":
            breakdown["path_match"] *= 3
@@ -125,8 +146,8 @@ class DocsEvidenceAssembler:

    def _anchor_boost(self, path: str, signals: set[str]) -> int:
        boost = 0
-        if V2AnchorType.API_ENDPOINT in signals and path.startswith("docs/api/"):
-            boost += 300
+        if V2AnchorType.API_ENDPOINT in signals and path.startswith(self._API_PATH_PREFIXES):
+            boost += 360
        if V2AnchorType.LOGIC_FLOW in signals and path.startswith("docs/logic/"):
            boost += 300
        if V2AnchorType.DOMAIN_ENTITY in signals and path.startswith("docs/domains/"):
@@ -139,8 +160,11 @@ class DocsEvidenceAssembler:

    def _generic_penalty(self, path: str, signals: set[str]) -> int:
        penalty = 0
+        lowered = path.lower()
        if path == "docs/README.md" and V2AnchorType.ARCHITECTURE not in signals:
-            penalty -= 200
+            penalty -= 260
+        if any(marker in lowered for marker in ("/readme", "readme.md", "/index", "/overview", "/catalog", "/navigation")):
+            penalty -= 220
        if "/architecture/" in path and V2AnchorType.ARCHITECTURE not in signals and signals.intersection(
            {V2AnchorType.API_ENDPOINT, V2AnchorType.DOMAIN_ENTITY}
        ):
@@ -173,6 +197,17 @@ class DocsEvidenceAssembler:
        top.sort(key=lambda item: (-item["score"], item["path"]))
        return top + remaining

+    def _promote_specific_primary(self, ranked: list[dict], route: V2RouteResult) -> list[dict]:
+        if len(ranked) < 2:
+            return ranked
+        first = ranked[0]
+        if not first.get("is_generic_doc"):
+            return ranked
+        promoted = next((item for item in ranked[1:] if not item.get("is_generic_doc") and self._is_specific_candidate(item, route)), None)
+        if promoted is None:
+            return ranked
+        return [promoted] + [item for item in ranked if item["path"] != promoted["path"]]
+
    def _match_reason(self, breakdown: dict[str, int]) -> str:
        if breakdown["target_doc_boost"] > 0:
            return "exact_path"
@@ -189,6 +224,53 @@ class DocsEvidenceAssembler:
        section = str(metadata.get("section_path") or "").lower()
        return "summary" in section or "свод" in section or "overview" in section

+    def _specificity_boost(self, row: dict, path: str, title: str, summary: str, route: V2RouteResult) -> int:
+        boost = 0
+        filename = path.split("/")[-1]
+        lowered_title = title.lower()
+        lowered_summary = summary.lower()
+        if not self._is_generic_doc(path, title, summary, row):
+            boost += 90
+        if path.startswith(self._API_PATH_PREFIXES):
+            boost += 160
+        if "endpoint" in filename or "endpoint" in lowered_title or "method" in lowered_title:
+            boost += 120
+        if row.get("layer") == RagLayer.DOCS_DOC_CHUNKS and not self._looks_like_navigation_chunk(row):
+            boost += 80
+        for token in self._query_tokens(route):
+            if token and token in filename:
+                boost += 90
+            if token and token in lowered_title:
+                boost += 70
+            if token and token in lowered_summary:
+                boost += 40
+        return boost
+
+    def _is_specific_candidate(self, item: dict, route: V2RouteResult) -> bool:
+        breakdown = dict(item.get("score_breakdown") or {})
+        if breakdown.get("target_doc_boost", 0) > 0:
+            return True
+        if breakdown.get("specificity_boost", 0) >= 160:
+            return True
+        return V2AnchorType.API_ENDPOINT in anchor_signal_types(route) and item["path"].startswith(self._API_PATH_PREFIXES)
+
+    def _is_generic_doc(self, path: str, title: str, summary: str, row: dict) -> bool:
+        haystack = " ".join([path.lower(), title.lower(), summary.lower()])
+        if any(marker in haystack for marker in self._GENERIC_DOC_MARKERS):
+            return True
+        return self._looks_like_navigation_chunk(row)
+
+    def _looks_like_navigation_chunk(self, row: dict) -> bool:
+        text = self._summary(row).lower()
+        if not text:
+            return False
+        lines = [line.strip() for line in text.splitlines() if line.strip()]
+        bullet_lines = sum(1 for line in lines if line.startswith(("- ", "* ", "1.", "2.", "3.")))
+        link_lines = sum(1 for line in lines if "](" in line or line.startswith("docs/"))
+        if "related docs" in text or "navigation" in text:
+            return True
+        return bullet_lines >= 3 or link_lines >= 3
+
    def _query_tokens(self, route: V2RouteResult) -> list[str]:
        values = list(route.target_terms) + list(route.anchors.matched_aliases)
        tokens: list[str] = []
@@ -8,6 +8,7 @@ class QueryFeatures:
    normalized_query: str
    target_terms: list[str]
    endpoint_paths: list[str]
+    file_names: list[str]
    matched_aliases: list[str]
    target_doc_hints: list[str]
    file_markers: list[str]
@@ -34,10 +34,42 @@ class _MarkerScanner:
        "где описано",
        "документ с описанием",
    )
-    _ARCHITECTURE_MARKERS = ("архитектура", "как устроено приложение", "как устроен сервис", "основные части системы", "из чего состоит")
-    _LOGIC_MARKERS = ("цикл", "loop", "worker", "как работает отправка уведомлений", "логика отправки", "background job", "runtime loop")
+    _ARCHITECTURE_MARKERS = (
+        "архитектура",
+        "архитектур",
+        "architecture",
+        "arch overview",
+        "как устроено приложение",
+        "как устроен сервис",
+        "основные части системы",
+        "из чего состоит",
+    )
+    _LOGIC_MARKERS = (
+        "цикл",
+        "loop",
+        "flow",
+        "workflow",
+        "process",
+        "worker",
+        "как работает отправка уведомлений",
+        "логика отправки",
+        "background job",
+        "runtime loop",
+    )
    _DOMAIN_MARKERS = ("runtime health", "health model", "статусы здоровья", "сущность", "entity", "здоровье runtime")
-    _ENDPOINT_MARKERS = ("endpoint", "метод api", "ручка", "эндпоинт")
+    _ENDPOINT_MARKERS = (
+        "endpoint",
+        "api",
+        "route",
+        "method",
+        "метод api",
+        "метод",
+        "метода",
+        "ручка",
+        "эндпоинт",
+        "маршрут",
+        "роут",
+    )

    def scan(self, lowered_query: str) -> dict[str, list[str]]:
        return {
@@ -54,12 +86,13 @@ class _MarkerScanner:

 class _EntityNameExtractor:
    _ENTITY_RE = re.compile(r"\b[A-Z][A-Za-z0-9_]+\b")
+    _IGNORE = {"arch"}

    def extract(self, query: str) -> list[str]:
        items: list[str] = []
        for match in self._ENTITY_RE.finditer(query):
            candidate = match.group(0).strip()
-            if candidate and candidate not in items:
+            if candidate and candidate.lower() not in self._IGNORE and candidate not in items:
                items.append(candidate)
        return items

@@ -92,33 +125,61 @@ class _FileNameExtractor:
            items.append(value)


+class _ProcessAnchorExtractor:
+    _DOMAIN_KEYWORDS = {
+        "billing": "billing",
+        "notifications": "notifications",
+    }
+    _SUBDOMAIN_KEYWORDS = {
+        "invoice": ("billing", "invoice"),
+        "invoices": ("billing", "invoice"),
+        "delivery_loop": ("notifications", "delivery_loop"),
+        "delivery": ("notifications", "delivery_loop"),
+    }
+
+    def extract(self, lowered_query: str) -> tuple[str | None, str | None]:
+        domain = next((value for token, value in self._DOMAIN_KEYWORDS.items() if token in lowered_query), None)
+        subdomain: str | None = None
+        for token, mapping in self._SUBDOMAIN_KEYWORDS.items():
+            if token in lowered_query:
+                domain = domain or mapping[0]
+                subdomain = mapping[1]
+                break
+        return domain, subdomain
+
+
 class V2AnchorExtractor:
    def __init__(
        self,
        marker_scanner: _MarkerScanner | None = None,
        entity_extractor: _EntityNameExtractor | None = None,
        file_name_extractor: _FileNameExtractor | None = None,
+        process_anchor_extractor: _ProcessAnchorExtractor | None = None,
    ) -> None:
        self._marker_scanner = marker_scanner or _MarkerScanner()
        self._entity_extractor = entity_extractor or _EntityNameExtractor()
        self._file_name_extractor = file_name_extractor or _FileNameExtractor()
+        self._process_anchor_extractor = process_anchor_extractor or _ProcessAnchorExtractor()

    def extract(self, normalized_query: str, terms: TargetTermsAnalysis) -> AnchorAnalysis:
-        markers = self._marker_scanner.scan(normalized_query.lower())
+        lowered_query = normalized_query.lower()
+        markers = self._marker_scanner.scan(lowered_query)
+        process_domain, process_subdomain = self._process_anchor_extractor.extract(lowered_query)
        anchors = V2RouteAnchors(
            entity_names=self._entity_extractor.extract(normalized_query),
            file_names=self._file_name_extractor.extract(normalized_query),
            endpoint_paths=list(terms.endpoint_paths),
            target_doc_hints=self._target_doc_hints(
                endpoint_paths=terms.endpoint_paths,
+                api_like_terms=terms.api_like_terms,
                alias_docs=terms.alias_docs,
                architecture_markers=markers["architecture_markers"],
                logic_markers=markers["logic_markers"],
                domain_markers=markers["domain_markers"],
            ),
            matched_aliases=list(terms.matched_aliases),
-            process_domain=None,
-            process_subdomain=None,
+            process_domain=process_domain,
+            process_subdomain=process_subdomain,
        )
        return AnchorAnalysis(
            anchors=anchors,
@@ -133,6 +194,7 @@ class V2AnchorExtractor:
        self,
        *,
        endpoint_paths: list[str],
+        api_like_terms: list[str],
        alias_docs: list[str],
        architecture_markers: list[str],
        logic_markers: list[str],
@@ -145,13 +207,41 @@ class V2AnchorExtractor:
            "/actions/{action}": "docs/api/control-actions-endpoint.md",
        }
        for endpoint in endpoint_paths:
+            for hint in self._endpoint_hint_variants(endpoint):
+                self._append_unique(hints, hint)
            hint = endpoint_map.get(endpoint)
-            if hint and hint not in hints:
-                hints.append(hint)
-        if architecture_markers and "docs/architecture/telegram-notify-app-overview.md" not in hints:
-            hints.append("docs/architecture/telegram-notify-app-overview.md")
-        if logic_markers and "docs/logic/telegram-notification-loop.md" not in hints:
-            hints.append("docs/logic/telegram-notification-loop.md")
-        if domain_markers and "docs/domains/runtime-health-entity.md" not in hints:
-            hints.append("docs/domains/runtime-health-entity.md")
+            self._append_unique(hints, hint)
+        for term in api_like_terms:
+            for hint in self._api_like_hint_variants(term):
+                self._append_unique(hints, hint)
+        if architecture_markers:
+            self._append_unique(hints, "docs/architecture/telegram-notify-app-overview.md")
+        if logic_markers:
+            self._append_unique(hints, "docs/logic/telegram-notification-loop.md")
+        if domain_markers:
+            self._append_unique(hints, "docs/domains/runtime-health-entity.md")
        return hints
+
+    def _endpoint_hint_variants(self, endpoint: str) -> list[str]:
+        normalized = str(endpoint or "").strip().lower()
+        if not normalized:
+            return []
+        slug = normalized.strip("/").replace("/", "-").replace("{", "").replace("}", "")
+        leaf = next((part for part in reversed(slug.split("-")) if part and part != "id"), "")
+        hints: list[str] = [normalized]
+        for value in (slug, leaf):
+            if not value:
+                continue
+            hints.extend([value, f"{value}-endpoint", f"{value} endpoint"])
+        return list(dict.fromkeys(hints))
+
+    def _api_like_hint_variants(self, term: str) -> list[str]:
+        normalized = str(term or "").strip().lower().lstrip("/")
+        if not normalized:
+            return []
+        return [normalized, f"/{normalized}", f"{normalized}-endpoint", f"{normalized} endpoint"]
+
+    def _append_unique(self, items: list[str], value: str | None) -> None:
+        normalized = str(value or "").strip()
+        if normalized and normalized not in items:
+            items.append(normalized)
@@ -8,6 +8,7 @@ from dataclasses import dataclass
 class TargetTermsAnalysis:
    target_terms: list[str]
    endpoint_paths: list[str]
+    api_like_terms: list[str]
    matched_aliases: list[str]
    alias_docs: list[str]

@@ -26,7 +27,7 @@ class _AliasMatcher:
        _AliasRule(("control actions", "управление runtime"), "/actions/{action}", "docs/api/control-actions-endpoint.md"),
        _AliasRule(("runtime health", "здоровье runtime", "статусы здоровья"), "runtime_health", "docs/domains/runtime-health-entity.md"),
        _AliasRule(("цикл отправки уведомлений", "notification loop", "worker loop"), "telegram-notify-loop", "docs/logic/telegram-notification-loop.md"),
-        _AliasRule(("архитектура приложения", "overview"), "architecture_overview", "docs/architecture/telegram-notify-app-overview.md"),
+        _AliasRule(("архитектура приложения",), "architecture_overview", "docs/architecture/telegram-notify-app-overview.md"),
        _AliasRule(("архитектура",), "architecture_overview", "docs/architecture/telegram-notify-app-overview.md"),
        _AliasRule(("каталог ошибок", "errors catalog"), "errors_catalog", "docs/errors/catalog.yaml"),
        _AliasRule(("файл-индекс документации", "docs index", "индекс документации"), "docs_index", "docs/README.md"),
@@ -51,6 +52,7 @@ class _AliasMatcher:
 class _EndpointPathExtractor:
    _PATH_RE = re.compile(r"`([^`]+)`|(/[A-Za-z0-9_./{}-]+)")
    _VALID_ENDPOINT_RE = re.compile(r"^/[a-z0-9._/-]+(?:/\{[a-z0-9_]+\})?$")
+    _DOC_EXTENSIONS = (".md", ".yaml", ".yml", ".json")

    def extract(self, query: str) -> list[str]:
        values: list[str] = []
@@ -68,28 +70,161 @@ class _EndpointPathExtractor:
        return trimmed.lower()

    def _is_endpoint(self, token: str) -> bool:
-        return bool(token and self._VALID_ENDPOINT_RE.fullmatch(token))
+        if not token or not self._VALID_ENDPOINT_RE.fullmatch(token):
+            return False
+        return not token.endswith(self._DOC_EXTENSIONS)

    def _append_unique(self, items: list[str], value: str) -> None:
        if value and value not in items:
            items.append(value)


+@dataclass(slots=True)
+class _ApiLikeAnchorAnalysis:
+    endpoint_paths: list[str]
+    candidate_terms: list[str]
+
+
+class _ApiLikeAnchorExtractor:
+    _TOKEN_RE = re.compile(r"[A-Za-zА-Яа-я0-9_./{}-]+")
+    _ASCII_ENDPOINT_RE = re.compile(r"^[a-z0-9]+(?:[-_][a-z0-9]+)*$")
+    _API_MARKERS = {
+        "api",
+        "endpoint",
+        "route",
+        "method",
+        "метод",
+        "метода",
+        "методу",
+        "ручка",
+        "ручки",
+        "эндпоинт",
+        "эндпоинта",
+        "маршрут",
+        "роут",
+    }
+    _EXPLAIN_MARKERS = {
+        "как",
+        "что",
+        "делает",
+        "работает",
+        "объясни",
+        "объяснить",
+        "расскажи",
+        "опиши",
+        "смысл",
+    }
+    _NOISE_WORDS = _API_MARKERS | _EXPLAIN_MARKERS | {
+        "про",
+        "какой",
+        "какая",
+        "какие",
+        "какого",
+        "какую",
+        "кратко",
+        "нужен",
+        "нужно",
+        "у",
+    }
+    _SHORT_QUERY_TOKEN_LIMIT = 7
+
+    def extract(self, query: str, explicit_endpoint_paths: list[str]) -> _ApiLikeAnchorAnalysis:
+        if explicit_endpoint_paths:
+            return _ApiLikeAnchorAnalysis(endpoint_paths=list(explicit_endpoint_paths), candidate_terms=[])
+        token_entries = self._token_entries(query)
+        if not token_entries:
+            return _ApiLikeAnchorAnalysis(endpoint_paths=[], candidate_terms=[])
+        candidate_terms = [token for token, _start in token_entries if self._is_api_candidate(token)]
+        if not candidate_terms:
+            return _ApiLikeAnchorAnalysis(endpoint_paths=[], candidate_terms=[])
+        if self._has_api_marker(token_entries):
+            primary = self._primary_candidate(token_entries)
+            endpoint_paths = [self._ensure_endpoint(primary)] if primary else []
+            return _ApiLikeAnchorAnalysis(
+                endpoint_paths=[path for path in endpoint_paths if path],
+                candidate_terms=[primary] if primary else [],
+            )
+        if self._is_short_explain_query(token_entries) and len(candidate_terms) == 1:
+            return _ApiLikeAnchorAnalysis(endpoint_paths=[], candidate_terms=list(candidate_terms))
+        return _ApiLikeAnchorAnalysis(endpoint_paths=[], candidate_terms=[])
+
+    def _token_entries(self, query: str) -> list[tuple[str, int]]:
+        entries: list[tuple[str, int]] = []
+        for match in self._TOKEN_RE.finditer(query):
+            token = str(match.group(0) or "").strip().strip("`'\"()[]!?.,:;").lower()
+            if token:
+                entries.append((token, match.start()))
+        return entries
+
+    def _has_api_marker(self, token_entries: list[tuple[str, int]]) -> bool:
+        return any(token in self._API_MARKERS for token, _start in token_entries)
+
+    def _is_short_explain_query(self, token_entries: list[tuple[str, int]]) -> bool:
+        if len(token_entries) > self._SHORT_QUERY_TOKEN_LIMIT:
+            return False
+        return any(token in self._EXPLAIN_MARKERS for token, _start in token_entries)
+
+    def _primary_candidate(self, token_entries: list[tuple[str, int]]) -> str | None:
+        marker_positions = [start for token, start in token_entries if token in self._API_MARKERS]
+        candidates = [(token, start) for token, start in token_entries if self._is_api_candidate(token)]
+        if not candidates:
+            return None
+        if not marker_positions:
+            return candidates[-1][0]
+        primary = min(
+            candidates,
+            key=lambda item: min(abs(item[1] - marker_pos) for marker_pos in marker_positions),
+        )
+        return primary[0]
+
+    def _is_api_candidate(self, token: str) -> bool:
+        if (
+            not token
+            or token in self._NOISE_WORDS
+            or token.startswith("docs/")
+            or token.endswith((".md", ".yaml", ".yml", ".json"))
+        ):
+            return False
+        if token.startswith("/"):
+            return True
+        return self._ASCII_ENDPOINT_RE.fullmatch(token) is not None and len(token) >= 3
+
+    def _ensure_endpoint(self, token: str) -> str:
+        return token if token.startswith("/") else f"/{token}"
+
+
 class _TermCollector:
    _TOKEN_RE = re.compile(r"[A-Za-zА-Яа-я0-9_./{}-]+")
    _IDENTIFIER_RE = re.compile(
        r"^(?:[a-z0-9]+(?:[_-][a-z0-9]+)+|[a-z]+[A-Z][A-Za-z0-9]+|(?:[A-Z][a-z0-9]+){2,})$"
    )
    _QUESTION_WORDS = {"что", "как", "где", "какой", "какие", "каком", "когда", "чего"}
-    _INTENT_WORDS = {"объясни", "покажи", "найди", "расскажи", "дай", "опиши", "нужен"}
-    _FILLER_WORDS = {"про", "там", "тут", "плз"}
+    _INTENT_WORDS = {"объясни", "покажи", "найди", "расскажи", "дай", "опиши", "нужен", "show"}
+    _FILLER_WORDS = {"про", "там", "тут", "плз", "pls", "for"}
    _MARKER_WORDS = {
        "файл",
        "файле",
+        "file",
+        "method",
+        "метод",
+        "метода",
+        "методу",
+        "route",
+        "ручка",
+        "ручки",
+        "эндпоинт",
+        "эндпоинта",
+        "overview",
+        "architecture",
+        "arch",
+        "flow",
+        "process",
+        "workflow",
        "док",
        "дока",
        "доках",
        "документ",
+        "doc",
        "описан",
        "док-саммари",
        "summary",
@@ -115,6 +250,7 @@ class _TermCollector:
        "service",
        "summary",
        "endpoint",
+        "docs",
    }
    _MAX_TERMS = 7

@@ -191,19 +327,23 @@ class V2TargetTermsExtractor:
        self,
        alias_matcher: _AliasMatcher | None = None,
        endpoint_extractor: _EndpointPathExtractor | None = None,
+        api_like_extractor: _ApiLikeAnchorExtractor | None = None,
        term_collector: _TermCollector | None = None,
    ) -> None:
        self._alias_matcher = alias_matcher or _AliasMatcher()
        self._endpoint_extractor = endpoint_extractor or _EndpointPathExtractor()
+        self._api_like_extractor = api_like_extractor or _ApiLikeAnchorExtractor()
        self._term_collector = term_collector or _TermCollector()

    def extract(self, normalized_query: str) -> TargetTermsAnalysis:
        lowered = normalized_query.lower()
        endpoint_paths = self._endpoint_extractor.extract(normalized_query)
+        api_like = self._api_like_extractor.extract(normalized_query, endpoint_paths)
        alias_terms, alias_docs, alias_hits = self._alias_matcher.match(lowered)
        return TargetTermsAnalysis(
-            target_terms=self._term_collector.collect(normalized_query, alias_terms, endpoint_paths),
-            endpoint_paths=endpoint_paths,
+            target_terms=self._term_collector.collect(normalized_query, alias_terms, api_like.endpoint_paths),
+            endpoint_paths=api_like.endpoint_paths,
+            api_like_terms=api_like.candidate_terms,
            matched_aliases=alias_hits,
            alias_docs=alias_docs,
        )
@@ -44,6 +44,7 @@ class V2IntentRouter:
            normalized_query=normalized_query,
            target_terms=list(target_terms_analysis.target_terms),
            endpoint_paths=list(target_terms_analysis.endpoint_paths),
+            file_names=list(anchor_analysis.anchors.file_names),
            matched_aliases=list(target_terms_analysis.matched_aliases),
            target_doc_hints=list(anchor_analysis.anchors.target_doc_hints),
            file_markers=list(anchor_analysis.file_markers),
@@ -58,6 +59,7 @@ class V2IntentRouter:
            anchors=anchor_analysis.anchors,
        )
        llm_result = self._validator.validate(llm_candidate)
+        llm_result = self._apply_deterministic_corrections(llm_result, features)
        if llm_result is not None:
            confidence = self._confidence_adjuster.adjust(float(llm_result["confidence"]), features)
            return V2RouteResult(
@@ -99,3 +101,18 @@ class V2IntentRouter:
            )
        except Exception:
            return None
+
+    def _apply_deterministic_corrections(self, candidate: dict | None, features: QueryFeatures) -> dict | None:
+        if candidate is None:
+            return None
+        if candidate.get("routing_domain") == "DOCS" and self._should_force_find_files(features):
+            corrected = dict(candidate)
+            corrected["subintent"] = "FIND_FILES"
+            return corrected
+        return candidate
+
+    def _should_force_find_files(self, features: QueryFeatures) -> bool:
+        if features.file_markers or features.file_names:
+            return True
+        query = features.normalized_query.lower()
+        return "show doc" in query or "show file" in query or "doc for" in query
@@ -6,7 +6,7 @@ from app.core.agent.processes.v2.models import V2Subintent

 class DocsSubintentResolver:
    def resolve(self, features: QueryFeatures) -> str | None:
-        if features.file_markers:
+        if features.file_markers or self._has_file_like_anchor(features):
            return V2Subintent.FIND_FILES
        if any(
            (
@@ -20,3 +20,9 @@ class DocsSubintentResolver:
        ):
            return V2Subintent.SUMMARY
        return None
+
+    def _has_file_like_anchor(self, features: QueryFeatures) -> bool:
+        return any(
+            hint.endswith((".md", ".yaml", ".yml", ".json"))
+            for hint in features.target_doc_hints
+        ) or any(token.endswith((".md", ".yaml", ".yml", ".json")) for token in features.file_names)
@@ -14,7 +14,6 @@ from app.core.agent.processes.v2.retrieval.target_doc_seeding import (
    merge_row_lists,
    normalize_doc_path,
    normalized_path_set,
-    path_variants_for_rag_query,
    row_path,
    seed_candidates_from_target_hints,
 )
@@ -121,11 +120,9 @@ class V2Process(AgentProcess):
            "retrieval_profile_selected",
            {"profile": plan.profile, "layers": plan.layers, "filters": plan.filters},
        )
-        seeded_rows = await self._seed_candidates_from_target_hints(rag_session_id, plan.layers, route)
-        semantic_rows = await self._rag_adapter.fetch_rows(rag_session_id, route.normalized_query, plan)
-        metadata_rows = self._metadata_lookup_candidates([*seeded_rows, *semantic_rows], route)
-        rows = self._merge_candidate_rows(seeded_rows, metadata_rows, semantic_rows)
-        rows = await self._ensure_target_hints_in_pool(rag_session_id, rows, route)
+        retrieved_rows = await self._rag_adapter.fetch_rows(rag_session_id, route.normalized_query, plan)
+        metadata_rows = self._metadata_lookup_candidates(retrieved_rows, route)
+        rows = self._merge_candidate_rows(retrieved_rows, metadata_rows)
        rows = seed_candidates_from_target_hints(rows, route.anchors.target_doc_hints, RagRowIndex(rows))
        self._print_missing_target_hints(route, rows)
        context.trace.module("process.v2.rag_retrieval").log(
@@ -150,9 +147,9 @@ class V2Process(AgentProcess):
                "target_doc_hints": route.anchors.target_doc_hints,
                "candidate_docs_before_ranking": [self._trace_row(row) for row in rows[:8]],
                "sources": {
-                    "seeded": [self._trace_row(row) for row in seeded_rows[:5]],
+                    "seeded": [self._trace_row(row) for row in retrieved_rows[:5] if row_path(row) in {normalize_doc_path(h) for h in route.anchors.target_doc_hints}],
                    "metadata_lookup": [self._trace_row(row) for row in metadata_rows[:5]],
-                    "semantic": [self._trace_row(row) for row in semantic_rows[:5]],
+                    "semantic": [self._trace_row(row) for row in retrieved_rows[:5]],
                },
            },
        )
@@ -262,61 +259,11 @@ class V2Process(AgentProcess):
            if not str(hint or "").strip():
                continue
            normalized = normalize_doc_path(hint)
+            if not normalized.startswith("docs/") or "." not in normalized.rsplit("/", 1)[-1]:
+                continue
            if normalized not in candidate_paths:
                print("ERROR: target doc missing from candidates:", normalized)

-    async def _ensure_target_hints_in_pool(self, rag_session_id: str, rows: list[dict], route) -> list[dict]:
-        hints_raw = [str(item).strip() for item in route.anchors.target_doc_hints if str(item or "").strip()]
-        if not hints_raw:
-            return rows
-        pool = normalized_path_set(rows)
-        missing_hints = [h for h in hints_raw if normalize_doc_path(h) not in pool]
-        if not missing_hints:
-            return rows
-        variant_paths: list[str] = []
-        for h in missing_hints:
-            variant_paths.extend(path_variants_for_rag_query(h))
-        variant_paths = list(dict.fromkeys(variant_paths))
-        extra_exact = await self._rag_adapter.fetch_exact_paths(rag_session_id, paths=variant_paths, layers=None)
-        pool2 = normalized_path_set(extra_exact)
-        still_missing = [h for h in missing_hints if normalize_doc_path(h) not in pool2]
-        fallback_rows: list[dict] = []
-        if still_missing:
-            needles = [normalize_doc_path(h).split("/")[-1] for h in still_missing]
-            needles = list(dict.fromkeys(n for n in needles if n))
-            if needles:
-                fallback_rows = await self._rag_adapter.fetch_chunks_by_path_substrings(
-                    rag_session_id,
-                    path_needles=needles,
-                    layers=None,
-                )
-        return merge_row_lists(rows, extra_exact, fallback_rows)
-
-    async def _seed_candidates_from_target_hints(self, rag_session_id: str, layers: list[str], route) -> list[dict]:
-        del layers  # seed по пути должен видеть все слои (иначе D0-only чанки теряются при file_lookup).
-        hints_raw = [str(item).strip() for item in route.anchors.target_doc_hints if str(item or "").strip()]
-        if not hints_raw:
-            return []
-        variant_paths: list[str] = []
-        for h in hints_raw:
-            variant_paths.extend(path_variants_for_rag_query(h))
-        variant_paths = list(dict.fromkeys(variant_paths))
-        exact_rows = await self._rag_adapter.fetch_exact_paths(rag_session_id, paths=variant_paths, layers=None)
-        paths_found = normalized_path_set(exact_rows)
-        missing = [h for h in hints_raw if normalize_doc_path(h) not in paths_found]
-        if not missing:
-            return exact_rows
-        needles = [normalize_doc_path(h).split("/")[-1] for h in missing]
-        needles = list(dict.fromkeys(n for n in needles if n))
-        if not needles:
-            return exact_rows
-        fallback_rows = await self._rag_adapter.fetch_chunks_by_path_substrings(
-            rag_session_id,
-            path_needles=needles,
-            layers=None,
-        )
-        return merge_row_lists(exact_rows, fallback_rows)
-
    def _metadata_lookup_candidates(self, rows: list[dict], route) -> list[dict]:
        return DocsMetadataLookupIndex(rows).lookup(route)

@@ -1,4 +1,4 @@
-"""Intent-aware retrieval policy resolver для процесса v2."""
+"""Intent-aware retrieval policy resolver for process v2."""

 from __future__ import annotations

@@ -8,91 +8,113 @@ from app.core.rag.contracts.enums import RagLayer
 from app.core.rag.retrieval.session_retriever import RetrievalPlan


-class V2RetrievalPolicyResolver:
-    _SUMMARY_LAYERS = [
-        RagLayer.DOCS_DOCUMENT_CATALOG,
-        RagLayer.DOCS_ENTITY_CATALOG,
-        RagLayer.DOCS_DOC_CHUNKS,
-    ]
-    _GENERAL_LAYERS = [
-        RagLayer.DOCS_DOCUMENT_CATALOG,
-        RagLayer.DOCS_DOC_CHUNKS,
+class _AnchorTermCollector:
+    """Build lowercase SQL ``LIKE`` patterns from the anchors of a routed request."""
+
+    def prefer_like_patterns(self, route: V2RouteResult) -> list[str]:
+        """Return ``%term%`` patterns for every anchor term found on ``route``.
+
+        Terms are gathered, in order, from target-doc-hint basenames, endpoint
+        paths, target terms, file names, entity names, matched aliases and the
+        process domain/subdomain. Blank terms are dropped.
+        """
+        terms = self._hint_basenames(route)
+        terms.extend(route.anchors.endpoint_paths)
+        terms.extend(route.target_terms)
+        terms.extend(route.anchors.file_names)
+        terms.extend(route.anchors.entity_names)
+        terms.extend(route.anchors.matched_aliases)
+        terms.extend(self._process_terms(route))
+        # De-duplicate once more after lowering so "README" and "readme" yield one pattern.
+        return _unique_terms([f"%{term.lower()}%" for term in _unique_terms(terms)])
+
+    def find_files_patterns(self, route: V2RouteResult) -> list[str]:
+        """Return patterns for a file lookup.
+
+        When the target doc hints yield at least one usable basename, only those
+        basenames are used. Otherwise the full anchor term set from
+        :meth:`prefer_like_patterns` is returned.
+        """
+        names = _unique_terms([name.lower() for name in self._hint_basenames(route)])
+        if names:
+            # Empty basenames were already dropped, so a directory-like hint
+            # ("docs/api/") can no longer produce the match-everything "%%".
+            return [f"%{name}%" for name in names]
+        return self.prefer_like_patterns(route)
+
+    def api_method_patterns(self, route: V2RouteResult) -> list[str]:
+        """Return patterns for API-method lookups.
+
+        Each term (hint basenames, full hints, endpoint paths, target terms) is
+        emitted twice: once with surrounding slashes stripped and once verbatim,
+        both lowercased. Duplicates are removed while keeping first-seen order.
+        """
+        terms = self._hint_basenames(route)
+        terms.extend(route.anchors.target_doc_hints)
+        terms.extend(route.anchors.endpoint_paths)
+        terms.extend(route.target_terms)
+        patterns: list[str] = []
+        for term in _unique_terms(terms):
+            lowered = term.lower()
+            stripped = lowered.strip("/")
+            if stripped:
+                patterns.append(f"%{stripped}%")
+            # ``_unique_terms`` only yields non-empty strings, so ``lowered`` is never blank here.
+            patterns.append(f"%{lowered}%")
+        return _unique_terms(patterns)
+
+    def _hint_basenames(self, route: V2RouteResult) -> list[str]:
+        """Return the non-empty last path segment of every target doc hint."""
+        names: list[str] = []
+        for hint in route.anchors.target_doc_hints:
+            # ``str(hint or "")`` tolerates ``None``/non-string hints instead of raising.
+            name = str(hint or "").strip().rsplit("/", 1)[-1]
+            if name:
+                names.append(name)
+        return names
+
+    def _process_terms(self, route: V2RouteResult) -> list[str]:
+        """Return the process domain and subdomain of ``route`` when they are set."""
+        terms: list[str] = []
+        if route.anchors.process_domain:
+            terms.append(route.anchors.process_domain)
+        if route.anchors.process_subdomain:
+            terms.append(route.anchors.process_subdomain)
+        return terms
+
+
+class _RouteFilterBuilder:
+    _API_DOC_PREFIXES = [
+        "docs/api/",
+        "docs/endpoints/",
+        "docs/methods/",
+        "api/",
+        "endpoints/",
+        "methods/",
    ]

-    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
-        if route.intent == V2Intent.GENERAL_QA:
-            return RetrievalPlan(
-                profile="general_qa_grounded_summary",
-                layers=list(self._GENERAL_LAYERS),
-                limit=8,
-                filters=self._general_filters(route),
-            )
-        if route.subintent == V2Subintent.FIND_FILES:
-            return RetrievalPlan(
-                profile="file_lookup",
-                layers=[RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_ENTITY_CATALOG],
-                limit=12,
-                filters=self._find_files_filters(route),
-            )
-        return RetrievalPlan(
-            profile=self._summary_profile(route),
-            layers=list(self._SUMMARY_LAYERS),
-            limit=8,
-            filters=self._summary_filters(route),
-        )
+    def __init__(self) -> None:
+        self._terms = _AnchorTermCollector()

-    def _summary_profile(self, route: V2RouteResult) -> str:
-        signals = anchor_signal_types(route)
-        if len(signals - {V2AnchorType.FIND_FILES}) != 1:
-            return "docs_summary_generic"
-        mapping = {
-            V2AnchorType.API_ENDPOINT: "docs_summary_api_endpoint",
-            V2AnchorType.ARCHITECTURE: "docs_summary_architecture",
-            V2AnchorType.LOGIC_FLOW: "docs_summary_logic_flow",
-            V2AnchorType.DOMAIN_ENTITY: "docs_summary_domain_entity",
-        }
-        signal = next(iter(signals - {V2AnchorType.FIND_FILES}), None)
-        return mapping.get(signal, "docs_summary_generic")
-
-    def _general_filters(self, route: V2RouteResult) -> dict[str, object]:
+    def general_filters(self, route: V2RouteResult) -> dict[str, object]:
        return {
            "prefer_path_prefixes": ["docs/architecture/", "docs/"],
-            "prefer_like_patterns": ["%README.md%", "%overview%"],
+            "prefer_like_patterns": ["%readme.md%", "%overview%"],
            "target_doc_hints": list(route.anchors.target_doc_hints),
        }

-    def _summary_filters(self, route: V2RouteResult) -> dict[str, object]:
-        filters: dict[str, object] = {
-            "prefer_path_prefixes": self._summary_prefixes(route),
-            "prefer_like_patterns": self._prefer_like_patterns(route),
-            "target_doc_hints": list(route.anchors.target_doc_hints),
-        }
+    def summary_filters(self, route: V2RouteResult) -> dict[str, object]:
+        if _is_api_method_explain(route):
+            return self.api_method_filters(route)
+        filters = self._base_filters(route)
+        filters["prefer_path_prefixes"] = self._summary_prefixes(route)
+        filters["prefer_like_patterns"] = self._terms.prefer_like_patterns(route)
        if V2AnchorType.API_ENDPOINT in anchor_signal_types(route):
-            filters["path_prefixes"] = ["docs/api/", "docs/architecture/", "docs/"]
+            filters["path_prefixes"] = ["docs/api/", "docs/"]
        return filters

-    def _find_files_filters(self, route: V2RouteResult) -> dict[str, object]:
+    def api_method_filters(self, route: V2RouteResult) -> dict[str, object]:
+        filters = self._base_filters(route)
+        filters["path_prefixes"] = list(self._API_DOC_PREFIXES)
+        filters["prefer_path_prefixes"] = list(self._API_DOC_PREFIXES)
+        filters["prefer_like_patterns"] = self._terms.api_method_patterns(route)
+        return filters
+
+    def find_files_filters(self, route: V2RouteResult) -> dict[str, object]:
+        filters = self._base_filters(route)
+        prefixes = self._find_files_prefixes(route)
+        if prefixes:
+            filters["path_prefixes"] = prefixes
+        filters["prefer_path_prefixes"] = self._find_files_prefer_prefixes(route, prefixes)
+        filters["prefer_like_patterns"] = self._terms.find_files_patterns(route)
+        return filters
+
+    def _base_filters(self, route: V2RouteResult) -> dict[str, object]:
        filters: dict[str, object] = {
-            "prefer_path_prefixes": self._find_files_prefixes(route),
-            "prefer_like_patterns": self._prefer_like_patterns(route),
            "target_doc_hints": list(route.anchors.target_doc_hints),
        }
-        if route.anchors.target_doc_hints:
-            filters["prefer_like_patterns"] = [f"%{path.split('/')[-1]}%" for path in route.anchors.target_doc_hints]
+        if route.anchors.process_domain:
+            filters["metadata.domain"] = route.anchors.process_domain
+        if route.anchors.process_subdomain:
+            filters["metadata.subdomain"] = route.anchors.process_subdomain
        return filters

-    def _prefer_like_patterns(self, route: V2RouteResult) -> list[str]:
-        patterns: list[str] = []
-        for path in route.anchors.target_doc_hints:
-            patterns.append(f"%{path.split('/')[-1]}%")
-        for endpoint in route.anchors.endpoint_paths:
-            patterns.append(f"%{endpoint}%")
-        return patterns
-
    def _find_files_prefixes(self, route: V2RouteResult) -> list[str]:
-        if route.anchors.target_doc_hints:
-            prefixes = ["/".join(path.split("/")[:-1]) + "/" for path in route.anchors.target_doc_hints]
-            return [prefix for prefix in prefixes if prefix]
+        hint_prefixes = _prefixes_from_paths(route.anchors.target_doc_hints)
+        if hint_prefixes:
+            return hint_prefixes
+        file_prefixes = [name for name in route.anchors.file_names if str(name).strip().startswith("docs/")]
+        derived = _prefixes_from_paths(file_prefixes)
+        if derived:
+            return derived
        signals = anchor_signal_types(route)
        if V2AnchorType.API_ENDPOINT in signals:
            return ["docs/api/", "docs/"]
@@ -104,6 +126,12 @@ class V2RetrievalPolicyResolver:
            return ["docs/domains/", "docs/"]
        return ["docs/"]

+    def _find_files_prefer_prefixes(self, route: V2RouteResult, prefixes: list[str]) -> list[str]:
+        preferred = list(prefixes)
+        if route.anchors.process_domain or route.anchors.process_subdomain:
+            preferred.extend(["docs/domains/", "docs/logic/"])
+        return _unique_terms(preferred or ["docs/"])
+
    def _summary_prefixes(self, route: V2RouteResult) -> list[str]:
        signals = anchor_signal_types(route)
        prefixes: list[str] = []
@@ -114,5 +142,129 @@ class V2RetrievalPolicyResolver:
        if V2AnchorType.LOGIC_FLOW in signals:
            prefixes.extend(["docs/logic/", "docs/architecture/", "docs/"])
        if V2AnchorType.DOMAIN_ENTITY in signals:
-            prefixes.extend(["docs/domains/", "docs/api/", "docs/architecture/"])
-        return list(dict.fromkeys(prefixes or ["docs/"]))
+            prefixes.extend(["docs/domains/", "docs/", "docs/api/"])
+        return _unique_terms(prefixes or ["docs/"])
+
+
+class V2RetrievalPolicyResolver:
+    """Translate a routed v2 request into a :class:`RetrievalPlan`."""
+
+    _GENERAL_LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_DOC_CHUNKS]
+    _FIND_FILES_LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_ENTITY_CATALOG]
+    # Layers queried for each summary profile name.
+    _SUMMARY_LAYERS = {
+        "docs_api_method_explain": [
+            RagLayer.DOCS_DOCUMENT_CATALOG,
+            RagLayer.DOCS_FACT_INDEX,
+            RagLayer.DOCS_DOC_CHUNKS,
+        ],
+        "docs_summary_api_endpoint": [
+            RagLayer.DOCS_DOCUMENT_CATALOG,
+            RagLayer.DOCS_FACT_INDEX,
+            RagLayer.DOCS_DOC_CHUNKS,
+        ],
+        "docs_summary_logic_flow": [
+            RagLayer.DOCS_WORKFLOW_INDEX,
+            RagLayer.DOCS_DOCUMENT_CATALOG,
+            RagLayer.DOCS_DOC_CHUNKS,
+        ],
+        "docs_summary_domain_entity": [
+            RagLayer.DOCS_ENTITY_CATALOG,
+            RagLayer.DOCS_DOCUMENT_CATALOG,
+            RagLayer.DOCS_DOC_CHUNKS,
+        ],
+        "docs_summary_architecture": [
+            RagLayer.DOCS_DOCUMENT_CATALOG,
+            RagLayer.DOCS_RELATION_GRAPH,
+            RagLayer.DOCS_DOC_CHUNKS,
+        ],
+        "docs_summary_generic": [
+            RagLayer.DOCS_DOCUMENT_CATALOG,
+            RagLayer.DOCS_DOC_CHUNKS,
+        ],
+    }
+
+    def __init__(self) -> None:
+        self._filters = _RouteFilterBuilder()
+
+    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
+        """Pick profile, layers, row limit and filters for ``route``.
+
+        General QA is checked first, then file lookup; every other route gets a
+        summary profile chosen by :meth:`_summary_profile`.
+        """
+        if route.intent == V2Intent.GENERAL_QA:
+            profile = "general_qa_grounded_summary"
+            layers = self._GENERAL_LAYERS
+            limit = 8
+            filters = self._filters.general_filters(route)
+        elif route.subintent == V2Subintent.FIND_FILES:
+            profile = "file_lookup"
+            layers = self._FIND_FILES_LAYERS
+            limit = 12
+            filters = self._filters.find_files_filters(route)
+        else:
+            profile = self._summary_profile(route)
+            layers = self._SUMMARY_LAYERS[profile]
+            limit = 10 if profile == "docs_api_method_explain" else 8
+            filters = self._filters.summary_filters(route)
+        # Copy the layer list so the plan never aliases the class-level constants.
+        return RetrievalPlan(profile=profile, layers=list(layers), limit=limit, filters=filters)
+
+    def _summary_profile(self, route: V2RouteResult) -> str:
+        """Return the summary profile name for ``route``.
+
+        API-method explanations win outright. Otherwise a specific profile is
+        used only when exactly one signal besides ``FIND_FILES`` is present;
+        anything else falls back to ``docs_summary_generic``.
+        """
+        if _is_api_method_explain(route):
+            return "docs_api_method_explain"
+        generic = "docs_summary_generic"
+        signals = anchor_signal_types(route) - {V2AnchorType.FIND_FILES}
+        if len(signals) == 1:
+            (signal,) = signals
+            profile_by_signal = {
+                V2AnchorType.API_ENDPOINT: "docs_summary_api_endpoint",
+                V2AnchorType.ARCHITECTURE: "docs_summary_architecture",
+                V2AnchorType.LOGIC_FLOW: "docs_summary_logic_flow",
+                V2AnchorType.DOMAIN_ENTITY: "docs_summary_domain_entity",
+            }
+            return profile_by_signal.get(signal, generic)
+        return generic
+
+
+def _prefixes_from_paths(paths: list[str]) -> list[str]:
+    """Return the unique parent-directory prefixes (with trailing ``/``) of ``paths``.
+
+    Each path is stripped of whitespace and of leading/trailing slashes first;
+    values left without any ``/`` have no parent directory and are skipped.
+    """
+    trimmed = (str(path).strip().strip("/") for path in paths)
+    return _unique_terms([item.rsplit("/", 1)[0] + "/" for item in trimmed if "/" in item])
+
+
+def _unique_terms(items: list[str]) -> list[str]:
+    """Return the stripped, non-empty items in first-seen order without duplicates."""
+    cleaned = (str(item or "").strip() for item in items)
+    # ``dict.fromkeys`` keeps insertion order, which gives an order-preserving de-duplication.
+    return list(dict.fromkeys(value for value in cleaned if value))
+
+
+def _is_api_method_explain(route: V2RouteResult) -> bool:
+    """Tell whether a summary request should use the API-method-explain profile.
+
+    Only ``SUMMARY`` routes qualify. Among those, any endpoint path, any
+    API-like target doc hint, or an ``API_ENDPOINT`` anchor signal is enough.
+    """
+    if route.subintent != V2Subintent.SUMMARY:
+        return False
+    anchors = route.anchors
+    if anchors.endpoint_paths or _has_api_like_hints(anchors.target_doc_hints):
+        return True
+    return V2AnchorType.API_ENDPOINT in anchor_signal_types(route)
+
+
+def _has_api_like_hints(hints: list[str]) -> bool:
+    """Return ``True`` when any hint looks like an API path or API document.
+
+    A hint qualifies when, after trimming and lowercasing, it starts with ``/``
+    or with one of the API doc directories, or contains "endpoint" or "method".
+    """
+    api_prefixes = ("/", "docs/api/", "docs/endpoints/", "docs/methods/")
+    api_words = ("endpoint", "method")
+    for raw in hints:
+        candidate = str(raw or "").strip().lower()
+        # A blank candidate matches neither check, so it needs no separate guard.
+        if candidate.startswith(api_prefixes) or any(word in candidate for word in api_words):
+            return True
+    return False
@@ -1,18 +1,23 @@
-"""Адаптер v2 к :class:`RagSessionRetriever` для подстановки в тестах."""
+"""Адаптер v2 к :class:`RagSessionRetriever` с plan-driven execution strategy."""

 from __future__ import annotations

+from app.core.agent.processes.v2.retrieval.target_doc_seeding import (
+    merge_row_lists,
+    normalize_doc_path,
+    path_variants_for_rag_query,
+)
 from app.core.rag.retrieval.session_retriever import RagSessionRetriever, RetrievalPlan


-class V2RagRetrievalAdapter:
-    """Обёртка над :class:`RagSessionRetriever` для подмены в тестах."""
-
+class _PlanDrivenRetrieval:
    def __init__(self, retriever: RagSessionRetriever) -> None:
        self._retriever = retriever

    async def fetch_rows(self, rag_session_id: str, query_text: str, plan: RetrievalPlan) -> list[dict]:
-        return await self._retriever.retrieve(rag_session_id, query_text, plan)
+        seeded_rows = await self._seed_from_target_hints(rag_session_id, plan)
+        semantic_rows = await self._retriever.retrieve(rag_session_id, query_text, plan)
+        return merge_row_lists(seeded_rows, semantic_rows)

    async def fetch_exact_paths(self, rag_session_id: str, *, paths: list[str], layers: list[str] | None = None) -> list[dict]:
        return await self._retriever.retrieve_exact_files(rag_session_id, paths=paths, layers=layers)
@@ -31,3 +36,73 @@ class V2RagRetrievalAdapter:
            layers=layers,
            limit=limit,
        )
+
+    async def _seed_from_target_hints(self, rag_session_id: str, plan: RetrievalPlan) -> list[dict]:
+        """Fetch rows for the plan's target doc hints.
+
+        The exact-path lookup runs first; the substring lookup runs only for
+        hints it did not resolve, and both results are merged. Returns an empty
+        list when the plan carries no hints.
+        """
+        hints = self._target_doc_hints(plan)
+        if not hints:
+            return []
+        exact_rows = await self._fetch_exact_rows(rag_session_id, hints)
+        unresolved = self._missing_hints(hints, exact_rows)
+        if unresolved:
+            substring_rows = await self._fetch_substring_rows(rag_session_id, unresolved)
+            return merge_row_lists(exact_rows, substring_rows)
+        return exact_rows
+
+    async def _fetch_exact_rows(self, rag_session_id: str, hints: list[str]) -> list[dict]:
+        """Run the retriever's exact-file lookup over every path variant of ``hints``.
+
+        Variants are de-duplicated in first-seen order; the retriever is not
+        called at all when no non-empty variant is produced.
+        """
+        variants = (variant for hint in hints for variant in path_variants_for_rag_query(hint))
+        paths = list(dict.fromkeys(variant for variant in variants if variant))
+        if paths:
+            return await self._retriever.retrieve_exact_files(rag_session_id, paths=paths, layers=None)
+        return []
+
+    async def _fetch_substring_rows(self, rag_session_id: str, hints: list[str]) -> list[dict]:
+        """Run the retriever's path-substring lookup using the basename of each hint.
+
+        Basenames are taken from the normalized hint path and de-duplicated in
+        first-seen order; the retriever is not called when none remain.
+        """
+        basenames = (normalize_doc_path(hint).split("/")[-1] for hint in hints)
+        needles = list(dict.fromkeys(name for name in basenames if name))
+        if needles:
+            return await self._retriever.retrieve_chunks_by_path_substrings(
+                rag_session_id,
+                path_needles=needles,
+                layers=None,
+                limit=200,
+            )
+        return []
+
+    def _target_doc_hints(self, plan: RetrievalPlan) -> list[str]:
+        """Return the stripped, non-blank ``target_doc_hints`` stored in the plan filters.
+
+        Anything other than a list under that key yields an empty result.
+        """
+        raw = plan.filters.get("target_doc_hints")
+        if isinstance(raw, list):
+            stripped = (str(item or "").strip() for item in raw)
+            return [hint for hint in stripped if hint]
+        return []
+
+    def _missing_hints(self, hints: list[str], rows: list[dict]) -> list[str]:
+        """Return the hints whose normalized path is absent from the ``path`` values of ``rows``."""
+        known_paths: set[str] = set()
+        for row in rows:
+            # Rows without a path contribute the normalization of the empty string.
+            known_paths.add(normalize_doc_path(str(row.get("path") or "")))
+        return [hint for hint in hints if normalize_doc_path(hint) not in known_paths]
+
+
+class V2RagRetrievalAdapter:
+    """Wrapper around :class:`RagSessionRetriever` for plan-driven retrieval and for substitution in tests."""
+
+    def __init__(self, retriever: RagSessionRetriever) -> None:
+        # NOTE: despite its name, ``_retriever`` holds the plan-driven wrapper,
+        # not the raw ``RagSessionRetriever`` passed in.
+        self._retriever = _PlanDrivenRetrieval(retriever)
+
+    async def fetch_rows(self, rag_session_id: str, query_text: str, plan: RetrievalPlan) -> list[dict]:
+        """Delegate to the wrapped object's ``fetch_rows`` with the same arguments."""
+        return await self._retriever.fetch_rows(rag_session_id, query_text, plan)
+
+    async def fetch_exact_paths(self, rag_session_id: str, *, paths: list[str], layers: list[str] | None = None) -> list[dict]:
+        """Delegate to the wrapped object's ``fetch_exact_paths`` with the same arguments."""
+        return await self._retriever.fetch_exact_paths(rag_session_id, paths=paths, layers=layers)
+
+    async def fetch_chunks_by_path_substrings(
+        self,
+        rag_session_id: str,
+        *,
+        path_needles: list[str],
+        layers: list[str] | None = None,
+        limit: int = 200,
+    ) -> list[dict]:
+        """Delegate to the wrapped object's ``fetch_chunks_by_path_substrings`` with the same arguments."""
+        return await self._retriever.fetch_chunks_by_path_substrings(
+            rag_session_id,
+            path_needles=path_needles,
+            layers=layers,
+            limit=limit,
+        )
@@ -1,20 +1,24 @@
 from __future__ import annotations

+import logging
+
 import yaml

 from app.core.rag.indexing.docs.chunkers.markdown_chunker import SectionChunk
 from app.core.rag.indexing.docs.models import IntegrationRecord

+LOGGER = logging.getLogger(__name__)
+

 class DocsIntegrationExtractor:
    _SECTION_TITLES = {"integrations", "интеграции"}

-    def extract(self, sections: list[SectionChunk]) -> list[IntegrationRecord]:
+    def extract(self, sections: list[SectionChunk], *, path: str = "") -> list[IntegrationRecord]:
        records: list[IntegrationRecord] = []
        for section in sections:
            if not self._is_integration_section(section.section_path):
                continue
-            payload = self._payload(section.content)
+            payload = self._payload(section.content, path=path, section_path=section.section_path)
            target = str(payload.get("target") or "").strip()
            if not target:
                continue
@@ -40,7 +44,7 @@ class DocsIntegrationExtractor:
        parts = [item.strip().lower() for item in section_path.split(" > ") if item.strip()]
        return any(part in self._SECTION_TITLES for part in parts[:-1]) or (parts and parts[-1] in self._SECTION_TITLES)

-    def _payload(self, text: str) -> dict:
+    def _payload(self, text: str, *, path: str, section_path: str) -> dict:
        payload: dict = {}
        details_lines: list[str] = []
        collecting_details = False
@@ -61,15 +65,27 @@ class DocsIntegrationExtractor:
                collecting_details = True
                details_lines = []
                if value:
-                    payload[key] = self._yaml_value(value)
+                    payload[key] = self._yaml_value(
+                        value,
+                        path=path,
+                        section_path=section_path,
+                        field_name=key,
+                        fallback="",
+                    )
                continue
            collecting_details = False
-            payload[key] = self._yaml_value(value)
+            payload[key] = self._yaml_value(
+                value,
+                path=path,
+                section_path=section_path,
+                field_name=key,
+                fallback=value,
+            )
        if details_lines:
-            payload["details"] = self._details_payload(details_lines)
+            payload["details"] = self._details_payload(details_lines, path=path, section_path=section_path)
        return payload

-    def _details_payload(self, lines: list[str]) -> dict:
+    def _details_payload(self, lines: list[str], *, path: str, section_path: str) -> dict:
        normalized: list[str] = []
        for raw_line in lines:
            line = raw_line[2:] if raw_line.startswith("  ") else raw_line
@@ -78,7 +94,13 @@ class DocsIntegrationExtractor:
            if indent == 0 and stripped.startswith("- "):
                stripped = stripped[2:]
            normalized.append((" " * indent) + stripped)
-        payload = yaml.safe_load("\n".join(normalized)) or {}
+        payload = self._yaml_value(
+            "\n".join(normalized),
+            path=path,
+            section_path=section_path,
+            field_name="details",
+            fallback={},
+        ) or {}
        return payload if isinstance(payload, dict) else {}

    def _split_key_value(self, text: str) -> tuple[str, str]:
@@ -87,7 +109,17 @@ class DocsIntegrationExtractor:
        key, value = text.split(":", 1)
        return key.strip(), value.strip()

-    def _yaml_value(self, value: str):
+    def _yaml_value(self, value: str, *, path: str, section_path: str, field_name: str, fallback):
        if not value:
            return ""
-        return yaml.safe_load(value)
+        try:
+            return yaml.safe_load(value)
+        except yaml.YAMLError as exc:
+            LOGGER.warning(
+                "docs integration parse warning: path=%s section=%s field=%s reason=%s",
+                path or "<unknown>",
+                section_path,
+                field_name,
+                exc.__class__.__name__,
+            )
+            return fallback
@@ -1,5 +1,8 @@
 from __future__ import annotations

+import logging
+from collections.abc import Callable
+
 from app.core.rag.contracts import RagDocument, RagSource
 from app.core.rag.indexing.docs.chunkers.markdown_chunker import MarkdownDocChunker
 from app.core.rag.indexing.docs.classifier import DocsClassifier
@@ -15,6 +18,8 @@ from app.core.rag.indexing.docs.relation_extractor import DocsRelationExtractor
 from app.core.rag.indexing.docs.support_layer_builder import DocsSupportLayerBuilder
 from app.core.rag.indexing.docs.workflow_extractor import DocsWorkflowExtractor

+LOGGER = logging.getLogger(__name__)
+

 class DocsIndexingPipeline:
    def __init__(self) -> None:
@@ -59,7 +64,11 @@ class DocsIndexingPipeline:
        for section in sections:
            docs.append(self._builder.build_doc_chunk(source, section, parsed.frontmatter, doc_kind))
        document_id = frontmatter_view.document_id or source.path
-        for fact in self._facts.extract(parsed.frontmatter, sections):
+        for fact in self._safe_extract(
+            extractor_name="fact_extractor",
+            path=path,
+            run=lambda: self._facts.extract(parsed.frontmatter, sections),
+        ):
            docs.append(
                self._support_builder.build_fact(
                    source,
@@ -72,13 +81,29 @@ class DocsIndexingPipeline:
                    subdomain=frontmatter_view.subdomain,
                )
            )
-        for entity in self._entities.extract(parsed.frontmatter):
+        for entity in self._safe_extract(
+            extractor_name="entity_extractor",
+            path=path,
+            run=lambda: self._entities.extract(parsed.frontmatter),
+        ):
            docs.append(self._builder.build_entity_record(source, parsed.frontmatter, entity))
-        for workflow in self._workflows.extract(parsed.detail_sections):
+        for workflow in self._safe_extract(
+            extractor_name="workflow_extractor",
+            path=path,
+            run=lambda: self._workflows.extract(parsed.detail_sections),
+        ):
            docs.append(self._support_builder.build_workflow_record(source, parsed.frontmatter, workflow))
-        for edge in self._relations.extract(parsed.frontmatter, source_id=document_id):
+        for edge in self._safe_extract(
+            extractor_name="relation_extractor",
+            path=path,
+            run=lambda: self._relations.extract(parsed.frontmatter, source_id=document_id),
+        ):
            docs.append(self._support_builder.build_relation_record(source, parsed.frontmatter, edge))
-        for integration in self._integrations.extract(sections):
+        for integration in self._safe_extract(
+            extractor_name="integration_extractor",
+            path=path,
+            run=lambda: self._integrations.extract(sections, path=path),
+        ):
            docs.append(self._support_builder.build_integration_record(source, parsed.frontmatter, integration))
        return docs

@@ -86,3 +111,15 @@ class DocsIndexingPipeline:
        tail = path.rsplit("/", 1)[-1]
        stem = tail.rsplit(".", 1)[0]
        return stem.replace("-", " ").replace("_", " ").strip().title()
+
+    def _safe_extract(self, *, extractor_name: str, path: str, run: Callable[[], list]) -> list:
+        """Run one extractor and fall back to an empty result if it raises.
+
+        Args:
+            extractor_name: Label written to the log when the extractor fails.
+            path: Document path written to the log.
+            run: Zero-argument callable that performs the extraction.
+
+        Returns:
+            Whatever ``run`` returns, or ``[]`` when it raises any ``Exception``.
+        """
+        try:
+            return run()
+        except Exception as exc:  # deliberate best-effort boundary: the caller keeps going with []
+            LOGGER.warning(
+                "docs pipeline extractor warning: path=%s extractor=%s reason=%s",
+                path,
+                extractor_name,
+                exc.__class__.__name__,
+                # Keep the message and traceback; the class name alone is not
+                # enough to diagnose a swallowed extractor failure.
+                exc_info=True,
+            )
+            return []
@@ -25,6 +25,8 @@ class RagQueryRepository:
        exclude_like_patterns: list[str] | None = None,
        prefer_path_prefixes: list[str] | None = None,
        prefer_like_patterns: list[str] | None = None,
+        metadata_domain: str | None = None,
+        metadata_subdomain: str | None = None,
        prefer_non_tests: bool = False,
    ) -> list[dict]:
        sql, params = self._builder.build_retrieve(
@@ -38,6 +40,8 @@ class RagQueryRepository:
            exclude_like_patterns=exclude_like_patterns,
            prefer_path_prefixes=prefer_path_prefixes,
            prefer_like_patterns=prefer_like_patterns,
+            metadata_domain=metadata_domain,
+            metadata_subdomain=metadata_subdomain,
            prefer_non_tests=prefer_non_tests,
        )
        with get_engine().connect() as conn:
@@ -234,6 +238,54 @@ class RagQueryRepository:
            rows = conn.execute(stmt, params).mappings().fetchall()
        return [self._row_to_dict(row) for row in rows]

+    def retrieve_chunks_by_path_substrings(
+        self,
+        rag_session_id: str,
+        *,
+        path_needles: list[str],
+        layers: list[str] | None = None,
+        limit: int = 200,
+    ) -> list[dict]:
+        """Return chunks of a session whose path contains any of the given substrings.
+
+        Matching is case-insensitive and literal: ``%`` and ``_`` inside a needle
+        are escaped so they do not act as ``LIKE`` wildcards.
+
+        Args:
+            rag_session_id: Session whose chunks are searched.
+            path_needles: Substrings to look for; blank entries are ignored.
+            layers: Optional layer names to restrict the search to.
+            limit: Maximum number of rows; values below 1 are raised to 1.
+
+        Returns:
+            Rows ordered by path, span start and chunk index, with all ranking
+            columns set to 0. Empty when no usable needle is given.
+        """
+        cleaned = (str(item or "").strip().lower() for item in path_needles)
+        # Order-preserving de-duplication avoids emitting the same predicate twice.
+        normalized_needles = list(dict.fromkeys(needle for needle in cleaned if needle))
+        if not normalized_needles:
+            return []
+        params: dict = {
+            "sid": rag_session_id,
+            "lim": max(1, int(limit)),
+        }
+        filters = ["rag_session_id = :sid"]
+        like_parts: list[str] = []
+        for idx, needle in enumerate(normalized_needles):
+            key = f"needle_{idx}"
+            # Escape the escape character first, then the LIKE metacharacters, so a
+            # file name such as "user_service.md" only matches itself.
+            escaped = needle.replace("!", "!!").replace("%", "!%").replace("_", "!_")
+            params[key] = f"%{escaped}%"
+            like_parts.append(f"lower(path) LIKE :{key} ESCAPE '!'")
+        filters.append("(" + " OR ".join(like_parts) + ")")
+        if layers:
+            normalized_layers = [str(item).strip() for item in layers if str(item).strip()]
+            if normalized_layers:
+                params["layers"] = normalized_layers
+                filters.append("layer IN :layers")
+        # Only fixed fragments and generated bind names are interpolated below;
+        # every caller-supplied value travels as a bound parameter.
+        stmt = text(
+            f"""
+            SELECT path, content, layer, title, metadata_json, span_start, span_end,
+                   0 AS lexical_rank,
+                   0 AS prefer_bonus,
+                   0 AS test_penalty,
+                   0 AS structural_rank,
+                   0 AS layer_rank,
+                   0 AS distance
+            FROM rag_chunks
+            WHERE {' AND '.join(filters)}
+            ORDER BY path ASC, COALESCE(span_start, 0) ASC, COALESCE(chunk_index, 0) ASC
+            LIMIT :lim
+            """
+        )
+        if "layers" in params:
+            stmt = stmt.bindparams(bindparam("layers", expanding=True))
+        with get_engine().connect() as conn:
+            rows = conn.execute(stmt, params).mappings().fetchall()
+        return [self._row_to_dict(row) for row in rows]
+
    def _row_to_dict(self, row) -> dict:
        data = dict(row)
        raw_metadata = data.pop("metadata_json")
@@ -69,6 +69,8 @@ class RagRepository:
        exclude_like_patterns: list[str] | None = None,
        prefer_path_prefixes: list[str] | None = None,
        prefer_like_patterns: list[str] | None = None,
+        metadata_domain: str | None = None,
+        metadata_subdomain: str | None = None,
        prefer_non_tests: bool = False,
    ) -> list[dict]:
        return self._query.retrieve(
@@ -82,6 +84,8 @@ class RagRepository:
            exclude_like_patterns=exclude_like_patterns,
            prefer_path_prefixes=prefer_path_prefixes,
            prefer_like_patterns=prefer_like_patterns,
+            metadata_domain=metadata_domain,
+            metadata_subdomain=metadata_subdomain,
            prefer_non_tests=prefer_non_tests,
        )

@@ -141,3 +145,18 @@ class RagRepository:
            layers=layers,
            limit=limit,
        )
+
+    def retrieve_chunks_by_path_substrings(
+        self,
+        rag_session_id: str,
+        *,
+        path_needles: list[str],
+        layers: list[str] | None = None,
+        limit: int = 200,
+    ) -> list[dict]:
+        """Forward a path-substring chunk lookup to the query component.
+
+        Every argument is passed through unchanged to
+        ``self._query.retrieve_chunks_by_path_substrings`` and its result is
+        returned as-is; no validation or post-processing happens here.
+
+        Args:
+            rag_session_id: Session identifier, forwarded positionally.
+            path_needles: Path fragments to search for (keyword-only).
+            layers: Optional layer names; ``None`` is forwarded as ``None``.
+            limit: Row limit forwarded to the query component.
+
+        Returns:
+            The value returned by the query component (annotated ``list[dict]``).
+        """
+        return self._query.retrieve_chunks_by_path_substrings(
+            rag_session_id,
+            path_needles=path_needles,
+            layers=layers,
+            limit=limit,
+        )
@@ -19,6 +19,8 @@ class RetrievalStatementBuilder:
        exclude_like_patterns: list[str] | None = None,
        prefer_path_prefixes: list[str] | None = None,
        prefer_like_patterns: list[str] | None = None,
+        metadata_domain: str | None = None,
+        metadata_subdomain: str | None = None,
        prefer_non_tests: bool = False,
    ) -> tuple[str, dict]:
        emb = "[" + ",".join(str(x) for x in query_embedding) + "]"
@@ -29,6 +31,8 @@ class RetrievalStatementBuilder:
        self._append_prefix_group(filters, params, "path", path_prefixes)
        self._append_prefix_group(filters, params, "exclude_prefix", exclude_path_prefixes, negate=True)
        self._append_like_group(filters, params, "exclude_like", exclude_like_patterns, negate=True)
+        self._append_metadata_equals(filters, params, "metadata_domain", "domain", metadata_domain)
+        self._append_metadata_equals(filters, params, "metadata_subdomain", "subdomain", metadata_subdomain)
        if layers:
            filters.append("layer = ANY(:layers)")
            params["layers"] = layers
@@ -202,6 +206,20 @@ class RetrievalStatementBuilder:
        joined = " OR ".join(parts)
        filters.append(f"NOT ({joined})" if negate else f"({joined})")

+    def _append_metadata_equals(
+        self,
+        filters: list[str],
+        params: dict,
+        param_key: str,
+        metadata_key: str,
+        value: str | None,
+    ) -> None:
+        """Add a case-insensitive equality filter on one metadata field.
+
+        Mutates ``filters`` and ``params`` in place. Does nothing when ``value``
+        is ``None``, empty, or whitespace-only.
+
+        Args:
+            filters: SQL condition fragments; one condition is appended.
+            params: Bind parameters; ``param_key`` is set to the normalized value.
+            param_key: Bind-parameter name; it is also interpolated into the SQL
+                text as the ``:name`` placeholder.
+            metadata_key: Key handed to ``self._metadata_text`` (defined outside
+                this view; presumably yields the SQL expression for that
+                metadata field -- confirm against the helper).
+            value: Requested value; stripped and lower-cased before binding.
+        """
+        # Falsy values (None, "") collapse to "" so the guard below skips them.
+        normalized = str(value or "").strip().lower()
+        if not normalized:
+            return
+        params[param_key] = normalized
+        # The value is bound, never interpolated. COALESCE maps a NULL expression
+        # to '' so the comparison evaluates to false instead of NULL.
+        filters.append(f"lower(COALESCE({self._metadata_text(metadata_key)}, '')) = :{param_key}")
+
    def _test_penalty_sql(
        self,
        enabled: bool,
@@ -94,4 +94,8 @@ class RagSessionRetriever:
        for key in keys:
            if key in filters:
                out[key] = filters[key]
+        if "metadata.domain" in filters:
+            out["metadata_domain"] = filters["metadata.domain"]
+        if "metadata.subdomain" in filters:
+            out["metadata_subdomain"] = filters["metadata.subdomain"]
        return out