Фиксация изменений

This commit is contained in:
2026-03-05 11:03:17 +03:00
parent 1ef0b4d68c
commit 417b8b6f72
261 changed files with 8215 additions and 332 deletions

Binary file not shown.

Binary file not shown.

49
app/core/logging_setup.py Normal file
View File

@@ -0,0 +1,49 @@
from __future__ import annotations
import logging
import re
class ScrubbingFormatter(logging.Formatter):
    """Log formatter that redacts identifier-like values from rendered lines.

    Two families of patterns are scrubbed from the final formatted string:
    - ``key=value`` pairs whose key ends in ``id`` or ``_key``;
    - free-text mentions such as ``session id: <value>`` (case-insensitive).

    Output is normalized to exactly one trailing newline.
    """

    _KEY_VALUE_PATTERNS = (
        re.compile(r"\b([A-Za-z_][A-Za-z0-9_]*id)=([^\s,]+)"),
        re.compile(r"\b([A-Za-z_][A-Za-z0-9_]*_key)=([^\s,]+)"),
    )
    _TEXT_PATTERNS = (
        re.compile(r"\b(index|task|dialog|rag|session|plan|artifact|evidence|symbol|edge|entry) id\b[:=]\s*([^\s,]+)", re.IGNORECASE),
    )

    def format(self, record: logging.LogRecord) -> str:
        """Render the record, scrub identifiers, and append a single newline."""
        scrubbed = self._scrub(super().format(record))
        return scrubbed.rstrip("\n") + "\n"

    def _scrub(self, message: str) -> str:
        """Apply key=value redaction first, then free-text redaction."""
        result = message
        for kv_pattern in self._KEY_VALUE_PATTERNS:
            result = kv_pattern.sub(lambda m: f"{m.group(1)}=<redacted>", result)
        for text_pattern in self._TEXT_PATTERNS:
            result = text_pattern.sub(lambda m: f"{m.group(1)} id=<redacted>", result)
        return result
def configure_logging() -> None:
    """Install the scrubbing formatter on root handlers and quiet uvicorn.

    ``basicConfig(force=True)`` rebuilds the root handler set so we know
    exactly which handlers receive the :class:`ScrubbingFormatter`.
    """
    log_format = "%(levelname)s:%(name)s:%(message)s"
    logging.basicConfig(level=logging.WARNING, force=True, format=log_format)
    root = logging.getLogger()
    root.setLevel(logging.WARNING)
    scrubber = ScrubbingFormatter(log_format)
    for existing_handler in root.handlers:
        existing_handler.setFormatter(scrubber)
    # Uvicorn registers its own named loggers; cap them at WARNING as well.
    for noisy_logger in ("uvicorn", "uvicorn.error", "uvicorn.access"):
        logging.getLogger(noisy_logger).setLevel(logging.WARNING)

View File

@@ -1,10 +1,20 @@
import logging
from fastapi import FastAPI from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from app.core.logging_setup import configure_logging
from app.core.error_handlers import register_error_handlers from app.core.error_handlers import register_error_handlers
from app.modules.application import ModularApplication from app.modules.application import ModularApplication
def _configure_logging() -> None:
configure_logging()
_configure_logging()
def create_app() -> FastAPI: def create_app() -> FastAPI:
app = FastAPI(title="Agent Backend MVP", version="0.1.0") app = FastAPI(title="Agent Backend MVP", version="0.1.0")
modules = ModularApplication() modules = ModularApplication()

View File

@@ -37,6 +37,8 @@ classDiagram
Методы: `run` — строит, валидирует и исполняет execution plan. Методы: `run` — строит, валидирует и исполняет execution plan.
- `TaskSpecBuilder`: формирует спецификацию задачи для оркестратора. - `TaskSpecBuilder`: формирует спецификацию задачи для оркестратора.
Методы: `build` — собирает `TaskSpec` из route, контекстов и ограничений. Методы: `build` — собирает `TaskSpec` из route, контекстов и ограничений.
- `ProjectQaConversationGraphFactory`, `ProjectQaClassificationGraphFactory`, `ProjectQaRetrievalGraphFactory`, `ProjectQaAnalysisGraphFactory`, `ProjectQaAnswerGraphFactory`: набор маленьких graph-исполнителей для `project/qa`.
Роли: нормализация запроса; классификация project-question; поздний retrieval из `RAG`; анализ code/docs контекста; сборка финального ответа.
- `StorySessionRecorder`: пишет session-scoped артефакты для последующего bind к Story. - `StorySessionRecorder`: пишет session-scoped артефакты для последующего bind к Story.
Методы: `record_run` — сохраняет входные источники и выходные артефакты сессии. Методы: `record_run` — сохраняет входные источники и выходные артефакты сессии.
- `StoryContextRepository`: репозиторий Story-контекста и его связей. - `StoryContextRepository`: репозиторий Story-контекста и его связей.
@@ -58,3 +60,32 @@ sequenceDiagram
Router->>Confluence: fetch_page(url) Router->>Confluence: fetch_page(url)
Confluence-->>Router: page(content_markdown, metadata) Confluence-->>Router: page(content_markdown, metadata)
``` ```
### `project/qa` reasoning flow
Назначение: оркестратор планирует шаги, а каждый шаг исполняется отдельным graph. Retrieval вызывается поздно, внутри шага `context_retrieval`.
```mermaid
sequenceDiagram
participant Runtime as GraphAgentRuntime
participant Orch as OrchestratorService
participant G1 as conversation_understanding
participant G2 as question_classification
participant G3 as context_retrieval
participant Rag as RagService
participant G4 as context_analysis
participant G5 as answer_composition
Runtime->>Orch: run(task)
Orch->>G1: execute
G1-->>Orch: resolved_request
Orch->>G2: execute
G2-->>Orch: question_profile
Orch->>G3: execute
G3->>Rag: retrieve(query)
Rag-->>G3: rag_items
G3-->>Orch: source_bundle
Orch->>G4: execute
G4-->>Orch: analysis_brief
Orch->>G5: execute
G5-->>Orch: final_answer
Orch-->>Runtime: final_answer
```

View File

@@ -1,8 +1,13 @@
__all__ = [ __all__ = [
"BaseGraphFactory", "BaseGraphFactory",
"DocsGraphFactory", "DocsGraphFactory",
"ProjectQaAnalysisGraphFactory",
"ProjectQaAnswerGraphFactory",
"ProjectQaClassificationGraphFactory",
"ProjectQaConversationGraphFactory",
"ProjectEditsGraphFactory", "ProjectEditsGraphFactory",
"ProjectQaGraphFactory", "ProjectQaGraphFactory",
"ProjectQaRetrievalGraphFactory",
] ]
@@ -15,6 +20,26 @@ def __getattr__(name: str):
from app.modules.agent.engine.graphs.docs_graph import DocsGraphFactory from app.modules.agent.engine.graphs.docs_graph import DocsGraphFactory
return DocsGraphFactory return DocsGraphFactory
if name == "ProjectQaConversationGraphFactory":
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaConversationGraphFactory
return ProjectQaConversationGraphFactory
if name == "ProjectQaClassificationGraphFactory":
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaClassificationGraphFactory
return ProjectQaClassificationGraphFactory
if name == "ProjectQaRetrievalGraphFactory":
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaRetrievalGraphFactory
return ProjectQaRetrievalGraphFactory
if name == "ProjectQaAnalysisGraphFactory":
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaAnalysisGraphFactory
return ProjectQaAnalysisGraphFactory
if name == "ProjectQaAnswerGraphFactory":
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaAnswerGraphFactory
return ProjectQaAnswerGraphFactory
if name == "ProjectEditsGraphFactory": if name == "ProjectEditsGraphFactory":
from app.modules.agent.engine.graphs.project_edits_graph import ProjectEditsGraphFactory from app.modules.agent.engine.graphs.project_edits_graph import ProjectEditsGraphFactory

View File

@@ -59,7 +59,7 @@ class BaseGraphFactory:
f"Confluence context:\n{conf}", f"Confluence context:\n{conf}",
] ]
) )
answer = self._llm.generate("general_answer", user_input) answer = self._llm.generate("general_answer", user_input, log_context="graph.default.answer")
emit_progress_sync( emit_progress_sync(
state, state,
stage="graph.default.answer.done", stage="graph.default.answer.done",

View File

@@ -52,7 +52,7 @@ class DocsContextAnalyzer:
f"Detected documentation candidates:\n{snippets}", f"Detected documentation candidates:\n{snippets}",
] ]
) )
raw = self._llm.generate("docs_detect", user_input) raw = self._llm.generate("docs_detect", user_input, log_context="graph.docs.detect_existing_docs")
exists = self.parse_bool_marker(raw, "exists", default=True) exists = self.parse_bool_marker(raw, "exists", default=True)
summary = self.parse_text_marker(raw, "summary", default="Documentation files detected.") summary = self.parse_text_marker(raw, "summary", default="Documentation files detected.")
return {"existing_docs_detected": exists, "existing_docs_summary": summary} return {"existing_docs_detected": exists, "existing_docs_summary": summary}
@@ -71,7 +71,7 @@ class DocsContextAnalyzer:
f"Existing docs summary:\n{state.get('existing_docs_summary', '')}", f"Existing docs summary:\n{state.get('existing_docs_summary', '')}",
] ]
) )
raw = self._llm.generate("docs_strategy", user_input) raw = self._llm.generate("docs_strategy", user_input, log_context="graph.docs.decide_strategy")
strategy = self.parse_text_marker(raw, "strategy", default="").lower() strategy = self.parse_text_marker(raw, "strategy", default="").lower()
if strategy not in {"incremental_update", "from_scratch"}: if strategy not in {"incremental_update", "from_scratch"}:
strategy = "incremental_update" if state.get("existing_docs_detected", False) else "from_scratch" strategy = "incremental_update" if state.get("existing_docs_detected", False) else "from_scratch"
@@ -260,7 +260,7 @@ class DocsContentComposer:
f"Examples bundle:\n{state.get('rules_bundle', '')}", f"Examples bundle:\n{state.get('rules_bundle', '')}",
] ]
) )
plan = self._llm.generate("docs_plan_sections", user_input) plan = self._llm.generate("docs_plan_sections", user_input, log_context="graph.docs.plan_incremental_changes")
return { return {
"doc_plan": plan, "doc_plan": plan,
"target_path": target_path, "target_path": target_path,
@@ -279,7 +279,7 @@ class DocsContentComposer:
f"Examples bundle:\n{state.get('rules_bundle', '')}", f"Examples bundle:\n{state.get('rules_bundle', '')}",
] ]
) )
plan = self._llm.generate("docs_plan_sections", user_input) plan = self._llm.generate("docs_plan_sections", user_input, log_context="graph.docs.plan_new_document")
return {"doc_plan": plan, "target_path": target_path, "target_file_content": "", "target_file_hash": ""} return {"doc_plan": plan, "target_path": target_path, "target_file_content": "", "target_file_hash": ""}
def generate_doc_content(self, state: AgentGraphState) -> dict: def generate_doc_content(self, state: AgentGraphState) -> dict:
@@ -294,7 +294,7 @@ class DocsContentComposer:
f"Examples bundle:\n{state.get('rules_bundle', '')}", f"Examples bundle:\n{state.get('rules_bundle', '')}",
] ]
) )
raw = self._llm.generate("docs_generation", user_input) raw = self._llm.generate("docs_generation", user_input, log_context="graph.docs.generate_doc_content")
bundle = self._bundle.parse_docs_bundle(raw) bundle = self._bundle.parse_docs_bundle(raw)
if bundle: if bundle:
first_content = str(bundle[0].get("content", "")).strip() first_content = str(bundle[0].get("content", "")).strip()
@@ -369,7 +369,7 @@ class DocsContentComposer:
f"Generated document:\n{generated}", f"Generated document:\n{generated}",
] ]
) )
raw = self._llm.generate("docs_self_check", user_input) raw = self._llm.generate("docs_self_check", user_input, log_context="graph.docs.self_check")
passed = DocsContextAnalyzer.parse_bool_marker(raw, "pass", default=False) passed = DocsContextAnalyzer.parse_bool_marker(raw, "pass", default=False)
feedback = DocsContextAnalyzer.parse_text_marker(raw, "feedback", default="No validation feedback provided.") feedback = DocsContextAnalyzer.parse_text_marker(raw, "feedback", default="No validation feedback provided.")
return {"validation_attempts": attempts, "validation_passed": passed, "validation_feedback": feedback} return {"validation_attempts": attempts, "validation_passed": passed, "validation_feedback": feedback}
@@ -379,7 +379,7 @@ class DocsContentComposer:
bundle = state.get("generated_docs_bundle", []) or [] bundle = state.get("generated_docs_bundle", []) or []
strategy = state.get("docs_strategy", "from_scratch") strategy = state.get("docs_strategy", "from_scratch")
if strategy == "from_scratch" and not self._bundle.bundle_has_required_structure(bundle): if strategy == "from_scratch" and not self._bundle.bundle_has_required_structure(bundle):
LOGGER.warning( LOGGER.info(
"build_changeset fallback bundle used: strategy=%s bundle_items=%s", "build_changeset fallback bundle used: strategy=%s bundle_items=%s",
strategy, strategy,
len(bundle), len(bundle),
@@ -452,7 +452,11 @@ class DocsContentComposer:
] ]
) )
try: try:
summary = self._llm.generate("docs_execution_summary", user_input).strip() summary = self._llm.generate(
"docs_execution_summary",
user_input,
log_context="graph.docs.summarize_result",
).strip()
except Exception: except Exception:
summary = "" summary = ""
if not summary: if not summary:

View File

@@ -48,7 +48,9 @@ class ProjectEditsLogic:
}, },
ensure_ascii=False, ensure_ascii=False,
) )
parsed = self._support.parse_json(self._llm.generate("project_edits_plan", user_input)) parsed = self._support.parse_json(
self._llm.generate("project_edits_plan", user_input, log_context="graph.project_edits.plan_changes")
)
contracts = self._contracts.parse( contracts = self._contracts.parse(
parsed, parsed,
request=str(state.get("message", "")), request=str(state.get("message", "")),
@@ -165,7 +167,13 @@ class ProjectEditsLogic:
"changeset": [{"op": x.op.value, "path": x.path, "reason": x.reason} for x in changeset[:20]], "changeset": [{"op": x.op.value, "path": x.path, "reason": x.reason} for x in changeset[:20]],
"rule": "Changes must stay inside contract blocks and not affect unrelated sections.", "rule": "Changes must stay inside contract blocks and not affect unrelated sections.",
} }
parsed = self._support.parse_json(self._llm.generate("project_edits_self_check", json.dumps(payload, ensure_ascii=False))) parsed = self._support.parse_json(
self._llm.generate(
"project_edits_self_check",
json.dumps(payload, ensure_ascii=False),
log_context="graph.project_edits.self_check",
)
)
passed = bool(parsed.get("pass")) if isinstance(parsed, dict) else False passed = bool(parsed.get("pass")) if isinstance(parsed, dict) else False
feedback = str(parsed.get("feedback", "")).strip() if isinstance(parsed, dict) else "" feedback = str(parsed.get("feedback", "")).strip() if isinstance(parsed, dict) else ""
return { return {
@@ -192,7 +200,11 @@ class ProjectEditsLogic:
"rag_context": self._support.shorten(state.get("rag_context", ""), 5000), "rag_context": self._support.shorten(state.get("rag_context", ""), 5000),
"confluence_context": self._support.shorten(state.get("confluence_context", ""), 5000), "confluence_context": self._support.shorten(state.get("confluence_context", ""), 5000),
} }
raw = self._llm.generate("project_edits_hunks", json.dumps(prompt_payload, ensure_ascii=False)) raw = self._llm.generate(
"project_edits_hunks",
json.dumps(prompt_payload, ensure_ascii=False),
log_context="graph.project_edits.generate_changeset",
)
parsed = self._support.parse_json(raw) parsed = self._support.parse_json(raw)
hunks = parsed.get("hunks", []) if isinstance(parsed, dict) else [] hunks = parsed.get("hunks", []) if isinstance(parsed, dict) else []
if not isinstance(hunks, list) or not hunks: if not isinstance(hunks, list) or not hunks:

View File

@@ -33,7 +33,7 @@ class ProjectQaGraphFactory:
f"Confluence context:\n{state.get('confluence_context', '')}", f"Confluence context:\n{state.get('confluence_context', '')}",
] ]
) )
answer = self._llm.generate("project_answer", user_input) answer = self._llm.generate("project_answer", user_input, log_context="graph.project_qa.answer")
emit_progress_sync( emit_progress_sync(
state, state,
stage="graph.project_qa.answer.done", stage="graph.project_qa.answer.done",

View File

@@ -0,0 +1,172 @@
from __future__ import annotations
import logging
from langgraph.graph import END, START, StateGraph
from app.modules.agent.engine.graphs.progress import emit_progress_sync
from app.modules.agent.engine.graphs.state import AgentGraphState
from app.modules.agent.engine.orchestrator.actions.project_qa_analyzer import ProjectQaAnalyzer
from app.modules.agent.engine.orchestrator.actions.project_qa_support import ProjectQaSupport
from app.modules.agent.llm import AgentLlmService
from app.modules.contracts import RagRetriever
from app.modules.rag.explain import ExplainPack, PromptBudgeter
LOGGER = logging.getLogger(__name__)
class ProjectQaConversationGraphFactory:
    """Single-node graph that normalizes the raw user message."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        # ``llm`` is accepted only for factory-signature uniformity; unused here.
        self._support = ProjectQaSupport()

    def build(self, checkpointer=None):
        """Compile a one-step graph: START -> resolve_request -> END."""
        builder = StateGraph(AgentGraphState)
        builder.add_node("resolve_request", self._resolve_request)
        builder.add_edge(START, "resolve_request")
        builder.add_edge("resolve_request", END)
        return builder.compile(checkpointer=checkpointer)

    def _resolve_request(self, state: AgentGraphState) -> dict:
        """Normalize the incoming message via ProjectQaSupport."""
        emit_progress_sync(state, stage="graph.project_qa.conversation_understanding", message="Нормализую пользовательский запрос.")
        raw_message = str(state.get("message", "") or "")
        resolved = self._support.resolve_request(raw_message)
        LOGGER.warning("graph step result: graph=project_qa/conversation_understanding normalized=%s", resolved.get("normalized_message", ""))
        return {"resolved_request": resolved}
class ProjectQaClassificationGraphFactory:
    """Single-node graph that builds the question profile for the message."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        # ``llm`` kept for factory-signature uniformity; not used by this step.
        self._support = ProjectQaSupport()

    def build(self, checkpointer=None):
        """Compile a one-step graph: START -> classify_question -> END."""
        builder = StateGraph(AgentGraphState)
        builder.add_node("classify_question", self._classify_question)
        builder.add_edge(START, "classify_question")
        builder.add_edge("classify_question", END)
        return builder.compile(checkpointer=checkpointer)

    def _classify_question(self, state: AgentGraphState) -> dict:
        """Profile the normalized message (falls back to the raw message)."""
        resolved = state.get("resolved_request", {}) or {}
        message = str(resolved.get("normalized_message") or state.get("message", "") or "")
        profile = self._support.build_profile(message)
        LOGGER.warning("graph step result: graph=project_qa/question_classification domain=%s intent=%s", profile.get("domain"), profile.get("intent"))
        return {"question_profile": profile}
class ProjectQaRetrievalGraphFactory:
    """Single-node graph that assembles the project source bundle."""

    def __init__(self, rag: RagRetriever, llm: AgentLlmService | None = None) -> None:
        # NOTE(review): ``rag`` is stored but never invoked in this graph —
        # ``_retrieve_context`` builds the bundle from an empty rag_items list.
        # Confirm whether RAG lookup is wired elsewhere or still missing here.
        self._rag = rag
        self._support = ProjectQaSupport()

    def build(self, checkpointer=None):
        """Compile a one-step graph: START -> retrieve_context -> END."""
        graph = StateGraph(AgentGraphState)
        graph.add_node("retrieve_context", self._retrieve_context)
        graph.add_edge(START, "retrieve_context")
        graph.add_edge("retrieve_context", END)
        return graph.compile(checkpointer=checkpointer)

    def _retrieve_context(self, state: AgentGraphState) -> dict:
        """Build a source bundle from the question profile and files map.

        Returns a ``source_bundle`` state update produced by
        ``ProjectQaSupport.build_source_bundle``.
        """
        emit_progress_sync(state, stage="graph.project_qa.context_retrieval", message="Собираю контекст по проекту.")
        profile = state.get("question_profile", {}) or {}
        files_map = dict(state.get("files_map", {}) or {})
        # Empty until RAG retrieval is actually called (see __init__ note).
        rag_items: list[dict] = []
        source_bundle = self._support.build_source_bundle(profile, rag_items, files_map)
        LOGGER.warning(
            "graph step result: graph=project_qa/context_retrieval mode=%s rag_items=%s file_candidates=%s legacy_rag=%s",
            profile.get("domain"),
            len(source_bundle.get("rag_items", []) or []),
            len(source_bundle.get("file_candidates", []) or []),
            False,
        )
        return {"source_bundle": source_bundle}
class ProjectQaAnalysisGraphFactory:
    """Single-node graph that turns collected sources into an analysis brief."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        # ``llm`` kept for factory-signature uniformity; analysis is heuristic.
        self._support = ProjectQaSupport()
        self._analyzer = ProjectQaAnalyzer()

    def build(self, checkpointer=None):
        """Compile a one-step graph: START -> analyze_context -> END."""
        graph = StateGraph(AgentGraphState)
        graph.add_node("analyze_context", self._analyze_context)
        graph.add_edge(START, "analyze_context")
        graph.add_edge("analyze_context", END)
        return graph.compile(checkpointer=checkpointer)

    def _analyze_context(self, state: AgentGraphState) -> dict:
        """Produce ``analysis_brief`` from an explain pack when available,
        otherwise from the ranked source bundle."""
        explain_pack = state.get("explain_pack")
        if explain_pack:
            analysis = self._analysis_from_pack(explain_pack)
        else:
            bundle = state.get("source_bundle", {}) or {}
            profile = bundle.get("profile", {}) or state.get("question_profile", {}) or {}
            rag_items = list(bundle.get("rag_items", []) or [])
            file_candidates = list(bundle.get("file_candidates", []) or [])
            if str(profile.get("domain")) == "code":
                analysis = self._analyzer.analyze_code(profile, rag_items, file_candidates)
            else:
                analysis = self._analyzer.analyze_docs(profile, rag_items)
        self._log_analysis(analysis)
        return {"analysis_brief": analysis}

    def _log_analysis(self, analysis: dict) -> None:
        # Single logging helper so both branches emit the identical record.
        LOGGER.warning(
            "graph step result: graph=project_qa/context_analysis findings=%s evidence=%s",
            len(analysis.get("findings", []) or []),
            len(analysis.get("evidence", []) or []),
        )

    def _analysis_from_pack(self, raw_pack) -> dict:
        """Convert an ExplainPack payload into the analysis-brief dict shape.

        Takes the top entrypoints, trace paths, and code excerpts as findings
        and evidence; falls back to a stock "no trace" finding when empty.
        """
        pack = ExplainPack.model_validate(raw_pack)
        findings: list[str] = []
        evidence: list[str] = []
        for entrypoint in pack.selected_entrypoints[:3]:
            findings.append(f"Entrypoint `{entrypoint.title}` maps to handler `{entrypoint.metadata.get('handler_symbol_id', '')}`.")
            if entrypoint.source:
                evidence.append(entrypoint.source)
        for path in pack.trace_paths[:3]:
            if path.symbol_ids:
                findings.append(f"Trace path: {' -> '.join(path.symbol_ids)}")
        for excerpt in pack.code_excerpts[:4]:
            evidence.append(f"{excerpt.path}:{excerpt.start_line}-{excerpt.end_line} [{excerpt.evidence_id}]")
        return {
            "subject": pack.intent.normalized_query,
            "findings": findings or ["No explain trace was built from the available code evidence."],
            "evidence": evidence,
            "gaps": list(pack.missing),
            "answer_mode": "summary",
        }
class ProjectQaAnswerGraphFactory:
    """Single-node graph that composes the final user-facing answer."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        self._support = ProjectQaSupport()
        self._llm = llm
        self._budgeter = PromptBudgeter()

    def build(self, checkpointer=None):
        """Compile a one-step graph: START -> compose_answer -> END."""
        builder = StateGraph(AgentGraphState)
        builder.add_node("compose_answer", self._compose_answer)
        builder.add_edge(START, "compose_answer")
        builder.add_edge("compose_answer", END)
        return builder.compile(checkpointer=checkpointer)

    def _compose_answer(self, state: AgentGraphState) -> dict:
        """Prefer an LLM explain-pack answer; fall back to the template brief."""
        profile = state.get("question_profile", {}) or {}
        analysis = state.get("analysis_brief", {}) or {}
        brief = self._support.build_answer_brief(profile, analysis)
        answer = self._compose_explain_answer(state, state.get("explain_pack"))
        final_answer = answer if answer else self._support.compose_answer(brief)
        LOGGER.warning("graph step result: graph=project_qa/answer_composition answer_len=%s", len(final_answer or ""))
        return {"answer_brief": brief, "final_answer": final_answer}

    def _compose_explain_answer(self, state: AgentGraphState, raw_pack) -> str:
        """Generate an LLM answer from the explain pack; "" when unavailable."""
        if raw_pack is None or self._llm is None:
            return ""
        pack = ExplainPack.model_validate(raw_pack)
        question = str(state.get("message", "") or "")
        prompt_input = self._budgeter.build_prompt_input(question, pack)
        generated = self._llm.generate(
            "code_explain_answer_v2",
            prompt_input,
            log_context="graph.project_qa.answer_v2",
        )
        return generated.strip()

View File

@@ -25,6 +25,12 @@ class AgentGraphState(TypedDict, total=False):
validation_passed: bool validation_passed: bool
validation_feedback: str validation_feedback: str
validation_attempts: int validation_attempts: int
resolved_request: dict
question_profile: dict
source_bundle: dict
analysis_brief: dict
answer_brief: dict
final_answer: str
answer: str answer: str
changeset: list[ChangeItem] changeset: list[ChangeItem]
edits_requested_path: str edits_requested_path: str

View File

@@ -1,13 +1,17 @@
from app.modules.agent.engine.orchestrator.actions.code_explain_actions import CodeExplainActions
from app.modules.agent.engine.orchestrator.actions.docs_actions import DocsActions from app.modules.agent.engine.orchestrator.actions.docs_actions import DocsActions
from app.modules.agent.engine.orchestrator.actions.edit_actions import EditActions from app.modules.agent.engine.orchestrator.actions.edit_actions import EditActions
from app.modules.agent.engine.orchestrator.actions.explain_actions import ExplainActions from app.modules.agent.engine.orchestrator.actions.explain_actions import ExplainActions
from app.modules.agent.engine.orchestrator.actions.gherkin_actions import GherkinActions from app.modules.agent.engine.orchestrator.actions.gherkin_actions import GherkinActions
from app.modules.agent.engine.orchestrator.actions.project_qa_actions import ProjectQaActions
from app.modules.agent.engine.orchestrator.actions.review_actions import ReviewActions from app.modules.agent.engine.orchestrator.actions.review_actions import ReviewActions
__all__ = [ __all__ = [
"CodeExplainActions",
"DocsActions", "DocsActions",
"EditActions", "EditActions",
"ExplainActions", "ExplainActions",
"GherkinActions", "GherkinActions",
"ProjectQaActions",
"ReviewActions", "ReviewActions",
] ]

View File

@@ -0,0 +1,46 @@
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType
from app.modules.rag.explain.intent_builder import ExplainIntentBuilder
from app.modules.rag.explain.models import ExplainPack
if TYPE_CHECKING:
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
LOGGER = logging.getLogger(__name__)
class CodeExplainActions(ActionSupport):
    """Orchestrator action that builds a code-explain pack artifact."""

    def __init__(self, retriever: CodeExplainRetrieverV2 | None = None) -> None:
        self._retriever = retriever
        self._intent_builder = ExplainIntentBuilder()

    def build_code_explain_pack(self, ctx: ExecutionContext) -> list[str]:
        """Build an ExplainPack for the task and store it as a structured artifact.

        Without a retriever, a stub pack marked ``code_explain_retriever_unavailable``
        is produced so downstream steps can degrade gracefully.
        """
        bundle = self.get(ctx, "source_bundle", {}) or {}
        file_candidates = list(bundle.get("file_candidates", []) or [])
        if self._retriever is not None:
            pack = self._retriever.build_pack(
                ctx.task.rag_session_id,
                ctx.task.user_message,
                file_candidates=file_candidates,
            )
        else:
            pack = ExplainPack(
                intent=self._intent_builder.build(ctx.task.user_message),
                missing=["code_explain_retriever_unavailable"],
            )
        LOGGER.warning(
            "code explain action: task_id=%s entrypoints=%s seeds=%s paths=%s excerpts=%s missing=%s",
            ctx.task.task_id,
            len(pack.selected_entrypoints),
            len(pack.seed_symbols),
            len(pack.trace_paths),
            len(pack.code_excerpts),
            pack.missing,
        )
        return [self.put(ctx, "explain_pack", ArtifactType.STRUCTURED_JSON, pack.model_dump(mode="json"))]

View File

@@ -0,0 +1,117 @@
from __future__ import annotations
from app.modules.agent.engine.orchestrator.actions.project_qa_analyzer import ProjectQaAnalyzer
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
from app.modules.agent.engine.orchestrator.actions.project_qa_support import ProjectQaSupport
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType
class ProjectQaActions(ActionSupport):
    """Orchestrator actions for project/qa: classify, collect, analyze, answer."""

    # Terms (EN/RU) that signal the user is explicitly asking about tests;
    # when absent, test-path sources are demoted during ranking.
    _TEST_TERMS = frozenset({"test", "tests", "тест", "тесты"})

    def __init__(self) -> None:
        self._support = ProjectQaSupport()
        self._analyzer = ProjectQaAnalyzer()

    def classify_project_question(self, ctx: ExecutionContext) -> list[str]:
        """Build and store the question profile for the task message."""
        message = str(ctx.task.user_message or "")
        profile = self._support.build_profile(message)
        return [self.put(ctx, "question_profile", ArtifactType.STRUCTURED_JSON, profile)]

    def collect_project_sources(self, ctx: ExecutionContext) -> list[str]:
        """Rank rag items and project files against the profile and store a bundle.

        Keeps the top 12 rag items and top 10 file candidates by score.
        """
        profile = self.get(ctx, "question_profile", {}) or {}
        terms = list(profile.get("terms", []) or [])
        entities = list(profile.get("entities", []) or [])
        rag_items = list(ctx.task.metadata.get("rag_items", []) or [])
        files_map = dict(ctx.task.metadata.get("files_map", {}) or {})
        explicit_test = any(term in self._TEST_TERMS for term in terms)
        ranked_rag = self._rank_rag_items(rag_items, terms, entities, explicit_test)
        ranked_files = self._rank_file_candidates(files_map, terms, entities, explicit_test)
        bundle = {
            "profile": profile,
            "rag_items": [item for _, item in ranked_rag[:12]],
            "file_candidates": [item for _, item in ranked_files[:10]],
            "rag_total": len(ranked_rag),
            "files_total": len(ranked_files),
        }
        return [self.put(ctx, "source_bundle", ArtifactType.STRUCTURED_JSON, bundle)]

    def _rank_rag_items(self, rag_items: list[dict], terms: list, entities: list, explicit_test: bool) -> list[tuple]:
        """Score rag items (desc); demote test paths unless tests were asked about."""
        ranked = []
        for item in rag_items:
            score = self._support.rag_score(item, terms, entities)
            source = str(item.get("source", "") or "")
            if not explicit_test and self._support.is_test_path(source):
                score -= 3
            if score > 0:
                ranked.append((score, item))
        ranked.sort(key=lambda pair: pair[0], reverse=True)
        return ranked

    def _rank_file_candidates(self, files_map: dict, terms: list, entities: list, explicit_test: bool) -> list[tuple]:
        """Score project files (desc) with the same test-path demotion rule."""
        ranked = []
        for path, payload in files_map.items():
            score = self._support.file_score(path, payload, terms, entities)
            if not explicit_test and self._support.is_test_path(path):
                score -= 3
            if score > 0:
                ranked.append(
                    (
                        score,
                        {
                            "path": path,
                            "content": str(payload.get("content", "")),
                            "content_hash": str(payload.get("content_hash", "")),
                        },
                    )
                )
        ranked.sort(key=lambda pair: pair[0], reverse=True)
        return ranked

    def analyze_project_sources(self, ctx: ExecutionContext) -> list[str]:
        """Run the code or docs analyzer over the bundle and store the brief."""
        bundle = self.get(ctx, "source_bundle", {}) or {}
        profile = bundle.get("profile", {}) or {}
        rag_items = list(bundle.get("rag_items", []) or [])
        file_candidates = list(bundle.get("file_candidates", []) or [])
        if str(profile.get("domain")) == "code":
            analysis = self._analyzer.analyze_code(profile, rag_items, file_candidates)
        else:
            analysis = self._analyzer.analyze_docs(profile, rag_items)
        return [self.put(ctx, "analysis_brief", ArtifactType.STRUCTURED_JSON, analysis)]

    def build_project_answer_brief(self, ctx: ExecutionContext) -> list[str]:
        """Assemble the answer brief from the profile and analysis artifacts."""
        profile = self.get(ctx, "question_profile", {}) or {}
        analysis = self.get(ctx, "analysis_brief", {}) or {}
        brief = {
            "question_profile": profile,
            "resolved_subject": analysis.get("subject"),
            "key_findings": analysis.get("findings", []),
            "supporting_evidence": analysis.get("evidence", []),
            "missing_evidence": analysis.get("gaps", []),
            "answer_mode": analysis.get("answer_mode", "summary"),
        }
        return [self.put(ctx, "answer_brief", ArtifactType.STRUCTURED_JSON, brief)]

    def compose_project_answer(self, ctx: ExecutionContext) -> list[str]:
        """Render the brief as a Markdown answer (RU/EN per the profile).

        Sections: findings, up to 5 evidence locations, up to 3 known gaps.
        """
        brief = self.get(ctx, "answer_brief", {}) or {}
        profile = brief.get("question_profile", {}) or {}
        russian = bool(profile.get("russian"))
        answer_mode = str(brief.get("answer_mode") or "summary")
        findings = list(brief.get("key_findings", []) or [])
        evidence = list(brief.get("supporting_evidence", []) or [])
        gaps = list(brief.get("missing_evidence", []) or [])
        title = "## Кратко" if russian else "## Summary"
        lines = [title]
        if answer_mode == "inventory":
            lines.append("### Что реализовано" if russian else "### Implemented items")
        else:
            lines.append("### Что видно по проекту" if russian else "### What the project shows")
        if findings:
            lines.extend(f"- {item}" for item in findings)
        else:
            lines.append("Не удалось собрать подтвержденные выводы по доступным данным." if russian else "No supported findings could be assembled from the available data.")
        if evidence:
            lines.append("")
            lines.append("### Где смотреть в проекте" if russian else "### Where to look in the project")
            lines.extend(f"- `{item}`" for item in evidence[:5])
        if gaps:
            lines.append("")
            lines.append("### Что пока не подтверждено кодом" if russian else "### What is not yet confirmed in code")
            lines.extend(f"- {item}" for item in gaps[:3])
        return [self.put(ctx, "final_answer", ArtifactType.TEXT, "\n".join(lines))]

View File

@@ -0,0 +1,154 @@
from __future__ import annotations
class ProjectQaAnalyzer:
    """Deterministic, string-heuristic analyzer for project QA questions.

    Turns retrieved context (RAG hits plus ranked file candidates) into a
    structured analysis brief: confirmed findings, the evidence paths that
    back them, and gaps the context could not confirm.  No LLM calls happen
    here — every check is a plain substring/path heuristic.
    """

    def analyze_code(self, profile: dict, rag_items: list[dict], file_candidates: list[dict]) -> dict:
        """Build an analysis brief for a code-oriented question.

        Args:
            profile: Question profile; ``terms``, ``entities``, ``intent`` and
                ``russian`` keys are all optional and may be ``None``.
            rag_items: Retrieved RAG items; items whose ``layer`` starts with
                ``C1`` are treated as symbol-level hits.
            file_candidates: Ranked file payloads with ``path``/``content``.

        Returns:
            Dict with ``subject``, deduplicated ``findings`` and ``evidence``,
            ``gaps``, and ``answer_mode`` (``inventory`` or ``summary``).
        """
        # Normalize all profile fields defensively: callers may pass None
        # values (fixes a TypeError when "entities" was present but None).
        terms = list(profile.get("terms") or [])
        entities = list(profile.get("entities") or [])
        intent = str(profile.get("intent") or "lookup")
        russian = bool(profile.get("russian"))
        findings: list[str] = []
        evidence: list[str] = []
        gaps: list[str] = []
        # Symbol-level hits come from C1* layers of the RAG index.
        symbol_titles = [str(item.get("title", "") or "") for item in rag_items if str(item.get("layer", "")).startswith("C1")]
        symbol_set = set(symbol_titles)
        # Union of paths seen in RAG hits and file candidates, used for exact
        # path lookups below.
        file_paths = [str(item.get("path", "") or item.get("source", "") or "") for item in rag_items]
        file_paths.extend(str(item.get("path", "") or "") for item in file_candidates)
        if "ConfigManager" in entities or "configmanager" in terms or "config_manager" in terms:
            alias_file = self.find_path(file_paths, "src/config_manager/__init__.py")
            if alias_file:
                findings.append(
                    "Публичный `ConfigManager` экспортируется из `src/config_manager/__init__.py` как alias на `ConfigManagerV2`."
                    if russian
                    else "Public `ConfigManager` is exported from `src/config_manager/__init__.py` as an alias to `ConfigManagerV2`."
                )
                evidence.append("src/config_manager/__init__.py")
        # Case-insensitive symbol check for the base contract.
        if "controlchannel" in {name.lower() for name in symbol_set}:
            findings.append(
                "Базовый контракт управления задает `ControlChannel`: он определяет команды `start` и `stop` для внешнего канала управления."
                if russian
                else "`ControlChannel` defines the base management contract with `start` and `stop` commands."
            )
            evidence.append("src/config_manager/v2/control/base.py")
        if "ControlChannelBridge" in symbol_set:
            findings.append(
                "`ControlChannelBridge` связывает внешний канал управления с lifecycle-методами менеджера: `on_start`, `on_stop`, `on_status`."
                if russian
                else "`ControlChannelBridge` connects the external control channel to manager lifecycle methods: `on_start`, `on_stop`, `on_status`."
            )
            evidence.append("src/config_manager/v2/core/control_bridge.py")
        implementation_files = self.find_management_implementations(file_candidates)
        if implementation_files:
            labels = ", ".join(f"`{path}`" for path in implementation_files)
            channel_names = self.implementation_names(implementation_files)
            findings.append(
                f"В коде найдены конкретные реализации каналов управления: {', '.join(channel_names)} ({labels})."
                if russian
                else f"Concrete management channel implementations were found in code: {', '.join(channel_names)} ({labels})."
            )
            evidence.extend(implementation_files)
        elif intent == "inventory":
            # Inventory questions expect a list of implementations; record a
            # gap when none could be confirmed from the retrieved context.
            gaps.append(
                "В текущем контексте не удалось уверенно подтвердить конкретные файлы-реализации каналов, кроме базового контракта и bridge-слоя."
                if russian
                else "The current context does not yet confirm concrete channel implementation files beyond the base contract and bridge layer."
            )
        package_doc = self.find_management_doc(file_candidates)
        if package_doc:
            findings.append(
                f"Пакет управления прямо описывает внешние каналы через `{package_doc}`."
                if russian
                else f"The control package directly describes external channels in `{package_doc}`."
            )
            evidence.append(package_doc)
        subject = "management channels"
        if entities:
            subject = ", ".join(entities)
        return {
            "subject": subject,
            "findings": self.dedupe(findings),
            "evidence": self.dedupe(evidence),
            "gaps": gaps,
            "answer_mode": "inventory" if intent == "inventory" else "summary",
        }

    def analyze_docs(self, profile: dict, rag_items: list[dict]) -> dict:
        """Build an analysis brief for a docs-oriented question.

        Uses up to five RAG items: the first line of each item's content
        (truncated to 220 chars) becomes a finding; the item's source (or
        title as a fallback) becomes evidence.
        """
        findings: list[str] = []
        evidence: list[str] = []
        for item in rag_items[:5]:
            title = str(item.get("title", "") or "")
            source = str(item.get("source", "") or "")
            content = str(item.get("content", "") or "").strip()
            if content:
                findings.append(content.splitlines()[0][:220])
            if source:
                evidence.append(source)
            elif title:
                evidence.append(title)
        return {
            "subject": "docs",
            "findings": self.dedupe(findings),
            "evidence": self.dedupe(evidence),
            "gaps": [] if findings else ["Недостаточно данных в документации." if profile.get("russian") else "Not enough data in documentation."],
            "answer_mode": "summary",
        }

    def find_management_implementations(self, file_candidates: list[dict]) -> list[str]:
        """Return up to four non-test paths that look like channel implementations.

        A candidate matches when its filename is a known channel module, when
        its content defines a class mentioning ``ControlChannel``, or when a
        channel/control path mentions http/telegram/bot in its content.
        """
        found: list[str] = []
        for item in file_candidates:
            path = str(item.get("path", "") or "")
            lowered = path.lower()
            if self.is_test_path(path):
                continue
            if any(token in lowered for token in ("http_channel.py", "telegram.py", "telegram_channel.py", "http.py")):
                found.append(path)
                continue
            content = str(item.get("content", "") or "").lower()
            if "controlchannel" in content and "class " in content:
                found.append(path)
                continue
            if ("channel" in lowered or "control" in lowered) and any(token in content for token in ("http", "telegram", "bot")):
                found.append(path)
        return self.dedupe(found)[:4]

    def implementation_names(self, paths: list[str]) -> list[str]:
        """Derive unique human-readable labels from file paths (stem, underscores to spaces)."""
        names: list[str] = []
        for path in paths:
            stem = path.rsplit("/", 1)[-1].rsplit(".", 1)[0]
            label = stem.replace("_", " ").strip()
            if label and label not in names:
                names.append(label)
        return names

    def find_management_doc(self, file_candidates: list[dict]) -> str | None:
        """Return the first non-test candidate whose content mentions external control channels."""
        for item in file_candidates:
            path = str(item.get("path", "") or "")
            if self.is_test_path(path):
                continue
            content = str(item.get("content", "") or "").lower()
            if any(token in content for token in ("каналы внешнего управления", "external control channels", "http api", "telegram")):
                return path
        return None

    def find_path(self, paths: list[str], target: str) -> str | None:
        """Return *target* if it occurs in *paths*, else ``None`` (exact match only)."""
        for path in paths:
            if path == target:
                return path
        return None

    def dedupe(self, items: list[str]) -> list[str]:
        """Remove empty strings and duplicates while preserving first-seen order."""
        seen: list[str] = []
        for item in items:
            if item and item not in seen:
                seen.append(item)
        return seen

    def is_test_path(self, path: str) -> bool:
        """Heuristic: is *path* a test file or inside a tests directory?"""
        lowered = path.lower()
        return lowered.startswith("tests/") or "/tests/" in lowered or lowered.startswith("test_") or "/test_" in lowered

View File

@@ -0,0 +1,166 @@
from __future__ import annotations
import re
from app.modules.rag.retrieval.query_terms import extract_query_terms
class ProjectQaSupport:
    """Heuristic support helpers for the project QA pipeline.

    Profiles the user question (domain, intent, language, entities), builds
    the retrieval query, ranks RAG items and file candidates into a source
    bundle, and composes the final markdown answer.  All logic is rule-based.
    """

    def resolve_request(self, message: str) -> dict:
        """Normalize the raw user message into a resolved-request dict."""
        profile = self.build_profile(message)
        subject = profile["entities"][0] if profile.get("entities") else ""
        return {
            "original_message": message,
            "normalized_message": " ".join((message or "").split()),
            "subject_hint": subject,
            "source_hint": profile["domain"],
            "russian": profile["russian"],
        }

    def build_profile(self, message: str) -> dict:
        """Build the question profile: domain, intent, search terms, entities, language."""
        lowered = message.lower()
        return {
            # Pass the original-cased message so CamelCase entity detection
            # inside looks_like_code_question can actually fire.
            "domain": "code" if self.looks_like_code_question(message) else "docs",
            "intent": self.detect_intent(lowered),
            "terms": extract_query_terms(message),
            "entities": self.extract_entities(message),
            "russian": self.is_russian(message),
        }

    def build_retrieval_query(self, resolved_request: dict, profile: dict) -> str:
        """Return the retrieval query, prefixing code questions with "по коду"."""
        normalized = str(resolved_request.get("normalized_message") or resolved_request.get("original_message") or "").strip()
        if profile.get("domain") == "code" and "по коду" not in normalized.lower():
            return f"по коду {normalized}".strip()
        return normalized

    def build_source_bundle(self, profile: dict, rag_items: list[dict], files_map: dict[str, dict]) -> dict:
        """Rank RAG items and file candidates by relevance to the profile.

        Test files are penalized (-3) unless the question explicitly asks
        about tests.  Only positively-scored items are kept; the bundle is
        capped at 12 RAG items and 10 file candidates.
        """
        terms = list(profile.get("terms", []) or [])
        entities = list(profile.get("entities", []) or [])
        explicit_test = any(term in {"test", "tests", "тест", "тесты"} for term in terms)
        ranked_rag: list[tuple[int, dict]] = []
        for item in rag_items:
            score = self.rag_score(item, terms, entities)
            source = str(item.get("source", "") or "")
            if not explicit_test and self.is_test_path(source):
                score -= 3
            if score > 0:
                ranked_rag.append((score, item))
        ranked_rag.sort(key=lambda pair: pair[0], reverse=True)
        ranked_files: list[tuple[int, dict]] = []
        for path, payload in files_map.items():
            score = self.file_score(path, payload, terms, entities)
            if not explicit_test and self.is_test_path(path):
                score -= 3
            if score > 0:
                ranked_files.append(
                    (
                        score,
                        {
                            "path": path,
                            "content": str(payload.get("content", "")),
                            "content_hash": str(payload.get("content_hash", "")),
                        },
                    )
                )
        ranked_files.sort(key=lambda pair: pair[0], reverse=True)
        return {
            "profile": profile,
            "rag_items": [item for _, item in ranked_rag[:12]],
            "file_candidates": [item for _, item in ranked_files[:10]],
            "rag_total": len(ranked_rag),
            "files_total": len(ranked_files),
        }

    def build_answer_brief(self, profile: dict, analysis: dict) -> dict:
        """Combine the question profile and analysis into an answer brief."""
        return {
            "question_profile": profile,
            "resolved_subject": analysis.get("subject"),
            "key_findings": analysis.get("findings", []),
            "supporting_evidence": analysis.get("evidence", []),
            "missing_evidence": analysis.get("gaps", []),
            "answer_mode": analysis.get("answer_mode", "summary"),
        }

    def compose_answer(self, brief: dict) -> str:
        """Render the localized markdown answer from an answer brief."""
        profile = brief.get("question_profile", {}) or {}
        russian = bool(profile.get("russian"))
        answer_mode = str(brief.get("answer_mode") or "summary")
        findings = list(brief.get("key_findings", []) or [])
        evidence = list(brief.get("supporting_evidence", []) or [])
        gaps = list(brief.get("missing_evidence", []) or [])
        lines = ["## Кратко" if russian else "## Summary"]
        # Section heading depends on both answer mode and language; spelled
        # out as branches for readability (same outcome as before).
        if answer_mode == "inventory":
            lines.append("### Что реализовано" if russian else "### Implemented items")
        else:
            lines.append("### Что видно по проекту" if russian else "### What the project shows")
        if findings:
            lines.extend(f"- {item}" for item in findings)
        else:
            lines.append("Не удалось собрать подтвержденные выводы по доступным данным." if russian else "No supported findings could be assembled from the available data.")
        if evidence:
            lines.append("")
            lines.append("### Где смотреть в проекте" if russian else "### Where to look in the project")
            lines.extend(f"- `{item}`" for item in evidence[:5])
        if gaps:
            lines.append("")
            lines.append("### Что пока не подтверждено кодом" if russian else "### What is not yet confirmed in code")
            lines.extend(f"- {item}" for item in gaps[:3])
        return "\n".join(lines)

    def detect_intent(self, lowered: str) -> str:
        """Classify the (lowercased) question: inventory, lookup, compare or explain."""
        if any(token in lowered for token in ("какие", "что уже реализ", "список", "перечень", "какие есть")):
            return "inventory"
        if any(token in lowered for token in ("где", "find", "where")):
            return "lookup"
        if any(token in lowered for token in ("сравни", "compare")):
            return "compare"
        return "explain"

    def looks_like_code_question(self, message: str) -> bool:
        """Heuristic: does the question target source code rather than docs?

        Matches either a code-related marker word or a CamelCase identifier
        (e.g. ``ConfigManager``).  The identifier regex must run against the
        original-cased text: the previous revision ran it on an already
        lowercased string, so it could never match.
        """
        lowered = message.lower()
        code_markers = ("по коду", "код", "реализ", "имплементац", "класс", "метод", "модул", "файл", "канал", "handler", "endpoint")
        if any(marker in lowered for marker in code_markers):
            return True
        return bool(re.search(r"\b[A-Z][A-Za-z0-9_]{2,}\b", message))

    def extract_entities(self, message: str) -> list[str]:
        """Extract up to five CamelCase-looking identifiers from the message."""
        return re.findall(r"\b[A-Z][A-Za-z0-9_]{2,}\b", message)[:5]

    def rag_score(self, item: dict, terms: list[str], entities: list[str]) -> int:
        """Score a RAG item: +3 per matching term, +5 per matching entity."""
        haystacks = [
            str(item.get("source", "") or "").lower(),
            str(item.get("title", "") or "").lower(),
            str(item.get("content", "") or "").lower(),
            str((item.get("metadata", {}) or {}).get("qname", "") or "").lower(),
        ]
        score = 0
        for term in terms:
            if any(term in hay for hay in haystacks):
                score += 3
        for entity in entities:
            if any(entity.lower() in hay for hay in haystacks):
                score += 5
        return score

    def file_score(self, path: str, payload: dict, terms: list[str], entities: list[str]) -> int:
        """Score a file candidate; path matches weigh more than content matches."""
        content = str(payload.get("content", "") or "").lower()
        path_lower = path.lower()
        score = 0
        for term in terms:
            if term in path_lower:
                score += 4
            elif term in content:
                score += 2
        for entity in entities:
            entity_lower = entity.lower()
            if entity_lower in path_lower:
                score += 5
            elif entity_lower in content:
                score += 3
        return score

    def is_test_path(self, path: str) -> bool:
        """Heuristic: is *path* a test file or inside a tests directory?"""
        lowered = path.lower()
        return lowered.startswith("tests/") or "/tests/" in lowered or lowered.startswith("test_") or "/test_" in lowered

    def is_russian(self, text: str) -> bool:
        """True if the text contains at least one Cyrillic (Russian) letter."""
        return any("а" <= ch.lower() <= "я" or ch.lower() == "ё" for ch in text)

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import asyncio import asyncio
import inspect import inspect
import logging
import time import time
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
@@ -9,6 +10,8 @@ from app.modules.agent.engine.orchestrator.models import PlanStatus, PlanStep, S
from app.modules.agent.engine.orchestrator.quality_gates import QualityGateRunner from app.modules.agent.engine.orchestrator.quality_gates import QualityGateRunner
from app.modules.agent.engine.orchestrator.step_registry import StepRegistry from app.modules.agent.engine.orchestrator.step_registry import StepRegistry
LOGGER = logging.getLogger(__name__)
class ExecutionEngine: class ExecutionEngine:
def __init__(self, step_registry: StepRegistry, gates: QualityGateRunner) -> None: def __init__(self, step_registry: StepRegistry, gates: QualityGateRunner) -> None:
@@ -22,17 +25,18 @@ class ExecutionEngine:
for step in ctx.plan.steps: for step in ctx.plan.steps:
dep_issue = self._dependency_issue(step, step_results) dep_issue = self._dependency_issue(step, step_results)
if dep_issue: if dep_issue:
step_results.append( result = StepResult(
StepResult(
step_id=step.step_id, step_id=step.step_id,
status=StepStatus.SKIPPED, status=StepStatus.SKIPPED,
warnings=[dep_issue], warnings=[dep_issue],
) )
) step_results.append(result)
self._log_step_result(ctx, step, result)
continue continue
result = await self._run_with_retry(step, ctx) result = await self._run_with_retry(step, ctx)
step_results.append(result) step_results.append(result)
self._log_step_result(ctx, step, result)
if result.status in {StepStatus.FAILED, StepStatus.RETRY_EXHAUSTED} and step.on_failure == "fail": if result.status in {StepStatus.FAILED, StepStatus.RETRY_EXHAUSTED} and step.on_failure == "fail":
ctx.plan.status = PlanStatus.FAILED ctx.plan.status = PlanStatus.FAILED
return step_results return step_results
@@ -65,6 +69,15 @@ class ExecutionEngine:
while attempt < max_attempts: while attempt < max_attempts:
attempt += 1 attempt += 1
started_at = time.monotonic() started_at = time.monotonic()
LOGGER.warning(
"orchestrator step start: task_id=%s step_id=%s action_id=%s executor=%s attempt=%s graph_id=%s",
ctx.task.task_id,
step.step_id,
step.action_id,
step.executor,
attempt,
step.graph_id or "",
)
await self._emit_progress(ctx, f"orchestrator.step.{step.step_id}", step.title) await self._emit_progress(ctx, f"orchestrator.step.{step.step_id}", step.title)
try: try:
@@ -113,3 +126,21 @@ class ExecutionEngine:
result = ctx.progress_cb(stage, message, "task_progress", {"layer": "orchestrator"}) result = ctx.progress_cb(stage, message, "task_progress", {"layer": "orchestrator"})
if inspect.isawaitable(result): if inspect.isawaitable(result):
await result await result
def _log_step_result(self, ctx: ExecutionContext, step: PlanStep, result: StepResult) -> None:
artifact_keys = []
for artifact_id in result.produced_artifact_ids:
item = next((artifact for artifact in ctx.artifacts.all_items() if artifact.artifact_id == artifact_id), None)
if item is not None:
artifact_keys.append(item.key)
LOGGER.warning(
"orchestrator step result: task_id=%s step_id=%s action_id=%s status=%s duration_ms=%s artifact_keys=%s warnings=%s error=%s",
ctx.task.task_id,
step.step_id,
step.action_id,
result.status.value,
result.duration_ms,
artifact_keys,
result.warnings,
result.error_message or "",
)

View File

@@ -1,6 +1,7 @@
from __future__ import annotations from __future__ import annotations
import inspect import inspect
import logging
from app.core.exceptions import AppError from app.core.exceptions import AppError
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext, GraphInvoker, GraphResolver, ProgressCallback from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext, GraphInvoker, GraphResolver, ProgressCallback
@@ -14,6 +15,8 @@ from app.modules.agent.engine.orchestrator.step_registry import StepRegistry
from app.modules.agent.engine.orchestrator.template_registry import ScenarioTemplateRegistry from app.modules.agent.engine.orchestrator.template_registry import ScenarioTemplateRegistry
from app.schemas.common import ModuleName from app.schemas.common import ModuleName
LOGGER = logging.getLogger(__name__)
class OrchestratorService: class OrchestratorService:
def __init__( def __init__(
@@ -74,6 +77,21 @@ class OrchestratorService:
) )
result = self._assembler.assemble(ctx, step_results) result = self._assembler.assemble(ctx, step_results)
await self._emit_progress(progress_cb, "orchestrator.done", "Execution plan completed.") await self._emit_progress(progress_cb, "orchestrator.done", "Execution plan completed.")
LOGGER.warning(
"orchestrator decision: task_id=%s scenario=%s plan_status=%s steps=%s changeset_items=%s answer_len=%s",
task.task_id,
task.scenario.value,
result.meta.get("plan", {}).get("status", ""),
[
{
"step_id": step.step_id,
"status": step.status.value,
}
for step in result.steps
],
len(result.changeset),
len(result.answer or ""),
)
return result return result
async def _emit_progress(self, progress_cb: ProgressCallback | None, stage: str, message: str) -> None: async def _emit_progress(self, progress_cb: ProgressCallback | None, stage: str, message: str) -> None:

View File

@@ -2,29 +2,50 @@ from __future__ import annotations
import asyncio import asyncio
from collections.abc import Callable from collections.abc import Callable
from typing import TYPE_CHECKING
from app.modules.agent.engine.graphs.progress_registry import progress_registry from app.modules.agent.engine.graphs.progress_registry import progress_registry
from app.modules.agent.engine.orchestrator.actions import DocsActions, EditActions, ExplainActions, GherkinActions, ReviewActions from app.modules.agent.engine.orchestrator.actions import (
CodeExplainActions,
DocsActions,
EditActions,
ExplainActions,
GherkinActions,
ProjectQaActions,
ReviewActions,
)
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType, PlanStep from app.modules.agent.engine.orchestrator.models import ArtifactType, PlanStep
if TYPE_CHECKING:
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
StepFn = Callable[[ExecutionContext], list[str]] StepFn = Callable[[ExecutionContext], list[str]]
class StepRegistry: class StepRegistry:
def __init__(self) -> None: def __init__(self, code_explain_retriever: CodeExplainRetrieverV2 | None = None) -> None:
code_explain = CodeExplainActions(code_explain_retriever)
explain = ExplainActions() explain = ExplainActions()
review = ReviewActions() review = ReviewActions()
docs = DocsActions() docs = DocsActions()
edits = EditActions() edits = EditActions()
gherkin = GherkinActions() gherkin = GherkinActions()
project_qa = ProjectQaActions()
self._functions: dict[str, StepFn] = { self._functions: dict[str, StepFn] = {
"collect_state": self._collect_state, "collect_state": self._collect_state,
"finalize_graph_output": self._finalize_graph_output, "finalize_graph_output": self._finalize_graph_output,
"execute_project_qa_graph": self._collect_state,
"build_code_explain_pack": code_explain.build_code_explain_pack,
"collect_sources": explain.collect_sources, "collect_sources": explain.collect_sources,
"extract_logic": explain.extract_logic, "extract_logic": explain.extract_logic,
"summarize": explain.summarize, "summarize": explain.summarize,
"classify_project_question": project_qa.classify_project_question,
"collect_project_sources": project_qa.collect_project_sources,
"analyze_project_sources": project_qa.analyze_project_sources,
"build_project_answer_brief": project_qa.build_project_answer_brief,
"compose_project_answer": project_qa.compose_project_answer,
"fetch_source_doc": review.fetch_source_doc, "fetch_source_doc": review.fetch_source_doc,
"normalize_document": review.normalize_document, "normalize_document": review.normalize_document,
"structural_check": review.structural_check, "structural_check": review.structural_check,
@@ -66,6 +87,7 @@ class StepRegistry:
state = { state = {
"task_id": ctx.task.task_id, "task_id": ctx.task.task_id,
"project_id": ctx.task.rag_session_id, "project_id": ctx.task.rag_session_id,
"scenario": ctx.task.scenario.value,
"message": ctx.task.user_message, "message": ctx.task.user_message,
"progress_key": ctx.task.task_id, "progress_key": ctx.task.task_id,
"rag_context": str(ctx.task.metadata.get("rag_context", "")), "rag_context": str(ctx.task.metadata.get("rag_context", "")),
@@ -86,7 +108,7 @@ class StepRegistry:
raise RuntimeError(f"Unsupported graph_id: {graph_key}") raise RuntimeError(f"Unsupported graph_id: {graph_key}")
graph = ctx.graph_resolver(domain_id, process_id) graph = ctx.graph_resolver(domain_id, process_id)
state = ctx.artifacts.get_content("agent_state", {}) or {} state = self._build_graph_state(ctx)
if ctx.progress_cb is not None: if ctx.progress_cb is not None:
progress_registry.register(ctx.task.task_id, ctx.progress_cb) progress_registry.register(ctx.task.task_id, ctx.progress_cb)
@@ -96,8 +118,29 @@ class StepRegistry:
if ctx.progress_cb is not None: if ctx.progress_cb is not None:
progress_registry.unregister(ctx.task.task_id) progress_registry.unregister(ctx.task.task_id)
item = ctx.artifacts.put(key="graph_result", artifact_type=ArtifactType.STRUCTURED_JSON, content=result) return self._store_graph_outputs(step, ctx, result)
return [item.artifact_id]
def _build_graph_state(self, ctx: ExecutionContext) -> dict:
state = dict(ctx.artifacts.get_content("agent_state", {}) or {})
for item in ctx.artifacts.all_items():
state[item.key] = ctx.artifacts.get_content(item.key)
return state
def _store_graph_outputs(self, step: PlanStep, ctx: ExecutionContext, result: dict) -> list[str]:
if not isinstance(result, dict):
raise RuntimeError("graph_result must be an object")
if len(step.outputs) == 1 and step.outputs[0].key == "graph_result":
item = ctx.artifacts.put(key="graph_result", artifact_type=ArtifactType.STRUCTURED_JSON, content=result)
return [item.artifact_id]
artifact_ids: list[str] = []
for output in step.outputs:
value = result.get(output.key)
if value is None and output.required:
raise RuntimeError(f"graph_output_missing:{step.step_id}:{output.key}")
item = ctx.artifacts.put(key=output.key, artifact_type=output.type, content=value)
artifact_ids.append(item.artifact_id)
return artifact_ids
def _finalize_graph_output(self, ctx: ExecutionContext) -> list[str]: def _finalize_graph_output(self, ctx: ExecutionContext) -> list[str]:
raw = ctx.artifacts.get_content("graph_result", {}) or {} raw = ctx.artifacts.get_content("graph_result", {}) or {}

View File

@@ -16,6 +16,8 @@ class ScenarioTemplateRegistry:
return builders.get(task.scenario, self._general)(task) return builders.get(task.scenario, self._general)(task)
def _general(self, task: TaskSpec) -> ExecutionPlan: def _general(self, task: TaskSpec) -> ExecutionPlan:
if task.routing.domain_id == "project" and task.routing.process_id == "qa":
return self._project_qa(task)
steps = [ steps = [
self._step("collect_state", "Collect state", "collect_state", outputs=[self._out("agent_state", ArtifactType.STRUCTURED_JSON)]), self._step("collect_state", "Collect state", "collect_state", outputs=[self._out("agent_state", ArtifactType.STRUCTURED_JSON)]),
self._step( self._step(
@@ -39,7 +41,77 @@ class ScenarioTemplateRegistry:
] ]
return self._plan(task, "general_qa_v1", steps, [self._gate("non_empty_answer_or_changeset")]) return self._plan(task, "general_qa_v1", steps, [self._gate("non_empty_answer_or_changeset")])
def _project_qa(self, task: TaskSpec) -> ExecutionPlan:
steps = [
self._step("collect_state", "Collect state", "collect_state", outputs=[self._out("agent_state", ArtifactType.STRUCTURED_JSON)]),
self._step(
"conversation_understanding",
"Conversation understanding",
"execute_project_qa_graph",
executor="graph",
graph_id="project_qa/conversation_understanding",
depends_on=["collect_state"],
outputs=[self._out("resolved_request", ArtifactType.STRUCTURED_JSON)],
),
self._step(
"question_classification",
"Question classification",
"execute_project_qa_graph",
executor="graph",
graph_id="project_qa/question_classification",
depends_on=["conversation_understanding"],
outputs=[self._out("question_profile", ArtifactType.STRUCTURED_JSON)],
),
self._step(
"context_retrieval",
"Context retrieval",
"execute_project_qa_graph",
executor="graph",
graph_id="project_qa/context_retrieval",
depends_on=["question_classification"],
outputs=[self._out("source_bundle", ArtifactType.STRUCTURED_JSON)],
),
]
analysis_depends_on = ["context_retrieval"]
if task.scenario == Scenario.EXPLAIN_PART:
steps.append(
self._step(
"code_explain_pack_step",
"Build code explain pack",
"build_code_explain_pack",
depends_on=["context_retrieval"],
outputs=[self._out("explain_pack", ArtifactType.STRUCTURED_JSON)],
)
)
analysis_depends_on = ["code_explain_pack_step"]
steps.extend(
[
self._step(
"context_analysis",
"Context analysis",
"execute_project_qa_graph",
executor="graph",
graph_id="project_qa/context_analysis",
depends_on=analysis_depends_on,
outputs=[self._out("analysis_brief", ArtifactType.STRUCTURED_JSON)],
),
self._step(
"answer_composition",
"Answer composition",
"execute_project_qa_graph",
executor="graph",
graph_id="project_qa/answer_composition",
depends_on=["context_analysis"],
outputs=[self._out("answer_brief", ArtifactType.STRUCTURED_JSON, required=False), self._out("final_answer", ArtifactType.TEXT)],
gates=[self._gate("non_empty_answer_or_changeset")],
),
]
)
return self._plan(task, "project_qa_reasoning_v1", steps, [self._gate("non_empty_answer_or_changeset")])
def _explain(self, task: TaskSpec) -> ExecutionPlan: def _explain(self, task: TaskSpec) -> ExecutionPlan:
if task.routing.domain_id == "project" and task.routing.process_id == "qa":
return self._project_qa(task)
steps = [ steps = [
self._step("collect_sources", "Collect sources", "collect_sources", outputs=[self._out("sources", ArtifactType.STRUCTURED_JSON)]), self._step("collect_sources", "Collect sources", "collect_sources", outputs=[self._out("sources", ArtifactType.STRUCTURED_JSON)]),
self._step("extract_logic", "Extract logic", "extract_logic", depends_on=["collect_sources"], outputs=[self._out("logic_model", ArtifactType.STRUCTURED_JSON)]), self._step("extract_logic", "Extract logic", "extract_logic", depends_on=["collect_sources"], outputs=[self._out("logic_model", ArtifactType.STRUCTURED_JSON)]),

View File

@@ -2,21 +2,28 @@ from pathlib import Path
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from app.modules.agent.llm import AgentLlmService from app.modules.agent.llm import AgentLlmService
from app.modules.contracts import RagRetriever
if TYPE_CHECKING: if TYPE_CHECKING:
from app.modules.agent.repository import AgentRepository from app.modules.agent.repository import AgentRepository
from app.modules.agent.engine.router.router_service import RouterService from app.modules.agent.engine.router.router_service import RouterService
def build_router_service(llm: AgentLlmService, agent_repository: "AgentRepository") -> "RouterService": def build_router_service(llm: AgentLlmService, agent_repository: "AgentRepository", rag: RagRetriever) -> "RouterService":
from app.modules.agent.engine.graphs import ( from app.modules.agent.engine.graphs import (
BaseGraphFactory, BaseGraphFactory,
DocsGraphFactory, DocsGraphFactory,
ProjectEditsGraphFactory, ProjectEditsGraphFactory,
ProjectQaAnalysisGraphFactory,
ProjectQaAnswerGraphFactory,
ProjectQaClassificationGraphFactory,
ProjectQaConversationGraphFactory,
ProjectQaGraphFactory, ProjectQaGraphFactory,
ProjectQaRetrievalGraphFactory,
) )
from app.modules.agent.engine.router.context_store import RouterContextStore from app.modules.agent.engine.router.context_store import RouterContextStore
from app.modules.agent.engine.router.intent_classifier import IntentClassifier from app.modules.agent.engine.router.intent_classifier import IntentClassifier
from app.modules.agent.engine.router.intent_switch_detector import IntentSwitchDetector
from app.modules.agent.engine.router.registry import IntentRegistry from app.modules.agent.engine.router.registry import IntentRegistry
from app.modules.agent.engine.router.router_service import RouterService from app.modules.agent.engine.router.router_service import RouterService
@@ -26,13 +33,20 @@ def build_router_service(llm: AgentLlmService, agent_repository: "AgentRepositor
registry.register("project", "qa", ProjectQaGraphFactory(llm).build) registry.register("project", "qa", ProjectQaGraphFactory(llm).build)
registry.register("project", "edits", ProjectEditsGraphFactory(llm).build) registry.register("project", "edits", ProjectEditsGraphFactory(llm).build)
registry.register("docs", "generation", DocsGraphFactory(llm).build) registry.register("docs", "generation", DocsGraphFactory(llm).build)
registry.register("project_qa", "conversation_understanding", ProjectQaConversationGraphFactory(llm).build)
registry.register("project_qa", "question_classification", ProjectQaClassificationGraphFactory(llm).build)
registry.register("project_qa", "context_retrieval", ProjectQaRetrievalGraphFactory(rag, llm).build)
registry.register("project_qa", "context_analysis", ProjectQaAnalysisGraphFactory(llm).build)
registry.register("project_qa", "answer_composition", ProjectQaAnswerGraphFactory(llm).build)
classifier = IntentClassifier(llm) classifier = IntentClassifier(llm)
switch_detector = IntentSwitchDetector()
context_store = RouterContextStore(agent_repository) context_store = RouterContextStore(agent_repository)
return RouterService( return RouterService(
registry=registry, registry=registry,
classifier=classifier, classifier=classifier,
context_store=context_store, context_store=context_store,
switch_detector=switch_detector,
) )

View File

@@ -17,6 +17,7 @@ class RouterContextStore:
process_id: str, process_id: str,
user_message: str, user_message: str,
assistant_message: str, assistant_message: str,
decision_type: str = "start",
max_history: int = 10, max_history: int = 10,
) -> None: ) -> None:
self._repo.update_router_context( self._repo.update_router_context(
@@ -25,5 +26,6 @@ class RouterContextStore:
process_id=process_id, process_id=process_id,
user_message=user_message, user_message=user_message,
assistant_message=assistant_message, assistant_message=assistant_message,
decision_type=decision_type,
max_history=max_history, max_history=max_history,
) )

View File

@@ -17,11 +17,7 @@ class IntentClassifier:
def __init__(self, llm: AgentLlmService) -> None: def __init__(self, llm: AgentLlmService) -> None:
self._llm = llm self._llm = llm
def classify(self, user_message: str, context: RouterContext, mode: str = "auto") -> RouteDecision: def classify_new_intent(self, user_message: str, context: RouterContext) -> RouteDecision:
forced = self._from_mode(mode)
if forced:
return forced
text = (user_message or "").strip().lower() text = (user_message or "").strip().lower()
if text in self._short_confirmations and context.last_routing: if text in self._short_confirmations and context.last_routing:
return RouteDecision( return RouteDecision(
@@ -30,6 +26,7 @@ class IntentClassifier:
confidence=1.0, confidence=1.0,
reason="short_confirmation", reason="short_confirmation",
use_previous=True, use_previous=True,
decision_type="continue",
) )
deterministic = self._deterministic_route(text) deterministic = self._deterministic_route(text)
@@ -45,9 +42,10 @@ class IntentClassifier:
process_id="general", process_id="general",
confidence=0.8, confidence=0.8,
reason="default", reason="default",
decision_type="start",
) )
def _from_mode(self, mode: str) -> RouteDecision | None: def from_mode(self, mode: str) -> RouteDecision | None:
mapping = { mapping = {
"project_qa": ("project", "qa"), "project_qa": ("project", "qa"),
"project_edits": ("project", "edits"), "project_edits": ("project", "edits"),
@@ -65,6 +63,8 @@ class IntentClassifier:
process_id=route[1], process_id=route[1],
confidence=1.0, confidence=1.0,
reason=f"mode_override:{mode}", reason=f"mode_override:{mode}",
decision_type="switch",
explicit_switch=True,
) )
def _classify_with_llm(self, user_message: str, context: RouterContext) -> RouteDecision | None: def _classify_with_llm(self, user_message: str, context: RouterContext) -> RouteDecision | None:
@@ -96,6 +96,7 @@ class IntentClassifier:
process_id=route[1], process_id=route[1],
confidence=confidence, confidence=confidence,
reason=f"llm_router:{payload.get('reason', 'ok')}", reason=f"llm_router:{payload.get('reason', 'ok')}",
decision_type="start",
) )
def _parse_llm_payload(self, raw: str) -> dict[str, str | float] | None: def _parse_llm_payload(self, raw: str) -> dict[str, str | float] | None:
@@ -139,6 +140,8 @@ class IntentClassifier:
process_id="edits", process_id="edits",
confidence=0.97, confidence=0.97,
reason="deterministic_targeted_file_edit", reason="deterministic_targeted_file_edit",
decision_type="switch",
explicit_switch=True,
) )
if self._is_broad_docs_request(text): if self._is_broad_docs_request(text):
return RouteDecision( return RouteDecision(
@@ -146,6 +149,8 @@ class IntentClassifier:
process_id="generation", process_id="generation",
confidence=0.95, confidence=0.95,
reason="deterministic_docs_generation", reason="deterministic_docs_generation",
decision_type="switch",
explicit_switch=True,
) )
return None return None

View File

@@ -0,0 +1,81 @@
from __future__ import annotations
import re
from app.modules.agent.engine.router.schemas import RouterContext
class IntentSwitchDetector:
    """Heuristic guard that decides whether a user message opens a new intent.

    A switch is reported only when the dialog already has an active intent
    and the message carries a strong signal: an explicit switch phrase, a
    targeted file-edit request, or a documentation-generation request.
    Follow-up style messages never trigger a switch.
    """

    _EXPLICIT_SWITCH_MARKERS = (
        "теперь",
        "а теперь",
        "давай теперь",
        "переключись",
        "переключаемся",
        "сейчас другое",
        "новая задача",
        "new task",
        "switch to",
        "now do",
        "instead",
    )
    _FOLLOW_UP_MARKERS = (
        "а еще",
        "а ещё",
        "подробнее",
        "почему",
        "зачем",
        "что если",
        "и еще",
        "и ещё",
        "покажи подробнее",
        "можешь подробнее",
    )

    def should_switch(self, user_message: str, context: RouterContext) -> bool:
        """Return True when *user_message* should interrupt the active intent."""
        # Nothing to switch away from before the dialog has an active intent.
        if context.active_intent is None or not context.dialog_started:
            return False
        # Collapse whitespace and lowercase so marker matching is uniform.
        normalized = " ".join((user_message or "").strip().lower().split())
        if not normalized or self._is_follow_up(normalized):
            return False
        for marker in self._EXPLICIT_SWITCH_MARKERS:
            if marker in normalized:
                return True
        if self._is_strong_targeted_edit_request(normalized):
            return True
        return self._is_strong_docs_request(normalized)

    def _is_follow_up(self, text: str) -> bool:
        """True when the message reads like a follow-up on the current topic."""
        for marker in self._FOLLOW_UP_MARKERS:
            if marker in text:
                return True
        return False

    def _is_strong_targeted_edit_request(self, text: str) -> bool:
        """True for an edit verb combined with an explicit file reference."""
        edit_verbs = (
            "добавь",
            "добавить",
            "измени",
            "исправь",
            "обнови",
            "удали",
            "замени",
            "append",
            "update",
            "edit",
            "remove",
            "replace",
        )
        if not any(verb in text for verb in edit_verbs):
            return False
        if "readme" in text:
            return True
        # A bare filename with a known text/config/code extension counts as a file reference.
        return re.search(r"\b[\w.\-/]+\.(md|txt|rst|yaml|yml|json|toml|ini|cfg|py)\b", text) is not None

    def _is_strong_docs_request(self, text: str) -> bool:
        """True when the message explicitly asks to produce documentation."""
        docs_markers = (
            "подготовь документац",
            "сгенерируй документац",
            "создай документац",
            "опиши документац",
            "generate documentation",
            "write documentation",
        )
        for marker in docs_markers:
            if marker in text:
                return True
        return False

View File

@@ -1,7 +1,8 @@
from app.modules.agent.engine.router.context_store import RouterContextStore from app.modules.agent.engine.router.context_store import RouterContextStore
from app.modules.agent.engine.router.intent_classifier import IntentClassifier from app.modules.agent.engine.router.intent_classifier import IntentClassifier
from app.modules.agent.engine.router.intent_switch_detector import IntentSwitchDetector
from app.modules.agent.engine.router.registry import IntentRegistry from app.modules.agent.engine.router.registry import IntentRegistry
from app.modules.agent.engine.router.schemas import RouteResolution from app.modules.agent.engine.router.schemas import RouteDecision, RouteResolution
class RouterService: class RouterService:
@@ -10,27 +11,48 @@ class RouterService:
registry: IntentRegistry, registry: IntentRegistry,
classifier: IntentClassifier, classifier: IntentClassifier,
context_store: RouterContextStore, context_store: RouterContextStore,
switch_detector: IntentSwitchDetector | None = None,
min_confidence: float = 0.7, min_confidence: float = 0.7,
) -> None: ) -> None:
self._registry = registry self._registry = registry
self._classifier = classifier self._classifier = classifier
self._ctx = context_store self._ctx = context_store
self._switch_detector = switch_detector or IntentSwitchDetector()
self._min_confidence = min_confidence self._min_confidence = min_confidence
def resolve(self, user_message: str, conversation_key: str, mode: str = "auto") -> RouteResolution: def resolve(self, user_message: str, conversation_key: str, mode: str = "auto") -> RouteResolution:
context = self._ctx.get(conversation_key) context = self._ctx.get(conversation_key)
decision = self._classifier.classify(user_message, context, mode=mode) forced = self._classifier.from_mode(mode)
if decision.confidence < self._min_confidence: if forced:
return self._fallback("low_confidence") return self._resolution(forced)
if not self._registry.is_valid(decision.domain_id, decision.process_id):
return self._fallback("invalid_route") if not context.dialog_started or context.active_intent is None:
return RouteResolution( decision = self._classifier.classify_new_intent(user_message, context)
domain_id=decision.domain_id, if not self._is_acceptable(decision):
process_id=decision.process_id, return self._fallback("low_confidence")
confidence=decision.confidence, return self._resolution(
reason=decision.reason, decision.model_copy(
fallback_used=False, update={
) "decision_type": "start",
"explicit_switch": False,
}
)
)
if self._switch_detector.should_switch(user_message, context):
decision = self._classifier.classify_new_intent(user_message, context)
if self._is_acceptable(decision):
return self._resolution(
decision.model_copy(
update={
"decision_type": "switch",
"explicit_switch": True,
}
)
)
return self._continue_current(context, "explicit_switch_unresolved_keep_current")
return self._continue_current(context, "continue_current_intent")
def persist_context( def persist_context(
self, self,
@@ -40,6 +62,7 @@ class RouterService:
process_id: str, process_id: str,
user_message: str, user_message: str,
assistant_message: str, assistant_message: str,
decision_type: str = "start",
) -> None: ) -> None:
self._ctx.update( self._ctx.update(
conversation_key, conversation_key,
@@ -47,6 +70,7 @@ class RouterService:
process_id=process_id, process_id=process_id,
user_message=user_message, user_message=user_message,
assistant_message=assistant_message, assistant_message=assistant_message,
decision_type=decision_type,
) )
def graph_factory(self, domain_id: str, process_id: str): def graph_factory(self, domain_id: str, process_id: str):
@@ -59,4 +83,32 @@ class RouterService:
confidence=0.0, confidence=0.0,
reason=reason, reason=reason,
fallback_used=True, fallback_used=True,
decision_type="start",
explicit_switch=False,
)
def _continue_current(self, context, reason: str) -> RouteResolution:
    """Keep routing the dialog to its currently active intent.

    Falls back from the active intent to the last recorded routing, then to
    the default/general route when the context has no intent stored yet.
    """
    # NOTE(review): context is presumably a RouterContext — confirm at call sites.
    active = context.active_intent or context.last_routing or {"domain_id": "default", "process_id": "general"}
    return RouteResolution(
        domain_id=str(active["domain_id"]),
        process_id=str(active["process_id"]),
        confidence=1.0,  # continuing the same intent is treated as fully confident
        reason=reason,
        fallback_used=False,
        decision_type="continue",
        explicit_switch=False,
    )
def _is_acceptable(self, decision: RouteDecision) -> bool:
    """Accept a decision only when it is confident enough and its route is registered."""
    return decision.confidence >= self._min_confidence and self._registry.is_valid(decision.domain_id, decision.process_id)
def _resolution(self, decision: RouteDecision) -> RouteResolution:
return RouteResolution(
domain_id=decision.domain_id,
process_id=decision.process_id,
confidence=decision.confidence,
reason=decision.reason,
fallback_used=False,
decision_type=decision.decision_type,
explicit_switch=decision.explicit_switch,
) )

View File

@@ -7,6 +7,8 @@ class RouteDecision(BaseModel):
confidence: float = 0.0 confidence: float = 0.0
reason: str = "" reason: str = ""
use_previous: bool = False use_previous: bool = False
decision_type: str = "start"
explicit_switch: bool = False
@field_validator("confidence") @field_validator("confidence")
@classmethod @classmethod
@@ -20,8 +22,13 @@ class RouteResolution(BaseModel):
confidence: float confidence: float
reason: str reason: str
fallback_used: bool = False fallback_used: bool = False
decision_type: str = "start"
explicit_switch: bool = False
class RouterContext(BaseModel): class RouterContext(BaseModel):
last_routing: dict[str, str] | None = None last_routing: dict[str, str] | None = None
message_history: list[dict[str, str]] = Field(default_factory=list) message_history: list[dict[str, str]] = Field(default_factory=list)
active_intent: dict[str, str] | None = None
dialog_started: bool = False
turn_index: int = 0

View File

@@ -1,14 +1,40 @@
import logging
from app.modules.agent.prompt_loader import PromptLoader from app.modules.agent.prompt_loader import PromptLoader
from app.modules.shared.gigachat.client import GigaChatClient from app.modules.shared.gigachat.client import GigaChatClient
LOGGER = logging.getLogger(__name__)
def _truncate_for_log(text: str, max_chars: int = 1500) -> str:
value = (text or "").replace("\n", "\\n").strip()
if len(value) <= max_chars:
return value
return value[:max_chars].rstrip() + "...[truncated]"
class AgentLlmService: class AgentLlmService:
def __init__(self, client: GigaChatClient, prompts: PromptLoader) -> None: def __init__(self, client: GigaChatClient, prompts: PromptLoader) -> None:
self._client = client self._client = client
self._prompts = prompts self._prompts = prompts
def generate(self, prompt_name: str, user_input: str) -> str: def generate(self, prompt_name: str, user_input: str, *, log_context: str | None = None) -> str:
system_prompt = self._prompts.load(prompt_name) system_prompt = self._prompts.load(prompt_name)
if not system_prompt: if not system_prompt:
system_prompt = "You are a helpful assistant." system_prompt = "You are a helpful assistant."
return self._client.complete(system_prompt=system_prompt, user_prompt=user_input) if log_context:
LOGGER.warning(
"graph llm input: context=%s prompt=%s user_input=%s",
log_context,
prompt_name,
_truncate_for_log(user_input),
)
output = self._client.complete(system_prompt=system_prompt, user_prompt=user_input)
if log_context:
LOGGER.warning(
"graph llm output: context=%s prompt=%s output=%s",
log_context,
prompt_name,
_truncate_for_log(output),
)
return output

View File

@@ -1,5 +1,8 @@
from __future__ import annotations
from fastapi import APIRouter from fastapi import APIRouter
from pydantic import BaseModel, HttpUrl from pydantic import BaseModel, HttpUrl
from typing import TYPE_CHECKING
from app.modules.agent.changeset_validator import ChangeSetValidator from app.modules.agent.changeset_validator import ChangeSetValidator
from app.modules.agent.confluence_service import ConfluenceService from app.modules.agent.confluence_service import ConfluenceService
@@ -19,12 +22,17 @@ class ConfluenceFetchRequest(BaseModel):
url: HttpUrl url: HttpUrl
if TYPE_CHECKING:
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
class AgentModule: class AgentModule:
def __init__( def __init__(
self, self,
rag_retriever: RagRetriever, rag_retriever: RagRetriever,
agent_repository: AgentRepository, agent_repository: AgentRepository,
story_context_repository: StoryContextRepository, story_context_repository: StoryContextRepository,
code_explain_retriever: CodeExplainRetrieverV2 | None = None,
) -> None: ) -> None:
self.confluence = ConfluenceService() self.confluence = ConfluenceService()
self.changeset_validator = ChangeSetValidator() self.changeset_validator = ChangeSetValidator()
@@ -34,14 +42,16 @@ class AgentModule:
client = GigaChatClient(settings, token_provider) client = GigaChatClient(settings, token_provider)
prompt_loader = PromptLoader() prompt_loader = PromptLoader()
llm = AgentLlmService(client=client, prompts=prompt_loader) llm = AgentLlmService(client=client, prompts=prompt_loader)
self.llm = llm
story_recorder = StorySessionRecorder(story_context_repository) story_recorder = StorySessionRecorder(story_context_repository)
self.runtime = GraphAgentRuntime( self.runtime = GraphAgentRuntime(
rag=rag_retriever, rag=rag_retriever,
confluence=self.confluence, confluence=self.confluence,
changeset_validator=self.changeset_validator, changeset_validator=self.changeset_validator,
llm=llm, llm=self.llm,
agent_repository=agent_repository, agent_repository=agent_repository,
story_recorder=story_recorder, story_recorder=story_recorder,
code_explain_retriever=code_explain_retriever,
) )
def internal_router(self) -> APIRouter: def internal_router(self) -> APIRouter:

View File

@@ -0,0 +1,17 @@
Объяснение кода осуществляется только с использованием предоставленного ExplainPack.
Правила:
- Сначала используйте доказательства.
- Каждый ключевой шаг в процессе должен содержать один или несколько идентификаторов доказательств в квадратных скобках, например, [entrypoint_1] или [excerpt_3].
- Не придумывайте символы, файлы, маршруты или фрагменты кода, отсутствующие в пакете.
- Если доказательства неполные, укажите это явно.
- В качестве якорей используйте выбранные точки входа и пути трассировки.
Верните Markdown со следующей структурой:
1. Краткое описание
2. Пошаговый процесс
3. Данные и побочные эффекты
4. Ошибки и граничные случаи
5. Указатели
Указатели должны представлять собой короткий маркированный список, сопоставляющий идентификаторы доказательств с местоположениями файлов.

View File

@@ -0,0 +1,24 @@
Ты intent-router для layered RAG.
На вход ты получаешь JSON с полями:
- message: текущий запрос пользователя
- active_intent: текущий активный intent диалога или null
- last_query: предыдущий запрос пользователя
- allowed_intents: допустимые intent'ы
Выбери ровно один intent из allowed_intents.
Верни только JSON без markdown и пояснений.
Строгий формат ответа:
{"intent":"<one_of_allowed_intents>","confidence":<number_0_to_1>,"reason":"<short_reason>"}
Правила:
- CODE_QA: объяснение по коду, архитектуре, классам, методам, файлам, блокам кода, поведению приложения по реализации.
- DOCS_QA: объяснение по документации, README, markdown, specs, runbooks, разделам документации.
- GENERATE_DOCS_FROM_CODE: просьба сгенерировать, подготовить или обновить документацию по коду.
- PROJECT_MISC: прочие вопросы по проекту, не относящиеся явно к коду или документации.
Приоритет:
- Если пользователь просит именно подготовить документацию по коду, выбирай GENERATE_DOCS_FROM_CODE.
- Если пользователь спрашивает про конкретный класс, файл, метод или блок кода, выбирай CODE_QA.
- Если пользователь спрашивает про README, docs, markdown или конкретную документацию, выбирай DOCS_QA.
- Если сигнал неочевиден, выбирай PROJECT_MISC и confidence <= 0.6.

View File

@@ -18,6 +18,10 @@ class AgentRepository:
conversation_key VARCHAR(64) PRIMARY KEY, conversation_key VARCHAR(64) PRIMARY KEY,
last_domain_id VARCHAR(64) NULL, last_domain_id VARCHAR(64) NULL,
last_process_id VARCHAR(64) NULL, last_process_id VARCHAR(64) NULL,
active_domain_id VARCHAR(64) NULL,
active_process_id VARCHAR(64) NULL,
dialog_started BOOLEAN NOT NULL DEFAULT FALSE,
turn_index INTEGER NOT NULL DEFAULT 0,
message_history_json TEXT NOT NULL DEFAULT '[]', message_history_json TEXT NOT NULL DEFAULT '[]',
updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
) )
@@ -64,14 +68,24 @@ class AgentRepository:
""" """
) )
) )
self._ensure_router_context_columns(conn)
conn.commit() conn.commit()
def _ensure_router_context_columns(self, conn) -> None:
for statement in (
"ALTER TABLE router_context ADD COLUMN IF NOT EXISTS active_domain_id VARCHAR(64) NULL",
"ALTER TABLE router_context ADD COLUMN IF NOT EXISTS active_process_id VARCHAR(64) NULL",
"ALTER TABLE router_context ADD COLUMN IF NOT EXISTS dialog_started BOOLEAN NOT NULL DEFAULT FALSE",
"ALTER TABLE router_context ADD COLUMN IF NOT EXISTS turn_index INTEGER NOT NULL DEFAULT 0",
):
conn.execute(text(statement))
def get_router_context(self, conversation_key: str) -> RouterContext: def get_router_context(self, conversation_key: str) -> RouterContext:
with get_engine().connect() as conn: with get_engine().connect() as conn:
row = conn.execute( row = conn.execute(
text( text(
""" """
SELECT last_domain_id, last_process_id, message_history_json SELECT last_domain_id, last_process_id, active_domain_id, active_process_id, dialog_started, turn_index, message_history_json
FROM router_context FROM router_context
WHERE conversation_key = :key WHERE conversation_key = :key
""" """
@@ -82,7 +96,7 @@ class AgentRepository:
if not row: if not row:
return RouterContext() return RouterContext()
history_raw = row[2] or "[]" history_raw = row[6] or "[]"
try: try:
history = json.loads(history_raw) history = json.loads(history_raw)
except json.JSONDecodeError: except json.JSONDecodeError:
@@ -91,6 +105,9 @@ class AgentRepository:
last = None last = None
if row[0] and row[1]: if row[0] and row[1]:
last = {"domain_id": str(row[0]), "process_id": str(row[1])} last = {"domain_id": str(row[0]), "process_id": str(row[1])}
active = None
if row[2] and row[3]:
active = {"domain_id": str(row[2]), "process_id": str(row[3])}
clean_history = [] clean_history = []
for item in history if isinstance(history, list) else []: for item in history if isinstance(history, list) else []:
@@ -101,7 +118,13 @@ class AgentRepository:
if role in {"user", "assistant"} and content: if role in {"user", "assistant"} and content:
clean_history.append({"role": role, "content": content}) clean_history.append({"role": role, "content": content})
return RouterContext(last_routing=last, message_history=clean_history) return RouterContext(
last_routing=last,
message_history=clean_history,
active_intent=active or last,
dialog_started=bool(row[4]),
turn_index=int(row[5] or 0),
)
def update_router_context( def update_router_context(
self, self,
@@ -111,6 +134,7 @@ class AgentRepository:
process_id: str, process_id: str,
user_message: str, user_message: str,
assistant_message: str, assistant_message: str,
decision_type: str,
max_history: int, max_history: int,
) -> None: ) -> None:
current = self.get_router_context(conversation_key) current = self.get_router_context(conversation_key)
@@ -121,17 +145,29 @@ class AgentRepository:
history.append({"role": "assistant", "content": assistant_message}) history.append({"role": "assistant", "content": assistant_message})
if max_history > 0: if max_history > 0:
history = history[-max_history:] history = history[-max_history:]
current_active = current.active_intent or current.last_routing or {"domain_id": domain_id, "process_id": process_id}
next_active = (
{"domain_id": domain_id, "process_id": process_id}
if decision_type in {"start", "switch"}
else current_active
)
next_turn_index = max(0, int(current.turn_index or 0)) + (1 if user_message else 0)
with get_engine().connect() as conn: with get_engine().connect() as conn:
conn.execute( conn.execute(
text( text(
""" """
INSERT INTO router_context ( INSERT INTO router_context (
conversation_key, last_domain_id, last_process_id, message_history_json conversation_key, last_domain_id, last_process_id, active_domain_id, active_process_id,
) VALUES (:key, :domain, :process, :history) dialog_started, turn_index, message_history_json
) VALUES (:key, :domain, :process, :active_domain, :active_process, :dialog_started, :turn_index, :history)
ON CONFLICT (conversation_key) DO UPDATE SET ON CONFLICT (conversation_key) DO UPDATE SET
last_domain_id = EXCLUDED.last_domain_id, last_domain_id = EXCLUDED.last_domain_id,
last_process_id = EXCLUDED.last_process_id, last_process_id = EXCLUDED.last_process_id,
active_domain_id = EXCLUDED.active_domain_id,
active_process_id = EXCLUDED.active_process_id,
dialog_started = EXCLUDED.dialog_started,
turn_index = EXCLUDED.turn_index,
message_history_json = EXCLUDED.message_history_json, message_history_json = EXCLUDED.message_history_json,
updated_at = CURRENT_TIMESTAMP updated_at = CURRENT_TIMESTAMP
""" """
@@ -140,6 +176,10 @@ class AgentRepository:
"key": conversation_key, "key": conversation_key,
"domain": domain_id, "domain": domain_id,
"process": process_id, "process": process_id,
"active_domain": str(next_active["domain_id"]),
"active_process": str(next_active["process_id"]),
"dialog_started": True,
"turn_index": next_turn_index,
"history": json.dumps(history, ensure_ascii=False), "history": json.dumps(history, ensure_ascii=False),
}, },
) )

View File

@@ -1,12 +1,16 @@
from __future__ import annotations
from dataclasses import dataclass, field from dataclasses import dataclass, field
from collections.abc import Awaitable, Callable from collections.abc import Awaitable, Callable
import inspect import inspect
import logging import logging
import re import re
from typing import TYPE_CHECKING
from app.modules.agent.engine.orchestrator import OrchestratorService, TaskSpecBuilder from app.modules.agent.engine.orchestrator import OrchestratorService, TaskSpecBuilder
from app.modules.agent.engine.orchestrator.metrics_persister import MetricsPersister from app.modules.agent.engine.orchestrator.metrics_persister import MetricsPersister
from app.modules.agent.engine.orchestrator.models import RoutingMeta from app.modules.agent.engine.orchestrator.models import RoutingMeta
from app.modules.agent.engine.orchestrator.step_registry import StepRegistry
from app.modules.agent.engine.router import build_router_service from app.modules.agent.engine.router import build_router_service
from app.modules.agent.llm import AgentLlmService from app.modules.agent.llm import AgentLlmService
from app.modules.agent.story_session_recorder import StorySessionRecorder from app.modules.agent.story_session_recorder import StorySessionRecorder
@@ -22,6 +26,9 @@ from app.schemas.common import ModuleName
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
if TYPE_CHECKING:
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
def _truncate_for_log(text: str | None, max_chars: int = 1500) -> str: def _truncate_for_log(text: str | None, max_chars: int = 1500) -> str:
value = (text or "").replace("\n", "\\n").strip() value = (text or "").replace("\n", "\\n").strip()
@@ -47,13 +54,14 @@ class GraphAgentRuntime:
llm: AgentLlmService, llm: AgentLlmService,
agent_repository: AgentRepository, agent_repository: AgentRepository,
story_recorder: StorySessionRecorder | None = None, story_recorder: StorySessionRecorder | None = None,
code_explain_retriever: CodeExplainRetrieverV2 | None = None,
) -> None: ) -> None:
self._rag = rag self._rag = rag
self._confluence = confluence self._confluence = confluence
self._changeset_validator = changeset_validator self._changeset_validator = changeset_validator
self._router = build_router_service(llm, agent_repository) self._router = build_router_service(llm, agent_repository, rag)
self._task_spec_builder = TaskSpecBuilder() self._task_spec_builder = TaskSpecBuilder()
self._orchestrator = OrchestratorService() self._orchestrator = OrchestratorService(step_registry=StepRegistry(code_explain_retriever))
self._metrics_persister = MetricsPersister(agent_repository) self._metrics_persister = MetricsPersister(agent_repository)
self._story_recorder = story_recorder self._story_recorder = story_recorder
self._checkpointer = None self._checkpointer = None
@@ -70,7 +78,7 @@ class GraphAgentRuntime:
files: list[dict], files: list[dict],
progress_cb: Callable[[str, str, str, dict | None], Awaitable[None] | None] | None = None, progress_cb: Callable[[str, str, str, dict | None], Awaitable[None] | None] | None = None,
) -> AgentResult: ) -> AgentResult:
LOGGER.warning( LOGGER.info(
"GraphAgentRuntime.run started: task_id=%s dialog_session_id=%s mode=%s", "GraphAgentRuntime.run started: task_id=%s dialog_session_id=%s mode=%s",
task_id, task_id,
dialog_session_id, dialog_session_id,
@@ -96,9 +104,7 @@ class GraphAgentRuntime:
meta={"domain_id": route.domain_id, "process_id": route.process_id}, meta={"domain_id": route.domain_id, "process_id": route.process_id},
) )
files_map = self._build_files_map(files) files_map = self._build_files_map(files)
rag_ctx: list[dict] = []
await self._emit_progress(progress_cb, "agent.rag", "Собираю релевантный контекст из RAG.")
rag_ctx = await self._rag.retrieve(rag_session_id, message)
await self._emit_progress(progress_cb, "agent.attachments", "Обрабатываю дополнительные вложения.") await self._emit_progress(progress_cb, "agent.attachments", "Обрабатываю дополнительные вложения.")
conf_pages = await self._fetch_confluence_pages(attachments) conf_pages = await self._fetch_confluence_pages(attachments)
route_meta = RoutingMeta( route_meta = RoutingMeta(
@@ -157,8 +163,9 @@ class GraphAgentRuntime:
process_id=route.process_id, process_id=route.process_id,
user_message=message, user_message=message,
assistant_message=final_answer, assistant_message=final_answer,
decision_type=route.decision_type,
) )
LOGGER.warning( LOGGER.info(
"final agent answer: task_id=%s route=%s/%s answer=%s", "final agent answer: task_id=%s route=%s/%s answer=%s",
task_id, task_id,
route.domain_id, route.domain_id,
@@ -178,7 +185,7 @@ class GraphAgentRuntime:
answer=final_answer, answer=final_answer,
meta={ meta={
"route": route.model_dump(), "route": route.model_dump(),
"used_rag": True, "used_rag": False,
"used_confluence": bool(conf_pages), "used_confluence": bool(conf_pages),
"changeset_filtered_out": True, "changeset_filtered_out": True,
"orchestrator": orchestrator_meta, "orchestrator": orchestrator_meta,
@@ -193,6 +200,7 @@ class GraphAgentRuntime:
process_id=route.process_id, process_id=route.process_id,
user_message=message, user_message=message,
assistant_message=final_answer or f"changeset:{len(validated)}", assistant_message=final_answer or f"changeset:{len(validated)}",
decision_type=route.decision_type,
) )
final = AgentResult( final = AgentResult(
result_type=TaskResultType.CHANGESET, result_type=TaskResultType.CHANGESET,
@@ -200,7 +208,7 @@ class GraphAgentRuntime:
changeset=validated, changeset=validated,
meta={ meta={
"route": route.model_dump(), "route": route.model_dump(),
"used_rag": True, "used_rag": False,
"used_confluence": bool(conf_pages), "used_confluence": bool(conf_pages),
"orchestrator": orchestrator_meta, "orchestrator": orchestrator_meta,
"orchestrator_steps": orchestrator_steps, "orchestrator_steps": orchestrator_steps,
@@ -214,7 +222,7 @@ class GraphAgentRuntime:
scenario=str(orchestrator_meta.get("scenario", task_spec.scenario.value)), scenario=str(orchestrator_meta.get("scenario", task_spec.scenario.value)),
quality=quality_meta, quality=quality_meta,
) )
LOGGER.warning( LOGGER.info(
"GraphAgentRuntime.run completed: task_id=%s route=%s/%s result_type=%s changeset_items=%s", "GraphAgentRuntime.run completed: task_id=%s route=%s/%s result_type=%s changeset_items=%s",
task_id, task_id,
route.domain_id, route.domain_id,
@@ -222,7 +230,7 @@ class GraphAgentRuntime:
final.result_type.value, final.result_type.value,
len(final.changeset), len(final.changeset),
) )
LOGGER.warning( LOGGER.info(
"final agent answer: task_id=%s route=%s/%s answer=%s", "final agent answer: task_id=%s route=%s/%s answer=%s",
task_id, task_id,
route.domain_id, route.domain_id,
@@ -239,13 +247,14 @@ class GraphAgentRuntime:
process_id=route.process_id, process_id=route.process_id,
user_message=message, user_message=message,
assistant_message=final_answer, assistant_message=final_answer,
decision_type=route.decision_type,
) )
final = AgentResult( final = AgentResult(
result_type=TaskResultType.ANSWER, result_type=TaskResultType.ANSWER,
answer=final_answer, answer=final_answer,
meta={ meta={
"route": route.model_dump(), "route": route.model_dump(),
"used_rag": True, "used_rag": False,
"used_confluence": bool(conf_pages), "used_confluence": bool(conf_pages),
"orchestrator": orchestrator_meta, "orchestrator": orchestrator_meta,
"orchestrator_steps": orchestrator_steps, "orchestrator_steps": orchestrator_steps,
@@ -259,7 +268,7 @@ class GraphAgentRuntime:
scenario=str(orchestrator_meta.get("scenario", task_spec.scenario.value)), scenario=str(orchestrator_meta.get("scenario", task_spec.scenario.value)),
quality=quality_meta, quality=quality_meta,
) )
LOGGER.warning( LOGGER.info(
"GraphAgentRuntime.run completed: task_id=%s route=%s/%s result_type=%s answer_len=%s", "GraphAgentRuntime.run completed: task_id=%s route=%s/%s result_type=%s answer_len=%s",
task_id, task_id,
route.domain_id, route.domain_id,
@@ -267,7 +276,7 @@ class GraphAgentRuntime:
final.result_type.value, final.result_type.value,
len(final.answer or ""), len(final.answer or ""),
) )
LOGGER.warning( LOGGER.info(
"final agent answer: task_id=%s route=%s/%s answer=%s", "final agent answer: task_id=%s route=%s/%s answer=%s",
task_id, task_id,
route.domain_id, route.domain_id,
@@ -351,7 +360,7 @@ class GraphAgentRuntime:
factory = self._router.graph_factory("default", "general") factory = self._router.graph_factory("default", "general")
if factory is None: if factory is None:
raise RuntimeError("No graph factory configured") raise RuntimeError("No graph factory configured")
LOGGER.warning("_resolve_graph resolved: domain_id=%s process_id=%s", domain_id, process_id) LOGGER.debug("_resolve_graph resolved: domain_id=%s process_id=%s", domain_id, process_id)
return factory(self._checkpointer) return factory(self._checkpointer)
def _invoke_graph(self, graph, state: dict, dialog_session_id: str): def _invoke_graph(self, graph, state: dict, dialog_session_id: str):
@@ -365,7 +374,7 @@ class GraphAgentRuntime:
for item in attachments: for item in attachments:
if item.get("type") == "confluence_url": if item.get("type") == "confluence_url":
pages.append(await self._confluence.fetch_page(item["url"])) pages.append(await self._confluence.fetch_page(item["url"]))
LOGGER.warning("_fetch_confluence_pages completed: pages=%s", len(pages)) LOGGER.info("_fetch_confluence_pages completed: pages=%s", len(pages))
return pages return pages
def _format_rag(self, items: list[dict]) -> str: def _format_rag(self, items: list[dict]) -> str:
@@ -411,7 +420,7 @@ class GraphAgentRuntime:
"content": str(item.get("content", "")), "content": str(item.get("content", "")),
"content_hash": str(item.get("content_hash", "")), "content_hash": str(item.get("content_hash", "")),
} }
LOGGER.warning("_build_files_map completed: files=%s", len(output)) LOGGER.debug("_build_files_map completed: files=%s", len(output))
return output return output
def _lookup_file(self, files_map: dict[str, dict], path: str) -> dict | None: def _lookup_file(self, files_map: dict[str, dict], path: str) -> dict | None:
@@ -437,7 +446,7 @@ class GraphAgentRuntime:
) )
item.base_hash = str(source["content_hash"]) item.base_hash = str(source["content_hash"])
enriched.append(item) enriched.append(item)
LOGGER.warning("_enrich_changeset_hashes completed: items=%s", len(enriched)) LOGGER.debug("_enrich_changeset_hashes completed: items=%s", len(enriched))
return enriched return enriched
def _sanitize_changeset(self, items: list[ChangeItem], files_map: dict[str, dict]) -> list[ChangeItem]: def _sanitize_changeset(self, items: list[ChangeItem], files_map: dict[str, dict]) -> list[ChangeItem]:
@@ -462,7 +471,7 @@ class GraphAgentRuntime:
continue continue
sanitized.append(item) sanitized.append(item)
if dropped_noop or dropped_ws: if dropped_noop or dropped_ws:
LOGGER.warning( LOGGER.info(
"_sanitize_changeset dropped items: noop=%s whitespace_only=%s kept=%s", "_sanitize_changeset dropped items: noop=%s whitespace_only=%s kept=%s",
dropped_noop, dropped_noop,
dropped_ws, dropped_ws,

View File

@@ -1,9 +1,14 @@
from app.modules.agent.module import AgentModule from app.modules.agent.module import AgentModule
from app.modules.agent.repository import AgentRepository from app.modules.agent.repository import AgentRepository
from app.modules.agent.story_context_repository import StoryContextRepository, StoryContextSchemaRepository from app.modules.agent.story_context_repository import StoryContextRepository, StoryContextSchemaRepository
from app.modules.chat.direct_service import CodeExplainChatService
from app.modules.chat.dialog_store import DialogSessionStore
from app.modules.chat.repository import ChatRepository from app.modules.chat.repository import ChatRepository
from app.modules.chat.module import ChatModule from app.modules.chat.module import ChatModule
from app.modules.chat.session_resolver import ChatSessionResolver
from app.modules.chat.task_store import TaskStore
from app.modules.rag.persistence.repository import RagRepository from app.modules.rag.persistence.repository import RagRepository
from app.modules.rag.explain import CodeExplainRetrieverV2, CodeGraphRepository, LayeredRetrievalGateway
from app.modules.rag_session.module import RagModule from app.modules.rag_session.module import RagModule
from app.modules.rag_repo.module import RagRepoModule from app.modules.rag_repo.module import RagRepoModule
from app.modules.shared.bootstrap import bootstrap_database from app.modules.shared.bootstrap import bootstrap_database
@@ -20,16 +25,32 @@ class ModularApplication:
self.agent_repository = AgentRepository() self.agent_repository = AgentRepository()
self.story_context_schema_repository = StoryContextSchemaRepository() self.story_context_schema_repository = StoryContextSchemaRepository()
self.story_context_repository = StoryContextRepository() self.story_context_repository = StoryContextRepository()
self.chat_tasks = TaskStore()
self.rag_session = RagModule(event_bus=self.events, retry=self.retry, repository=self.rag_repository) self.rag_session = RagModule(event_bus=self.events, retry=self.retry, repository=self.rag_repository)
self.rag_repo = RagRepoModule( self.rag_repo = RagRepoModule(
story_context_repository=self.story_context_repository, story_context_repository=self.story_context_repository,
rag_repository=self.rag_repository, rag_repository=self.rag_repository,
) )
self.code_explain_retriever = CodeExplainRetrieverV2(
gateway=LayeredRetrievalGateway(self.rag_repository, self.rag_session.embedder),
graph_repository=CodeGraphRepository(),
)
self.agent = AgentModule( self.agent = AgentModule(
rag_retriever=self.rag_session.rag, rag_retriever=self.rag_session.rag,
agent_repository=self.agent_repository, agent_repository=self.agent_repository,
story_context_repository=self.story_context_repository, story_context_repository=self.story_context_repository,
code_explain_retriever=self.code_explain_retriever,
)
self.direct_chat = CodeExplainChatService(
retriever=self.code_explain_retriever,
llm=self.agent.llm,
session_resolver=ChatSessionResolver(
dialogs=DialogSessionStore(self.chat_repository),
rag_session_exists=lambda rag_session_id: self.rag_session.sessions.get(rag_session_id) is not None,
),
task_store=self.chat_tasks,
message_sink=self.chat_repository.add_message,
) )
self.chat = ChatModule( self.chat = ChatModule(
agent_runner=self.agent.runtime, agent_runner=self.agent.runtime,
@@ -37,6 +58,8 @@ class ModularApplication:
retry=self.retry, retry=self.retry,
rag_sessions=self.rag_session.sessions, rag_sessions=self.rag_session.sessions,
repository=self.chat_repository, repository=self.chat_repository,
direct_chat=self.direct_chat,
task_store=self.chat_tasks,
) )
def startup(self) -> None: def startup(self) -> None:

View File

@@ -1,7 +1,11 @@
from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
from typing import TYPE_CHECKING
from uuid import uuid4 from uuid import uuid4
from app.modules.chat.repository import ChatRepository if TYPE_CHECKING:
from app.modules.chat.repository import ChatRepository
@dataclass @dataclass

View File

@@ -0,0 +1,71 @@
from __future__ import annotations
import logging
from uuid import uuid4
from app.modules.agent.llm import AgentLlmService
from app.modules.chat.evidence_gate import CodeExplainEvidenceGate
from app.modules.chat.session_resolver import ChatSessionResolver
from app.modules.chat.task_store import TaskState, TaskStore
from app.modules.rag.explain import CodeExplainRetrieverV2, PromptBudgeter
from app.schemas.chat import ChatMessageRequest, TaskQueuedResponse, TaskResultType, TaskStatus
LOGGER = logging.getLogger(__name__)
class CodeExplainChatService:
    """Synchronous "code explain" chat path that bypasses the async task queue.

    Resolves the dialog/rag session pair, builds an evidence pack via the
    layered retriever, gates the pack for sufficiency, and either asks the
    LLM for an answer or returns the gate's diagnostic fallback. The task is
    persisted as DONE before returning, so clients never need to poll.
    """

    def __init__(
        self,
        retriever: CodeExplainRetrieverV2,
        llm: AgentLlmService,
        session_resolver: ChatSessionResolver,
        task_store: TaskStore,
        message_sink,
        budgeter: PromptBudgeter | None = None,
        evidence_gate: CodeExplainEvidenceGate | None = None,
    ) -> None:
        # message_sink(dialog_session_id, role, text, *, task_id) persists one
        # chat message; in production this is ChatRepository.add_message.
        self._retriever = retriever
        self._llm = llm
        self._session_resolver = session_resolver
        self._task_store = task_store
        self._message_sink = message_sink
        self._budgeter = budgeter or PromptBudgeter()
        self._evidence_gate = evidence_gate or CodeExplainEvidenceGate()

    async def handle_message(self, request: ChatMessageRequest) -> TaskQueuedResponse:
        """Process *request* inline and return an already-DONE task envelope.

        Raises:
            AppError: propagated from the session resolver when the session
                pair in the request cannot be resolved.
        """
        dialog_session_id, rag_session_id = self._session_resolver.resolve(request)
        task_id = str(uuid4())
        task = TaskState(task_id=task_id, status=TaskStatus.RUNNING)
        self._task_store.save(task)
        self._message_sink(dialog_session_id, "user", request.message, task_id=task_id)
        pack = self._retriever.build_pack(
            rag_session_id,
            request.message,
            file_candidates=[item.model_dump(mode="json") for item in request.files],
        )
        decision = self._evidence_gate.evaluate(pack)
        if decision.passed:
            prompt_input = self._budgeter.build_prompt_input(request.message, pack)
            answer = self._llm.generate(
                "code_explain_answer_v2",
                prompt_input,
                log_context="chat.code_explain.direct",
            ).strip()
        else:
            # Not enough grounded evidence: use the gate's diagnostic fallback
            # instead of letting the LLM guess.
            answer = decision.answer
        self._message_sink(dialog_session_id, "assistant", answer, task_id=task_id)
        task.status = TaskStatus.DONE
        task.result_type = TaskResultType.ANSWER
        task.answer = answer
        self._task_store.save(task)
        # info, not warning: this is the routine success path (matches the
        # logging levels used by the chat orchestrator in this codebase).
        LOGGER.info(
            "direct code explain response: task_id=%s rag_session_id=%s excerpts=%s missing=%s",
            task_id,
            rag_session_id,
            len(pack.code_excerpts),
            pack.missing,
        )
        return TaskQueuedResponse(
            task_id=task_id,
            status=TaskStatus.DONE.value,
        )

View File

@@ -0,0 +1,62 @@
from __future__ import annotations
from dataclasses import dataclass, field
from app.modules.rag.explain.models import ExplainPack
@dataclass(slots=True)
class EvidenceGateDecision:
passed: bool
answer: str = ""
diagnostics: dict[str, list[str]] = field(default_factory=dict)
class CodeExplainEvidenceGate:
def __init__(self, min_excerpts: int = 2) -> None:
self._min_excerpts = min_excerpts
def evaluate(self, pack: ExplainPack) -> EvidenceGateDecision:
diagnostics = self._diagnostics(pack)
if len(pack.code_excerpts) >= self._min_excerpts:
return EvidenceGateDecision(passed=True, diagnostics=diagnostics)
return EvidenceGateDecision(
passed=False,
answer=self._build_answer(pack, diagnostics),
diagnostics=diagnostics,
)
def _diagnostics(self, pack: ExplainPack) -> dict[str, list[str]]:
return {
"entrypoints": [item.title for item in pack.selected_entrypoints[:3] if item.title],
"symbols": [item.title for item in pack.seed_symbols[:5] if item.title],
"paths": self._paths(pack),
"missing": list(pack.missing),
}
def _paths(self, pack: ExplainPack) -> list[str]:
values: list[str] = []
for item in pack.selected_entrypoints + pack.seed_symbols:
path = item.source or (item.location.path if item.location else "")
if path and path not in values:
values.append(path)
for excerpt in pack.code_excerpts:
if excerpt.path and excerpt.path not in values:
values.append(excerpt.path)
return values[:6]
def _build_answer(self, pack: ExplainPack, diagnostics: dict[str, list[str]]) -> str:
lines = [
"Недостаточно опоры в коде, чтобы дать объяснение без догадок.",
"",
f"Найдено фрагментов кода: {len(pack.code_excerpts)} из {self._min_excerpts} минимально необходимых.",
]
if diagnostics["paths"]:
lines.append(f"Пути: {', '.join(diagnostics['paths'])}")
if diagnostics["entrypoints"]:
lines.append(f"Entrypoints: {', '.join(diagnostics['entrypoints'])}")
if diagnostics["symbols"]:
lines.append(f"Символы: {', '.join(diagnostics['symbols'])}")
if diagnostics["missing"]:
lines.append(f"Диагностика: {', '.join(diagnostics['missing'])}")
return "\n".join(lines).strip()

View File

@@ -1,13 +1,16 @@
from __future__ import annotations
import os
from typing import TYPE_CHECKING
from fastapi import APIRouter, Header from fastapi import APIRouter, Header
from fastapi.responses import StreamingResponse from fastapi.responses import StreamingResponse
from app.core.exceptions import AppError from app.core.exceptions import AppError
from app.modules.chat.direct_service import CodeExplainChatService
from app.modules.chat.dialog_store import DialogSessionStore from app.modules.chat.dialog_store import DialogSessionStore
from app.modules.chat.repository import ChatRepository
from app.modules.chat.service import ChatOrchestrator from app.modules.chat.service import ChatOrchestrator
from app.modules.chat.task_store import TaskStore from app.modules.chat.task_store import TaskStore
from app.modules.contracts import AgentRunner
from app.modules.rag_session.session_store import RagSessionStore
from app.modules.shared.event_bus import EventBus from app.modules.shared.event_bus import EventBus
from app.modules.shared.idempotency_store import IdempotencyStore from app.modules.shared.idempotency_store import IdempotencyStore
from app.modules.shared.retry_executor import RetryExecutor from app.modules.shared.retry_executor import RetryExecutor
@@ -20,6 +23,11 @@ from app.schemas.chat import (
) )
from app.schemas.common import ModuleName from app.schemas.common import ModuleName
if TYPE_CHECKING:
from app.modules.chat.repository import ChatRepository
from app.modules.contracts import AgentRunner
from app.modules.rag_session.session_store import RagSessionStore
class ChatModule: class ChatModule:
def __init__( def __init__(
@@ -29,12 +37,16 @@ class ChatModule:
retry: RetryExecutor, retry: RetryExecutor,
rag_sessions: RagSessionStore, rag_sessions: RagSessionStore,
repository: ChatRepository, repository: ChatRepository,
direct_chat: CodeExplainChatService | None = None,
task_store: TaskStore | None = None,
) -> None: ) -> None:
self._rag_sessions = rag_sessions self._rag_sessions = rag_sessions
self.tasks = TaskStore() self._simple_code_explain_only = os.getenv("SIMPLE_CODE_EXPLAIN_ONLY", "true").lower() in {"1", "true", "yes"}
self.tasks = task_store or TaskStore()
self.dialogs = DialogSessionStore(repository) self.dialogs = DialogSessionStore(repository)
self.idempotency = IdempotencyStore() self.idempotency = IdempotencyStore()
self.events = event_bus self.events = event_bus
self.direct_chat = direct_chat
self.chat = ChatOrchestrator( self.chat = ChatOrchestrator(
task_store=self.tasks, task_store=self.tasks,
dialogs=self.dialogs, dialogs=self.dialogs,
@@ -59,11 +71,13 @@ class ChatModule:
rag_session_id=dialog.rag_session_id, rag_session_id=dialog.rag_session_id,
) )
@router.post("/api/chat/messages", response_model=TaskQueuedResponse) @router.post("/api/chat/messages", response_model=TaskQueuedResponse | TaskResultResponse)
async def send_message( async def send_message(
request: ChatMessageRequest, request: ChatMessageRequest,
idempotency_key: str | None = Header(default=None, alias="Idempotency-Key"), idempotency_key: str | None = Header(default=None, alias="Idempotency-Key"),
) -> TaskQueuedResponse: ) -> TaskQueuedResponse | TaskResultResponse:
if self._simple_code_explain_only and self.direct_chat is not None:
return await self.direct_chat.handle_message(request)
task = await self.chat.enqueue_message(request, idempotency_key) task = await self.chat.enqueue_message(request, idempotency_key)
return TaskQueuedResponse(task_id=task.task_id, status=task.status.value) return TaskQueuedResponse(task_id=task.task_id, status=task.status.value)

View File

@@ -6,6 +6,7 @@ from app.modules.contracts import AgentRunner
from app.schemas.chat import ChatMessageRequest, TaskResultType, TaskStatus from app.schemas.chat import ChatMessageRequest, TaskResultType, TaskStatus
from app.schemas.common import ErrorPayload, ModuleName from app.schemas.common import ErrorPayload, ModuleName
from app.modules.chat.dialog_store import DialogSessionStore from app.modules.chat.dialog_store import DialogSessionStore
from app.modules.chat.session_resolver import ChatSessionResolver
from app.modules.chat.task_store import TaskState, TaskStore from app.modules.chat.task_store import TaskState, TaskStore
from app.modules.shared.event_bus import EventBus from app.modules.shared.event_bus import EventBus
from app.modules.shared.idempotency_store import IdempotencyStore from app.modules.shared.idempotency_store import IdempotencyStore
@@ -41,6 +42,7 @@ class ChatOrchestrator:
self._retry = retry self._retry = retry
self._rag_session_exists = rag_session_exists self._rag_session_exists = rag_session_exists
self._message_sink = message_sink self._message_sink = message_sink
self._session_resolver = ChatSessionResolver(dialogs, rag_session_exists)
async def enqueue_message( async def enqueue_message(
self, self,
@@ -52,7 +54,7 @@ class ChatOrchestrator:
if existing: if existing:
task = self._task_store.get(existing) task = self._task_store.get(existing)
if task: if task:
LOGGER.warning( LOGGER.info(
"enqueue_message reused task by idempotency key: task_id=%s mode=%s", "enqueue_message reused task by idempotency key: task_id=%s mode=%s",
task.task_id, task.task_id,
request.mode.value, request.mode.value,
@@ -63,7 +65,7 @@ class ChatOrchestrator:
if idempotency_key: if idempotency_key:
self._idempotency.put(idempotency_key, task.task_id) self._idempotency.put(idempotency_key, task.task_id)
asyncio.create_task(self._process_task(task.task_id, request)) asyncio.create_task(self._process_task(task.task_id, request))
LOGGER.warning( LOGGER.info(
"enqueue_message created task: task_id=%s mode=%s", "enqueue_message created task: task_id=%s mode=%s",
task.task_id, task.task_id,
request.mode.value, request.mode.value,
@@ -135,6 +137,13 @@ class ChatOrchestrator:
task.changeset = result.changeset task.changeset = result.changeset
if task.result_type == TaskResultType.ANSWER and task.answer: if task.result_type == TaskResultType.ANSWER and task.answer:
self._message_sink(dialog_session_id, "assistant", task.answer, task_id=task_id) self._message_sink(dialog_session_id, "assistant", task.answer, task_id=task_id)
LOGGER.warning(
"outgoing chat response: task_id=%s dialog_session_id=%s result_type=%s answer=%s",
task_id,
dialog_session_id,
task.result_type.value,
_truncate_for_log(task.answer),
)
elif task.result_type == TaskResultType.CHANGESET: elif task.result_type == TaskResultType.CHANGESET:
self._message_sink( self._message_sink(
dialog_session_id, dialog_session_id,
@@ -146,6 +155,14 @@ class ChatOrchestrator:
"changeset": [item.model_dump(mode="json") for item in task.changeset], "changeset": [item.model_dump(mode="json") for item in task.changeset],
}, },
) )
LOGGER.warning(
"outgoing chat response: task_id=%s dialog_session_id=%s result_type=%s changeset_items=%s answer=%s",
task_id,
dialog_session_id,
task.result_type.value,
len(task.changeset),
_truncate_for_log(task.answer or ""),
)
self._task_store.save(task) self._task_store.save(task)
await self._events.publish( await self._events.publish(
task_id, task_id,
@@ -160,7 +177,7 @@ class ChatOrchestrator:
}, },
) )
await self._publish_progress(task_id, "task.done", "Обработка завершена.", progress=100) await self._publish_progress(task_id, "task.done", "Обработка завершена.", progress=100)
LOGGER.warning( LOGGER.info(
"_process_task completed: task_id=%s status=%s result_type=%s changeset_items=%s", "_process_task completed: task_id=%s status=%s result_type=%s changeset_items=%s",
task_id, task_id,
task.status.value, task.status.value,
@@ -232,7 +249,7 @@ class ChatOrchestrator:
if progress is not None: if progress is not None:
payload["progress"] = max(0, min(100, int(progress))) payload["progress"] = max(0, min(100, int(progress)))
await self._events.publish(task_id, kind, payload) await self._events.publish(task_id, kind, payload)
LOGGER.warning( LOGGER.debug(
"_publish_progress emitted: task_id=%s kind=%s stage=%s progress=%s", "_publish_progress emitted: task_id=%s kind=%s stage=%s progress=%s",
task_id, task_id,
kind, kind,
@@ -259,35 +276,7 @@ class ChatOrchestrator:
meta={"heartbeat": True}, meta={"heartbeat": True},
) )
index += 1 index += 1
LOGGER.warning("_run_heartbeat stopped: task_id=%s ticks=%s", task_id, index) LOGGER.debug("_run_heartbeat stopped: task_id=%s ticks=%s", task_id, index)
def _resolve_sessions(self, request: ChatMessageRequest) -> tuple[str, str]: def _resolve_sessions(self, request: ChatMessageRequest) -> tuple[str, str]:
# Legacy compatibility: old session_id/project_id flow. return self._session_resolver.resolve(request)
if request.dialog_session_id and request.rag_session_id:
dialog = self._dialogs.get(request.dialog_session_id)
if not dialog:
raise AppError("dialog_not_found", "Dialog session not found", ModuleName.BACKEND)
if dialog.rag_session_id != request.rag_session_id:
raise AppError("dialog_rag_mismatch", "Dialog session does not belong to rag session", ModuleName.BACKEND)
LOGGER.warning(
"_resolve_sessions resolved by dialog_session_id: dialog_session_id=%s rag_session_id=%s",
request.dialog_session_id,
request.rag_session_id,
)
return request.dialog_session_id, request.rag_session_id
if request.session_id and request.project_id:
if not self._rag_session_exists(request.project_id):
raise AppError("rag_session_not_found", "RAG session not found", ModuleName.RAG)
LOGGER.warning(
"_resolve_sessions resolved by legacy session/project: session_id=%s project_id=%s",
request.session_id,
request.project_id,
)
return request.session_id, request.project_id
raise AppError(
"missing_sessions",
"dialog_session_id and rag_session_id are required",
ModuleName.BACKEND,
)

View File

@@ -0,0 +1,36 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from app.core.exceptions import AppError
from app.schemas.chat import ChatMessageRequest
from app.schemas.common import ModuleName
if TYPE_CHECKING:
from app.modules.chat.dialog_store import DialogSessionStore
class ChatSessionResolver:
    """Maps an incoming chat request to a (dialog_session_id, rag_session_id) pair."""

    def __init__(self, dialogs: DialogSessionStore, rag_session_exists) -> None:
        # rag_session_exists: callable(rag_session_id) -> bool; used only by
        # the legacy session_id/project_id flow.
        self._dialogs = dialogs
        self._rag_session_exists = rag_session_exists

    def resolve(self, request: ChatMessageRequest) -> tuple[str, str]:
        """Return the resolved session pair or raise AppError when unresolvable."""
        if request.dialog_session_id and request.rag_session_id:
            return self._resolve_by_dialog(request)
        if request.session_id and request.project_id:
            return self._resolve_legacy(request)
        raise AppError(
            "missing_sessions",
            "dialog_session_id and rag_session_id are required",
            ModuleName.BACKEND,
        )

    def _resolve_by_dialog(self, request: ChatMessageRequest) -> tuple[str, str]:
        # Primary flow: the explicit dialog must exist and belong to the rag session.
        dialog = self._dialogs.get(request.dialog_session_id)
        if not dialog:
            raise AppError("dialog_not_found", "Dialog session not found", ModuleName.BACKEND)
        if dialog.rag_session_id != request.rag_session_id:
            raise AppError("dialog_rag_mismatch", "Dialog session does not belong to rag session", ModuleName.BACKEND)
        return request.dialog_session_id, request.rag_session_id

    def _resolve_legacy(self, request: ChatMessageRequest) -> tuple[str, str]:
        # Legacy compatibility: old session_id/project_id request shape.
        if not self._rag_session_exists(request.project_id):
            raise AppError("rag_session_not_found", "RAG session not found", ModuleName.RAG)
        return request.session_id, request.project_id

View File

@@ -90,6 +90,41 @@ sequenceDiagram
Rag-->>Agent: items Rag-->>Agent: items
``` ```
### Retrieval + project/qa reasoning
Назначение: `RAG` вызывается не в начале runtime, а внутри отдельного graph-шага `context_retrieval` для `project/qa`.
```mermaid
sequenceDiagram
participant Agent as GraphAgentRuntime
participant Orch as OrchestratorService
participant G1 as conversation_understanding
participant G2 as question_classification
participant G3 as context_retrieval
participant Rag as RagService
participant G4 as context_analysis
participant G5 as answer_composition
Agent->>Orch: run(task)
Orch->>G1: execute
G1-->>Orch: resolved_request
Orch->>G2: execute
G2-->>Orch: question_profile
Orch->>G3: execute
G3->>Rag: retrieve(query)
Rag-->>G3: rag_items
G3-->>Orch: source_bundle
Orch->>G4: execute
G4-->>Orch: analysis_brief
Orch->>G5: execute
G5-->>Orch: final_answer
Orch-->>Agent: final_answer
```
Для `project/qa` это означает:
- ранний глобальный retrieval больше не нужен;
- `RAG` возвращает записи только для конкретного шага `context_retrieval`;
- оркестратор управляет цепочкой graph-шагов;
- пользовательский ответ собирается после анализа, а не напрямую из сырого retrieval.
## 5. Слои, фиксируемые в RAG ## 5. Слои, фиксируемые в RAG
### 5.1. Слои DOCS ### 5.1. Слои DOCS

View File

@@ -0,0 +1,36 @@
from __future__ import annotations
from importlib import import_module
__all__ = [
    "CodeExcerpt",
    "CodeExplainRetrieverV2",
    "CodeGraphRepository",
    "EvidenceItem",
    "ExplainIntent",
    "ExplainIntentBuilder",
    "ExplainPack",
    "LayeredRetrievalGateway",
    "PromptBudgeter",
    "TracePath",
]

# Attribute -> defining module for PEP 562 lazy re-export. Built once at
# import time instead of on every __getattr__ call.
_MODULE_BY_ATTR = {
    "CodeExcerpt": "app.modules.rag.explain.models",
    "EvidenceItem": "app.modules.rag.explain.models",
    "ExplainIntent": "app.modules.rag.explain.models",
    "ExplainPack": "app.modules.rag.explain.models",
    "TracePath": "app.modules.rag.explain.models",
    "ExplainIntentBuilder": "app.modules.rag.explain.intent_builder",
    "PromptBudgeter": "app.modules.rag.explain.budgeter",
    "LayeredRetrievalGateway": "app.modules.rag.explain.layered_gateway",
    "CodeGraphRepository": "app.modules.rag.explain.graph_repository",
    "CodeExplainRetrieverV2": "app.modules.rag.explain.retriever_v2",
}


def __getattr__(name: str):
    """Lazily import and return the public attribute *name* (PEP 562).

    Raises AttributeError for names outside the package's public API.
    """
    module_name = _MODULE_BY_ATTR.get(name)
    if module_name is None:
        raise AttributeError(name)
    module = import_module(module_name)
    return getattr(module, name)

View File

@@ -0,0 +1,62 @@
from __future__ import annotations
import json
from app.modules.rag.explain.models import ExplainPack
class PromptBudgeter:
    """Trims an ExplainPack to a bounded, pretty-printed JSON prompt payload."""

    def __init__(
        self,
        *,
        max_paths: int = 3,
        max_symbols: int = 25,
        max_excerpts: int = 40,
        max_chars: int = 30000,
    ) -> None:
        self._max_paths = max_paths
        self._max_symbols = max_symbols
        self._max_excerpts = max_excerpts
        self._max_chars = max_chars

    def build_prompt_input(self, question: str, pack: ExplainPack) -> str:
        """Serialize *question* plus the budgeted evidence from *pack* as JSON."""
        allowed_symbols = self._trace_symbol_ids(pack)
        payload = {
            "question": question,
            "intent": pack.intent.model_dump(mode="json"),
            "selected_entrypoints": [item.model_dump(mode="json") for item in pack.selected_entrypoints[:5]],
            "seed_symbols": [item.model_dump(mode="json") for item in pack.seed_symbols[: self._max_symbols]],
            "trace_paths": [path.model_dump(mode="json") for path in pack.trace_paths[: self._max_paths]],
            "evidence_index": {key: value.model_dump(mode="json") for key, value in pack.evidence_index.items()},
            "code_excerpts": self._budgeted_excerpts(pack, allowed_symbols),
            "missing": pack.missing,
            "conflicts": pack.conflicts,
        }
        return json.dumps(payload, ensure_ascii=False, indent=2)

    def _trace_symbol_ids(self, pack: ExplainPack) -> list[str]:
        # Symbols reachable from the strongest trace paths; when non-empty,
        # excerpts tagged with other symbol ids are filtered out.
        collected: list[str] = []
        for path in pack.trace_paths[: self._max_paths]:
            for symbol_id in path.symbol_ids:
                if symbol_id and symbol_id not in collected and len(collected) < self._max_symbols:
                    collected.append(symbol_id)
        return collected

    def _budgeted_excerpts(self, pack: ExplainPack, allowed_symbols: list[str]) -> list[dict]:
        # Apply count and character budgets, truncating the last excerpt if needed.
        selected: list[dict] = []
        used_chars = 0
        for excerpt in pack.code_excerpts:
            if allowed_symbols and excerpt.symbol_id and excerpt.symbol_id not in allowed_symbols:
                continue
            body = excerpt.content.strip()
            remaining = self._max_chars - used_chars
            if remaining <= 0 or len(selected) >= self._max_excerpts:
                break
            if len(body) > remaining:
                body = body[:remaining].rstrip() + "...[truncated]"
            selected.append(
                {
                    "evidence_id": excerpt.evidence_id,
                    "title": excerpt.title,
                    "path": excerpt.path,
                    "start_line": excerpt.start_line,
                    "end_line": excerpt.end_line,
                    "focus": excerpt.focus,
                    "content": body,
                }
            )
            used_chars += len(body)
        return selected

View File

@@ -0,0 +1,59 @@
from __future__ import annotations
from app.modules.rag.explain.models import CodeExcerpt, LayeredRetrievalItem
class ExcerptPlanner:
    """Turns one retrieved source chunk into prompt-ready code excerpts."""

    # Substrings that usually mark the "interesting" line of a chunk
    # (error handling, persistence, transport, event emission).
    _FOCUS_TOKENS = ("raise", "except", "db", "select", "insert", "update", "delete", "http", "publish", "emit")

    def plan(self, chunk: LayeredRetrievalItem, *, evidence_id: str, symbol_id: str | None) -> list[CodeExcerpt]:
        """Return an overview excerpt plus an optional focus window; [] when the chunk has no location."""
        location = chunk.location
        if location is None:
            return []
        overview = CodeExcerpt(
            evidence_id=evidence_id,
            symbol_id=symbol_id,
            title=chunk.title,
            path=location.path,
            start_line=location.start_line,
            end_line=location.end_line,
            content=chunk.content.strip(),
            focus="overview",
        )
        planned = [overview]
        focus = self._focus_excerpt(chunk, evidence_id=evidence_id, symbol_id=symbol_id)
        if focus is not None:
            planned.append(focus)
        return planned

    def _focus_excerpt(
        self,
        chunk: LayeredRetrievalItem,
        *,
        evidence_id: str,
        symbol_id: str | None,
    ) -> CodeExcerpt | None:
        # Build a +/-2 line window around the first line matching a focus token.
        location = chunk.location
        if location is None:
            return None
        content_lines = chunk.content.splitlines()
        for index, line in enumerate(content_lines):
            if not self._is_focus_line(line):
                continue
            window_start = max(0, index - 2)
            window_end = min(len(content_lines), index + 3)
            if window_end - window_start >= len(content_lines):
                # Window would cover the whole chunk: a focus excerpt adds nothing.
                return None
            base_line = (location.start_line or 1)
            return CodeExcerpt(
                evidence_id=evidence_id,
                symbol_id=symbol_id,
                title=f"{chunk.title}:focus",
                path=location.path,
                start_line=base_line + window_start,
                end_line=base_line + window_end - 1,
                content="\n".join(content_lines[window_start:window_end]).strip(),
                focus="focus",
            )
        return None

    def _is_focus_line(self, line: str) -> bool:
        lowered = line.lower()
        return any(token in lowered for token in self._FOCUS_TOKENS)

View File

@@ -0,0 +1,216 @@
from __future__ import annotations
import json
from sqlalchemy import text
from app.modules.rag.explain.models import CodeLocation, LayeredRetrievalItem
from app.modules.shared.db import get_engine
class CodeGraphRepository:
    """Read-only queries over the code-graph layers persisted in rag_chunks.

    Layers queried:
      * ``C2_DEPENDENCY_GRAPH`` - edge records between symbols;
      * ``C1_SYMBOL_CATALOG``   - one record per declared symbol;
      * ``C0_SOURCE_CHUNKS``    - raw source chunks covering line spans.
    """

    # Shared template for edge lookups; {key} is either 'src_symbol_id'
    # (outgoing edges) or 'dst_symbol_id' (incoming edges).
    _EDGE_SQL = """
        SELECT path, content, layer, title, metadata_json, span_start, span_end
        FROM rag_chunks
        WHERE rag_session_id = :sid
          AND layer = 'C2_DEPENDENCY_GRAPH'
          AND CAST(metadata_json AS jsonb)->>'{key}' = ANY(:ref_ids)
          AND CAST(metadata_json AS jsonb)->>'edge_type' = ANY(:edge_types)
        ORDER BY path, span_start
    """

    def get_out_edges(
        self,
        rag_session_id: str,
        src_symbol_ids: list[str],
        edge_types: list[str],
        limit_per_src: int,
    ) -> list[LayeredRetrievalItem]:
        """Return edges leaving any of *src_symbol_ids*, capped per source symbol."""
        return self._edge_items(rag_session_id, "src_symbol_id", src_symbol_ids, edge_types, limit_per_src)

    def get_in_edges(
        self,
        rag_session_id: str,
        dst_symbol_ids: list[str],
        edge_types: list[str],
        limit_per_dst: int,
    ) -> list[LayeredRetrievalItem]:
        """Return edges entering any of *dst_symbol_ids*, capped per destination symbol."""
        return self._edge_items(rag_session_id, "dst_symbol_id", dst_symbol_ids, edge_types, limit_per_dst)

    def _edge_items(
        self,
        rag_session_id: str,
        key: str,
        ref_ids: list[str],
        edge_types: list[str],
        limit_per_ref: int,
    ) -> list[LayeredRetrievalItem]:
        # Shared implementation for out/in edge queries (previously duplicated);
        # *key* is the metadata field the rows are grouped and capped by.
        if not ref_ids:
            return []
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(self._EDGE_SQL.format(key=key)),
                {"sid": rag_session_id, "ref_ids": ref_ids, "edge_types": edge_types},
            ).mappings().fetchall()
        per_ref: dict[str, int] = {}
        items: list[LayeredRetrievalItem] = []
        for row in rows:
            metadata = self._loads(row.get("metadata_json"))
            ref_id = str(metadata.get(key) or "")
            per_ref[ref_id] = per_ref.get(ref_id, 0) + 1
            if per_ref[ref_id] > limit_per_ref:
                continue
            items.append(self._to_item(row, metadata))
        return items

    def resolve_symbol_by_ref(
        self,
        rag_session_id: str,
        dst_ref: str,
        package_hint: str | None = None,
    ) -> LayeredRetrievalItem | None:
        """Resolve a textual symbol reference to the best-matching catalog record.

        Candidates are fetched by exact qname/title or qname suffix, then ranked:
        exact qname (+3), exact title (+2), package prefix match (+3),
        package hint contained in the file path (+1). Returns None when *dst_ref*
        is blank or nothing matches.
        """
        ref = (dst_ref or "").strip()
        if not ref:
            return None
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(
                    """
                    SELECT path, content, layer, title, metadata_json, span_start, span_end, qname
                    FROM rag_chunks
                    WHERE rag_session_id = :sid
                      AND layer = 'C1_SYMBOL_CATALOG'
                      AND (qname = :ref OR title = :ref OR qname LIKE :tail)
                    ORDER BY path
                    LIMIT 12
                    """
                ),
                {"sid": rag_session_id, "ref": ref, "tail": f"%{ref}"},
            ).mappings().fetchall()
        best: LayeredRetrievalItem | None = None
        best_score = -1
        for row in rows:
            metadata = self._loads(row.get("metadata_json"))
            package = str(metadata.get("package_or_module") or "")
            score = 0
            if str(row.get("qname") or "") == ref:
                score += 3
            if str(row.get("title") or "") == ref:
                score += 2
            if package_hint and package.startswith(package_hint):
                score += 3
            if package_hint and package_hint in str(row.get("path") or ""):
                score += 1
            if score > best_score:
                best = self._to_item(row, metadata)
                best_score = score
        return best

    def get_symbols_by_ids(self, rag_session_id: str, symbol_ids: list[str]) -> list[LayeredRetrievalItem]:
        """Fetch symbol-catalog records for *symbol_ids* (empty list -> [])."""
        if not symbol_ids:
            return []
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(
                    """
                    SELECT path, content, layer, title, metadata_json, span_start, span_end
                    FROM rag_chunks
                    WHERE rag_session_id = :sid
                      AND layer = 'C1_SYMBOL_CATALOG'
                      AND symbol_id = ANY(:symbol_ids)
                    ORDER BY path, span_start
                    """
                ),
                {"sid": rag_session_id, "symbol_ids": symbol_ids},
            ).mappings().fetchall()
        return [self._to_item(row, self._loads(row.get("metadata_json"))) for row in rows]

    def get_chunks_by_symbol_ids(
        self,
        rag_session_id: str,
        symbol_ids: list[str],
        prefer_chunk_type: str = "symbol_block",
    ) -> list[LayeredRetrievalItem]:
        """Return the best C0 source chunk covering each requested symbol.

        Symbols with no location or no overlapping chunk are silently skipped.
        """
        symbols = self.get_symbols_by_ids(rag_session_id, symbol_ids)
        chunks: list[LayeredRetrievalItem] = []
        for symbol in symbols:
            if symbol.location is None:
                continue
            chunk = self._chunk_for_symbol(rag_session_id, symbol, prefer_chunk_type=prefer_chunk_type)
            if chunk is not None:
                chunks.append(chunk)
        return chunks

    def _chunk_for_symbol(
        self,
        rag_session_id: str,
        symbol: LayeredRetrievalItem,
        *,
        prefer_chunk_type: str,
    ) -> LayeredRetrievalItem | None:
        # Pick the C0 chunk overlapping the symbol's span, preferring chunks of
        # *prefer_chunk_type* and, among those, the closest start line.
        location = symbol.location
        if location is None:
            return None
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(
                    """
                    SELECT path, content, layer, title, metadata_json, span_start, span_end
                    FROM rag_chunks
                    WHERE rag_session_id = :sid
                      AND layer = 'C0_SOURCE_CHUNKS'
                      AND path = :path
                      AND COALESCE(span_start, 0) <= :end_line
                      AND COALESCE(span_end, 999999) >= :start_line
                    ORDER BY
                      CASE WHEN CAST(metadata_json AS jsonb)->>'chunk_type' = :prefer_chunk_type THEN 0 ELSE 1 END,
                      ABS(COALESCE(span_start, 0) - :start_line)
                    LIMIT 1
                    """
                ),
                {
                    "sid": rag_session_id,
                    "path": location.path,
                    "start_line": location.start_line or 0,
                    "end_line": location.end_line or 999999,
                    "prefer_chunk_type": prefer_chunk_type,
                },
            ).mappings().fetchall()
        if not rows:
            return None
        row = rows[0]
        return self._to_item(row, self._loads(row.get("metadata_json")))

    def _to_item(self, row, metadata: dict) -> LayeredRetrievalItem:
        # Adapt a SQL row mapping into the shared retrieval item model.
        return LayeredRetrievalItem(
            source=str(row.get("path") or ""),
            content=str(row.get("content") or ""),
            layer=str(row.get("layer") or ""),
            title=str(row.get("title") or ""),
            metadata=metadata,
            location=CodeLocation(
                path=str(row.get("path") or ""),
                start_line=row.get("span_start"),
                end_line=row.get("span_end"),
            ),
        )

    def _loads(self, value) -> dict:
        # metadata_json is stored as text; empty/NULL means "no metadata".
        if not value:
            return {}
        return json.loads(str(value))

View File

@@ -0,0 +1,102 @@
from __future__ import annotations
import re
from app.modules.rag.explain.models import ExplainHints, ExplainIntent
from app.modules.rag.retrieval.query_terms import extract_query_terms
class ExplainIntentBuilder:
    """Parses a free-form user question into a structured ExplainIntent."""

    _ROUTE_RE = re.compile(r"(/[A-Za-z0-9_./{}:-]+)")
    _FILE_RE = re.compile(r"([A-Za-z0-9_./-]+\.py)")
    _SYMBOL_RE = re.compile(r"\b([A-Z][A-Za-z0-9_]*\.[A-Za-z_][A-Za-z0-9_]*|[A-Z][A-Za-z0-9_]{2,}|[a-z_][A-Za-z0-9_]{2,})\b")
    _COMMAND_RE = re.compile(r"`([A-Za-z0-9:_-]+)`")
    _TEST_KEYWORDS = (
        "тест",
        "tests",
        "test ",
        "unit-test",
        "unit test",
        "юнит-тест",
        "pytest",
        "spec",
        "как покрыто тестами",
        "как проверяется",
        "how is it tested",
        "how it's tested",
    )

    def build(self, user_query: str) -> ExplainIntent:
        """Derive hints, keywords, depth, entry types and the tests flag from the query."""
        normalized = " ".join((user_query or "").split())
        lowered = normalized.lower()
        hints = ExplainHints(
            paths=self._dedupe(self._FILE_RE.findall(normalized)),
            symbols=self._symbols(normalized),
            endpoints=self._dedupe(self._ROUTE_RE.findall(normalized)),
            commands=self._commands(normalized, lowered),
        )
        return ExplainIntent(
            raw_query=user_query,
            normalized_query=normalized,
            keywords=self._keywords(normalized)[:12],
            hints=hints,
            include_tests=self._include_tests(lowered),
            expected_entry_types=self._entry_types(lowered, hints),
            depth=self._depth(lowered),
        )

    def _keywords(self, text: str) -> list[str]:
        # Base terms plus symbol/route tokens not already present, in order.
        collected = extract_query_terms(text)
        for token in self._symbols(text) + self._ROUTE_RE.findall(text):
            if token not in collected:
                collected.append(token)
        return self._dedupe(collected)

    def _symbols(self, text: str) -> list[str]:
        candidates = [raw.strip() for raw in self._SYMBOL_RE.findall(text)]
        kept = [token for token in candidates if len(token) >= 3 and not token.endswith(".py")]
        return self._dedupe(kept)

    def _commands(self, text: str, lowered: str) -> list[str]:
        # Backticked tokens, plus words following "command"/"cli" when mentioned.
        found = list(self._COMMAND_RE.findall(text))
        padded = f" {lowered} "
        if " command " in padded:
            found.extend(re.findall(r"command\s+([A-Za-z0-9:_-]+)", lowered))
        if " cli " in padded:
            found.extend(re.findall(r"cli\s+([A-Za-z0-9:_-]+)", lowered))
        return self._dedupe(found)

    def _entry_types(self, lowered: str, hints: ExplainHints) -> list[str]:
        http_markers = ("endpoint", "route", "handler", "http", "api")
        cli_markers = ("cli", "command", "click", "typer")
        if hints.endpoints or any(marker in lowered for marker in http_markers):
            return ["http"]
        if hints.commands or any(marker in lowered for marker in cli_markers):
            return ["cli"]
        return ["http", "cli"]

    def _depth(self, lowered: str) -> str:
        deep_markers = ("deep", "подроб", "деталь", "full flow", "trace")
        high_markers = ("high level", "overview", "кратко", "summary")
        if any(marker in lowered for marker in deep_markers):
            return "deep"
        if any(marker in lowered for marker in high_markers):
            return "high"
        return "medium"

    def _include_tests(self, lowered: bool) -> bool:
        padded = f" {lowered} "
        return any(keyword in padded for keyword in self._TEST_KEYWORDS)

    def _dedupe(self, values: list[str]) -> list[str]:
        unique: list[str] = []
        for raw in values:
            candidate = raw.strip()
            if candidate and candidate not in unique:
                unique.append(candidate)
        return unique

View File

@@ -0,0 +1,289 @@
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Callable
from app.modules.rag.explain.models import CodeLocation, LayeredRetrievalItem
from app.modules.rag.retrieval.test_filter import build_test_filters, debug_disable_test_filter
LOGGER = logging.getLogger(__name__)
if TYPE_CHECKING:
from app.modules.rag.persistence.repository import RagRepository
from app.modules.rag_session.embedding.gigachat_embedder import GigaChatEmbedder
@dataclass(slots=True)
class LayerRetrievalResult:
    """Items returned by one retrieval call plus diagnostic ``missing`` notes."""
    items: list[LayeredRetrievalItem]
    # Strings describing failed or degraded retrievals (e.g. "layer:C3 retrieval_failed:...").
    missing: list[str] = field(default_factory=list)
class LayeredRetrievalGateway:
    """Facade over ``RagRepository`` retrieval with test-file filtering.

    Wraps per-layer vector retrieval and lexical code retrieval, logs every
    call, and retries a failed filtered query once without the test filter.
    """

    def __init__(self, repository: RagRepository, embedder: GigaChatEmbedder) -> None:
        self._repository = repository
        self._embedder = embedder

    def retrieve_layer(
        self,
        rag_session_id: str,
        query: str,
        layer: str,
        *,
        limit: int,
        path_prefixes: list[str] | None = None,
        exclude_tests: bool = True,
        prefer_non_tests: bool = False,
        include_spans: bool = False,
    ) -> LayerRetrievalResult:
        """Embed ``query`` and retrieve up to ``limit`` items from one layer.

        A failure after embedding succeeded triggers one retry without test
        filters; an embedding failure is terminal and reported via ``missing``.
        """
        effective_exclude_tests = exclude_tests and not debug_disable_test_filter()
        filter_args = self._filter_args(effective_exclude_tests)
        query_embedding: list[float] | None = None
        try:
            query_embedding = self._embedder.embed([query])[0]
            rows = self._repository.retrieve(
                rag_session_id,
                query_embedding,
                query_text=query,
                limit=limit,
                layers=[layer],
                path_prefixes=path_prefixes,
                exclude_path_prefixes=filter_args["exclude_path_prefixes"],
                exclude_like_patterns=filter_args["exclude_like_patterns"],
                prefer_non_tests=prefer_non_tests or not effective_exclude_tests,
            )
            return self._success_result(
                rows,
                rag_session_id=rag_session_id,
                label="layered retrieval",
                include_spans=include_spans,
                layer=layer,
                exclude_tests=effective_exclude_tests,
                path_prefixes=path_prefixes,
            )
        except Exception as exc:
            # query_embedding is None only when embed() itself raised; there is
            # nothing to retry with in that case.
            if query_embedding is None:
                self._log_failure(
                    label="layered retrieval",
                    rag_session_id=rag_session_id,
                    layer=layer,
                    exclude_tests=effective_exclude_tests,
                    path_prefixes=path_prefixes,
                    exc=exc,
                )
                return LayerRetrievalResult(items=[], missing=[self._failure_missing(f"layer:{layer} retrieval_failed", exc)])
            retry_result = self._retry_without_test_filter(
                operation=lambda: self._repository.retrieve(
                    rag_session_id,
                    query_embedding,
                    query_text=query,
                    limit=limit,
                    layers=[layer],
                    path_prefixes=path_prefixes,
                    exclude_path_prefixes=None,
                    exclude_like_patterns=None,
                    prefer_non_tests=True,
                ),
                label="layered retrieval",
                rag_session_id=rag_session_id,
                include_spans=include_spans,
                layer=layer,
                exclude_tests=effective_exclude_tests,
                path_prefixes=path_prefixes,
                exc=exc,
                missing_prefix=f"layer:{layer} retrieval_failed",
            )
            if retry_result is not None:
                return retry_result
            return LayerRetrievalResult(items=[], missing=[self._failure_missing(f"layer:{layer} retrieval_failed", exc)])

    def retrieve_lexical_code(
        self,
        rag_session_id: str,
        query: str,
        *,
        limit: int,
        path_prefixes: list[str] | None = None,
        exclude_tests: bool = True,
        include_spans: bool = False,
    ) -> LayerRetrievalResult:
        """Run a text-only (no embedding) code retrieval with the same retry policy."""
        effective_exclude_tests = exclude_tests and not debug_disable_test_filter()
        filter_args = self._filter_args(effective_exclude_tests)
        try:
            rows = self._repository.retrieve_lexical_code(
                rag_session_id,
                query_text=query,
                limit=limit,
                path_prefixes=path_prefixes,
                exclude_path_prefixes=filter_args["exclude_path_prefixes"],
                exclude_like_patterns=filter_args["exclude_like_patterns"],
                prefer_non_tests=not effective_exclude_tests,
            )
            return self._success_result(
                rows,
                rag_session_id=rag_session_id,
                label="lexical retrieval",
                include_spans=include_spans,
                exclude_tests=effective_exclude_tests,
                path_prefixes=path_prefixes,
            )
        except Exception as exc:
            retry_result = self._retry_without_test_filter(
                operation=lambda: self._repository.retrieve_lexical_code(
                    rag_session_id,
                    query_text=query,
                    limit=limit,
                    path_prefixes=path_prefixes,
                    exclude_path_prefixes=None,
                    exclude_like_patterns=None,
                    prefer_non_tests=True,
                ),
                label="lexical retrieval",
                rag_session_id=rag_session_id,
                include_spans=include_spans,
                exclude_tests=effective_exclude_tests,
                path_prefixes=path_prefixes,
                exc=exc,
                missing_prefix="layer:C0 lexical_retrieval_failed",
            )
            if retry_result is not None:
                return retry_result
            return LayerRetrievalResult(items=[], missing=[self._failure_missing("layer:C0 lexical_retrieval_failed", exc)])

    def _retry_without_test_filter(
        self,
        *,
        operation: Callable[[], list[dict]],
        label: str,
        rag_session_id: str,
        include_spans: bool,
        exclude_tests: bool,
        path_prefixes: list[str] | None,
        exc: Exception,
        missing_prefix: str,
        layer: str | None = None,
    ) -> LayerRetrievalResult | None:
        """Re-run ``operation`` without test filters after a failure.

        Returns None when no retry applies (filters were already off) or when
        the retry itself fails; a successful retry is tagged in ``missing``.
        """
        if not exclude_tests:
            # Test filters were not active, so retrying without them cannot help.
            self._log_failure(
                label=label,
                rag_session_id=rag_session_id,
                layer=layer,
                exclude_tests=exclude_tests,
                path_prefixes=path_prefixes,
                exc=exc,
            )
            return None
        self._log_failure(
            label=label,
            rag_session_id=rag_session_id,
            layer=layer,
            exclude_tests=exclude_tests,
            path_prefixes=path_prefixes,
            exc=exc,
            retried_without_test_filter=True,
        )
        try:
            rows = operation()
        except Exception as retry_exc:
            self._log_failure(
                label=f"{label} retry",
                rag_session_id=rag_session_id,
                layer=layer,
                exclude_tests=False,
                path_prefixes=path_prefixes,
                exc=retry_exc,
            )
            return None
        result = self._success_result(
            rows,
            rag_session_id=rag_session_id,
            label=f"{label} retry",
            include_spans=include_spans,
            layer=layer,
            exclude_tests=False,
            path_prefixes=path_prefixes,
        )
        result.missing.append(f"{missing_prefix}:retried_without_test_filter")
        return result

    def _success_result(
        self,
        rows: list[dict],
        *,
        rag_session_id: str,
        label: str,
        include_spans: bool,
        exclude_tests: bool,
        path_prefixes: list[str] | None,
        layer: str | None = None,
    ) -> LayerRetrievalResult:
        """Convert rows into items and log a retrieval summary.

        NOTE(review): success is logged at WARNING level, presumably so the
        diagnostics survive a WARNING-level root logger — confirm intent.
        """
        items = [self._to_item(row, include_spans=include_spans) for row in rows]
        LOGGER.warning(
            "%s: rag_session_id=%s layer=%s exclude_tests=%s path_prefixes=%s returned_count=%s top_paths=%s",
            label,
            rag_session_id,
            layer,
            exclude_tests,
            path_prefixes or [],
            len(items),
            [item.source for item in items[:3]],
        )
        return LayerRetrievalResult(items=items)

    def _log_failure(
        self,
        *,
        label: str,
        rag_session_id: str,
        exclude_tests: bool,
        path_prefixes: list[str] | None,
        exc: Exception,
        layer: str | None = None,
        retried_without_test_filter: bool = False,
    ) -> None:
        """Log a retrieval failure with a compact exception summary and traceback."""
        LOGGER.warning(
            "%s failed: rag_session_id=%s layer=%s exclude_tests=%s path_prefixes=%s retried_without_test_filter=%s error=%s",
            label,
            rag_session_id,
            layer,
            exclude_tests,
            path_prefixes or [],
            retried_without_test_filter,
            self._exception_summary(exc),
            exc_info=True,
        )

    def _filter_args(self, exclude_tests: bool) -> dict[str, list[str] | None]:
        """Build repository filter kwargs; both values are None when filtering is off."""
        test_filters = build_test_filters() if exclude_tests else None
        return {
            "exclude_path_prefixes": test_filters.exclude_path_prefixes if test_filters else None,
            "exclude_like_patterns": test_filters.exclude_like_patterns if test_filters else None,
        }

    def _failure_missing(self, prefix: str, exc: Exception) -> str:
        """Format a ``missing`` entry combining a stage prefix and the exception."""
        return f"{prefix}:{self._exception_summary(exc)}"

    def _exception_summary(self, exc: Exception) -> str:
        """Return "<Type>:<message>" with whitespace collapsed and capped at 180 chars."""
        message = " ".join(str(exc).split())
        if len(message) > 180:
            message = message[:177] + "..."
        return f"{type(exc).__name__}:{message or 'no_message'}"

    def _to_item(self, row: dict, *, include_spans: bool) -> LayeredRetrievalItem:
        """Map a repository row to an item; the location is attached only on demand."""
        location = None
        if include_spans:
            location = CodeLocation(
                path=str(row.get("path") or ""),
                start_line=row.get("span_start"),
                end_line=row.get("span_end"),
            )
        return LayeredRetrievalItem(
            source=str(row.get("path") or ""),
            content=str(row.get("content") or ""),
            layer=str(row.get("layer") or ""),
            title=str(row.get("title") or ""),
            metadata=dict(row.get("metadata", {}) or {}),
            score=row.get("distance"),
            location=location,
        )

View File

@@ -0,0 +1,91 @@
from __future__ import annotations
from typing import Any, Literal
from pydantic import BaseModel, ConfigDict, Field
class ExplainHints(BaseModel):
    """Concrete artifacts extracted from the query: paths, symbols, routes, commands."""
    model_config = ConfigDict(extra="forbid")
    paths: list[str] = Field(default_factory=list)
    symbols: list[str] = Field(default_factory=list)
    endpoints: list[str] = Field(default_factory=list)
    commands: list[str] = Field(default_factory=list)
class ExplainIntent(BaseModel):
    """Structured interpretation of a user's explain question."""
    model_config = ConfigDict(extra="forbid")
    raw_query: str
    # Whitespace-collapsed form of raw_query used for retrieval.
    normalized_query: str
    keywords: list[str] = Field(default_factory=list)
    hints: ExplainHints = Field(default_factory=ExplainHints)
    include_tests: bool = False
    # Entrypoint kinds the question targets; empty means "no preference".
    expected_entry_types: list[Literal["http", "cli"]] = Field(default_factory=list)
    depth: Literal["high", "medium", "deep"] = "medium"
class CodeLocation(BaseModel):
    """A file path with an optional 1-based(?) line span — line origin not asserted here."""
    model_config = ConfigDict(extra="forbid")
    path: str
    start_line: int | None = None
    end_line: int | None = None
class LayeredRetrievalItem(BaseModel):
    """A single retrieval hit: source path, content, layer tag and metadata."""
    model_config = ConfigDict(extra="forbid")
    source: str
    content: str
    layer: str
    title: str
    metadata: dict[str, Any] = Field(default_factory=dict)
    # Retrieval score/distance when available.
    score: float | None = None
    location: CodeLocation | None = None
class TracePath(BaseModel):
    """An ordered chain of symbol ids traced through the code graph."""
    model_config = ConfigDict(extra="forbid")
    symbol_ids: list[str] = Field(default_factory=list)
    score: float = 0.0
    entrypoint_id: str | None = None
    # Free-form trace annotations (e.g. "resolved:<ref>").
    notes: list[str] = Field(default_factory=list)
class EvidenceItem(BaseModel):
    """A citable piece of evidence backing the explain answer."""
    model_config = ConfigDict(extra="forbid")
    evidence_id: str
    kind: Literal["entrypoint", "symbol", "edge", "excerpt"]
    summary: str
    location: CodeLocation | None = None
    # Symbol ids this evidence supports (may contain empty strings upstream).
    supports: list[str] = Field(default_factory=list)
class CodeExcerpt(BaseModel):
    """A snippet of source code attached to an evidence id."""
    model_config = ConfigDict(extra="forbid")
    evidence_id: str
    symbol_id: str | None = None
    title: str
    path: str
    start_line: int | None = None
    end_line: int | None = None
    content: str
    # Label describing why the excerpt was selected; "test:"-prefixed for test code.
    focus: str = "overview"
class ExplainPack(BaseModel):
    """Everything assembled for one explain request: intent, evidence, excerpts, gaps."""
    model_config = ConfigDict(extra="forbid")
    intent: ExplainIntent
    selected_entrypoints: list[LayeredRetrievalItem] = Field(default_factory=list)
    seed_symbols: list[LayeredRetrievalItem] = Field(default_factory=list)
    trace_paths: list[TracePath] = Field(default_factory=list)
    evidence_index: dict[str, EvidenceItem] = Field(default_factory=dict)
    code_excerpts: list[CodeExcerpt] = Field(default_factory=list)
    # Diagnostic markers for stages that produced nothing or failed.
    missing: list[str] = Field(default_factory=list)
    conflicts: list[str] = Field(default_factory=list)

View File

@@ -0,0 +1,328 @@
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from app.modules.rag.contracts.enums import RagLayer
from app.modules.rag.explain.intent_builder import ExplainIntentBuilder
from app.modules.rag.explain.layered_gateway import LayerRetrievalResult, LayeredRetrievalGateway
from app.modules.rag.explain.models import CodeExcerpt, EvidenceItem, ExplainPack, LayeredRetrievalItem
from app.modules.rag.explain.source_excerpt_fetcher import SourceExcerptFetcher
from app.modules.rag.explain.trace_builder import TraceBuilder
from app.modules.rag.retrieval.test_filter import exclude_tests_default, is_test_path
LOGGER = logging.getLogger(__name__)
_MIN_EXCERPTS = 2
if TYPE_CHECKING:
from app.modules.rag.explain.graph_repository import CodeGraphRepository
from app.modules.rag.explain.models import ExplainIntent
class CodeExplainRetrieverV2:
    """Pipeline that assembles an ``ExplainPack`` for a code-explain question.

    Stages: intent parsing -> entrypoint retrieval (C3) -> seed symbols (C1)
    -> graph trace paths -> source excerpts, with a lexical (C0) fallback and
    an optional second pass that admits test files when too few excerpts exist.
    """

    def __init__(
        self,
        gateway: LayeredRetrievalGateway,
        graph_repository: CodeGraphRepository,
        intent_builder: ExplainIntentBuilder | None = None,
        trace_builder: TraceBuilder | None = None,
        excerpt_fetcher: SourceExcerptFetcher | None = None,
    ) -> None:
        self._gateway = gateway
        self._graph = graph_repository
        self._intent_builder = intent_builder or ExplainIntentBuilder()
        self._trace_builder = trace_builder or TraceBuilder(graph_repository)
        self._excerpt_fetcher = excerpt_fetcher or SourceExcerptFetcher(graph_repository)

    def build_pack(
        self,
        rag_session_id: str,
        user_query: str,
        *,
        file_candidates: list[dict] | None = None,
    ) -> ExplainPack:
        """Build the pack; retry with test files allowed when excerpts are scarce."""
        intent = self._intent_builder.build(user_query)
        path_prefixes = _path_prefixes(intent, file_candidates or [])
        exclude_tests = exclude_tests_default() and not intent.include_tests
        pack = self._run_pass(rag_session_id, intent, path_prefixes, exclude_tests=exclude_tests)
        if exclude_tests and len(pack.code_excerpts) < _MIN_EXCERPTS:
            self._merge_test_fallback(pack, rag_session_id, intent, path_prefixes)
        self._log_pack(rag_session_id, pack)
        return pack

    def _run_pass(
        self,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
        *,
        exclude_tests: bool,
    ) -> ExplainPack:
        """Run one full retrieval pass and assemble the pack."""
        missing: list[str] = []
        entrypoints_result = self._entrypoints(rag_session_id, intent, path_prefixes, exclude_tests=exclude_tests)
        missing.extend(entrypoints_result.missing)
        selected_entrypoints = self._filter_entrypoints(intent, entrypoints_result.items)
        if not selected_entrypoints:
            missing.append("layer:C3 empty")
        seed_result = self._seed_symbols(rag_session_id, intent, path_prefixes, selected_entrypoints, exclude_tests=exclude_tests)
        missing.extend(seed_result.missing)
        seed_symbols = seed_result.items
        if not seed_symbols:
            missing.append("layer:C1 empty")
        # Trace depth scales with the requested explanation depth.
        depth = 4 if intent.depth == "deep" else 3 if intent.depth == "medium" else 2
        trace_paths = self._trace_builder.build_paths(rag_session_id, seed_symbols, max_depth=depth) if seed_symbols else []
        excerpts, excerpt_evidence = self._excerpt_fetcher.fetch(rag_session_id, trace_paths) if trace_paths else ([], {})
        if not excerpts:
            # Graph-based excerpts failed; fall back to lexical code retrieval.
            lexical_result = self._gateway.retrieve_lexical_code(
                rag_session_id,
                intent.normalized_query,
                limit=6,
                path_prefixes=path_prefixes or None,
                exclude_tests=exclude_tests,
                include_spans=True,
            )
            missing.extend(lexical_result.missing)
            excerpts, excerpt_evidence = _lexical_excerpts(lexical_result.items)
        if not excerpts:
            missing.append("layer:C0 empty")
        evidence_index = _evidence_index(selected_entrypoints, seed_symbols)
        evidence_index.update(excerpt_evidence)
        missing.extend(_missing(selected_entrypoints, seed_symbols, trace_paths, excerpts))
        return ExplainPack(
            intent=intent,
            selected_entrypoints=selected_entrypoints,
            seed_symbols=seed_symbols,
            trace_paths=trace_paths,
            evidence_index=evidence_index,
            code_excerpts=excerpts,
            missing=_cleanup_missing(_dedupe(missing), has_excerpts=bool(excerpts)),
            conflicts=[],
        )

    def _merge_test_fallback(
        self,
        pack: ExplainPack,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
    ) -> None:
        """Append test-inclusive lexical excerpts to a pack that came up short."""
        lexical_result = self._gateway.retrieve_lexical_code(
            rag_session_id,
            intent.normalized_query,
            limit=6,
            path_prefixes=path_prefixes or None,
            exclude_tests=False,
            include_spans=True,
        )
        # Continue evidence-id numbering after any excerpt_* ids already present.
        excerpt_offset = len([key for key in pack.evidence_index if key.startswith("excerpt_")])
        excerpts, evidence = _lexical_excerpts(
            lexical_result.items,
            start_index=excerpt_offset,
            is_test_fallback=True,
        )
        if not excerpts:
            pack.missing = _dedupe(pack.missing + lexical_result.missing)
            return
        # Deduplicate against excerpts already in the pack by content+span.
        seen = {(item.path, item.start_line, item.end_line, item.content) for item in pack.code_excerpts}
        for excerpt in excerpts:
            key = (excerpt.path, excerpt.start_line, excerpt.end_line, excerpt.content)
            if key in seen:
                continue
            pack.code_excerpts.append(excerpt)
            seen.add(key)
        pack.evidence_index.update(evidence)
        pack.missing = _cleanup_missing(_dedupe(pack.missing + lexical_result.missing), has_excerpts=bool(pack.code_excerpts))

    def _entrypoints(
        self,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
        *,
        exclude_tests: bool,
    ) -> LayerRetrievalResult:
        """Retrieve candidate entrypoints from the C3 layer."""
        return self._gateway.retrieve_layer(
            rag_session_id,
            intent.normalized_query,
            RagLayer.CODE_ENTRYPOINTS,
            limit=6,
            path_prefixes=path_prefixes or None,
            exclude_tests=exclude_tests,
            prefer_non_tests=True,
            include_spans=True,
        )

    def _filter_entrypoints(self, intent: ExplainIntent, items: list[LayeredRetrievalItem]) -> list[LayeredRetrievalItem]:
        """Keep up to 3 entrypoints of the expected kind, falling back to any kind."""
        if not intent.expected_entry_types:
            return items[:3]
        filtered = [item for item in items if str(item.metadata.get("entry_type") or "") in intent.expected_entry_types]
        return filtered[:3] or items[:3]

    def _seed_symbols(
        self,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
        entrypoints: list[LayeredRetrievalItem],
        *,
        exclude_tests: bool,
    ) -> LayerRetrievalResult:
        """Collect up to 8 seed symbols: entrypoint handlers first, then C1 hits."""
        symbol_result = self._gateway.retrieve_layer(
            rag_session_id,
            intent.normalized_query,
            RagLayer.CODE_SYMBOL_CATALOG,
            limit=12,
            path_prefixes=path_prefixes or None,
            exclude_tests=exclude_tests,
            prefer_non_tests=True,
            include_spans=True,
        )
        handlers: list[LayeredRetrievalItem] = []
        handler_ids = [str(item.metadata.get("handler_symbol_id") or "") for item in entrypoints]
        if handler_ids:
            handlers = self._graph.get_symbols_by_ids(rag_session_id, [item for item in handler_ids if item])
        seeds: list[LayeredRetrievalItem] = []
        seen: set[str] = set()
        for item in handlers + symbol_result.items:
            symbol_id = str(item.metadata.get("symbol_id") or "")
            if not symbol_id or symbol_id in seen:
                continue
            seen.add(symbol_id)
            seeds.append(item)
            if len(seeds) >= 8:
                break
        return LayerRetrievalResult(items=seeds, missing=list(symbol_result.missing))

    def _log_pack(self, rag_session_id: str, pack: ExplainPack) -> None:
        """Log a summary of pack composition, split into prod vs test excerpts."""
        prod_excerpt_count = len([excerpt for excerpt in pack.code_excerpts if not _is_test_excerpt(excerpt)])
        test_excerpt_count = len(pack.code_excerpts) - prod_excerpt_count
        LOGGER.warning(
            "code explain pack: rag_session_id=%s entrypoints=%s seeds=%s paths=%s excerpts=%s prod_excerpt_count=%s test_excerpt_count=%s missing=%s",
            rag_session_id,
            len(pack.selected_entrypoints),
            len(pack.seed_symbols),
            len(pack.trace_paths),
            len(pack.code_excerpts),
            prod_excerpt_count,
            test_excerpt_count,
            pack.missing,
        )
def _evidence_index(
    entrypoints: list[LayeredRetrievalItem],
    seed_symbols: list[LayeredRetrievalItem],
) -> dict[str, EvidenceItem]:
    """Build "entrypoint_N" / "symbol_N" evidence entries for retrieved items."""
    result: dict[str, EvidenceItem] = {}
    for index, item in enumerate(entrypoints, start=1):
        evidence_id = f"entrypoint_{index}"
        result[evidence_id] = EvidenceItem(
            evidence_id=evidence_id,
            kind="entrypoint",
            summary=item.title,
            location=item.location,
            # May be [""] when the entrypoint has no handler_symbol_id.
            supports=[str(item.metadata.get("handler_symbol_id") or "")],
        )
    for index, item in enumerate(seed_symbols, start=1):
        evidence_id = f"symbol_{index}"
        result[evidence_id] = EvidenceItem(
            evidence_id=evidence_id,
            kind="symbol",
            summary=item.title,
            location=item.location,
            supports=[str(item.metadata.get("symbol_id") or "")],
        )
    return result
def _missing(
entrypoints: list[LayeredRetrievalItem],
seed_symbols: list[LayeredRetrievalItem],
trace_paths,
excerpts,
) -> list[str]:
missing: list[str] = []
if not entrypoints:
missing.append("entrypoints")
if not seed_symbols:
missing.append("seed_symbols")
if not trace_paths:
missing.append("trace_paths")
if not excerpts:
missing.append("code_excerpts")
return missing
def _lexical_excerpts(
    items: list[LayeredRetrievalItem],
    *,
    start_index: int = 0,
    is_test_fallback: bool = False,
) -> tuple[list[CodeExcerpt], dict[str, EvidenceItem]]:
    """Convert lexical retrieval hits into excerpts plus their evidence entries.

    Args:
        items: Lexical retrieval hits to convert.
        start_index: Offset for the generated ``excerpt_N`` evidence ids so
            they do not collide with ids already present in a pack.
        is_test_fallback: Kept for interface compatibility. The focus label
            depends only on whether the item itself is a test artifact; the
            original ``elif is_test_fallback`` branch re-assigned the same
            default value and was a no-op, so it has been removed.

    Returns:
        A pair of (excerpts, evidence entries keyed by evidence id).
    """
    excerpts: list[CodeExcerpt] = []
    evidence_index: dict[str, EvidenceItem] = {}
    for item in items:
        evidence_id = f"excerpt_{start_index + len(evidence_index) + 1}"
        location = item.location
        evidence_index[evidence_id] = EvidenceItem(
            evidence_id=evidence_id,
            kind="excerpt",
            summary=item.title or item.source,
            location=location,
            supports=[],
        )
        # Test-derived excerpts are tagged so downstream counting can split them out.
        focus = "test:lexical" if _item_is_test(item) else "lexical"
        excerpts.append(
            CodeExcerpt(
                evidence_id=evidence_id,
                symbol_id=str(item.metadata.get("symbol_id") or "") or None,
                title=item.title or item.source,
                path=item.source,
                start_line=location.start_line if location else None,
                end_line=location.end_line if location else None,
                content=item.content,
                focus=focus,
            )
        )
    return excerpts, evidence_index
def _item_is_test(item: LayeredRetrievalItem) -> bool:
    """True when the item is flagged as a test artifact or lives under a test path."""
    if item.metadata.get("is_test"):
        return True
    return is_test_path(item.source)
def _is_test_excerpt(excerpt: CodeExcerpt) -> bool:
    """True when the excerpt carries a test focus label or comes from a test path."""
    if excerpt.focus.startswith("test:"):
        return True
    return is_test_path(excerpt.path)
def _path_prefixes(intent: ExplainIntent, file_candidates: list[dict]) -> list[str]:
values: list[str] = []
for path in intent.hints.paths:
prefix = path.rsplit("/", 1)[0] if "/" in path else path
if prefix and prefix not in values:
values.append(prefix)
for item in file_candidates[:6]:
path = str(item.get("path") or "")
prefix = path.rsplit("/", 1)[0] if "/" in path else ""
if prefix and prefix not in values:
values.append(prefix)
return values
def _cleanup_missing(values: list[str], *, has_excerpts: bool) -> list[str]:
if not has_excerpts:
return values
return [value for value in values if value not in {"code_excerpts", "layer:C0 empty"}]
def _dedupe(values: list[str]) -> list[str]:
result: list[str] = []
for value in values:
item = value.strip()
if item and item not in result:
result.append(item)
return result

View File

@@ -0,0 +1,53 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from app.modules.rag.explain.excerpt_planner import ExcerptPlanner
from app.modules.rag.explain.models import CodeExcerpt, EvidenceItem, TracePath
from app.modules.rag.retrieval.test_filter import is_test_path
if TYPE_CHECKING:
from app.modules.rag.explain.graph_repository import CodeGraphRepository
class SourceExcerptFetcher:
    """Fetch code excerpts for the symbols appearing in trace paths."""

    def __init__(self, graph_repository: CodeGraphRepository, planner: ExcerptPlanner | None = None) -> None:
        self._graph = graph_repository
        self._planner = planner or ExcerptPlanner()

    def fetch(
        self,
        rag_session_id: str,
        trace_paths: list[TracePath],
        *,
        max_excerpts: int = 40,
    ) -> tuple[list[CodeExcerpt], dict[str, EvidenceItem]]:
        """Return planned excerpts plus evidence entries for traced symbols.

        Symbol order follows first appearance across trace paths; test chunks
        get a "test:"-prefixed focus; output is capped at ``max_excerpts``.
        """
        # Preserve first-appearance order of symbols across all paths.
        ordered_symbol_ids: list[str] = []
        for path in trace_paths:
            for symbol_id in path.symbol_ids:
                if symbol_id and symbol_id not in ordered_symbol_ids:
                    ordered_symbol_ids.append(symbol_id)
        chunks = self._graph.get_chunks_by_symbol_ids(rag_session_id, ordered_symbol_ids)
        excerpts: list[CodeExcerpt] = []
        evidence_index: dict[str, EvidenceItem] = {}
        for chunk in chunks:
            symbol_id = str(chunk.metadata.get("symbol_id") or "")
            evidence_id = f"excerpt_{len(evidence_index) + 1}"
            location = chunk.location
            evidence_index[evidence_id] = EvidenceItem(
                evidence_id=evidence_id,
                kind="excerpt",
                summary=chunk.title,
                location=location,
                supports=[symbol_id] if symbol_id else [],
            )
            is_test_chunk = bool(chunk.metadata.get("is_test")) or is_test_path(location.path if location else chunk.source)
            for excerpt in self._planner.plan(chunk, evidence_id=evidence_id, symbol_id=symbol_id):
                if len(excerpts) >= max_excerpts:
                    break
                if is_test_chunk and not excerpt.focus.startswith("test:"):
                    excerpt.focus = f"test:{excerpt.focus}"
                excerpts.append(excerpt)
            if len(excerpts) >= max_excerpts:
                break
        return excerpts, evidence_index

View File

@@ -0,0 +1,102 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from app.modules.rag.explain.models import LayeredRetrievalItem, TracePath
if TYPE_CHECKING:
from app.modules.rag.explain.graph_repository import CodeGraphRepository
class TraceBuilder:
    """Breadth-first tracer that expands seed symbols along code-graph edges.

    Paths are scored (resolved edges and same-file hops score higher, test
    files are penalized), deduplicated, and the best ``max_paths`` returned.
    """

    def __init__(self, graph_repository: CodeGraphRepository) -> None:
        self._graph = graph_repository

    def build_paths(
        self,
        rag_session_id: str,
        seed_symbols: list[LayeredRetrievalItem],
        *,
        max_depth: int,
        max_paths: int = 3,
        edge_types: list[str] | None = None,
    ) -> list[TracePath]:
        """BFS from each seed up to ``max_depth`` hops; return top-scored paths.

        Unresolved edge targets are resolved by reference with a package hint;
        a successful resolution adds a score bonus and a "resolved:" note. If
        every path is filtered out, a single zero-score path with the first
        seed is returned as a fallback.
        """
        edges_filter = edge_types or ["calls", "imports", "inherits"]
        symbol_map = self._symbol_map(seed_symbols)
        paths: list[TracePath] = []
        for seed in seed_symbols:
            seed_id = str(seed.metadata.get("symbol_id") or "")
            if not seed_id:
                continue
            # Queue entries: (path so far, accumulated score, notes).
            queue: list[tuple[list[str], float, list[str]]] = [([seed_id], 0.0, [])]
            while queue and len(paths) < max_paths * 3:
                current_path, score, notes = queue.pop(0)
                src_symbol_id = current_path[-1]
                out_edges = self._graph.get_out_edges(rag_session_id, [src_symbol_id], edges_filter, limit_per_src=4)
                if not out_edges or len(current_path) >= max_depth:
                    # Dead end or depth reached: finalize this path.
                    paths.append(TracePath(symbol_ids=current_path, score=score, notes=notes))
                    continue
                for edge in out_edges:
                    metadata = edge.metadata
                    dst_symbol_id = str(metadata.get("dst_symbol_id") or "")
                    next_notes = list(notes)
                    next_score = score + self._edge_score(edge, symbol_map.get(src_symbol_id))
                    if not dst_symbol_id:
                        # Edge target unresolved: try to resolve by textual reference.
                        dst_ref = str(metadata.get("dst_ref") or "")
                        package_hint = self._package_hint(symbol_map.get(src_symbol_id))
                        resolved = self._graph.resolve_symbol_by_ref(rag_session_id, dst_ref, package_hint=package_hint)
                        if resolved is not None:
                            dst_symbol_id = str(resolved.metadata.get("symbol_id") or "")
                            symbol_map[dst_symbol_id] = resolved
                            next_score += 2.0
                            next_notes.append(f"resolved:{dst_ref}")
                    if not dst_symbol_id or dst_symbol_id in current_path:
                        # Unresolvable target or a cycle: finalize without extending.
                        paths.append(TracePath(symbol_ids=current_path, score=next_score, notes=next_notes))
                        continue
                    if dst_symbol_id not in symbol_map:
                        symbols = self._graph.get_symbols_by_ids(rag_session_id, [dst_symbol_id])
                        if symbols:
                            symbol_map[dst_symbol_id] = symbols[0]
                    queue.append((current_path + [dst_symbol_id], next_score, next_notes))
        unique = self._unique_paths(paths)
        unique.sort(key=lambda item: item.score, reverse=True)
        return unique[:max_paths] or [TracePath(symbol_ids=[seed.metadata.get("symbol_id", "")], score=0.0) for seed in seed_symbols[:1]]

    def _edge_score(self, edge: LayeredRetrievalItem, source_symbol: LayeredRetrievalItem | None) -> float:
        """Score one hop: +2 for resolved edges, +1 for same-file, -3 for test files."""
        metadata = edge.metadata
        score = 1.0
        if str(metadata.get("resolution") or "") == "resolved":
            score += 2.0
        source_path = source_symbol.source if source_symbol is not None else ""
        if source_path and edge.source == source_path:
            score += 1.0
        if "tests/" in edge.source or "/tests/" in edge.source:
            score -= 3.0
        return score

    def _package_hint(self, symbol: LayeredRetrievalItem | None) -> str | None:
        """Derive the parent package of the symbol's module, if any, for resolution."""
        if symbol is None:
            return None
        package = str(symbol.metadata.get("package_or_module") or "")
        if not package:
            return None
        return ".".join(package.split(".")[:-1]) or package

    def _symbol_map(self, items: list[LayeredRetrievalItem]) -> dict[str, LayeredRetrievalItem]:
        """Index items by their symbol_id, skipping items without one."""
        result: dict[str, LayeredRetrievalItem] = {}
        for item in items:
            symbol_id = str(item.metadata.get("symbol_id") or "")
            if symbol_id:
                result[symbol_id] = item
        return result

    def _unique_paths(self, items: list[TracePath]) -> list[TracePath]:
        """Deduplicate paths by their non-empty symbol-id tuple, keeping first seen."""
        result: list[TracePath] = []
        seen: set[tuple[str, ...]] = set()
        for item in items:
            key = tuple(symbol_id for symbol_id in item.symbol_ids if symbol_id)
            if not key or key in seen:
                continue
            seen.add(key)
            result.append(item)
        return result

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
from app.modules.rag.contracts import RagDocument, RagLayer, RagSource, RagSpan from app.modules.rag.contracts import RagDocument, RagLayer, RagSource, RagSpan
from app.modules.rag.indexing.code.code_text.chunker import CodeChunk from app.modules.rag.indexing.code.code_text.chunker import CodeChunk
from app.modules.rag.retrieval.test_filter import is_test_path
class CodeTextDocumentBuilder: class CodeTextDocumentBuilder:
@@ -17,6 +18,7 @@ class CodeTextDocumentBuilder:
"chunk_index": chunk_index, "chunk_index": chunk_index,
"chunk_type": chunk.chunk_type, "chunk_type": chunk.chunk_type,
"module_or_unit": source.path.replace("/", ".").removesuffix(".py"), "module_or_unit": source.path.replace("/", ".").removesuffix(".py"),
"is_test": is_test_path(source.path),
"artifact_type": "CODE", "artifact_type": "CODE",
}, },
) )

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
from app.modules.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource, RagSpan from app.modules.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource, RagSpan
from app.modules.rag.indexing.code.edges.extractor import PyEdge from app.modules.rag.indexing.code.edges.extractor import PyEdge
from app.modules.rag.retrieval.test_filter import is_test_path
class EdgeDocumentBuilder: class EdgeDocumentBuilder:
@@ -22,6 +23,7 @@ class EdgeDocumentBuilder:
"dst_symbol_id": edge.dst_symbol_id, "dst_symbol_id": edge.dst_symbol_id,
"dst_ref": edge.dst_ref, "dst_ref": edge.dst_ref,
"resolution": edge.resolution, "resolution": edge.resolution,
"is_test": is_test_path(source.path),
"lang_payload": edge.metadata, "lang_payload": edge.metadata,
"artifact_type": "CODE", "artifact_type": "CODE",
}, },

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
from app.modules.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource, RagSpan from app.modules.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource, RagSpan
from app.modules.rag.indexing.code.entrypoints.registry import Entrypoint from app.modules.rag.indexing.code.entrypoints.registry import Entrypoint
from app.modules.rag.retrieval.test_filter import is_test_path
class EntrypointDocumentBuilder: class EntrypointDocumentBuilder:
@@ -19,6 +20,7 @@ class EntrypointDocumentBuilder:
"framework": entrypoint.framework, "framework": entrypoint.framework,
"route_or_command": entrypoint.route_or_command, "route_or_command": entrypoint.route_or_command,
"handler_symbol_id": entrypoint.handler_symbol_id, "handler_symbol_id": entrypoint.handler_symbol_id,
"is_test": is_test_path(source.path),
"lang_payload": entrypoint.metadata, "lang_payload": entrypoint.metadata,
"artifact_type": "CODE", "artifact_type": "CODE",
}, },

Some files were not shown because too many files have changed in this diff Show More