Фиксация изменений

This commit is contained in:
2026-03-05 11:03:17 +03:00
parent 1ef0b4d68c
commit 417b8b6f72
261 changed files with 8215 additions and 332 deletions

Binary file not shown.

Binary file not shown.

49
app/core/logging_setup.py Normal file
View File

@@ -0,0 +1,49 @@
from __future__ import annotations
import logging
import re
class ScrubbingFormatter(logging.Formatter):
    """Formatter that redacts identifier-like values from rendered log lines."""

    # key=value pairs whose key ends in "id" or "_key" (e.g. task_id=..., api_key=...)
    _KEY_VALUE_PATTERNS = (
        re.compile(r"\b([A-Za-z_][A-Za-z0-9_]*id)=([^\s,]+)"),
        re.compile(r"\b([A-Za-z_][A-Za-z0-9_]*_key)=([^\s,]+)"),
    )
    # free-text "<noun> id: value" / "<noun> id=value" mentions
    _TEXT_PATTERNS = (
        re.compile(r"\b(index|task|dialog|rag|session|plan|artifact|evidence|symbol|edge|entry) id\b[:=]\s*([^\s,]+)", re.IGNORECASE),
    )

    def format(self, record: logging.LogRecord) -> str:
        """Render the record, scrub identifiers, then normalise to one trailing newline."""
        text = self._scrub(super().format(record))
        return text.rstrip("\n") + "\n"

    def _scrub(self, message: str) -> str:
        """Apply every redaction pattern to the rendered message."""
        result = message
        for kv_pattern in self._KEY_VALUE_PATTERNS:
            result = kv_pattern.sub(self._replace_key_value, result)
        for text_pattern in self._TEXT_PATTERNS:
            result = text_pattern.sub(self._replace_text, result)
        return result

    @staticmethod
    def _replace_key_value(match: re.Match[str]) -> str:
        return f"{match.group(1)}=<redacted>"

    @staticmethod
    def _replace_text(match: re.Match[str]) -> str:
        return f"{match.group(1)} id=<redacted>"
def configure_logging() -> None:
    """Install WARNING-level root logging with identifier scrubbing on every handler.

    Re-runs safely: ``force=True`` replaces any handlers installed earlier,
    and the uvicorn loggers are pinned to WARNING to silence access noise.
    """
    log_format = "%(levelname)s:%(name)s:%(message)s"
    logging.basicConfig(level=logging.WARNING, force=True, format=log_format)
    root = logging.getLogger()
    root.setLevel(logging.WARNING)
    scrubber = ScrubbingFormatter(log_format)
    for handler in root.handlers:
        handler.setFormatter(scrubber)
    for noisy in ("uvicorn", "uvicorn.error", "uvicorn.access"):
        logging.getLogger(noisy).setLevel(logging.WARNING)

View File

@@ -1,10 +1,20 @@
import logging
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.core.logging_setup import configure_logging
from app.core.error_handlers import register_error_handlers
from app.modules.application import ModularApplication
def _configure_logging() -> None:
    # Thin module-local wrapper around app.core.logging_setup.configure_logging.
    configure_logging()
# Applied at import time so logging is configured before create_app() runs.
_configure_logging()
def create_app() -> FastAPI:
app = FastAPI(title="Agent Backend MVP", version="0.1.0")
modules = ModularApplication()

View File

@@ -37,6 +37,8 @@ classDiagram
Методы: `run` — строит, валидирует и исполняет execution plan.
- `TaskSpecBuilder`: формирует спецификацию задачи для оркестратора.
Методы: `build` — собирает `TaskSpec` из route, контекстов и ограничений.
- `ProjectQaConversationGraphFactory`, `ProjectQaClassificationGraphFactory`, `ProjectQaRetrievalGraphFactory`, `ProjectQaAnalysisGraphFactory`, `ProjectQaAnswerGraphFactory`: набор маленьких graph-исполнителей для `project/qa`.
Роли: нормализация запроса; классификация project-question; поздний retrieval из `RAG`; анализ code/docs контекста; сборка финального ответа.
- `StorySessionRecorder`: пишет session-scoped артефакты для последующего bind к Story.
Методы: `record_run` — сохраняет входные источники и выходные артефакты сессии.
- `StoryContextRepository`: репозиторий Story-контекста и его связей.
@@ -58,3 +60,32 @@ sequenceDiagram
Router->>Confluence: fetch_page(url)
Confluence-->>Router: page(content_markdown, metadata)
```
### `project/qa` reasoning flow
Назначение: оркестратор планирует шаги, а каждый шаг исполняется отдельным graph. Retrieval вызывается поздно, внутри шага `context_retrieval`.
```mermaid
sequenceDiagram
participant Runtime as GraphAgentRuntime
participant Orch as OrchestratorService
participant G1 as conversation_understanding
participant G2 as question_classification
participant G3 as context_retrieval
participant Rag as RagService
participant G4 as context_analysis
participant G5 as answer_composition
Runtime->>Orch: run(task)
Orch->>G1: execute
G1-->>Orch: resolved_request
Orch->>G2: execute
G2-->>Orch: question_profile
Orch->>G3: execute
G3->>Rag: retrieve(query)
Rag-->>G3: rag_items
G3-->>Orch: source_bundle
Orch->>G4: execute
G4-->>Orch: analysis_brief
Orch->>G5: execute
G5-->>Orch: final_answer
Orch-->>Runtime: final_answer
```

View File

@@ -1,8 +1,13 @@
# Public lazy-import surface of the graphs package, kept alphabetically sorted
# (ProjectEditsGraphFactory was previously out of order).
__all__ = [
    "BaseGraphFactory",
    "DocsGraphFactory",
    "ProjectEditsGraphFactory",
    "ProjectQaAnalysisGraphFactory",
    "ProjectQaAnswerGraphFactory",
    "ProjectQaClassificationGraphFactory",
    "ProjectQaConversationGraphFactory",
    "ProjectQaGraphFactory",
    "ProjectQaRetrievalGraphFactory",
]
@@ -15,6 +20,26 @@ def __getattr__(name: str):
from app.modules.agent.engine.graphs.docs_graph import DocsGraphFactory
return DocsGraphFactory
if name == "ProjectQaConversationGraphFactory":
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaConversationGraphFactory
return ProjectQaConversationGraphFactory
if name == "ProjectQaClassificationGraphFactory":
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaClassificationGraphFactory
return ProjectQaClassificationGraphFactory
if name == "ProjectQaRetrievalGraphFactory":
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaRetrievalGraphFactory
return ProjectQaRetrievalGraphFactory
if name == "ProjectQaAnalysisGraphFactory":
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaAnalysisGraphFactory
return ProjectQaAnalysisGraphFactory
if name == "ProjectQaAnswerGraphFactory":
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaAnswerGraphFactory
return ProjectQaAnswerGraphFactory
if name == "ProjectEditsGraphFactory":
from app.modules.agent.engine.graphs.project_edits_graph import ProjectEditsGraphFactory

View File

@@ -59,7 +59,7 @@ class BaseGraphFactory:
f"Confluence context:\n{conf}",
]
)
answer = self._llm.generate("general_answer", user_input)
answer = self._llm.generate("general_answer", user_input, log_context="graph.default.answer")
emit_progress_sync(
state,
stage="graph.default.answer.done",

View File

@@ -52,7 +52,7 @@ class DocsContextAnalyzer:
f"Detected documentation candidates:\n{snippets}",
]
)
raw = self._llm.generate("docs_detect", user_input)
raw = self._llm.generate("docs_detect", user_input, log_context="graph.docs.detect_existing_docs")
exists = self.parse_bool_marker(raw, "exists", default=True)
summary = self.parse_text_marker(raw, "summary", default="Documentation files detected.")
return {"existing_docs_detected": exists, "existing_docs_summary": summary}
@@ -71,7 +71,7 @@ class DocsContextAnalyzer:
f"Existing docs summary:\n{state.get('existing_docs_summary', '')}",
]
)
raw = self._llm.generate("docs_strategy", user_input)
raw = self._llm.generate("docs_strategy", user_input, log_context="graph.docs.decide_strategy")
strategy = self.parse_text_marker(raw, "strategy", default="").lower()
if strategy not in {"incremental_update", "from_scratch"}:
strategy = "incremental_update" if state.get("existing_docs_detected", False) else "from_scratch"
@@ -260,7 +260,7 @@ class DocsContentComposer:
f"Examples bundle:\n{state.get('rules_bundle', '')}",
]
)
plan = self._llm.generate("docs_plan_sections", user_input)
plan = self._llm.generate("docs_plan_sections", user_input, log_context="graph.docs.plan_incremental_changes")
return {
"doc_plan": plan,
"target_path": target_path,
@@ -279,7 +279,7 @@ class DocsContentComposer:
f"Examples bundle:\n{state.get('rules_bundle', '')}",
]
)
plan = self._llm.generate("docs_plan_sections", user_input)
plan = self._llm.generate("docs_plan_sections", user_input, log_context="graph.docs.plan_new_document")
return {"doc_plan": plan, "target_path": target_path, "target_file_content": "", "target_file_hash": ""}
def generate_doc_content(self, state: AgentGraphState) -> dict:
@@ -294,7 +294,7 @@ class DocsContentComposer:
f"Examples bundle:\n{state.get('rules_bundle', '')}",
]
)
raw = self._llm.generate("docs_generation", user_input)
raw = self._llm.generate("docs_generation", user_input, log_context="graph.docs.generate_doc_content")
bundle = self._bundle.parse_docs_bundle(raw)
if bundle:
first_content = str(bundle[0].get("content", "")).strip()
@@ -369,7 +369,7 @@ class DocsContentComposer:
f"Generated document:\n{generated}",
]
)
raw = self._llm.generate("docs_self_check", user_input)
raw = self._llm.generate("docs_self_check", user_input, log_context="graph.docs.self_check")
passed = DocsContextAnalyzer.parse_bool_marker(raw, "pass", default=False)
feedback = DocsContextAnalyzer.parse_text_marker(raw, "feedback", default="No validation feedback provided.")
return {"validation_attempts": attempts, "validation_passed": passed, "validation_feedback": feedback}
@@ -379,7 +379,7 @@ class DocsContentComposer:
bundle = state.get("generated_docs_bundle", []) or []
strategy = state.get("docs_strategy", "from_scratch")
if strategy == "from_scratch" and not self._bundle.bundle_has_required_structure(bundle):
LOGGER.warning(
LOGGER.info(
"build_changeset fallback bundle used: strategy=%s bundle_items=%s",
strategy,
len(bundle),
@@ -452,7 +452,11 @@ class DocsContentComposer:
]
)
try:
summary = self._llm.generate("docs_execution_summary", user_input).strip()
summary = self._llm.generate(
"docs_execution_summary",
user_input,
log_context="graph.docs.summarize_result",
).strip()
except Exception:
summary = ""
if not summary:

View File

@@ -48,7 +48,9 @@ class ProjectEditsLogic:
},
ensure_ascii=False,
)
parsed = self._support.parse_json(self._llm.generate("project_edits_plan", user_input))
parsed = self._support.parse_json(
self._llm.generate("project_edits_plan", user_input, log_context="graph.project_edits.plan_changes")
)
contracts = self._contracts.parse(
parsed,
request=str(state.get("message", "")),
@@ -165,7 +167,13 @@ class ProjectEditsLogic:
"changeset": [{"op": x.op.value, "path": x.path, "reason": x.reason} for x in changeset[:20]],
"rule": "Changes must stay inside contract blocks and not affect unrelated sections.",
}
parsed = self._support.parse_json(self._llm.generate("project_edits_self_check", json.dumps(payload, ensure_ascii=False)))
parsed = self._support.parse_json(
self._llm.generate(
"project_edits_self_check",
json.dumps(payload, ensure_ascii=False),
log_context="graph.project_edits.self_check",
)
)
passed = bool(parsed.get("pass")) if isinstance(parsed, dict) else False
feedback = str(parsed.get("feedback", "")).strip() if isinstance(parsed, dict) else ""
return {
@@ -192,7 +200,11 @@ class ProjectEditsLogic:
"rag_context": self._support.shorten(state.get("rag_context", ""), 5000),
"confluence_context": self._support.shorten(state.get("confluence_context", ""), 5000),
}
raw = self._llm.generate("project_edits_hunks", json.dumps(prompt_payload, ensure_ascii=False))
raw = self._llm.generate(
"project_edits_hunks",
json.dumps(prompt_payload, ensure_ascii=False),
log_context="graph.project_edits.generate_changeset",
)
parsed = self._support.parse_json(raw)
hunks = parsed.get("hunks", []) if isinstance(parsed, dict) else []
if not isinstance(hunks, list) or not hunks:

View File

@@ -33,7 +33,7 @@ class ProjectQaGraphFactory:
f"Confluence context:\n{state.get('confluence_context', '')}",
]
)
answer = self._llm.generate("project_answer", user_input)
answer = self._llm.generate("project_answer", user_input, log_context="graph.project_qa.answer")
emit_progress_sync(
state,
stage="graph.project_qa.answer.done",

View File

@@ -0,0 +1,172 @@
from __future__ import annotations
import logging
from langgraph.graph import END, START, StateGraph
from app.modules.agent.engine.graphs.progress import emit_progress_sync
from app.modules.agent.engine.graphs.state import AgentGraphState
from app.modules.agent.engine.orchestrator.actions.project_qa_analyzer import ProjectQaAnalyzer
from app.modules.agent.engine.orchestrator.actions.project_qa_support import ProjectQaSupport
from app.modules.agent.llm import AgentLlmService
from app.modules.contracts import RagRetriever
from app.modules.rag.explain import ExplainPack, PromptBudgeter
LOGGER = logging.getLogger(__name__)
class ProjectQaConversationGraphFactory:
    """Single-node graph: normalise the raw user message into `resolved_request`.

    The `llm` argument is accepted only for signature parity with the other
    project/qa step factories; this step is purely rule-based.
    """

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        self._support = ProjectQaSupport()

    def build(self, checkpointer=None):
        """Compile a one-step StateGraph: START -> resolve_request -> END."""
        builder = StateGraph(AgentGraphState)
        builder.add_node("resolve_request", self._resolve_request)
        builder.add_edge(START, "resolve_request")
        builder.add_edge("resolve_request", END)
        return builder.compile(checkpointer=checkpointer)

    def _resolve_request(self, state: AgentGraphState) -> dict:
        emit_progress_sync(
            state,
            stage="graph.project_qa.conversation_understanding",
            message="Нормализую пользовательский запрос.",
        )
        raw_message = str(state.get("message", "") or "")
        resolved = self._support.resolve_request(raw_message)
        LOGGER.warning(
            "graph step result: graph=project_qa/conversation_understanding normalized=%s",
            resolved.get("normalized_message", ""),
        )
        return {"resolved_request": resolved}
class ProjectQaClassificationGraphFactory:
    """Single-node graph: derive a `question_profile` from the resolved request."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        # llm is unused here; accepted for uniformity with sibling factories.
        self._support = ProjectQaSupport()

    def build(self, checkpointer=None):
        """Compile a one-step StateGraph: START -> classify_question -> END."""
        builder = StateGraph(AgentGraphState)
        builder.add_node("classify_question", self._classify_question)
        builder.add_edge(START, "classify_question")
        builder.add_edge("classify_question", END)
        return builder.compile(checkpointer=checkpointer)

    def _classify_question(self, state: AgentGraphState) -> dict:
        resolved = state.get("resolved_request", {}) or {}
        # Prefer the normalised text; fall back to the raw message.
        text = str(resolved.get("normalized_message") or state.get("message", "") or "")
        profile = self._support.build_profile(text)
        LOGGER.warning(
            "graph step result: graph=project_qa/question_classification domain=%s intent=%s",
            profile.get("domain"),
            profile.get("intent"),
        )
        return {"question_profile": profile}
class ProjectQaRetrievalGraphFactory:
    """Single-node graph that assembles the `source_bundle` for project/qa."""

    def __init__(self, rag: RagRetriever, llm: AgentLlmService | None = None) -> None:
        # NOTE(review): the retriever is stored but never called anywhere in this
        # class, while the documented flow expects retrieve() in this step — confirm.
        self._rag = rag
        self._support = ProjectQaSupport()

    def build(self, checkpointer=None):
        # One-node graph: START -> retrieve_context -> END.
        graph = StateGraph(AgentGraphState)
        graph.add_node("retrieve_context", self._retrieve_context)
        graph.add_edge(START, "retrieve_context")
        graph.add_edge("retrieve_context", END)
        return graph.compile(checkpointer=checkpointer)

    def _retrieve_context(self, state: AgentGraphState) -> dict:
        """Build a ranked source bundle from state; emits progress and a result log."""
        emit_progress_sync(state, stage="graph.project_qa.context_retrieval", message="Собираю контекст по проекту.")
        resolved = state.get("resolved_request", {}) or {}  # NOTE(review): unused below
        profile = state.get("question_profile", {}) or {}
        files_map = dict(state.get("files_map", {}) or {})
        # NOTE(review): rag_items is always empty here, so the bundle is built from
        # files_map alone — verify this is the intended late-retrieval behaviour.
        rag_items: list[dict] = []
        source_bundle = self._support.build_source_bundle(profile, list(rag_items), files_map)
        LOGGER.warning(
            "graph step result: graph=project_qa/context_retrieval mode=%s rag_items=%s file_candidates=%s legacy_rag=%s",
            profile.get("domain"),
            len(source_bundle.get("rag_items", []) or []),
            len(source_bundle.get("file_candidates", []) or []),
            False,
        )
        return {"source_bundle": source_bundle}
class ProjectQaAnalysisGraphFactory:
    """Single-node graph: turn retrieved sources into an `analysis_brief`."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        self._support = ProjectQaSupport()
        self._analyzer = ProjectQaAnalyzer()

    def build(self, checkpointer=None):
        """Compile a one-step StateGraph: START -> analyze_context -> END."""
        builder = StateGraph(AgentGraphState)
        builder.add_node("analyze_context", self._analyze_context)
        builder.add_edge(START, "analyze_context")
        builder.add_edge("analyze_context", END)
        return builder.compile(checkpointer=checkpointer)

    def _analyze_context(self, state: AgentGraphState) -> dict:
        explain_pack = state.get("explain_pack")
        if explain_pack:
            # An explain pack (code-trace evidence) takes precedence over heuristics.
            analysis = self._analysis_from_pack(explain_pack)
        else:
            bundle = state.get("source_bundle", {}) or {}
            profile = bundle.get("profile", {}) or state.get("question_profile", {}) or {}
            rag_items = list(bundle.get("rag_items", []) or [])
            file_candidates = list(bundle.get("file_candidates", []) or [])
            if str(profile.get("domain")) == "code":
                analysis = self._analyzer.analyze_code(profile, rag_items, file_candidates)
            else:
                analysis = self._analyzer.analyze_docs(profile, rag_items)
        LOGGER.warning(
            "graph step result: graph=project_qa/context_analysis findings=%s evidence=%s",
            len(analysis.get("findings", []) or []),
            len(analysis.get("evidence", []) or []),
        )
        return {"analysis_brief": analysis}

    def _analysis_from_pack(self, raw_pack) -> dict:
        """Project an ExplainPack into the flat analysis-brief dict shape."""
        pack = ExplainPack.model_validate(raw_pack)
        findings: list[str] = []
        evidence: list[str] = []
        for entrypoint in pack.selected_entrypoints[:3]:
            handler = entrypoint.metadata.get('handler_symbol_id', '')
            findings.append(f"Entrypoint `{entrypoint.title}` maps to handler `{handler}`.")
            if entrypoint.source:
                evidence.append(entrypoint.source)
        for path in pack.trace_paths[:3]:
            if path.symbol_ids:
                findings.append(f"Trace path: {' -> '.join(path.symbol_ids)}")
        for excerpt in pack.code_excerpts[:4]:
            evidence.append(f"{excerpt.path}:{excerpt.start_line}-{excerpt.end_line} [{excerpt.evidence_id}]")
        return {
            "subject": pack.intent.normalized_query,
            "findings": findings or ["No explain trace was built from the available code evidence."],
            "evidence": evidence,
            "gaps": list(pack.missing),
            "answer_mode": "summary",
        }
class ProjectQaAnswerGraphFactory:
    """Single-node graph: compose the final answer from brief plus optional explain pack."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        self._support = ProjectQaSupport()
        self._llm = llm
        self._budgeter = PromptBudgeter()

    def build(self, checkpointer=None):
        """Compile a one-step StateGraph: START -> compose_answer -> END."""
        builder = StateGraph(AgentGraphState)
        builder.add_node("compose_answer", self._compose_answer)
        builder.add_edge(START, "compose_answer")
        builder.add_edge("compose_answer", END)
        return builder.compile(checkpointer=checkpointer)

    def _compose_answer(self, state: AgentGraphState) -> dict:
        profile = state.get("question_profile", {}) or {}
        analysis = state.get("analysis_brief", {}) or {}
        brief = self._support.build_answer_brief(profile, analysis)
        # An LLM-backed explain answer wins; otherwise fall back to the templated brief.
        answer = self._compose_explain_answer(state, state.get("explain_pack"))
        if not answer:
            answer = self._support.compose_answer(brief)
        LOGGER.warning(
            "graph step result: graph=project_qa/answer_composition answer_len=%s",
            len(answer or ""),
        )
        return {"answer_brief": brief, "final_answer": answer}

    def _compose_explain_answer(self, state: AgentGraphState, raw_pack) -> str:
        """Render an LLM answer from an explain pack; returns '' when unavailable."""
        if raw_pack is None or self._llm is None:
            return ""
        pack = ExplainPack.model_validate(raw_pack)
        prompt_input = self._budgeter.build_prompt_input(str(state.get("message", "") or ""), pack)
        generated = self._llm.generate(
            "code_explain_answer_v2",
            prompt_input,
            log_context="graph.project_qa.answer_v2",
        )
        return generated.strip()

View File

@@ -25,6 +25,12 @@ class AgentGraphState(TypedDict, total=False):
validation_passed: bool
validation_feedback: str
validation_attempts: int
resolved_request: dict
question_profile: dict
source_bundle: dict
analysis_brief: dict
answer_brief: dict
final_answer: str
answer: str
changeset: list[ChangeItem]
edits_requested_path: str

View File

@@ -1,13 +1,17 @@
from app.modules.agent.engine.orchestrator.actions.code_explain_actions import CodeExplainActions
from app.modules.agent.engine.orchestrator.actions.docs_actions import DocsActions
from app.modules.agent.engine.orchestrator.actions.edit_actions import EditActions
from app.modules.agent.engine.orchestrator.actions.explain_actions import ExplainActions
from app.modules.agent.engine.orchestrator.actions.gherkin_actions import GherkinActions
from app.modules.agent.engine.orchestrator.actions.project_qa_actions import ProjectQaActions
from app.modules.agent.engine.orchestrator.actions.review_actions import ReviewActions
__all__ = [
"CodeExplainActions",
"DocsActions",
"EditActions",
"ExplainActions",
"GherkinActions",
"ProjectQaActions",
"ReviewActions",
]

View File

@@ -0,0 +1,46 @@
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType
from app.modules.rag.explain.intent_builder import ExplainIntentBuilder
from app.modules.rag.explain.models import ExplainPack
if TYPE_CHECKING:
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
LOGGER = logging.getLogger(__name__)
class CodeExplainActions(ActionSupport):
    """Orchestrator action that builds an ExplainPack artifact for code-explain answers."""

    def __init__(self, retriever: CodeExplainRetrieverV2 | None = None) -> None:
        # retriever may be None when the v2 explain pipeline is not wired in.
        self._retriever = retriever
        self._intent_builder = ExplainIntentBuilder()

    def build_code_explain_pack(self, ctx: ExecutionContext) -> list[str]:
        """Build and store an ExplainPack from the task's source bundle.

        Falls back to an intent-only pack marked `code_explain_retriever_unavailable`
        when no retriever was injected. Returns the stored artifact id list.
        """
        # File candidates come from the earlier source_bundle artifact, if present.
        file_candidates = list((self.get(ctx, "source_bundle", {}) or {}).get("file_candidates", []) or [])
        if self._retriever is None:
            pack = ExplainPack(
                intent=self._intent_builder.build(ctx.task.user_message),
                missing=["code_explain_retriever_unavailable"],
            )
        else:
            pack = self._retriever.build_pack(
                ctx.task.rag_session_id,
                ctx.task.user_message,
                file_candidates=file_candidates,
            )
        # NOTE(review): WARNING level for a routine success log — the root logger
        # runs at WARNING, so this fires on every run; confirm that is intended.
        LOGGER.warning(
            "code explain action: task_id=%s entrypoints=%s seeds=%s paths=%s excerpts=%s missing=%s",
            ctx.task.task_id,
            len(pack.selected_entrypoints),
            len(pack.seed_symbols),
            len(pack.trace_paths),
            len(pack.code_excerpts),
            pack.missing,
        )
        return [self.put(ctx, "explain_pack", ArtifactType.STRUCTURED_JSON, pack.model_dump(mode="json"))]

View File

@@ -0,0 +1,117 @@
from __future__ import annotations
from app.modules.agent.engine.orchestrator.actions.project_qa_analyzer import ProjectQaAnalyzer
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
from app.modules.agent.engine.orchestrator.actions.project_qa_support import ProjectQaSupport
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType
class ProjectQaActions(ActionSupport):
    """Orchestrator actions implementing the project/qa pipeline steps."""

    def __init__(self) -> None:
        self._support = ProjectQaSupport()
        self._analyzer = ProjectQaAnalyzer()

    def classify_project_question(self, ctx: ExecutionContext) -> list[str]:
        """Derive a question profile (domain/intent/terms/entities) from the message."""
        message = str(ctx.task.user_message or "")
        profile = self._support.build_profile(message)
        return [self.put(ctx, "question_profile", ArtifactType.STRUCTURED_JSON, profile)]

    def collect_project_sources(self, ctx: ExecutionContext) -> list[str]:
        """Rank RAG items and project files against the profile into a source bundle.

        NOTE(review): this ranking appears to duplicate
        ProjectQaSupport.build_source_bundle almost line-for-line — consider
        delegating to it to keep the two in sync.
        """
        profile = self.get(ctx, "question_profile", {}) or {}
        terms = list(profile.get("terms", []) or [])
        entities = list(profile.get("entities", []) or [])
        rag_items = list(ctx.task.metadata.get("rag_items", []) or [])
        files_map = dict(ctx.task.metadata.get("files_map", {}) or {})
        # Only down-rank test files when the user did not explicitly ask about tests.
        explicit_test = any(term in {"test", "tests", "тест", "тесты"} for term in terms)
        ranked_rag = []
        for item in rag_items:
            score = self._support.rag_score(item, terms, entities)
            source = str(item.get("source", "") or "")
            if not explicit_test and self._support.is_test_path(source):
                score -= 3
            if score > 0:
                ranked_rag.append((score, item))
        ranked_rag.sort(key=lambda pair: pair[0], reverse=True)
        ranked_files = []
        for path, payload in files_map.items():
            score = self._support.file_score(path, payload, terms, entities)
            if not explicit_test and self._support.is_test_path(path):
                score -= 3
            if score > 0:
                ranked_files.append(
                    (
                        score,
                        {
                            "path": path,
                            "content": str(payload.get("content", "")),
                            "content_hash": str(payload.get("content_hash", "")),
                        },
                    )
                )
        ranked_files.sort(key=lambda pair: pair[0], reverse=True)
        # Cap the bundle: top 12 RAG hits and top 10 files; totals kept for logging.
        bundle = {
            "profile": profile,
            "rag_items": [item for _, item in ranked_rag[:12]],
            "file_candidates": [item for _, item in ranked_files[:10]],
            "rag_total": len(ranked_rag),
            "files_total": len(ranked_files),
        }
        return [self.put(ctx, "source_bundle", ArtifactType.STRUCTURED_JSON, bundle)]

    def analyze_project_sources(self, ctx: ExecutionContext) -> list[str]:
        """Run the heuristic analyzer over the bundle (code vs docs path)."""
        bundle = self.get(ctx, "source_bundle", {}) or {}
        profile = bundle.get("profile", {}) or {}
        rag_items = list(bundle.get("rag_items", []) or [])
        file_candidates = list(bundle.get("file_candidates", []) or [])
        if str(profile.get("domain")) == "code":
            analysis = self._analyzer.analyze_code(profile, rag_items, file_candidates)
        else:
            analysis = self._analyzer.analyze_docs(profile, rag_items)
        return [self.put(ctx, "analysis_brief", ArtifactType.STRUCTURED_JSON, analysis)]

    def build_project_answer_brief(self, ctx: ExecutionContext) -> list[str]:
        """Flatten profile + analysis into the brief consumed by answer composition."""
        profile = self.get(ctx, "question_profile", {}) or {}
        analysis = self.get(ctx, "analysis_brief", {}) or {}
        brief = {
            "question_profile": profile,
            "resolved_subject": analysis.get("subject"),
            "key_findings": analysis.get("findings", []),
            "supporting_evidence": analysis.get("evidence", []),
            "missing_evidence": analysis.get("gaps", []),
            "answer_mode": analysis.get("answer_mode", "summary"),
        }
        return [self.put(ctx, "answer_brief", ArtifactType.STRUCTURED_JSON, brief)]

    def compose_project_answer(self, ctx: ExecutionContext) -> list[str]:
        """Render the final markdown answer (ru/en) from the answer brief."""
        brief = self.get(ctx, "answer_brief", {}) or {}
        profile = brief.get("question_profile", {}) or {}
        russian = bool(profile.get("russian"))
        answer_mode = str(brief.get("answer_mode") or "summary")
        findings = list(brief.get("key_findings", []) or [])
        evidence = list(brief.get("supporting_evidence", []) or [])
        gaps = list(brief.get("missing_evidence", []) or [])
        title = "## Кратко" if russian else "## Summary"
        lines = [title]
        if answer_mode == "inventory":
            lines.append("### Что реализовано" if russian else "### Implemented items")
        else:
            lines.append("### Что видно по проекту" if russian else "### What the project shows")
        if findings:
            lines.extend(f"- {item}" for item in findings)
        else:
            lines.append("Не удалось собрать подтвержденные выводы по доступным данным." if russian else "No supported findings could be assembled from the available data.")
        if evidence:
            lines.append("")
            lines.append("### Где смотреть в проекте" if russian else "### Where to look in the project")
            # Evidence and gap sections are capped to keep the answer compact.
            lines.extend(f"- `{item}`" for item in evidence[:5])
        if gaps:
            lines.append("")
            lines.append("### Что пока не подтверждено кодом" if russian else "### What is not yet confirmed in code")
            lines.extend(f"- {item}" for item in gaps[:3])
        return [self.put(ctx, "final_answer", ArtifactType.TEXT, "\n".join(lines))]

View File

@@ -0,0 +1,154 @@
from __future__ import annotations
class ProjectQaAnalyzer:
def analyze_code(self, profile: dict, rag_items: list[dict], file_candidates: list[dict]) -> dict:
terms = list(profile.get("terms", []) or [])
intent = str(profile.get("intent") or "lookup")
russian = bool(profile.get("russian"))
findings: list[str] = []
evidence: list[str] = []
gaps: list[str] = []
symbol_titles = [str(item.get("title", "") or "") for item in rag_items if str(item.get("layer", "")).startswith("C1")]
symbol_set = set(symbol_titles)
file_paths = [str(item.get("path", "") or item.get("source", "") or "") for item in rag_items]
file_paths.extend(str(item.get("path", "") or "") for item in file_candidates)
if "ConfigManager" in profile.get("entities", []) or "configmanager" in terms or "config_manager" in terms:
alias_file = self.find_path(file_paths, "src/config_manager/__init__.py")
if alias_file:
findings.append(
"Публичный `ConfigManager` экспортируется из `src/config_manager/__init__.py` как alias на `ConfigManagerV2`."
if russian
else "Public `ConfigManager` is exported from `src/config_manager/__init__.py` as an alias to `ConfigManagerV2`."
)
evidence.append("src/config_manager/__init__.py")
if "controlchannel" in {name.lower() for name in symbol_set}:
findings.append(
"Базовый контракт управления задает `ControlChannel`: он определяет команды `start` и `stop` для внешнего канала управления."
if russian
else "`ControlChannel` defines the base management contract with `start` and `stop` commands."
)
evidence.append("src/config_manager/v2/control/base.py")
if "ControlChannelBridge" in symbol_set:
findings.append(
"`ControlChannelBridge` связывает внешний канал управления с lifecycle-методами менеджера: `on_start`, `on_stop`, `on_status`."
if russian
else "`ControlChannelBridge` connects the external control channel to manager lifecycle methods: `on_start`, `on_stop`, `on_status`."
)
evidence.append("src/config_manager/v2/core/control_bridge.py")
implementation_files = self.find_management_implementations(file_candidates)
if implementation_files:
labels = ", ".join(f"`{path}`" for path in implementation_files)
channel_names = self.implementation_names(implementation_files)
findings.append(
f"В коде найдены конкретные реализации каналов управления: {', '.join(channel_names)} ({labels})."
if russian
else f"Concrete management channel implementations were found in code: {', '.join(channel_names)} ({labels})."
)
evidence.extend(implementation_files)
elif intent == "inventory":
gaps.append(
"В текущем контексте не удалось уверенно подтвердить конкретные файлы-реализации каналов, кроме базового контракта и bridge-слоя."
if russian
else "The current context does not yet confirm concrete channel implementation files beyond the base contract and bridge layer."
)
package_doc = self.find_management_doc(file_candidates)
if package_doc:
findings.append(
f"Пакет управления прямо описывает внешние каналы через `{package_doc}`."
if russian
else f"The control package directly describes external channels in `{package_doc}`."
)
evidence.append(package_doc)
subject = "management channels"
if profile.get("entities"):
subject = ", ".join(profile["entities"])
return {
"subject": subject,
"findings": self.dedupe(findings),
"evidence": self.dedupe(evidence),
"gaps": gaps,
"answer_mode": "inventory" if intent == "inventory" else "summary",
}
def analyze_docs(self, profile: dict, rag_items: list[dict]) -> dict:
findings: list[str] = []
evidence: list[str] = []
for item in rag_items[:5]:
title = str(item.get("title", "") or "")
source = str(item.get("source", "") or "")
content = str(item.get("content", "") or "").strip()
if content:
findings.append(content.splitlines()[0][:220])
if source:
evidence.append(source)
elif title:
evidence.append(title)
return {
"subject": "docs",
"findings": self.dedupe(findings),
"evidence": self.dedupe(evidence),
"gaps": [] if findings else ["Недостаточно данных в документации." if profile.get("russian") else "Not enough data in documentation."],
"answer_mode": "summary",
}
def find_management_implementations(self, file_candidates: list[dict]) -> list[str]:
found: list[str] = []
for item in file_candidates:
path = str(item.get("path", "") or "")
lowered = path.lower()
if self.is_test_path(path):
continue
if any(token in lowered for token in ("http_channel.py", "telegram.py", "telegram_channel.py", "http.py")):
found.append(path)
continue
content = str(item.get("content", "") or "").lower()
if "controlchannel" in content and "class " in content:
found.append(path)
continue
if ("channel" in lowered or "control" in lowered) and any(token in content for token in ("http", "telegram", "bot")):
found.append(path)
return self.dedupe(found)[:4]
def implementation_names(self, paths: list[str]) -> list[str]:
names: list[str] = []
for path in paths:
stem = path.rsplit("/", 1)[-1].rsplit(".", 1)[0]
label = stem.replace("_", " ").strip()
if label and label not in names:
names.append(label)
return names
def find_management_doc(self, file_candidates: list[dict]) -> str | None:
for item in file_candidates:
path = str(item.get("path", "") or "")
if self.is_test_path(path):
continue
content = str(item.get("content", "") or "").lower()
if any(token in content for token in ("каналы внешнего управления", "external control channels", "http api", "telegram")):
return path
return None
def find_path(self, paths: list[str], target: str) -> str | None:
for path in paths:
if path == target:
return path
return None
def dedupe(self, items: list[str]) -> list[str]:
seen: list[str] = []
for item in items:
if item and item not in seen:
seen.append(item)
return seen
def is_test_path(self, path: str) -> bool:
lowered = path.lower()
return lowered.startswith("tests/") or "/tests/" in lowered or lowered.startswith("test_") or "/test_" in lowered

View File

@@ -0,0 +1,166 @@
from __future__ import annotations
import re
from app.modules.rag.retrieval.query_terms import extract_query_terms
class ProjectQaSupport:
    """Deterministic helpers for the project-QA pipeline.

    Covers request normalization, question profiling, retrieval-query
    construction, RAG/file source ranking, and Markdown answer composition.
    """

    def resolve_request(self, message: str) -> dict:
        """Normalize the raw user message and derive routing hints from its profile."""
        profile = self.build_profile(message)
        subject = profile["entities"][0] if profile.get("entities") else ""
        return {
            "original_message": message,
            "normalized_message": " ".join((message or "").split()),
            "subject_hint": subject,
            "source_hint": profile["domain"],
            "russian": profile["russian"],
        }

    def build_profile(self, message: str) -> dict:
        """Profile the question: target domain, intent, search terms, entities, language."""
        lowered = message.lower()
        return {
            # Pass the original-cased message too: the CamelCase identifier
            # heuristic cannot work on lower-cased text.
            "domain": "code" if self.looks_like_code_question(lowered, message) else "docs",
            "intent": self.detect_intent(lowered),
            "terms": extract_query_terms(message),
            "entities": self.extract_entities(message),
            "russian": self.is_russian(message),
        }

    def build_retrieval_query(self, resolved_request: dict, profile: dict) -> str:
        """Return the retrieval query, prefixing code questions with "по коду" once."""
        normalized = str(resolved_request.get("normalized_message") or resolved_request.get("original_message") or "").strip()
        if profile.get("domain") == "code" and "по коду" not in normalized.lower():
            return f"по коду {normalized}".strip()
        return normalized

    def build_source_bundle(self, profile: dict, rag_items: list[dict], files_map: dict[str, dict]) -> dict:
        """Rank RAG chunks and file candidates against the profile's terms/entities.

        Test files are penalized unless the user explicitly asked about tests;
        only positively scored items survive (top 12 RAG chunks, top 10 files).
        """
        terms = list(profile.get("terms", []) or [])
        entities = list(profile.get("entities", []) or [])
        explicit_test = any(term in {"test", "tests", "тест", "тесты"} for term in terms)
        ranked_rag: list[tuple[int, dict]] = []
        for item in rag_items:
            score = self.rag_score(item, terms, entities)
            source = str(item.get("source", "") or "")
            if not explicit_test and self.is_test_path(source):
                score -= 3
            if score > 0:
                ranked_rag.append((score, item))
        ranked_rag.sort(key=lambda pair: pair[0], reverse=True)
        ranked_files: list[tuple[int, dict]] = []
        for path, payload in files_map.items():
            score = self.file_score(path, payload, terms, entities)
            if not explicit_test and self.is_test_path(path):
                score -= 3
            if score > 0:
                ranked_files.append(
                    (
                        score,
                        {
                            "path": path,
                            "content": str(payload.get("content", "")),
                            "content_hash": str(payload.get("content_hash", "")),
                        },
                    )
                )
        ranked_files.sort(key=lambda pair: pair[0], reverse=True)
        return {
            "profile": profile,
            "rag_items": [item for _, item in ranked_rag[:12]],
            "file_candidates": [item for _, item in ranked_files[:10]],
            "rag_total": len(ranked_rag),
            "files_total": len(ranked_files),
        }

    def build_answer_brief(self, profile: dict, analysis: dict) -> dict:
        """Assemble the structured brief consumed by compose_answer."""
        return {
            "question_profile": profile,
            "resolved_subject": analysis.get("subject"),
            "key_findings": analysis.get("findings", []),
            "supporting_evidence": analysis.get("evidence", []),
            "missing_evidence": analysis.get("gaps", []),
            "answer_mode": analysis.get("answer_mode", "summary"),
        }

    def compose_answer(self, brief: dict) -> str:
        """Render the brief as a Markdown answer in the user's language."""
        profile = brief.get("question_profile", {}) or {}
        russian = bool(profile.get("russian"))
        answer_mode = str(brief.get("answer_mode") or "summary")
        findings = list(brief.get("key_findings", []) or [])
        evidence = list(brief.get("supporting_evidence", []) or [])
        gaps = list(brief.get("missing_evidence", []) or [])
        title = "## Кратко" if russian else "## Summary"
        lines = [title]
        # Flattened from a nested conditional expression; behavior unchanged.
        if answer_mode == "inventory":
            lines.append("### Что реализовано" if russian else "### Implemented items")
        else:
            lines.append("### Что видно по проекту" if russian else "### What the project shows")
        if findings:
            lines.extend(f"- {item}" for item in findings)
        else:
            lines.append("Не удалось собрать подтвержденные выводы по доступным данным." if russian else "No supported findings could be assembled from the available data.")
        if evidence:
            lines.append("")
            lines.append("### Где смотреть в проекте" if russian else "### Where to look in the project")
            lines.extend(f"- `{item}`" for item in evidence[:5])
        if gaps:
            lines.append("")
            lines.append("### Что пока не подтверждено кодом" if russian else "### What is not yet confirmed in code")
            lines.extend(f"- {item}" for item in gaps[:3])
        return "\n".join(lines)

    def detect_intent(self, lowered: str) -> str:
        """Classify the lower-cased message into inventory/lookup/compare/explain."""
        if any(token in lowered for token in ("какие", "что уже реализ", "список", "перечень", "какие есть")):
            return "inventory"
        if any(token in lowered for token in ("где", "find", "where")):
            return "lookup"
        if any(token in lowered for token in ("сравни", "compare")):
            return "compare"
        return "explain"

    def looks_like_code_question(self, lowered: str, original: str = "") -> bool:
        """Heuristic: does the question target code rather than documentation?

        ``lowered`` is the lower-cased message. ``original`` (optional,
        backward-compatible addition) should carry the original-cased text:
        the CamelCase identifier regex requires an uppercase letter, so it
        could never match when applied only to lower-cased input — that was
        dead code before. Old single-argument callers keep their behavior.
        """
        code_markers = ("по коду", "код", "реализ", "имплементац", "класс", "метод", "модул", "файл", "канал", "handler", "endpoint")
        if any(marker in lowered for marker in code_markers):
            return True
        return bool(re.search(r"\b[A-Z][A-Za-z0-9_]{2,}\b", original or lowered))

    def extract_entities(self, message: str) -> list[str]:
        """Pull up to five CamelCase-like identifiers from the original-cased message."""
        return re.findall(r"\b[A-Z][A-Za-z0-9_]{2,}\b", message)[:5]

    def rag_score(self, item: dict, terms: list[str], entities: list[str]) -> int:
        """Score a RAG chunk: +3 per matched term, +5 per matched entity."""
        haystacks = [
            str(item.get("source", "") or "").lower(),
            str(item.get("title", "") or "").lower(),
            str(item.get("content", "") or "").lower(),
            str((item.get("metadata", {}) or {}).get("qname", "") or "").lower(),
        ]
        score = 0
        for term in terms:
            if any(term in hay for hay in haystacks):
                score += 3
        for entity in entities:
            if any(entity.lower() in hay for hay in haystacks):
                score += 5
        return score

    def file_score(self, path: str, payload: dict, terms: list[str], entities: list[str]) -> int:
        """Score a file candidate; path matches weigh more than content matches."""
        content = str(payload.get("content", "") or "").lower()
        path_lower = path.lower()
        score = 0
        for term in terms:
            if term in path_lower:
                score += 4
            elif term in content:
                score += 2
        for entity in entities:
            entity_lower = entity.lower()
            if entity_lower in path_lower:
                score += 5
            elif entity_lower in content:
                score += 3
        return score

    def is_test_path(self, path: str) -> bool:
        """True when the path points into a test tree or names a test_ module."""
        lowered = path.lower()
        return lowered.startswith("tests/") or "/tests/" in lowered or lowered.startswith("test_") or "/test_" in lowered

    def is_russian(self, text: str) -> bool:
        """True when the text contains at least one Cyrillic letter (а-я or ё)."""
        return any("а" <= ch.lower() <= "я" or ch.lower() == "ё" for ch in text)

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import asyncio
import inspect
import logging
import time
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
@@ -9,6 +10,8 @@ from app.modules.agent.engine.orchestrator.models import PlanStatus, PlanStep, S
from app.modules.agent.engine.orchestrator.quality_gates import QualityGateRunner
from app.modules.agent.engine.orchestrator.step_registry import StepRegistry
LOGGER = logging.getLogger(__name__)
class ExecutionEngine:
def __init__(self, step_registry: StepRegistry, gates: QualityGateRunner) -> None:
@@ -22,17 +25,18 @@ class ExecutionEngine:
for step in ctx.plan.steps:
dep_issue = self._dependency_issue(step, step_results)
if dep_issue:
step_results.append(
StepResult(
result = StepResult(
step_id=step.step_id,
status=StepStatus.SKIPPED,
warnings=[dep_issue],
)
)
step_results.append(result)
self._log_step_result(ctx, step, result)
continue
result = await self._run_with_retry(step, ctx)
step_results.append(result)
self._log_step_result(ctx, step, result)
if result.status in {StepStatus.FAILED, StepStatus.RETRY_EXHAUSTED} and step.on_failure == "fail":
ctx.plan.status = PlanStatus.FAILED
return step_results
@@ -65,6 +69,15 @@ class ExecutionEngine:
while attempt < max_attempts:
attempt += 1
started_at = time.monotonic()
LOGGER.warning(
"orchestrator step start: task_id=%s step_id=%s action_id=%s executor=%s attempt=%s graph_id=%s",
ctx.task.task_id,
step.step_id,
step.action_id,
step.executor,
attempt,
step.graph_id or "",
)
await self._emit_progress(ctx, f"orchestrator.step.{step.step_id}", step.title)
try:
@@ -113,3 +126,21 @@ class ExecutionEngine:
result = ctx.progress_cb(stage, message, "task_progress", {"layer": "orchestrator"})
if inspect.isawaitable(result):
await result
def _log_step_result(self, ctx: ExecutionContext, step: PlanStep, result: StepResult) -> None:
    """Emit a one-line WARNING summary for a finished plan step, resolving produced artifact ids to keys."""
    key_by_id: dict = {}
    for artifact in ctx.artifacts.all_items():
        # First occurrence wins, mirroring the first-match lookup semantics.
        key_by_id.setdefault(artifact.artifact_id, artifact.key)
    artifact_keys = [key_by_id[artifact_id] for artifact_id in result.produced_artifact_ids if artifact_id in key_by_id]
    LOGGER.warning(
        "orchestrator step result: task_id=%s step_id=%s action_id=%s status=%s duration_ms=%s artifact_keys=%s warnings=%s error=%s",
        ctx.task.task_id,
        step.step_id,
        step.action_id,
        result.status.value,
        result.duration_ms,
        artifact_keys,
        result.warnings,
        result.error_message or "",
    )

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import inspect
import logging
from app.core.exceptions import AppError
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext, GraphInvoker, GraphResolver, ProgressCallback
@@ -14,6 +15,8 @@ from app.modules.agent.engine.orchestrator.step_registry import StepRegistry
from app.modules.agent.engine.orchestrator.template_registry import ScenarioTemplateRegistry
from app.schemas.common import ModuleName
LOGGER = logging.getLogger(__name__)
class OrchestratorService:
def __init__(
@@ -74,6 +77,21 @@ class OrchestratorService:
)
result = self._assembler.assemble(ctx, step_results)
await self._emit_progress(progress_cb, "orchestrator.done", "Execution plan completed.")
LOGGER.warning(
"orchestrator decision: task_id=%s scenario=%s plan_status=%s steps=%s changeset_items=%s answer_len=%s",
task.task_id,
task.scenario.value,
result.meta.get("plan", {}).get("status", ""),
[
{
"step_id": step.step_id,
"status": step.status.value,
}
for step in result.steps
],
len(result.changeset),
len(result.answer or ""),
)
return result
async def _emit_progress(self, progress_cb: ProgressCallback | None, stage: str, message: str) -> None:

View File

@@ -2,29 +2,50 @@ from __future__ import annotations
import asyncio
from collections.abc import Callable
from typing import TYPE_CHECKING
from app.modules.agent.engine.graphs.progress_registry import progress_registry
from app.modules.agent.engine.orchestrator.actions import DocsActions, EditActions, ExplainActions, GherkinActions, ReviewActions
from app.modules.agent.engine.orchestrator.actions import (
CodeExplainActions,
DocsActions,
EditActions,
ExplainActions,
GherkinActions,
ProjectQaActions,
ReviewActions,
)
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType, PlanStep
if TYPE_CHECKING:
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
StepFn = Callable[[ExecutionContext], list[str]]
class StepRegistry:
def __init__(self) -> None:
def __init__(self, code_explain_retriever: CodeExplainRetrieverV2 | None = None) -> None:
code_explain = CodeExplainActions(code_explain_retriever)
explain = ExplainActions()
review = ReviewActions()
docs = DocsActions()
edits = EditActions()
gherkin = GherkinActions()
project_qa = ProjectQaActions()
self._functions: dict[str, StepFn] = {
"collect_state": self._collect_state,
"finalize_graph_output": self._finalize_graph_output,
"execute_project_qa_graph": self._collect_state,
"build_code_explain_pack": code_explain.build_code_explain_pack,
"collect_sources": explain.collect_sources,
"extract_logic": explain.extract_logic,
"summarize": explain.summarize,
"classify_project_question": project_qa.classify_project_question,
"collect_project_sources": project_qa.collect_project_sources,
"analyze_project_sources": project_qa.analyze_project_sources,
"build_project_answer_brief": project_qa.build_project_answer_brief,
"compose_project_answer": project_qa.compose_project_answer,
"fetch_source_doc": review.fetch_source_doc,
"normalize_document": review.normalize_document,
"structural_check": review.structural_check,
@@ -66,6 +87,7 @@ class StepRegistry:
state = {
"task_id": ctx.task.task_id,
"project_id": ctx.task.rag_session_id,
"scenario": ctx.task.scenario.value,
"message": ctx.task.user_message,
"progress_key": ctx.task.task_id,
"rag_context": str(ctx.task.metadata.get("rag_context", "")),
@@ -86,7 +108,7 @@ class StepRegistry:
raise RuntimeError(f"Unsupported graph_id: {graph_key}")
graph = ctx.graph_resolver(domain_id, process_id)
state = ctx.artifacts.get_content("agent_state", {}) or {}
state = self._build_graph_state(ctx)
if ctx.progress_cb is not None:
progress_registry.register(ctx.task.task_id, ctx.progress_cb)
@@ -96,9 +118,30 @@ class StepRegistry:
if ctx.progress_cb is not None:
progress_registry.unregister(ctx.task.task_id)
return self._store_graph_outputs(step, ctx, result)
def _build_graph_state(self, ctx: ExecutionContext) -> dict:
state = dict(ctx.artifacts.get_content("agent_state", {}) or {})
for item in ctx.artifacts.all_items():
state[item.key] = ctx.artifacts.get_content(item.key)
return state
def _store_graph_outputs(self, step: PlanStep, ctx: ExecutionContext, result: dict) -> list[str]:
if not isinstance(result, dict):
raise RuntimeError("graph_result must be an object")
if len(step.outputs) == 1 and step.outputs[0].key == "graph_result":
item = ctx.artifacts.put(key="graph_result", artifact_type=ArtifactType.STRUCTURED_JSON, content=result)
return [item.artifact_id]
artifact_ids: list[str] = []
for output in step.outputs:
value = result.get(output.key)
if value is None and output.required:
raise RuntimeError(f"graph_output_missing:{step.step_id}:{output.key}")
item = ctx.artifacts.put(key=output.key, artifact_type=output.type, content=value)
artifact_ids.append(item.artifact_id)
return artifact_ids
def _finalize_graph_output(self, ctx: ExecutionContext) -> list[str]:
raw = ctx.artifacts.get_content("graph_result", {}) or {}
if not isinstance(raw, dict):

View File

@@ -16,6 +16,8 @@ class ScenarioTemplateRegistry:
return builders.get(task.scenario, self._general)(task)
def _general(self, task: TaskSpec) -> ExecutionPlan:
if task.routing.domain_id == "project" and task.routing.process_id == "qa":
return self._project_qa(task)
steps = [
self._step("collect_state", "Collect state", "collect_state", outputs=[self._out("agent_state", ArtifactType.STRUCTURED_JSON)]),
self._step(
@@ -39,7 +41,77 @@ class ScenarioTemplateRegistry:
]
return self._plan(task, "general_qa_v1", steps, [self._gate("non_empty_answer_or_changeset")])
def _project_qa(self, task: TaskSpec) -> ExecutionPlan:
    """Build the layered project-QA execution plan.

    Pipeline: collect state -> conversation understanding -> question
    classification -> context retrieval -> (optional code-explain pack for
    EXPLAIN_PART) -> context analysis -> answer composition.
    """
    steps = [
        self._step("collect_state", "Collect state", "collect_state", outputs=[self._out("agent_state", ArtifactType.STRUCTURED_JSON)]),
        self._step(
            "conversation_understanding",
            "Conversation understanding",
            "execute_project_qa_graph",
            executor="graph",
            graph_id="project_qa/conversation_understanding",
            depends_on=["collect_state"],
            outputs=[self._out("resolved_request", ArtifactType.STRUCTURED_JSON)],
        ),
        self._step(
            "question_classification",
            "Question classification",
            "execute_project_qa_graph",
            executor="graph",
            graph_id="project_qa/question_classification",
            depends_on=["conversation_understanding"],
            outputs=[self._out("question_profile", ArtifactType.STRUCTURED_JSON)],
        ),
        self._step(
            "context_retrieval",
            "Context retrieval",
            "execute_project_qa_graph",
            executor="graph",
            graph_id="project_qa/context_retrieval",
            depends_on=["question_classification"],
            outputs=[self._out("source_bundle", ArtifactType.STRUCTURED_JSON)],
        ),
    ]
    # EXPLAIN_PART inserts an evidence-pack step between retrieval and
    # analysis; the analysis step is then rewired to depend on it.
    analysis_depends_on = ["context_retrieval"]
    if task.scenario == Scenario.EXPLAIN_PART:
        steps.append(
            self._step(
                "code_explain_pack_step",
                "Build code explain pack",
                "build_code_explain_pack",
                depends_on=["context_retrieval"],
                outputs=[self._out("explain_pack", ArtifactType.STRUCTURED_JSON)],
            )
        )
        analysis_depends_on = ["code_explain_pack_step"]
    steps.extend(
        [
            self._step(
                "context_analysis",
                "Context analysis",
                "execute_project_qa_graph",
                executor="graph",
                graph_id="project_qa/context_analysis",
                depends_on=analysis_depends_on,
                outputs=[self._out("analysis_brief", ArtifactType.STRUCTURED_JSON)],
            ),
            self._step(
                "answer_composition",
                "Answer composition",
                "execute_project_qa_graph",
                executor="graph",
                graph_id="project_qa/answer_composition",
                depends_on=["context_analysis"],
                # answer_brief is optional; final_answer must always be produced.
                outputs=[self._out("answer_brief", ArtifactType.STRUCTURED_JSON, required=False), self._out("final_answer", ArtifactType.TEXT)],
                gates=[self._gate("non_empty_answer_or_changeset")],
            ),
        ]
    )
    return self._plan(task, "project_qa_reasoning_v1", steps, [self._gate("non_empty_answer_or_changeset")])
def _explain(self, task: TaskSpec) -> ExecutionPlan:
if task.routing.domain_id == "project" and task.routing.process_id == "qa":
return self._project_qa(task)
steps = [
self._step("collect_sources", "Collect sources", "collect_sources", outputs=[self._out("sources", ArtifactType.STRUCTURED_JSON)]),
self._step("extract_logic", "Extract logic", "extract_logic", depends_on=["collect_sources"], outputs=[self._out("logic_model", ArtifactType.STRUCTURED_JSON)]),

View File

@@ -2,21 +2,28 @@ from pathlib import Path
from typing import TYPE_CHECKING
from app.modules.agent.llm import AgentLlmService
from app.modules.contracts import RagRetriever
if TYPE_CHECKING:
from app.modules.agent.repository import AgentRepository
from app.modules.agent.engine.router.router_service import RouterService
def build_router_service(llm: AgentLlmService, agent_repository: "AgentRepository") -> "RouterService":
def build_router_service(llm: AgentLlmService, agent_repository: "AgentRepository", rag: RagRetriever) -> "RouterService":
from app.modules.agent.engine.graphs import (
BaseGraphFactory,
DocsGraphFactory,
ProjectEditsGraphFactory,
ProjectQaAnalysisGraphFactory,
ProjectQaAnswerGraphFactory,
ProjectQaClassificationGraphFactory,
ProjectQaConversationGraphFactory,
ProjectQaGraphFactory,
ProjectQaRetrievalGraphFactory,
)
from app.modules.agent.engine.router.context_store import RouterContextStore
from app.modules.agent.engine.router.intent_classifier import IntentClassifier
from app.modules.agent.engine.router.intent_switch_detector import IntentSwitchDetector
from app.modules.agent.engine.router.registry import IntentRegistry
from app.modules.agent.engine.router.router_service import RouterService
@@ -26,13 +33,20 @@ def build_router_service(llm: AgentLlmService, agent_repository: "AgentRepositor
registry.register("project", "qa", ProjectQaGraphFactory(llm).build)
registry.register("project", "edits", ProjectEditsGraphFactory(llm).build)
registry.register("docs", "generation", DocsGraphFactory(llm).build)
registry.register("project_qa", "conversation_understanding", ProjectQaConversationGraphFactory(llm).build)
registry.register("project_qa", "question_classification", ProjectQaClassificationGraphFactory(llm).build)
registry.register("project_qa", "context_retrieval", ProjectQaRetrievalGraphFactory(rag, llm).build)
registry.register("project_qa", "context_analysis", ProjectQaAnalysisGraphFactory(llm).build)
registry.register("project_qa", "answer_composition", ProjectQaAnswerGraphFactory(llm).build)
classifier = IntentClassifier(llm)
switch_detector = IntentSwitchDetector()
context_store = RouterContextStore(agent_repository)
return RouterService(
registry=registry,
classifier=classifier,
context_store=context_store,
switch_detector=switch_detector,
)

View File

@@ -17,6 +17,7 @@ class RouterContextStore:
process_id: str,
user_message: str,
assistant_message: str,
decision_type: str = "start",
max_history: int = 10,
) -> None:
self._repo.update_router_context(
@@ -25,5 +26,6 @@ class RouterContextStore:
process_id=process_id,
user_message=user_message,
assistant_message=assistant_message,
decision_type=decision_type,
max_history=max_history,
)

View File

@@ -17,11 +17,7 @@ class IntentClassifier:
def __init__(self, llm: AgentLlmService) -> None:
self._llm = llm
def classify(self, user_message: str, context: RouterContext, mode: str = "auto") -> RouteDecision:
forced = self._from_mode(mode)
if forced:
return forced
def classify_new_intent(self, user_message: str, context: RouterContext) -> RouteDecision:
text = (user_message or "").strip().lower()
if text in self._short_confirmations and context.last_routing:
return RouteDecision(
@@ -30,6 +26,7 @@ class IntentClassifier:
confidence=1.0,
reason="short_confirmation",
use_previous=True,
decision_type="continue",
)
deterministic = self._deterministic_route(text)
@@ -45,9 +42,10 @@ class IntentClassifier:
process_id="general",
confidence=0.8,
reason="default",
decision_type="start",
)
def _from_mode(self, mode: str) -> RouteDecision | None:
def from_mode(self, mode: str) -> RouteDecision | None:
mapping = {
"project_qa": ("project", "qa"),
"project_edits": ("project", "edits"),
@@ -65,6 +63,8 @@ class IntentClassifier:
process_id=route[1],
confidence=1.0,
reason=f"mode_override:{mode}",
decision_type="switch",
explicit_switch=True,
)
def _classify_with_llm(self, user_message: str, context: RouterContext) -> RouteDecision | None:
@@ -96,6 +96,7 @@ class IntentClassifier:
process_id=route[1],
confidence=confidence,
reason=f"llm_router:{payload.get('reason', 'ok')}",
decision_type="start",
)
def _parse_llm_payload(self, raw: str) -> dict[str, str | float] | None:
@@ -139,6 +140,8 @@ class IntentClassifier:
process_id="edits",
confidence=0.97,
reason="deterministic_targeted_file_edit",
decision_type="switch",
explicit_switch=True,
)
if self._is_broad_docs_request(text):
return RouteDecision(
@@ -146,6 +149,8 @@ class IntentClassifier:
process_id="generation",
confidence=0.95,
reason="deterministic_docs_generation",
decision_type="switch",
explicit_switch=True,
)
return None

View File

@@ -0,0 +1,81 @@
from __future__ import annotations
import re
from app.modules.agent.engine.router.schemas import RouterContext
class IntentSwitchDetector:
    """Decides whether a new user message should switch the dialog away from the active intent."""

    _EXPLICIT_SWITCH_MARKERS = (
        "теперь",
        "а теперь",
        "давай теперь",
        "переключись",
        "переключаемся",
        "сейчас другое",
        "новая задача",
        "new task",
        "switch to",
        "now do",
        "instead",
    )
    _FOLLOW_UP_MARKERS = (
        "а еще",
        "а ещё",
        "подробнее",
        "почему",
        "зачем",
        "что если",
        "и еще",
        "и ещё",
        "покажи подробнее",
        "можешь подробнее",
    )

    def should_switch(self, user_message: str, context: RouterContext) -> bool:
        """True when a running dialog receives an explicit or strongly implied new task."""
        # No active dialog means there is nothing to switch away from.
        if not context.dialog_started or context.active_intent is None:
            return False
        normalized = " ".join((user_message or "").strip().lower().split())
        if not normalized:
            return False
        # Clarifying follow-ups always stay within the current intent.
        if self._is_follow_up(normalized):
            return False
        if any(phrase in normalized for phrase in self._EXPLICIT_SWITCH_MARKERS):
            return True
        if self._is_strong_targeted_edit_request(normalized):
            return True
        return self._is_strong_docs_request(normalized)

    def _is_follow_up(self, text: str) -> bool:
        """Detect a clarifying follow-up to the current conversation."""
        return any(phrase in text for phrase in self._FOLLOW_UP_MARKERS)

    def _is_strong_targeted_edit_request(self, text: str) -> bool:
        """An edit verb combined with a concrete file reference is a strong switch signal."""
        edit_markers = (
            "добавь",
            "добавить",
            "измени",
            "исправь",
            "обнови",
            "удали",
            "замени",
            "append",
            "update",
            "edit",
            "remove",
            "replace",
        )
        if not any(verb in text for verb in edit_markers):
            return False
        if "readme" in text:
            return True
        return bool(re.search(r"\b[\w.\-/]+\.(md|txt|rst|yaml|yml|json|toml|ini|cfg|py)\b", text))

    def _is_strong_docs_request(self, text: str) -> bool:
        """An explicit request to produce documentation."""
        docs_markers = (
            "подготовь документац",
            "сгенерируй документац",
            "создай документац",
            "опиши документац",
            "generate documentation",
            "write documentation",
        )
        return any(phrase in text for phrase in docs_markers)

View File

@@ -1,7 +1,8 @@
from app.modules.agent.engine.router.context_store import RouterContextStore
from app.modules.agent.engine.router.intent_classifier import IntentClassifier
from app.modules.agent.engine.router.intent_switch_detector import IntentSwitchDetector
from app.modules.agent.engine.router.registry import IntentRegistry
from app.modules.agent.engine.router.schemas import RouteResolution
from app.modules.agent.engine.router.schemas import RouteDecision, RouteResolution
class RouterService:
@@ -10,27 +11,48 @@ class RouterService:
registry: IntentRegistry,
classifier: IntentClassifier,
context_store: RouterContextStore,
switch_detector: IntentSwitchDetector | None = None,
min_confidence: float = 0.7,
) -> None:
self._registry = registry
self._classifier = classifier
self._ctx = context_store
self._switch_detector = switch_detector or IntentSwitchDetector()
self._min_confidence = min_confidence
def resolve(self, user_message: str, conversation_key: str, mode: str = "auto") -> RouteResolution:
context = self._ctx.get(conversation_key)
decision = self._classifier.classify(user_message, context, mode=mode)
if decision.confidence < self._min_confidence:
forced = self._classifier.from_mode(mode)
if forced:
return self._resolution(forced)
if not context.dialog_started or context.active_intent is None:
decision = self._classifier.classify_new_intent(user_message, context)
if not self._is_acceptable(decision):
return self._fallback("low_confidence")
if not self._registry.is_valid(decision.domain_id, decision.process_id):
return self._fallback("invalid_route")
return RouteResolution(
domain_id=decision.domain_id,
process_id=decision.process_id,
confidence=decision.confidence,
reason=decision.reason,
fallback_used=False,
return self._resolution(
decision.model_copy(
update={
"decision_type": "start",
"explicit_switch": False,
}
)
)
if self._switch_detector.should_switch(user_message, context):
decision = self._classifier.classify_new_intent(user_message, context)
if self._is_acceptable(decision):
return self._resolution(
decision.model_copy(
update={
"decision_type": "switch",
"explicit_switch": True,
}
)
)
return self._continue_current(context, "explicit_switch_unresolved_keep_current")
return self._continue_current(context, "continue_current_intent")
def persist_context(
self,
@@ -40,6 +62,7 @@ class RouterService:
process_id: str,
user_message: str,
assistant_message: str,
decision_type: str = "start",
) -> None:
self._ctx.update(
conversation_key,
@@ -47,6 +70,7 @@ class RouterService:
process_id=process_id,
user_message=user_message,
assistant_message=assistant_message,
decision_type=decision_type,
)
def graph_factory(self, domain_id: str, process_id: str):
@@ -59,4 +83,32 @@ class RouterService:
confidence=0.0,
reason=reason,
fallback_used=True,
decision_type="start",
explicit_switch=False,
)
def _continue_current(self, context, reason: str) -> RouteResolution:
    """Keep routing on the currently active (or last-used) intent.

    Falls back to default/general when the context carries no route at all.
    """
    default_route = {"domain_id": "default", "process_id": "general"}
    route = context.active_intent or context.last_routing or default_route
    return RouteResolution(
        domain_id=str(route["domain_id"]),
        process_id=str(route["process_id"]),
        confidence=1.0,
        reason=reason,
        fallback_used=False,
        decision_type="continue",
        explicit_switch=False,
    )
def _is_acceptable(self, decision: RouteDecision) -> bool:
return decision.confidence >= self._min_confidence and self._registry.is_valid(decision.domain_id, decision.process_id)
def _resolution(self, decision: RouteDecision) -> RouteResolution:
    """Map an accepted RouteDecision 1:1 onto a non-fallback RouteResolution."""
    payload = {
        "domain_id": decision.domain_id,
        "process_id": decision.process_id,
        "confidence": decision.confidence,
        "reason": decision.reason,
        "fallback_used": False,
        "decision_type": decision.decision_type,
        "explicit_switch": decision.explicit_switch,
    }
    return RouteResolution(**payload)

View File

@@ -7,6 +7,8 @@ class RouteDecision(BaseModel):
confidence: float = 0.0
reason: str = ""
use_previous: bool = False
decision_type: str = "start"
explicit_switch: bool = False
@field_validator("confidence")
@classmethod
@@ -20,8 +22,13 @@ class RouteResolution(BaseModel):
confidence: float
reason: str
fallback_used: bool = False
decision_type: str = "start"
explicit_switch: bool = False
class RouterContext(BaseModel):
last_routing: dict[str, str] | None = None
message_history: list[dict[str, str]] = Field(default_factory=list)
active_intent: dict[str, str] | None = None
dialog_started: bool = False
turn_index: int = 0

View File

@@ -1,14 +1,40 @@
import logging
from app.modules.agent.prompt_loader import PromptLoader
from app.modules.shared.gigachat.client import GigaChatClient
LOGGER = logging.getLogger(__name__)
def _truncate_for_log(text: str, max_chars: int = 1500) -> str:
value = (text or "").replace("\n", "\\n").strip()
if len(value) <= max_chars:
return value
return value[:max_chars].rstrip() + "...[truncated]"
class AgentLlmService:
def __init__(self, client: GigaChatClient, prompts: PromptLoader) -> None:
self._client = client
self._prompts = prompts
def generate(self, prompt_name: str, user_input: str) -> str:
def generate(self, prompt_name: str, user_input: str, *, log_context: str | None = None) -> str:
system_prompt = self._prompts.load(prompt_name)
if not system_prompt:
system_prompt = "You are a helpful assistant."
return self._client.complete(system_prompt=system_prompt, user_prompt=user_input)
if log_context:
LOGGER.warning(
"graph llm input: context=%s prompt=%s user_input=%s",
log_context,
prompt_name,
_truncate_for_log(user_input),
)
output = self._client.complete(system_prompt=system_prompt, user_prompt=user_input)
if log_context:
LOGGER.warning(
"graph llm output: context=%s prompt=%s output=%s",
log_context,
prompt_name,
_truncate_for_log(output),
)
return output

View File

@@ -1,5 +1,8 @@
from __future__ import annotations
from fastapi import APIRouter
from pydantic import BaseModel, HttpUrl
from typing import TYPE_CHECKING
from app.modules.agent.changeset_validator import ChangeSetValidator
from app.modules.agent.confluence_service import ConfluenceService
@@ -19,12 +22,17 @@ class ConfluenceFetchRequest(BaseModel):
url: HttpUrl
if TYPE_CHECKING:
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
class AgentModule:
def __init__(
self,
rag_retriever: RagRetriever,
agent_repository: AgentRepository,
story_context_repository: StoryContextRepository,
code_explain_retriever: CodeExplainRetrieverV2 | None = None,
) -> None:
self.confluence = ConfluenceService()
self.changeset_validator = ChangeSetValidator()
@@ -34,14 +42,16 @@ class AgentModule:
client = GigaChatClient(settings, token_provider)
prompt_loader = PromptLoader()
llm = AgentLlmService(client=client, prompts=prompt_loader)
self.llm = llm
story_recorder = StorySessionRecorder(story_context_repository)
self.runtime = GraphAgentRuntime(
rag=rag_retriever,
confluence=self.confluence,
changeset_validator=self.changeset_validator,
llm=llm,
llm=self.llm,
agent_repository=agent_repository,
story_recorder=story_recorder,
code_explain_retriever=code_explain_retriever,
)
def internal_router(self) -> APIRouter:

View File

@@ -0,0 +1,17 @@
Объяснение кода осуществляется только с использованием предоставленного ExplainPack.
Правила:
- Сначала используйте доказательства.
- Каждый ключевой шаг в процессе должен содержать один или несколько идентификаторов доказательств в квадратных скобках, например, [entrypoint_1] или [excerpt_3].
- Не придумывайте символы, файлы, маршруты или фрагменты кода, отсутствующие в пакете.
- Если доказательства неполные, укажите это явно.
- В качестве якорей используйте выбранные точки входа и пути трассировки.
Верните Markdown со следующей структурой:
1. Краткое описание
2. Пошаговый процесс
3. Данные и побочные эффекты
4. Ошибки и граничные случаи
5. Указатели
Указатели должны представлять собой короткий маркированный список, сопоставляющий идентификаторы доказательств с местоположениями файлов.

View File

@@ -0,0 +1,24 @@
Ты intent-router для layered RAG.
На вход ты получаешь JSON с полями:
- message: текущий запрос пользователя
- active_intent: текущий активный intent диалога или null
- last_query: предыдущий запрос пользователя
- allowed_intents: допустимые intent'ы
Выбери ровно один intent из allowed_intents.
Верни только JSON без markdown и пояснений.
Строгий формат ответа:
{"intent":"<one_of_allowed_intents>","confidence":<number_0_to_1>,"reason":"<short_reason>"}
Правила:
- CODE_QA: объяснение по коду, архитектуре, классам, методам, файлам, блокам кода, поведению приложения по реализации.
- DOCS_QA: объяснение по документации, README, markdown, specs, runbooks, разделам документации.
- GENERATE_DOCS_FROM_CODE: просьба сгенерировать, подготовить или обновить документацию по коду.
- PROJECT_MISC: прочие вопросы по проекту, не относящиеся явно к коду или документации.
Приоритет:
- Если пользователь просит именно подготовить документацию по коду, выбирай GENERATE_DOCS_FROM_CODE.
- Если пользователь спрашивает про конкретный класс, файл, метод или блок кода, выбирай CODE_QA.
- Если пользователь спрашивает про README, docs, markdown или конкретную документацию, выбирай DOCS_QA.
- Если сигнал неочевиден, выбирай PROJECT_MISC и confidence <= 0.6.

View File

@@ -18,6 +18,10 @@ class AgentRepository:
conversation_key VARCHAR(64) PRIMARY KEY,
last_domain_id VARCHAR(64) NULL,
last_process_id VARCHAR(64) NULL,
active_domain_id VARCHAR(64) NULL,
active_process_id VARCHAR(64) NULL,
dialog_started BOOLEAN NOT NULL DEFAULT FALSE,
turn_index INTEGER NOT NULL DEFAULT 0,
message_history_json TEXT NOT NULL DEFAULT '[]',
updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
)
@@ -64,14 +68,24 @@ class AgentRepository:
"""
)
)
self._ensure_router_context_columns(conn)
conn.commit()
def _ensure_router_context_columns(self, conn) -> None:
for statement in (
"ALTER TABLE router_context ADD COLUMN IF NOT EXISTS active_domain_id VARCHAR(64) NULL",
"ALTER TABLE router_context ADD COLUMN IF NOT EXISTS active_process_id VARCHAR(64) NULL",
"ALTER TABLE router_context ADD COLUMN IF NOT EXISTS dialog_started BOOLEAN NOT NULL DEFAULT FALSE",
"ALTER TABLE router_context ADD COLUMN IF NOT EXISTS turn_index INTEGER NOT NULL DEFAULT 0",
):
conn.execute(text(statement))
def get_router_context(self, conversation_key: str) -> RouterContext:
with get_engine().connect() as conn:
row = conn.execute(
text(
"""
SELECT last_domain_id, last_process_id, message_history_json
SELECT last_domain_id, last_process_id, active_domain_id, active_process_id, dialog_started, turn_index, message_history_json
FROM router_context
WHERE conversation_key = :key
"""
@@ -82,7 +96,7 @@ class AgentRepository:
if not row:
return RouterContext()
history_raw = row[2] or "[]"
history_raw = row[6] or "[]"
try:
history = json.loads(history_raw)
except json.JSONDecodeError:
@@ -91,6 +105,9 @@ class AgentRepository:
last = None
if row[0] and row[1]:
last = {"domain_id": str(row[0]), "process_id": str(row[1])}
active = None
if row[2] and row[3]:
active = {"domain_id": str(row[2]), "process_id": str(row[3])}
clean_history = []
for item in history if isinstance(history, list) else []:
@@ -101,7 +118,13 @@ class AgentRepository:
if role in {"user", "assistant"} and content:
clean_history.append({"role": role, "content": content})
return RouterContext(last_routing=last, message_history=clean_history)
return RouterContext(
last_routing=last,
message_history=clean_history,
active_intent=active or last,
dialog_started=bool(row[4]),
turn_index=int(row[5] or 0),
)
def update_router_context(
self,
@@ -111,6 +134,7 @@ class AgentRepository:
process_id: str,
user_message: str,
assistant_message: str,
decision_type: str,
max_history: int,
) -> None:
current = self.get_router_context(conversation_key)
@@ -121,17 +145,29 @@ class AgentRepository:
history.append({"role": "assistant", "content": assistant_message})
if max_history > 0:
history = history[-max_history:]
current_active = current.active_intent or current.last_routing or {"domain_id": domain_id, "process_id": process_id}
next_active = (
{"domain_id": domain_id, "process_id": process_id}
if decision_type in {"start", "switch"}
else current_active
)
next_turn_index = max(0, int(current.turn_index or 0)) + (1 if user_message else 0)
with get_engine().connect() as conn:
conn.execute(
text(
"""
INSERT INTO router_context (
conversation_key, last_domain_id, last_process_id, message_history_json
) VALUES (:key, :domain, :process, :history)
conversation_key, last_domain_id, last_process_id, active_domain_id, active_process_id,
dialog_started, turn_index, message_history_json
) VALUES (:key, :domain, :process, :active_domain, :active_process, :dialog_started, :turn_index, :history)
ON CONFLICT (conversation_key) DO UPDATE SET
last_domain_id = EXCLUDED.last_domain_id,
last_process_id = EXCLUDED.last_process_id,
active_domain_id = EXCLUDED.active_domain_id,
active_process_id = EXCLUDED.active_process_id,
dialog_started = EXCLUDED.dialog_started,
turn_index = EXCLUDED.turn_index,
message_history_json = EXCLUDED.message_history_json,
updated_at = CURRENT_TIMESTAMP
"""
@@ -140,6 +176,10 @@ class AgentRepository:
"key": conversation_key,
"domain": domain_id,
"process": process_id,
"active_domain": str(next_active["domain_id"]),
"active_process": str(next_active["process_id"]),
"dialog_started": True,
"turn_index": next_turn_index,
"history": json.dumps(history, ensure_ascii=False),
},
)

View File

@@ -1,12 +1,16 @@
from __future__ import annotations
from dataclasses import dataclass, field
from collections.abc import Awaitable, Callable
import inspect
import logging
import re
from typing import TYPE_CHECKING
from app.modules.agent.engine.orchestrator import OrchestratorService, TaskSpecBuilder
from app.modules.agent.engine.orchestrator.metrics_persister import MetricsPersister
from app.modules.agent.engine.orchestrator.models import RoutingMeta
from app.modules.agent.engine.orchestrator.step_registry import StepRegistry
from app.modules.agent.engine.router import build_router_service
from app.modules.agent.llm import AgentLlmService
from app.modules.agent.story_session_recorder import StorySessionRecorder
@@ -22,6 +26,9 @@ from app.schemas.common import ModuleName
LOGGER = logging.getLogger(__name__)
if TYPE_CHECKING:
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
def _truncate_for_log(text: str | None, max_chars: int = 1500) -> str:
value = (text or "").replace("\n", "\\n").strip()
@@ -47,13 +54,14 @@ class GraphAgentRuntime:
llm: AgentLlmService,
agent_repository: AgentRepository,
story_recorder: StorySessionRecorder | None = None,
code_explain_retriever: CodeExplainRetrieverV2 | None = None,
) -> None:
self._rag = rag
self._confluence = confluence
self._changeset_validator = changeset_validator
self._router = build_router_service(llm, agent_repository)
self._router = build_router_service(llm, agent_repository, rag)
self._task_spec_builder = TaskSpecBuilder()
self._orchestrator = OrchestratorService()
self._orchestrator = OrchestratorService(step_registry=StepRegistry(code_explain_retriever))
self._metrics_persister = MetricsPersister(agent_repository)
self._story_recorder = story_recorder
self._checkpointer = None
@@ -70,7 +78,7 @@ class GraphAgentRuntime:
files: list[dict],
progress_cb: Callable[[str, str, str, dict | None], Awaitable[None] | None] | None = None,
) -> AgentResult:
LOGGER.warning(
LOGGER.info(
"GraphAgentRuntime.run started: task_id=%s dialog_session_id=%s mode=%s",
task_id,
dialog_session_id,
@@ -96,9 +104,7 @@ class GraphAgentRuntime:
meta={"domain_id": route.domain_id, "process_id": route.process_id},
)
files_map = self._build_files_map(files)
await self._emit_progress(progress_cb, "agent.rag", "Собираю релевантный контекст из RAG.")
rag_ctx = await self._rag.retrieve(rag_session_id, message)
rag_ctx: list[dict] = []
await self._emit_progress(progress_cb, "agent.attachments", "Обрабатываю дополнительные вложения.")
conf_pages = await self._fetch_confluence_pages(attachments)
route_meta = RoutingMeta(
@@ -157,8 +163,9 @@ class GraphAgentRuntime:
process_id=route.process_id,
user_message=message,
assistant_message=final_answer,
decision_type=route.decision_type,
)
LOGGER.warning(
LOGGER.info(
"final agent answer: task_id=%s route=%s/%s answer=%s",
task_id,
route.domain_id,
@@ -178,7 +185,7 @@ class GraphAgentRuntime:
answer=final_answer,
meta={
"route": route.model_dump(),
"used_rag": True,
"used_rag": False,
"used_confluence": bool(conf_pages),
"changeset_filtered_out": True,
"orchestrator": orchestrator_meta,
@@ -193,6 +200,7 @@ class GraphAgentRuntime:
process_id=route.process_id,
user_message=message,
assistant_message=final_answer or f"changeset:{len(validated)}",
decision_type=route.decision_type,
)
final = AgentResult(
result_type=TaskResultType.CHANGESET,
@@ -200,7 +208,7 @@ class GraphAgentRuntime:
changeset=validated,
meta={
"route": route.model_dump(),
"used_rag": True,
"used_rag": False,
"used_confluence": bool(conf_pages),
"orchestrator": orchestrator_meta,
"orchestrator_steps": orchestrator_steps,
@@ -214,7 +222,7 @@ class GraphAgentRuntime:
scenario=str(orchestrator_meta.get("scenario", task_spec.scenario.value)),
quality=quality_meta,
)
LOGGER.warning(
LOGGER.info(
"GraphAgentRuntime.run completed: task_id=%s route=%s/%s result_type=%s changeset_items=%s",
task_id,
route.domain_id,
@@ -222,7 +230,7 @@ class GraphAgentRuntime:
final.result_type.value,
len(final.changeset),
)
LOGGER.warning(
LOGGER.info(
"final agent answer: task_id=%s route=%s/%s answer=%s",
task_id,
route.domain_id,
@@ -239,13 +247,14 @@ class GraphAgentRuntime:
process_id=route.process_id,
user_message=message,
assistant_message=final_answer,
decision_type=route.decision_type,
)
final = AgentResult(
result_type=TaskResultType.ANSWER,
answer=final_answer,
meta={
"route": route.model_dump(),
"used_rag": True,
"used_rag": False,
"used_confluence": bool(conf_pages),
"orchestrator": orchestrator_meta,
"orchestrator_steps": orchestrator_steps,
@@ -259,7 +268,7 @@ class GraphAgentRuntime:
scenario=str(orchestrator_meta.get("scenario", task_spec.scenario.value)),
quality=quality_meta,
)
LOGGER.warning(
LOGGER.info(
"GraphAgentRuntime.run completed: task_id=%s route=%s/%s result_type=%s answer_len=%s",
task_id,
route.domain_id,
@@ -267,7 +276,7 @@ class GraphAgentRuntime:
final.result_type.value,
len(final.answer or ""),
)
LOGGER.warning(
LOGGER.info(
"final agent answer: task_id=%s route=%s/%s answer=%s",
task_id,
route.domain_id,
@@ -351,7 +360,7 @@ class GraphAgentRuntime:
factory = self._router.graph_factory("default", "general")
if factory is None:
raise RuntimeError("No graph factory configured")
LOGGER.warning("_resolve_graph resolved: domain_id=%s process_id=%s", domain_id, process_id)
LOGGER.debug("_resolve_graph resolved: domain_id=%s process_id=%s", domain_id, process_id)
return factory(self._checkpointer)
def _invoke_graph(self, graph, state: dict, dialog_session_id: str):
@@ -365,7 +374,7 @@ class GraphAgentRuntime:
for item in attachments:
if item.get("type") == "confluence_url":
pages.append(await self._confluence.fetch_page(item["url"]))
LOGGER.warning("_fetch_confluence_pages completed: pages=%s", len(pages))
LOGGER.info("_fetch_confluence_pages completed: pages=%s", len(pages))
return pages
def _format_rag(self, items: list[dict]) -> str:
@@ -411,7 +420,7 @@ class GraphAgentRuntime:
"content": str(item.get("content", "")),
"content_hash": str(item.get("content_hash", "")),
}
LOGGER.warning("_build_files_map completed: files=%s", len(output))
LOGGER.debug("_build_files_map completed: files=%s", len(output))
return output
def _lookup_file(self, files_map: dict[str, dict], path: str) -> dict | None:
@@ -437,7 +446,7 @@ class GraphAgentRuntime:
)
item.base_hash = str(source["content_hash"])
enriched.append(item)
LOGGER.warning("_enrich_changeset_hashes completed: items=%s", len(enriched))
LOGGER.debug("_enrich_changeset_hashes completed: items=%s", len(enriched))
return enriched
def _sanitize_changeset(self, items: list[ChangeItem], files_map: dict[str, dict]) -> list[ChangeItem]:
@@ -462,7 +471,7 @@ class GraphAgentRuntime:
continue
sanitized.append(item)
if dropped_noop or dropped_ws:
LOGGER.warning(
LOGGER.info(
"_sanitize_changeset dropped items: noop=%s whitespace_only=%s kept=%s",
dropped_noop,
dropped_ws,

View File

@@ -1,9 +1,14 @@
from app.modules.agent.module import AgentModule
from app.modules.agent.repository import AgentRepository
from app.modules.agent.story_context_repository import StoryContextRepository, StoryContextSchemaRepository
from app.modules.chat.direct_service import CodeExplainChatService
from app.modules.chat.dialog_store import DialogSessionStore
from app.modules.chat.repository import ChatRepository
from app.modules.chat.module import ChatModule
from app.modules.chat.session_resolver import ChatSessionResolver
from app.modules.chat.task_store import TaskStore
from app.modules.rag.persistence.repository import RagRepository
from app.modules.rag.explain import CodeExplainRetrieverV2, CodeGraphRepository, LayeredRetrievalGateway
from app.modules.rag_session.module import RagModule
from app.modules.rag_repo.module import RagRepoModule
from app.modules.shared.bootstrap import bootstrap_database
@@ -20,16 +25,32 @@ class ModularApplication:
self.agent_repository = AgentRepository()
self.story_context_schema_repository = StoryContextSchemaRepository()
self.story_context_repository = StoryContextRepository()
self.chat_tasks = TaskStore()
self.rag_session = RagModule(event_bus=self.events, retry=self.retry, repository=self.rag_repository)
self.rag_repo = RagRepoModule(
story_context_repository=self.story_context_repository,
rag_repository=self.rag_repository,
)
self.code_explain_retriever = CodeExplainRetrieverV2(
gateway=LayeredRetrievalGateway(self.rag_repository, self.rag_session.embedder),
graph_repository=CodeGraphRepository(),
)
self.agent = AgentModule(
rag_retriever=self.rag_session.rag,
agent_repository=self.agent_repository,
story_context_repository=self.story_context_repository,
code_explain_retriever=self.code_explain_retriever,
)
self.direct_chat = CodeExplainChatService(
retriever=self.code_explain_retriever,
llm=self.agent.llm,
session_resolver=ChatSessionResolver(
dialogs=DialogSessionStore(self.chat_repository),
rag_session_exists=lambda rag_session_id: self.rag_session.sessions.get(rag_session_id) is not None,
),
task_store=self.chat_tasks,
message_sink=self.chat_repository.add_message,
)
self.chat = ChatModule(
agent_runner=self.agent.runtime,
@@ -37,6 +58,8 @@ class ModularApplication:
retry=self.retry,
rag_sessions=self.rag_session.sessions,
repository=self.chat_repository,
direct_chat=self.direct_chat,
task_store=self.chat_tasks,
)
def startup(self) -> None:

View File

@@ -1,6 +1,10 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING
from uuid import uuid4
if TYPE_CHECKING:
from app.modules.chat.repository import ChatRepository

View File

@@ -0,0 +1,71 @@
from __future__ import annotations
import logging
from uuid import uuid4
from app.modules.agent.llm import AgentLlmService
from app.modules.chat.evidence_gate import CodeExplainEvidenceGate
from app.modules.chat.session_resolver import ChatSessionResolver
from app.modules.chat.task_store import TaskState, TaskStore
from app.modules.rag.explain import CodeExplainRetrieverV2, PromptBudgeter
from app.schemas.chat import ChatMessageRequest, TaskQueuedResponse, TaskResultType, TaskStatus
LOGGER = logging.getLogger(__name__)
class CodeExplainChatService:
    """Direct (non-orchestrated) chat flow for code-explanation questions.

    Handles a chat message end to end in one pass: resolves the dialog/RAG
    session pair, builds an evidence pack via the layered retriever, checks
    the pack against the evidence gate, and either asks the LLM for an
    answer or returns the gate's refusal text.  The task is saved as DONE
    before returning, so callers receive an already-completed task id.
    """

    def __init__(
        self,
        retriever: "CodeExplainRetrieverV2",
        llm: "AgentLlmService",
        session_resolver: "ChatSessionResolver",
        task_store: "TaskStore",
        message_sink,
        budgeter: "PromptBudgeter | None" = None,
        evidence_gate: "CodeExplainEvidenceGate | None" = None,
    ) -> None:
        self._retriever = retriever
        self._llm = llm
        self._session_resolver = session_resolver
        self._task_store = task_store
        # Callable persisting a chat message; called as
        # message_sink(dialog_session_id, role, content, task_id=...).
        self._message_sink = message_sink
        self._budgeter = budgeter or PromptBudgeter()
        self._evidence_gate = evidence_gate or CodeExplainEvidenceGate()

    async def handle_message(self, request: "ChatMessageRequest") -> "TaskQueuedResponse":
        """Process *request* synchronously and return the completed task handle.

        Raises:
            AppError: propagated from the session resolver when the
                dialog/RAG session pair cannot be resolved.
        """
        dialog_session_id, rag_session_id = self._session_resolver.resolve(request)
        task_id = str(uuid4())
        task = TaskState(task_id=task_id, status=TaskStatus.RUNNING)
        self._task_store.save(task)
        self._message_sink(dialog_session_id, "user", request.message, task_id=task_id)
        pack = self._retriever.build_pack(
            rag_session_id,
            request.message,
            file_candidates=[item.model_dump(mode="json") for item in request.files],
        )
        decision = self._evidence_gate.evaluate(pack)
        if decision.passed:
            prompt_input = self._budgeter.build_prompt_input(request.message, pack)
            answer = self._llm.generate(
                "code_explain_answer_v2",
                prompt_input,
                log_context="chat.code_explain.direct",
            ).strip()
        else:
            # Not enough grounded evidence: reply with the gate's diagnostic text
            # instead of letting the LLM guess.
            answer = decision.answer
        self._message_sink(dialog_session_id, "assistant", answer, task_id=task_id)
        task.status = TaskStatus.DONE
        task.result_type = TaskResultType.ANSWER
        task.answer = answer
        self._task_store.save(task)
        # INFO, not WARNING: this is a routine completion log, consistent with
        # the warning->info level used for completion logs elsewhere in the
        # chat/agent modules.
        LOGGER.info(
            "direct code explain response: task_id=%s rag_session_id=%s excerpts=%s missing=%s",
            task_id,
            rag_session_id,
            len(pack.code_excerpts),
            pack.missing,
        )
        return TaskQueuedResponse(
            task_id=task_id,
            status=TaskStatus.DONE.value,
        )

View File

@@ -0,0 +1,62 @@
from __future__ import annotations
from dataclasses import dataclass, field
from app.modules.rag.explain.models import ExplainPack
@dataclass(slots=True)
class EvidenceGateDecision:
passed: bool
answer: str = ""
diagnostics: dict[str, list[str]] = field(default_factory=dict)
class CodeExplainEvidenceGate:
def __init__(self, min_excerpts: int = 2) -> None:
self._min_excerpts = min_excerpts
def evaluate(self, pack: ExplainPack) -> EvidenceGateDecision:
diagnostics = self._diagnostics(pack)
if len(pack.code_excerpts) >= self._min_excerpts:
return EvidenceGateDecision(passed=True, diagnostics=diagnostics)
return EvidenceGateDecision(
passed=False,
answer=self._build_answer(pack, diagnostics),
diagnostics=diagnostics,
)
def _diagnostics(self, pack: ExplainPack) -> dict[str, list[str]]:
return {
"entrypoints": [item.title for item in pack.selected_entrypoints[:3] if item.title],
"symbols": [item.title for item in pack.seed_symbols[:5] if item.title],
"paths": self._paths(pack),
"missing": list(pack.missing),
}
def _paths(self, pack: ExplainPack) -> list[str]:
values: list[str] = []
for item in pack.selected_entrypoints + pack.seed_symbols:
path = item.source or (item.location.path if item.location else "")
if path and path not in values:
values.append(path)
for excerpt in pack.code_excerpts:
if excerpt.path and excerpt.path not in values:
values.append(excerpt.path)
return values[:6]
def _build_answer(self, pack: ExplainPack, diagnostics: dict[str, list[str]]) -> str:
lines = [
"Недостаточно опоры в коде, чтобы дать объяснение без догадок.",
"",
f"Найдено фрагментов кода: {len(pack.code_excerpts)} из {self._min_excerpts} минимально необходимых.",
]
if diagnostics["paths"]:
lines.append(f"Пути: {', '.join(diagnostics['paths'])}")
if diagnostics["entrypoints"]:
lines.append(f"Entrypoints: {', '.join(diagnostics['entrypoints'])}")
if diagnostics["symbols"]:
lines.append(f"Символы: {', '.join(diagnostics['symbols'])}")
if diagnostics["missing"]:
lines.append(f"Диагностика: {', '.join(diagnostics['missing'])}")
return "\n".join(lines).strip()

View File

@@ -1,13 +1,16 @@
from __future__ import annotations
import os
from typing import TYPE_CHECKING
from fastapi import APIRouter, Header
from fastapi.responses import StreamingResponse
from app.core.exceptions import AppError
from app.modules.chat.direct_service import CodeExplainChatService
from app.modules.chat.dialog_store import DialogSessionStore
from app.modules.chat.repository import ChatRepository
from app.modules.chat.service import ChatOrchestrator
from app.modules.chat.task_store import TaskStore
from app.modules.contracts import AgentRunner
from app.modules.rag_session.session_store import RagSessionStore
from app.modules.shared.event_bus import EventBus
from app.modules.shared.idempotency_store import IdempotencyStore
from app.modules.shared.retry_executor import RetryExecutor
@@ -20,6 +23,11 @@ from app.schemas.chat import (
)
from app.schemas.common import ModuleName
if TYPE_CHECKING:
from app.modules.chat.repository import ChatRepository
from app.modules.contracts import AgentRunner
from app.modules.rag_session.session_store import RagSessionStore
class ChatModule:
def __init__(
@@ -29,12 +37,16 @@ class ChatModule:
retry: RetryExecutor,
rag_sessions: RagSessionStore,
repository: ChatRepository,
direct_chat: CodeExplainChatService | None = None,
task_store: TaskStore | None = None,
) -> None:
self._rag_sessions = rag_sessions
self.tasks = TaskStore()
self._simple_code_explain_only = os.getenv("SIMPLE_CODE_EXPLAIN_ONLY", "true").lower() in {"1", "true", "yes"}
self.tasks = task_store or TaskStore()
self.dialogs = DialogSessionStore(repository)
self.idempotency = IdempotencyStore()
self.events = event_bus
self.direct_chat = direct_chat
self.chat = ChatOrchestrator(
task_store=self.tasks,
dialogs=self.dialogs,
@@ -59,11 +71,13 @@ class ChatModule:
rag_session_id=dialog.rag_session_id,
)
@router.post("/api/chat/messages", response_model=TaskQueuedResponse)
@router.post("/api/chat/messages", response_model=TaskQueuedResponse | TaskResultResponse)
async def send_message(
request: ChatMessageRequest,
idempotency_key: str | None = Header(default=None, alias="Idempotency-Key"),
) -> TaskQueuedResponse:
) -> TaskQueuedResponse | TaskResultResponse:
if self._simple_code_explain_only and self.direct_chat is not None:
return await self.direct_chat.handle_message(request)
task = await self.chat.enqueue_message(request, idempotency_key)
return TaskQueuedResponse(task_id=task.task_id, status=task.status.value)

View File

@@ -6,6 +6,7 @@ from app.modules.contracts import AgentRunner
from app.schemas.chat import ChatMessageRequest, TaskResultType, TaskStatus
from app.schemas.common import ErrorPayload, ModuleName
from app.modules.chat.dialog_store import DialogSessionStore
from app.modules.chat.session_resolver import ChatSessionResolver
from app.modules.chat.task_store import TaskState, TaskStore
from app.modules.shared.event_bus import EventBus
from app.modules.shared.idempotency_store import IdempotencyStore
@@ -41,6 +42,7 @@ class ChatOrchestrator:
self._retry = retry
self._rag_session_exists = rag_session_exists
self._message_sink = message_sink
self._session_resolver = ChatSessionResolver(dialogs, rag_session_exists)
async def enqueue_message(
self,
@@ -52,7 +54,7 @@ class ChatOrchestrator:
if existing:
task = self._task_store.get(existing)
if task:
LOGGER.warning(
LOGGER.info(
"enqueue_message reused task by idempotency key: task_id=%s mode=%s",
task.task_id,
request.mode.value,
@@ -63,7 +65,7 @@ class ChatOrchestrator:
if idempotency_key:
self._idempotency.put(idempotency_key, task.task_id)
asyncio.create_task(self._process_task(task.task_id, request))
LOGGER.warning(
LOGGER.info(
"enqueue_message created task: task_id=%s mode=%s",
task.task_id,
request.mode.value,
@@ -135,6 +137,13 @@ class ChatOrchestrator:
task.changeset = result.changeset
if task.result_type == TaskResultType.ANSWER and task.answer:
self._message_sink(dialog_session_id, "assistant", task.answer, task_id=task_id)
LOGGER.warning(
"outgoing chat response: task_id=%s dialog_session_id=%s result_type=%s answer=%s",
task_id,
dialog_session_id,
task.result_type.value,
_truncate_for_log(task.answer),
)
elif task.result_type == TaskResultType.CHANGESET:
self._message_sink(
dialog_session_id,
@@ -146,6 +155,14 @@ class ChatOrchestrator:
"changeset": [item.model_dump(mode="json") for item in task.changeset],
},
)
LOGGER.warning(
"outgoing chat response: task_id=%s dialog_session_id=%s result_type=%s changeset_items=%s answer=%s",
task_id,
dialog_session_id,
task.result_type.value,
len(task.changeset),
_truncate_for_log(task.answer or ""),
)
self._task_store.save(task)
await self._events.publish(
task_id,
@@ -160,7 +177,7 @@ class ChatOrchestrator:
},
)
await self._publish_progress(task_id, "task.done", "Обработка завершена.", progress=100)
LOGGER.warning(
LOGGER.info(
"_process_task completed: task_id=%s status=%s result_type=%s changeset_items=%s",
task_id,
task.status.value,
@@ -232,7 +249,7 @@ class ChatOrchestrator:
if progress is not None:
payload["progress"] = max(0, min(100, int(progress)))
await self._events.publish(task_id, kind, payload)
LOGGER.warning(
LOGGER.debug(
"_publish_progress emitted: task_id=%s kind=%s stage=%s progress=%s",
task_id,
kind,
@@ -259,35 +276,7 @@ class ChatOrchestrator:
meta={"heartbeat": True},
)
index += 1
LOGGER.warning("_run_heartbeat stopped: task_id=%s ticks=%s", task_id, index)
LOGGER.debug("_run_heartbeat stopped: task_id=%s ticks=%s", task_id, index)
def _resolve_sessions(self, request: ChatMessageRequest) -> tuple[str, str]:
# Legacy compatibility: old session_id/project_id flow.
if request.dialog_session_id and request.rag_session_id:
dialog = self._dialogs.get(request.dialog_session_id)
if not dialog:
raise AppError("dialog_not_found", "Dialog session not found", ModuleName.BACKEND)
if dialog.rag_session_id != request.rag_session_id:
raise AppError("dialog_rag_mismatch", "Dialog session does not belong to rag session", ModuleName.BACKEND)
LOGGER.warning(
"_resolve_sessions resolved by dialog_session_id: dialog_session_id=%s rag_session_id=%s",
request.dialog_session_id,
request.rag_session_id,
)
return request.dialog_session_id, request.rag_session_id
if request.session_id and request.project_id:
if not self._rag_session_exists(request.project_id):
raise AppError("rag_session_not_found", "RAG session not found", ModuleName.RAG)
LOGGER.warning(
"_resolve_sessions resolved by legacy session/project: session_id=%s project_id=%s",
request.session_id,
request.project_id,
)
return request.session_id, request.project_id
raise AppError(
"missing_sessions",
"dialog_session_id and rag_session_id are required",
ModuleName.BACKEND,
)
return self._session_resolver.resolve(request)

View File

@@ -0,0 +1,36 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from app.core.exceptions import AppError
from app.schemas.chat import ChatMessageRequest
from app.schemas.common import ModuleName
if TYPE_CHECKING:
from app.modules.chat.dialog_store import DialogSessionStore
class ChatSessionResolver:
    """Maps a chat request onto a (dialog_session_id, rag_session_id) pair."""

    def __init__(self, dialogs: "DialogSessionStore", rag_session_exists) -> None:
        self._dialogs = dialogs
        # Callable(rag_session_id) -> bool used by the legacy flow check.
        self._rag_session_exists = rag_session_exists

    def resolve(self, request: "ChatMessageRequest") -> tuple[str, str]:
        """Return the validated session pair for *request*.

        Raises AppError when the dialog is unknown, belongs to a different
        RAG session, the legacy RAG session does not exist, or no session
        identifiers were provided at all.
        """
        if request.dialog_session_id and request.rag_session_id:
            return self._resolve_dialog_pair(request.dialog_session_id, request.rag_session_id)
        if request.session_id and request.project_id:
            return self._resolve_legacy_pair(request.session_id, request.project_id)
        raise AppError(
            "missing_sessions",
            "dialog_session_id and rag_session_id are required",
            ModuleName.BACKEND,
        )

    def _resolve_dialog_pair(self, dialog_session_id: str, rag_session_id: str) -> tuple[str, str]:
        """Modern flow: the dialog must exist and own the given RAG session."""
        dialog = self._dialogs.get(dialog_session_id)
        if not dialog:
            raise AppError("dialog_not_found", "Dialog session not found", ModuleName.BACKEND)
        if dialog.rag_session_id != rag_session_id:
            raise AppError("dialog_rag_mismatch", "Dialog session does not belong to rag session", ModuleName.BACKEND)
        return dialog_session_id, rag_session_id

    def _resolve_legacy_pair(self, session_id: str, project_id: str) -> tuple[str, str]:
        """Legacy session_id/project_id flow: the RAG session must exist."""
        if not self._rag_session_exists(project_id):
            raise AppError("rag_session_not_found", "RAG session not found", ModuleName.RAG)
        return session_id, project_id

View File

@@ -90,6 +90,41 @@ sequenceDiagram
Rag-->>Agent: items
```
### Retrieval + project/qa reasoning
Назначение: `RAG` вызывается не в начале runtime, а внутри отдельного graph-шага `context_retrieval` для `project/qa`.
```mermaid
sequenceDiagram
participant Agent as GraphAgentRuntime
participant Orch as OrchestratorService
participant G1 as conversation_understanding
participant G2 as question_classification
participant G3 as context_retrieval
participant Rag as RagService
participant G4 as context_analysis
participant G5 as answer_composition
Agent->>Orch: run(task)
Orch->>G1: execute
G1-->>Orch: resolved_request
Orch->>G2: execute
G2-->>Orch: question_profile
Orch->>G3: execute
G3->>Rag: retrieve(query)
Rag-->>G3: rag_items
G3-->>Orch: source_bundle
Orch->>G4: execute
G4-->>Orch: analysis_brief
Orch->>G5: execute
G5-->>Orch: final_answer
Orch-->>Agent: final_answer
```
Для `project/qa` это означает:
- ранний глобальный retrieval больше не нужен;
- `RAG` возвращает записи только для конкретного шага `context_retrieval`;
- оркестратор управляет цепочкой graph-шагов;
- пользовательский ответ собирается после анализа, а не напрямую из сырого retrieval.
## 5. Слои, фиксируемые в RAG
### 5.1. Слои DOCS

View File

@@ -0,0 +1,36 @@
from __future__ import annotations
from importlib import import_module
# Public lazy API of the explain package (PEP 562): each name maps to the
# module that defines it and is imported on first attribute access.
__all__ = [
    "CodeExcerpt",
    "CodeExplainRetrieverV2",
    "CodeGraphRepository",
    "EvidenceItem",
    "ExplainIntent",
    "ExplainIntentBuilder",
    "ExplainPack",
    "LayeredRetrievalGateway",
    "PromptBudgeter",
    "TracePath",
]

# Built once at import time instead of on every attribute access.
_MODULE_BY_NAME = {
    "CodeExcerpt": "app.modules.rag.explain.models",
    "EvidenceItem": "app.modules.rag.explain.models",
    "ExplainIntent": "app.modules.rag.explain.models",
    "ExplainPack": "app.modules.rag.explain.models",
    "TracePath": "app.modules.rag.explain.models",
    "ExplainIntentBuilder": "app.modules.rag.explain.intent_builder",
    "PromptBudgeter": "app.modules.rag.explain.budgeter",
    "LayeredRetrievalGateway": "app.modules.rag.explain.layered_gateway",
    "CodeGraphRepository": "app.modules.rag.explain.graph_repository",
    "CodeExplainRetrieverV2": "app.modules.rag.explain.retriever_v2",
}


def __getattr__(name: str):
    """Lazily import *name* from its defining module (PEP 562 hook).

    The resolved attribute is cached in module globals so subsequent
    look-ups bypass this hook entirely.

    Raises:
        AttributeError: if *name* is not part of the public API.
    """
    module_name = _MODULE_BY_NAME.get(name)
    if module_name is None:
        raise AttributeError(name)
    value = getattr(import_module(module_name), name)
    globals()[name] = value  # cache so the hook is hit at most once per name
    return value

View File

@@ -0,0 +1,62 @@
from __future__ import annotations
import json
from app.modules.rag.explain.models import ExplainPack
class PromptBudgeter:
    """Shapes an explain pack into a size-budgeted JSON prompt payload."""

    def __init__(
        self,
        *,
        max_paths: int = 3,
        max_symbols: int = 25,
        max_excerpts: int = 40,
        max_chars: int = 30000,
    ) -> None:
        self._max_paths = max_paths
        self._max_symbols = max_symbols
        self._max_excerpts = max_excerpts
        self._max_chars = max_chars

    def build_prompt_input(self, question: str, pack: "ExplainPack") -> str:
        """Serialize *question* plus the budget-trimmed *pack* to JSON text."""
        allowed_symbols = self._collect_symbol_ids(pack)
        payload = {
            "question": question,
            "intent": pack.intent.model_dump(mode="json"),
            "selected_entrypoints": [item.model_dump(mode="json") for item in pack.selected_entrypoints[:5]],
            "seed_symbols": [item.model_dump(mode="json") for item in pack.seed_symbols[: self._max_symbols]],
            "trace_paths": [path.model_dump(mode="json") for path in pack.trace_paths[: self._max_paths]],
            "evidence_index": {key: value.model_dump(mode="json") for key, value in pack.evidence_index.items()},
            "code_excerpts": self._budget_excerpts(pack, allowed_symbols),
            "missing": pack.missing,
            "conflicts": pack.conflicts,
        }
        return json.dumps(payload, ensure_ascii=False, indent=2)

    def _collect_symbol_ids(self, pack: "ExplainPack") -> list[str]:
        """Gather up to max_symbols unique symbol ids from leading trace paths."""
        collected: list[str] = []
        for trace in pack.trace_paths[: self._max_paths]:
            for sid in trace.symbol_ids:
                if not sid or sid in collected:
                    continue
                if len(collected) >= self._max_symbols:
                    continue
                collected.append(sid)
        return collected

    def _budget_excerpts(self, pack: "ExplainPack", allowed_symbols: list[str]) -> list[dict]:
        """Trim code excerpts to the char/count budget, honoring the symbol allow-list."""
        selected: list[dict] = []
        used_chars = 0
        for excerpt in pack.code_excerpts:
            # The symbol filter only applies when an allow-list exists AND the
            # excerpt is tagged with a symbol id; untagged excerpts always pass.
            if allowed_symbols and excerpt.symbol_id and excerpt.symbol_id not in allowed_symbols:
                continue
            body = excerpt.content.strip()
            remaining = self._max_chars - used_chars
            if remaining <= 0 or len(selected) >= self._max_excerpts:
                break
            if len(body) > remaining:
                body = body[:remaining].rstrip() + "...[truncated]"
            selected.append(
                {
                    "evidence_id": excerpt.evidence_id,
                    "title": excerpt.title,
                    "path": excerpt.path,
                    "start_line": excerpt.start_line,
                    "end_line": excerpt.end_line,
                    "focus": excerpt.focus,
                    "content": body,
                }
            )
            used_chars += len(body)
        return selected

View File

@@ -0,0 +1,59 @@
from __future__ import annotations
from app.modules.rag.explain.models import CodeExcerpt, LayeredRetrievalItem
class ExcerptPlanner:
    """Turns a retrieved code chunk into an overview excerpt plus an optional focus window."""

    # Substrings that mark "interesting" lines worth a zoomed-in focus excerpt.
    _FOCUS_TOKENS = ("raise", "except", "db", "select", "insert", "update", "delete", "http", "publish", "emit")

    def plan(self, chunk: "LayeredRetrievalItem", *, evidence_id: str, symbol_id: str | None) -> list["CodeExcerpt"]:
        """Return 1-2 excerpts for *chunk*: the full overview, plus a focus window.

        Chunks without a source location yield no excerpts.
        """
        location = chunk.location
        if location is None:
            return []
        overview = CodeExcerpt(
            evidence_id=evidence_id,
            symbol_id=symbol_id,
            title=chunk.title,
            path=location.path,
            start_line=location.start_line,
            end_line=location.end_line,
            content=chunk.content.strip(),
            focus="overview",
        )
        result = [overview]
        focused = self._focus_excerpt(chunk, evidence_id=evidence_id, symbol_id=symbol_id)
        if focused is not None:
            result.append(focused)
        return result

    def _focus_excerpt(
        self,
        chunk: "LayeredRetrievalItem",
        *,
        evidence_id: str,
        symbol_id: str | None,
    ) -> "CodeExcerpt | None":
        """Return a ±2-line window around the first focus-token line, if any.

        Returns None when no token matches or when the window would span the
        whole chunk (the overview excerpt already covers it).
        """
        location = chunk.location
        if location is None:
            return None
        lines = chunk.content.splitlines()
        for index, line in enumerate(lines):
            if not any(token in line.lower() for token in self._FOCUS_TOKENS):
                continue
            window_start = max(0, index - 2)
            window_end = min(len(lines), index + 3)
            if window_end - window_start >= len(lines):
                # A window covering everything would duplicate the overview.
                return None
            base_line = location.start_line or 1
            return CodeExcerpt(
                evidence_id=evidence_id,
                symbol_id=symbol_id,
                title=f"{chunk.title}:focus",
                path=location.path,
                start_line=base_line + window_start,
                end_line=base_line + window_end - 1,
                content="\n".join(lines[window_start:window_end]).strip(),
                focus="focus",
            )
        return None

View File

@@ -0,0 +1,216 @@
from __future__ import annotations
import json
from sqlalchemy import text
from app.modules.rag.explain.models import CodeLocation, LayeredRetrievalItem
from app.modules.shared.db import get_engine
class CodeGraphRepository:
    """Read-only queries over the ``rag_chunks`` table for code-graph lookups.

    Interprets three layers stored in the same table:
    ``C0_SOURCE_CHUNKS`` (raw code), ``C1_SYMBOL_CATALOG`` (symbol records)
    and ``C2_DEPENDENCY_GRAPH`` (edges encoded as chunks with edge metadata).
    """

    def get_out_edges(
        self,
        rag_session_id: str,
        src_symbol_ids: list[str],
        edge_types: list[str],
        limit_per_src: int,
    ) -> list[LayeredRetrievalItem]:
        """Return outgoing C2 edges for the given source symbols.

        Keeps at most ``limit_per_src`` edges per source symbol, in
        (path, span_start) row order.
        """
        if not src_symbol_ids:
            return []
        sql = """
            SELECT path, content, layer, title, metadata_json, span_start, span_end
            FROM rag_chunks
            WHERE rag_session_id = :sid
              AND layer = 'C2_DEPENDENCY_GRAPH'
              AND CAST(metadata_json AS jsonb)->>'src_symbol_id' = ANY(:src_ids)
              AND CAST(metadata_json AS jsonb)->>'edge_type' = ANY(:edge_types)
            ORDER BY path, span_start
        """
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(sql),
                {"sid": rag_session_id, "src_ids": src_symbol_ids, "edge_types": edge_types},
            ).mappings().fetchall()
        # Per-source counter enforcing limit_per_src; excess rows are skipped.
        grouped: dict[str, int] = {}
        items: list[LayeredRetrievalItem] = []
        for row in rows:
            metadata = self._loads(row.get("metadata_json"))
            src_symbol_id = str(metadata.get("src_symbol_id") or "")
            grouped[src_symbol_id] = grouped.get(src_symbol_id, 0) + 1
            if grouped[src_symbol_id] > limit_per_src:
                continue
            items.append(self._to_item(row, metadata))
        return items

    def get_in_edges(
        self,
        rag_session_id: str,
        dst_symbol_ids: list[str],
        edge_types: list[str],
        limit_per_dst: int,
    ) -> list[LayeredRetrievalItem]:
        """Return incoming C2 edges for the given destination symbols.

        Mirror of :meth:`get_out_edges`, capped at ``limit_per_dst`` edges per
        destination symbol.
        """
        if not dst_symbol_ids:
            return []
        sql = """
            SELECT path, content, layer, title, metadata_json, span_start, span_end
            FROM rag_chunks
            WHERE rag_session_id = :sid
              AND layer = 'C2_DEPENDENCY_GRAPH'
              AND CAST(metadata_json AS jsonb)->>'dst_symbol_id' = ANY(:dst_ids)
              AND CAST(metadata_json AS jsonb)->>'edge_type' = ANY(:edge_types)
            ORDER BY path, span_start
        """
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(sql),
                {"sid": rag_session_id, "dst_ids": dst_symbol_ids, "edge_types": edge_types},
            ).mappings().fetchall()
        grouped: dict[str, int] = {}
        items: list[LayeredRetrievalItem] = []
        for row in rows:
            metadata = self._loads(row.get("metadata_json"))
            dst_symbol_id = str(metadata.get("dst_symbol_id") or "")
            grouped[dst_symbol_id] = grouped.get(dst_symbol_id, 0) + 1
            if grouped[dst_symbol_id] > limit_per_dst:
                continue
            items.append(self._to_item(row, metadata))
        return items

    def resolve_symbol_by_ref(
        self,
        rag_session_id: str,
        dst_ref: str,
        package_hint: str | None = None,
    ) -> LayeredRetrievalItem | None:
        """Resolve a textual symbol reference to the best-matching C1 symbol.

        Candidates match by exact qname, exact title, or qname suffix; up to
        12 candidates are scored (exact qname > exact title; agreement with
        ``package_hint`` adds to the score) and the best one is returned.

        NOTE(review): ``%`` or ``_`` inside *dst_ref* act as LIKE wildcards in
        the suffix match — presumably harmless for symbol names; confirm.
        """
        ref = (dst_ref or "").strip()
        if not ref:
            return None
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(
                    """
                    SELECT path, content, layer, title, metadata_json, span_start, span_end, qname
                    FROM rag_chunks
                    WHERE rag_session_id = :sid
                      AND layer = 'C1_SYMBOL_CATALOG'
                      AND (qname = :ref OR title = :ref OR qname LIKE :tail)
                    ORDER BY path
                    LIMIT 12
                    """
                ),
                {"sid": rag_session_id, "ref": ref, "tail": f"%{ref}"},
            ).mappings().fetchall()
        best: LayeredRetrievalItem | None = None
        # Start below 0 so even a zero-score candidate is accepted.
        best_score = -1
        for row in rows:
            metadata = self._loads(row.get("metadata_json"))
            package = str(metadata.get("package_or_module") or "")
            score = 0
            if str(row.get("qname") or "") == ref:
                score += 3
            if str(row.get("title") or "") == ref:
                score += 2
            if package_hint and package.startswith(package_hint):
                score += 3
            if package_hint and package_hint in str(row.get("path") or ""):
                score += 1
            if score > best_score:
                best = self._to_item(row, metadata)
                best_score = score
        return best

    def get_symbols_by_ids(self, rag_session_id: str, symbol_ids: list[str]) -> list[LayeredRetrievalItem]:
        """Fetch C1 symbol records for the given symbol ids, in path order."""
        if not symbol_ids:
            return []
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(
                    """
                    SELECT path, content, layer, title, metadata_json, span_start, span_end
                    FROM rag_chunks
                    WHERE rag_session_id = :sid
                      AND layer = 'C1_SYMBOL_CATALOG'
                      AND symbol_id = ANY(:symbol_ids)
                    ORDER BY path, span_start
                    """
                ),
                {"sid": rag_session_id, "symbol_ids": symbol_ids},
            ).mappings().fetchall()
        return [self._to_item(row, self._loads(row.get("metadata_json"))) for row in rows]

    def get_chunks_by_symbol_ids(
        self,
        rag_session_id: str,
        symbol_ids: list[str],
        prefer_chunk_type: str = "symbol_block",
    ) -> list[LayeredRetrievalItem]:
        """Return one C0 source chunk per symbol (symbols without a location
        or without an overlapping chunk are silently skipped)."""
        symbols = self.get_symbols_by_ids(rag_session_id, symbol_ids)
        chunks: list[LayeredRetrievalItem] = []
        for symbol in symbols:
            location = symbol.location
            if location is None:
                continue
            chunk = self._chunk_for_symbol(rag_session_id, symbol, prefer_chunk_type=prefer_chunk_type)
            if chunk is not None:
                chunks.append(chunk)
        return chunks

    def _chunk_for_symbol(
        self,
        rag_session_id: str,
        symbol: LayeredRetrievalItem,
        *,
        prefer_chunk_type: str,
    ) -> LayeredRetrievalItem | None:
        """Pick the single C0 chunk that best overlaps the symbol's line span.

        Chunks of ``prefer_chunk_type`` rank first; ties are broken by
        distance between chunk start and symbol start.  The 0/999999 fallbacks
        make NULL spans overlap everything.
        """
        location = symbol.location
        if location is None:
            return None
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(
                    """
                    SELECT path, content, layer, title, metadata_json, span_start, span_end
                    FROM rag_chunks
                    WHERE rag_session_id = :sid
                      AND layer = 'C0_SOURCE_CHUNKS'
                      AND path = :path
                      AND COALESCE(span_start, 0) <= :end_line
                      AND COALESCE(span_end, 999999) >= :start_line
                    ORDER BY
                      CASE WHEN CAST(metadata_json AS jsonb)->>'chunk_type' = :prefer_chunk_type THEN 0 ELSE 1 END,
                      ABS(COALESCE(span_start, 0) - :start_line)
                    LIMIT 1
                    """
                ),
                {
                    "sid": rag_session_id,
                    "path": location.path,
                    "start_line": location.start_line or 0,
                    "end_line": location.end_line or 999999,
                    "prefer_chunk_type": prefer_chunk_type,
                },
            ).mappings().fetchall()
        if not rows:
            return None
        row = rows[0]
        return self._to_item(row, self._loads(row.get("metadata_json")))

    def _to_item(self, row, metadata: dict) -> LayeredRetrievalItem:
        """Convert a DB row mapping plus parsed metadata into a retrieval item."""
        return LayeredRetrievalItem(
            source=str(row.get("path") or ""),
            content=str(row.get("content") or ""),
            layer=str(row.get("layer") or ""),
            title=str(row.get("title") or ""),
            metadata=metadata,
            location=CodeLocation(
                path=str(row.get("path") or ""),
                start_line=row.get("span_start"),
                end_line=row.get("span_end"),
            ),
        )

    def _loads(self, value) -> dict:
        """Parse the metadata_json column into a dict ({} for NULL/empty).

        NOTE(review): assumes the column holds a JSON string — ``str()`` of an
        already-deserialized dict would not be valid JSON; confirm the column
        type is text, not jsonb returned as dict.
        """
        if not value:
            return {}
        return json.loads(str(value))

View File

@@ -0,0 +1,102 @@
from __future__ import annotations
import re
from app.modules.rag.explain.models import ExplainHints, ExplainIntent
from app.modules.rag.retrieval.query_terms import extract_query_terms
class ExplainIntentBuilder:
    """Derives a structured ExplainIntent (keywords, hints, scope flags)
    from a free-form user question about the codebase."""

    # Candidate HTTP route paths, e.g. "/api/users/{id}".
    _ROUTE_RE = re.compile(r"(/[A-Za-z0-9_./{}:-]+)")
    # Python file paths mentioned verbatim in the query.
    _FILE_RE = re.compile(r"([A-Za-z0-9_./-]+\.py)")
    # Dotted attribute refs, CamelCase names, or plain identifiers (len >= 3).
    _SYMBOL_RE = re.compile(r"\b([A-Z][A-Za-z0-9_]*\.[A-Za-z_][A-Za-z0-9_]*|[A-Z][A-Za-z0-9_]{2,}|[a-z_][A-Za-z0-9_]{2,})\b")
    # Backtick-quoted CLI command names.
    _COMMAND_RE = re.compile(r"`([A-Za-z0-9:_-]+)`")
    # Markers (English and Russian) that the query is about test coverage.
    _TEST_KEYWORDS = (
        "тест",
        "tests",
        "test ",
        "unit-test",
        "unit test",
        "юнит-тест",
        "pytest",
        "spec",
        "как покрыто тестами",
        "как проверяется",
        "how is it tested",
        "how it's tested",
    )

    def build(self, user_query: str) -> ExplainIntent:
        """Parse *user_query* and return the extracted intent."""
        normalized = " ".join((user_query or "").split())
        lowered = normalized.lower()
        keyword_list = self._keywords(normalized)
        hints = ExplainHints(
            paths=self._dedupe(self._FILE_RE.findall(normalized)),
            symbols=self._symbols(normalized),
            endpoints=self._dedupe(self._ROUTE_RE.findall(normalized)),
            commands=self._commands(normalized, lowered),
        )
        return ExplainIntent(
            raw_query=user_query,
            normalized_query=normalized,
            keywords=keyword_list[:12],
            hints=hints,
            include_tests=self._include_tests(lowered),
            expected_entry_types=self._entry_types(lowered, hints),
            depth=self._depth(lowered),
        )

    def _keywords(self, text: str) -> list[str]:
        """Generic query terms plus any symbols and routes found in *text*."""
        collected = extract_query_terms(text)
        for extra in list(self._symbols(text)) + list(self._ROUTE_RE.findall(text)):
            if extra not in collected:
                collected.append(extra)
        return self._dedupe(collected)

    def _symbols(self, text: str) -> list[str]:
        """Identifier-like tokens, excluding short ones and .py file names."""
        found: list[str] = []
        for raw in self._SYMBOL_RE.findall(text):
            token = raw.strip()
            if len(token) >= 3 and not token.endswith(".py"):
                found.append(token)
        return self._dedupe(found)

    def _commands(self, text: str, lowered: str) -> list[str]:
        """Backtick-quoted commands plus names following 'command'/'cli'."""
        found = list(self._COMMAND_RE.findall(text))
        padded = f" {lowered} "
        if " command " in padded:
            found.extend(re.findall(r"command\s+([A-Za-z0-9:_-]+)", lowered))
        if " cli " in padded:
            found.extend(re.findall(r"cli\s+([A-Za-z0-9:_-]+)", lowered))
        return self._dedupe(found)

    def _entry_types(self, lowered: str, hints: ExplainHints) -> list[str]:
        """Entry-point kinds the query likely targets (default: both)."""
        if hints.endpoints or any(marker in lowered for marker in ("endpoint", "route", "handler", "http", "api")):
            return ["http"]
        if hints.commands or any(marker in lowered for marker in ("cli", "command", "click", "typer")):
            return ["cli"]
        return ["http", "cli"]

    def _depth(self, lowered: str) -> str:
        """Requested level of detail: deep, high, or the default medium."""
        if any(marker in lowered for marker in ("deep", "подроб", "деталь", "full flow", "trace")):
            return "deep"
        if any(marker in lowered for marker in ("high level", "overview", "кратко", "summary")):
            return "high"
        return "medium"

    def _include_tests(self, lowered: str) -> bool:
        """True when the query mentions tests or test coverage."""
        padded = f" {lowered} "
        return any(marker in padded for marker in self._TEST_KEYWORDS)

    def _dedupe(self, values: list[str]) -> list[str]:
        """Strip values and keep the first occurrence of each non-empty one."""
        unique: list[str] = []
        for value in values:
            cleaned = value.strip()
            if cleaned and cleaned not in unique:
                unique.append(cleaned)
        return unique

View File

@@ -0,0 +1,289 @@
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Callable
from app.modules.rag.explain.models import CodeLocation, LayeredRetrievalItem
from app.modules.rag.retrieval.test_filter import build_test_filters, debug_disable_test_filter
LOGGER = logging.getLogger(__name__)
if TYPE_CHECKING:
from app.modules.rag.persistence.repository import RagRepository
from app.modules.rag_session.embedding.gigachat_embedder import GigaChatEmbedder
@dataclass(slots=True)
class LayerRetrievalResult:
    """Outcome of one layer retrieval: the items plus gap/failure markers."""

    # Retrieved chunks converted to LayeredRetrievalItem.
    items: list[LayeredRetrievalItem]
    # Diagnostic strings (e.g. "layer:C0 retrieval_failed:...") describing
    # failures or retries that happened while producing `items`.
    missing: list[str] = field(default_factory=list)
class LayeredRetrievalGateway:
    """Facade over vector and lexical retrieval with test-path filtering.

    Wraps repository retrieval calls, applies the configured test-file
    exclusion filters and, when a filtered retrieval raises, retries once
    without the filters so callers still get results; the retry is recorded
    in the result's ``missing`` list.
    """

    def __init__(self, repository: RagRepository, embedder: GigaChatEmbedder) -> None:
        self._repository = repository
        self._embedder = embedder

    def retrieve_layer(
        self,
        rag_session_id: str,
        query: str,
        layer: str,
        *,
        limit: int,
        path_prefixes: list[str] | None = None,
        exclude_tests: bool = True,
        prefer_non_tests: bool = False,
        include_spans: bool = False,
    ) -> LayerRetrievalResult:
        """Vector-retrieve up to *limit* chunks of a single *layer* for *query*.

        Embeds the query, queries the repository with the test filters, and on
        failure after a successful embedding retries once without the filters.
        Embedding failures are not retried (there is nothing to retry with).
        """
        # A debug switch can globally disable test exclusion.
        effective_exclude_tests = exclude_tests and not debug_disable_test_filter()
        filter_args = self._filter_args(effective_exclude_tests)
        query_embedding: list[float] | None = None
        try:
            query_embedding = self._embedder.embed([query])[0]
            rows = self._repository.retrieve(
                rag_session_id,
                query_embedding,
                query_text=query,
                limit=limit,
                layers=[layer],
                path_prefixes=path_prefixes,
                exclude_path_prefixes=filter_args["exclude_path_prefixes"],
                exclude_like_patterns=filter_args["exclude_like_patterns"],
                # When tests are not excluded, at least rank non-tests first.
                prefer_non_tests=prefer_non_tests or not effective_exclude_tests,
            )
            return self._success_result(
                rows,
                rag_session_id=rag_session_id,
                label="layered retrieval",
                include_spans=include_spans,
                layer=layer,
                exclude_tests=effective_exclude_tests,
                path_prefixes=path_prefixes,
            )
        except Exception as exc:
            # query_embedding is still None only if embed() itself raised.
            if query_embedding is None:
                self._log_failure(
                    label="layered retrieval",
                    rag_session_id=rag_session_id,
                    layer=layer,
                    exclude_tests=effective_exclude_tests,
                    path_prefixes=path_prefixes,
                    exc=exc,
                )
                return LayerRetrievalResult(items=[], missing=[self._failure_missing(f"layer:{layer} retrieval_failed", exc)])
            retry_result = self._retry_without_test_filter(
                operation=lambda: self._repository.retrieve(
                    rag_session_id,
                    query_embedding,
                    query_text=query,
                    limit=limit,
                    layers=[layer],
                    path_prefixes=path_prefixes,
                    exclude_path_prefixes=None,
                    exclude_like_patterns=None,
                    prefer_non_tests=True,
                ),
                label="layered retrieval",
                rag_session_id=rag_session_id,
                include_spans=include_spans,
                layer=layer,
                exclude_tests=effective_exclude_tests,
                path_prefixes=path_prefixes,
                exc=exc,
                missing_prefix=f"layer:{layer} retrieval_failed",
            )
            if retry_result is not None:
                return retry_result
            return LayerRetrievalResult(items=[], missing=[self._failure_missing(f"layer:{layer} retrieval_failed", exc)])

    def retrieve_lexical_code(
        self,
        rag_session_id: str,
        query: str,
        *,
        limit: int,
        path_prefixes: list[str] | None = None,
        exclude_tests: bool = True,
        include_spans: bool = False,
    ) -> LayerRetrievalResult:
        """Lexically retrieve code chunks (no embedding involved).

        Same filtering and retry-without-test-filter behavior as
        :meth:`retrieve_layer`.
        """
        effective_exclude_tests = exclude_tests and not debug_disable_test_filter()
        filter_args = self._filter_args(effective_exclude_tests)
        try:
            rows = self._repository.retrieve_lexical_code(
                rag_session_id,
                query_text=query,
                limit=limit,
                path_prefixes=path_prefixes,
                exclude_path_prefixes=filter_args["exclude_path_prefixes"],
                exclude_like_patterns=filter_args["exclude_like_patterns"],
                prefer_non_tests=not effective_exclude_tests,
            )
            return self._success_result(
                rows,
                rag_session_id=rag_session_id,
                label="lexical retrieval",
                include_spans=include_spans,
                exclude_tests=effective_exclude_tests,
                path_prefixes=path_prefixes,
            )
        except Exception as exc:
            retry_result = self._retry_without_test_filter(
                operation=lambda: self._repository.retrieve_lexical_code(
                    rag_session_id,
                    query_text=query,
                    limit=limit,
                    path_prefixes=path_prefixes,
                    exclude_path_prefixes=None,
                    exclude_like_patterns=None,
                    prefer_non_tests=True,
                ),
                label="lexical retrieval",
                rag_session_id=rag_session_id,
                include_spans=include_spans,
                exclude_tests=effective_exclude_tests,
                path_prefixes=path_prefixes,
                exc=exc,
                missing_prefix="layer:C0 lexical_retrieval_failed",
            )
            if retry_result is not None:
                return retry_result
            return LayerRetrievalResult(items=[], missing=[self._failure_missing("layer:C0 lexical_retrieval_failed", exc)])

    def _retry_without_test_filter(
        self,
        *,
        operation: Callable[[], list[dict]],
        label: str,
        rag_session_id: str,
        include_spans: bool,
        exclude_tests: bool,
        path_prefixes: list[str] | None,
        exc: Exception,
        missing_prefix: str,
        layer: str | None = None,
    ) -> LayerRetrievalResult | None:
        """Re-run *operation* with test filters off after a failure.

        Returns None when the original call was already unfiltered (nothing to
        relax) or when the retry itself fails; otherwise returns the retried
        result with a ":retried_without_test_filter" marker appended.
        """
        if not exclude_tests:
            self._log_failure(
                label=label,
                rag_session_id=rag_session_id,
                layer=layer,
                exclude_tests=exclude_tests,
                path_prefixes=path_prefixes,
                exc=exc,
            )
            return None
        self._log_failure(
            label=label,
            rag_session_id=rag_session_id,
            layer=layer,
            exclude_tests=exclude_tests,
            path_prefixes=path_prefixes,
            exc=exc,
            retried_without_test_filter=True,
        )
        try:
            rows = operation()
        except Exception as retry_exc:
            self._log_failure(
                label=f"{label} retry",
                rag_session_id=rag_session_id,
                layer=layer,
                exclude_tests=False,
                path_prefixes=path_prefixes,
                exc=retry_exc,
            )
            return None
        result = self._success_result(
            rows,
            rag_session_id=rag_session_id,
            label=f"{label} retry",
            include_spans=include_spans,
            layer=layer,
            exclude_tests=False,
            path_prefixes=path_prefixes,
        )
        result.missing.append(f"{missing_prefix}:retried_without_test_filter")
        return result

    def _success_result(
        self,
        rows: list[dict],
        *,
        rag_session_id: str,
        label: str,
        include_spans: bool,
        exclude_tests: bool,
        path_prefixes: list[str] | None,
        layer: str | None = None,
    ) -> LayerRetrievalResult:
        """Convert raw rows to items and log a retrieval summary.

        NOTE(review): success is logged at WARNING — presumably so the summary
        survives the root logger's WARNING level (see logging setup); confirm
        this is intentional rather than a leftover debug level.
        """
        items = [self._to_item(row, include_spans=include_spans) for row in rows]
        LOGGER.warning(
            "%s: rag_session_id=%s layer=%s exclude_tests=%s path_prefixes=%s returned_count=%s top_paths=%s",
            label,
            rag_session_id,
            layer,
            exclude_tests,
            path_prefixes or [],
            len(items),
            [item.source for item in items[:3]],
        )
        return LayerRetrievalResult(items=items)

    def _log_failure(
        self,
        *,
        label: str,
        rag_session_id: str,
        exclude_tests: bool,
        path_prefixes: list[str] | None,
        exc: Exception,
        layer: str | None = None,
        retried_without_test_filter: bool = False,
    ) -> None:
        """Log a retrieval failure with its context and full traceback."""
        LOGGER.warning(
            "%s failed: rag_session_id=%s layer=%s exclude_tests=%s path_prefixes=%s retried_without_test_filter=%s error=%s",
            label,
            rag_session_id,
            layer,
            exclude_tests,
            path_prefixes or [],
            retried_without_test_filter,
            self._exception_summary(exc),
            exc_info=True,
        )

    def _filter_args(self, exclude_tests: bool) -> dict[str, list[str] | None]:
        """Build the repository exclusion kwargs (both None when tests allowed)."""
        test_filters = build_test_filters() if exclude_tests else None
        return {
            "exclude_path_prefixes": test_filters.exclude_path_prefixes if test_filters else None,
            "exclude_like_patterns": test_filters.exclude_like_patterns if test_filters else None,
        }

    def _failure_missing(self, prefix: str, exc: Exception) -> str:
        """Format a `missing` entry from a failure prefix and exception."""
        return f"{prefix}:{self._exception_summary(exc)}"

    def _exception_summary(self, exc: Exception) -> str:
        """One-line, whitespace-collapsed exception summary capped at 180 chars."""
        message = " ".join(str(exc).split())
        if len(message) > 180:
            message = message[:177] + "..."
        return f"{type(exc).__name__}:{message or 'no_message'}"

    def _to_item(self, row: dict, *, include_spans: bool) -> LayeredRetrievalItem:
        """Convert a repository row dict into a LayeredRetrievalItem.

        The location is only populated when *include_spans* is requested.
        """
        location = None
        if include_spans:
            location = CodeLocation(
                path=str(row.get("path") or ""),
                start_line=row.get("span_start"),
                end_line=row.get("span_end"),
            )
        return LayeredRetrievalItem(
            source=str(row.get("path") or ""),
            content=str(row.get("content") or ""),
            layer=str(row.get("layer") or ""),
            title=str(row.get("title") or ""),
            metadata=dict(row.get("metadata", {}) or {}),
            score=row.get("distance"),
            location=location,
        )

View File

@@ -0,0 +1,91 @@
from __future__ import annotations
from typing import Any, Literal
from pydantic import BaseModel, ConfigDict, Field
class ExplainHints(BaseModel):
    """Concrete artifacts extracted from the user query to narrow retrieval."""

    model_config = ConfigDict(extra="forbid")
    paths: list[str] = Field(default_factory=list)  # file paths mentioned in the query
    symbols: list[str] = Field(default_factory=list)  # identifier-like tokens
    endpoints: list[str] = Field(default_factory=list)  # HTTP route paths
    commands: list[str] = Field(default_factory=list)  # CLI command names
class ExplainIntent(BaseModel):
    """Normalized representation of what the user asked to have explained."""

    model_config = ConfigDict(extra="forbid")
    raw_query: str  # query exactly as typed by the user
    normalized_query: str  # whitespace-collapsed form of raw_query
    keywords: list[str] = Field(default_factory=list)  # search terms for retrieval
    hints: ExplainHints = Field(default_factory=ExplainHints)
    include_tests: bool = False  # True when the query is about test coverage
    expected_entry_types: list[Literal["http", "cli"]] = Field(default_factory=list)
    depth: Literal["high", "medium", "deep"] = "medium"  # requested detail level
class CodeLocation(BaseModel):
    """A file path with an optional inclusive line span."""

    model_config = ConfigDict(extra="forbid")
    path: str
    start_line: int | None = None
    end_line: int | None = None
class LayeredRetrievalItem(BaseModel):
    """One retrieved chunk from the layered RAG store."""

    model_config = ConfigDict(extra="forbid")
    source: str  # file path the chunk came from
    content: str  # chunk text
    layer: str  # RAG layer identifier (e.g. C0/C1/C2/C3)
    title: str
    metadata: dict[str, Any] = Field(default_factory=dict)  # layer-specific fields (symbol_id, edge_type, ...)
    score: float | None = None  # retrieval distance/score when available
    location: CodeLocation | None = None  # populated only when spans were requested
class TracePath(BaseModel):
    """An ordered chain of symbol ids discovered by walking dependency edges."""

    model_config = ConfigDict(extra="forbid")
    symbol_ids: list[str] = Field(default_factory=list)  # symbols along the path, in order
    score: float = 0.0  # accumulated edge score for ranking paths
    entrypoint_id: str | None = None
    notes: list[str] = Field(default_factory=list)  # e.g. "resolved:<ref>" markers
class EvidenceItem(BaseModel):
    """A single piece of evidence backing the explanation, keyed by id."""

    model_config = ConfigDict(extra="forbid")
    evidence_id: str
    kind: Literal["entrypoint", "symbol", "edge", "excerpt"]
    summary: str
    location: CodeLocation | None = None
    supports: list[str] = Field(default_factory=list)  # symbol ids this evidence supports
class CodeExcerpt(BaseModel):
    """A snippet of source code attached to an evidence entry."""

    model_config = ConfigDict(extra="forbid")
    evidence_id: str  # id of the EvidenceItem this excerpt belongs to
    symbol_id: str | None = None
    title: str
    path: str
    start_line: int | None = None
    end_line: int | None = None
    content: str
    focus: str = "overview"  # "overview", "focus", "lexical"; "test:"-prefixed for test code
class ExplainPack(BaseModel):
    """Complete retrieval bundle handed to the explanation generator."""

    model_config = ConfigDict(extra="forbid")
    intent: ExplainIntent
    selected_entrypoints: list[LayeredRetrievalItem] = Field(default_factory=list)
    seed_symbols: list[LayeredRetrievalItem] = Field(default_factory=list)
    trace_paths: list[TracePath] = Field(default_factory=list)
    evidence_index: dict[str, EvidenceItem] = Field(default_factory=dict)  # evidence_id -> evidence
    code_excerpts: list[CodeExcerpt] = Field(default_factory=list)
    missing: list[str] = Field(default_factory=list)  # what could not be retrieved
    conflicts: list[str] = Field(default_factory=list)

View File

@@ -0,0 +1,328 @@
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from app.modules.rag.contracts.enums import RagLayer
from app.modules.rag.explain.intent_builder import ExplainIntentBuilder
from app.modules.rag.explain.layered_gateway import LayerRetrievalResult, LayeredRetrievalGateway
from app.modules.rag.explain.models import CodeExcerpt, EvidenceItem, ExplainPack, LayeredRetrievalItem
from app.modules.rag.explain.source_excerpt_fetcher import SourceExcerptFetcher
from app.modules.rag.explain.trace_builder import TraceBuilder
from app.modules.rag.retrieval.test_filter import exclude_tests_default, is_test_path
LOGGER = logging.getLogger(__name__)
_MIN_EXCERPTS = 2
if TYPE_CHECKING:
from app.modules.rag.explain.graph_repository import CodeGraphRepository
from app.modules.rag.explain.models import ExplainIntent
class CodeExplainRetrieverV2:
    """Orchestrates layered retrieval into an ExplainPack.

    Pipeline: parse the query into an intent, pick entrypoints (C3), seed
    symbols (C1), walk dependency traces (C2), fetch source excerpts (C0),
    and fall back to lexical search — including test files — when the
    graph-driven path yields too little.
    """

    def __init__(
        self,
        gateway: LayeredRetrievalGateway,
        graph_repository: CodeGraphRepository,
        intent_builder: ExplainIntentBuilder | None = None,
        trace_builder: TraceBuilder | None = None,
        excerpt_fetcher: SourceExcerptFetcher | None = None,
    ) -> None:
        self._gateway = gateway
        self._graph = graph_repository
        self._intent_builder = intent_builder or ExplainIntentBuilder()
        self._trace_builder = trace_builder or TraceBuilder(graph_repository)
        self._excerpt_fetcher = excerpt_fetcher or SourceExcerptFetcher(graph_repository)

    def build_pack(
        self,
        rag_session_id: str,
        user_query: str,
        *,
        file_candidates: list[dict] | None = None,
    ) -> ExplainPack:
        """Build an ExplainPack for *user_query* within one RAG session.

        If test exclusion left the pack with fewer than _MIN_EXCERPTS excerpts,
        a second lexical pass that includes test files is merged in.
        """
        intent = self._intent_builder.build(user_query)
        path_prefixes = _path_prefixes(intent, file_candidates or [])
        # Respect the global default unless the user explicitly asked about tests.
        exclude_tests = exclude_tests_default() and not intent.include_tests
        pack = self._run_pass(rag_session_id, intent, path_prefixes, exclude_tests=exclude_tests)
        if exclude_tests and len(pack.code_excerpts) < _MIN_EXCERPTS:
            self._merge_test_fallback(pack, rag_session_id, intent, path_prefixes)
        self._log_pack(rag_session_id, pack)
        return pack

    def _run_pass(
        self,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
        *,
        exclude_tests: bool,
    ) -> ExplainPack:
        """Run the full C3 -> C1 -> C2 -> C0 retrieval pipeline once."""
        missing: list[str] = []
        entrypoints_result = self._entrypoints(rag_session_id, intent, path_prefixes, exclude_tests=exclude_tests)
        missing.extend(entrypoints_result.missing)
        selected_entrypoints = self._filter_entrypoints(intent, entrypoints_result.items)
        if not selected_entrypoints:
            missing.append("layer:C3 empty")
        seed_result = self._seed_symbols(rag_session_id, intent, path_prefixes, selected_entrypoints, exclude_tests=exclude_tests)
        missing.extend(seed_result.missing)
        seed_symbols = seed_result.items
        if not seed_symbols:
            missing.append("layer:C1 empty")
        # Trace depth scales with the requested detail level.
        depth = 4 if intent.depth == "deep" else 3 if intent.depth == "medium" else 2
        trace_paths = self._trace_builder.build_paths(rag_session_id, seed_symbols, max_depth=depth) if seed_symbols else []
        excerpts, excerpt_evidence = self._excerpt_fetcher.fetch(rag_session_id, trace_paths) if trace_paths else ([], {})
        if not excerpts:
            # Graph walk produced nothing: fall back to lexical code search.
            lexical_result = self._gateway.retrieve_lexical_code(
                rag_session_id,
                intent.normalized_query,
                limit=6,
                path_prefixes=path_prefixes or None,
                exclude_tests=exclude_tests,
                include_spans=True,
            )
            missing.extend(lexical_result.missing)
            excerpts, excerpt_evidence = _lexical_excerpts(lexical_result.items)
        if not excerpts:
            missing.append("layer:C0 empty")
        evidence_index = _evidence_index(selected_entrypoints, seed_symbols)
        evidence_index.update(excerpt_evidence)
        missing.extend(_missing(selected_entrypoints, seed_symbols, trace_paths, excerpts))
        return ExplainPack(
            intent=intent,
            selected_entrypoints=selected_entrypoints,
            seed_symbols=seed_symbols,
            trace_paths=trace_paths,
            evidence_index=evidence_index,
            code_excerpts=excerpts,
            missing=_cleanup_missing(_dedupe(missing), has_excerpts=bool(excerpts)),
            conflicts=[],
        )

    def _merge_test_fallback(
        self,
        pack: ExplainPack,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
    ) -> None:
        """Merge extra lexical excerpts (tests included) into an thin pack.

        Evidence ids continue from the pack's existing "excerpt_N" numbering,
        and excerpts already present (same path/span/content) are skipped.
        """
        lexical_result = self._gateway.retrieve_lexical_code(
            rag_session_id,
            intent.normalized_query,
            limit=6,
            path_prefixes=path_prefixes or None,
            exclude_tests=False,
            include_spans=True,
        )
        excerpt_offset = len([key for key in pack.evidence_index if key.startswith("excerpt_")])
        excerpts, evidence = _lexical_excerpts(
            lexical_result.items,
            start_index=excerpt_offset,
            is_test_fallback=True,
        )
        if not excerpts:
            pack.missing = _dedupe(pack.missing + lexical_result.missing)
            return
        seen = {(item.path, item.start_line, item.end_line, item.content) for item in pack.code_excerpts}
        for excerpt in excerpts:
            key = (excerpt.path, excerpt.start_line, excerpt.end_line, excerpt.content)
            if key in seen:
                continue
            pack.code_excerpts.append(excerpt)
            seen.add(key)
        pack.evidence_index.update(evidence)
        pack.missing = _cleanup_missing(_dedupe(pack.missing + lexical_result.missing), has_excerpts=bool(pack.code_excerpts))

    def _entrypoints(
        self,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
        *,
        exclude_tests: bool,
    ) -> LayerRetrievalResult:
        """Retrieve candidate entrypoints from the C3 entrypoint layer."""
        return self._gateway.retrieve_layer(
            rag_session_id,
            intent.normalized_query,
            RagLayer.CODE_ENTRYPOINTS,
            limit=6,
            path_prefixes=path_prefixes or None,
            exclude_tests=exclude_tests,
            prefer_non_tests=True,
            include_spans=True,
        )

    def _filter_entrypoints(self, intent: ExplainIntent, items: list[LayeredRetrievalItem]) -> list[LayeredRetrievalItem]:
        """Keep up to 3 entrypoints matching the expected entry types.

        Falls back to the unfiltered top 3 when no item matches the types.
        """
        if not intent.expected_entry_types:
            return items[:3]
        filtered = [item for item in items if str(item.metadata.get("entry_type") or "") in intent.expected_entry_types]
        return filtered[:3] or items[:3]

    def _seed_symbols(
        self,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
        entrypoints: list[LayeredRetrievalItem],
        *,
        exclude_tests: bool,
    ) -> LayerRetrievalResult:
        """Collect up to 8 unique seed symbols for trace building.

        Entry-point handler symbols are ranked ahead of C1 vector matches.
        """
        symbol_result = self._gateway.retrieve_layer(
            rag_session_id,
            intent.normalized_query,
            RagLayer.CODE_SYMBOL_CATALOG,
            limit=12,
            path_prefixes=path_prefixes or None,
            exclude_tests=exclude_tests,
            prefer_non_tests=True,
            include_spans=True,
        )
        handlers: list[LayeredRetrievalItem] = []
        handler_ids = [str(item.metadata.get("handler_symbol_id") or "") for item in entrypoints]
        if handler_ids:
            handlers = self._graph.get_symbols_by_ids(rag_session_id, [item for item in handler_ids if item])
        seeds: list[LayeredRetrievalItem] = []
        seen: set[str] = set()
        for item in handlers + symbol_result.items:
            symbol_id = str(item.metadata.get("symbol_id") or "")
            if not symbol_id or symbol_id in seen:
                continue
            seen.add(symbol_id)
            seeds.append(item)
            if len(seeds) >= 8:
                break
        return LayerRetrievalResult(items=seeds, missing=list(symbol_result.missing))

    def _log_pack(self, rag_session_id: str, pack: ExplainPack) -> None:
        """Log a one-line summary of the built pack (prod vs test excerpt split).

        NOTE(review): logged at WARNING, presumably to pass the root logger's
        WARNING level — confirm this is intentional.
        """
        prod_excerpt_count = len([excerpt for excerpt in pack.code_excerpts if not _is_test_excerpt(excerpt)])
        test_excerpt_count = len(pack.code_excerpts) - prod_excerpt_count
        LOGGER.warning(
            "code explain pack: rag_session_id=%s entrypoints=%s seeds=%s paths=%s excerpts=%s prod_excerpt_count=%s test_excerpt_count=%s missing=%s",
            rag_session_id,
            len(pack.selected_entrypoints),
            len(pack.seed_symbols),
            len(pack.trace_paths),
            len(pack.code_excerpts),
            prod_excerpt_count,
            test_excerpt_count,
            pack.missing,
        )
def _evidence_index(
    entrypoints: list[LayeredRetrievalItem],
    seed_symbols: list[LayeredRetrievalItem],
) -> dict[str, EvidenceItem]:
    """Build evidence entries for entrypoints and seed symbols, keyed by id.

    Ids are "entrypoint_N" / "symbol_N" with 1-based numbering per kind.
    """
    index: dict[str, EvidenceItem] = {}
    for position, entry in enumerate(entrypoints, start=1):
        key = f"entrypoint_{position}"
        index[key] = EvidenceItem(
            evidence_id=key,
            kind="entrypoint",
            summary=entry.title,
            location=entry.location,
            supports=[str(entry.metadata.get("handler_symbol_id") or "")],
        )
    for position, symbol in enumerate(seed_symbols, start=1):
        key = f"symbol_{position}"
        index[key] = EvidenceItem(
            evidence_id=key,
            kind="symbol",
            summary=symbol.title,
            location=symbol.location,
            supports=[str(symbol.metadata.get("symbol_id") or "")],
        )
    return index
def _missing(
entrypoints: list[LayeredRetrievalItem],
seed_symbols: list[LayeredRetrievalItem],
trace_paths,
excerpts,
) -> list[str]:
missing: list[str] = []
if not entrypoints:
missing.append("entrypoints")
if not seed_symbols:
missing.append("seed_symbols")
if not trace_paths:
missing.append("trace_paths")
if not excerpts:
missing.append("code_excerpts")
return missing
def _lexical_excerpts(
    items: list[LayeredRetrievalItem],
    *,
    start_index: int = 0,
    is_test_fallback: bool = False,
) -> tuple[list[CodeExcerpt], dict[str, EvidenceItem]]:
    """Convert lexical retrieval items into excerpts plus evidence entries.

    Args:
        items: Chunks returned by the lexical (C0) search.
        start_index: Offset for "excerpt_N" evidence numbering so fallback
            excerpts do not collide with ids already present in the pack.
        is_test_fallback: Kept for interface compatibility; it does not change
            the produced focus labels — test items are detected per item via
            ``_item_is_test`` regardless.  (The original ``elif
            is_test_fallback: focus = "lexical"`` branch was a no-op and has
            been removed.)

    Returns:
        A pair (excerpts, evidence_index) with matching evidence ids.
    """
    excerpts: list[CodeExcerpt] = []
    evidence_index: dict[str, EvidenceItem] = {}
    for item in items:
        evidence_id = f"excerpt_{start_index + len(evidence_index) + 1}"
        location = item.location
        evidence_index[evidence_id] = EvidenceItem(
            evidence_id=evidence_id,
            kind="excerpt",
            summary=item.title or item.source,
            location=location,
            supports=[],
        )
        # Label test-file excerpts so consumers can separate them from
        # production evidence.
        focus = "test:lexical" if _item_is_test(item) else "lexical"
        excerpts.append(
            CodeExcerpt(
                evidence_id=evidence_id,
                symbol_id=str(item.metadata.get("symbol_id") or "") or None,
                title=item.title or item.source,
                path=item.source,
                start_line=location.start_line if location else None,
                end_line=location.end_line if location else None,
                content=item.content,
                focus=focus,
            )
        )
    return excerpts, evidence_index
def _item_is_test(item: LayeredRetrievalItem) -> bool:
return bool(item.metadata.get("is_test")) or is_test_path(item.source)
def _is_test_excerpt(excerpt: CodeExcerpt) -> bool:
return excerpt.focus.startswith("test:") or is_test_path(excerpt.path)
def _path_prefixes(intent: ExplainIntent, file_candidates: list[dict]) -> list[str]:
values: list[str] = []
for path in intent.hints.paths:
prefix = path.rsplit("/", 1)[0] if "/" in path else path
if prefix and prefix not in values:
values.append(prefix)
for item in file_candidates[:6]:
path = str(item.get("path") or "")
prefix = path.rsplit("/", 1)[0] if "/" in path else ""
if prefix and prefix not in values:
values.append(prefix)
return values
def _cleanup_missing(values: list[str], *, has_excerpts: bool) -> list[str]:
if not has_excerpts:
return values
return [value for value in values if value not in {"code_excerpts", "layer:C0 empty"}]
def _dedupe(values: list[str]) -> list[str]:
result: list[str] = []
for value in values:
item = value.strip()
if item and item not in result:
result.append(item)
return result

View File

@@ -0,0 +1,53 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from app.modules.rag.explain.excerpt_planner import ExcerptPlanner
from app.modules.rag.explain.models import CodeExcerpt, EvidenceItem, TracePath
from app.modules.rag.retrieval.test_filter import is_test_path
if TYPE_CHECKING:
from app.modules.rag.explain.graph_repository import CodeGraphRepository
class SourceExcerptFetcher:
    """Fetches source-code excerpts for the symbols along trace paths."""

    def __init__(self, graph_repository: CodeGraphRepository, planner: ExcerptPlanner | None = None) -> None:
        self._graph = graph_repository
        self._planner = planner or ExcerptPlanner()

    def fetch(
        self,
        rag_session_id: str,
        trace_paths: list[TracePath],
        *,
        max_excerpts: int = 40,
    ) -> tuple[list[CodeExcerpt], dict[str, EvidenceItem]]:
        """Return (excerpts, evidence_index) for the symbols on *trace_paths*.

        Symbols are visited in path order without repeats; the excerpt list is
        capped at *max_excerpts*.  Excerpts planned from test chunks get a
        "test:" focus prefix.
        """
        symbol_order: list[str] = []
        for trace in trace_paths:
            for candidate in trace.symbol_ids:
                if candidate and candidate not in symbol_order:
                    symbol_order.append(candidate)
        excerpts: list[CodeExcerpt] = []
        evidence_index: dict[str, EvidenceItem] = {}
        for chunk in self._graph.get_chunks_by_symbol_ids(rag_session_id, symbol_order):
            symbol_id = str(chunk.metadata.get("symbol_id") or "")
            evidence_id = f"excerpt_{len(evidence_index) + 1}"
            location = chunk.location
            evidence_index[evidence_id] = EvidenceItem(
                evidence_id=evidence_id,
                kind="excerpt",
                summary=chunk.title,
                location=location,
                supports=[symbol_id] if symbol_id else [],
            )
            source_path = location.path if location else chunk.source
            from_tests = bool(chunk.metadata.get("is_test")) or is_test_path(source_path)
            for excerpt in self._planner.plan(chunk, evidence_id=evidence_id, symbol_id=symbol_id):
                if len(excerpts) >= max_excerpts:
                    break
                if from_tests and not excerpt.focus.startswith("test:"):
                    excerpt.focus = f"test:{excerpt.focus}"
                excerpts.append(excerpt)
            if len(excerpts) >= max_excerpts:
                break
        return excerpts, evidence_index

View File

@@ -0,0 +1,102 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from app.modules.rag.explain.models import LayeredRetrievalItem, TracePath
if TYPE_CHECKING:
from app.modules.rag.explain.graph_repository import CodeGraphRepository
class TraceBuilder:
    def __init__(self, graph_repository: CodeGraphRepository) -> None:
        # Repository used to walk C2 dependency edges and resolve C1 symbols.
        self._graph = graph_repository
def build_paths(
self,
rag_session_id: str,
seed_symbols: list[LayeredRetrievalItem],
*,
max_depth: int,
max_paths: int = 3,
edge_types: list[str] | None = None,
) -> list[TracePath]:
edges_filter = edge_types or ["calls", "imports", "inherits"]
symbol_map = self._symbol_map(seed_symbols)
paths: list[TracePath] = []
for seed in seed_symbols:
seed_id = str(seed.metadata.get("symbol_id") or "")
if not seed_id:
continue
queue: list[tuple[list[str], float, list[str]]] = [([seed_id], 0.0, [])]
while queue and len(paths) < max_paths * 3:
current_path, score, notes = queue.pop(0)
src_symbol_id = current_path[-1]
out_edges = self._graph.get_out_edges(rag_session_id, [src_symbol_id], edges_filter, limit_per_src=4)
if not out_edges or len(current_path) >= max_depth:
paths.append(TracePath(symbol_ids=current_path, score=score, notes=notes))
continue
for edge in out_edges:
metadata = edge.metadata
dst_symbol_id = str(metadata.get("dst_symbol_id") or "")
next_notes = list(notes)
next_score = score + self._edge_score(edge, symbol_map.get(src_symbol_id))
if not dst_symbol_id:
dst_ref = str(metadata.get("dst_ref") or "")
package_hint = self._package_hint(symbol_map.get(src_symbol_id))
resolved = self._graph.resolve_symbol_by_ref(rag_session_id, dst_ref, package_hint=package_hint)
if resolved is not None:
dst_symbol_id = str(resolved.metadata.get("symbol_id") or "")
symbol_map[dst_symbol_id] = resolved
next_score += 2.0
next_notes.append(f"resolved:{dst_ref}")
if not dst_symbol_id or dst_symbol_id in current_path:
paths.append(TracePath(symbol_ids=current_path, score=next_score, notes=next_notes))
continue
if dst_symbol_id not in symbol_map:
symbols = self._graph.get_symbols_by_ids(rag_session_id, [dst_symbol_id])
if symbols:
symbol_map[dst_symbol_id] = symbols[0]
queue.append((current_path + [dst_symbol_id], next_score, next_notes))
unique = self._unique_paths(paths)
unique.sort(key=lambda item: item.score, reverse=True)
return unique[:max_paths] or [TracePath(symbol_ids=[seed.metadata.get("symbol_id", "")], score=0.0) for seed in seed_symbols[:1]]
def _edge_score(self, edge: LayeredRetrievalItem, source_symbol: LayeredRetrievalItem | None) -> float:
metadata = edge.metadata
score = 1.0
if str(metadata.get("resolution") or "") == "resolved":
score += 2.0
source_path = source_symbol.source if source_symbol is not None else ""
if source_path and edge.source == source_path:
score += 1.0
if "tests/" in edge.source or "/tests/" in edge.source:
score -= 3.0
return score
def _package_hint(self, symbol: LayeredRetrievalItem | None) -> str | None:
if symbol is None:
return None
package = str(symbol.metadata.get("package_or_module") or "")
if not package:
return None
return ".".join(package.split(".")[:-1]) or package
def _symbol_map(self, items: list[LayeredRetrievalItem]) -> dict[str, LayeredRetrievalItem]:
result: dict[str, LayeredRetrievalItem] = {}
for item in items:
symbol_id = str(item.metadata.get("symbol_id") or "")
if symbol_id:
result[symbol_id] = item
return result
def _unique_paths(self, items: list[TracePath]) -> list[TracePath]:
result: list[TracePath] = []
seen: set[tuple[str, ...]] = set()
for item in items:
key = tuple(symbol_id for symbol_id in item.symbol_ids if symbol_id)
if not key or key in seen:
continue
seen.add(key)
result.append(item)
return result

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
from app.modules.rag.contracts import RagDocument, RagLayer, RagSource, RagSpan
from app.modules.rag.indexing.code.code_text.chunker import CodeChunk
from app.modules.rag.retrieval.test_filter import is_test_path
class CodeTextDocumentBuilder:
@@ -17,6 +18,7 @@ class CodeTextDocumentBuilder:
"chunk_index": chunk_index,
"chunk_type": chunk.chunk_type,
"module_or_unit": source.path.replace("/", ".").removesuffix(".py"),
"is_test": is_test_path(source.path),
"artifact_type": "CODE",
},
)

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
from app.modules.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource, RagSpan
from app.modules.rag.indexing.code.edges.extractor import PyEdge
from app.modules.rag.retrieval.test_filter import is_test_path
class EdgeDocumentBuilder:
@@ -22,6 +23,7 @@ class EdgeDocumentBuilder:
"dst_symbol_id": edge.dst_symbol_id,
"dst_ref": edge.dst_ref,
"resolution": edge.resolution,
"is_test": is_test_path(source.path),
"lang_payload": edge.metadata,
"artifact_type": "CODE",
},

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
from app.modules.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource, RagSpan
from app.modules.rag.indexing.code.entrypoints.registry import Entrypoint
from app.modules.rag.retrieval.test_filter import is_test_path
class EntrypointDocumentBuilder:
@@ -19,6 +20,7 @@ class EntrypointDocumentBuilder:
"framework": entrypoint.framework,
"route_or_command": entrypoint.route_or_command,
"handler_symbol_id": entrypoint.handler_symbol_id,
"is_test": is_test_path(source.path),
"lang_payload": entrypoint.metadata,
"artifact_type": "CODE",
},

Some files were not shown because too many files have changed in this diff. [Show More]