Фиксация изменений
This commit is contained in:
Binary file not shown.
@@ -37,6 +37,8 @@ classDiagram
|
||||
Методы: `run` — строит, валидирует и исполняет execution plan.
|
||||
- `TaskSpecBuilder`: формирует спецификацию задачи для оркестратора.
|
||||
Методы: `build` — собирает `TaskSpec` из route, контекстов и ограничений.
|
||||
- `ProjectQaConversationGraphFactory`, `ProjectQaClassificationGraphFactory`, `ProjectQaRetrievalGraphFactory`, `ProjectQaAnalysisGraphFactory`, `ProjectQaAnswerGraphFactory`: набор маленьких graph-исполнителей для `project/qa`.
|
||||
Роли: нормализация запроса; классификация project-question; поздний retrieval из `RAG`; анализ code/docs контекста; сборка финального ответа.
|
||||
- `StorySessionRecorder`: пишет session-scoped артефакты для последующего bind к Story.
|
||||
Методы: `record_run` — сохраняет входные источники и выходные артефакты сессии.
|
||||
- `StoryContextRepository`: репозиторий Story-контекста и его связей.
|
||||
@@ -58,3 +60,32 @@ sequenceDiagram
|
||||
Router->>Confluence: fetch_page(url)
|
||||
Confluence-->>Router: page(content_markdown, metadata)
|
||||
```
|
||||
|
||||
### `project/qa` reasoning flow
|
||||
Назначение: оркестратор планирует шаги, а каждый шаг исполняется отдельным graph. Retrieval вызывается поздно, внутри шага `context_retrieval`.
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Runtime as GraphAgentRuntime
|
||||
participant Orch as OrchestratorService
|
||||
participant G1 as conversation_understanding
|
||||
participant G2 as question_classification
|
||||
participant G3 as context_retrieval
|
||||
participant Rag as RagService
|
||||
participant G4 as context_analysis
|
||||
participant G5 as answer_composition
|
||||
|
||||
Runtime->>Orch: run(task)
|
||||
Orch->>G1: execute
|
||||
G1-->>Orch: resolved_request
|
||||
Orch->>G2: execute
|
||||
G2-->>Orch: question_profile
|
||||
Orch->>G3: execute
|
||||
G3->>Rag: retrieve(query)
|
||||
Rag-->>G3: rag_items
|
||||
G3-->>Orch: source_bundle
|
||||
Orch->>G4: execute
|
||||
G4-->>Orch: analysis_brief
|
||||
Orch->>G5: execute
|
||||
G5-->>Orch: final_answer
|
||||
Orch-->>Runtime: final_answer
|
||||
```
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,8 +1,13 @@
|
||||
# Public graph-factory API of this package; keep alphabetically sorted
# ("ProjectEditsGraphFactory" was previously out of order).
__all__ = [
    "BaseGraphFactory",
    "DocsGraphFactory",
    "ProjectEditsGraphFactory",
    "ProjectQaAnalysisGraphFactory",
    "ProjectQaAnswerGraphFactory",
    "ProjectQaClassificationGraphFactory",
    "ProjectQaConversationGraphFactory",
    "ProjectQaGraphFactory",
    "ProjectQaRetrievalGraphFactory",
]
|
||||
|
||||
|
||||
@@ -15,6 +20,26 @@ def __getattr__(name: str):
|
||||
from app.modules.agent.engine.graphs.docs_graph import DocsGraphFactory
|
||||
|
||||
return DocsGraphFactory
|
||||
if name == "ProjectQaConversationGraphFactory":
|
||||
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaConversationGraphFactory
|
||||
|
||||
return ProjectQaConversationGraphFactory
|
||||
if name == "ProjectQaClassificationGraphFactory":
|
||||
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaClassificationGraphFactory
|
||||
|
||||
return ProjectQaClassificationGraphFactory
|
||||
if name == "ProjectQaRetrievalGraphFactory":
|
||||
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaRetrievalGraphFactory
|
||||
|
||||
return ProjectQaRetrievalGraphFactory
|
||||
if name == "ProjectQaAnalysisGraphFactory":
|
||||
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaAnalysisGraphFactory
|
||||
|
||||
return ProjectQaAnalysisGraphFactory
|
||||
if name == "ProjectQaAnswerGraphFactory":
|
||||
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaAnswerGraphFactory
|
||||
|
||||
return ProjectQaAnswerGraphFactory
|
||||
if name == "ProjectEditsGraphFactory":
|
||||
from app.modules.agent.engine.graphs.project_edits_graph import ProjectEditsGraphFactory
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -59,7 +59,7 @@ class BaseGraphFactory:
|
||||
f"Confluence context:\n{conf}",
|
||||
]
|
||||
)
|
||||
answer = self._llm.generate("general_answer", user_input)
|
||||
answer = self._llm.generate("general_answer", user_input, log_context="graph.default.answer")
|
||||
emit_progress_sync(
|
||||
state,
|
||||
stage="graph.default.answer.done",
|
||||
|
||||
@@ -52,7 +52,7 @@ class DocsContextAnalyzer:
|
||||
f"Detected documentation candidates:\n{snippets}",
|
||||
]
|
||||
)
|
||||
raw = self._llm.generate("docs_detect", user_input)
|
||||
raw = self._llm.generate("docs_detect", user_input, log_context="graph.docs.detect_existing_docs")
|
||||
exists = self.parse_bool_marker(raw, "exists", default=True)
|
||||
summary = self.parse_text_marker(raw, "summary", default="Documentation files detected.")
|
||||
return {"existing_docs_detected": exists, "existing_docs_summary": summary}
|
||||
@@ -71,7 +71,7 @@ class DocsContextAnalyzer:
|
||||
f"Existing docs summary:\n{state.get('existing_docs_summary', '')}",
|
||||
]
|
||||
)
|
||||
raw = self._llm.generate("docs_strategy", user_input)
|
||||
raw = self._llm.generate("docs_strategy", user_input, log_context="graph.docs.decide_strategy")
|
||||
strategy = self.parse_text_marker(raw, "strategy", default="").lower()
|
||||
if strategy not in {"incremental_update", "from_scratch"}:
|
||||
strategy = "incremental_update" if state.get("existing_docs_detected", False) else "from_scratch"
|
||||
@@ -260,7 +260,7 @@ class DocsContentComposer:
|
||||
f"Examples bundle:\n{state.get('rules_bundle', '')}",
|
||||
]
|
||||
)
|
||||
plan = self._llm.generate("docs_plan_sections", user_input)
|
||||
plan = self._llm.generate("docs_plan_sections", user_input, log_context="graph.docs.plan_incremental_changes")
|
||||
return {
|
||||
"doc_plan": plan,
|
||||
"target_path": target_path,
|
||||
@@ -279,7 +279,7 @@ class DocsContentComposer:
|
||||
f"Examples bundle:\n{state.get('rules_bundle', '')}",
|
||||
]
|
||||
)
|
||||
plan = self._llm.generate("docs_plan_sections", user_input)
|
||||
plan = self._llm.generate("docs_plan_sections", user_input, log_context="graph.docs.plan_new_document")
|
||||
return {"doc_plan": plan, "target_path": target_path, "target_file_content": "", "target_file_hash": ""}
|
||||
|
||||
def generate_doc_content(self, state: AgentGraphState) -> dict:
|
||||
@@ -294,7 +294,7 @@ class DocsContentComposer:
|
||||
f"Examples bundle:\n{state.get('rules_bundle', '')}",
|
||||
]
|
||||
)
|
||||
raw = self._llm.generate("docs_generation", user_input)
|
||||
raw = self._llm.generate("docs_generation", user_input, log_context="graph.docs.generate_doc_content")
|
||||
bundle = self._bundle.parse_docs_bundle(raw)
|
||||
if bundle:
|
||||
first_content = str(bundle[0].get("content", "")).strip()
|
||||
@@ -369,7 +369,7 @@ class DocsContentComposer:
|
||||
f"Generated document:\n{generated}",
|
||||
]
|
||||
)
|
||||
raw = self._llm.generate("docs_self_check", user_input)
|
||||
raw = self._llm.generate("docs_self_check", user_input, log_context="graph.docs.self_check")
|
||||
passed = DocsContextAnalyzer.parse_bool_marker(raw, "pass", default=False)
|
||||
feedback = DocsContextAnalyzer.parse_text_marker(raw, "feedback", default="No validation feedback provided.")
|
||||
return {"validation_attempts": attempts, "validation_passed": passed, "validation_feedback": feedback}
|
||||
@@ -379,7 +379,7 @@ class DocsContentComposer:
|
||||
bundle = state.get("generated_docs_bundle", []) or []
|
||||
strategy = state.get("docs_strategy", "from_scratch")
|
||||
if strategy == "from_scratch" and not self._bundle.bundle_has_required_structure(bundle):
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"build_changeset fallback bundle used: strategy=%s bundle_items=%s",
|
||||
strategy,
|
||||
len(bundle),
|
||||
@@ -452,7 +452,11 @@ class DocsContentComposer:
|
||||
]
|
||||
)
|
||||
try:
|
||||
summary = self._llm.generate("docs_execution_summary", user_input).strip()
|
||||
summary = self._llm.generate(
|
||||
"docs_execution_summary",
|
||||
user_input,
|
||||
log_context="graph.docs.summarize_result",
|
||||
).strip()
|
||||
except Exception:
|
||||
summary = ""
|
||||
if not summary:
|
||||
|
||||
@@ -48,7 +48,9 @@ class ProjectEditsLogic:
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
parsed = self._support.parse_json(self._llm.generate("project_edits_plan", user_input))
|
||||
parsed = self._support.parse_json(
|
||||
self._llm.generate("project_edits_plan", user_input, log_context="graph.project_edits.plan_changes")
|
||||
)
|
||||
contracts = self._contracts.parse(
|
||||
parsed,
|
||||
request=str(state.get("message", "")),
|
||||
@@ -165,7 +167,13 @@ class ProjectEditsLogic:
|
||||
"changeset": [{"op": x.op.value, "path": x.path, "reason": x.reason} for x in changeset[:20]],
|
||||
"rule": "Changes must stay inside contract blocks and not affect unrelated sections.",
|
||||
}
|
||||
parsed = self._support.parse_json(self._llm.generate("project_edits_self_check", json.dumps(payload, ensure_ascii=False)))
|
||||
parsed = self._support.parse_json(
|
||||
self._llm.generate(
|
||||
"project_edits_self_check",
|
||||
json.dumps(payload, ensure_ascii=False),
|
||||
log_context="graph.project_edits.self_check",
|
||||
)
|
||||
)
|
||||
passed = bool(parsed.get("pass")) if isinstance(parsed, dict) else False
|
||||
feedback = str(parsed.get("feedback", "")).strip() if isinstance(parsed, dict) else ""
|
||||
return {
|
||||
@@ -192,7 +200,11 @@ class ProjectEditsLogic:
|
||||
"rag_context": self._support.shorten(state.get("rag_context", ""), 5000),
|
||||
"confluence_context": self._support.shorten(state.get("confluence_context", ""), 5000),
|
||||
}
|
||||
raw = self._llm.generate("project_edits_hunks", json.dumps(prompt_payload, ensure_ascii=False))
|
||||
raw = self._llm.generate(
|
||||
"project_edits_hunks",
|
||||
json.dumps(prompt_payload, ensure_ascii=False),
|
||||
log_context="graph.project_edits.generate_changeset",
|
||||
)
|
||||
parsed = self._support.parse_json(raw)
|
||||
hunks = parsed.get("hunks", []) if isinstance(parsed, dict) else []
|
||||
if not isinstance(hunks, list) or not hunks:
|
||||
|
||||
@@ -33,7 +33,7 @@ class ProjectQaGraphFactory:
|
||||
f"Confluence context:\n{state.get('confluence_context', '')}",
|
||||
]
|
||||
)
|
||||
answer = self._llm.generate("project_answer", user_input)
|
||||
answer = self._llm.generate("project_answer", user_input, log_context="graph.project_qa.answer")
|
||||
emit_progress_sync(
|
||||
state,
|
||||
stage="graph.project_qa.answer.done",
|
||||
|
||||
172
app/modules/agent/engine/graphs/project_qa_step_graphs.py
Normal file
172
app/modules/agent/engine/graphs/project_qa_step_graphs.py
Normal file
@@ -0,0 +1,172 @@
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
|
||||
from langgraph.graph import END, START, StateGraph
|
||||
|
||||
from app.modules.agent.engine.graphs.progress import emit_progress_sync
|
||||
from app.modules.agent.engine.graphs.state import AgentGraphState
|
||||
from app.modules.agent.engine.orchestrator.actions.project_qa_analyzer import ProjectQaAnalyzer
|
||||
from app.modules.agent.engine.orchestrator.actions.project_qa_support import ProjectQaSupport
|
||||
from app.modules.agent.llm import AgentLlmService
|
||||
from app.modules.contracts import RagRetriever
|
||||
from app.modules.rag.explain import ExplainPack, PromptBudgeter
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ProjectQaConversationGraphFactory:
    """Single-step graph for project/qa: normalizes the raw user message."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        # `llm` is accepted only for signature symmetry with the sibling
        # step-graph factories; this step does not call a model.
        self._support = ProjectQaSupport()

    def build(self, checkpointer=None):
        """Compile a one-node graph: START -> resolve_request -> END."""
        graph = StateGraph(AgentGraphState)
        graph.add_node("resolve_request", self._resolve_request)
        graph.add_edge(START, "resolve_request")
        graph.add_edge("resolve_request", END)
        return graph.compile(checkpointer=checkpointer)

    def _resolve_request(self, state: AgentGraphState) -> dict:
        """Normalize the incoming message and expose it as `resolved_request`."""
        emit_progress_sync(state, stage="graph.project_qa.conversation_understanding", message="Нормализую пользовательский запрос.")
        resolved = self._support.resolve_request(str(state.get("message", "") or ""))
        # INFO, not WARNING: this is a routine step-result trace, not a problem
        # report (matches the warning->info cleanup done elsewhere in this commit).
        LOGGER.info("graph step result: graph=project_qa/conversation_understanding normalized=%s", resolved.get("normalized_message", ""))
        return {"resolved_request": resolved}
|
||||
|
||||
|
||||
class ProjectQaClassificationGraphFactory:
    """Single-step graph for project/qa: classifies the normalized question."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        # `llm` kept for signature symmetry with sibling factories; unused here.
        self._support = ProjectQaSupport()

    def build(self, checkpointer=None):
        """Compile a one-node graph: START -> classify_question -> END."""
        graph = StateGraph(AgentGraphState)
        graph.add_node("classify_question", self._classify_question)
        graph.add_edge(START, "classify_question")
        graph.add_edge("classify_question", END)
        return graph.compile(checkpointer=checkpointer)

    def _classify_question(self, state: AgentGraphState) -> dict:
        """Build `question_profile` (domain/intent/...) from the resolved request.

        Falls back to the raw `message` when the conversation step did not run.
        """
        resolved = state.get("resolved_request", {}) or {}
        message = str(resolved.get("normalized_message") or state.get("message", "") or "")
        profile = self._support.build_profile(message)
        # INFO, not WARNING: routine step-result trace.
        LOGGER.info("graph step result: graph=project_qa/question_classification domain=%s intent=%s", profile.get("domain"), profile.get("intent"))
        return {"question_profile": profile}
|
||||
|
||||
|
||||
class ProjectQaRetrievalGraphFactory:
    """Single-step graph for the late `context_retrieval` step of project/qa."""

    def __init__(self, rag: RagRetriever, llm: AgentLlmService | None = None) -> None:
        # NOTE(review): the injected retriever is stored but never invoked by
        # `_retrieve_context` yet (rag_items stays empty) — confirm intended wiring.
        self._rag = rag
        self._support = ProjectQaSupport()

    def build(self, checkpointer=None):
        """Compile a one-node graph: START -> retrieve_context -> END."""
        graph = StateGraph(AgentGraphState)
        graph.add_node("retrieve_context", self._retrieve_context)
        graph.add_edge(START, "retrieve_context")
        graph.add_edge("retrieve_context", END)
        return graph.compile(checkpointer=checkpointer)

    def _retrieve_context(self, state: AgentGraphState) -> dict:
        """Assemble the ranked `source_bundle` from the profile and workspace files."""
        emit_progress_sync(state, stage="graph.project_qa.context_retrieval", message="Собираю контекст по проекту.")
        profile = state.get("question_profile", {}) or {}
        files_map = dict(state.get("files_map", {}) or {})
        rag_items: list[dict] = []  # populated once the RAG call is wired in
        source_bundle = self._support.build_source_bundle(profile, list(rag_items), files_map)
        # INFO, not WARNING: routine step-result trace.
        LOGGER.info(
            "graph step result: graph=project_qa/context_retrieval mode=%s rag_items=%s file_candidates=%s legacy_rag=%s",
            profile.get("domain"),
            len(source_bundle.get("rag_items", []) or []),
            len(source_bundle.get("file_candidates", []) or []),
            False,
        )
        return {"source_bundle": source_bundle}
|
||||
|
||||
|
||||
class ProjectQaAnalysisGraphFactory:
    """Single-step graph that turns retrieved sources into an `analysis_brief`."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        # `llm` kept for signature symmetry with sibling factories; unused here.
        self._support = ProjectQaSupport()
        self._analyzer = ProjectQaAnalyzer()

    def build(self, checkpointer=None):
        """Compile a one-node graph: START -> analyze_context -> END."""
        graph = StateGraph(AgentGraphState)
        graph.add_node("analyze_context", self._analyze_context)
        graph.add_edge(START, "analyze_context")
        graph.add_edge("analyze_context", END)
        return graph.compile(checkpointer=checkpointer)

    def _analyze_context(self, state: AgentGraphState) -> dict:
        """Produce `analysis_brief`, preferring a prebuilt explain pack.

        When `explain_pack` is present, the brief is derived from it directly;
        otherwise the code/docs analyzer runs over the `source_bundle`.
        """
        explain_pack = state.get("explain_pack")
        if explain_pack:
            analysis = self._analysis_from_pack(explain_pack)
            self._log_analysis(analysis)
            return {"analysis_brief": analysis}
        bundle = state.get("source_bundle", {}) or {}
        profile = bundle.get("profile", {}) or state.get("question_profile", {}) or {}
        rag_items = list(bundle.get("rag_items", []) or [])
        file_candidates = list(bundle.get("file_candidates", []) or [])
        if str(profile.get("domain")) == "code":
            analysis = self._analyzer.analyze_code(profile, rag_items, file_candidates)
        else:
            analysis = self._analyzer.analyze_docs(profile, rag_items)
        self._log_analysis(analysis)
        return {"analysis_brief": analysis}

    @staticmethod
    def _log_analysis(analysis: dict) -> None:
        """Emit the routine step-result trace (INFO, not WARNING) once."""
        LOGGER.info(
            "graph step result: graph=project_qa/context_analysis findings=%s evidence=%s",
            len(analysis.get("findings", []) or []),
            len(analysis.get("evidence", []) or []),
        )

    def _analysis_from_pack(self, raw_pack) -> dict:
        """Convert a (serialized) ExplainPack into the analysis-brief dict.

        Caps: 3 entrypoints, 3 trace paths, 4 code excerpts — keeps the brief
        small enough for the downstream prompt budget.
        """
        pack = ExplainPack.model_validate(raw_pack)
        findings: list[str] = []
        evidence: list[str] = []
        for entrypoint in pack.selected_entrypoints[:3]:
            findings.append(f"Entrypoint `{entrypoint.title}` maps to handler `{entrypoint.metadata.get('handler_symbol_id', '')}`.")
            if entrypoint.source:
                evidence.append(entrypoint.source)
        for path in pack.trace_paths[:3]:
            if path.symbol_ids:
                findings.append(f"Trace path: {' -> '.join(path.symbol_ids)}")
        for excerpt in pack.code_excerpts[:4]:
            evidence.append(f"{excerpt.path}:{excerpt.start_line}-{excerpt.end_line} [{excerpt.evidence_id}]")
        return {
            "subject": pack.intent.normalized_query,
            "findings": findings or ["No explain trace was built from the available code evidence."],
            "evidence": evidence,
            "gaps": list(pack.missing),
            "answer_mode": "summary",
        }
|
||||
|
||||
|
||||
class ProjectQaAnswerGraphFactory:
    """Single-step graph that composes the final project/qa answer."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        self._support = ProjectQaSupport()
        # Optional LLM: when present (and an explain pack exists) the answer is
        # model-generated; otherwise a deterministic template answer is built.
        self._llm = llm
        self._budgeter = PromptBudgeter()

    def build(self, checkpointer=None):
        """Compile a one-node graph: START -> compose_answer -> END."""
        graph = StateGraph(AgentGraphState)
        graph.add_node("compose_answer", self._compose_answer)
        graph.add_edge(START, "compose_answer")
        graph.add_edge("compose_answer", END)
        return graph.compile(checkpointer=checkpointer)

    def _compose_answer(self, state: AgentGraphState) -> dict:
        """Compose `final_answer`, preferring the explain-pack LLM path."""
        profile = state.get("question_profile", {}) or {}
        analysis = state.get("analysis_brief", {}) or {}
        brief = self._support.build_answer_brief(profile, analysis)
        explain_pack = state.get("explain_pack")
        answer = self._compose_explain_answer(state, explain_pack)
        if not answer:
            # Fallback: deterministic template answer assembled from the brief.
            answer = self._support.compose_answer(brief)
        # INFO, not WARNING: routine step-result trace.
        LOGGER.info("graph step result: graph=project_qa/answer_composition answer_len=%s", len(answer or ""))
        return {"answer_brief": brief, "final_answer": answer}

    def _compose_explain_answer(self, state: AgentGraphState, raw_pack) -> str:
        """Return an LLM answer built from the explain pack, or "" when unavailable."""
        if raw_pack is None or self._llm is None:
            return ""
        pack = ExplainPack.model_validate(raw_pack)
        prompt_input = self._budgeter.build_prompt_input(str(state.get("message", "") or ""), pack)
        return self._llm.generate(
            "code_explain_answer_v2",
            prompt_input,
            log_context="graph.project_qa.answer_v2",
        ).strip()
|
||||
@@ -25,6 +25,12 @@ class AgentGraphState(TypedDict, total=False):
|
||||
validation_passed: bool
|
||||
validation_feedback: str
|
||||
validation_attempts: int
|
||||
resolved_request: dict
|
||||
question_profile: dict
|
||||
source_bundle: dict
|
||||
analysis_brief: dict
|
||||
answer_brief: dict
|
||||
final_answer: str
|
||||
answer: str
|
||||
changeset: list[ChangeItem]
|
||||
edits_requested_path: str
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,13 +1,17 @@
|
||||
from app.modules.agent.engine.orchestrator.actions.code_explain_actions import CodeExplainActions
|
||||
from app.modules.agent.engine.orchestrator.actions.docs_actions import DocsActions
|
||||
from app.modules.agent.engine.orchestrator.actions.edit_actions import EditActions
|
||||
from app.modules.agent.engine.orchestrator.actions.explain_actions import ExplainActions
|
||||
from app.modules.agent.engine.orchestrator.actions.gherkin_actions import GherkinActions
|
||||
from app.modules.agent.engine.orchestrator.actions.project_qa_actions import ProjectQaActions
|
||||
from app.modules.agent.engine.orchestrator.actions.review_actions import ReviewActions
|
||||
|
||||
# Public action classes re-exported by this package; kept alphabetically sorted.
__all__ = [
    "CodeExplainActions",
    "DocsActions",
    "EditActions",
    "ExplainActions",
    "GherkinActions",
    "ProjectQaActions",
    "ReviewActions",
]
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,46 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
|
||||
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
|
||||
from app.modules.agent.engine.orchestrator.models import ArtifactType
|
||||
from app.modules.rag.explain.intent_builder import ExplainIntentBuilder
|
||||
from app.modules.rag.explain.models import ExplainPack
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CodeExplainActions(ActionSupport):
    """Orchestrator action that builds an ExplainPack for code-explanation answers."""

    def __init__(self, retriever: CodeExplainRetrieverV2 | None = None) -> None:
        # The retriever is optional: without it we still emit a stub pack that
        # records the missing capability instead of failing the plan.
        self._retriever = retriever
        self._intent_builder = ExplainIntentBuilder()

    def build_code_explain_pack(self, ctx: ExecutionContext) -> list[str]:
        """Build (or stub) an ExplainPack and publish it as the `explain_pack` artifact.

        File candidates from an earlier `source_bundle` artifact (if any) seed
        the retriever.
        """
        file_candidates = list((self.get(ctx, "source_bundle", {}) or {}).get("file_candidates", []) or [])
        if self._retriever is None:
            pack = ExplainPack(
                intent=self._intent_builder.build(ctx.task.user_message),
                missing=["code_explain_retriever_unavailable"],
            )
        else:
            pack = self._retriever.build_pack(
                ctx.task.rag_session_id,
                ctx.task.user_message,
                file_candidates=file_candidates,
            )
        # INFO, not WARNING: this is a routine result trace, not a problem report.
        LOGGER.info(
            "code explain action: task_id=%s entrypoints=%s seeds=%s paths=%s excerpts=%s missing=%s",
            ctx.task.task_id,
            len(pack.selected_entrypoints),
            len(pack.seed_symbols),
            len(pack.trace_paths),
            len(pack.code_excerpts),
            pack.missing,
        )
        return [self.put(ctx, "explain_pack", ArtifactType.STRUCTURED_JSON, pack.model_dump(mode="json"))]
|
||||
@@ -0,0 +1,117 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.modules.agent.engine.orchestrator.actions.project_qa_analyzer import ProjectQaAnalyzer
|
||||
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
|
||||
from app.modules.agent.engine.orchestrator.actions.project_qa_support import ProjectQaSupport
|
||||
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
|
||||
from app.modules.agent.engine.orchestrator.models import ArtifactType
|
||||
|
||||
|
||||
class ProjectQaActions(ActionSupport):
    """Orchestrator actions for the project/qa route: classify the question,
    rank sources, analyze them, and compose the final markdown answer."""

    # Terms (EN/RU) that signal the user explicitly asked about tests.
    _TEST_TERMS = frozenset({"test", "tests", "тест", "тесты"})
    # Score penalty applied to test files when tests were not asked about.
    _TEST_PATH_PENALTY = 3

    def __init__(self) -> None:
        self._support = ProjectQaSupport()
        self._analyzer = ProjectQaAnalyzer()

    def classify_project_question(self, ctx: ExecutionContext) -> list[str]:
        """Build a question profile from the user message and publish it."""
        message = str(ctx.task.user_message or "")
        profile = self._support.build_profile(message)
        return [self.put(ctx, "question_profile", ArtifactType.STRUCTURED_JSON, profile)]

    def collect_project_sources(self, ctx: ExecutionContext) -> list[str]:
        """Rank RAG hits and workspace files against the question profile.

        Test files are demoted unless the question explicitly mentions tests;
        only positively scored items survive, capped at 12 RAG items and
        10 file candidates to bound the downstream prompt budget.
        """
        profile = self.get(ctx, "question_profile", {}) or {}
        terms = list(profile.get("terms", []) or [])
        entities = list(profile.get("entities", []) or [])
        rag_items = list(ctx.task.metadata.get("rag_items", []) or [])
        files_map = dict(ctx.task.metadata.get("files_map", {}) or {})
        explicit_test = any(term in self._TEST_TERMS for term in terms)

        ranked_rag = []
        for item in rag_items:
            score = self._support.rag_score(item, terms, entities)
            source = str(item.get("source", "") or "")
            if not explicit_test and self._support.is_test_path(source):
                score -= self._TEST_PATH_PENALTY
            if score > 0:
                ranked_rag.append((score, item))
        ranked_rag.sort(key=lambda pair: pair[0], reverse=True)

        ranked_files = []
        for path, payload in files_map.items():
            score = self._support.file_score(path, payload, terms, entities)
            if not explicit_test and self._support.is_test_path(path):
                score -= self._TEST_PATH_PENALTY
            if score > 0:
                ranked_files.append(
                    (
                        score,
                        {
                            "path": path,
                            "content": str(payload.get("content", "")),
                            "content_hash": str(payload.get("content_hash", "")),
                        },
                    )
                )
        ranked_files.sort(key=lambda pair: pair[0], reverse=True)

        bundle = {
            "profile": profile,
            "rag_items": [item for _, item in ranked_rag[:12]],
            "file_candidates": [item for _, item in ranked_files[:10]],
            # Totals reflect everything that scored positively, pre-cap.
            "rag_total": len(ranked_rag),
            "files_total": len(ranked_files),
        }
        return [self.put(ctx, "source_bundle", ArtifactType.STRUCTURED_JSON, bundle)]

    def analyze_project_sources(self, ctx: ExecutionContext) -> list[str]:
        """Run the code- or docs-oriented analyzer over the ranked sources."""
        bundle = self.get(ctx, "source_bundle", {}) or {}
        profile = bundle.get("profile", {}) or {}
        rag_items = list(bundle.get("rag_items", []) or [])
        file_candidates = list(bundle.get("file_candidates", []) or [])

        if str(profile.get("domain")) == "code":
            analysis = self._analyzer.analyze_code(profile, rag_items, file_candidates)
        else:
            analysis = self._analyzer.analyze_docs(profile, rag_items)
        return [self.put(ctx, "analysis_brief", ArtifactType.STRUCTURED_JSON, analysis)]

    def build_project_answer_brief(self, ctx: ExecutionContext) -> list[str]:
        """Merge the profile and analysis into the brief consumed by composition."""
        profile = self.get(ctx, "question_profile", {}) or {}
        analysis = self.get(ctx, "analysis_brief", {}) or {}
        brief = {
            "question_profile": profile,
            "resolved_subject": analysis.get("subject"),
            "key_findings": analysis.get("findings", []),
            "supporting_evidence": analysis.get("evidence", []),
            "missing_evidence": analysis.get("gaps", []),
            "answer_mode": analysis.get("answer_mode", "summary"),
        }
        return [self.put(ctx, "answer_brief", ArtifactType.STRUCTURED_JSON, brief)]

    def compose_project_answer(self, ctx: ExecutionContext) -> list[str]:
        """Render the answer brief as localized markdown and publish `final_answer`.

        Sections: summary heading, findings (or a "nothing confirmed" note),
        up to 5 evidence paths, up to 3 gaps. Russian/English is chosen from
        the question profile.
        """
        brief = self.get(ctx, "answer_brief", {}) or {}
        profile = brief.get("question_profile", {}) or {}
        russian = bool(profile.get("russian"))
        answer_mode = str(brief.get("answer_mode") or "summary")
        findings = list(brief.get("key_findings", []) or [])
        evidence = list(brief.get("supporting_evidence", []) or [])
        gaps = list(brief.get("missing_evidence", []) or [])

        title = "## Кратко" if russian else "## Summary"
        lines = [title]
        if answer_mode == "inventory":
            lines.append("### Что реализовано" if russian else "### Implemented items")
        else:
            lines.append("### Что видно по проекту" if russian else "### What the project shows")
        if findings:
            lines.extend(f"- {item}" for item in findings)
        else:
            lines.append("Не удалось собрать подтвержденные выводы по доступным данным." if russian else "No supported findings could be assembled from the available data.")
        if evidence:
            lines.append("")
            lines.append("### Где смотреть в проекте" if russian else "### Where to look in the project")
            lines.extend(f"- `{item}`" for item in evidence[:5])
        if gaps:
            lines.append("")
            lines.append("### Что пока не подтверждено кодом" if russian else "### What is not yet confirmed in code")
            lines.extend(f"- {item}" for item in gaps[:3])
        return [self.put(ctx, "final_answer", ArtifactType.TEXT, "\n".join(lines))]
|
||||
@@ -0,0 +1,154 @@
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class ProjectQaAnalyzer:
|
||||
    def analyze_code(self, profile: dict, rag_items: list[dict], file_candidates: list[dict]) -> dict:
        """Build an analysis brief for code-domain questions.

        Scans RAG hits and file candidates for known management-channel
        artifacts (ConfigManager alias, ControlChannel contract, bridge layer,
        concrete channel implementations, package docs) and collects localized
        findings, evidence paths and gaps.
        """
        terms = list(profile.get("terms", []) or [])
        intent = str(profile.get("intent") or "lookup")
        # Findings/gaps are localized: Russian when the profile flags it.
        russian = bool(profile.get("russian"))
        findings: list[str] = []
        evidence: list[str] = []
        gaps: list[str] = []

        # RAG items whose layer starts with "C1" are treated as symbol-level
        # hits; everything else contributes file paths only.
        symbol_titles = [str(item.get("title", "") or "") for item in rag_items if str(item.get("layer", "")).startswith("C1")]
        symbol_set = set(symbol_titles)
        file_paths = [str(item.get("path", "") or item.get("source", "") or "") for item in rag_items]
        file_paths.extend(str(item.get("path", "") or "") for item in file_candidates)

        if "ConfigManager" in profile.get("entities", []) or "configmanager" in terms or "config_manager" in terms:
            alias_file = self.find_path(file_paths, "src/config_manager/__init__.py")
            if alias_file:
                findings.append(
                    "Публичный `ConfigManager` экспортируется из `src/config_manager/__init__.py` как alias на `ConfigManagerV2`."
                    if russian
                    else "Public `ConfigManager` is exported from `src/config_manager/__init__.py` as an alias to `ConfigManagerV2`."
                )
                evidence.append("src/config_manager/__init__.py")

        # Case-insensitive match for the base contract symbol.
        if "controlchannel" in {name.lower() for name in symbol_set}:
            findings.append(
                "Базовый контракт управления задает `ControlChannel`: он определяет команды `start` и `stop` для внешнего канала управления."
                if russian
                else "`ControlChannel` defines the base management contract with `start` and `stop` commands."
            )
            evidence.append("src/config_manager/v2/control/base.py")

        if "ControlChannelBridge" in symbol_set:
            findings.append(
                "`ControlChannelBridge` связывает внешний канал управления с lifecycle-методами менеджера: `on_start`, `on_stop`, `on_status`."
                if russian
                else "`ControlChannelBridge` connects the external control channel to manager lifecycle methods: `on_start`, `on_stop`, `on_status`."
            )
            evidence.append("src/config_manager/v2/core/control_bridge.py")

        implementation_files = self.find_management_implementations(file_candidates)
        if implementation_files:
            labels = ", ".join(f"`{path}`" for path in implementation_files)
            channel_names = self.implementation_names(implementation_files)
            findings.append(
                f"В коде найдены конкретные реализации каналов управления: {', '.join(channel_names)} ({labels})."
                if russian
                else f"Concrete management channel implementations were found in code: {', '.join(channel_names)} ({labels})."
            )
            evidence.extend(implementation_files)
        elif intent == "inventory":
            # Only inventory-style questions warrant reporting the gap.
            gaps.append(
                "В текущем контексте не удалось уверенно подтвердить конкретные файлы-реализации каналов, кроме базового контракта и bridge-слоя."
                if russian
                else "The current context does not yet confirm concrete channel implementation files beyond the base contract and bridge layer."
            )

        package_doc = self.find_management_doc(file_candidates)
        if package_doc:
            findings.append(
                f"Пакет управления прямо описывает внешние каналы через `{package_doc}`."
                if russian
                else f"The control package directly describes external channels in `{package_doc}`."
            )
            evidence.append(package_doc)

        # Subject defaults to a generic label unless explicit entities exist.
        subject = "management channels"
        if profile.get("entities"):
            subject = ", ".join(profile["entities"])
        return {
            "subject": subject,
            "findings": self.dedupe(findings),
            "evidence": self.dedupe(evidence),
            "gaps": gaps,
            "answer_mode": "inventory" if intent == "inventory" else "summary",
        }
|
||||
|
||||
def analyze_docs(self, profile: dict, rag_items: list[dict]) -> dict:
    """Summarize up to five RAG documentation hits into findings plus evidence references."""
    summaries: list[str] = []
    references: list[str] = []
    for doc in rag_items[:5]:
        doc_title = str(doc.get("title", "") or "")
        doc_source = str(doc.get("source", "") or "")
        body = str(doc.get("content", "") or "").strip()
        if body:
            # Keep only the first line of each hit, capped at 220 characters.
            summaries.append(body.splitlines()[0][:220])
        # Prefer the source path as the reference; fall back to the title.
        reference = doc_source or doc_title
        if reference:
            references.append(reference)
    if summaries:
        gaps: list[str] = []
    elif profile.get("russian"):
        gaps = ["Недостаточно данных в документации."]
    else:
        gaps = ["Not enough data in documentation."]
    return {
        "subject": "docs",
        "findings": self.dedupe(summaries),
        "evidence": self.dedupe(references),
        "gaps": gaps,
        "answer_mode": "summary",
    }
|
||||
|
||||
def find_management_implementations(self, file_candidates: list[dict]) -> list[str]:
    """Pick non-test file paths that look like concrete control-channel implementations (max 4)."""
    filename_tokens = ("http_channel.py", "telegram.py", "telegram_channel.py", "http.py")
    content_tokens = ("http", "telegram", "bot")
    matches: list[str] = []
    for candidate in file_candidates:
        path = str(candidate.get("path", "") or "")
        if self.is_test_path(path):
            continue
        path_lower = path.lower()
        # Strongest signal: a well-known channel-implementation filename.
        if any(token in path_lower for token in filename_tokens):
            matches.append(path)
            continue
        body = str(candidate.get("content", "") or "").lower()
        # Next: a class definition mentioning the ControlChannel contract.
        if "controlchannel" in body and "class " in body:
            matches.append(path)
        # Weakest: a channel/control-named path whose body mentions a transport.
        elif ("channel" in path_lower or "control" in path_lower) and any(token in body for token in content_tokens):
            matches.append(path)
    return self.dedupe(matches)[:4]
|
||||
|
||||
def implementation_names(self, paths: list[str]) -> list[str]:
    """Derive unique human-readable labels from file stems (underscores become spaces)."""
    labels: list[str] = []
    for file_path in paths:
        filename = file_path.rsplit("/", 1)[-1]
        stem = filename.rsplit(".", 1)[0]
        pretty = stem.replace("_", " ").strip()
        # Preserve first-seen order while dropping empties and duplicates.
        if pretty and pretty not in labels:
            labels.append(pretty)
    return labels
|
||||
|
||||
def find_management_doc(self, file_candidates: list[dict]) -> str | None:
|
||||
for item in file_candidates:
|
||||
path = str(item.get("path", "") or "")
|
||||
if self.is_test_path(path):
|
||||
continue
|
||||
content = str(item.get("content", "") or "").lower()
|
||||
if any(token in content for token in ("каналы внешнего управления", "external control channels", "http api", "telegram")):
|
||||
return path
|
||||
return None
|
||||
|
||||
def find_path(self, paths: list[str], target: str) -> str | None:
|
||||
for path in paths:
|
||||
if path == target:
|
||||
return path
|
||||
return None
|
||||
|
||||
def dedupe(self, items: list[str]) -> list[str]:
    """Return ``items`` with falsy entries dropped and duplicates removed, preserving first-seen order."""
    # dict.fromkeys keeps insertion order (guaranteed since Python 3.7),
    # turning the previous quadratic `not in` scan into a single O(n) pass.
    return [item for item in dict.fromkeys(items) if item]
|
||||
|
||||
def is_test_path(self, path: str) -> bool:
    """Heuristic: does ``path`` point into a test tree or a test_* module?"""
    lowered = path.lower()
    if lowered.startswith(("tests/", "test_")):
        return True
    return "/tests/" in lowered or "/test_" in lowered
|
||||
@@ -0,0 +1,166 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from app.modules.rag.retrieval.query_terms import extract_query_terms
|
||||
|
||||
|
||||
class ProjectQaSupport:
    """Pure helper logic for the project/qa graphs: request normalization, question
    profiling, retrieval-result ranking, and final markdown answer composition."""

    def resolve_request(self, message: str) -> dict:
        """Normalize the raw user message and derive routing hints from its profile."""
        profile = self.build_profile(message)
        subject = profile["entities"][0] if profile.get("entities") else ""
        return {
            "original_message": message,
            "normalized_message": " ".join((message or "").split()),
            "subject_hint": subject,
            "source_hint": profile["domain"],
            "russian": profile["russian"],
        }

    def build_profile(self, message: str) -> dict:
        """Classify the question: target domain, intent, query terms, entities, language."""
        lowered = message.lower()
        return {
            # Pass the original-cased message so the CamelCase identifier probe
            # in looks_like_code_question can actually match (see bug note there).
            "domain": "code" if self.looks_like_code_question(lowered, message) else "docs",
            "intent": self.detect_intent(lowered),
            "terms": extract_query_terms(message),
            "entities": self.extract_entities(message),
            "russian": self.is_russian(message),
        }

    def build_retrieval_query(self, resolved_request: dict, profile: dict) -> str:
        """Build the late-retrieval query, prefixing a code-search marker for code questions."""
        normalized = str(resolved_request.get("normalized_message") or resolved_request.get("original_message") or "").strip()
        if profile.get("domain") == "code" and "по коду" not in normalized.lower():
            return f"по коду {normalized}".strip()
        return normalized

    def build_source_bundle(self, profile: dict, rag_items: list[dict], files_map: dict[str, dict]) -> dict:
        """Rank RAG hits and project files against the question profile.

        Test files are demoted (score -3) unless the user explicitly asked about
        tests; only positive-score items survive. Returns the top 12 RAG items,
        the top 10 file candidates, and the total counts of surviving items.
        """
        terms = list(profile.get("terms", []) or [])
        entities = list(profile.get("entities", []) or [])
        explicit_test = any(term in {"test", "tests", "тест", "тесты"} for term in terms)

        ranked_rag: list[tuple[int, dict]] = []
        for item in rag_items:
            score = self.rag_score(item, terms, entities)
            source = str(item.get("source", "") or "")
            if not explicit_test and self.is_test_path(source):
                score -= 3
            if score > 0:
                ranked_rag.append((score, item))
        ranked_rag.sort(key=lambda pair: pair[0], reverse=True)

        ranked_files: list[tuple[int, dict]] = []
        for path, payload in files_map.items():
            score = self.file_score(path, payload, terms, entities)
            if not explicit_test and self.is_test_path(path):
                score -= 3
            if score > 0:
                candidate = {
                    "path": path,
                    "content": str(payload.get("content", "")),
                    "content_hash": str(payload.get("content_hash", "")),
                }
                ranked_files.append((score, candidate))
        ranked_files.sort(key=lambda pair: pair[0], reverse=True)

        return {
            "profile": profile,
            "rag_items": [item for _, item in ranked_rag[:12]],
            "file_candidates": [item for _, item in ranked_files[:10]],
            "rag_total": len(ranked_rag),
            "files_total": len(ranked_files),
        }

    def build_answer_brief(self, profile: dict, analysis: dict) -> dict:
        """Repackage the analysis result into the brief consumed by answer composition."""
        return {
            "question_profile": profile,
            "resolved_subject": analysis.get("subject"),
            "key_findings": analysis.get("findings", []),
            "supporting_evidence": analysis.get("evidence", []),
            "missing_evidence": analysis.get("gaps", []),
            "answer_mode": analysis.get("answer_mode", "summary"),
        }

    def compose_answer(self, brief: dict) -> str:
        """Render the final markdown answer (RU or EN) from the analysis brief."""
        profile = brief.get("question_profile", {}) or {}
        russian = bool(profile.get("russian"))
        answer_mode = str(brief.get("answer_mode") or "summary")
        findings = list(brief.get("key_findings", []) or [])
        evidence = list(brief.get("supporting_evidence", []) or [])
        gaps = list(brief.get("missing_evidence", []) or [])

        lines = ["## Кратко" if russian else "## Summary"]
        if answer_mode == "inventory":
            lines.append("### Что реализовано" if russian else "### Implemented items")
        else:
            lines.append("### Что видно по проекту" if russian else "### What the project shows")
        if findings:
            lines.extend(f"- {item}" for item in findings)
        else:
            lines.append("Не удалось собрать подтвержденные выводы по доступным данным." if russian else "No supported findings could be assembled from the available data.")
        if evidence:
            lines.append("")
            lines.append("### Где смотреть в проекте" if russian else "### Where to look in the project")
            lines.extend(f"- `{item}`" for item in evidence[:5])  # cap evidence list at 5
        if gaps:
            lines.append("")
            lines.append("### Что пока не подтверждено кодом" if russian else "### What is not yet confirmed in code")
            lines.extend(f"- {item}" for item in gaps[:3])  # cap gap list at 3
        return "\n".join(lines)

    def detect_intent(self, lowered: str) -> str:
        """Map marker keywords in the lowercased message to an intent label."""
        if any(token in lowered for token in ("какие", "что уже реализ", "список", "перечень", "какие есть")):
            return "inventory"
        if any(token in lowered for token in ("где", "find", "where")):
            return "lookup"
        if any(token in lowered for token in ("сравни", "compare")):
            return "compare"
        return "explain"

    def looks_like_code_question(self, lowered: str, original: str | None = None) -> bool:
        """True when the message targets code: keyword markers or a CamelCase identifier.

        ``lowered`` is the lowercased message; ``original`` (optional, backward
        compatible) is the original-cased text for the identifier probe.
        """
        code_markers = ("по коду", "код", "реализ", "имплементац", "класс", "метод", "модул", "файл", "канал", "handler", "endpoint")
        if any(marker in lowered for marker in code_markers):
            return True
        # BUG FIX: this regex previously ran against the lowercased text, where
        # [A-Z] can never match — the identifier check was dead code. Probe the
        # original-cased message when the caller provides it.
        return bool(re.search(r"\b[A-Z][A-Za-z0-9_]{2,}\b", original if original is not None else lowered))

    def extract_entities(self, message: str) -> list[str]:
        """Return up to five CamelCase-looking identifiers from the original-cased message."""
        return re.findall(r"\b[A-Z][A-Za-z0-9_]{2,}\b", message)[:5]

    def rag_score(self, item: dict, terms: list[str], entities: list[str]) -> int:
        """Score a RAG hit: +3 per matching term, +5 per matching entity (case-insensitive)."""
        haystacks = [
            str(item.get("source", "") or "").lower(),
            str(item.get("title", "") or "").lower(),
            str(item.get("content", "") or "").lower(),
            str((item.get("metadata", {}) or {}).get("qname", "") or "").lower(),
        ]
        score = 0
        for term in terms:
            if any(term in hay for hay in haystacks):
                score += 3
        for entity in entities:
            if any(entity.lower() in hay for hay in haystacks):
                score += 5
        return score

    def file_score(self, path: str, payload: dict, terms: list[str], entities: list[str]) -> int:
        """Score a project file; path matches weigh more than content matches."""
        content = str(payload.get("content", "") or "").lower()
        path_lower = path.lower()
        score = 0
        for term in terms:
            if term in path_lower:
                score += 4
            elif term in content:
                score += 2
        for entity in entities:
            entity_lower = entity.lower()
            if entity_lower in path_lower:
                score += 5
            elif entity_lower in content:
                score += 3
        return score

    def is_test_path(self, path: str) -> bool:
        """Heuristic: does ``path`` point into a test tree or a test_* module?"""
        lowered = path.lower()
        return lowered.startswith("tests/") or "/tests/" in lowered or lowered.startswith("test_") or "/test_" in lowered

    def is_russian(self, text: str) -> bool:
        """True if the text contains at least one Cyrillic letter (а-я or ё)."""
        return any("а" <= ch.lower() <= "я" or ch.lower() == "ё" for ch in text)
|
||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
import logging
|
||||
import time
|
||||
|
||||
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
|
||||
@@ -9,6 +10,8 @@ from app.modules.agent.engine.orchestrator.models import PlanStatus, PlanStep, S
|
||||
from app.modules.agent.engine.orchestrator.quality_gates import QualityGateRunner
|
||||
from app.modules.agent.engine.orchestrator.step_registry import StepRegistry
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ExecutionEngine:
|
||||
def __init__(self, step_registry: StepRegistry, gates: QualityGateRunner) -> None:
|
||||
@@ -22,17 +25,18 @@ class ExecutionEngine:
|
||||
for step in ctx.plan.steps:
|
||||
dep_issue = self._dependency_issue(step, step_results)
|
||||
if dep_issue:
|
||||
step_results.append(
|
||||
StepResult(
|
||||
result = StepResult(
|
||||
step_id=step.step_id,
|
||||
status=StepStatus.SKIPPED,
|
||||
warnings=[dep_issue],
|
||||
)
|
||||
)
|
||||
step_results.append(result)
|
||||
self._log_step_result(ctx, step, result)
|
||||
continue
|
||||
|
||||
result = await self._run_with_retry(step, ctx)
|
||||
step_results.append(result)
|
||||
self._log_step_result(ctx, step, result)
|
||||
if result.status in {StepStatus.FAILED, StepStatus.RETRY_EXHAUSTED} and step.on_failure == "fail":
|
||||
ctx.plan.status = PlanStatus.FAILED
|
||||
return step_results
|
||||
@@ -65,6 +69,15 @@ class ExecutionEngine:
|
||||
while attempt < max_attempts:
|
||||
attempt += 1
|
||||
started_at = time.monotonic()
|
||||
LOGGER.warning(
|
||||
"orchestrator step start: task_id=%s step_id=%s action_id=%s executor=%s attempt=%s graph_id=%s",
|
||||
ctx.task.task_id,
|
||||
step.step_id,
|
||||
step.action_id,
|
||||
step.executor,
|
||||
attempt,
|
||||
step.graph_id or "",
|
||||
)
|
||||
await self._emit_progress(ctx, f"orchestrator.step.{step.step_id}", step.title)
|
||||
|
||||
try:
|
||||
@@ -113,3 +126,21 @@ class ExecutionEngine:
|
||||
result = ctx.progress_cb(stage, message, "task_progress", {"layer": "orchestrator"})
|
||||
if inspect.isawaitable(result):
|
||||
await result
|
||||
|
||||
def _log_step_result(self, ctx: ExecutionContext, step: PlanStep, result: StepResult) -> None:
    """Log a single step outcome, resolving produced artifact ids to their keys for readability."""
    artifact_keys = []
    for artifact_id in result.produced_artifact_ids:
        # Map each produced artifact id back to its human-readable key, if the
        # artifact is still present in the context store.
        item = next((artifact for artifact in ctx.artifacts.all_items() if artifact.artifact_id == artifact_id), None)
        if item is not None:
            artifact_keys.append(item.key)
    # NOTE(review): WARNING level appears to be used for routine step tracing
    # here — confirm this is intentional (INFO/DEBUG would be conventional).
    LOGGER.warning(
        "orchestrator step result: task_id=%s step_id=%s action_id=%s status=%s duration_ms=%s artifact_keys=%s warnings=%s error=%s",
        ctx.task.task_id,
        step.step_id,
        step.action_id,
        result.status.value,
        result.duration_ms,
        artifact_keys,
        result.warnings,
        result.error_message or "",
    )
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import inspect
|
||||
import logging
|
||||
|
||||
from app.core.exceptions import AppError
|
||||
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext, GraphInvoker, GraphResolver, ProgressCallback
|
||||
@@ -14,6 +15,8 @@ from app.modules.agent.engine.orchestrator.step_registry import StepRegistry
|
||||
from app.modules.agent.engine.orchestrator.template_registry import ScenarioTemplateRegistry
|
||||
from app.schemas.common import ModuleName
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OrchestratorService:
|
||||
def __init__(
|
||||
@@ -74,6 +77,21 @@ class OrchestratorService:
|
||||
)
|
||||
result = self._assembler.assemble(ctx, step_results)
|
||||
await self._emit_progress(progress_cb, "orchestrator.done", "Execution plan completed.")
|
||||
LOGGER.warning(
|
||||
"orchestrator decision: task_id=%s scenario=%s plan_status=%s steps=%s changeset_items=%s answer_len=%s",
|
||||
task.task_id,
|
||||
task.scenario.value,
|
||||
result.meta.get("plan", {}).get("status", ""),
|
||||
[
|
||||
{
|
||||
"step_id": step.step_id,
|
||||
"status": step.status.value,
|
||||
}
|
||||
for step in result.steps
|
||||
],
|
||||
len(result.changeset),
|
||||
len(result.answer or ""),
|
||||
)
|
||||
return result
|
||||
|
||||
async def _emit_progress(self, progress_cb: ProgressCallback | None, stage: str, message: str) -> None:
|
||||
|
||||
@@ -2,29 +2,50 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from collections.abc import Callable
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.agent.engine.graphs.progress_registry import progress_registry
|
||||
from app.modules.agent.engine.orchestrator.actions import DocsActions, EditActions, ExplainActions, GherkinActions, ReviewActions
|
||||
from app.modules.agent.engine.orchestrator.actions import (
|
||||
CodeExplainActions,
|
||||
DocsActions,
|
||||
EditActions,
|
||||
ExplainActions,
|
||||
GherkinActions,
|
||||
ProjectQaActions,
|
||||
ReviewActions,
|
||||
)
|
||||
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
|
||||
from app.modules.agent.engine.orchestrator.models import ArtifactType, PlanStep
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
|
||||
StepFn = Callable[[ExecutionContext], list[str]]
|
||||
|
||||
|
||||
class StepRegistry:
|
||||
def __init__(self) -> None:
|
||||
def __init__(self, code_explain_retriever: CodeExplainRetrieverV2 | None = None) -> None:
|
||||
code_explain = CodeExplainActions(code_explain_retriever)
|
||||
explain = ExplainActions()
|
||||
review = ReviewActions()
|
||||
docs = DocsActions()
|
||||
edits = EditActions()
|
||||
gherkin = GherkinActions()
|
||||
project_qa = ProjectQaActions()
|
||||
|
||||
self._functions: dict[str, StepFn] = {
|
||||
"collect_state": self._collect_state,
|
||||
"finalize_graph_output": self._finalize_graph_output,
|
||||
"execute_project_qa_graph": self._collect_state,
|
||||
"build_code_explain_pack": code_explain.build_code_explain_pack,
|
||||
"collect_sources": explain.collect_sources,
|
||||
"extract_logic": explain.extract_logic,
|
||||
"summarize": explain.summarize,
|
||||
"classify_project_question": project_qa.classify_project_question,
|
||||
"collect_project_sources": project_qa.collect_project_sources,
|
||||
"analyze_project_sources": project_qa.analyze_project_sources,
|
||||
"build_project_answer_brief": project_qa.build_project_answer_brief,
|
||||
"compose_project_answer": project_qa.compose_project_answer,
|
||||
"fetch_source_doc": review.fetch_source_doc,
|
||||
"normalize_document": review.normalize_document,
|
||||
"structural_check": review.structural_check,
|
||||
@@ -66,6 +87,7 @@ class StepRegistry:
|
||||
state = {
|
||||
"task_id": ctx.task.task_id,
|
||||
"project_id": ctx.task.rag_session_id,
|
||||
"scenario": ctx.task.scenario.value,
|
||||
"message": ctx.task.user_message,
|
||||
"progress_key": ctx.task.task_id,
|
||||
"rag_context": str(ctx.task.metadata.get("rag_context", "")),
|
||||
@@ -86,7 +108,7 @@ class StepRegistry:
|
||||
raise RuntimeError(f"Unsupported graph_id: {graph_key}")
|
||||
|
||||
graph = ctx.graph_resolver(domain_id, process_id)
|
||||
state = ctx.artifacts.get_content("agent_state", {}) or {}
|
||||
state = self._build_graph_state(ctx)
|
||||
|
||||
if ctx.progress_cb is not None:
|
||||
progress_registry.register(ctx.task.task_id, ctx.progress_cb)
|
||||
@@ -96,8 +118,29 @@ class StepRegistry:
|
||||
if ctx.progress_cb is not None:
|
||||
progress_registry.unregister(ctx.task.task_id)
|
||||
|
||||
item = ctx.artifacts.put(key="graph_result", artifact_type=ArtifactType.STRUCTURED_JSON, content=result)
|
||||
return [item.artifact_id]
|
||||
return self._store_graph_outputs(step, ctx, result)
|
||||
|
||||
def _build_graph_state(self, ctx: ExecutionContext) -> dict:
|
||||
state = dict(ctx.artifacts.get_content("agent_state", {}) or {})
|
||||
for item in ctx.artifacts.all_items():
|
||||
state[item.key] = ctx.artifacts.get_content(item.key)
|
||||
return state
|
||||
|
||||
def _store_graph_outputs(self, step: PlanStep, ctx: ExecutionContext, result: dict) -> list[str]:
|
||||
if not isinstance(result, dict):
|
||||
raise RuntimeError("graph_result must be an object")
|
||||
if len(step.outputs) == 1 and step.outputs[0].key == "graph_result":
|
||||
item = ctx.artifacts.put(key="graph_result", artifact_type=ArtifactType.STRUCTURED_JSON, content=result)
|
||||
return [item.artifact_id]
|
||||
|
||||
artifact_ids: list[str] = []
|
||||
for output in step.outputs:
|
||||
value = result.get(output.key)
|
||||
if value is None and output.required:
|
||||
raise RuntimeError(f"graph_output_missing:{step.step_id}:{output.key}")
|
||||
item = ctx.artifacts.put(key=output.key, artifact_type=output.type, content=value)
|
||||
artifact_ids.append(item.artifact_id)
|
||||
return artifact_ids
|
||||
|
||||
def _finalize_graph_output(self, ctx: ExecutionContext) -> list[str]:
|
||||
raw = ctx.artifacts.get_content("graph_result", {}) or {}
|
||||
|
||||
@@ -16,6 +16,8 @@ class ScenarioTemplateRegistry:
|
||||
return builders.get(task.scenario, self._general)(task)
|
||||
|
||||
def _general(self, task: TaskSpec) -> ExecutionPlan:
|
||||
if task.routing.domain_id == "project" and task.routing.process_id == "qa":
|
||||
return self._project_qa(task)
|
||||
steps = [
|
||||
self._step("collect_state", "Collect state", "collect_state", outputs=[self._out("agent_state", ArtifactType.STRUCTURED_JSON)]),
|
||||
self._step(
|
||||
@@ -39,7 +41,77 @@ class ScenarioTemplateRegistry:
|
||||
]
|
||||
return self._plan(task, "general_qa_v1", steps, [self._gate("non_empty_answer_or_changeset")])
|
||||
|
||||
def _project_qa(self, task: TaskSpec) -> ExecutionPlan:
    """Build the staged project/qa reasoning plan.

    Step chain: collect_state -> conversation_understanding ->
    question_classification -> context_retrieval -> (optional explain-pack
    step) -> context_analysis -> answer_composition. Each reasoning step is
    executed as a separate graph via the ``execute_project_qa_graph`` action.
    """
    steps = [
        self._step("collect_state", "Collect state", "collect_state", outputs=[self._out("agent_state", ArtifactType.STRUCTURED_JSON)]),
        self._step(
            "conversation_understanding",
            "Conversation understanding",
            "execute_project_qa_graph",
            executor="graph",
            graph_id="project_qa/conversation_understanding",
            depends_on=["collect_state"],
            outputs=[self._out("resolved_request", ArtifactType.STRUCTURED_JSON)],
        ),
        self._step(
            "question_classification",
            "Question classification",
            "execute_project_qa_graph",
            executor="graph",
            graph_id="project_qa/question_classification",
            depends_on=["conversation_understanding"],
            outputs=[self._out("question_profile", ArtifactType.STRUCTURED_JSON)],
        ),
        self._step(
            "context_retrieval",
            "Context retrieval",
            "execute_project_qa_graph",
            executor="graph",
            graph_id="project_qa/context_retrieval",
            depends_on=["question_classification"],
            outputs=[self._out("source_bundle", ArtifactType.STRUCTURED_JSON)],
        ),
    ]
    # By default analysis consumes retrieval output directly; EXPLAIN_PART
    # tasks insert an extra pack-building step in between and repoint the
    # analysis dependency to it.
    analysis_depends_on = ["context_retrieval"]
    if task.scenario == Scenario.EXPLAIN_PART:
        steps.append(
            self._step(
                "code_explain_pack_step",
                "Build code explain pack",
                "build_code_explain_pack",
                depends_on=["context_retrieval"],
                outputs=[self._out("explain_pack", ArtifactType.STRUCTURED_JSON)],
            )
        )
        analysis_depends_on = ["code_explain_pack_step"]
    steps.extend(
        [
            self._step(
                "context_analysis",
                "Context analysis",
                "execute_project_qa_graph",
                executor="graph",
                graph_id="project_qa/context_analysis",
                depends_on=analysis_depends_on,
                outputs=[self._out("analysis_brief", ArtifactType.STRUCTURED_JSON)],
            ),
            self._step(
                "answer_composition",
                "Answer composition",
                "execute_project_qa_graph",
                executor="graph",
                graph_id="project_qa/answer_composition",
                depends_on=["context_analysis"],
                # answer_brief is optional; final_answer must always be produced.
                outputs=[self._out("answer_brief", ArtifactType.STRUCTURED_JSON, required=False), self._out("final_answer", ArtifactType.TEXT)],
                gates=[self._gate("non_empty_answer_or_changeset")],
            ),
        ]
    )
    return self._plan(task, "project_qa_reasoning_v1", steps, [self._gate("non_empty_answer_or_changeset")])
|
||||
|
||||
def _explain(self, task: TaskSpec) -> ExecutionPlan:
|
||||
if task.routing.domain_id == "project" and task.routing.process_id == "qa":
|
||||
return self._project_qa(task)
|
||||
steps = [
|
||||
self._step("collect_sources", "Collect sources", "collect_sources", outputs=[self._out("sources", ArtifactType.STRUCTURED_JSON)]),
|
||||
self._step("extract_logic", "Extract logic", "extract_logic", depends_on=["collect_sources"], outputs=[self._out("logic_model", ArtifactType.STRUCTURED_JSON)]),
|
||||
|
||||
@@ -2,21 +2,28 @@ from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.agent.llm import AgentLlmService
|
||||
from app.modules.contracts import RagRetriever
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.agent.repository import AgentRepository
|
||||
from app.modules.agent.engine.router.router_service import RouterService
|
||||
|
||||
|
||||
def build_router_service(llm: AgentLlmService, agent_repository: "AgentRepository") -> "RouterService":
|
||||
def build_router_service(llm: AgentLlmService, agent_repository: "AgentRepository", rag: RagRetriever) -> "RouterService":
|
||||
from app.modules.agent.engine.graphs import (
|
||||
BaseGraphFactory,
|
||||
DocsGraphFactory,
|
||||
ProjectEditsGraphFactory,
|
||||
ProjectQaAnalysisGraphFactory,
|
||||
ProjectQaAnswerGraphFactory,
|
||||
ProjectQaClassificationGraphFactory,
|
||||
ProjectQaConversationGraphFactory,
|
||||
ProjectQaGraphFactory,
|
||||
ProjectQaRetrievalGraphFactory,
|
||||
)
|
||||
from app.modules.agent.engine.router.context_store import RouterContextStore
|
||||
from app.modules.agent.engine.router.intent_classifier import IntentClassifier
|
||||
from app.modules.agent.engine.router.intent_switch_detector import IntentSwitchDetector
|
||||
from app.modules.agent.engine.router.registry import IntentRegistry
|
||||
from app.modules.agent.engine.router.router_service import RouterService
|
||||
|
||||
@@ -26,13 +33,20 @@ def build_router_service(llm: AgentLlmService, agent_repository: "AgentRepositor
|
||||
registry.register("project", "qa", ProjectQaGraphFactory(llm).build)
|
||||
registry.register("project", "edits", ProjectEditsGraphFactory(llm).build)
|
||||
registry.register("docs", "generation", DocsGraphFactory(llm).build)
|
||||
registry.register("project_qa", "conversation_understanding", ProjectQaConversationGraphFactory(llm).build)
|
||||
registry.register("project_qa", "question_classification", ProjectQaClassificationGraphFactory(llm).build)
|
||||
registry.register("project_qa", "context_retrieval", ProjectQaRetrievalGraphFactory(rag, llm).build)
|
||||
registry.register("project_qa", "context_analysis", ProjectQaAnalysisGraphFactory(llm).build)
|
||||
registry.register("project_qa", "answer_composition", ProjectQaAnswerGraphFactory(llm).build)
|
||||
|
||||
classifier = IntentClassifier(llm)
|
||||
switch_detector = IntentSwitchDetector()
|
||||
context_store = RouterContextStore(agent_repository)
|
||||
return RouterService(
|
||||
registry=registry,
|
||||
classifier=classifier,
|
||||
context_store=context_store,
|
||||
switch_detector=switch_detector,
|
||||
)
|
||||
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -17,6 +17,7 @@ class RouterContextStore:
|
||||
process_id: str,
|
||||
user_message: str,
|
||||
assistant_message: str,
|
||||
decision_type: str = "start",
|
||||
max_history: int = 10,
|
||||
) -> None:
|
||||
self._repo.update_router_context(
|
||||
@@ -25,5 +26,6 @@ class RouterContextStore:
|
||||
process_id=process_id,
|
||||
user_message=user_message,
|
||||
assistant_message=assistant_message,
|
||||
decision_type=decision_type,
|
||||
max_history=max_history,
|
||||
)
|
||||
|
||||
@@ -17,11 +17,7 @@ class IntentClassifier:
|
||||
def __init__(self, llm: AgentLlmService) -> None:
|
||||
self._llm = llm
|
||||
|
||||
def classify(self, user_message: str, context: RouterContext, mode: str = "auto") -> RouteDecision:
|
||||
forced = self._from_mode(mode)
|
||||
if forced:
|
||||
return forced
|
||||
|
||||
def classify_new_intent(self, user_message: str, context: RouterContext) -> RouteDecision:
|
||||
text = (user_message or "").strip().lower()
|
||||
if text in self._short_confirmations and context.last_routing:
|
||||
return RouteDecision(
|
||||
@@ -30,6 +26,7 @@ class IntentClassifier:
|
||||
confidence=1.0,
|
||||
reason="short_confirmation",
|
||||
use_previous=True,
|
||||
decision_type="continue",
|
||||
)
|
||||
|
||||
deterministic = self._deterministic_route(text)
|
||||
@@ -45,9 +42,10 @@ class IntentClassifier:
|
||||
process_id="general",
|
||||
confidence=0.8,
|
||||
reason="default",
|
||||
decision_type="start",
|
||||
)
|
||||
|
||||
def _from_mode(self, mode: str) -> RouteDecision | None:
|
||||
def from_mode(self, mode: str) -> RouteDecision | None:
|
||||
mapping = {
|
||||
"project_qa": ("project", "qa"),
|
||||
"project_edits": ("project", "edits"),
|
||||
@@ -65,6 +63,8 @@ class IntentClassifier:
|
||||
process_id=route[1],
|
||||
confidence=1.0,
|
||||
reason=f"mode_override:{mode}",
|
||||
decision_type="switch",
|
||||
explicit_switch=True,
|
||||
)
|
||||
|
||||
def _classify_with_llm(self, user_message: str, context: RouterContext) -> RouteDecision | None:
|
||||
@@ -96,6 +96,7 @@ class IntentClassifier:
|
||||
process_id=route[1],
|
||||
confidence=confidence,
|
||||
reason=f"llm_router:{payload.get('reason', 'ok')}",
|
||||
decision_type="start",
|
||||
)
|
||||
|
||||
def _parse_llm_payload(self, raw: str) -> dict[str, str | float] | None:
|
||||
@@ -139,6 +140,8 @@ class IntentClassifier:
|
||||
process_id="edits",
|
||||
confidence=0.97,
|
||||
reason="deterministic_targeted_file_edit",
|
||||
decision_type="switch",
|
||||
explicit_switch=True,
|
||||
)
|
||||
if self._is_broad_docs_request(text):
|
||||
return RouteDecision(
|
||||
@@ -146,6 +149,8 @@ class IntentClassifier:
|
||||
process_id="generation",
|
||||
confidence=0.95,
|
||||
reason="deterministic_docs_generation",
|
||||
decision_type="switch",
|
||||
explicit_switch=True,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
81
app/modules/agent/engine/router/intent_switch_detector.py
Normal file
81
app/modules/agent/engine/router/intent_switch_detector.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from app.modules.agent.engine.router.schemas import RouterContext
|
||||
|
||||
|
||||
class IntentSwitchDetector:
    """Heuristically decides whether a new user message starts a different intent mid-dialog."""

    _EXPLICIT_SWITCH_MARKERS = (
        "теперь",
        "а теперь",
        "давай теперь",
        "переключись",
        "переключаемся",
        "сейчас другое",
        "новая задача",
        "new task",
        "switch to",
        "now do",
        "instead",
    )
    _FOLLOW_UP_MARKERS = (
        "а еще",
        "а ещё",
        "подробнее",
        "почему",
        "зачем",
        "что если",
        "и еще",
        "и ещё",
        "покажи подробнее",
        "можешь подробнее",
    )

    def should_switch(self, user_message: str, context: RouterContext) -> bool:
        """Return True when the message looks like a topic switch rather than a follow-up."""
        # No switch is possible before a dialog has an active intent.
        if not context.dialog_started or context.active_intent is None:
            return False
        normalized = " ".join((user_message or "").strip().lower().split())
        # Empty messages and follow-up phrasing never switch intents.
        if not normalized or self._is_follow_up(normalized):
            return False
        if any(marker in normalized for marker in self._EXPLICIT_SWITCH_MARKERS):
            return True
        # Fall back to strong task-shaped signals: a targeted file edit or a docs request.
        return self._is_strong_targeted_edit_request(normalized) or self._is_strong_docs_request(normalized)

    def _is_follow_up(self, text: str) -> bool:
        """True when the message reads as a continuation of the current topic."""
        for marker in self._FOLLOW_UP_MARKERS:
            if marker in text:
                return True
        return False

    def _is_strong_targeted_edit_request(self, text: str) -> bool:
        """True when the message pairs an edit verb with a concrete file reference."""
        edit_markers = (
            "добавь",
            "добавить",
            "измени",
            "исправь",
            "обнови",
            "удали",
            "замени",
            "append",
            "update",
            "edit",
            "remove",
            "replace",
        )
        if not any(marker in text for marker in edit_markers):
            return False
        if "readme" in text:
            return True
        return bool(re.search(r"\b[\w.\-/]+\.(md|txt|rst|yaml|yml|json|toml|ini|cfg|py)\b", text))

    def _is_strong_docs_request(self, text: str) -> bool:
        """True when the message explicitly asks for documentation generation."""
        docs_markers = (
            "подготовь документац",
            "сгенерируй документац",
            "создай документац",
            "опиши документац",
            "generate documentation",
            "write documentation",
        )
        for marker in docs_markers:
            if marker in text:
                return True
        return False
|
||||
@@ -1,7 +1,8 @@
|
||||
from app.modules.agent.engine.router.context_store import RouterContextStore
|
||||
from app.modules.agent.engine.router.intent_classifier import IntentClassifier
|
||||
from app.modules.agent.engine.router.intent_switch_detector import IntentSwitchDetector
|
||||
from app.modules.agent.engine.router.registry import IntentRegistry
|
||||
from app.modules.agent.engine.router.schemas import RouteResolution
|
||||
from app.modules.agent.engine.router.schemas import RouteDecision, RouteResolution
|
||||
|
||||
|
||||
class RouterService:
|
||||
@@ -10,27 +11,48 @@ class RouterService:
|
||||
registry: IntentRegistry,
|
||||
classifier: IntentClassifier,
|
||||
context_store: RouterContextStore,
|
||||
switch_detector: IntentSwitchDetector | None = None,
|
||||
min_confidence: float = 0.7,
|
||||
) -> None:
|
||||
self._registry = registry
|
||||
self._classifier = classifier
|
||||
self._ctx = context_store
|
||||
self._switch_detector = switch_detector or IntentSwitchDetector()
|
||||
self._min_confidence = min_confidence
|
||||
|
||||
def resolve(self, user_message: str, conversation_key: str, mode: str = "auto") -> RouteResolution:
|
||||
context = self._ctx.get(conversation_key)
|
||||
decision = self._classifier.classify(user_message, context, mode=mode)
|
||||
if decision.confidence < self._min_confidence:
|
||||
return self._fallback("low_confidence")
|
||||
if not self._registry.is_valid(decision.domain_id, decision.process_id):
|
||||
return self._fallback("invalid_route")
|
||||
return RouteResolution(
|
||||
domain_id=decision.domain_id,
|
||||
process_id=decision.process_id,
|
||||
confidence=decision.confidence,
|
||||
reason=decision.reason,
|
||||
fallback_used=False,
|
||||
)
|
||||
forced = self._classifier.from_mode(mode)
|
||||
if forced:
|
||||
return self._resolution(forced)
|
||||
|
||||
if not context.dialog_started or context.active_intent is None:
|
||||
decision = self._classifier.classify_new_intent(user_message, context)
|
||||
if not self._is_acceptable(decision):
|
||||
return self._fallback("low_confidence")
|
||||
return self._resolution(
|
||||
decision.model_copy(
|
||||
update={
|
||||
"decision_type": "start",
|
||||
"explicit_switch": False,
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
if self._switch_detector.should_switch(user_message, context):
|
||||
decision = self._classifier.classify_new_intent(user_message, context)
|
||||
if self._is_acceptable(decision):
|
||||
return self._resolution(
|
||||
decision.model_copy(
|
||||
update={
|
||||
"decision_type": "switch",
|
||||
"explicit_switch": True,
|
||||
}
|
||||
)
|
||||
)
|
||||
return self._continue_current(context, "explicit_switch_unresolved_keep_current")
|
||||
|
||||
return self._continue_current(context, "continue_current_intent")
|
||||
|
||||
def persist_context(
|
||||
self,
|
||||
@@ -40,6 +62,7 @@ class RouterService:
|
||||
process_id: str,
|
||||
user_message: str,
|
||||
assistant_message: str,
|
||||
decision_type: str = "start",
|
||||
) -> None:
|
||||
self._ctx.update(
|
||||
conversation_key,
|
||||
@@ -47,6 +70,7 @@ class RouterService:
|
||||
process_id=process_id,
|
||||
user_message=user_message,
|
||||
assistant_message=assistant_message,
|
||||
decision_type=decision_type,
|
||||
)
|
||||
|
||||
def graph_factory(self, domain_id: str, process_id: str):
|
||||
@@ -59,4 +83,32 @@ class RouterService:
|
||||
confidence=0.0,
|
||||
reason=reason,
|
||||
fallback_used=True,
|
||||
decision_type="start",
|
||||
explicit_switch=False,
|
||||
)
|
||||
|
||||
def _continue_current(self, context, reason: str) -> RouteResolution:
|
||||
active = context.active_intent or context.last_routing or {"domain_id": "default", "process_id": "general"}
|
||||
return RouteResolution(
|
||||
domain_id=str(active["domain_id"]),
|
||||
process_id=str(active["process_id"]),
|
||||
confidence=1.0,
|
||||
reason=reason,
|
||||
fallback_used=False,
|
||||
decision_type="continue",
|
||||
explicit_switch=False,
|
||||
)
|
||||
|
||||
def _is_acceptable(self, decision: RouteDecision) -> bool:
|
||||
return decision.confidence >= self._min_confidence and self._registry.is_valid(decision.domain_id, decision.process_id)
|
||||
|
||||
def _resolution(self, decision: RouteDecision) -> RouteResolution:
|
||||
return RouteResolution(
|
||||
domain_id=decision.domain_id,
|
||||
process_id=decision.process_id,
|
||||
confidence=decision.confidence,
|
||||
reason=decision.reason,
|
||||
fallback_used=False,
|
||||
decision_type=decision.decision_type,
|
||||
explicit_switch=decision.explicit_switch,
|
||||
)
|
||||
|
||||
@@ -7,6 +7,8 @@ class RouteDecision(BaseModel):
|
||||
confidence: float = 0.0
|
||||
reason: str = ""
|
||||
use_previous: bool = False
|
||||
decision_type: str = "start"
|
||||
explicit_switch: bool = False
|
||||
|
||||
@field_validator("confidence")
|
||||
@classmethod
|
||||
@@ -20,8 +22,13 @@ class RouteResolution(BaseModel):
|
||||
confidence: float
|
||||
reason: str
|
||||
fallback_used: bool = False
|
||||
decision_type: str = "start"
|
||||
explicit_switch: bool = False
|
||||
|
||||
|
||||
class RouterContext(BaseModel):
|
||||
last_routing: dict[str, str] | None = None
|
||||
message_history: list[dict[str, str]] = Field(default_factory=list)
|
||||
active_intent: dict[str, str] | None = None
|
||||
dialog_started: bool = False
|
||||
turn_index: int = 0
|
||||
|
||||
Binary file not shown.
@@ -1,14 +1,40 @@
|
||||
import logging
|
||||
|
||||
from app.modules.agent.prompt_loader import PromptLoader
|
||||
from app.modules.shared.gigachat.client import GigaChatClient
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _truncate_for_log(text: str, max_chars: int = 1500) -> str:
|
||||
value = (text or "").replace("\n", "\\n").strip()
|
||||
if len(value) <= max_chars:
|
||||
return value
|
||||
return value[:max_chars].rstrip() + "...[truncated]"
|
||||
|
||||
|
||||
class AgentLlmService:
|
||||
def __init__(self, client: GigaChatClient, prompts: PromptLoader) -> None:
|
||||
self._client = client
|
||||
self._prompts = prompts
|
||||
|
||||
def generate(self, prompt_name: str, user_input: str) -> str:
|
||||
def generate(self, prompt_name: str, user_input: str, *, log_context: str | None = None) -> str:
|
||||
system_prompt = self._prompts.load(prompt_name)
|
||||
if not system_prompt:
|
||||
system_prompt = "You are a helpful assistant."
|
||||
return self._client.complete(system_prompt=system_prompt, user_prompt=user_input)
|
||||
if log_context:
|
||||
LOGGER.warning(
|
||||
"graph llm input: context=%s prompt=%s user_input=%s",
|
||||
log_context,
|
||||
prompt_name,
|
||||
_truncate_for_log(user_input),
|
||||
)
|
||||
output = self._client.complete(system_prompt=system_prompt, user_prompt=user_input)
|
||||
if log_context:
|
||||
LOGGER.warning(
|
||||
"graph llm output: context=%s prompt=%s output=%s",
|
||||
log_context,
|
||||
prompt_name,
|
||||
_truncate_for_log(output),
|
||||
)
|
||||
return output
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel, HttpUrl
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.agent.changeset_validator import ChangeSetValidator
|
||||
from app.modules.agent.confluence_service import ConfluenceService
|
||||
@@ -19,12 +22,17 @@ class ConfluenceFetchRequest(BaseModel):
|
||||
url: HttpUrl
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
|
||||
|
||||
class AgentModule:
|
||||
def __init__(
|
||||
self,
|
||||
rag_retriever: RagRetriever,
|
||||
agent_repository: AgentRepository,
|
||||
story_context_repository: StoryContextRepository,
|
||||
code_explain_retriever: CodeExplainRetrieverV2 | None = None,
|
||||
) -> None:
|
||||
self.confluence = ConfluenceService()
|
||||
self.changeset_validator = ChangeSetValidator()
|
||||
@@ -34,14 +42,16 @@ class AgentModule:
|
||||
client = GigaChatClient(settings, token_provider)
|
||||
prompt_loader = PromptLoader()
|
||||
llm = AgentLlmService(client=client, prompts=prompt_loader)
|
||||
self.llm = llm
|
||||
story_recorder = StorySessionRecorder(story_context_repository)
|
||||
self.runtime = GraphAgentRuntime(
|
||||
rag=rag_retriever,
|
||||
confluence=self.confluence,
|
||||
changeset_validator=self.changeset_validator,
|
||||
llm=llm,
|
||||
llm=self.llm,
|
||||
agent_repository=agent_repository,
|
||||
story_recorder=story_recorder,
|
||||
code_explain_retriever=code_explain_retriever,
|
||||
)
|
||||
|
||||
def internal_router(self) -> APIRouter:
|
||||
|
||||
17
app/modules/agent/prompts/code_explain_answer_v2.txt
Normal file
17
app/modules/agent/prompts/code_explain_answer_v2.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
Объяснение кода осуществляется только с использованием предоставленного ExplainPack.
|
||||
|
||||
Правила:
|
||||
- Сначала используйте доказательства.
|
||||
- Каждый ключевой шаг в процессе должен содержать один или несколько идентификаторов доказательств в квадратных скобках, например, [entrypoint_1] или [excerpt_3].
|
||||
- Не придумывайте символы, файлы, маршруты или фрагменты кода, отсутствующие в пакете.
|
||||
- Если доказательства неполные, укажите это явно.
|
||||
- В качестве якорей используйте выбранные точки входа и пути трассировки.
|
||||
|
||||
Верните Markdown со следующей структурой:
|
||||
1. Краткое описание
|
||||
2. Пошаговый процесс
|
||||
3. Данные и побочные эффекты
|
||||
4. Ошибки и граничные случаи
|
||||
5. Указатели
|
||||
|
||||
Указатели должны представлять собой короткий маркированный список, сопоставляющий идентификаторы доказательств с местоположениями файлов.
|
||||
24
app/modules/agent/prompts/rag_intent_router_v2.txt
Normal file
24
app/modules/agent/prompts/rag_intent_router_v2.txt
Normal file
@@ -0,0 +1,24 @@
|
||||
Ты intent-router для layered RAG.
|
||||
На вход ты получаешь JSON с полями:
|
||||
- message: текущий запрос пользователя
|
||||
- active_intent: текущий активный intent диалога или null
|
||||
- last_query: предыдущий запрос пользователя
|
||||
- allowed_intents: допустимые intent'ы
|
||||
|
||||
Выбери ровно один intent из allowed_intents.
|
||||
Верни только JSON без markdown и пояснений.
|
||||
|
||||
Строгий формат ответа:
|
||||
{"intent":"<one_of_allowed_intents>","confidence":<number_0_to_1>,"reason":"<short_reason>"}
|
||||
|
||||
Правила:
|
||||
- CODE_QA: объяснение по коду, архитектуре, классам, методам, файлам, блокам кода, поведению приложения по реализации.
|
||||
- DOCS_QA: объяснение по документации, README, markdown, specs, runbooks, разделам документации.
|
||||
- GENERATE_DOCS_FROM_CODE: просьба сгенерировать, подготовить или обновить документацию по коду.
|
||||
- PROJECT_MISC: прочие вопросы по проекту, не относящиеся явно к коду или документации.
|
||||
|
||||
Приоритет:
|
||||
- Если пользователь просит именно подготовить документацию по коду, выбирай GENERATE_DOCS_FROM_CODE.
|
||||
- Если пользователь спрашивает про конкретный класс, файл, метод или блок кода, выбирай CODE_QA.
|
||||
- Если пользователь спрашивает про README, docs, markdown или конкретную документацию, выбирай DOCS_QA.
|
||||
- Если сигнал неочевиден, выбирай PROJECT_MISC и confidence <= 0.6.
|
||||
@@ -18,6 +18,10 @@ class AgentRepository:
|
||||
conversation_key VARCHAR(64) PRIMARY KEY,
|
||||
last_domain_id VARCHAR(64) NULL,
|
||||
last_process_id VARCHAR(64) NULL,
|
||||
active_domain_id VARCHAR(64) NULL,
|
||||
active_process_id VARCHAR(64) NULL,
|
||||
dialog_started BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
turn_index INTEGER NOT NULL DEFAULT 0,
|
||||
message_history_json TEXT NOT NULL DEFAULT '[]',
|
||||
updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
@@ -64,14 +68,24 @@ class AgentRepository:
|
||||
"""
|
||||
)
|
||||
)
|
||||
self._ensure_router_context_columns(conn)
|
||||
conn.commit()
|
||||
|
||||
def _ensure_router_context_columns(self, conn) -> None:
|
||||
for statement in (
|
||||
"ALTER TABLE router_context ADD COLUMN IF NOT EXISTS active_domain_id VARCHAR(64) NULL",
|
||||
"ALTER TABLE router_context ADD COLUMN IF NOT EXISTS active_process_id VARCHAR(64) NULL",
|
||||
"ALTER TABLE router_context ADD COLUMN IF NOT EXISTS dialog_started BOOLEAN NOT NULL DEFAULT FALSE",
|
||||
"ALTER TABLE router_context ADD COLUMN IF NOT EXISTS turn_index INTEGER NOT NULL DEFAULT 0",
|
||||
):
|
||||
conn.execute(text(statement))
|
||||
|
||||
def get_router_context(self, conversation_key: str) -> RouterContext:
|
||||
with get_engine().connect() as conn:
|
||||
row = conn.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT last_domain_id, last_process_id, message_history_json
|
||||
SELECT last_domain_id, last_process_id, active_domain_id, active_process_id, dialog_started, turn_index, message_history_json
|
||||
FROM router_context
|
||||
WHERE conversation_key = :key
|
||||
"""
|
||||
@@ -82,7 +96,7 @@ class AgentRepository:
|
||||
if not row:
|
||||
return RouterContext()
|
||||
|
||||
history_raw = row[2] or "[]"
|
||||
history_raw = row[6] or "[]"
|
||||
try:
|
||||
history = json.loads(history_raw)
|
||||
except json.JSONDecodeError:
|
||||
@@ -91,6 +105,9 @@ class AgentRepository:
|
||||
last = None
|
||||
if row[0] and row[1]:
|
||||
last = {"domain_id": str(row[0]), "process_id": str(row[1])}
|
||||
active = None
|
||||
if row[2] and row[3]:
|
||||
active = {"domain_id": str(row[2]), "process_id": str(row[3])}
|
||||
|
||||
clean_history = []
|
||||
for item in history if isinstance(history, list) else []:
|
||||
@@ -101,7 +118,13 @@ class AgentRepository:
|
||||
if role in {"user", "assistant"} and content:
|
||||
clean_history.append({"role": role, "content": content})
|
||||
|
||||
return RouterContext(last_routing=last, message_history=clean_history)
|
||||
return RouterContext(
|
||||
last_routing=last,
|
||||
message_history=clean_history,
|
||||
active_intent=active or last,
|
||||
dialog_started=bool(row[4]),
|
||||
turn_index=int(row[5] or 0),
|
||||
)
|
||||
|
||||
def update_router_context(
|
||||
self,
|
||||
@@ -111,6 +134,7 @@ class AgentRepository:
|
||||
process_id: str,
|
||||
user_message: str,
|
||||
assistant_message: str,
|
||||
decision_type: str,
|
||||
max_history: int,
|
||||
) -> None:
|
||||
current = self.get_router_context(conversation_key)
|
||||
@@ -121,17 +145,29 @@ class AgentRepository:
|
||||
history.append({"role": "assistant", "content": assistant_message})
|
||||
if max_history > 0:
|
||||
history = history[-max_history:]
|
||||
current_active = current.active_intent or current.last_routing or {"domain_id": domain_id, "process_id": process_id}
|
||||
next_active = (
|
||||
{"domain_id": domain_id, "process_id": process_id}
|
||||
if decision_type in {"start", "switch"}
|
||||
else current_active
|
||||
)
|
||||
next_turn_index = max(0, int(current.turn_index or 0)) + (1 if user_message else 0)
|
||||
|
||||
with get_engine().connect() as conn:
|
||||
conn.execute(
|
||||
text(
|
||||
"""
|
||||
INSERT INTO router_context (
|
||||
conversation_key, last_domain_id, last_process_id, message_history_json
|
||||
) VALUES (:key, :domain, :process, :history)
|
||||
conversation_key, last_domain_id, last_process_id, active_domain_id, active_process_id,
|
||||
dialog_started, turn_index, message_history_json
|
||||
) VALUES (:key, :domain, :process, :active_domain, :active_process, :dialog_started, :turn_index, :history)
|
||||
ON CONFLICT (conversation_key) DO UPDATE SET
|
||||
last_domain_id = EXCLUDED.last_domain_id,
|
||||
last_process_id = EXCLUDED.last_process_id,
|
||||
active_domain_id = EXCLUDED.active_domain_id,
|
||||
active_process_id = EXCLUDED.active_process_id,
|
||||
dialog_started = EXCLUDED.dialog_started,
|
||||
turn_index = EXCLUDED.turn_index,
|
||||
message_history_json = EXCLUDED.message_history_json,
|
||||
updated_at = CURRENT_TIMESTAMP
|
||||
"""
|
||||
@@ -140,6 +176,10 @@ class AgentRepository:
|
||||
"key": conversation_key,
|
||||
"domain": domain_id,
|
||||
"process": process_id,
|
||||
"active_domain": str(next_active["domain_id"]),
|
||||
"active_process": str(next_active["process_id"]),
|
||||
"dialog_started": True,
|
||||
"turn_index": next_turn_index,
|
||||
"history": json.dumps(history, ensure_ascii=False),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -1,12 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from collections.abc import Awaitable, Callable
|
||||
import inspect
|
||||
import logging
|
||||
import re
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.agent.engine.orchestrator import OrchestratorService, TaskSpecBuilder
|
||||
from app.modules.agent.engine.orchestrator.metrics_persister import MetricsPersister
|
||||
from app.modules.agent.engine.orchestrator.models import RoutingMeta
|
||||
from app.modules.agent.engine.orchestrator.step_registry import StepRegistry
|
||||
from app.modules.agent.engine.router import build_router_service
|
||||
from app.modules.agent.llm import AgentLlmService
|
||||
from app.modules.agent.story_session_recorder import StorySessionRecorder
|
||||
@@ -22,6 +26,9 @@ from app.schemas.common import ModuleName
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
|
||||
|
||||
def _truncate_for_log(text: str | None, max_chars: int = 1500) -> str:
|
||||
value = (text or "").replace("\n", "\\n").strip()
|
||||
@@ -47,13 +54,14 @@ class GraphAgentRuntime:
|
||||
llm: AgentLlmService,
|
||||
agent_repository: AgentRepository,
|
||||
story_recorder: StorySessionRecorder | None = None,
|
||||
code_explain_retriever: CodeExplainRetrieverV2 | None = None,
|
||||
) -> None:
|
||||
self._rag = rag
|
||||
self._confluence = confluence
|
||||
self._changeset_validator = changeset_validator
|
||||
self._router = build_router_service(llm, agent_repository)
|
||||
self._router = build_router_service(llm, agent_repository, rag)
|
||||
self._task_spec_builder = TaskSpecBuilder()
|
||||
self._orchestrator = OrchestratorService()
|
||||
self._orchestrator = OrchestratorService(step_registry=StepRegistry(code_explain_retriever))
|
||||
self._metrics_persister = MetricsPersister(agent_repository)
|
||||
self._story_recorder = story_recorder
|
||||
self._checkpointer = None
|
||||
@@ -70,7 +78,7 @@ class GraphAgentRuntime:
|
||||
files: list[dict],
|
||||
progress_cb: Callable[[str, str, str, dict | None], Awaitable[None] | None] | None = None,
|
||||
) -> AgentResult:
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"GraphAgentRuntime.run started: task_id=%s dialog_session_id=%s mode=%s",
|
||||
task_id,
|
||||
dialog_session_id,
|
||||
@@ -96,9 +104,7 @@ class GraphAgentRuntime:
|
||||
meta={"domain_id": route.domain_id, "process_id": route.process_id},
|
||||
)
|
||||
files_map = self._build_files_map(files)
|
||||
|
||||
await self._emit_progress(progress_cb, "agent.rag", "Собираю релевантный контекст из RAG.")
|
||||
rag_ctx = await self._rag.retrieve(rag_session_id, message)
|
||||
rag_ctx: list[dict] = []
|
||||
await self._emit_progress(progress_cb, "agent.attachments", "Обрабатываю дополнительные вложения.")
|
||||
conf_pages = await self._fetch_confluence_pages(attachments)
|
||||
route_meta = RoutingMeta(
|
||||
@@ -157,8 +163,9 @@ class GraphAgentRuntime:
|
||||
process_id=route.process_id,
|
||||
user_message=message,
|
||||
assistant_message=final_answer,
|
||||
decision_type=route.decision_type,
|
||||
)
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"final agent answer: task_id=%s route=%s/%s answer=%s",
|
||||
task_id,
|
||||
route.domain_id,
|
||||
@@ -178,7 +185,7 @@ class GraphAgentRuntime:
|
||||
answer=final_answer,
|
||||
meta={
|
||||
"route": route.model_dump(),
|
||||
"used_rag": True,
|
||||
"used_rag": False,
|
||||
"used_confluence": bool(conf_pages),
|
||||
"changeset_filtered_out": True,
|
||||
"orchestrator": orchestrator_meta,
|
||||
@@ -193,6 +200,7 @@ class GraphAgentRuntime:
|
||||
process_id=route.process_id,
|
||||
user_message=message,
|
||||
assistant_message=final_answer or f"changeset:{len(validated)}",
|
||||
decision_type=route.decision_type,
|
||||
)
|
||||
final = AgentResult(
|
||||
result_type=TaskResultType.CHANGESET,
|
||||
@@ -200,7 +208,7 @@ class GraphAgentRuntime:
|
||||
changeset=validated,
|
||||
meta={
|
||||
"route": route.model_dump(),
|
||||
"used_rag": True,
|
||||
"used_rag": False,
|
||||
"used_confluence": bool(conf_pages),
|
||||
"orchestrator": orchestrator_meta,
|
||||
"orchestrator_steps": orchestrator_steps,
|
||||
@@ -214,7 +222,7 @@ class GraphAgentRuntime:
|
||||
scenario=str(orchestrator_meta.get("scenario", task_spec.scenario.value)),
|
||||
quality=quality_meta,
|
||||
)
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"GraphAgentRuntime.run completed: task_id=%s route=%s/%s result_type=%s changeset_items=%s",
|
||||
task_id,
|
||||
route.domain_id,
|
||||
@@ -222,7 +230,7 @@ class GraphAgentRuntime:
|
||||
final.result_type.value,
|
||||
len(final.changeset),
|
||||
)
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"final agent answer: task_id=%s route=%s/%s answer=%s",
|
||||
task_id,
|
||||
route.domain_id,
|
||||
@@ -239,13 +247,14 @@ class GraphAgentRuntime:
|
||||
process_id=route.process_id,
|
||||
user_message=message,
|
||||
assistant_message=final_answer,
|
||||
decision_type=route.decision_type,
|
||||
)
|
||||
final = AgentResult(
|
||||
result_type=TaskResultType.ANSWER,
|
||||
answer=final_answer,
|
||||
meta={
|
||||
"route": route.model_dump(),
|
||||
"used_rag": True,
|
||||
"used_rag": False,
|
||||
"used_confluence": bool(conf_pages),
|
||||
"orchestrator": orchestrator_meta,
|
||||
"orchestrator_steps": orchestrator_steps,
|
||||
@@ -259,7 +268,7 @@ class GraphAgentRuntime:
|
||||
scenario=str(orchestrator_meta.get("scenario", task_spec.scenario.value)),
|
||||
quality=quality_meta,
|
||||
)
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"GraphAgentRuntime.run completed: task_id=%s route=%s/%s result_type=%s answer_len=%s",
|
||||
task_id,
|
||||
route.domain_id,
|
||||
@@ -267,7 +276,7 @@ class GraphAgentRuntime:
|
||||
final.result_type.value,
|
||||
len(final.answer or ""),
|
||||
)
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"final agent answer: task_id=%s route=%s/%s answer=%s",
|
||||
task_id,
|
||||
route.domain_id,
|
||||
@@ -351,7 +360,7 @@ class GraphAgentRuntime:
|
||||
factory = self._router.graph_factory("default", "general")
|
||||
if factory is None:
|
||||
raise RuntimeError("No graph factory configured")
|
||||
LOGGER.warning("_resolve_graph resolved: domain_id=%s process_id=%s", domain_id, process_id)
|
||||
LOGGER.debug("_resolve_graph resolved: domain_id=%s process_id=%s", domain_id, process_id)
|
||||
return factory(self._checkpointer)
|
||||
|
||||
def _invoke_graph(self, graph, state: dict, dialog_session_id: str):
|
||||
@@ -365,7 +374,7 @@ class GraphAgentRuntime:
|
||||
for item in attachments:
|
||||
if item.get("type") == "confluence_url":
|
||||
pages.append(await self._confluence.fetch_page(item["url"]))
|
||||
LOGGER.warning("_fetch_confluence_pages completed: pages=%s", len(pages))
|
||||
LOGGER.info("_fetch_confluence_pages completed: pages=%s", len(pages))
|
||||
return pages
|
||||
|
||||
def _format_rag(self, items: list[dict]) -> str:
|
||||
@@ -411,7 +420,7 @@ class GraphAgentRuntime:
|
||||
"content": str(item.get("content", "")),
|
||||
"content_hash": str(item.get("content_hash", "")),
|
||||
}
|
||||
LOGGER.warning("_build_files_map completed: files=%s", len(output))
|
||||
LOGGER.debug("_build_files_map completed: files=%s", len(output))
|
||||
return output
|
||||
|
||||
def _lookup_file(self, files_map: dict[str, dict], path: str) -> dict | None:
|
||||
@@ -437,7 +446,7 @@ class GraphAgentRuntime:
|
||||
)
|
||||
item.base_hash = str(source["content_hash"])
|
||||
enriched.append(item)
|
||||
LOGGER.warning("_enrich_changeset_hashes completed: items=%s", len(enriched))
|
||||
LOGGER.debug("_enrich_changeset_hashes completed: items=%s", len(enriched))
|
||||
return enriched
|
||||
|
||||
def _sanitize_changeset(self, items: list[ChangeItem], files_map: dict[str, dict]) -> list[ChangeItem]:
|
||||
@@ -462,7 +471,7 @@ class GraphAgentRuntime:
|
||||
continue
|
||||
sanitized.append(item)
|
||||
if dropped_noop or dropped_ws:
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"_sanitize_changeset dropped items: noop=%s whitespace_only=%s kept=%s",
|
||||
dropped_noop,
|
||||
dropped_ws,
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
from app.modules.agent.module import AgentModule
|
||||
from app.modules.agent.repository import AgentRepository
|
||||
from app.modules.agent.story_context_repository import StoryContextRepository, StoryContextSchemaRepository
|
||||
from app.modules.chat.direct_service import CodeExplainChatService
|
||||
from app.modules.chat.dialog_store import DialogSessionStore
|
||||
from app.modules.chat.repository import ChatRepository
|
||||
from app.modules.chat.module import ChatModule
|
||||
from app.modules.chat.session_resolver import ChatSessionResolver
|
||||
from app.modules.chat.task_store import TaskStore
|
||||
from app.modules.rag.persistence.repository import RagRepository
|
||||
from app.modules.rag.explain import CodeExplainRetrieverV2, CodeGraphRepository, LayeredRetrievalGateway
|
||||
from app.modules.rag_session.module import RagModule
|
||||
from app.modules.rag_repo.module import RagRepoModule
|
||||
from app.modules.shared.bootstrap import bootstrap_database
|
||||
@@ -20,16 +25,32 @@ class ModularApplication:
|
||||
self.agent_repository = AgentRepository()
|
||||
self.story_context_schema_repository = StoryContextSchemaRepository()
|
||||
self.story_context_repository = StoryContextRepository()
|
||||
self.chat_tasks = TaskStore()
|
||||
|
||||
self.rag_session = RagModule(event_bus=self.events, retry=self.retry, repository=self.rag_repository)
|
||||
self.rag_repo = RagRepoModule(
|
||||
story_context_repository=self.story_context_repository,
|
||||
rag_repository=self.rag_repository,
|
||||
)
|
||||
self.code_explain_retriever = CodeExplainRetrieverV2(
|
||||
gateway=LayeredRetrievalGateway(self.rag_repository, self.rag_session.embedder),
|
||||
graph_repository=CodeGraphRepository(),
|
||||
)
|
||||
self.agent = AgentModule(
|
||||
rag_retriever=self.rag_session.rag,
|
||||
agent_repository=self.agent_repository,
|
||||
story_context_repository=self.story_context_repository,
|
||||
code_explain_retriever=self.code_explain_retriever,
|
||||
)
|
||||
self.direct_chat = CodeExplainChatService(
|
||||
retriever=self.code_explain_retriever,
|
||||
llm=self.agent.llm,
|
||||
session_resolver=ChatSessionResolver(
|
||||
dialogs=DialogSessionStore(self.chat_repository),
|
||||
rag_session_exists=lambda rag_session_id: self.rag_session.sessions.get(rag_session_id) is not None,
|
||||
),
|
||||
task_store=self.chat_tasks,
|
||||
message_sink=self.chat_repository.add_message,
|
||||
)
|
||||
self.chat = ChatModule(
|
||||
agent_runner=self.agent.runtime,
|
||||
@@ -37,6 +58,8 @@ class ModularApplication:
|
||||
retry=self.retry,
|
||||
rag_sessions=self.rag_session.sessions,
|
||||
repository=self.chat_repository,
|
||||
direct_chat=self.direct_chat,
|
||||
task_store=self.chat_tasks,
|
||||
)
|
||||
|
||||
def startup(self) -> None:
|
||||
|
||||
Binary file not shown.
BIN
app/modules/chat/__pycache__/direct_service.cpython-312.pyc
Normal file
BIN
app/modules/chat/__pycache__/direct_service.cpython-312.pyc
Normal file
Binary file not shown.
BIN
app/modules/chat/__pycache__/evidence_gate.cpython-312.pyc
Normal file
BIN
app/modules/chat/__pycache__/evidence_gate.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
app/modules/chat/__pycache__/session_resolver.cpython-312.pyc
Normal file
BIN
app/modules/chat/__pycache__/session_resolver.cpython-312.pyc
Normal file
Binary file not shown.
@@ -1,7 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
from uuid import uuid4
|
||||
|
||||
from app.modules.chat.repository import ChatRepository
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.chat.repository import ChatRepository
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
71
app/modules/chat/direct_service.py
Normal file
71
app/modules/chat/direct_service.py
Normal file
@@ -0,0 +1,71 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from uuid import uuid4
|
||||
|
||||
from app.modules.agent.llm import AgentLlmService
|
||||
from app.modules.chat.evidence_gate import CodeExplainEvidenceGate
|
||||
from app.modules.chat.session_resolver import ChatSessionResolver
|
||||
from app.modules.chat.task_store import TaskState, TaskStore
|
||||
from app.modules.rag.explain import CodeExplainRetrieverV2, PromptBudgeter
|
||||
from app.schemas.chat import ChatMessageRequest, TaskQueuedResponse, TaskResultType, TaskStatus
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CodeExplainChatService:
|
||||
def __init__(
|
||||
self,
|
||||
retriever: CodeExplainRetrieverV2,
|
||||
llm: AgentLlmService,
|
||||
session_resolver: ChatSessionResolver,
|
||||
task_store: TaskStore,
|
||||
message_sink,
|
||||
budgeter: PromptBudgeter | None = None,
|
||||
evidence_gate: CodeExplainEvidenceGate | None = None,
|
||||
) -> None:
|
||||
self._retriever = retriever
|
||||
self._llm = llm
|
||||
self._session_resolver = session_resolver
|
||||
self._task_store = task_store
|
||||
self._message_sink = message_sink
|
||||
self._budgeter = budgeter or PromptBudgeter()
|
||||
self._evidence_gate = evidence_gate or CodeExplainEvidenceGate()
|
||||
|
||||
async def handle_message(self, request: ChatMessageRequest) -> TaskQueuedResponse:
|
||||
dialog_session_id, rag_session_id = self._session_resolver.resolve(request)
|
||||
task_id = str(uuid4())
|
||||
task = TaskState(task_id=task_id, status=TaskStatus.RUNNING)
|
||||
self._task_store.save(task)
|
||||
self._message_sink(dialog_session_id, "user", request.message, task_id=task_id)
|
||||
pack = self._retriever.build_pack(
|
||||
rag_session_id,
|
||||
request.message,
|
||||
file_candidates=[item.model_dump(mode="json") for item in request.files],
|
||||
)
|
||||
decision = self._evidence_gate.evaluate(pack)
|
||||
if decision.passed:
|
||||
prompt_input = self._budgeter.build_prompt_input(request.message, pack)
|
||||
answer = self._llm.generate(
|
||||
"code_explain_answer_v2",
|
||||
prompt_input,
|
||||
log_context="chat.code_explain.direct",
|
||||
).strip()
|
||||
else:
|
||||
answer = decision.answer
|
||||
self._message_sink(dialog_session_id, "assistant", answer, task_id=task_id)
|
||||
task.status = TaskStatus.DONE
|
||||
task.result_type = TaskResultType.ANSWER
|
||||
task.answer = answer
|
||||
self._task_store.save(task)
|
||||
LOGGER.warning(
|
||||
"direct code explain response: task_id=%s rag_session_id=%s excerpts=%s missing=%s",
|
||||
task_id,
|
||||
rag_session_id,
|
||||
len(pack.code_excerpts),
|
||||
pack.missing,
|
||||
)
|
||||
return TaskQueuedResponse(
|
||||
task_id=task_id,
|
||||
status=TaskStatus.DONE.value,
|
||||
)
|
||||
62
app/modules/chat/evidence_gate.py
Normal file
62
app/modules/chat/evidence_gate.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from app.modules.rag.explain.models import ExplainPack
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class EvidenceGateDecision:
|
||||
passed: bool
|
||||
answer: str = ""
|
||||
diagnostics: dict[str, list[str]] = field(default_factory=dict)
|
||||
|
||||
|
||||
class CodeExplainEvidenceGate:
|
||||
def __init__(self, min_excerpts: int = 2) -> None:
|
||||
self._min_excerpts = min_excerpts
|
||||
|
||||
def evaluate(self, pack: ExplainPack) -> EvidenceGateDecision:
|
||||
diagnostics = self._diagnostics(pack)
|
||||
if len(pack.code_excerpts) >= self._min_excerpts:
|
||||
return EvidenceGateDecision(passed=True, diagnostics=diagnostics)
|
||||
return EvidenceGateDecision(
|
||||
passed=False,
|
||||
answer=self._build_answer(pack, diagnostics),
|
||||
diagnostics=diagnostics,
|
||||
)
|
||||
|
||||
def _diagnostics(self, pack: ExplainPack) -> dict[str, list[str]]:
|
||||
return {
|
||||
"entrypoints": [item.title for item in pack.selected_entrypoints[:3] if item.title],
|
||||
"symbols": [item.title for item in pack.seed_symbols[:5] if item.title],
|
||||
"paths": self._paths(pack),
|
||||
"missing": list(pack.missing),
|
||||
}
|
||||
|
||||
def _paths(self, pack: ExplainPack) -> list[str]:
|
||||
values: list[str] = []
|
||||
for item in pack.selected_entrypoints + pack.seed_symbols:
|
||||
path = item.source or (item.location.path if item.location else "")
|
||||
if path and path not in values:
|
||||
values.append(path)
|
||||
for excerpt in pack.code_excerpts:
|
||||
if excerpt.path and excerpt.path not in values:
|
||||
values.append(excerpt.path)
|
||||
return values[:6]
|
||||
|
||||
def _build_answer(self, pack: ExplainPack, diagnostics: dict[str, list[str]]) -> str:
|
||||
lines = [
|
||||
"Недостаточно опоры в коде, чтобы дать объяснение без догадок.",
|
||||
"",
|
||||
f"Найдено фрагментов кода: {len(pack.code_excerpts)} из {self._min_excerpts} минимально необходимых.",
|
||||
]
|
||||
if diagnostics["paths"]:
|
||||
lines.append(f"Пути: {', '.join(diagnostics['paths'])}")
|
||||
if diagnostics["entrypoints"]:
|
||||
lines.append(f"Entrypoints: {', '.join(diagnostics['entrypoints'])}")
|
||||
if diagnostics["symbols"]:
|
||||
lines.append(f"Символы: {', '.join(diagnostics['symbols'])}")
|
||||
if diagnostics["missing"]:
|
||||
lines.append(f"Диагностика: {', '.join(diagnostics['missing'])}")
|
||||
return "\n".join(lines).strip()
|
||||
@@ -1,13 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from fastapi import APIRouter, Header
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
from app.core.exceptions import AppError
|
||||
from app.modules.chat.direct_service import CodeExplainChatService
|
||||
from app.modules.chat.dialog_store import DialogSessionStore
|
||||
from app.modules.chat.repository import ChatRepository
|
||||
from app.modules.chat.service import ChatOrchestrator
|
||||
from app.modules.chat.task_store import TaskStore
|
||||
from app.modules.contracts import AgentRunner
|
||||
from app.modules.rag_session.session_store import RagSessionStore
|
||||
from app.modules.shared.event_bus import EventBus
|
||||
from app.modules.shared.idempotency_store import IdempotencyStore
|
||||
from app.modules.shared.retry_executor import RetryExecutor
|
||||
@@ -20,6 +23,11 @@ from app.schemas.chat import (
|
||||
)
|
||||
from app.schemas.common import ModuleName
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.chat.repository import ChatRepository
|
||||
from app.modules.contracts import AgentRunner
|
||||
from app.modules.rag_session.session_store import RagSessionStore
|
||||
|
||||
|
||||
class ChatModule:
|
||||
def __init__(
|
||||
@@ -29,12 +37,16 @@ class ChatModule:
|
||||
retry: RetryExecutor,
|
||||
rag_sessions: RagSessionStore,
|
||||
repository: ChatRepository,
|
||||
direct_chat: CodeExplainChatService | None = None,
|
||||
task_store: TaskStore | None = None,
|
||||
) -> None:
|
||||
self._rag_sessions = rag_sessions
|
||||
self.tasks = TaskStore()
|
||||
self._simple_code_explain_only = os.getenv("SIMPLE_CODE_EXPLAIN_ONLY", "true").lower() in {"1", "true", "yes"}
|
||||
self.tasks = task_store or TaskStore()
|
||||
self.dialogs = DialogSessionStore(repository)
|
||||
self.idempotency = IdempotencyStore()
|
||||
self.events = event_bus
|
||||
self.direct_chat = direct_chat
|
||||
self.chat = ChatOrchestrator(
|
||||
task_store=self.tasks,
|
||||
dialogs=self.dialogs,
|
||||
@@ -59,11 +71,13 @@ class ChatModule:
|
||||
rag_session_id=dialog.rag_session_id,
|
||||
)
|
||||
|
||||
@router.post("/api/chat/messages", response_model=TaskQueuedResponse)
|
||||
@router.post("/api/chat/messages", response_model=TaskQueuedResponse | TaskResultResponse)
|
||||
async def send_message(
|
||||
request: ChatMessageRequest,
|
||||
idempotency_key: str | None = Header(default=None, alias="Idempotency-Key"),
|
||||
) -> TaskQueuedResponse:
|
||||
) -> TaskQueuedResponse | TaskResultResponse:
|
||||
if self._simple_code_explain_only and self.direct_chat is not None:
|
||||
return await self.direct_chat.handle_message(request)
|
||||
task = await self.chat.enqueue_message(request, idempotency_key)
|
||||
return TaskQueuedResponse(task_id=task.task_id, status=task.status.value)
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ from app.modules.contracts import AgentRunner
|
||||
from app.schemas.chat import ChatMessageRequest, TaskResultType, TaskStatus
|
||||
from app.schemas.common import ErrorPayload, ModuleName
|
||||
from app.modules.chat.dialog_store import DialogSessionStore
|
||||
from app.modules.chat.session_resolver import ChatSessionResolver
|
||||
from app.modules.chat.task_store import TaskState, TaskStore
|
||||
from app.modules.shared.event_bus import EventBus
|
||||
from app.modules.shared.idempotency_store import IdempotencyStore
|
||||
@@ -41,6 +42,7 @@ class ChatOrchestrator:
|
||||
self._retry = retry
|
||||
self._rag_session_exists = rag_session_exists
|
||||
self._message_sink = message_sink
|
||||
self._session_resolver = ChatSessionResolver(dialogs, rag_session_exists)
|
||||
|
||||
async def enqueue_message(
|
||||
self,
|
||||
@@ -52,7 +54,7 @@ class ChatOrchestrator:
|
||||
if existing:
|
||||
task = self._task_store.get(existing)
|
||||
if task:
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"enqueue_message reused task by idempotency key: task_id=%s mode=%s",
|
||||
task.task_id,
|
||||
request.mode.value,
|
||||
@@ -63,7 +65,7 @@ class ChatOrchestrator:
|
||||
if idempotency_key:
|
||||
self._idempotency.put(idempotency_key, task.task_id)
|
||||
asyncio.create_task(self._process_task(task.task_id, request))
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"enqueue_message created task: task_id=%s mode=%s",
|
||||
task.task_id,
|
||||
request.mode.value,
|
||||
@@ -135,6 +137,13 @@ class ChatOrchestrator:
|
||||
task.changeset = result.changeset
|
||||
if task.result_type == TaskResultType.ANSWER and task.answer:
|
||||
self._message_sink(dialog_session_id, "assistant", task.answer, task_id=task_id)
|
||||
LOGGER.warning(
|
||||
"outgoing chat response: task_id=%s dialog_session_id=%s result_type=%s answer=%s",
|
||||
task_id,
|
||||
dialog_session_id,
|
||||
task.result_type.value,
|
||||
_truncate_for_log(task.answer),
|
||||
)
|
||||
elif task.result_type == TaskResultType.CHANGESET:
|
||||
self._message_sink(
|
||||
dialog_session_id,
|
||||
@@ -146,6 +155,14 @@ class ChatOrchestrator:
|
||||
"changeset": [item.model_dump(mode="json") for item in task.changeset],
|
||||
},
|
||||
)
|
||||
LOGGER.warning(
|
||||
"outgoing chat response: task_id=%s dialog_session_id=%s result_type=%s changeset_items=%s answer=%s",
|
||||
task_id,
|
||||
dialog_session_id,
|
||||
task.result_type.value,
|
||||
len(task.changeset),
|
||||
_truncate_for_log(task.answer or ""),
|
||||
)
|
||||
self._task_store.save(task)
|
||||
await self._events.publish(
|
||||
task_id,
|
||||
@@ -160,7 +177,7 @@ class ChatOrchestrator:
|
||||
},
|
||||
)
|
||||
await self._publish_progress(task_id, "task.done", "Обработка завершена.", progress=100)
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"_process_task completed: task_id=%s status=%s result_type=%s changeset_items=%s",
|
||||
task_id,
|
||||
task.status.value,
|
||||
@@ -232,7 +249,7 @@ class ChatOrchestrator:
|
||||
if progress is not None:
|
||||
payload["progress"] = max(0, min(100, int(progress)))
|
||||
await self._events.publish(task_id, kind, payload)
|
||||
LOGGER.warning(
|
||||
LOGGER.debug(
|
||||
"_publish_progress emitted: task_id=%s kind=%s stage=%s progress=%s",
|
||||
task_id,
|
||||
kind,
|
||||
@@ -259,35 +276,7 @@ class ChatOrchestrator:
|
||||
meta={"heartbeat": True},
|
||||
)
|
||||
index += 1
|
||||
LOGGER.warning("_run_heartbeat stopped: task_id=%s ticks=%s", task_id, index)
|
||||
LOGGER.debug("_run_heartbeat stopped: task_id=%s ticks=%s", task_id, index)
|
||||
|
||||
def _resolve_sessions(self, request: ChatMessageRequest) -> tuple[str, str]:
|
||||
# Legacy compatibility: old session_id/project_id flow.
|
||||
if request.dialog_session_id and request.rag_session_id:
|
||||
dialog = self._dialogs.get(request.dialog_session_id)
|
||||
if not dialog:
|
||||
raise AppError("dialog_not_found", "Dialog session not found", ModuleName.BACKEND)
|
||||
if dialog.rag_session_id != request.rag_session_id:
|
||||
raise AppError("dialog_rag_mismatch", "Dialog session does not belong to rag session", ModuleName.BACKEND)
|
||||
LOGGER.warning(
|
||||
"_resolve_sessions resolved by dialog_session_id: dialog_session_id=%s rag_session_id=%s",
|
||||
request.dialog_session_id,
|
||||
request.rag_session_id,
|
||||
)
|
||||
return request.dialog_session_id, request.rag_session_id
|
||||
|
||||
if request.session_id and request.project_id:
|
||||
if not self._rag_session_exists(request.project_id):
|
||||
raise AppError("rag_session_not_found", "RAG session not found", ModuleName.RAG)
|
||||
LOGGER.warning(
|
||||
"_resolve_sessions resolved by legacy session/project: session_id=%s project_id=%s",
|
||||
request.session_id,
|
||||
request.project_id,
|
||||
)
|
||||
return request.session_id, request.project_id
|
||||
|
||||
raise AppError(
|
||||
"missing_sessions",
|
||||
"dialog_session_id and rag_session_id are required",
|
||||
ModuleName.BACKEND,
|
||||
)
|
||||
return self._session_resolver.resolve(request)
|
||||
|
||||
36
app/modules/chat/session_resolver.py
Normal file
36
app/modules/chat/session_resolver.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.core.exceptions import AppError
|
||||
from app.schemas.chat import ChatMessageRequest
|
||||
from app.schemas.common import ModuleName
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.chat.dialog_store import DialogSessionStore
|
||||
|
||||
|
||||
class ChatSessionResolver:
|
||||
def __init__(self, dialogs: DialogSessionStore, rag_session_exists) -> None:
|
||||
self._dialogs = dialogs
|
||||
self._rag_session_exists = rag_session_exists
|
||||
|
||||
def resolve(self, request: ChatMessageRequest) -> tuple[str, str]:
|
||||
if request.dialog_session_id and request.rag_session_id:
|
||||
dialog = self._dialogs.get(request.dialog_session_id)
|
||||
if not dialog:
|
||||
raise AppError("dialog_not_found", "Dialog session not found", ModuleName.BACKEND)
|
||||
if dialog.rag_session_id != request.rag_session_id:
|
||||
raise AppError("dialog_rag_mismatch", "Dialog session does not belong to rag session", ModuleName.BACKEND)
|
||||
return request.dialog_session_id, request.rag_session_id
|
||||
|
||||
if request.session_id and request.project_id:
|
||||
if not self._rag_session_exists(request.project_id):
|
||||
raise AppError("rag_session_not_found", "RAG session not found", ModuleName.RAG)
|
||||
return request.session_id, request.project_id
|
||||
|
||||
raise AppError(
|
||||
"missing_sessions",
|
||||
"dialog_session_id and rag_session_id are required",
|
||||
ModuleName.BACKEND,
|
||||
)
|
||||
@@ -90,6 +90,41 @@ sequenceDiagram
|
||||
Rag-->>Agent: items
|
||||
```
|
||||
|
||||
### Retrieval + project/qa reasoning
|
||||
Назначение: `RAG` вызывается не в начале runtime, а внутри отдельного graph-шага `context_retrieval` для `project/qa`.
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Agent as GraphAgentRuntime
|
||||
participant Orch as OrchestratorService
|
||||
participant G1 as conversation_understanding
|
||||
participant G2 as question_classification
|
||||
participant G3 as context_retrieval
|
||||
participant Rag as RagService
|
||||
participant G4 as context_analysis
|
||||
participant G5 as answer_composition
|
||||
|
||||
Agent->>Orch: run(task)
|
||||
Orch->>G1: execute
|
||||
G1-->>Orch: resolved_request
|
||||
Orch->>G2: execute
|
||||
G2-->>Orch: question_profile
|
||||
Orch->>G3: execute
|
||||
G3->>Rag: retrieve(query)
|
||||
Rag-->>G3: rag_items
|
||||
G3-->>Orch: source_bundle
|
||||
Orch->>G4: execute
|
||||
G4-->>Orch: analysis_brief
|
||||
Orch->>G5: execute
|
||||
G5-->>Orch: final_answer
|
||||
Orch-->>Agent: final_answer
|
||||
```
|
||||
|
||||
Для `project/qa` это означает:
|
||||
- ранний глобальный retrieval больше не нужен;
|
||||
- `RAG` возвращает записи только для конкретного шага `context_retrieval`;
|
||||
- оркестратор управляет цепочкой graph-шагов;
|
||||
- пользовательский ответ собирается после анализа, а не напрямую из сырого retrieval.
|
||||
|
||||
## 5. Слои, фиксируемые в RAG
|
||||
|
||||
### 5.1. Слои DOCS
|
||||
|
||||
36
app/modules/rag/explain/__init__.py
Normal file
36
app/modules/rag/explain/__init__.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from importlib import import_module
|
||||
|
||||
__all__ = [
|
||||
"CodeExcerpt",
|
||||
"CodeExplainRetrieverV2",
|
||||
"CodeGraphRepository",
|
||||
"EvidenceItem",
|
||||
"ExplainIntent",
|
||||
"ExplainIntentBuilder",
|
||||
"ExplainPack",
|
||||
"LayeredRetrievalGateway",
|
||||
"PromptBudgeter",
|
||||
"TracePath",
|
||||
]
|
||||
|
||||
|
||||
def __getattr__(name: str):
|
||||
module_map = {
|
||||
"CodeExcerpt": "app.modules.rag.explain.models",
|
||||
"EvidenceItem": "app.modules.rag.explain.models",
|
||||
"ExplainIntent": "app.modules.rag.explain.models",
|
||||
"ExplainPack": "app.modules.rag.explain.models",
|
||||
"TracePath": "app.modules.rag.explain.models",
|
||||
"ExplainIntentBuilder": "app.modules.rag.explain.intent_builder",
|
||||
"PromptBudgeter": "app.modules.rag.explain.budgeter",
|
||||
"LayeredRetrievalGateway": "app.modules.rag.explain.layered_gateway",
|
||||
"CodeGraphRepository": "app.modules.rag.explain.graph_repository",
|
||||
"CodeExplainRetrieverV2": "app.modules.rag.explain.retriever_v2",
|
||||
}
|
||||
module_name = module_map.get(name)
|
||||
if module_name is None:
|
||||
raise AttributeError(name)
|
||||
module = import_module(module_name)
|
||||
return getattr(module, name)
|
||||
BIN
app/modules/rag/explain/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
app/modules/rag/explain/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
app/modules/rag/explain/__pycache__/budgeter.cpython-312.pyc
Normal file
BIN
app/modules/rag/explain/__pycache__/budgeter.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
app/modules/rag/explain/__pycache__/models.cpython-312.pyc
Normal file
BIN
app/modules/rag/explain/__pycache__/models.cpython-312.pyc
Normal file
Binary file not shown.
BIN
app/modules/rag/explain/__pycache__/retriever_v2.cpython-312.pyc
Normal file
BIN
app/modules/rag/explain/__pycache__/retriever_v2.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
62
app/modules/rag/explain/budgeter.py
Normal file
62
app/modules/rag/explain/budgeter.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from app.modules.rag.explain.models import ExplainPack
|
||||
|
||||
|
||||
class PromptBudgeter:
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
max_paths: int = 3,
|
||||
max_symbols: int = 25,
|
||||
max_excerpts: int = 40,
|
||||
max_chars: int = 30000,
|
||||
) -> None:
|
||||
self._max_paths = max_paths
|
||||
self._max_symbols = max_symbols
|
||||
self._max_excerpts = max_excerpts
|
||||
self._max_chars = max_chars
|
||||
|
||||
def build_prompt_input(self, question: str, pack: ExplainPack) -> str:
|
||||
symbol_ids: list[str] = []
|
||||
for path in pack.trace_paths[: self._max_paths]:
|
||||
for symbol_id in path.symbol_ids:
|
||||
if symbol_id and symbol_id not in symbol_ids and len(symbol_ids) < self._max_symbols:
|
||||
symbol_ids.append(symbol_id)
|
||||
excerpts = []
|
||||
total_chars = 0
|
||||
for excerpt in pack.code_excerpts:
|
||||
if symbol_ids and excerpt.symbol_id and excerpt.symbol_id not in symbol_ids:
|
||||
continue
|
||||
body = excerpt.content.strip()
|
||||
remaining = self._max_chars - total_chars
|
||||
if remaining <= 0 or len(excerpts) >= self._max_excerpts:
|
||||
break
|
||||
if len(body) > remaining:
|
||||
body = body[:remaining].rstrip() + "...[truncated]"
|
||||
excerpts.append(
|
||||
{
|
||||
"evidence_id": excerpt.evidence_id,
|
||||
"title": excerpt.title,
|
||||
"path": excerpt.path,
|
||||
"start_line": excerpt.start_line,
|
||||
"end_line": excerpt.end_line,
|
||||
"focus": excerpt.focus,
|
||||
"content": body,
|
||||
}
|
||||
)
|
||||
total_chars += len(body)
|
||||
payload = {
|
||||
"question": question,
|
||||
"intent": pack.intent.model_dump(mode="json"),
|
||||
"selected_entrypoints": [item.model_dump(mode="json") for item in pack.selected_entrypoints[:5]],
|
||||
"seed_symbols": [item.model_dump(mode="json") for item in pack.seed_symbols[: self._max_symbols]],
|
||||
"trace_paths": [path.model_dump(mode="json") for path in pack.trace_paths[: self._max_paths]],
|
||||
"evidence_index": {key: value.model_dump(mode="json") for key, value in pack.evidence_index.items()},
|
||||
"code_excerpts": excerpts,
|
||||
"missing": pack.missing,
|
||||
"conflicts": pack.conflicts,
|
||||
}
|
||||
return json.dumps(payload, ensure_ascii=False, indent=2)
|
||||
59
app/modules/rag/explain/excerpt_planner.py
Normal file
59
app/modules/rag/explain/excerpt_planner.py
Normal file
@@ -0,0 +1,59 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.modules.rag.explain.models import CodeExcerpt, LayeredRetrievalItem
|
||||
|
||||
|
||||
class ExcerptPlanner:
|
||||
_FOCUS_TOKENS = ("raise", "except", "db", "select", "insert", "update", "delete", "http", "publish", "emit")
|
||||
|
||||
def plan(self, chunk: LayeredRetrievalItem, *, evidence_id: str, symbol_id: str | None) -> list[CodeExcerpt]:
|
||||
location = chunk.location
|
||||
if location is None:
|
||||
return []
|
||||
excerpts = [
|
||||
CodeExcerpt(
|
||||
evidence_id=evidence_id,
|
||||
symbol_id=symbol_id,
|
||||
title=chunk.title,
|
||||
path=location.path,
|
||||
start_line=location.start_line,
|
||||
end_line=location.end_line,
|
||||
content=chunk.content.strip(),
|
||||
focus="overview",
|
||||
)
|
||||
]
|
||||
focus = self._focus_excerpt(chunk, evidence_id=evidence_id, symbol_id=symbol_id)
|
||||
if focus is not None:
|
||||
excerpts.append(focus)
|
||||
return excerpts
|
||||
|
||||
def _focus_excerpt(
|
||||
self,
|
||||
chunk: LayeredRetrievalItem,
|
||||
*,
|
||||
evidence_id: str,
|
||||
symbol_id: str | None,
|
||||
) -> CodeExcerpt | None:
|
||||
location = chunk.location
|
||||
if location is None:
|
||||
return None
|
||||
lines = chunk.content.splitlines()
|
||||
for index, line in enumerate(lines):
|
||||
lowered = line.lower()
|
||||
if not any(token in lowered for token in self._FOCUS_TOKENS):
|
||||
continue
|
||||
start = max(0, index - 2)
|
||||
end = min(len(lines), index + 3)
|
||||
if end - start >= len(lines):
|
||||
return None
|
||||
return CodeExcerpt(
|
||||
evidence_id=evidence_id,
|
||||
symbol_id=symbol_id,
|
||||
title=f"{chunk.title}:focus",
|
||||
path=location.path,
|
||||
start_line=(location.start_line or 1) + start,
|
||||
end_line=(location.start_line or 1) + end - 1,
|
||||
content="\n".join(lines[start:end]).strip(),
|
||||
focus="focus",
|
||||
)
|
||||
return None
|
||||
216
app/modules/rag/explain/graph_repository.py
Normal file
216
app/modules/rag/explain/graph_repository.py
Normal file
@@ -0,0 +1,216 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from app.modules.rag.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.modules.shared.db import get_engine
|
||||
|
||||
|
||||
class CodeGraphRepository:
|
||||
def get_out_edges(
|
||||
self,
|
||||
rag_session_id: str,
|
||||
src_symbol_ids: list[str],
|
||||
edge_types: list[str],
|
||||
limit_per_src: int,
|
||||
) -> list[LayeredRetrievalItem]:
|
||||
if not src_symbol_ids:
|
||||
return []
|
||||
sql = """
|
||||
SELECT path, content, layer, title, metadata_json, span_start, span_end
|
||||
FROM rag_chunks
|
||||
WHERE rag_session_id = :sid
|
||||
AND layer = 'C2_DEPENDENCY_GRAPH'
|
||||
AND CAST(metadata_json AS jsonb)->>'src_symbol_id' = ANY(:src_ids)
|
||||
AND CAST(metadata_json AS jsonb)->>'edge_type' = ANY(:edge_types)
|
||||
ORDER BY path, span_start
|
||||
"""
|
||||
with get_engine().connect() as conn:
|
||||
rows = conn.execute(
|
||||
text(sql),
|
||||
{"sid": rag_session_id, "src_ids": src_symbol_ids, "edge_types": edge_types},
|
||||
).mappings().fetchall()
|
||||
grouped: dict[str, int] = {}
|
||||
items: list[LayeredRetrievalItem] = []
|
||||
for row in rows:
|
||||
metadata = self._loads(row.get("metadata_json"))
|
||||
src_symbol_id = str(metadata.get("src_symbol_id") or "")
|
||||
grouped[src_symbol_id] = grouped.get(src_symbol_id, 0) + 1
|
||||
if grouped[src_symbol_id] > limit_per_src:
|
||||
continue
|
||||
items.append(self._to_item(row, metadata))
|
||||
return items
|
||||
|
||||
def get_in_edges(
|
||||
self,
|
||||
rag_session_id: str,
|
||||
dst_symbol_ids: list[str],
|
||||
edge_types: list[str],
|
||||
limit_per_dst: int,
|
||||
) -> list[LayeredRetrievalItem]:
|
||||
if not dst_symbol_ids:
|
||||
return []
|
||||
sql = """
|
||||
SELECT path, content, layer, title, metadata_json, span_start, span_end
|
||||
FROM rag_chunks
|
||||
WHERE rag_session_id = :sid
|
||||
AND layer = 'C2_DEPENDENCY_GRAPH'
|
||||
AND CAST(metadata_json AS jsonb)->>'dst_symbol_id' = ANY(:dst_ids)
|
||||
AND CAST(metadata_json AS jsonb)->>'edge_type' = ANY(:edge_types)
|
||||
ORDER BY path, span_start
|
||||
"""
|
||||
with get_engine().connect() as conn:
|
||||
rows = conn.execute(
|
||||
text(sql),
|
||||
{"sid": rag_session_id, "dst_ids": dst_symbol_ids, "edge_types": edge_types},
|
||||
).mappings().fetchall()
|
||||
grouped: dict[str, int] = {}
|
||||
items: list[LayeredRetrievalItem] = []
|
||||
for row in rows:
|
||||
metadata = self._loads(row.get("metadata_json"))
|
||||
dst_symbol_id = str(metadata.get("dst_symbol_id") or "")
|
||||
grouped[dst_symbol_id] = grouped.get(dst_symbol_id, 0) + 1
|
||||
if grouped[dst_symbol_id] > limit_per_dst:
|
||||
continue
|
||||
items.append(self._to_item(row, metadata))
|
||||
return items
|
||||
|
||||
def resolve_symbol_by_ref(
|
||||
self,
|
||||
rag_session_id: str,
|
||||
dst_ref: str,
|
||||
package_hint: str | None = None,
|
||||
) -> LayeredRetrievalItem | None:
|
||||
ref = (dst_ref or "").strip()
|
||||
if not ref:
|
||||
return None
|
||||
with get_engine().connect() as conn:
|
||||
rows = conn.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT path, content, layer, title, metadata_json, span_start, span_end, qname
|
||||
FROM rag_chunks
|
||||
WHERE rag_session_id = :sid
|
||||
AND layer = 'C1_SYMBOL_CATALOG'
|
||||
AND (qname = :ref OR title = :ref OR qname LIKE :tail)
|
||||
ORDER BY path
|
||||
LIMIT 12
|
||||
"""
|
||||
),
|
||||
{"sid": rag_session_id, "ref": ref, "tail": f"%{ref}"},
|
||||
).mappings().fetchall()
|
||||
best: LayeredRetrievalItem | None = None
|
||||
best_score = -1
|
||||
for row in rows:
|
||||
metadata = self._loads(row.get("metadata_json"))
|
||||
package = str(metadata.get("package_or_module") or "")
|
||||
score = 0
|
||||
if str(row.get("qname") or "") == ref:
|
||||
score += 3
|
||||
if str(row.get("title") or "") == ref:
|
||||
score += 2
|
||||
if package_hint and package.startswith(package_hint):
|
||||
score += 3
|
||||
if package_hint and package_hint in str(row.get("path") or ""):
|
||||
score += 1
|
||||
if score > best_score:
|
||||
best = self._to_item(row, metadata)
|
||||
best_score = score
|
||||
return best
|
||||
|
||||
def get_symbols_by_ids(self, rag_session_id: str, symbol_ids: list[str]) -> list[LayeredRetrievalItem]:
|
||||
if not symbol_ids:
|
||||
return []
|
||||
with get_engine().connect() as conn:
|
||||
rows = conn.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT path, content, layer, title, metadata_json, span_start, span_end
|
||||
FROM rag_chunks
|
||||
WHERE rag_session_id = :sid
|
||||
AND layer = 'C1_SYMBOL_CATALOG'
|
||||
AND symbol_id = ANY(:symbol_ids)
|
||||
ORDER BY path, span_start
|
||||
"""
|
||||
),
|
||||
{"sid": rag_session_id, "symbol_ids": symbol_ids},
|
||||
).mappings().fetchall()
|
||||
return [self._to_item(row, self._loads(row.get("metadata_json"))) for row in rows]
|
||||
|
||||
def get_chunks_by_symbol_ids(
|
||||
self,
|
||||
rag_session_id: str,
|
||||
symbol_ids: list[str],
|
||||
prefer_chunk_type: str = "symbol_block",
|
||||
) -> list[LayeredRetrievalItem]:
|
||||
symbols = self.get_symbols_by_ids(rag_session_id, symbol_ids)
|
||||
chunks: list[LayeredRetrievalItem] = []
|
||||
for symbol in symbols:
|
||||
location = symbol.location
|
||||
if location is None:
|
||||
continue
|
||||
chunk = self._chunk_for_symbol(rag_session_id, symbol, prefer_chunk_type=prefer_chunk_type)
|
||||
if chunk is not None:
|
||||
chunks.append(chunk)
|
||||
return chunks
|
||||
|
||||
def _chunk_for_symbol(
|
||||
self,
|
||||
rag_session_id: str,
|
||||
symbol: LayeredRetrievalItem,
|
||||
*,
|
||||
prefer_chunk_type: str,
|
||||
) -> LayeredRetrievalItem | None:
|
||||
location = symbol.location
|
||||
if location is None:
|
||||
return None
|
||||
with get_engine().connect() as conn:
|
||||
rows = conn.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT path, content, layer, title, metadata_json, span_start, span_end
|
||||
FROM rag_chunks
|
||||
WHERE rag_session_id = :sid
|
||||
AND layer = 'C0_SOURCE_CHUNKS'
|
||||
AND path = :path
|
||||
AND COALESCE(span_start, 0) <= :end_line
|
||||
AND COALESCE(span_end, 999999) >= :start_line
|
||||
ORDER BY
|
||||
CASE WHEN CAST(metadata_json AS jsonb)->>'chunk_type' = :prefer_chunk_type THEN 0 ELSE 1 END,
|
||||
ABS(COALESCE(span_start, 0) - :start_line)
|
||||
LIMIT 1
|
||||
"""
|
||||
),
|
||||
{
|
||||
"sid": rag_session_id,
|
||||
"path": location.path,
|
||||
"start_line": location.start_line or 0,
|
||||
"end_line": location.end_line or 999999,
|
||||
"prefer_chunk_type": prefer_chunk_type,
|
||||
},
|
||||
).mappings().fetchall()
|
||||
if not rows:
|
||||
return None
|
||||
row = rows[0]
|
||||
return self._to_item(row, self._loads(row.get("metadata_json")))
|
||||
|
||||
def _to_item(self, row, metadata: dict) -> LayeredRetrievalItem:
|
||||
return LayeredRetrievalItem(
|
||||
source=str(row.get("path") or ""),
|
||||
content=str(row.get("content") or ""),
|
||||
layer=str(row.get("layer") or ""),
|
||||
title=str(row.get("title") or ""),
|
||||
metadata=metadata,
|
||||
location=CodeLocation(
|
||||
path=str(row.get("path") or ""),
|
||||
start_line=row.get("span_start"),
|
||||
end_line=row.get("span_end"),
|
||||
),
|
||||
)
|
||||
|
||||
def _loads(self, value) -> dict:
|
||||
if not value:
|
||||
return {}
|
||||
return json.loads(str(value))
|
||||
102
app/modules/rag/explain/intent_builder.py
Normal file
102
app/modules/rag/explain/intent_builder.py
Normal file
@@ -0,0 +1,102 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from app.modules.rag.explain.models import ExplainHints, ExplainIntent
|
||||
from app.modules.rag.retrieval.query_terms import extract_query_terms
|
||||
|
||||
|
||||
class ExplainIntentBuilder:
|
||||
_ROUTE_RE = re.compile(r"(/[A-Za-z0-9_./{}:-]+)")
|
||||
_FILE_RE = re.compile(r"([A-Za-z0-9_./-]+\.py)")
|
||||
_SYMBOL_RE = re.compile(r"\b([A-Z][A-Za-z0-9_]*\.[A-Za-z_][A-Za-z0-9_]*|[A-Z][A-Za-z0-9_]{2,}|[a-z_][A-Za-z0-9_]{2,})\b")
|
||||
_COMMAND_RE = re.compile(r"`([A-Za-z0-9:_-]+)`")
|
||||
_TEST_KEYWORDS = (
|
||||
"тест",
|
||||
"tests",
|
||||
"test ",
|
||||
"unit-test",
|
||||
"unit test",
|
||||
"юнит-тест",
|
||||
"pytest",
|
||||
"spec",
|
||||
"как покрыто тестами",
|
||||
"как проверяется",
|
||||
"how is it tested",
|
||||
"how it's tested",
|
||||
)
|
||||
|
||||
def build(self, user_query: str) -> ExplainIntent:
|
||||
normalized = " ".join((user_query or "").split())
|
||||
lowered = normalized.lower()
|
||||
keywords = self._keywords(normalized)
|
||||
hints = ExplainHints(
|
||||
paths=self._dedupe(self._FILE_RE.findall(normalized)),
|
||||
symbols=self._symbols(normalized),
|
||||
endpoints=self._dedupe(self._ROUTE_RE.findall(normalized)),
|
||||
commands=self._commands(normalized, lowered),
|
||||
)
|
||||
return ExplainIntent(
|
||||
raw_query=user_query,
|
||||
normalized_query=normalized,
|
||||
keywords=keywords[:12],
|
||||
hints=hints,
|
||||
include_tests=self._include_tests(lowered),
|
||||
expected_entry_types=self._entry_types(lowered, hints),
|
||||
depth=self._depth(lowered),
|
||||
)
|
||||
|
||||
def _keywords(self, text: str) -> list[str]:
|
||||
keywords = extract_query_terms(text)
|
||||
for token in self._symbols(text):
|
||||
if token not in keywords:
|
||||
keywords.append(token)
|
||||
for token in self._ROUTE_RE.findall(text):
|
||||
if token not in keywords:
|
||||
keywords.append(token)
|
||||
return self._dedupe(keywords)
|
||||
|
||||
def _symbols(self, text: str) -> list[str]:
|
||||
values = []
|
||||
for raw in self._SYMBOL_RE.findall(text):
|
||||
token = raw.strip()
|
||||
if len(token) < 3:
|
||||
continue
|
||||
if token.endswith(".py"):
|
||||
continue
|
||||
values.append(token)
|
||||
return self._dedupe(values)
|
||||
|
||||
def _commands(self, text: str, lowered: str) -> list[str]:
|
||||
values = list(self._COMMAND_RE.findall(text))
|
||||
if " command " in f" {lowered} ":
|
||||
values.extend(re.findall(r"command\s+([A-Za-z0-9:_-]+)", lowered))
|
||||
if " cli " in f" {lowered} ":
|
||||
values.extend(re.findall(r"cli\s+([A-Za-z0-9:_-]+)", lowered))
|
||||
return self._dedupe(values)
|
||||
|
||||
def _entry_types(self, lowered: str, hints: ExplainHints) -> list[str]:
|
||||
if hints.endpoints or any(token in lowered for token in ("endpoint", "route", "handler", "http", "api")):
|
||||
return ["http"]
|
||||
if hints.commands or any(token in lowered for token in ("cli", "command", "click", "typer")):
|
||||
return ["cli"]
|
||||
return ["http", "cli"]
|
||||
|
||||
def _depth(self, lowered: str) -> str:
|
||||
if any(token in lowered for token in ("deep", "подроб", "деталь", "full flow", "trace")):
|
||||
return "deep"
|
||||
if any(token in lowered for token in ("high level", "overview", "кратко", "summary")):
|
||||
return "high"
|
||||
return "medium"
|
||||
|
||||
def _include_tests(self, lowered: str) -> bool:
|
||||
normalized = f" {lowered} "
|
||||
return any(token in normalized for token in self._TEST_KEYWORDS)
|
||||
|
||||
def _dedupe(self, values: list[str]) -> list[str]:
|
||||
result: list[str] = []
|
||||
for value in values:
|
||||
item = value.strip()
|
||||
if item and item not in result:
|
||||
result.append(item)
|
||||
return result
|
||||
289
app/modules/rag/explain/layered_gateway.py
Normal file
289
app/modules/rag/explain/layered_gateway.py
Normal file
@@ -0,0 +1,289 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING, Callable
|
||||
|
||||
from app.modules.rag.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.modules.rag.retrieval.test_filter import build_test_filters, debug_disable_test_filter
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.persistence.repository import RagRepository
|
||||
from app.modules.rag_session.embedding.gigachat_embedder import GigaChatEmbedder
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class LayerRetrievalResult:
|
||||
items: list[LayeredRetrievalItem]
|
||||
missing: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
class LayeredRetrievalGateway:
    """Best-effort retrieval of RAG chunks for explain flows.

    Wraps the repository's vector and lexical retrieval behind an API that
    never raises: failures are logged, surfaced through
    ``LayerRetrievalResult.missing``, and — when a test-exclusion filter was
    active — retried once with the filter disabled.
    """

    def __init__(self, repository: RagRepository, embedder: GigaChatEmbedder) -> None:
        self._repository = repository
        self._embedder = embedder

    def retrieve_layer(
        self,
        rag_session_id: str,
        query: str,
        layer: str,
        *,
        limit: int,
        path_prefixes: list[str] | None = None,
        exclude_tests: bool = True,
        prefer_non_tests: bool = False,
        include_spans: bool = False,
    ) -> LayerRetrievalResult:
        """Embed ``query`` and fetch up to ``limit`` chunks from ``layer``.

        Returns an empty result with a ``missing`` marker instead of raising.
        If the filtered retrieval fails after embedding succeeded, retries once
        without the test filters.
        """
        # A global debug switch can force test filtering off.
        effective_exclude_tests = exclude_tests and not debug_disable_test_filter()
        filter_args = self._filter_args(effective_exclude_tests)
        query_embedding: list[float] | None = None
        try:
            query_embedding = self._embedder.embed([query])[0]
            rows = self._repository.retrieve(
                rag_session_id,
                query_embedding,
                query_text=query,
                limit=limit,
                layers=[layer],
                path_prefixes=path_prefixes,
                exclude_path_prefixes=filter_args["exclude_path_prefixes"],
                exclude_like_patterns=filter_args["exclude_like_patterns"],
                # When hard test exclusion is off, still rank non-test rows higher.
                prefer_non_tests=prefer_non_tests or not effective_exclude_tests,
            )
            return self._success_result(
                rows,
                rag_session_id=rag_session_id,
                label="layered retrieval",
                include_spans=include_spans,
                layer=layer,
                exclude_tests=effective_exclude_tests,
                path_prefixes=path_prefixes,
            )
        except Exception as exc:
            # Embedding itself failed: a retry would fail identically, so give up.
            if query_embedding is None:
                self._log_failure(
                    label="layered retrieval",
                    rag_session_id=rag_session_id,
                    layer=layer,
                    exclude_tests=effective_exclude_tests,
                    path_prefixes=path_prefixes,
                    exc=exc,
                )
                return LayerRetrievalResult(items=[], missing=[self._failure_missing(f"layer:{layer} retrieval_failed", exc)])
            retry_result = self._retry_without_test_filter(
                operation=lambda: self._repository.retrieve(
                    rag_session_id,
                    query_embedding,
                    query_text=query,
                    limit=limit,
                    layers=[layer],
                    path_prefixes=path_prefixes,
                    exclude_path_prefixes=None,
                    exclude_like_patterns=None,
                    prefer_non_tests=True,
                ),
                label="layered retrieval",
                rag_session_id=rag_session_id,
                include_spans=include_spans,
                layer=layer,
                exclude_tests=effective_exclude_tests,
                path_prefixes=path_prefixes,
                exc=exc,
                missing_prefix=f"layer:{layer} retrieval_failed",
            )
            if retry_result is not None:
                return retry_result
            return LayerRetrievalResult(items=[], missing=[self._failure_missing(f"layer:{layer} retrieval_failed", exc)])

    def retrieve_lexical_code(
        self,
        rag_session_id: str,
        query: str,
        *,
        limit: int,
        path_prefixes: list[str] | None = None,
        exclude_tests: bool = True,
        include_spans: bool = False,
    ) -> LayerRetrievalResult:
        """Lexical (text-match) code retrieval with the same retry semantics as ``retrieve_layer``."""
        effective_exclude_tests = exclude_tests and not debug_disable_test_filter()
        filter_args = self._filter_args(effective_exclude_tests)
        try:
            rows = self._repository.retrieve_lexical_code(
                rag_session_id,
                query_text=query,
                limit=limit,
                path_prefixes=path_prefixes,
                exclude_path_prefixes=filter_args["exclude_path_prefixes"],
                exclude_like_patterns=filter_args["exclude_like_patterns"],
                prefer_non_tests=not effective_exclude_tests,
            )
            return self._success_result(
                rows,
                rag_session_id=rag_session_id,
                label="lexical retrieval",
                include_spans=include_spans,
                exclude_tests=effective_exclude_tests,
                path_prefixes=path_prefixes,
            )
        except Exception as exc:
            retry_result = self._retry_without_test_filter(
                operation=lambda: self._repository.retrieve_lexical_code(
                    rag_session_id,
                    query_text=query,
                    limit=limit,
                    path_prefixes=path_prefixes,
                    exclude_path_prefixes=None,
                    exclude_like_patterns=None,
                    prefer_non_tests=True,
                ),
                label="lexical retrieval",
                rag_session_id=rag_session_id,
                include_spans=include_spans,
                exclude_tests=effective_exclude_tests,
                path_prefixes=path_prefixes,
                exc=exc,
                missing_prefix="layer:C0 lexical_retrieval_failed",
            )
            if retry_result is not None:
                return retry_result
            return LayerRetrievalResult(items=[], missing=[self._failure_missing("layer:C0 lexical_retrieval_failed", exc)])

    def _retry_without_test_filter(
        self,
        *,
        operation: Callable[[], list[dict]],
        label: str,
        rag_session_id: str,
        include_spans: bool,
        exclude_tests: bool,
        path_prefixes: list[str] | None,
        exc: Exception,
        missing_prefix: str,
        layer: str | None = None,
    ) -> LayerRetrievalResult | None:
        """Re-run ``operation`` without test filters after a failed attempt.

        Returns ``None`` when no retry applies (filters were already off) or
        when the retry itself fails; otherwise the retry's result with an extra
        ``missing`` marker recording that the test filter was dropped.
        """
        if not exclude_tests:
            # Filters were already disabled; an identical retry is pointless.
            self._log_failure(
                label=label,
                rag_session_id=rag_session_id,
                layer=layer,
                exclude_tests=exclude_tests,
                path_prefixes=path_prefixes,
                exc=exc,
            )
            return None
        self._log_failure(
            label=label,
            rag_session_id=rag_session_id,
            layer=layer,
            exclude_tests=exclude_tests,
            path_prefixes=path_prefixes,
            exc=exc,
            retried_without_test_filter=True,
        )
        try:
            rows = operation()
        except Exception as retry_exc:
            self._log_failure(
                label=f"{label} retry",
                rag_session_id=rag_session_id,
                layer=layer,
                exclude_tests=False,
                path_prefixes=path_prefixes,
                exc=retry_exc,
            )
            return None
        result = self._success_result(
            rows,
            rag_session_id=rag_session_id,
            label=f"{label} retry",
            include_spans=include_spans,
            layer=layer,
            exclude_tests=False,
            path_prefixes=path_prefixes,
        )
        # Record the degraded mode so callers know tests were not excluded.
        result.missing.append(f"{missing_prefix}:retried_without_test_filter")
        return result

    def _success_result(
        self,
        rows: list[dict],
        *,
        rag_session_id: str,
        label: str,
        include_spans: bool,
        exclude_tests: bool,
        path_prefixes: list[str] | None,
        layer: str | None = None,
    ) -> LayerRetrievalResult:
        """Convert repository rows to items and log a retrieval summary."""
        items = [self._to_item(row, include_spans=include_spans) for row in rows]
        # NOTE(review): success path logs at WARNING — presumably to surface
        # retrieval diagnostics at default log level; confirm intended severity.
        LOGGER.warning(
            "%s: rag_session_id=%s layer=%s exclude_tests=%s path_prefixes=%s returned_count=%s top_paths=%s",
            label,
            rag_session_id,
            layer,
            exclude_tests,
            path_prefixes or [],
            len(items),
            [item.source for item in items[:3]],
        )
        return LayerRetrievalResult(items=items)

    def _log_failure(
        self,
        *,
        label: str,
        rag_session_id: str,
        exclude_tests: bool,
        path_prefixes: list[str] | None,
        exc: Exception,
        layer: str | None = None,
        retried_without_test_filter: bool = False,
    ) -> None:
        """Log a retrieval failure with full context and the exception traceback."""
        LOGGER.warning(
            "%s failed: rag_session_id=%s layer=%s exclude_tests=%s path_prefixes=%s retried_without_test_filter=%s error=%s",
            label,
            rag_session_id,
            layer,
            exclude_tests,
            path_prefixes or [],
            retried_without_test_filter,
            self._exception_summary(exc),
            exc_info=True,
        )

    def _filter_args(self, exclude_tests: bool) -> dict[str, list[str] | None]:
        """Build repository filter kwargs; both values are None when tests are allowed."""
        test_filters = build_test_filters() if exclude_tests else None
        return {
            "exclude_path_prefixes": test_filters.exclude_path_prefixes if test_filters else None,
            "exclude_like_patterns": test_filters.exclude_like_patterns if test_filters else None,
        }

    def _failure_missing(self, prefix: str, exc: Exception) -> str:
        """Format a ``missing`` marker that embeds a short exception summary."""
        return f"{prefix}:{self._exception_summary(exc)}"

    def _exception_summary(self, exc: Exception) -> str:
        """Return ``TypeName:message`` with whitespace collapsed, capped at 180 chars."""
        message = " ".join(str(exc).split())
        if len(message) > 180:
            message = message[:177] + "..."
        return f"{type(exc).__name__}:{message or 'no_message'}"

    def _to_item(self, row: dict, *, include_spans: bool) -> LayeredRetrievalItem:
        """Map a raw repository row onto a ``LayeredRetrievalItem``.

        A ``CodeLocation`` is attached only when ``include_spans`` is set; the
        row's ``distance`` is used as the item score.
        """
        location = None
        if include_spans:
            location = CodeLocation(
                path=str(row.get("path") or ""),
                start_line=row.get("span_start"),
                end_line=row.get("span_end"),
            )
        return LayeredRetrievalItem(
            source=str(row.get("path") or ""),
            content=str(row.get("content") or ""),
            layer=str(row.get("layer") or ""),
            title=str(row.get("title") or ""),
            metadata=dict(row.get("metadata", {}) or {}),
            score=row.get("distance"),
            location=location,
        )
|
||||
91
app/modules/rag/explain/models.py
Normal file
91
app/modules/rag/explain/models.py
Normal file
@@ -0,0 +1,91 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
||||
class ExplainHints(BaseModel):
    """Concrete anchors extracted from an explain query."""

    model_config = ConfigDict(extra="forbid")

    # Repository-relative file paths, e.g. "app/mod/file.py".
    paths: list[str] = Field(default_factory=list)
    # Code symbol names (functions, classes).
    symbols: list[str] = Field(default_factory=list)
    # HTTP endpoint references.
    endpoints: list[str] = Field(default_factory=list)
    # CLI command references.
    commands: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ExplainIntent(BaseModel):
    """Normalized interpretation of a user's explain query."""

    model_config = ConfigDict(extra="forbid")

    # Query exactly as the user wrote it.
    raw_query: str
    # Cleaned/lowercased form used for retrieval.
    normalized_query: str
    # Salient terms extracted from the query.
    keywords: list[str] = Field(default_factory=list)
    # Concrete anchors (paths, symbols, endpoints, commands).
    hints: ExplainHints = Field(default_factory=ExplainHints)
    # True when the user explicitly asked to include test code.
    include_tests: bool = False
    # Entry-point kinds the query is about; empty means "no preference".
    expected_entry_types: list[Literal["http", "cli"]] = Field(default_factory=list)
    # Requested explanation depth; "deep" widens the trace search.
    depth: Literal["high", "medium", "deep"] = "medium"
|
||||
|
||||
|
||||
class CodeLocation(BaseModel):
    """A file path with an optional 1-based line span."""

    model_config = ConfigDict(extra="forbid")

    path: str
    start_line: int | None = None
    end_line: int | None = None
|
||||
|
||||
|
||||
class LayeredRetrievalItem(BaseModel):
    """One chunk returned by layered retrieval."""

    model_config = ConfigDict(extra="forbid")

    # Origin path of the chunk.
    source: str
    # Chunk text.
    content: str
    # RAG layer identifier the chunk belongs to.
    layer: str
    # Human-readable chunk title.
    title: str
    # Layer-specific payload (symbol ids, edge refs, flags, ...).
    metadata: dict[str, Any] = Field(default_factory=dict)
    # Retrieval score; populated from the repository's distance value.
    score: float | None = None
    # Line span within `source`, when spans were requested.
    location: CodeLocation | None = None
|
||||
|
||||
|
||||
class TracePath(BaseModel):
    """A scored chain of symbol ids traced through the code graph."""

    model_config = ConfigDict(extra="forbid")

    # Symbol ids in traversal order.
    symbol_ids: list[str] = Field(default_factory=list)
    # Heuristic quality score accumulated from edge scores.
    score: float = 0.0
    # Entry point the path started from, when known.
    entrypoint_id: str | None = None
    # Free-form trace annotations (e.g. lazily resolved references).
    notes: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class EvidenceItem(BaseModel):
    """A citable piece of evidence backing the explain answer."""

    model_config = ConfigDict(extra="forbid")

    # Stable id used to reference this evidence, e.g. "excerpt_3".
    evidence_id: str
    kind: Literal["entrypoint", "symbol", "edge", "excerpt"]
    summary: str
    # Where the evidence lives in the codebase, when known.
    location: CodeLocation | None = None
    # Symbol ids this evidence supports.
    supports: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class CodeExcerpt(BaseModel):
    """A source-code excerpt selected for the final answer."""

    model_config = ConfigDict(extra="forbid")

    # Id of the evidence entry this excerpt belongs to.
    evidence_id: str
    symbol_id: str | None = None
    title: str
    path: str
    start_line: int | None = None
    end_line: int | None = None
    content: str
    # Excerpt role; prefixed with "test:" when taken from test code.
    focus: str = "overview"
|
||||
|
||||
|
||||
class ExplainPack(BaseModel):
    """Complete retrieval result handed to the answer composer."""

    model_config = ConfigDict(extra="forbid")

    # Interpreted user query.
    intent: ExplainIntent
    # Entry points chosen for the query (filtered by expected entry types).
    selected_entrypoints: list[LayeredRetrievalItem] = Field(default_factory=list)
    # Seed symbols the trace started from.
    seed_symbols: list[LayeredRetrievalItem] = Field(default_factory=list)
    # Scored paths traced through the code graph.
    trace_paths: list[TracePath] = Field(default_factory=list)
    # Evidence entries keyed by evidence id.
    evidence_index: dict[str, EvidenceItem] = Field(default_factory=dict)
    # Excerpts backing the answer.
    code_excerpts: list[CodeExcerpt] = Field(default_factory=list)
    # Markers for stages that produced nothing or failed.
    missing: list[str] = Field(default_factory=list)
    # Reserved for contradictory evidence; currently always empty.
    conflicts: list[str] = Field(default_factory=list)
|
||||
328
app/modules/rag/explain/retriever_v2.py
Normal file
328
app/modules/rag/explain/retriever_v2.py
Normal file
@@ -0,0 +1,328 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.rag.contracts.enums import RagLayer
|
||||
from app.modules.rag.explain.intent_builder import ExplainIntentBuilder
|
||||
from app.modules.rag.explain.layered_gateway import LayerRetrievalResult, LayeredRetrievalGateway
|
||||
from app.modules.rag.explain.models import CodeExcerpt, EvidenceItem, ExplainPack, LayeredRetrievalItem
|
||||
from app.modules.rag.explain.source_excerpt_fetcher import SourceExcerptFetcher
|
||||
from app.modules.rag.explain.trace_builder import TraceBuilder
|
||||
from app.modules.rag.retrieval.test_filter import exclude_tests_default, is_test_path
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
_MIN_EXCERPTS = 2
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.explain.graph_repository import CodeGraphRepository
|
||||
from app.modules.rag.explain.models import ExplainIntent
|
||||
|
||||
|
||||
class CodeExplainRetrieverV2:
    """Builds an ``ExplainPack``: entrypoints -> seed symbols -> trace paths -> excerpts.

    Retrieval is layered: entry points (C3) and symbols (C1) come from the
    gateway, call paths from the trace builder, and excerpts from the graph;
    a lexical pass (C0) serves as fallback when tracing yields nothing.
    """

    def __init__(
        self,
        gateway: LayeredRetrievalGateway,
        graph_repository: CodeGraphRepository,
        intent_builder: ExplainIntentBuilder | None = None,
        trace_builder: TraceBuilder | None = None,
        excerpt_fetcher: SourceExcerptFetcher | None = None,
    ) -> None:
        self._gateway = gateway
        self._graph = graph_repository
        # Collaborators are injectable for tests; defaults cover production use.
        self._intent_builder = intent_builder or ExplainIntentBuilder()
        self._trace_builder = trace_builder or TraceBuilder(graph_repository)
        self._excerpt_fetcher = excerpt_fetcher or SourceExcerptFetcher(graph_repository)

    def build_pack(
        self,
        rag_session_id: str,
        user_query: str,
        *,
        file_candidates: list[dict] | None = None,
    ) -> ExplainPack:
        """Run the full explain pipeline for ``user_query``.

        If excluding tests leaves fewer than ``_MIN_EXCERPTS`` excerpts, a
        lexical fallback pass that includes test code is merged in.
        """
        intent = self._intent_builder.build(user_query)
        path_prefixes = _path_prefixes(intent, file_candidates or [])
        # Tests are excluded unless the user asked for them (or config says otherwise).
        exclude_tests = exclude_tests_default() and not intent.include_tests
        pack = self._run_pass(rag_session_id, intent, path_prefixes, exclude_tests=exclude_tests)
        if exclude_tests and len(pack.code_excerpts) < _MIN_EXCERPTS:
            self._merge_test_fallback(pack, rag_session_id, intent, path_prefixes)
        self._log_pack(rag_session_id, pack)
        return pack

    def _run_pass(
        self,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
        *,
        exclude_tests: bool,
    ) -> ExplainPack:
        """Execute one full retrieval pass and assemble the pack."""
        missing: list[str] = []
        entrypoints_result = self._entrypoints(rag_session_id, intent, path_prefixes, exclude_tests=exclude_tests)
        missing.extend(entrypoints_result.missing)
        selected_entrypoints = self._filter_entrypoints(intent, entrypoints_result.items)
        if not selected_entrypoints:
            missing.append("layer:C3 empty")
        seed_result = self._seed_symbols(rag_session_id, intent, path_prefixes, selected_entrypoints, exclude_tests=exclude_tests)
        missing.extend(seed_result.missing)
        seed_symbols = seed_result.items
        if not seed_symbols:
            missing.append("layer:C1 empty")
        # Deeper requested explanations get longer graph traversals.
        depth = 4 if intent.depth == "deep" else 3 if intent.depth == "medium" else 2
        trace_paths = self._trace_builder.build_paths(rag_session_id, seed_symbols, max_depth=depth) if seed_symbols else []
        excerpts, excerpt_evidence = self._excerpt_fetcher.fetch(rag_session_id, trace_paths) if trace_paths else ([], {})
        if not excerpts:
            # Graph tracing produced nothing; fall back to lexical code search.
            lexical_result = self._gateway.retrieve_lexical_code(
                rag_session_id,
                intent.normalized_query,
                limit=6,
                path_prefixes=path_prefixes or None,
                exclude_tests=exclude_tests,
                include_spans=True,
            )
            missing.extend(lexical_result.missing)
            excerpts, excerpt_evidence = _lexical_excerpts(lexical_result.items)
            if not excerpts:
                missing.append("layer:C0 empty")
        evidence_index = _evidence_index(selected_entrypoints, seed_symbols)
        evidence_index.update(excerpt_evidence)
        missing.extend(_missing(selected_entrypoints, seed_symbols, trace_paths, excerpts))
        return ExplainPack(
            intent=intent,
            selected_entrypoints=selected_entrypoints,
            seed_symbols=seed_symbols,
            trace_paths=trace_paths,
            evidence_index=evidence_index,
            code_excerpts=excerpts,
            missing=_cleanup_missing(_dedupe(missing), has_excerpts=bool(excerpts)),
            conflicts=[],
        )

    def _merge_test_fallback(
        self,
        pack: ExplainPack,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
    ) -> None:
        """Augment a thin pack with lexical results that include test code.

        Mutates ``pack`` in place: appends non-duplicate excerpts, merges
        evidence, and re-cleans the ``missing`` markers.
        """
        lexical_result = self._gateway.retrieve_lexical_code(
            rag_session_id,
            intent.normalized_query,
            limit=6,
            path_prefixes=path_prefixes or None,
            exclude_tests=False,
            include_spans=True,
        )
        # Continue evidence-id numbering after the excerpts already present.
        excerpt_offset = len([key for key in pack.evidence_index if key.startswith("excerpt_")])
        excerpts, evidence = _lexical_excerpts(
            lexical_result.items,
            start_index=excerpt_offset,
            is_test_fallback=True,
        )
        if not excerpts:
            pack.missing = _dedupe(pack.missing + lexical_result.missing)
            return
        # De-duplicate by (path, span, content) against existing excerpts.
        seen = {(item.path, item.start_line, item.end_line, item.content) for item in pack.code_excerpts}
        for excerpt in excerpts:
            key = (excerpt.path, excerpt.start_line, excerpt.end_line, excerpt.content)
            if key in seen:
                continue
            pack.code_excerpts.append(excerpt)
            seen.add(key)
        pack.evidence_index.update(evidence)
        pack.missing = _cleanup_missing(_dedupe(pack.missing + lexical_result.missing), has_excerpts=bool(pack.code_excerpts))

    def _entrypoints(
        self,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
        *,
        exclude_tests: bool,
    ) -> LayerRetrievalResult:
        """Retrieve candidate entry points from the entrypoints layer."""
        return self._gateway.retrieve_layer(
            rag_session_id,
            intent.normalized_query,
            RagLayer.CODE_ENTRYPOINTS,
            limit=6,
            path_prefixes=path_prefixes or None,
            exclude_tests=exclude_tests,
            prefer_non_tests=True,
            include_spans=True,
        )

    def _filter_entrypoints(self, intent: ExplainIntent, items: list[LayeredRetrievalItem]) -> list[LayeredRetrievalItem]:
        """Keep at most 3 entry points, preferring those matching the expected entry types."""
        if not intent.expected_entry_types:
            return items[:3]
        filtered = [item for item in items if str(item.metadata.get("entry_type") or "") in intent.expected_entry_types]
        # Fall back to the unfiltered head when no entry point matched the type hint.
        return filtered[:3] or items[:3]

    def _seed_symbols(
        self,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
        entrypoints: list[LayeredRetrievalItem],
        *,
        exclude_tests: bool,
    ) -> LayerRetrievalResult:
        """Collect up to 8 unique seed symbols: entrypoint handlers first, then catalog hits."""
        symbol_result = self._gateway.retrieve_layer(
            rag_session_id,
            intent.normalized_query,
            RagLayer.CODE_SYMBOL_CATALOG,
            limit=12,
            path_prefixes=path_prefixes or None,
            exclude_tests=exclude_tests,
            prefer_non_tests=True,
            include_spans=True,
        )
        handlers: list[LayeredRetrievalItem] = []
        handler_ids = [str(item.metadata.get("handler_symbol_id") or "") for item in entrypoints]
        if handler_ids:
            handlers = self._graph.get_symbols_by_ids(rag_session_id, [item for item in handler_ids if item])
        seeds: list[LayeredRetrievalItem] = []
        seen: set[str] = set()
        # Handlers come first so entry-point-adjacent symbols dominate the seeds.
        for item in handlers + symbol_result.items:
            symbol_id = str(item.metadata.get("symbol_id") or "")
            if not symbol_id or symbol_id in seen:
                continue
            seen.add(symbol_id)
            seeds.append(item)
            if len(seeds) >= 8:
                break
        return LayerRetrievalResult(items=seeds, missing=list(symbol_result.missing))

    def _log_pack(self, rag_session_id: str, pack: ExplainPack) -> None:
        """Log a one-line summary of the assembled pack's contents."""
        prod_excerpt_count = len([excerpt for excerpt in pack.code_excerpts if not _is_test_excerpt(excerpt)])
        test_excerpt_count = len(pack.code_excerpts) - prod_excerpt_count
        # NOTE(review): summary logs at WARNING — presumably for prod visibility;
        # confirm intended severity.
        LOGGER.warning(
            "code explain pack: rag_session_id=%s entrypoints=%s seeds=%s paths=%s excerpts=%s prod_excerpt_count=%s test_excerpt_count=%s missing=%s",
            rag_session_id,
            len(pack.selected_entrypoints),
            len(pack.seed_symbols),
            len(pack.trace_paths),
            len(pack.code_excerpts),
            prod_excerpt_count,
            test_excerpt_count,
            pack.missing,
        )
|
||||
|
||||
|
||||
def _evidence_index(
    entrypoints: list[LayeredRetrievalItem],
    seed_symbols: list[LayeredRetrievalItem],
) -> dict[str, EvidenceItem]:
    """Index entrypoint and symbol items as evidence, keyed "kind_N" per group."""
    index: dict[str, EvidenceItem] = {}
    groups = (
        ("entrypoint", "handler_symbol_id", entrypoints),
        ("symbol", "symbol_id", seed_symbols),
    )
    for kind, support_key, group_items in groups:
        for position, item in enumerate(group_items, start=1):
            evidence_id = f"{kind}_{position}"
            index[evidence_id] = EvidenceItem(
                evidence_id=evidence_id,
                kind=kind,
                summary=item.title,
                location=item.location,
                supports=[str(item.metadata.get(support_key) or "")],
            )
    return index
|
||||
|
||||
|
||||
def _missing(
|
||||
entrypoints: list[LayeredRetrievalItem],
|
||||
seed_symbols: list[LayeredRetrievalItem],
|
||||
trace_paths,
|
||||
excerpts,
|
||||
) -> list[str]:
|
||||
missing: list[str] = []
|
||||
if not entrypoints:
|
||||
missing.append("entrypoints")
|
||||
if not seed_symbols:
|
||||
missing.append("seed_symbols")
|
||||
if not trace_paths:
|
||||
missing.append("trace_paths")
|
||||
if not excerpts:
|
||||
missing.append("code_excerpts")
|
||||
return missing
|
||||
|
||||
|
||||
def _lexical_excerpts(
    items: list[LayeredRetrievalItem],
    *,
    start_index: int = 0,
    is_test_fallback: bool = False,
) -> tuple[list[CodeExcerpt], dict[str, EvidenceItem]]:
    """Convert lexical retrieval items into excerpts plus their evidence entries.

    Args:
        items: Retrieved items to convert; one excerpt and one evidence entry
            is produced per item.
        start_index: Offset for evidence-id numbering so ids stay unique when
            merging into a pack that already contains excerpts.
        is_test_fallback: Kept for interface compatibility. It never affected
            the computed focus — the original branch assigned the same value
            the variable already held — so it is deliberately unused here.

    Returns:
        The excerpts and an evidence index keyed by the generated evidence ids.
    """
    excerpts: list[CodeExcerpt] = []
    evidence_index: dict[str, EvidenceItem] = {}
    for item in items:
        evidence_id = f"excerpt_{start_index + len(evidence_index) + 1}"
        location = item.location
        evidence_index[evidence_id] = EvidenceItem(
            evidence_id=evidence_id,
            kind="excerpt",
            summary=item.title or item.source,
            location=location,
            supports=[],
        )
        # Mark excerpts taken from test code so downstream counting can demote them.
        focus = "test:lexical" if _item_is_test(item) else "lexical"
        excerpts.append(
            CodeExcerpt(
                evidence_id=evidence_id,
                symbol_id=str(item.metadata.get("symbol_id") or "") or None,
                title=item.title or item.source,
                path=item.source,
                start_line=location.start_line if location else None,
                end_line=location.end_line if location else None,
                content=item.content,
                focus=focus,
            )
        )
    return excerpts, evidence_index
|
||||
|
||||
|
||||
def _item_is_test(item: LayeredRetrievalItem) -> bool:
    """True when the item is flagged as test code or lives at a test path."""
    if item.metadata.get("is_test"):
        return True
    return is_test_path(item.source)
|
||||
|
||||
|
||||
def _is_test_excerpt(excerpt: CodeExcerpt) -> bool:
    """True when the excerpt's focus or path marks it as test code."""
    if excerpt.focus.startswith("test:"):
        return True
    return is_test_path(excerpt.path)
|
||||
|
||||
|
||||
def _path_prefixes(intent: ExplainIntent, file_candidates: list[dict]) -> list[str]:
|
||||
values: list[str] = []
|
||||
for path in intent.hints.paths:
|
||||
prefix = path.rsplit("/", 1)[0] if "/" in path else path
|
||||
if prefix and prefix not in values:
|
||||
values.append(prefix)
|
||||
for item in file_candidates[:6]:
|
||||
path = str(item.get("path") or "")
|
||||
prefix = path.rsplit("/", 1)[0] if "/" in path else ""
|
||||
if prefix and prefix not in values:
|
||||
values.append(prefix)
|
||||
return values
|
||||
|
||||
|
||||
def _cleanup_missing(values: list[str], *, has_excerpts: bool) -> list[str]:
|
||||
if not has_excerpts:
|
||||
return values
|
||||
return [value for value in values if value not in {"code_excerpts", "layer:C0 empty"}]
|
||||
|
||||
|
||||
def _dedupe(values: list[str]) -> list[str]:
|
||||
result: list[str] = []
|
||||
for value in values:
|
||||
item = value.strip()
|
||||
if item and item not in result:
|
||||
result.append(item)
|
||||
return result
|
||||
53
app/modules/rag/explain/source_excerpt_fetcher.py
Normal file
53
app/modules/rag/explain/source_excerpt_fetcher.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.rag.explain.excerpt_planner import ExcerptPlanner
|
||||
from app.modules.rag.explain.models import CodeExcerpt, EvidenceItem, TracePath
|
||||
from app.modules.rag.retrieval.test_filter import is_test_path
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.explain.graph_repository import CodeGraphRepository
|
||||
|
||||
|
||||
class SourceExcerptFetcher:
    """Turns trace paths into code excerpts by loading chunks for traced symbols."""

    def __init__(self, graph_repository: CodeGraphRepository, planner: ExcerptPlanner | None = None) -> None:
        self._graph = graph_repository
        self._planner = planner or ExcerptPlanner()

    def fetch(
        self,
        rag_session_id: str,
        trace_paths: list[TracePath],
        *,
        max_excerpts: int = 40,
    ) -> tuple[list[CodeExcerpt], dict[str, EvidenceItem]]:
        """Fetch excerpts for every symbol on the trace paths, capped at ``max_excerpts``.

        Returns the excerpts plus one evidence entry per source chunk, keyed
        "excerpt_N" in chunk order.
        """
        # Collect symbol ids in traversal order, de-duplicated across paths.
        ordered_symbol_ids: list[str] = []
        for path in trace_paths:
            for symbol_id in path.symbol_ids:
                if symbol_id and symbol_id not in ordered_symbol_ids:
                    ordered_symbol_ids.append(symbol_id)
        chunks = self._graph.get_chunks_by_symbol_ids(rag_session_id, ordered_symbol_ids)
        excerpts: list[CodeExcerpt] = []
        evidence_index: dict[str, EvidenceItem] = {}
        for chunk in chunks:
            symbol_id = str(chunk.metadata.get("symbol_id") or "")
            evidence_id = f"excerpt_{len(evidence_index) + 1}"
            location = chunk.location
            evidence_index[evidence_id] = EvidenceItem(
                evidence_id=evidence_id,
                kind="excerpt",
                summary=chunk.title,
                location=location,
                supports=[symbol_id] if symbol_id else [],
            )
            # Tag excerpts cut from test code so downstream counting can demote them.
            is_test_chunk = bool(chunk.metadata.get("is_test")) or is_test_path(location.path if location else chunk.source)
            for excerpt in self._planner.plan(chunk, evidence_id=evidence_id, symbol_id=symbol_id):
                if len(excerpts) >= max_excerpts:
                    break
                if is_test_chunk and not excerpt.focus.startswith("test:"):
                    excerpt.focus = f"test:{excerpt.focus}"
                excerpts.append(excerpt)
            if len(excerpts) >= max_excerpts:
                break
        return excerpts, evidence_index
|
||||
102
app/modules/rag/explain/trace_builder.py
Normal file
102
app/modules/rag/explain/trace_builder.py
Normal file
@@ -0,0 +1,102 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.rag.explain.models import LayeredRetrievalItem, TracePath
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.explain.graph_repository import CodeGraphRepository
|
||||
|
||||
|
||||
class TraceBuilder:
    """Expands seed symbols into scored call/import paths over the code graph."""

    def __init__(self, graph_repository: CodeGraphRepository) -> None:
        self._graph = graph_repository

    def build_paths(
        self,
        rag_session_id: str,
        seed_symbols: list[LayeredRetrievalItem],
        *,
        max_depth: int,
        max_paths: int = 3,
        edge_types: list[str] | None = None,
    ) -> list[TracePath]:
        """BFS from each seed along outgoing edges, returning the top-scored paths.

        Up to ``max_paths * 3`` candidate paths are collected per seed before
        de-duplication; the ``max_paths`` best survive. When nothing was
        traced, a single fallback path containing only the first seed's id is
        returned.
        """
        edges_filter = edge_types or ["calls", "imports", "inherits"]
        symbol_map = self._symbol_map(seed_symbols)
        paths: list[TracePath] = []
        for seed in seed_symbols:
            seed_id = str(seed.metadata.get("symbol_id") or "")
            if not seed_id:
                continue
            # Each queue entry: (path so far, accumulated score, trace notes).
            queue: list[tuple[list[str], float, list[str]]] = [([seed_id], 0.0, [])]
            while queue and len(paths) < max_paths * 3:
                current_path, score, notes = queue.pop(0)
                src_symbol_id = current_path[-1]
                out_edges = self._graph.get_out_edges(rag_session_id, [src_symbol_id], edges_filter, limit_per_src=4)
                # Dead end or depth limit reached: the current path is final.
                if not out_edges or len(current_path) >= max_depth:
                    paths.append(TracePath(symbol_ids=current_path, score=score, notes=notes))
                    continue
                for edge in out_edges:
                    metadata = edge.metadata
                    dst_symbol_id = str(metadata.get("dst_symbol_id") or "")
                    next_notes = list(notes)
                    next_score = score + self._edge_score(edge, symbol_map.get(src_symbol_id))
                    if not dst_symbol_id:
                        # Unresolved edge: try to resolve the textual reference lazily,
                        # rewarding the path when resolution succeeds.
                        dst_ref = str(metadata.get("dst_ref") or "")
                        package_hint = self._package_hint(symbol_map.get(src_symbol_id))
                        resolved = self._graph.resolve_symbol_by_ref(rag_session_id, dst_ref, package_hint=package_hint)
                        if resolved is not None:
                            dst_symbol_id = str(resolved.metadata.get("symbol_id") or "")
                            symbol_map[dst_symbol_id] = resolved
                            next_score += 2.0
                            next_notes.append(f"resolved:{dst_ref}")
                    # Unresolvable destination or a cycle: close the path here.
                    if not dst_symbol_id or dst_symbol_id in current_path:
                        paths.append(TracePath(symbol_ids=current_path, score=next_score, notes=next_notes))
                        continue
                    if dst_symbol_id not in symbol_map:
                        symbols = self._graph.get_symbols_by_ids(rag_session_id, [dst_symbol_id])
                        if symbols:
                            symbol_map[dst_symbol_id] = symbols[0]
                    queue.append((current_path + [dst_symbol_id], next_score, next_notes))
        unique = self._unique_paths(paths)
        unique.sort(key=lambda item: item.score, reverse=True)
        # NOTE(review): the fallback may yield a path whose only symbol id is ""
        # when the first seed lacks a symbol_id — confirm intended.
        return unique[:max_paths] or [TracePath(symbol_ids=[seed.metadata.get("symbol_id", "")], score=0.0) for seed in seed_symbols[:1]]

    def _edge_score(self, edge: LayeredRetrievalItem, source_symbol: LayeredRetrievalItem | None) -> float:
        """Heuristic edge weight: resolved edges and same-file hops score higher; test paths are penalized."""
        metadata = edge.metadata
        score = 1.0
        if str(metadata.get("resolution") or "") == "resolved":
            score += 2.0
        source_path = source_symbol.source if source_symbol is not None else ""
        if source_path and edge.source == source_path:
            score += 1.0
        if "tests/" in edge.source or "/tests/" in edge.source:
            score -= 3.0
        return score

    def _package_hint(self, symbol: LayeredRetrievalItem | None) -> str | None:
        """Derive a parent-package hint from the symbol's dotted module path, if any."""
        if symbol is None:
            return None
        package = str(symbol.metadata.get("package_or_module") or "")
        if not package:
            return None
        # Drop the last dotted component; fall back to the whole value for
        # single-component packages.
        return ".".join(package.split(".")[:-1]) or package

    def _symbol_map(self, items: list[LayeredRetrievalItem]) -> dict[str, LayeredRetrievalItem]:
        """Index items by their ``symbol_id`` metadata, skipping items without one."""
        result: dict[str, LayeredRetrievalItem] = {}
        for item in items:
            symbol_id = str(item.metadata.get("symbol_id") or "")
            if symbol_id:
                result[symbol_id] = item
        return result

    def _unique_paths(self, items: list[TracePath]) -> list[TracePath]:
        """De-duplicate paths by their non-empty symbol-id tuple, keeping first occurrences."""
        result: list[TracePath] = []
        seen: set[tuple[str, ...]] = set()
        for item in items:
            key = tuple(symbol_id for symbol_id in item.symbol_ids if symbol_id)
            if not key or key in seen:
                continue
            seen.add(key)
            result.append(item)
        return result
|
||||
Binary file not shown.
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
from app.modules.rag.contracts import RagDocument, RagLayer, RagSource, RagSpan
|
||||
from app.modules.rag.indexing.code.code_text.chunker import CodeChunk
|
||||
from app.modules.rag.retrieval.test_filter import is_test_path
|
||||
|
||||
|
||||
class CodeTextDocumentBuilder:
|
||||
@@ -17,6 +18,7 @@ class CodeTextDocumentBuilder:
|
||||
"chunk_index": chunk_index,
|
||||
"chunk_type": chunk.chunk_type,
|
||||
"module_or_unit": source.path.replace("/", ".").removesuffix(".py"),
|
||||
"is_test": is_test_path(source.path),
|
||||
"artifact_type": "CODE",
|
||||
},
|
||||
)
|
||||
|
||||
Binary file not shown.
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
from app.modules.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource, RagSpan
|
||||
from app.modules.rag.indexing.code.edges.extractor import PyEdge
|
||||
from app.modules.rag.retrieval.test_filter import is_test_path
|
||||
|
||||
|
||||
class EdgeDocumentBuilder:
|
||||
@@ -22,6 +23,7 @@ class EdgeDocumentBuilder:
|
||||
"dst_symbol_id": edge.dst_symbol_id,
|
||||
"dst_ref": edge.dst_ref,
|
||||
"resolution": edge.resolution,
|
||||
"is_test": is_test_path(source.path),
|
||||
"lang_payload": edge.metadata,
|
||||
"artifact_type": "CODE",
|
||||
},
|
||||
|
||||
Binary file not shown.
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
from app.modules.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource, RagSpan
|
||||
from app.modules.rag.indexing.code.entrypoints.registry import Entrypoint
|
||||
from app.modules.rag.retrieval.test_filter import is_test_path
|
||||
|
||||
|
||||
class EntrypointDocumentBuilder:
|
||||
@@ -19,6 +20,7 @@ class EntrypointDocumentBuilder:
|
||||
"framework": entrypoint.framework,
|
||||
"route_or_command": entrypoint.route_or_command,
|
||||
"handler_symbol_id": entrypoint.handler_symbol_id,
|
||||
"is_test": is_test_path(source.path),
|
||||
"lang_payload": entrypoint.metadata,
|
||||
"artifact_type": "CODE",
|
||||
},
|
||||
|
||||
Binary file not shown.
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
from app.modules.rag.contracts import RagDocument, RagLayer, RagSource, RagSpan
|
||||
from app.modules.rag.indexing.code.symbols.extractor import PySymbol
|
||||
from app.modules.rag.retrieval.test_filter import is_test_path
|
||||
|
||||
|
||||
class SymbolDocumentBuilder:
|
||||
@@ -26,6 +27,7 @@ class SymbolDocumentBuilder:
|
||||
"parent_symbol_id": symbol.parent_symbol_id,
|
||||
"package_or_module": source.path.replace("/", ".").removesuffix(".py"),
|
||||
"is_entry_candidate": bool(symbol.decorators),
|
||||
"is_test": is_test_path(source.path),
|
||||
"lang_payload": symbol.lang_payload,
|
||||
"artifact_type": "CODE",
|
||||
},
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user