Фиксация изменений
This commit is contained in:
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,49 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
|
||||
class ScrubbingFormatter(logging.Formatter):
    """Log formatter that masks identifier-like values in the rendered line.

    The record is first rendered by ``logging.Formatter``; the resulting text
    is then passed through two groups of regular expressions and every
    matched value is replaced with the literal ``<redacted>``.
    """

    # ``<key>=<value>`` pairs whose key ends in ``id`` (with at least one
    # identifier character before it, so a bare ``id=`` is not matched) or in
    # ``_key``. The value runs up to the next whitespace or comma.
    _KEY_VALUE_PATTERNS = (
        re.compile(r"\b([A-Za-z_][A-Za-z0-9_]*id)=([^\s,]+)"),
        re.compile(r"\b([A-Za-z_][A-Za-z0-9_]*_key)=([^\s,]+)"),
    )
    # Free-text mentions such as ``task id: 42`` or ``Session ID=abc``
    # (case-insensitive, limited to the listed nouns).
    _TEXT_PATTERNS = (
        re.compile(r"\b(index|task|dialog|rag|session|plan|artifact|evidence|symbol|edge|entry) id\b[:=]\s*([^\s,]+)", re.IGNORECASE),
    )

    def format(self, record: logging.LogRecord) -> str:
        """Render ``record``, scrub it, and end it with exactly one newline."""
        cleaned = self._scrub(super().format(record))
        return cleaned.rstrip("\n") + "\n"

    def _scrub(self, message: str) -> str:
        """Apply the key/value patterns first, then the free-text patterns."""
        passes = (
            (self._KEY_VALUE_PATTERNS, self._replace_key_value),
            (self._TEXT_PATTERNS, self._replace_text),
        )
        text = message
        for patterns, replacer in passes:
            for regex in patterns:
                text = regex.sub(replacer, text)
        return text

    def _replace_key_value(self, match: re.Match[str]) -> str:
        """Keep the matched key and drop its value."""
        return match.group(1) + "=<redacted>"

    def _replace_text(self, match: re.Match[str]) -> str:
        """Normalise ``<noun> id: value`` to ``<noun> id=<redacted>``."""
        return "{} id=<redacted>".format(match.group(1))
|
||||
|
||||
|
||||
def configure_logging() -> None:
    """Reset root logging to WARNING and install the scrubbing formatter.

    ``basicConfig(force=True)`` replaces any handlers already attached to the
    root logger; each resulting root handler then gets a
    ``ScrubbingFormatter``. The uvicorn loggers are set to WARNING as well.
    """
    line_format = "%(levelname)s:%(name)s:%(message)s"
    logging.basicConfig(format=line_format, level=logging.WARNING, force=True)

    root = logging.getLogger()
    root.setLevel(logging.WARNING)

    scrubber = ScrubbingFormatter(line_format)
    for root_handler in root.handlers:
        root_handler.setFormatter(scrubber)

    for logger_name in ("uvicorn", "uvicorn.error", "uvicorn.access"):
        logging.getLogger(logger_name).setLevel(logging.WARNING)
|
||||
+10
@@ -1,10 +1,20 @@
|
||||
import logging
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.core.logging_setup import configure_logging
|
||||
from app.core.error_handlers import register_error_handlers
|
||||
from app.modules.application import ModularApplication
|
||||
|
||||
|
||||
def _configure_logging() -> None:
    """Delegate logging setup to ``app.core.logging_setup.configure_logging``."""
    configure_logging()
|
||||
|
||||
|
||||
_configure_logging()
|
||||
|
||||
|
||||
def create_app() -> FastAPI:
|
||||
app = FastAPI(title="Agent Backend MVP", version="0.1.0")
|
||||
modules = ModularApplication()
|
||||
|
||||
Binary file not shown.
@@ -37,6 +37,8 @@ classDiagram
|
||||
Методы: `run` — строит, валидирует и исполняет execution plan.
|
||||
- `TaskSpecBuilder`: формирует спецификацию задачи для оркестратора.
|
||||
Методы: `build` — собирает `TaskSpec` из route, контекстов и ограничений.
|
||||
- `ProjectQaConversationGraphFactory`, `ProjectQaClassificationGraphFactory`, `ProjectQaRetrievalGraphFactory`, `ProjectQaAnalysisGraphFactory`, `ProjectQaAnswerGraphFactory`: набор маленьких graph-исполнителей для `project/qa`.
|
||||
Роли: нормализация запроса; классификация project-question; поздний retrieval из `RAG`; анализ code/docs контекста; сборка финального ответа.
|
||||
- `StorySessionRecorder`: пишет session-scoped артефакты для последующего bind к Story.
|
||||
Методы: `record_run` — сохраняет входные источники и выходные артефакты сессии.
|
||||
- `StoryContextRepository`: репозиторий Story-контекста и его связей.
|
||||
@@ -58,3 +60,32 @@ sequenceDiagram
|
||||
Router->>Confluence: fetch_page(url)
|
||||
Confluence-->>Router: page(content_markdown, metadata)
|
||||
```
|
||||
|
||||
### `project/qa` reasoning flow
|
||||
Назначение: оркестратор планирует шаги, а каждый шаг исполняется отдельным graph. Retrieval вызывается поздно, внутри шага `context_retrieval`.
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Runtime as GraphAgentRuntime
|
||||
participant Orch as OrchestratorService
|
||||
participant G1 as conversation_understanding
|
||||
participant G2 as question_classification
|
||||
participant G3 as context_retrieval
|
||||
participant Rag as RagService
|
||||
participant G4 as context_analysis
|
||||
participant G5 as answer_composition
|
||||
|
||||
Runtime->>Orch: run(task)
|
||||
Orch->>G1: execute
|
||||
G1-->>Orch: resolved_request
|
||||
Orch->>G2: execute
|
||||
G2-->>Orch: question_profile
|
||||
Orch->>G3: execute
|
||||
G3->>Rag: retrieve(query)
|
||||
Rag-->>G3: rag_items
|
||||
G3-->>Orch: source_bundle
|
||||
Orch->>G4: execute
|
||||
G4-->>Orch: analysis_brief
|
||||
Orch->>G5: execute
|
||||
G5-->>Orch: final_answer
|
||||
Orch-->>Runtime: final_answer
|
||||
```
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,8 +1,13 @@
|
||||
__all__ = [
|
||||
"BaseGraphFactory",
|
||||
"DocsGraphFactory",
|
||||
"ProjectQaAnalysisGraphFactory",
|
||||
"ProjectQaAnswerGraphFactory",
|
||||
"ProjectQaClassificationGraphFactory",
|
||||
"ProjectQaConversationGraphFactory",
|
||||
"ProjectEditsGraphFactory",
|
||||
"ProjectQaGraphFactory",
|
||||
"ProjectQaRetrievalGraphFactory",
|
||||
]
|
||||
|
||||
|
||||
@@ -15,6 +20,26 @@ def __getattr__(name: str):
|
||||
from app.modules.agent.engine.graphs.docs_graph import DocsGraphFactory
|
||||
|
||||
return DocsGraphFactory
|
||||
if name == "ProjectQaConversationGraphFactory":
|
||||
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaConversationGraphFactory
|
||||
|
||||
return ProjectQaConversationGraphFactory
|
||||
if name == "ProjectQaClassificationGraphFactory":
|
||||
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaClassificationGraphFactory
|
||||
|
||||
return ProjectQaClassificationGraphFactory
|
||||
if name == "ProjectQaRetrievalGraphFactory":
|
||||
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaRetrievalGraphFactory
|
||||
|
||||
return ProjectQaRetrievalGraphFactory
|
||||
if name == "ProjectQaAnalysisGraphFactory":
|
||||
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaAnalysisGraphFactory
|
||||
|
||||
return ProjectQaAnalysisGraphFactory
|
||||
if name == "ProjectQaAnswerGraphFactory":
|
||||
from app.modules.agent.engine.graphs.project_qa_step_graphs import ProjectQaAnswerGraphFactory
|
||||
|
||||
return ProjectQaAnswerGraphFactory
|
||||
if name == "ProjectEditsGraphFactory":
|
||||
from app.modules.agent.engine.graphs.project_edits_graph import ProjectEditsGraphFactory
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -59,7 +59,7 @@ class BaseGraphFactory:
|
||||
f"Confluence context:\n{conf}",
|
||||
]
|
||||
)
|
||||
answer = self._llm.generate("general_answer", user_input)
|
||||
answer = self._llm.generate("general_answer", user_input, log_context="graph.default.answer")
|
||||
emit_progress_sync(
|
||||
state,
|
||||
stage="graph.default.answer.done",
|
||||
|
||||
@@ -52,7 +52,7 @@ class DocsContextAnalyzer:
|
||||
f"Detected documentation candidates:\n{snippets}",
|
||||
]
|
||||
)
|
||||
raw = self._llm.generate("docs_detect", user_input)
|
||||
raw = self._llm.generate("docs_detect", user_input, log_context="graph.docs.detect_existing_docs")
|
||||
exists = self.parse_bool_marker(raw, "exists", default=True)
|
||||
summary = self.parse_text_marker(raw, "summary", default="Documentation files detected.")
|
||||
return {"existing_docs_detected": exists, "existing_docs_summary": summary}
|
||||
@@ -71,7 +71,7 @@ class DocsContextAnalyzer:
|
||||
f"Existing docs summary:\n{state.get('existing_docs_summary', '')}",
|
||||
]
|
||||
)
|
||||
raw = self._llm.generate("docs_strategy", user_input)
|
||||
raw = self._llm.generate("docs_strategy", user_input, log_context="graph.docs.decide_strategy")
|
||||
strategy = self.parse_text_marker(raw, "strategy", default="").lower()
|
||||
if strategy not in {"incremental_update", "from_scratch"}:
|
||||
strategy = "incremental_update" if state.get("existing_docs_detected", False) else "from_scratch"
|
||||
@@ -260,7 +260,7 @@ class DocsContentComposer:
|
||||
f"Examples bundle:\n{state.get('rules_bundle', '')}",
|
||||
]
|
||||
)
|
||||
plan = self._llm.generate("docs_plan_sections", user_input)
|
||||
plan = self._llm.generate("docs_plan_sections", user_input, log_context="graph.docs.plan_incremental_changes")
|
||||
return {
|
||||
"doc_plan": plan,
|
||||
"target_path": target_path,
|
||||
@@ -279,7 +279,7 @@ class DocsContentComposer:
|
||||
f"Examples bundle:\n{state.get('rules_bundle', '')}",
|
||||
]
|
||||
)
|
||||
plan = self._llm.generate("docs_plan_sections", user_input)
|
||||
plan = self._llm.generate("docs_plan_sections", user_input, log_context="graph.docs.plan_new_document")
|
||||
return {"doc_plan": plan, "target_path": target_path, "target_file_content": "", "target_file_hash": ""}
|
||||
|
||||
def generate_doc_content(self, state: AgentGraphState) -> dict:
|
||||
@@ -294,7 +294,7 @@ class DocsContentComposer:
|
||||
f"Examples bundle:\n{state.get('rules_bundle', '')}",
|
||||
]
|
||||
)
|
||||
raw = self._llm.generate("docs_generation", user_input)
|
||||
raw = self._llm.generate("docs_generation", user_input, log_context="graph.docs.generate_doc_content")
|
||||
bundle = self._bundle.parse_docs_bundle(raw)
|
||||
if bundle:
|
||||
first_content = str(bundle[0].get("content", "")).strip()
|
||||
@@ -369,7 +369,7 @@ class DocsContentComposer:
|
||||
f"Generated document:\n{generated}",
|
||||
]
|
||||
)
|
||||
raw = self._llm.generate("docs_self_check", user_input)
|
||||
raw = self._llm.generate("docs_self_check", user_input, log_context="graph.docs.self_check")
|
||||
passed = DocsContextAnalyzer.parse_bool_marker(raw, "pass", default=False)
|
||||
feedback = DocsContextAnalyzer.parse_text_marker(raw, "feedback", default="No validation feedback provided.")
|
||||
return {"validation_attempts": attempts, "validation_passed": passed, "validation_feedback": feedback}
|
||||
@@ -379,7 +379,7 @@ class DocsContentComposer:
|
||||
bundle = state.get("generated_docs_bundle", []) or []
|
||||
strategy = state.get("docs_strategy", "from_scratch")
|
||||
if strategy == "from_scratch" and not self._bundle.bundle_has_required_structure(bundle):
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"build_changeset fallback bundle used: strategy=%s bundle_items=%s",
|
||||
strategy,
|
||||
len(bundle),
|
||||
@@ -452,7 +452,11 @@ class DocsContentComposer:
|
||||
]
|
||||
)
|
||||
try:
|
||||
summary = self._llm.generate("docs_execution_summary", user_input).strip()
|
||||
summary = self._llm.generate(
|
||||
"docs_execution_summary",
|
||||
user_input,
|
||||
log_context="graph.docs.summarize_result",
|
||||
).strip()
|
||||
except Exception:
|
||||
summary = ""
|
||||
if not summary:
|
||||
|
||||
@@ -48,7 +48,9 @@ class ProjectEditsLogic:
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
parsed = self._support.parse_json(self._llm.generate("project_edits_plan", user_input))
|
||||
parsed = self._support.parse_json(
|
||||
self._llm.generate("project_edits_plan", user_input, log_context="graph.project_edits.plan_changes")
|
||||
)
|
||||
contracts = self._contracts.parse(
|
||||
parsed,
|
||||
request=str(state.get("message", "")),
|
||||
@@ -165,7 +167,13 @@ class ProjectEditsLogic:
|
||||
"changeset": [{"op": x.op.value, "path": x.path, "reason": x.reason} for x in changeset[:20]],
|
||||
"rule": "Changes must stay inside contract blocks and not affect unrelated sections.",
|
||||
}
|
||||
parsed = self._support.parse_json(self._llm.generate("project_edits_self_check", json.dumps(payload, ensure_ascii=False)))
|
||||
parsed = self._support.parse_json(
|
||||
self._llm.generate(
|
||||
"project_edits_self_check",
|
||||
json.dumps(payload, ensure_ascii=False),
|
||||
log_context="graph.project_edits.self_check",
|
||||
)
|
||||
)
|
||||
passed = bool(parsed.get("pass")) if isinstance(parsed, dict) else False
|
||||
feedback = str(parsed.get("feedback", "")).strip() if isinstance(parsed, dict) else ""
|
||||
return {
|
||||
@@ -192,7 +200,11 @@ class ProjectEditsLogic:
|
||||
"rag_context": self._support.shorten(state.get("rag_context", ""), 5000),
|
||||
"confluence_context": self._support.shorten(state.get("confluence_context", ""), 5000),
|
||||
}
|
||||
raw = self._llm.generate("project_edits_hunks", json.dumps(prompt_payload, ensure_ascii=False))
|
||||
raw = self._llm.generate(
|
||||
"project_edits_hunks",
|
||||
json.dumps(prompt_payload, ensure_ascii=False),
|
||||
log_context="graph.project_edits.generate_changeset",
|
||||
)
|
||||
parsed = self._support.parse_json(raw)
|
||||
hunks = parsed.get("hunks", []) if isinstance(parsed, dict) else []
|
||||
if not isinstance(hunks, list) or not hunks:
|
||||
|
||||
@@ -33,7 +33,7 @@ class ProjectQaGraphFactory:
|
||||
f"Confluence context:\n{state.get('confluence_context', '')}",
|
||||
]
|
||||
)
|
||||
answer = self._llm.generate("project_answer", user_input)
|
||||
answer = self._llm.generate("project_answer", user_input, log_context="graph.project_qa.answer")
|
||||
emit_progress_sync(
|
||||
state,
|
||||
stage="graph.project_qa.answer.done",
|
||||
|
||||
@@ -0,0 +1,172 @@
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
|
||||
from langgraph.graph import END, START, StateGraph
|
||||
|
||||
from app.modules.agent.engine.graphs.progress import emit_progress_sync
|
||||
from app.modules.agent.engine.graphs.state import AgentGraphState
|
||||
from app.modules.agent.engine.orchestrator.actions.project_qa_analyzer import ProjectQaAnalyzer
|
||||
from app.modules.agent.engine.orchestrator.actions.project_qa_support import ProjectQaSupport
|
||||
from app.modules.agent.llm import AgentLlmService
|
||||
from app.modules.contracts import RagRetriever
|
||||
from app.modules.rag.explain import ExplainPack, PromptBudgeter
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ProjectQaConversationGraphFactory:
    """Single-node graph for the ``conversation_understanding`` step."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        # ``llm`` is accepted but not used by this factory.
        self._support = ProjectQaSupport()

    def build(self, checkpointer=None):
        """Compile a START -> resolve_request -> END graph."""
        node = "resolve_request"
        builder = StateGraph(AgentGraphState)
        builder.add_node(node, self._resolve_request)
        builder.add_edge(START, node)
        builder.add_edge(node, END)
        return builder.compile(checkpointer=checkpointer)

    def _resolve_request(self, state: AgentGraphState) -> dict:
        """Normalise the user message and return it as ``resolved_request``."""
        emit_progress_sync(
            state,
            stage="graph.project_qa.conversation_understanding",
            message="Нормализую пользовательский запрос.",
        )
        raw_message = str(state.get("message", "") or "")
        request = self._support.resolve_request(raw_message)
        LOGGER.warning(
            "graph step result: graph=project_qa/conversation_understanding normalized=%s",
            request.get("normalized_message", ""),
        )
        return {"resolved_request": request}
|
||||
|
||||
|
||||
class ProjectQaClassificationGraphFactory:
    """Single-node graph for the ``question_classification`` step."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        # ``llm`` is accepted but not used by this factory.
        self._support = ProjectQaSupport()

    def build(self, checkpointer=None):
        """Compile a START -> classify_question -> END graph."""
        node = "classify_question"
        builder = StateGraph(AgentGraphState)
        builder.add_node(node, self._classify_question)
        builder.add_edge(START, node)
        builder.add_edge(node, END)
        return builder.compile(checkpointer=checkpointer)

    def _classify_question(self, state: AgentGraphState) -> dict:
        """Build the question profile from the normalised (or raw) message."""
        request = state.get("resolved_request", {}) or {}
        # Prefer the normalised text; fall back to the raw user message.
        text = str(request.get("normalized_message") or state.get("message", "") or "")
        question_profile = self._support.build_profile(text)
        LOGGER.warning(
            "graph step result: graph=project_qa/question_classification domain=%s intent=%s",
            question_profile.get("domain"),
            question_profile.get("intent"),
        )
        return {"question_profile": question_profile}
|
||||
|
||||
|
||||
class ProjectQaRetrievalGraphFactory:
    """Single-node graph for the ``context_retrieval`` step.

    The factory stores the injected retriever, but the step as written does
    not call it: the source bundle is always built from an empty RAG item
    list plus the ``files_map`` found in the graph state.
    """

    def __init__(self, rag: RagRetriever, llm: AgentLlmService | None = None) -> None:
        # ``rag`` is kept on the instance but not used by ``_retrieve_context``;
        # ``llm`` is accepted but not used by this factory.
        self._rag = rag
        self._support = ProjectQaSupport()

    def build(self, checkpointer=None):
        """Compile a START -> retrieve_context -> END graph."""
        graph = StateGraph(AgentGraphState)
        graph.add_node("retrieve_context", self._retrieve_context)
        graph.add_edge(START, "retrieve_context")
        graph.add_edge("retrieve_context", END)
        return graph.compile(checkpointer=checkpointer)

    def _retrieve_context(self, state: AgentGraphState) -> dict:
        """Build ``source_bundle`` from the question profile and ``files_map``.

        Returns:
            ``{"source_bundle": ...}`` as produced by
            ``ProjectQaSupport.build_source_bundle``.
        """
        emit_progress_sync(state, stage="graph.project_qa.context_retrieval", message="Собираю контекст по проекту.")
        profile = state.get("question_profile", {}) or {}
        files_map = dict(state.get("files_map", {}) or {})
        # No retriever call is made here, so the RAG item list is always empty
        # (the log line below reports this as legacy_rag=False).
        rag_items: list[dict] = []
        source_bundle = self._support.build_source_bundle(profile, rag_items, files_map)
        LOGGER.warning(
            "graph step result: graph=project_qa/context_retrieval mode=%s rag_items=%s file_candidates=%s legacy_rag=%s",
            profile.get("domain"),
            len(source_bundle.get("rag_items", []) or []),
            len(source_bundle.get("file_candidates", []) or []),
            False,
        )
        return {"source_bundle": source_bundle}
|
||||
|
||||
|
||||
class ProjectQaAnalysisGraphFactory:
    """Single-node graph for the ``context_analysis`` step."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        # ``llm`` is accepted but not used by this factory.
        self._support = ProjectQaSupport()
        self._analyzer = ProjectQaAnalyzer()

    def build(self, checkpointer=None):
        """Compile a START -> analyze_context -> END graph."""
        node = "analyze_context"
        builder = StateGraph(AgentGraphState)
        builder.add_node(node, self._analyze_context)
        builder.add_edge(START, node)
        builder.add_edge(node, END)
        return builder.compile(checkpointer=checkpointer)

    def _analyze_context(self, state: AgentGraphState) -> dict:
        """Produce ``analysis_brief`` from the explain pack or the source bundle.

        A truthy ``explain_pack`` in the state takes precedence; otherwise the
        analyzer runs on the collected source bundle.
        """
        raw_pack = state.get("explain_pack")
        if raw_pack:
            brief = self._analysis_from_pack(raw_pack)
        else:
            brief = self._analysis_from_bundle(state)
        LOGGER.warning(
            "graph step result: graph=project_qa/context_analysis findings=%s evidence=%s",
            len(brief.get("findings", []) or []),
            len(brief.get("evidence", []) or []),
        )
        return {"analysis_brief": brief}

    def _analysis_from_bundle(self, state: AgentGraphState) -> dict:
        """Run the code or docs analyzer on ``source_bundle``."""
        source_bundle = state.get("source_bundle", {}) or {}
        question_profile = source_bundle.get("profile", {}) or state.get("question_profile", {}) or {}
        rag_hits = list(source_bundle.get("rag_items", []) or [])
        candidates = list(source_bundle.get("file_candidates", []) or [])
        if str(question_profile.get("domain")) == "code":
            return self._analyzer.analyze_code(question_profile, rag_hits, candidates)
        return self._analyzer.analyze_docs(question_profile, rag_hits)

    def _analysis_from_pack(self, raw_pack) -> dict:
        """Convert a raw explain pack into the ``analysis_brief`` dictionary.

        Uses at most three entrypoints, three trace paths and four code
        excerpts from the validated pack.
        """
        pack = ExplainPack.model_validate(raw_pack)
        found: list[str] = []
        proof: list[str] = []

        for entry in pack.selected_entrypoints[:3]:
            handler = entry.metadata.get('handler_symbol_id', '')
            found.append(f"Entrypoint `{entry.title}` maps to handler `{handler}`.")
            if entry.source:
                proof.append(entry.source)

        found.extend(
            f"Trace path: {' -> '.join(trace.symbol_ids)}"
            for trace in pack.trace_paths[:3]
            if trace.symbol_ids
        )
        proof.extend(
            f"{snippet.path}:{snippet.start_line}-{snippet.end_line} [{snippet.evidence_id}]"
            for snippet in pack.code_excerpts[:4]
        )

        return {
            "subject": pack.intent.normalized_query,
            "findings": found or ["No explain trace was built from the available code evidence."],
            "evidence": proof,
            "gaps": list(pack.missing),
            "answer_mode": "summary",
        }
|
||||
|
||||
|
||||
class ProjectQaAnswerGraphFactory:
    """Single-node graph for the ``answer_composition`` step."""

    def __init__(self, llm: AgentLlmService | None = None) -> None:
        self._support = ProjectQaSupport()
        self._llm = llm
        self._budgeter = PromptBudgeter()

    def build(self, checkpointer=None):
        """Compile a START -> compose_answer -> END graph."""
        node = "compose_answer"
        builder = StateGraph(AgentGraphState)
        builder.add_node(node, self._compose_answer)
        builder.add_edge(START, node)
        builder.add_edge(node, END)
        return builder.compile(checkpointer=checkpointer)

    def _compose_answer(self, state: AgentGraphState) -> dict:
        """Return the answer brief and the final answer text.

        The LLM-based explain answer is tried first; when it yields an empty
        string, the answer is composed from the brief instead.
        """
        question_profile = state.get("question_profile", {}) or {}
        analysis_brief = state.get("analysis_brief", {}) or {}
        answer_brief = self._support.build_answer_brief(question_profile, analysis_brief)
        raw_pack = state.get("explain_pack")
        final_text = self._compose_explain_answer(state, raw_pack) or self._support.compose_answer(answer_brief)
        LOGGER.warning(
            "graph step result: graph=project_qa/answer_composition answer_len=%s",
            len(final_text or ""),
        )
        return {"answer_brief": answer_brief, "final_answer": final_text}

    def _compose_explain_answer(self, state: AgentGraphState, raw_pack) -> str:
        """Generate an answer from the explain pack via the LLM.

        Returns an empty string when there is no pack or no LLM service.
        """
        if raw_pack is None:
            return ""
        if self._llm is None:
            return ""
        explain_pack = ExplainPack.model_validate(raw_pack)
        user_message = str(state.get("message", "") or "")
        prompt = self._budgeter.build_prompt_input(user_message, explain_pack)
        generated = self._llm.generate(
            "code_explain_answer_v2",
            prompt,
            log_context="graph.project_qa.answer_v2",
        )
        return generated.strip()
|
||||
@@ -25,6 +25,12 @@ class AgentGraphState(TypedDict, total=False):
|
||||
validation_passed: bool
|
||||
validation_feedback: str
|
||||
validation_attempts: int
|
||||
resolved_request: dict
|
||||
question_profile: dict
|
||||
source_bundle: dict
|
||||
analysis_brief: dict
|
||||
answer_brief: dict
|
||||
final_answer: str
|
||||
answer: str
|
||||
changeset: list[ChangeItem]
|
||||
edits_requested_path: str
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,13 +1,17 @@
|
||||
from app.modules.agent.engine.orchestrator.actions.code_explain_actions import CodeExplainActions
|
||||
from app.modules.agent.engine.orchestrator.actions.docs_actions import DocsActions
|
||||
from app.modules.agent.engine.orchestrator.actions.edit_actions import EditActions
|
||||
from app.modules.agent.engine.orchestrator.actions.explain_actions import ExplainActions
|
||||
from app.modules.agent.engine.orchestrator.actions.gherkin_actions import GherkinActions
|
||||
from app.modules.agent.engine.orchestrator.actions.project_qa_actions import ProjectQaActions
|
||||
from app.modules.agent.engine.orchestrator.actions.review_actions import ReviewActions
|
||||
|
||||
__all__ = [
|
||||
"CodeExplainActions",
|
||||
"DocsActions",
|
||||
"EditActions",
|
||||
"ExplainActions",
|
||||
"GherkinActions",
|
||||
"ProjectQaActions",
|
||||
"ReviewActions",
|
||||
]
|
||||
|
||||
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,46 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
|
||||
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
|
||||
from app.modules.agent.engine.orchestrator.models import ArtifactType
|
||||
from app.modules.rag.explain.intent_builder import ExplainIntentBuilder
|
||||
from app.modules.rag.explain.models import ExplainPack
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CodeExplainActions(ActionSupport):
    """Orchestrator action that builds the ``explain_pack`` artifact."""

    def __init__(self, retriever: CodeExplainRetrieverV2 | None = None) -> None:
        self._retriever = retriever
        self._intent_builder = ExplainIntentBuilder()

    def build_code_explain_pack(self, ctx: ExecutionContext) -> list[str]:
        """Build an explain pack for the task and store it as structured JSON.

        Without a retriever, the pack carries only the intent and a
        ``code_explain_retriever_unavailable`` marker in ``missing``.

        Returns:
            A one-element list holding the value returned by ``self.put``.
        """
        source_bundle = self.get(ctx, "source_bundle", {}) or {}
        candidates = list(source_bundle.get("file_candidates", []) or [])
        task = ctx.task

        if self._retriever is not None:
            explain_pack = self._retriever.build_pack(
                task.rag_session_id,
                task.user_message,
                file_candidates=candidates,
            )
        else:
            explain_pack = ExplainPack(
                intent=self._intent_builder.build(task.user_message),
                missing=["code_explain_retriever_unavailable"],
            )

        LOGGER.warning(
            "code explain action: task_id=%s entrypoints=%s seeds=%s paths=%s excerpts=%s missing=%s",
            task.task_id,
            len(explain_pack.selected_entrypoints),
            len(explain_pack.seed_symbols),
            len(explain_pack.trace_paths),
            len(explain_pack.code_excerpts),
            explain_pack.missing,
        )
        payload = explain_pack.model_dump(mode="json")
        return [self.put(ctx, "explain_pack", ArtifactType.STRUCTURED_JSON, payload)]
|
||||
@@ -0,0 +1,117 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.modules.agent.engine.orchestrator.actions.project_qa_analyzer import ProjectQaAnalyzer
|
||||
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
|
||||
from app.modules.agent.engine.orchestrator.actions.project_qa_support import ProjectQaSupport
|
||||
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
|
||||
from app.modules.agent.engine.orchestrator.models import ArtifactType
|
||||
|
||||
|
||||
class ProjectQaActions(ActionSupport):
    """Orchestrator actions for the ``project/qa`` pipeline.

    Each action reads earlier artifacts with ``self.get``, stores its result
    with ``self.put`` and returns a one-element list holding the value that
    ``self.put`` returned.
    """

    def __init__(self) -> None:
        self._support = ProjectQaSupport()
        self._analyzer = ProjectQaAnalyzer()

    def classify_project_question(self, ctx: ExecutionContext) -> list[str]:
        """Build the question profile from the task's user message."""
        question_profile = self._support.build_profile(str(ctx.task.user_message or ""))
        return [self.put(ctx, "question_profile", ArtifactType.STRUCTURED_JSON, question_profile)]

    def collect_project_sources(self, ctx: ExecutionContext) -> list[str]:
        """Score and rank RAG items and files from the task metadata.

        Test paths are penalised by 3 points unless the question terms
        mention tests explicitly. Only positively scored entries are kept;
        the bundle holds the top 12 RAG items and top 10 files.
        """
        question_profile = self.get(ctx, "question_profile", {}) or {}
        terms = list(question_profile.get("terms", []) or [])
        entities = list(question_profile.get("entities", []) or [])
        rag_items = list(ctx.task.metadata.get("rag_items", []) or [])
        files_map = dict(ctx.task.metadata.get("files_map", {}) or {})
        explicit_test = any(term in {"test", "tests", "тест", "тесты"} for term in terms)

        scored_rag = []
        for rag_item in rag_items:
            points = self._support.rag_score(rag_item, terms, entities)
            origin = str(rag_item.get("source", "") or "")
            if not explicit_test and self._support.is_test_path(origin):
                points -= 3
            if points > 0:
                scored_rag.append((points, rag_item))
        # sorted() is stable, like the in-place sort: ties keep input order.
        ranked_rag = sorted(scored_rag, key=lambda pair: pair[0], reverse=True)

        scored_files = []
        for file_path, file_payload in files_map.items():
            points = self._support.file_score(file_path, file_payload, terms, entities)
            if not explicit_test and self._support.is_test_path(file_path):
                points -= 3
            if points > 0:
                candidate = {
                    "path": file_path,
                    "content": str(file_payload.get("content", "")),
                    "content_hash": str(file_payload.get("content_hash", "")),
                }
                scored_files.append((points, candidate))
        ranked_files = sorted(scored_files, key=lambda pair: pair[0], reverse=True)

        source_bundle = {
            "profile": question_profile,
            "rag_items": [entry for _, entry in ranked_rag[:12]],
            "file_candidates": [entry for _, entry in ranked_files[:10]],
            "rag_total": len(ranked_rag),
            "files_total": len(ranked_files),
        }
        return [self.put(ctx, "source_bundle", ArtifactType.STRUCTURED_JSON, source_bundle)]

    def analyze_project_sources(self, ctx: ExecutionContext) -> list[str]:
        """Run the code or docs analyzer, chosen by the profile's ``domain``."""
        source_bundle = self.get(ctx, "source_bundle", {}) or {}
        question_profile = source_bundle.get("profile", {}) or {}
        rag_hits = list(source_bundle.get("rag_items", []) or [])
        candidates = list(source_bundle.get("file_candidates", []) or [])

        is_code_question = str(question_profile.get("domain")) == "code"
        analysis_brief = (
            self._analyzer.analyze_code(question_profile, rag_hits, candidates)
            if is_code_question
            else self._analyzer.analyze_docs(question_profile, rag_hits)
        )
        return [self.put(ctx, "analysis_brief", ArtifactType.STRUCTURED_JSON, analysis_brief)]

    def build_project_answer_brief(self, ctx: ExecutionContext) -> list[str]:
        """Combine the question profile and the analysis into an answer brief."""
        question_profile = self.get(ctx, "question_profile", {}) or {}
        analysis_brief = self.get(ctx, "analysis_brief", {}) or {}
        answer_brief = {
            "question_profile": question_profile,
            "resolved_subject": analysis_brief.get("subject"),
            "key_findings": analysis_brief.get("findings", []),
            "supporting_evidence": analysis_brief.get("evidence", []),
            "missing_evidence": analysis_brief.get("gaps", []),
            "answer_mode": analysis_brief.get("answer_mode", "summary"),
        }
        return [self.put(ctx, "answer_brief", ArtifactType.STRUCTURED_JSON, answer_brief)]

    def compose_project_answer(self, ctx: ExecutionContext) -> list[str]:
        """Render the answer brief as Markdown text.

        Headings are in Russian when the profile's ``russian`` flag is
        truthy, English otherwise. At most five evidence entries and three
        gaps are listed.
        """
        answer_brief = self.get(ctx, "answer_brief", {}) or {}
        question_profile = answer_brief.get("question_profile", {}) or {}
        russian = bool(question_profile.get("russian"))
        answer_mode = str(answer_brief.get("answer_mode") or "summary")
        findings = list(answer_brief.get("key_findings", []) or [])
        evidence = list(answer_brief.get("supporting_evidence", []) or [])
        gaps = list(answer_brief.get("missing_evidence", []) or [])

        def pick(ru_text: str, en_text: str) -> str:
            # Choose the wording that matches the question language.
            return ru_text if russian else en_text

        if answer_mode == "inventory":
            section_heading = pick("### Что реализовано", "### Implemented items")
        else:
            section_heading = pick("### Что видно по проекту", "### What the project shows")
        out = [pick("## Кратко", "## Summary"), section_heading]

        if findings:
            out.extend(f"- {item}" for item in findings)
        else:
            out.append(
                pick(
                    "Не удалось собрать подтвержденные выводы по доступным данным.",
                    "No supported findings could be assembled from the available data.",
                )
            )

        if evidence:
            out.append("")
            out.append(pick("### Где смотреть в проекте", "### Where to look in the project"))
            out.extend(f"- `{item}`" for item in evidence[:5])

        if gaps:
            out.append("")
            out.append(pick("### Что пока не подтверждено кодом", "### What is not yet confirmed in code"))
            out.extend(f"- {item}" for item in gaps[:3])

        return [self.put(ctx, "final_answer", ArtifactType.TEXT, "\n".join(out))]
|
||||
@@ -0,0 +1,154 @@
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class ProjectQaAnalyzer:
    """Heuristic analyzer that turns retrieved project context into findings, evidence and gaps.

    All methods are stateless: they only read their arguments and return new
    lists/dicts.
    """

    def analyze_code(self, profile: dict, rag_items: list[dict], file_candidates: list[dict]) -> dict:
        """Derive findings about management channels from code-oriented context.

        Args:
            profile: Question profile; the keys ``terms``, ``entities``,
                ``intent`` and ``russian`` are read. Missing or ``None``
                values are treated as empty/default.
            rag_items: Retrieved items; ``title``, ``layer``, ``path`` and
                ``source`` are read.
            file_candidates: Candidate files; ``path`` and ``content`` are read.

        Returns:
            A dict with ``subject``, ``findings``, ``evidence``, ``gaps`` and
            ``answer_mode`` (``"inventory"`` only when the intent is
            ``"inventory"``, otherwise ``"summary"``).
        """
        terms = list(profile.get("terms", []) or [])
        # Normalised once so a missing or None "entities" value cannot break
        # the membership test or the join that builds the subject below.
        entities = list(profile.get("entities", []) or [])
        intent = str(profile.get("intent") or "lookup")
        russian = bool(profile.get("russian"))
        findings: list[str] = []
        evidence: list[str] = []
        gaps: list[str] = []

        # Only items whose layer starts with "C1" contribute symbol titles.
        symbol_titles = [str(item.get("title", "") or "") for item in rag_items if str(item.get("layer", "")).startswith("C1")]
        symbol_set = set(symbol_titles)
        file_paths = [str(item.get("path", "") or item.get("source", "") or "") for item in rag_items]
        file_paths.extend(str(item.get("path", "") or "") for item in file_candidates)

        if "ConfigManager" in entities or "configmanager" in terms or "config_manager" in terms:
            alias_file = self.find_path(file_paths, "src/config_manager/__init__.py")
            if alias_file:
                findings.append(
                    "Публичный `ConfigManager` экспортируется из `src/config_manager/__init__.py` как alias на `ConfigManagerV2`."
                    if russian
                    else "Public `ConfigManager` is exported from `src/config_manager/__init__.py` as an alias to `ConfigManagerV2`."
                )
                evidence.append("src/config_manager/__init__.py")

        # Case-insensitive match here, unlike the exact-case bridge check below.
        if "controlchannel" in {name.lower() for name in symbol_set}:
            findings.append(
                "Базовый контракт управления задает `ControlChannel`: он определяет команды `start` и `stop` для внешнего канала управления."
                if russian
                else "`ControlChannel` defines the base management contract with `start` and `stop` commands."
            )
            # NOTE(review): this evidence path is a literal; it is not checked against file_paths.
            evidence.append("src/config_manager/v2/control/base.py")

        if "ControlChannelBridge" in symbol_set:
            findings.append(
                "`ControlChannelBridge` связывает внешний канал управления с lifecycle-методами менеджера: `on_start`, `on_stop`, `on_status`."
                if russian
                else "`ControlChannelBridge` connects the external control channel to manager lifecycle methods: `on_start`, `on_stop`, `on_status`."
            )
            # NOTE(review): this evidence path is a literal; it is not checked against file_paths.
            evidence.append("src/config_manager/v2/core/control_bridge.py")

        implementation_files = self.find_management_implementations(file_candidates)
        if implementation_files:
            labels = ", ".join(f"`{path}`" for path in implementation_files)
            channel_names = self.implementation_names(implementation_files)
            findings.append(
                f"В коде найдены конкретные реализации каналов управления: {', '.join(channel_names)} ({labels})."
                if russian
                else f"Concrete management channel implementations were found in code: {', '.join(channel_names)} ({labels})."
            )
            evidence.extend(implementation_files)
        elif intent == "inventory":
            # A gap is reported only for inventory questions with no implementation files.
            gaps.append(
                "В текущем контексте не удалось уверенно подтвердить конкретные файлы-реализации каналов, кроме базового контракта и bridge-слоя."
                if russian
                else "The current context does not yet confirm concrete channel implementation files beyond the base contract and bridge layer."
            )

        package_doc = self.find_management_doc(file_candidates)
        if package_doc:
            findings.append(
                f"Пакет управления прямо описывает внешние каналы через `{package_doc}`."
                if russian
                else f"The control package directly describes external channels in `{package_doc}`."
            )
            evidence.append(package_doc)

        subject = "management channels"
        if entities:
            subject = ", ".join(entities)
        return {
            "subject": subject,
            "findings": self.dedupe(findings),
            "evidence": self.dedupe(evidence),
            "gaps": gaps,
            "answer_mode": "inventory" if intent == "inventory" else "summary",
        }

    def analyze_docs(self, profile: dict, rag_items: list[dict]) -> dict:
        """Summarise the first five retrieved items for a documentation question.

        For each item the first line of its stripped content (cut to 220
        characters) becomes a finding; its ``source`` (or, failing that, its
        ``title``) becomes evidence. A single localized gap is reported when
        no findings were collected.
        """
        findings: list[str] = []
        evidence: list[str] = []
        for item in rag_items[:5]:
            title = str(item.get("title", "") or "")
            source = str(item.get("source", "") or "")
            content = str(item.get("content", "") or "").strip()
            if content:
                # content is non-empty after strip(), so splitlines() has a first element.
                findings.append(content.splitlines()[0][:220])
            if source:
                evidence.append(source)
            elif title:
                evidence.append(title)
        return {
            "subject": "docs",
            "findings": self.dedupe(findings),
            "evidence": self.dedupe(evidence),
            "gaps": [] if findings else ["Недостаточно данных в документации." if profile.get("russian") else "Not enough data in documentation."],
            "answer_mode": "summary",
        }

    def find_management_implementations(self, file_candidates: list[dict]) -> list[str]:
        """Return up to four non-test paths that look like control-channel implementations.

        A candidate qualifies when its path contains a known channel file
        name, when its content mentions ``controlchannel`` together with
        ``class ``, or when the path mentions ``channel``/``control`` and the
        content mentions ``http``, ``telegram`` or ``bot``. All checks are
        plain case-insensitive substring tests.
        """
        found: list[str] = []
        for item in file_candidates:
            path = str(item.get("path", "") or "")
            lowered = path.lower()
            if self.is_test_path(path):
                continue
            if any(token in lowered for token in ("http_channel.py", "telegram.py", "telegram_channel.py", "http.py")):
                found.append(path)
                continue
            content = str(item.get("content", "") or "").lower()
            if "controlchannel" in content and "class " in content:
                found.append(path)
                continue
            if ("channel" in lowered or "control" in lowered) and any(token in content for token in ("http", "telegram", "bot")):
                found.append(path)
        return self.dedupe(found)[:4]

    def implementation_names(self, paths: list[str]) -> list[str]:
        """Turn file paths into unique human-readable labels.

        The label is the file name without its last extension, with
        underscores replaced by spaces. Only ``/`` is treated as a path
        separator. First-seen order is kept.
        """
        names: list[str] = []
        for path in paths:
            stem = path.rsplit("/", 1)[-1].rsplit(".", 1)[0]
            label = stem.replace("_", " ").strip()
            if label and label not in names:
                names.append(label)
        return names

    def find_management_doc(self, file_candidates: list[dict]) -> str | None:
        """Return the path of the first non-test candidate whose content mentions external control channels.

        Returns ``None`` when no candidate matches. The returned path may be
        an empty string if the matching candidate has no ``path``.
        """
        for item in file_candidates:
            path = str(item.get("path", "") or "")
            if self.is_test_path(path):
                continue
            content = str(item.get("content", "") or "").lower()
            if any(token in content for token in ("каналы внешнего управления", "external control channels", "http api", "telegram")):
                return path
        return None

    def find_path(self, paths: list[str], target: str) -> str | None:
        """Return ``target`` if it appears verbatim in ``paths``, otherwise ``None``."""
        for path in paths:
            if path == target:
                return path
        return None

    def dedupe(self, items: list[str]) -> list[str]:
        """Drop empty values and duplicates while keeping first-seen order."""
        # dict.fromkeys preserves insertion order and gives linear-time
        # de-duplication instead of a list membership scan per element.
        return list(dict.fromkeys(item for item in items if item))

    def is_test_path(self, path: str) -> bool:
        """Report whether ``path`` looks like a test file or lives in a ``tests`` directory."""
        lowered = path.lower()
        return lowered.startswith(("tests/", "test_")) or "/tests/" in lowered or "/test_" in lowered
|
||||
@@ -0,0 +1,166 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from app.modules.rag.retrieval.query_terms import extract_query_terms
|
||||
|
||||
|
||||
class ProjectQaSupport:
    """Stateless helpers for profiling a project question, ranking sources and composing the answer."""

    def resolve_request(self, message: str) -> dict:
        """Build the resolved-request payload for a user message.

        Returns a dict with the original message, a whitespace-collapsed
        copy, the first extracted entity (or ``""``) as ``subject_hint``, the
        detected domain as ``source_hint`` and the ``russian`` flag.
        """
        profile = self.build_profile(message)
        subject = profile["entities"][0] if profile.get("entities") else ""
        return {
            "original_message": message,
            "normalized_message": " ".join((message or "").split()),
            "subject_hint": subject,
            "source_hint": profile["domain"],
            "russian": profile["russian"],
        }

    def build_profile(self, message: str) -> dict:
        """Classify a message into domain, intent, query terms, entities and language.

        A ``None`` message is treated as an empty string, matching the guard
        ``resolve_request`` already applies when normalising whitespace.
        """
        text = message or ""
        lowered = text.lower()
        return {
            "domain": "code" if self.looks_like_code_question(lowered) else "docs",
            "intent": self.detect_intent(lowered),
            "terms": extract_query_terms(text),
            "entities": self.extract_entities(text),
            "russian": self.is_russian(text),
        }

    def build_retrieval_query(self, resolved_request: dict, profile: dict) -> str:
        """Return the retrieval query, prefixed with "по коду" for code questions.

        The prefix is added only when the domain is ``"code"`` and the phrase
        is not already present (case-insensitively) in the message.
        """
        normalized = str(resolved_request.get("normalized_message") or resolved_request.get("original_message") or "").strip()
        if profile.get("domain") == "code" and "по коду" not in normalized.lower():
            return f"по коду {normalized}".strip()
        return normalized

    def build_source_bundle(self, profile: dict, rag_items: list[dict], files_map: dict[str, dict]) -> dict:
        """Score, filter and rank retrieved items and files for a question.

        Test paths are penalised by 3 points unless the query terms mention
        tests explicitly. Only positively scored entries are kept; the bundle
        holds the top 12 RAG items and top 10 files, while ``rag_total`` and
        ``files_total`` count all kept entries before truncation.
        """
        terms = list(profile.get("terms", []) or [])
        entities = list(profile.get("entities", []) or [])
        explicit_test = any(term in {"test", "tests", "тест", "тесты"} for term in terms)

        ranked_rag: list[tuple[int, dict]] = []
        for item in rag_items:
            score = self.rag_score(item, terms, entities)
            source = str(item.get("source", "") or "")
            if not explicit_test and self.is_test_path(source):
                score -= 3
            if score > 0:
                ranked_rag.append((score, item))
        # list.sort is stable, so equally scored items keep their input order.
        ranked_rag.sort(key=lambda pair: pair[0], reverse=True)

        ranked_files: list[tuple[int, dict]] = []
        for path, payload in files_map.items():
            score = self.file_score(path, payload, terms, entities)
            if not explicit_test and self.is_test_path(path):
                score -= 3
            if score > 0:
                ranked_files.append(
                    (
                        score,
                        {
                            "path": path,
                            # `or ""` keeps a None value from becoming the literal
                            # string "None", in line with how file_score reads content.
                            "content": str(payload.get("content", "") or ""),
                            "content_hash": str(payload.get("content_hash", "") or ""),
                        },
                    )
                )
        ranked_files.sort(key=lambda pair: pair[0], reverse=True)

        return {
            "profile": profile,
            "rag_items": [item for _, item in ranked_rag[:12]],
            "file_candidates": [item for _, item in ranked_files[:10]],
            "rag_total": len(ranked_rag),
            "files_total": len(ranked_files),
        }

    def build_answer_brief(self, profile: dict, analysis: dict) -> dict:
        """Map an analysis result onto the answer-brief schema consumed by ``compose_answer``."""
        return {
            "question_profile": profile,
            "resolved_subject": analysis.get("subject"),
            "key_findings": analysis.get("findings", []),
            "supporting_evidence": analysis.get("evidence", []),
            "missing_evidence": analysis.get("gaps", []),
            "answer_mode": analysis.get("answer_mode", "summary"),
        }

    def compose_answer(self, brief: dict) -> str:
        """Render an answer brief as Markdown in Russian or English.

        The output has a summary title, a findings section (or a fallback
        sentence when there are no findings), then optional evidence (first
        five entries) and gaps (first three entries) sections.
        """
        profile = brief.get("question_profile", {}) or {}
        russian = bool(profile.get("russian"))
        answer_mode = str(brief.get("answer_mode") or "summary")
        findings = list(brief.get("key_findings", []) or [])
        evidence = list(brief.get("supporting_evidence", []) or [])
        gaps = list(brief.get("missing_evidence", []) or [])

        title = "## Кратко" if russian else "## Summary"
        if answer_mode == "inventory":
            heading = "### Что реализовано" if russian else "### Implemented items"
        else:
            heading = "### Что видно по проекту" if russian else "### What the project shows"
        lines = [title, heading]
        if findings:
            lines.extend(f"- {item}" for item in findings)
        else:
            lines.append("Не удалось собрать подтвержденные выводы по доступным данным." if russian else "No supported findings could be assembled from the available data.")
        if evidence:
            lines.append("")
            lines.append("### Где смотреть в проекте" if russian else "### Where to look in the project")
            lines.extend(f"- `{item}`" for item in evidence[:5])
        if gaps:
            lines.append("")
            lines.append("### Что пока не подтверждено кодом" if russian else "### What is not yet confirmed in code")
            lines.extend(f"- {item}" for item in gaps[:3])
        return "\n".join(lines)

    def detect_intent(self, lowered: str) -> str:
        """Map a lower-cased message to ``inventory``, ``lookup``, ``compare`` or ``explain``.

        Checks are substring tests evaluated in that order; ``explain`` is
        the fallback.
        """
        if any(token in lowered for token in ("какие", "что уже реализ", "список", "перечень", "какие есть")):
            return "inventory"
        if any(token in lowered for token in ("где", "find", "where")):
            return "lookup"
        if any(token in lowered for token in ("сравни", "compare")):
            return "compare"
        return "explain"

    def looks_like_code_question(self, lowered: str) -> bool:
        """Report whether the message contains a code-related marker or an identifier-like token."""
        code_markers = ("по коду", "код", "реализ", "имплементац", "класс", "метод", "модул", "файл", "канал", "handler", "endpoint")
        # NOTE(review): build_profile passes an already lower-cased string, so the
        # uppercase-initial identifier pattern below cannot match on that path.
        # Left as is: enabling it on the raw message would classify almost every
        # capitalised English question as "code" - confirm the intended behaviour.
        return any(marker in lowered for marker in code_markers) or bool(re.search(r"\b[A-Z][A-Za-z0-9_]{2,}\b", lowered))

    def extract_entities(self, message: str) -> list[str]:
        """Return up to five tokens that start with an ASCII capital and are at least three characters long."""
        return re.findall(r"\b[A-Z][A-Za-z0-9_]{2,}\b", message)[:5]

    def rag_score(self, item: dict, terms: list[str], entities: list[str]) -> int:
        """Score a retrieved item: +3 per matching term, +5 per matching entity.

        Matching is a substring test against the lower-cased ``source``,
        ``title``, ``content`` and ``metadata.qname`` of the item. Terms are
        compared as given; entities are lower-cased first.
        """
        haystacks = [
            str(item.get("source", "") or "").lower(),
            str(item.get("title", "") or "").lower(),
            str(item.get("content", "") or "").lower(),
            str((item.get("metadata", {}) or {}).get("qname", "") or "").lower(),
        ]
        score = 0
        for term in terms:
            if any(term in hay for hay in haystacks):
                score += 3
        for entity in entities:
            if any(entity.lower() in hay for hay in haystacks):
                score += 5
        return score

    def file_score(self, path: str, payload: dict, terms: list[str], entities: list[str]) -> int:
        """Score a file: path hits outweigh content hits.

        Each term adds 4 when found in the path, else 2 when found in the
        content. Each entity adds 5 for a path hit, else 3 for a content hit.
        """
        content = str(payload.get("content", "") or "").lower()
        path_lower = path.lower()
        score = 0
        for term in terms:
            if term in path_lower:
                score += 4
            elif term in content:
                score += 2
        for entity in entities:
            entity_lower = entity.lower()
            if entity_lower in path_lower:
                score += 5
            elif entity_lower in content:
                score += 3
        return score

    def is_test_path(self, path: str) -> bool:
        """Report whether ``path`` looks like a test file or lives in a ``tests`` directory."""
        lowered = path.lower()
        return lowered.startswith(("tests/", "test_")) or "/tests/" in lowered or "/test_" in lowered

    def is_russian(self, text: str) -> bool:
        """Report whether ``text`` contains at least one Russian Cyrillic letter (а-я or ё, any case)."""
        return any("а" <= ch.lower() <= "я" or ch.lower() == "ё" for ch in text)
|
||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
import logging
|
||||
import time
|
||||
|
||||
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
|
||||
@@ -9,6 +10,8 @@ from app.modules.agent.engine.orchestrator.models import PlanStatus, PlanStep, S
|
||||
from app.modules.agent.engine.orchestrator.quality_gates import QualityGateRunner
|
||||
from app.modules.agent.engine.orchestrator.step_registry import StepRegistry
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ExecutionEngine:
|
||||
def __init__(self, step_registry: StepRegistry, gates: QualityGateRunner) -> None:
|
||||
@@ -22,17 +25,18 @@ class ExecutionEngine:
|
||||
for step in ctx.plan.steps:
|
||||
dep_issue = self._dependency_issue(step, step_results)
|
||||
if dep_issue:
|
||||
step_results.append(
|
||||
StepResult(
|
||||
result = StepResult(
|
||||
step_id=step.step_id,
|
||||
status=StepStatus.SKIPPED,
|
||||
warnings=[dep_issue],
|
||||
)
|
||||
)
|
||||
step_results.append(result)
|
||||
self._log_step_result(ctx, step, result)
|
||||
continue
|
||||
|
||||
result = await self._run_with_retry(step, ctx)
|
||||
step_results.append(result)
|
||||
self._log_step_result(ctx, step, result)
|
||||
if result.status in {StepStatus.FAILED, StepStatus.RETRY_EXHAUSTED} and step.on_failure == "fail":
|
||||
ctx.plan.status = PlanStatus.FAILED
|
||||
return step_results
|
||||
@@ -65,6 +69,15 @@ class ExecutionEngine:
|
||||
while attempt < max_attempts:
|
||||
attempt += 1
|
||||
started_at = time.monotonic()
|
||||
LOGGER.warning(
|
||||
"orchestrator step start: task_id=%s step_id=%s action_id=%s executor=%s attempt=%s graph_id=%s",
|
||||
ctx.task.task_id,
|
||||
step.step_id,
|
||||
step.action_id,
|
||||
step.executor,
|
||||
attempt,
|
||||
step.graph_id or "",
|
||||
)
|
||||
await self._emit_progress(ctx, f"orchestrator.step.{step.step_id}", step.title)
|
||||
|
||||
try:
|
||||
@@ -113,3 +126,21 @@ class ExecutionEngine:
|
||||
result = ctx.progress_cb(stage, message, "task_progress", {"layer": "orchestrator"})
|
||||
if inspect.isawaitable(result):
|
||||
await result
|
||||
|
||||
def _log_step_result(self, ctx: ExecutionContext, step: PlanStep, result: StepResult) -> None:
    """Log a one-line summary of a step outcome.

    The record carries the task id, step id, action id, status, duration,
    the keys of the artifacts the step produced, its warnings and its error
    message (empty string when there is none). Produced artifact ids that
    are not present in ``ctx.artifacts`` are left out of ``artifact_keys``.
    """
    # Build the id -> key lookup once instead of rescanning every artifact for
    # each produced id. setdefault keeps the first artifact seen for an id,
    # which matches what a first-match scan would return.
    keys_by_id: dict = {}
    if result.produced_artifact_ids:
        for artifact in ctx.artifacts.all_items():
            keys_by_id.setdefault(artifact.artifact_id, artifact.key)
    artifact_keys = [
        keys_by_id[artifact_id]
        for artifact_id in result.produced_artifact_ids
        if artifact_id in keys_by_id
    ]
    LOGGER.warning(
        "orchestrator step result: task_id=%s step_id=%s action_id=%s status=%s duration_ms=%s artifact_keys=%s warnings=%s error=%s",
        ctx.task.task_id,
        step.step_id,
        step.action_id,
        result.status.value,
        result.duration_ms,
        artifact_keys,
        result.warnings,
        result.error_message or "",
    )
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import inspect
|
||||
import logging
|
||||
|
||||
from app.core.exceptions import AppError
|
||||
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext, GraphInvoker, GraphResolver, ProgressCallback
|
||||
@@ -14,6 +15,8 @@ from app.modules.agent.engine.orchestrator.step_registry import StepRegistry
|
||||
from app.modules.agent.engine.orchestrator.template_registry import ScenarioTemplateRegistry
|
||||
from app.schemas.common import ModuleName
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OrchestratorService:
|
||||
def __init__(
|
||||
@@ -74,6 +77,21 @@ class OrchestratorService:
|
||||
)
|
||||
result = self._assembler.assemble(ctx, step_results)
|
||||
await self._emit_progress(progress_cb, "orchestrator.done", "Execution plan completed.")
|
||||
LOGGER.warning(
|
||||
"orchestrator decision: task_id=%s scenario=%s plan_status=%s steps=%s changeset_items=%s answer_len=%s",
|
||||
task.task_id,
|
||||
task.scenario.value,
|
||||
result.meta.get("plan", {}).get("status", ""),
|
||||
[
|
||||
{
|
||||
"step_id": step.step_id,
|
||||
"status": step.status.value,
|
||||
}
|
||||
for step in result.steps
|
||||
],
|
||||
len(result.changeset),
|
||||
len(result.answer or ""),
|
||||
)
|
||||
return result
|
||||
|
||||
async def _emit_progress(self, progress_cb: ProgressCallback | None, stage: str, message: str) -> None:
|
||||
|
||||
@@ -2,29 +2,50 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from collections.abc import Callable
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.agent.engine.graphs.progress_registry import progress_registry
|
||||
from app.modules.agent.engine.orchestrator.actions import DocsActions, EditActions, ExplainActions, GherkinActions, ReviewActions
|
||||
from app.modules.agent.engine.orchestrator.actions import (
|
||||
CodeExplainActions,
|
||||
DocsActions,
|
||||
EditActions,
|
||||
ExplainActions,
|
||||
GherkinActions,
|
||||
ProjectQaActions,
|
||||
ReviewActions,
|
||||
)
|
||||
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
|
||||
from app.modules.agent.engine.orchestrator.models import ArtifactType, PlanStep
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
|
||||
StepFn = Callable[[ExecutionContext], list[str]]
|
||||
|
||||
|
||||
class StepRegistry:
|
||||
def __init__(self) -> None:
|
||||
def __init__(self, code_explain_retriever: CodeExplainRetrieverV2 | None = None) -> None:
|
||||
code_explain = CodeExplainActions(code_explain_retriever)
|
||||
explain = ExplainActions()
|
||||
review = ReviewActions()
|
||||
docs = DocsActions()
|
||||
edits = EditActions()
|
||||
gherkin = GherkinActions()
|
||||
project_qa = ProjectQaActions()
|
||||
|
||||
self._functions: dict[str, StepFn] = {
|
||||
"collect_state": self._collect_state,
|
||||
"finalize_graph_output": self._finalize_graph_output,
|
||||
"execute_project_qa_graph": self._collect_state,
|
||||
"build_code_explain_pack": code_explain.build_code_explain_pack,
|
||||
"collect_sources": explain.collect_sources,
|
||||
"extract_logic": explain.extract_logic,
|
||||
"summarize": explain.summarize,
|
||||
"classify_project_question": project_qa.classify_project_question,
|
||||
"collect_project_sources": project_qa.collect_project_sources,
|
||||
"analyze_project_sources": project_qa.analyze_project_sources,
|
||||
"build_project_answer_brief": project_qa.build_project_answer_brief,
|
||||
"compose_project_answer": project_qa.compose_project_answer,
|
||||
"fetch_source_doc": review.fetch_source_doc,
|
||||
"normalize_document": review.normalize_document,
|
||||
"structural_check": review.structural_check,
|
||||
@@ -66,6 +87,7 @@ class StepRegistry:
|
||||
state = {
|
||||
"task_id": ctx.task.task_id,
|
||||
"project_id": ctx.task.rag_session_id,
|
||||
"scenario": ctx.task.scenario.value,
|
||||
"message": ctx.task.user_message,
|
||||
"progress_key": ctx.task.task_id,
|
||||
"rag_context": str(ctx.task.metadata.get("rag_context", "")),
|
||||
@@ -86,7 +108,7 @@ class StepRegistry:
|
||||
raise RuntimeError(f"Unsupported graph_id: {graph_key}")
|
||||
|
||||
graph = ctx.graph_resolver(domain_id, process_id)
|
||||
state = ctx.artifacts.get_content("agent_state", {}) or {}
|
||||
state = self._build_graph_state(ctx)
|
||||
|
||||
if ctx.progress_cb is not None:
|
||||
progress_registry.register(ctx.task.task_id, ctx.progress_cb)
|
||||
@@ -96,8 +118,29 @@ class StepRegistry:
|
||||
if ctx.progress_cb is not None:
|
||||
progress_registry.unregister(ctx.task.task_id)
|
||||
|
||||
item = ctx.artifacts.put(key="graph_result", artifact_type=ArtifactType.STRUCTURED_JSON, content=result)
|
||||
return [item.artifact_id]
|
||||
return self._store_graph_outputs(step, ctx, result)
|
||||
|
||||
def _build_graph_state(self, ctx: ExecutionContext) -> dict:
|
||||
state = dict(ctx.artifacts.get_content("agent_state", {}) or {})
|
||||
for item in ctx.artifacts.all_items():
|
||||
state[item.key] = ctx.artifacts.get_content(item.key)
|
||||
return state
|
||||
|
||||
def _store_graph_outputs(self, step: PlanStep, ctx: ExecutionContext, result: dict) -> list[str]:
|
||||
if not isinstance(result, dict):
|
||||
raise RuntimeError("graph_result must be an object")
|
||||
if len(step.outputs) == 1 and step.outputs[0].key == "graph_result":
|
||||
item = ctx.artifacts.put(key="graph_result", artifact_type=ArtifactType.STRUCTURED_JSON, content=result)
|
||||
return [item.artifact_id]
|
||||
|
||||
artifact_ids: list[str] = []
|
||||
for output in step.outputs:
|
||||
value = result.get(output.key)
|
||||
if value is None and output.required:
|
||||
raise RuntimeError(f"graph_output_missing:{step.step_id}:{output.key}")
|
||||
item = ctx.artifacts.put(key=output.key, artifact_type=output.type, content=value)
|
||||
artifact_ids.append(item.artifact_id)
|
||||
return artifact_ids
|
||||
|
||||
def _finalize_graph_output(self, ctx: ExecutionContext) -> list[str]:
|
||||
raw = ctx.artifacts.get_content("graph_result", {}) or {}
|
||||
|
||||
@@ -16,6 +16,8 @@ class ScenarioTemplateRegistry:
|
||||
return builders.get(task.scenario, self._general)(task)
|
||||
|
||||
def _general(self, task: TaskSpec) -> ExecutionPlan:
|
||||
if task.routing.domain_id == "project" and task.routing.process_id == "qa":
|
||||
return self._project_qa(task)
|
||||
steps = [
|
||||
self._step("collect_state", "Collect state", "collect_state", outputs=[self._out("agent_state", ArtifactType.STRUCTURED_JSON)]),
|
||||
self._step(
|
||||
@@ -39,7 +41,77 @@ class ScenarioTemplateRegistry:
|
||||
]
|
||||
return self._plan(task, "general_qa_v1", steps, [self._gate("non_empty_answer_or_changeset")])
|
||||
|
||||
def _project_qa(self, task: TaskSpec) -> ExecutionPlan:
    """Build the ``project_qa_reasoning_v1`` plan for a project Q&A task.

    The plan is a linear chain: collect state, conversation understanding,
    question classification, context retrieval, context analysis and answer
    composition. For the ``EXPLAIN_PART`` scenario a code-explain-pack step
    is inserted after retrieval and the analysis step depends on it instead.
    The plan and its last step both carry the
    ``non_empty_answer_or_changeset`` gate.
    """
    json_type = ArtifactType.STRUCTURED_JSON

    def graph_step(step_id: str, title: str, depends_on: list[str], outputs: list, **extra):
        # Every graph-backed step shares the action, the executor and a
        # graph id derived from its step id.
        return self._step(
            step_id,
            title,
            "execute_project_qa_graph",
            executor="graph",
            graph_id=f"project_qa/{step_id}",
            depends_on=depends_on,
            outputs=outputs,
            **extra,
        )

    plan_steps = [
        self._step("collect_state", "Collect state", "collect_state", outputs=[self._out("agent_state", json_type)]),
        graph_step(
            "conversation_understanding",
            "Conversation understanding",
            ["collect_state"],
            [self._out("resolved_request", json_type)],
        ),
        graph_step(
            "question_classification",
            "Question classification",
            ["conversation_understanding"],
            [self._out("question_profile", json_type)],
        ),
        graph_step(
            "context_retrieval",
            "Context retrieval",
            ["question_classification"],
            [self._out("source_bundle", json_type)],
        ),
    ]

    explain_part = task.scenario == Scenario.EXPLAIN_PART
    if explain_part:
        plan_steps.append(
            self._step(
                "code_explain_pack_step",
                "Build code explain pack",
                "build_code_explain_pack",
                depends_on=["context_retrieval"],
                outputs=[self._out("explain_pack", json_type)],
            )
        )
    analysis_parent = "code_explain_pack_step" if explain_part else "context_retrieval"

    plan_steps.append(
        graph_step(
            "context_analysis",
            "Context analysis",
            [analysis_parent],
            [self._out("analysis_brief", json_type)],
        )
    )
    plan_steps.append(
        graph_step(
            "answer_composition",
            "Answer composition",
            ["context_analysis"],
            [self._out("answer_brief", json_type, required=False), self._out("final_answer", ArtifactType.TEXT)],
            gates=[self._gate("non_empty_answer_or_changeset")],
        )
    )
    return self._plan(task, "project_qa_reasoning_v1", plan_steps, [self._gate("non_empty_answer_or_changeset")])
|
||||
|
||||
def _explain(self, task: TaskSpec) -> ExecutionPlan:
|
||||
if task.routing.domain_id == "project" and task.routing.process_id == "qa":
|
||||
return self._project_qa(task)
|
||||
steps = [
|
||||
self._step("collect_sources", "Collect sources", "collect_sources", outputs=[self._out("sources", ArtifactType.STRUCTURED_JSON)]),
|
||||
self._step("extract_logic", "Extract logic", "extract_logic", depends_on=["collect_sources"], outputs=[self._out("logic_model", ArtifactType.STRUCTURED_JSON)]),
|
||||
|
||||
@@ -2,21 +2,28 @@ from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.agent.llm import AgentLlmService
|
||||
from app.modules.contracts import RagRetriever
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.agent.repository import AgentRepository
|
||||
from app.modules.agent.engine.router.router_service import RouterService
|
||||
|
||||
|
||||
def build_router_service(llm: AgentLlmService, agent_repository: "AgentRepository") -> "RouterService":
|
||||
def build_router_service(llm: AgentLlmService, agent_repository: "AgentRepository", rag: RagRetriever) -> "RouterService":
|
||||
from app.modules.agent.engine.graphs import (
|
||||
BaseGraphFactory,
|
||||
DocsGraphFactory,
|
||||
ProjectEditsGraphFactory,
|
||||
ProjectQaAnalysisGraphFactory,
|
||||
ProjectQaAnswerGraphFactory,
|
||||
ProjectQaClassificationGraphFactory,
|
||||
ProjectQaConversationGraphFactory,
|
||||
ProjectQaGraphFactory,
|
||||
ProjectQaRetrievalGraphFactory,
|
||||
)
|
||||
from app.modules.agent.engine.router.context_store import RouterContextStore
|
||||
from app.modules.agent.engine.router.intent_classifier import IntentClassifier
|
||||
from app.modules.agent.engine.router.intent_switch_detector import IntentSwitchDetector
|
||||
from app.modules.agent.engine.router.registry import IntentRegistry
|
||||
from app.modules.agent.engine.router.router_service import RouterService
|
||||
|
||||
@@ -26,13 +33,20 @@ def build_router_service(llm: AgentLlmService, agent_repository: "AgentRepositor
|
||||
registry.register("project", "qa", ProjectQaGraphFactory(llm).build)
|
||||
registry.register("project", "edits", ProjectEditsGraphFactory(llm).build)
|
||||
registry.register("docs", "generation", DocsGraphFactory(llm).build)
|
||||
registry.register("project_qa", "conversation_understanding", ProjectQaConversationGraphFactory(llm).build)
|
||||
registry.register("project_qa", "question_classification", ProjectQaClassificationGraphFactory(llm).build)
|
||||
registry.register("project_qa", "context_retrieval", ProjectQaRetrievalGraphFactory(rag, llm).build)
|
||||
registry.register("project_qa", "context_analysis", ProjectQaAnalysisGraphFactory(llm).build)
|
||||
registry.register("project_qa", "answer_composition", ProjectQaAnswerGraphFactory(llm).build)
|
||||
|
||||
classifier = IntentClassifier(llm)
|
||||
switch_detector = IntentSwitchDetector()
|
||||
context_store = RouterContextStore(agent_repository)
|
||||
return RouterService(
|
||||
registry=registry,
|
||||
classifier=classifier,
|
||||
context_store=context_store,
|
||||
switch_detector=switch_detector,
|
||||
)
|
||||
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -17,6 +17,7 @@ class RouterContextStore:
|
||||
process_id: str,
|
||||
user_message: str,
|
||||
assistant_message: str,
|
||||
decision_type: str = "start",
|
||||
max_history: int = 10,
|
||||
) -> None:
|
||||
self._repo.update_router_context(
|
||||
@@ -25,5 +26,6 @@ class RouterContextStore:
|
||||
process_id=process_id,
|
||||
user_message=user_message,
|
||||
assistant_message=assistant_message,
|
||||
decision_type=decision_type,
|
||||
max_history=max_history,
|
||||
)
|
||||
|
||||
@@ -17,11 +17,7 @@ class IntentClassifier:
|
||||
def __init__(self, llm: AgentLlmService) -> None:
|
||||
self._llm = llm
|
||||
|
||||
def classify(self, user_message: str, context: RouterContext, mode: str = "auto") -> RouteDecision:
|
||||
forced = self._from_mode(mode)
|
||||
if forced:
|
||||
return forced
|
||||
|
||||
def classify_new_intent(self, user_message: str, context: RouterContext) -> RouteDecision:
|
||||
text = (user_message or "").strip().lower()
|
||||
if text in self._short_confirmations and context.last_routing:
|
||||
return RouteDecision(
|
||||
@@ -30,6 +26,7 @@ class IntentClassifier:
|
||||
confidence=1.0,
|
||||
reason="short_confirmation",
|
||||
use_previous=True,
|
||||
decision_type="continue",
|
||||
)
|
||||
|
||||
deterministic = self._deterministic_route(text)
|
||||
@@ -45,9 +42,10 @@ class IntentClassifier:
|
||||
process_id="general",
|
||||
confidence=0.8,
|
||||
reason="default",
|
||||
decision_type="start",
|
||||
)
|
||||
|
||||
def _from_mode(self, mode: str) -> RouteDecision | None:
|
||||
def from_mode(self, mode: str) -> RouteDecision | None:
|
||||
mapping = {
|
||||
"project_qa": ("project", "qa"),
|
||||
"project_edits": ("project", "edits"),
|
||||
@@ -65,6 +63,8 @@ class IntentClassifier:
|
||||
process_id=route[1],
|
||||
confidence=1.0,
|
||||
reason=f"mode_override:{mode}",
|
||||
decision_type="switch",
|
||||
explicit_switch=True,
|
||||
)
|
||||
|
||||
def _classify_with_llm(self, user_message: str, context: RouterContext) -> RouteDecision | None:
|
||||
@@ -96,6 +96,7 @@ class IntentClassifier:
|
||||
process_id=route[1],
|
||||
confidence=confidence,
|
||||
reason=f"llm_router:{payload.get('reason', 'ok')}",
|
||||
decision_type="start",
|
||||
)
|
||||
|
||||
def _parse_llm_payload(self, raw: str) -> dict[str, str | float] | None:
|
||||
@@ -139,6 +140,8 @@ class IntentClassifier:
|
||||
process_id="edits",
|
||||
confidence=0.97,
|
||||
reason="deterministic_targeted_file_edit",
|
||||
decision_type="switch",
|
||||
explicit_switch=True,
|
||||
)
|
||||
if self._is_broad_docs_request(text):
|
||||
return RouteDecision(
|
||||
@@ -146,6 +149,8 @@ class IntentClassifier:
|
||||
process_id="generation",
|
||||
confidence=0.95,
|
||||
reason="deterministic_docs_generation",
|
||||
decision_type="switch",
|
||||
explicit_switch=True,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
@@ -0,0 +1,81 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from app.modules.agent.engine.router.schemas import RouterContext
|
||||
|
||||
|
||||
class IntentSwitchDetector:
    """Decide whether a message in an ongoing dialog asks to leave the active intent.

    The detector is purely lexical. Trigger markers (explicit switch phrases,
    edit verbs, documentation requests) are matched only at the start of a
    word, so that a marker buried inside a longer word ("now do" inside
    "know docs", "edit" inside "credit.md") does not cause a spurious switch.
    Russian markers are stems, so a match may continue into the rest of the
    word ("удали" also matches "удалить").

    Follow-up markers deliberately keep plain substring matching: a false
    positive there only keeps the current intent, and substring matching also
    catches prefixed forms such as "поподробнее".
    """

    _EXPLICIT_SWITCH_MARKERS = (
        "теперь",
        "а теперь",
        "давай теперь",
        "переключись",
        "переключаемся",
        "сейчас другое",
        "новая задача",
        "new task",
        "switch to",
        "now do",
        "instead",
    )
    _FOLLOW_UP_MARKERS = (
        "а еще",
        "а ещё",
        "подробнее",
        "почему",
        "зачем",
        "что если",
        "и еще",
        "и ещё",
        "покажи подробнее",
        "можешь подробнее",
    )
    _EDIT_MARKERS = (
        "добавь",
        "добавить",
        "измени",
        "исправь",
        "обнови",
        "удали",
        "замени",
        "append",
        "update",
        "edit",
        "remove",
        "replace",
    )
    _DOCS_MARKERS = (
        "подготовь документац",
        "сгенерируй документац",
        "создай документац",
        "опиши документац",
        "generate documentation",
        "write documentation",
    )
    # A path-like token ending in one of the recognised file extensions.
    _FILE_REFERENCE = re.compile(r"\b[\w.\-/]+\.(md|txt|rst|yaml|yml|json|toml|ini|cfg|py)\b")

    def should_switch(self, user_message: str, context: RouterContext) -> bool:
        """Return True when the message should be routed as a new intent.

        Args:
            user_message: Raw user text; ``None`` or blank text never switches.
            context: Router context; only ``dialog_started`` and
                ``active_intent`` are read.

        Returns:
            False when no dialog/intent is active, when the text is empty, or
            when the text looks like a follow-up. Otherwise True if the text
            carries an explicit switch marker, a targeted file-edit request,
            or a documentation-generation request.
        """
        if not context.dialog_started or context.active_intent is None:
            return False
        # Lower-case and collapse whitespace so multi-word markers match
        # regardless of spacing in the user's message.
        text = " ".join((user_message or "").strip().lower().split())
        if not text:
            return False
        # Follow-ups take precedence over every switch signal.
        if self._is_follow_up(text):
            return False
        if self._starts_word(text, self._EXPLICIT_SWITCH_MARKERS):
            return True
        return self._is_strong_targeted_edit_request(text) or self._is_strong_docs_request(text)

    def _is_follow_up(self, text: str) -> bool:
        """Return True if the text contains any follow-up marker (substring match)."""
        return any(marker in text for marker in self._FOLLOW_UP_MARKERS)

    def _is_strong_targeted_edit_request(self, text: str) -> bool:
        """Return True if the text has both an edit verb and a file reference.

        A file reference is either the word "readme" anywhere in the text or a
        path-like token with a recognised extension.
        """
        if not self._starts_word(text, self._EDIT_MARKERS):
            return False
        return "readme" in text or bool(self._FILE_REFERENCE.search(text))

    def _is_strong_docs_request(self, text: str) -> bool:
        """Return True if the text asks to produce documentation."""
        return self._starts_word(text, self._DOCS_MARKERS)

    @staticmethod
    def _starts_word(text: str, markers: tuple[str, ...]) -> bool:
        """Return True if any marker occurs in ``text`` at the start of a word.

        The negative lookbehind rejects matches preceded by a word character;
        ``\\w`` is Unicode-aware for ``str`` patterns, so this works for both
        the Cyrillic and the Latin markers.
        """
        return any(re.search(r"(?<!\w)" + re.escape(marker), text) for marker in markers)
|
||||
@@ -1,7 +1,8 @@
|
||||
from app.modules.agent.engine.router.context_store import RouterContextStore
|
||||
from app.modules.agent.engine.router.intent_classifier import IntentClassifier
|
||||
from app.modules.agent.engine.router.intent_switch_detector import IntentSwitchDetector
|
||||
from app.modules.agent.engine.router.registry import IntentRegistry
|
||||
from app.modules.agent.engine.router.schemas import RouteResolution
|
||||
from app.modules.agent.engine.router.schemas import RouteDecision, RouteResolution
|
||||
|
||||
|
||||
class RouterService:
|
||||
@@ -10,27 +11,48 @@ class RouterService:
|
||||
registry: IntentRegistry,
|
||||
classifier: IntentClassifier,
|
||||
context_store: RouterContextStore,
|
||||
switch_detector: IntentSwitchDetector | None = None,
|
||||
min_confidence: float = 0.7,
|
||||
) -> None:
|
||||
self._registry = registry
|
||||
self._classifier = classifier
|
||||
self._ctx = context_store
|
||||
self._switch_detector = switch_detector or IntentSwitchDetector()
|
||||
self._min_confidence = min_confidence
|
||||
|
||||
def resolve(self, user_message: str, conversation_key: str, mode: str = "auto") -> RouteResolution:
|
||||
context = self._ctx.get(conversation_key)
|
||||
decision = self._classifier.classify(user_message, context, mode=mode)
|
||||
if decision.confidence < self._min_confidence:
|
||||
return self._fallback("low_confidence")
|
||||
if not self._registry.is_valid(decision.domain_id, decision.process_id):
|
||||
return self._fallback("invalid_route")
|
||||
return RouteResolution(
|
||||
domain_id=decision.domain_id,
|
||||
process_id=decision.process_id,
|
||||
confidence=decision.confidence,
|
||||
reason=decision.reason,
|
||||
fallback_used=False,
|
||||
)
|
||||
forced = self._classifier.from_mode(mode)
|
||||
if forced:
|
||||
return self._resolution(forced)
|
||||
|
||||
if not context.dialog_started or context.active_intent is None:
|
||||
decision = self._classifier.classify_new_intent(user_message, context)
|
||||
if not self._is_acceptable(decision):
|
||||
return self._fallback("low_confidence")
|
||||
return self._resolution(
|
||||
decision.model_copy(
|
||||
update={
|
||||
"decision_type": "start",
|
||||
"explicit_switch": False,
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
if self._switch_detector.should_switch(user_message, context):
|
||||
decision = self._classifier.classify_new_intent(user_message, context)
|
||||
if self._is_acceptable(decision):
|
||||
return self._resolution(
|
||||
decision.model_copy(
|
||||
update={
|
||||
"decision_type": "switch",
|
||||
"explicit_switch": True,
|
||||
}
|
||||
)
|
||||
)
|
||||
return self._continue_current(context, "explicit_switch_unresolved_keep_current")
|
||||
|
||||
return self._continue_current(context, "continue_current_intent")
|
||||
|
||||
def persist_context(
|
||||
self,
|
||||
@@ -40,6 +62,7 @@ class RouterService:
|
||||
process_id: str,
|
||||
user_message: str,
|
||||
assistant_message: str,
|
||||
decision_type: str = "start",
|
||||
) -> None:
|
||||
self._ctx.update(
|
||||
conversation_key,
|
||||
@@ -47,6 +70,7 @@ class RouterService:
|
||||
process_id=process_id,
|
||||
user_message=user_message,
|
||||
assistant_message=assistant_message,
|
||||
decision_type=decision_type,
|
||||
)
|
||||
|
||||
def graph_factory(self, domain_id: str, process_id: str):
|
||||
@@ -59,4 +83,32 @@ class RouterService:
|
||||
confidence=0.0,
|
||||
reason=reason,
|
||||
fallback_used=True,
|
||||
decision_type="start",
|
||||
explicit_switch=False,
|
||||
)
|
||||
|
||||
def _continue_current(self, context, reason: str) -> RouteResolution:
    """Build a "continue" resolution that keeps the dialog on its current route.

    The route is taken from ``context.active_intent``, then from
    ``context.last_routing``, and finally falls back to ``default/general``.
    The result always has confidence 1.0 and is never flagged as a fallback
    or an explicit switch.
    """
    current = context.active_intent
    if not current:
        current = context.last_routing
    if not current:
        current = {"domain_id": "default", "process_id": "general"}

    domain_id = str(current["domain_id"])
    process_id = str(current["process_id"])
    return RouteResolution(
        domain_id=domain_id,
        process_id=process_id,
        confidence=1.0,
        reason=reason,
        fallback_used=False,
        decision_type="continue",
        explicit_switch=False,
    )
|
||||
|
||||
def _is_acceptable(self, decision: RouteDecision) -> bool:
|
||||
return decision.confidence >= self._min_confidence and self._registry.is_valid(decision.domain_id, decision.process_id)
|
||||
|
||||
def _resolution(self, decision: RouteDecision) -> RouteResolution:
    """Convert an accepted routing decision into a non-fallback resolution.

    Route identifiers, confidence, reason, decision type and the explicit
    switch flag are copied from the decision unchanged.
    """
    copied_fields = (
        "domain_id",
        "process_id",
        "confidence",
        "reason",
        "decision_type",
        "explicit_switch",
    )
    carried = {name: getattr(decision, name) for name in copied_fields}
    return RouteResolution(fallback_used=False, **carried)
|
||||
|
||||
@@ -7,6 +7,8 @@ class RouteDecision(BaseModel):
|
||||
confidence: float = 0.0
|
||||
reason: str = ""
|
||||
use_previous: bool = False
|
||||
decision_type: str = "start"
|
||||
explicit_switch: bool = False
|
||||
|
||||
@field_validator("confidence")
|
||||
@classmethod
|
||||
@@ -20,8 +22,13 @@ class RouteResolution(BaseModel):
|
||||
confidence: float
|
||||
reason: str
|
||||
fallback_used: bool = False
|
||||
decision_type: str = "start"
|
||||
explicit_switch: bool = False
|
||||
|
||||
|
||||
class RouterContext(BaseModel):
|
||||
last_routing: dict[str, str] | None = None
|
||||
message_history: list[dict[str, str]] = Field(default_factory=list)
|
||||
active_intent: dict[str, str] | None = None
|
||||
dialog_started: bool = False
|
||||
turn_index: int = 0
|
||||
|
||||
Binary file not shown.
@@ -1,14 +1,40 @@
|
||||
import logging
|
||||
|
||||
from app.modules.agent.prompt_loader import PromptLoader
|
||||
from app.modules.shared.gigachat.client import GigaChatClient
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _truncate_for_log(text: str, max_chars: int = 1500) -> str:
|
||||
value = (text or "").replace("\n", "\\n").strip()
|
||||
if len(value) <= max_chars:
|
||||
return value
|
||||
return value[:max_chars].rstrip() + "...[truncated]"
|
||||
|
||||
|
||||
class AgentLlmService:
|
||||
def __init__(self, client: GigaChatClient, prompts: PromptLoader) -> None:
|
||||
self._client = client
|
||||
self._prompts = prompts
|
||||
|
||||
def generate(self, prompt_name: str, user_input: str) -> str:
|
||||
def generate(self, prompt_name: str, user_input: str, *, log_context: str | None = None) -> str:
|
||||
system_prompt = self._prompts.load(prompt_name)
|
||||
if not system_prompt:
|
||||
system_prompt = "You are a helpful assistant."
|
||||
return self._client.complete(system_prompt=system_prompt, user_prompt=user_input)
|
||||
if log_context:
|
||||
LOGGER.warning(
|
||||
"graph llm input: context=%s prompt=%s user_input=%s",
|
||||
log_context,
|
||||
prompt_name,
|
||||
_truncate_for_log(user_input),
|
||||
)
|
||||
output = self._client.complete(system_prompt=system_prompt, user_prompt=user_input)
|
||||
if log_context:
|
||||
LOGGER.warning(
|
||||
"graph llm output: context=%s prompt=%s output=%s",
|
||||
log_context,
|
||||
prompt_name,
|
||||
_truncate_for_log(output),
|
||||
)
|
||||
return output
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel, HttpUrl
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.agent.changeset_validator import ChangeSetValidator
|
||||
from app.modules.agent.confluence_service import ConfluenceService
|
||||
@@ -19,12 +22,17 @@ class ConfluenceFetchRequest(BaseModel):
|
||||
url: HttpUrl
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
|
||||
|
||||
class AgentModule:
|
||||
def __init__(
|
||||
self,
|
||||
rag_retriever: RagRetriever,
|
||||
agent_repository: AgentRepository,
|
||||
story_context_repository: StoryContextRepository,
|
||||
code_explain_retriever: CodeExplainRetrieverV2 | None = None,
|
||||
) -> None:
|
||||
self.confluence = ConfluenceService()
|
||||
self.changeset_validator = ChangeSetValidator()
|
||||
@@ -34,14 +42,16 @@ class AgentModule:
|
||||
client = GigaChatClient(settings, token_provider)
|
||||
prompt_loader = PromptLoader()
|
||||
llm = AgentLlmService(client=client, prompts=prompt_loader)
|
||||
self.llm = llm
|
||||
story_recorder = StorySessionRecorder(story_context_repository)
|
||||
self.runtime = GraphAgentRuntime(
|
||||
rag=rag_retriever,
|
||||
confluence=self.confluence,
|
||||
changeset_validator=self.changeset_validator,
|
||||
llm=llm,
|
||||
llm=self.llm,
|
||||
agent_repository=agent_repository,
|
||||
story_recorder=story_recorder,
|
||||
code_explain_retriever=code_explain_retriever,
|
||||
)
|
||||
|
||||
def internal_router(self) -> APIRouter:
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
Объяснение кода осуществляется только с использованием предоставленного ExplainPack.
|
||||
|
||||
Правила:
|
||||
- Сначала используйте доказательства.
|
||||
- Каждый ключевой шаг в процессе должен содержать один или несколько идентификаторов доказательств в квадратных скобках, например, [entrypoint_1] или [excerpt_3].
|
||||
- Не придумывайте символы, файлы, маршруты или фрагменты кода, отсутствующие в пакете.
|
||||
- Если доказательства неполные, укажите это явно.
|
||||
- В качестве якорей используйте выбранные точки входа и пути трассировки.
|
||||
|
||||
Верните Markdown со следующей структурой:
|
||||
1. Краткое описание
|
||||
2. Пошаговый процесс
|
||||
3. Данные и побочные эффекты
|
||||
4. Ошибки и граничные случаи
|
||||
5. Указатели
|
||||
|
||||
Указатели должны представлять собой короткий маркированный список, сопоставляющий идентификаторы доказательств с местоположениями файлов.
|
||||
@@ -0,0 +1,24 @@
|
||||
Ты intent-router для layered RAG.
|
||||
На вход ты получаешь JSON с полями:
|
||||
- message: текущий запрос пользователя
|
||||
- active_intent: текущий активный intent диалога или null
|
||||
- last_query: предыдущий запрос пользователя
|
||||
- allowed_intents: допустимые intent'ы
|
||||
|
||||
Выбери ровно один intent из allowed_intents.
|
||||
Верни только JSON без markdown и пояснений.
|
||||
|
||||
Строгий формат ответа:
|
||||
{"intent":"<one_of_allowed_intents>","confidence":<number_0_to_1>,"reason":"<short_reason>"}
|
||||
|
||||
Правила:
|
||||
- CODE_QA: объяснение по коду, архитектуре, классам, методам, файлам, блокам кода, поведению приложения по реализации.
|
||||
- DOCS_QA: объяснение по документации, README, markdown, specs, runbooks, разделам документации.
|
||||
- GENERATE_DOCS_FROM_CODE: просьба сгенерировать, подготовить или обновить документацию по коду.
|
||||
- PROJECT_MISC: прочие вопросы по проекту, не относящиеся явно к коду или документации.
|
||||
|
||||
Приоритет:
|
||||
- Если пользователь просит именно подготовить документацию по коду, выбирай GENERATE_DOCS_FROM_CODE.
|
||||
- Если пользователь спрашивает про конкретный класс, файл, метод или блок кода, выбирай CODE_QA.
|
||||
- Если пользователь спрашивает про README, docs, markdown или конкретную документацию, выбирай DOCS_QA.
|
||||
- Если сигнал неочевиден, выбирай PROJECT_MISC и confidence <= 0.6.
|
||||
@@ -18,6 +18,10 @@ class AgentRepository:
|
||||
conversation_key VARCHAR(64) PRIMARY KEY,
|
||||
last_domain_id VARCHAR(64) NULL,
|
||||
last_process_id VARCHAR(64) NULL,
|
||||
active_domain_id VARCHAR(64) NULL,
|
||||
active_process_id VARCHAR(64) NULL,
|
||||
dialog_started BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
turn_index INTEGER NOT NULL DEFAULT 0,
|
||||
message_history_json TEXT NOT NULL DEFAULT '[]',
|
||||
updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
@@ -64,14 +68,24 @@ class AgentRepository:
|
||||
"""
|
||||
)
|
||||
)
|
||||
self._ensure_router_context_columns(conn)
|
||||
conn.commit()
|
||||
|
||||
def _ensure_router_context_columns(self, conn) -> None:
    """Add the active-intent columns to ``router_context`` when they are missing.

    Each statement uses ``ADD COLUMN IF NOT EXISTS``, so it can be issued
    against a table that already has the columns. Statements are executed on
    the given connection; this method does not commit.
    """
    column_migrations = (
        "ALTER TABLE router_context ADD COLUMN IF NOT EXISTS active_domain_id VARCHAR(64) NULL",
        "ALTER TABLE router_context ADD COLUMN IF NOT EXISTS active_process_id VARCHAR(64) NULL",
        "ALTER TABLE router_context ADD COLUMN IF NOT EXISTS dialog_started BOOLEAN NOT NULL DEFAULT FALSE",
        "ALTER TABLE router_context ADD COLUMN IF NOT EXISTS turn_index INTEGER NOT NULL DEFAULT 0",
    )
    for ddl in column_migrations:
        conn.execute(text(ddl))
|
||||
|
||||
def get_router_context(self, conversation_key: str) -> RouterContext:
|
||||
with get_engine().connect() as conn:
|
||||
row = conn.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT last_domain_id, last_process_id, message_history_json
|
||||
SELECT last_domain_id, last_process_id, active_domain_id, active_process_id, dialog_started, turn_index, message_history_json
|
||||
FROM router_context
|
||||
WHERE conversation_key = :key
|
||||
"""
|
||||
@@ -82,7 +96,7 @@ class AgentRepository:
|
||||
if not row:
|
||||
return RouterContext()
|
||||
|
||||
history_raw = row[2] or "[]"
|
||||
history_raw = row[6] or "[]"
|
||||
try:
|
||||
history = json.loads(history_raw)
|
||||
except json.JSONDecodeError:
|
||||
@@ -91,6 +105,9 @@ class AgentRepository:
|
||||
last = None
|
||||
if row[0] and row[1]:
|
||||
last = {"domain_id": str(row[0]), "process_id": str(row[1])}
|
||||
active = None
|
||||
if row[2] and row[3]:
|
||||
active = {"domain_id": str(row[2]), "process_id": str(row[3])}
|
||||
|
||||
clean_history = []
|
||||
for item in history if isinstance(history, list) else []:
|
||||
@@ -101,7 +118,13 @@ class AgentRepository:
|
||||
if role in {"user", "assistant"} and content:
|
||||
clean_history.append({"role": role, "content": content})
|
||||
|
||||
return RouterContext(last_routing=last, message_history=clean_history)
|
||||
return RouterContext(
|
||||
last_routing=last,
|
||||
message_history=clean_history,
|
||||
active_intent=active or last,
|
||||
dialog_started=bool(row[4]),
|
||||
turn_index=int(row[5] or 0),
|
||||
)
|
||||
|
||||
def update_router_context(
|
||||
self,
|
||||
@@ -111,6 +134,7 @@ class AgentRepository:
|
||||
process_id: str,
|
||||
user_message: str,
|
||||
assistant_message: str,
|
||||
decision_type: str,
|
||||
max_history: int,
|
||||
) -> None:
|
||||
current = self.get_router_context(conversation_key)
|
||||
@@ -121,17 +145,29 @@ class AgentRepository:
|
||||
history.append({"role": "assistant", "content": assistant_message})
|
||||
if max_history > 0:
|
||||
history = history[-max_history:]
|
||||
current_active = current.active_intent or current.last_routing or {"domain_id": domain_id, "process_id": process_id}
|
||||
next_active = (
|
||||
{"domain_id": domain_id, "process_id": process_id}
|
||||
if decision_type in {"start", "switch"}
|
||||
else current_active
|
||||
)
|
||||
next_turn_index = max(0, int(current.turn_index or 0)) + (1 if user_message else 0)
|
||||
|
||||
with get_engine().connect() as conn:
|
||||
conn.execute(
|
||||
text(
|
||||
"""
|
||||
INSERT INTO router_context (
|
||||
conversation_key, last_domain_id, last_process_id, message_history_json
|
||||
) VALUES (:key, :domain, :process, :history)
|
||||
conversation_key, last_domain_id, last_process_id, active_domain_id, active_process_id,
|
||||
dialog_started, turn_index, message_history_json
|
||||
) VALUES (:key, :domain, :process, :active_domain, :active_process, :dialog_started, :turn_index, :history)
|
||||
ON CONFLICT (conversation_key) DO UPDATE SET
|
||||
last_domain_id = EXCLUDED.last_domain_id,
|
||||
last_process_id = EXCLUDED.last_process_id,
|
||||
active_domain_id = EXCLUDED.active_domain_id,
|
||||
active_process_id = EXCLUDED.active_process_id,
|
||||
dialog_started = EXCLUDED.dialog_started,
|
||||
turn_index = EXCLUDED.turn_index,
|
||||
message_history_json = EXCLUDED.message_history_json,
|
||||
updated_at = CURRENT_TIMESTAMP
|
||||
"""
|
||||
@@ -140,6 +176,10 @@ class AgentRepository:
|
||||
"key": conversation_key,
|
||||
"domain": domain_id,
|
||||
"process": process_id,
|
||||
"active_domain": str(next_active["domain_id"]),
|
||||
"active_process": str(next_active["process_id"]),
|
||||
"dialog_started": True,
|
||||
"turn_index": next_turn_index,
|
||||
"history": json.dumps(history, ensure_ascii=False),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -1,12 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from collections.abc import Awaitable, Callable
|
||||
import inspect
|
||||
import logging
|
||||
import re
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.agent.engine.orchestrator import OrchestratorService, TaskSpecBuilder
|
||||
from app.modules.agent.engine.orchestrator.metrics_persister import MetricsPersister
|
||||
from app.modules.agent.engine.orchestrator.models import RoutingMeta
|
||||
from app.modules.agent.engine.orchestrator.step_registry import StepRegistry
|
||||
from app.modules.agent.engine.router import build_router_service
|
||||
from app.modules.agent.llm import AgentLlmService
|
||||
from app.modules.agent.story_session_recorder import StorySessionRecorder
|
||||
@@ -22,6 +26,9 @@ from app.schemas.common import ModuleName
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
|
||||
|
||||
def _truncate_for_log(text: str | None, max_chars: int = 1500) -> str:
|
||||
value = (text or "").replace("\n", "\\n").strip()
|
||||
@@ -47,13 +54,14 @@ class GraphAgentRuntime:
|
||||
llm: AgentLlmService,
|
||||
agent_repository: AgentRepository,
|
||||
story_recorder: StorySessionRecorder | None = None,
|
||||
code_explain_retriever: CodeExplainRetrieverV2 | None = None,
|
||||
) -> None:
|
||||
self._rag = rag
|
||||
self._confluence = confluence
|
||||
self._changeset_validator = changeset_validator
|
||||
self._router = build_router_service(llm, agent_repository)
|
||||
self._router = build_router_service(llm, agent_repository, rag)
|
||||
self._task_spec_builder = TaskSpecBuilder()
|
||||
self._orchestrator = OrchestratorService()
|
||||
self._orchestrator = OrchestratorService(step_registry=StepRegistry(code_explain_retriever))
|
||||
self._metrics_persister = MetricsPersister(agent_repository)
|
||||
self._story_recorder = story_recorder
|
||||
self._checkpointer = None
|
||||
@@ -70,7 +78,7 @@ class GraphAgentRuntime:
|
||||
files: list[dict],
|
||||
progress_cb: Callable[[str, str, str, dict | None], Awaitable[None] | None] | None = None,
|
||||
) -> AgentResult:
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"GraphAgentRuntime.run started: task_id=%s dialog_session_id=%s mode=%s",
|
||||
task_id,
|
||||
dialog_session_id,
|
||||
@@ -96,9 +104,7 @@ class GraphAgentRuntime:
|
||||
meta={"domain_id": route.domain_id, "process_id": route.process_id},
|
||||
)
|
||||
files_map = self._build_files_map(files)
|
||||
|
||||
await self._emit_progress(progress_cb, "agent.rag", "Собираю релевантный контекст из RAG.")
|
||||
rag_ctx = await self._rag.retrieve(rag_session_id, message)
|
||||
rag_ctx: list[dict] = []
|
||||
await self._emit_progress(progress_cb, "agent.attachments", "Обрабатываю дополнительные вложения.")
|
||||
conf_pages = await self._fetch_confluence_pages(attachments)
|
||||
route_meta = RoutingMeta(
|
||||
@@ -157,8 +163,9 @@ class GraphAgentRuntime:
|
||||
process_id=route.process_id,
|
||||
user_message=message,
|
||||
assistant_message=final_answer,
|
||||
decision_type=route.decision_type,
|
||||
)
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"final agent answer: task_id=%s route=%s/%s answer=%s",
|
||||
task_id,
|
||||
route.domain_id,
|
||||
@@ -178,7 +185,7 @@ class GraphAgentRuntime:
|
||||
answer=final_answer,
|
||||
meta={
|
||||
"route": route.model_dump(),
|
||||
"used_rag": True,
|
||||
"used_rag": False,
|
||||
"used_confluence": bool(conf_pages),
|
||||
"changeset_filtered_out": True,
|
||||
"orchestrator": orchestrator_meta,
|
||||
@@ -193,6 +200,7 @@ class GraphAgentRuntime:
|
||||
process_id=route.process_id,
|
||||
user_message=message,
|
||||
assistant_message=final_answer or f"changeset:{len(validated)}",
|
||||
decision_type=route.decision_type,
|
||||
)
|
||||
final = AgentResult(
|
||||
result_type=TaskResultType.CHANGESET,
|
||||
@@ -200,7 +208,7 @@ class GraphAgentRuntime:
|
||||
changeset=validated,
|
||||
meta={
|
||||
"route": route.model_dump(),
|
||||
"used_rag": True,
|
||||
"used_rag": False,
|
||||
"used_confluence": bool(conf_pages),
|
||||
"orchestrator": orchestrator_meta,
|
||||
"orchestrator_steps": orchestrator_steps,
|
||||
@@ -214,7 +222,7 @@ class GraphAgentRuntime:
|
||||
scenario=str(orchestrator_meta.get("scenario", task_spec.scenario.value)),
|
||||
quality=quality_meta,
|
||||
)
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"GraphAgentRuntime.run completed: task_id=%s route=%s/%s result_type=%s changeset_items=%s",
|
||||
task_id,
|
||||
route.domain_id,
|
||||
@@ -222,7 +230,7 @@ class GraphAgentRuntime:
|
||||
final.result_type.value,
|
||||
len(final.changeset),
|
||||
)
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"final agent answer: task_id=%s route=%s/%s answer=%s",
|
||||
task_id,
|
||||
route.domain_id,
|
||||
@@ -239,13 +247,14 @@ class GraphAgentRuntime:
|
||||
process_id=route.process_id,
|
||||
user_message=message,
|
||||
assistant_message=final_answer,
|
||||
decision_type=route.decision_type,
|
||||
)
|
||||
final = AgentResult(
|
||||
result_type=TaskResultType.ANSWER,
|
||||
answer=final_answer,
|
||||
meta={
|
||||
"route": route.model_dump(),
|
||||
"used_rag": True,
|
||||
"used_rag": False,
|
||||
"used_confluence": bool(conf_pages),
|
||||
"orchestrator": orchestrator_meta,
|
||||
"orchestrator_steps": orchestrator_steps,
|
||||
@@ -259,7 +268,7 @@ class GraphAgentRuntime:
|
||||
scenario=str(orchestrator_meta.get("scenario", task_spec.scenario.value)),
|
||||
quality=quality_meta,
|
||||
)
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"GraphAgentRuntime.run completed: task_id=%s route=%s/%s result_type=%s answer_len=%s",
|
||||
task_id,
|
||||
route.domain_id,
|
||||
@@ -267,7 +276,7 @@ class GraphAgentRuntime:
|
||||
final.result_type.value,
|
||||
len(final.answer or ""),
|
||||
)
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"final agent answer: task_id=%s route=%s/%s answer=%s",
|
||||
task_id,
|
||||
route.domain_id,
|
||||
@@ -351,7 +360,7 @@ class GraphAgentRuntime:
|
||||
factory = self._router.graph_factory("default", "general")
|
||||
if factory is None:
|
||||
raise RuntimeError("No graph factory configured")
|
||||
LOGGER.warning("_resolve_graph resolved: domain_id=%s process_id=%s", domain_id, process_id)
|
||||
LOGGER.debug("_resolve_graph resolved: domain_id=%s process_id=%s", domain_id, process_id)
|
||||
return factory(self._checkpointer)
|
||||
|
||||
def _invoke_graph(self, graph, state: dict, dialog_session_id: str):
|
||||
@@ -365,7 +374,7 @@ class GraphAgentRuntime:
|
||||
for item in attachments:
|
||||
if item.get("type") == "confluence_url":
|
||||
pages.append(await self._confluence.fetch_page(item["url"]))
|
||||
LOGGER.warning("_fetch_confluence_pages completed: pages=%s", len(pages))
|
||||
LOGGER.info("_fetch_confluence_pages completed: pages=%s", len(pages))
|
||||
return pages
|
||||
|
||||
def _format_rag(self, items: list[dict]) -> str:
|
||||
@@ -411,7 +420,7 @@ class GraphAgentRuntime:
|
||||
"content": str(item.get("content", "")),
|
||||
"content_hash": str(item.get("content_hash", "")),
|
||||
}
|
||||
LOGGER.warning("_build_files_map completed: files=%s", len(output))
|
||||
LOGGER.debug("_build_files_map completed: files=%s", len(output))
|
||||
return output
|
||||
|
||||
def _lookup_file(self, files_map: dict[str, dict], path: str) -> dict | None:
|
||||
@@ -437,7 +446,7 @@ class GraphAgentRuntime:
|
||||
)
|
||||
item.base_hash = str(source["content_hash"])
|
||||
enriched.append(item)
|
||||
LOGGER.warning("_enrich_changeset_hashes completed: items=%s", len(enriched))
|
||||
LOGGER.debug("_enrich_changeset_hashes completed: items=%s", len(enriched))
|
||||
return enriched
|
||||
|
||||
def _sanitize_changeset(self, items: list[ChangeItem], files_map: dict[str, dict]) -> list[ChangeItem]:
|
||||
@@ -462,7 +471,7 @@ class GraphAgentRuntime:
|
||||
continue
|
||||
sanitized.append(item)
|
||||
if dropped_noop or dropped_ws:
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"_sanitize_changeset dropped items: noop=%s whitespace_only=%s kept=%s",
|
||||
dropped_noop,
|
||||
dropped_ws,
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
from app.modules.agent.module import AgentModule
|
||||
from app.modules.agent.repository import AgentRepository
|
||||
from app.modules.agent.story_context_repository import StoryContextRepository, StoryContextSchemaRepository
|
||||
from app.modules.chat.direct_service import CodeExplainChatService
|
||||
from app.modules.chat.dialog_store import DialogSessionStore
|
||||
from app.modules.chat.repository import ChatRepository
|
||||
from app.modules.chat.module import ChatModule
|
||||
from app.modules.chat.session_resolver import ChatSessionResolver
|
||||
from app.modules.chat.task_store import TaskStore
|
||||
from app.modules.rag.persistence.repository import RagRepository
|
||||
from app.modules.rag.explain import CodeExplainRetrieverV2, CodeGraphRepository, LayeredRetrievalGateway
|
||||
from app.modules.rag_session.module import RagModule
|
||||
from app.modules.rag_repo.module import RagRepoModule
|
||||
from app.modules.shared.bootstrap import bootstrap_database
|
||||
@@ -20,16 +25,32 @@ class ModularApplication:
|
||||
self.agent_repository = AgentRepository()
|
||||
self.story_context_schema_repository = StoryContextSchemaRepository()
|
||||
self.story_context_repository = StoryContextRepository()
|
||||
self.chat_tasks = TaskStore()
|
||||
|
||||
self.rag_session = RagModule(event_bus=self.events, retry=self.retry, repository=self.rag_repository)
|
||||
self.rag_repo = RagRepoModule(
|
||||
story_context_repository=self.story_context_repository,
|
||||
rag_repository=self.rag_repository,
|
||||
)
|
||||
self.code_explain_retriever = CodeExplainRetrieverV2(
|
||||
gateway=LayeredRetrievalGateway(self.rag_repository, self.rag_session.embedder),
|
||||
graph_repository=CodeGraphRepository(),
|
||||
)
|
||||
self.agent = AgentModule(
|
||||
rag_retriever=self.rag_session.rag,
|
||||
agent_repository=self.agent_repository,
|
||||
story_context_repository=self.story_context_repository,
|
||||
code_explain_retriever=self.code_explain_retriever,
|
||||
)
|
||||
self.direct_chat = CodeExplainChatService(
|
||||
retriever=self.code_explain_retriever,
|
||||
llm=self.agent.llm,
|
||||
session_resolver=ChatSessionResolver(
|
||||
dialogs=DialogSessionStore(self.chat_repository),
|
||||
rag_session_exists=lambda rag_session_id: self.rag_session.sessions.get(rag_session_id) is not None,
|
||||
),
|
||||
task_store=self.chat_tasks,
|
||||
message_sink=self.chat_repository.add_message,
|
||||
)
|
||||
self.chat = ChatModule(
|
||||
agent_runner=self.agent.runtime,
|
||||
@@ -37,6 +58,8 @@ class ModularApplication:
|
||||
retry=self.retry,
|
||||
rag_sessions=self.rag_session.sessions,
|
||||
repository=self.chat_repository,
|
||||
direct_chat=self.direct_chat,
|
||||
task_store=self.chat_tasks,
|
||||
)
|
||||
|
||||
def startup(self) -> None:
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,7 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
from uuid import uuid4
|
||||
|
||||
from app.modules.chat.repository import ChatRepository
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.chat.repository import ChatRepository
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -0,0 +1,71 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from uuid import uuid4
|
||||
|
||||
from app.modules.agent.llm import AgentLlmService
|
||||
from app.modules.chat.evidence_gate import CodeExplainEvidenceGate
|
||||
from app.modules.chat.session_resolver import ChatSessionResolver
|
||||
from app.modules.chat.task_store import TaskState, TaskStore
|
||||
from app.modules.rag.explain import CodeExplainRetrieverV2, PromptBudgeter
|
||||
from app.schemas.chat import ChatMessageRequest, TaskQueuedResponse, TaskResultType, TaskStatus
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CodeExplainChatService:
    """Answer code-explanation chat messages directly from a retrieved evidence pack.

    A message is handled in one call: the evidence pack is built, the
    evidence gate decides whether the LLM may answer, and the resulting
    answer is stored on the task and sent to the message sink.
    """

    def __init__(
        self,
        retriever: CodeExplainRetrieverV2,
        llm: AgentLlmService,
        session_resolver: ChatSessionResolver,
        task_store: TaskStore,
        message_sink,
        budgeter: PromptBudgeter | None = None,
        evidence_gate: CodeExplainEvidenceGate | None = None,
    ) -> None:
        """Store collaborators; default budgeter and evidence gate are created when omitted."""
        self._retriever = retriever
        self._llm = llm
        self._session_resolver = session_resolver
        self._task_store = task_store
        self._message_sink = message_sink
        self._budgeter = budgeter or PromptBudgeter()
        self._evidence_gate = evidence_gate or CodeExplainEvidenceGate()

    async def handle_message(self, request: ChatMessageRequest) -> TaskQueuedResponse:
        """Process one chat message and return a response for the finished task.

        The task is saved as RUNNING before any work is done and as DONE once
        the answer is available. Both the user message and the assistant
        answer are passed to the message sink together with the task id.

        NOTE(review): if retrieval or generation raises, the task stays saved
        as RUNNING — confirm that callers handle this.
        """
        dialog_session_id, rag_session_id = self._session_resolver.resolve(request)

        task_id = str(uuid4())
        task = TaskState(task_id=task_id, status=TaskStatus.RUNNING)
        self._task_store.save(task)
        self._message_sink(dialog_session_id, "user", request.message, task_id=task_id)

        file_candidates = [item.model_dump(mode="json") for item in request.files]
        pack = self._retriever.build_pack(
            rag_session_id,
            request.message,
            file_candidates=file_candidates,
        )
        answer = self._compose_answer(request.message, pack)

        self._message_sink(dialog_session_id, "assistant", answer, task_id=task_id)
        task.status = TaskStatus.DONE
        task.result_type = TaskResultType.ANSWER
        task.answer = answer
        self._task_store.save(task)

        LOGGER.warning(
            "direct code explain response: task_id=%s rag_session_id=%s excerpts=%s missing=%s",
            task_id,
            rag_session_id,
            len(pack.code_excerpts),
            pack.missing,
        )
        return TaskQueuedResponse(task_id=task_id, status=TaskStatus.DONE.value)

    def _compose_answer(self, message: str, pack) -> str:
        """Return the gate's own answer when evidence is rejected, else the LLM answer."""
        decision = self._evidence_gate.evaluate(pack)
        if not decision.passed:
            return decision.answer
        prompt_input = self._budgeter.build_prompt_input(message, pack)
        generated = self._llm.generate(
            "code_explain_answer_v2",
            prompt_input,
            log_context="chat.code_explain.direct",
        )
        return generated.strip()
|
||||
@@ -0,0 +1,62 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from app.modules.rag.explain.models import ExplainPack
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class EvidenceGateDecision:
|
||||
passed: bool
|
||||
answer: str = ""
|
||||
diagnostics: dict[str, list[str]] = field(default_factory=dict)
|
||||
|
||||
|
||||
class CodeExplainEvidenceGate:
    """Decide whether an explain pack holds enough code excerpts to answer.

    When it does not, the gate produces a ready-made reply listing what was
    found (paths, entrypoints, symbols) and what the pack reports as missing.
    """

    def __init__(self, min_excerpts: int = 2) -> None:
        """Remember the minimum number of code excerpts required to pass."""
        self._min_excerpts = min_excerpts

    def evaluate(self, pack: ExplainPack) -> EvidenceGateDecision:
        """Return a passing decision, or a failing one carrying a reply text."""
        diagnostics = self._diagnostics(pack)
        has_enough = len(pack.code_excerpts) >= self._min_excerpts
        if not has_enough:
            return EvidenceGateDecision(
                passed=False,
                answer=self._build_answer(pack, diagnostics),
                diagnostics=diagnostics,
            )
        return EvidenceGateDecision(passed=True, diagnostics=diagnostics)

    def _diagnostics(self, pack: ExplainPack) -> dict[str, list[str]]:
        """Collect non-empty titles, known paths and the pack's missing list."""
        entrypoint_titles = [entry.title for entry in pack.selected_entrypoints[:3] if entry.title]
        symbol_titles = [symbol.title for symbol in pack.seed_symbols[:5] if symbol.title]
        return {
            "entrypoints": entrypoint_titles,
            "symbols": symbol_titles,
            "paths": self._paths(pack),
            "missing": list(pack.missing),
        }

    def _paths(self, pack: ExplainPack) -> list[str]:
        """Return up to six distinct paths, entrypoints/symbols before excerpts."""

        def candidates():
            for item in (*pack.selected_entrypoints, *pack.seed_symbols):
                # Prefer the item's source; fall back to its location path.
                yield item.source or (item.location.path if item.location else "")
            for excerpt in pack.code_excerpts:
                yield excerpt.path

        # dict.fromkeys keeps first-seen order while dropping duplicates.
        distinct = dict.fromkeys(path for path in candidates() if path)
        return list(distinct)[:6]

    def _build_answer(self, pack: ExplainPack, diagnostics: dict[str, list[str]]) -> str:
        """Render the reply shown when the gate rejects the pack."""
        parts = [
            "Недостаточно опоры в коде, чтобы дать объяснение без догадок.",
            "",
            f"Найдено фрагментов кода: {len(pack.code_excerpts)} из {self._min_excerpts} минимально необходимых.",
        ]
        # (label, diagnostics key) in the order the sections are printed.
        sections = (
            ("Пути", "paths"),
            ("Entrypoints", "entrypoints"),
            ("Символы", "symbols"),
            ("Диагностика", "missing"),
        )
        for label, key in sections:
            values = diagnostics[key]
            if values:
                parts.append(f"{label}: {', '.join(values)}")
        return "\n".join(parts).strip()
|
||||
@@ -1,13 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from fastapi import APIRouter, Header
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
from app.core.exceptions import AppError
|
||||
from app.modules.chat.direct_service import CodeExplainChatService
|
||||
from app.modules.chat.dialog_store import DialogSessionStore
|
||||
from app.modules.chat.repository import ChatRepository
|
||||
from app.modules.chat.service import ChatOrchestrator
|
||||
from app.modules.chat.task_store import TaskStore
|
||||
from app.modules.contracts import AgentRunner
|
||||
from app.modules.rag_session.session_store import RagSessionStore
|
||||
from app.modules.shared.event_bus import EventBus
|
||||
from app.modules.shared.idempotency_store import IdempotencyStore
|
||||
from app.modules.shared.retry_executor import RetryExecutor
|
||||
@@ -20,6 +23,11 @@ from app.schemas.chat import (
|
||||
)
|
||||
from app.schemas.common import ModuleName
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.chat.repository import ChatRepository
|
||||
from app.modules.contracts import AgentRunner
|
||||
from app.modules.rag_session.session_store import RagSessionStore
|
||||
|
||||
|
||||
class ChatModule:
|
||||
def __init__(
|
||||
@@ -29,12 +37,16 @@ class ChatModule:
|
||||
retry: RetryExecutor,
|
||||
rag_sessions: RagSessionStore,
|
||||
repository: ChatRepository,
|
||||
direct_chat: CodeExplainChatService | None = None,
|
||||
task_store: TaskStore | None = None,
|
||||
) -> None:
|
||||
self._rag_sessions = rag_sessions
|
||||
self.tasks = TaskStore()
|
||||
self._simple_code_explain_only = os.getenv("SIMPLE_CODE_EXPLAIN_ONLY", "true").lower() in {"1", "true", "yes"}
|
||||
self.tasks = task_store or TaskStore()
|
||||
self.dialogs = DialogSessionStore(repository)
|
||||
self.idempotency = IdempotencyStore()
|
||||
self.events = event_bus
|
||||
self.direct_chat = direct_chat
|
||||
self.chat = ChatOrchestrator(
|
||||
task_store=self.tasks,
|
||||
dialogs=self.dialogs,
|
||||
@@ -59,11 +71,13 @@ class ChatModule:
|
||||
rag_session_id=dialog.rag_session_id,
|
||||
)
|
||||
|
||||
@router.post("/api/chat/messages", response_model=TaskQueuedResponse)
|
||||
@router.post("/api/chat/messages", response_model=TaskQueuedResponse | TaskResultResponse)
|
||||
async def send_message(
|
||||
request: ChatMessageRequest,
|
||||
idempotency_key: str | None = Header(default=None, alias="Idempotency-Key"),
|
||||
) -> TaskQueuedResponse:
|
||||
) -> TaskQueuedResponse | TaskResultResponse:
|
||||
if self._simple_code_explain_only and self.direct_chat is not None:
|
||||
return await self.direct_chat.handle_message(request)
|
||||
task = await self.chat.enqueue_message(request, idempotency_key)
|
||||
return TaskQueuedResponse(task_id=task.task_id, status=task.status.value)
|
||||
|
||||
|
||||
+23
-34
@@ -6,6 +6,7 @@ from app.modules.contracts import AgentRunner
|
||||
from app.schemas.chat import ChatMessageRequest, TaskResultType, TaskStatus
|
||||
from app.schemas.common import ErrorPayload, ModuleName
|
||||
from app.modules.chat.dialog_store import DialogSessionStore
|
||||
from app.modules.chat.session_resolver import ChatSessionResolver
|
||||
from app.modules.chat.task_store import TaskState, TaskStore
|
||||
from app.modules.shared.event_bus import EventBus
|
||||
from app.modules.shared.idempotency_store import IdempotencyStore
|
||||
@@ -41,6 +42,7 @@ class ChatOrchestrator:
|
||||
self._retry = retry
|
||||
self._rag_session_exists = rag_session_exists
|
||||
self._message_sink = message_sink
|
||||
self._session_resolver = ChatSessionResolver(dialogs, rag_session_exists)
|
||||
|
||||
async def enqueue_message(
|
||||
self,
|
||||
@@ -52,7 +54,7 @@ class ChatOrchestrator:
|
||||
if existing:
|
||||
task = self._task_store.get(existing)
|
||||
if task:
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"enqueue_message reused task by idempotency key: task_id=%s mode=%s",
|
||||
task.task_id,
|
||||
request.mode.value,
|
||||
@@ -63,7 +65,7 @@ class ChatOrchestrator:
|
||||
if idempotency_key:
|
||||
self._idempotency.put(idempotency_key, task.task_id)
|
||||
asyncio.create_task(self._process_task(task.task_id, request))
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"enqueue_message created task: task_id=%s mode=%s",
|
||||
task.task_id,
|
||||
request.mode.value,
|
||||
@@ -135,6 +137,13 @@ class ChatOrchestrator:
|
||||
task.changeset = result.changeset
|
||||
if task.result_type == TaskResultType.ANSWER and task.answer:
|
||||
self._message_sink(dialog_session_id, "assistant", task.answer, task_id=task_id)
|
||||
LOGGER.warning(
|
||||
"outgoing chat response: task_id=%s dialog_session_id=%s result_type=%s answer=%s",
|
||||
task_id,
|
||||
dialog_session_id,
|
||||
task.result_type.value,
|
||||
_truncate_for_log(task.answer),
|
||||
)
|
||||
elif task.result_type == TaskResultType.CHANGESET:
|
||||
self._message_sink(
|
||||
dialog_session_id,
|
||||
@@ -146,6 +155,14 @@ class ChatOrchestrator:
|
||||
"changeset": [item.model_dump(mode="json") for item in task.changeset],
|
||||
},
|
||||
)
|
||||
LOGGER.warning(
|
||||
"outgoing chat response: task_id=%s dialog_session_id=%s result_type=%s changeset_items=%s answer=%s",
|
||||
task_id,
|
||||
dialog_session_id,
|
||||
task.result_type.value,
|
||||
len(task.changeset),
|
||||
_truncate_for_log(task.answer or ""),
|
||||
)
|
||||
self._task_store.save(task)
|
||||
await self._events.publish(
|
||||
task_id,
|
||||
@@ -160,7 +177,7 @@ class ChatOrchestrator:
|
||||
},
|
||||
)
|
||||
await self._publish_progress(task_id, "task.done", "Обработка завершена.", progress=100)
|
||||
LOGGER.warning(
|
||||
LOGGER.info(
|
||||
"_process_task completed: task_id=%s status=%s result_type=%s changeset_items=%s",
|
||||
task_id,
|
||||
task.status.value,
|
||||
@@ -232,7 +249,7 @@ class ChatOrchestrator:
|
||||
if progress is not None:
|
||||
payload["progress"] = max(0, min(100, int(progress)))
|
||||
await self._events.publish(task_id, kind, payload)
|
||||
LOGGER.warning(
|
||||
LOGGER.debug(
|
||||
"_publish_progress emitted: task_id=%s kind=%s stage=%s progress=%s",
|
||||
task_id,
|
||||
kind,
|
||||
@@ -259,35 +276,7 @@ class ChatOrchestrator:
|
||||
meta={"heartbeat": True},
|
||||
)
|
||||
index += 1
|
||||
LOGGER.warning("_run_heartbeat stopped: task_id=%s ticks=%s", task_id, index)
|
||||
LOGGER.debug("_run_heartbeat stopped: task_id=%s ticks=%s", task_id, index)
|
||||
|
||||
def _resolve_sessions(self, request: ChatMessageRequest) -> tuple[str, str]:
|
||||
# Legacy compatibility: old session_id/project_id flow.
|
||||
if request.dialog_session_id and request.rag_session_id:
|
||||
dialog = self._dialogs.get(request.dialog_session_id)
|
||||
if not dialog:
|
||||
raise AppError("dialog_not_found", "Dialog session not found", ModuleName.BACKEND)
|
||||
if dialog.rag_session_id != request.rag_session_id:
|
||||
raise AppError("dialog_rag_mismatch", "Dialog session does not belong to rag session", ModuleName.BACKEND)
|
||||
LOGGER.warning(
|
||||
"_resolve_sessions resolved by dialog_session_id: dialog_session_id=%s rag_session_id=%s",
|
||||
request.dialog_session_id,
|
||||
request.rag_session_id,
|
||||
)
|
||||
return request.dialog_session_id, request.rag_session_id
|
||||
|
||||
if request.session_id and request.project_id:
|
||||
if not self._rag_session_exists(request.project_id):
|
||||
raise AppError("rag_session_not_found", "RAG session not found", ModuleName.RAG)
|
||||
LOGGER.warning(
|
||||
"_resolve_sessions resolved by legacy session/project: session_id=%s project_id=%s",
|
||||
request.session_id,
|
||||
request.project_id,
|
||||
)
|
||||
return request.session_id, request.project_id
|
||||
|
||||
raise AppError(
|
||||
"missing_sessions",
|
||||
"dialog_session_id and rag_session_id are required",
|
||||
ModuleName.BACKEND,
|
||||
)
|
||||
return self._session_resolver.resolve(request)
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.core.exceptions import AppError
|
||||
from app.schemas.chat import ChatMessageRequest
|
||||
from app.schemas.common import ModuleName
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.chat.dialog_store import DialogSessionStore
|
||||
|
||||
|
||||
class ChatSessionResolver:
    """Map a chat request onto a ``(dialog_session_id, rag_session_id)`` pair.

    Two request shapes are accepted: the explicit dialog/rag id pair, and the
    ``session_id``/``project_id`` pair, whose values are returned in the
    same positions.
    """

    def __init__(self, dialogs: DialogSessionStore, rag_session_exists) -> None:
        """Keep the dialog store and the ``rag_session_exists(id)`` predicate."""
        self._dialogs = dialogs
        self._rag_session_exists = rag_session_exists

    def resolve(self, request: ChatMessageRequest) -> tuple[str, str]:
        """Validate the ids carried by ``request`` and return them as a pair.

        Raises:
            AppError: when the dialog is unknown, belongs to a different rag
                session, the rag session does not exist, or no usable id pair
                is present on the request.
        """
        if request.dialog_session_id and request.rag_session_id:
            return self._resolve_dialog(request.dialog_session_id, request.rag_session_id)
        if request.session_id and request.project_id:
            return self._resolve_legacy(request.session_id, request.project_id)
        raise AppError(
            "missing_sessions",
            "dialog_session_id and rag_session_id are required",
            ModuleName.BACKEND,
        )

    def _resolve_dialog(self, dialog_session_id: str, rag_session_id: str) -> tuple[str, str]:
        """Check that the stored dialog exists and points at ``rag_session_id``."""
        stored = self._dialogs.get(dialog_session_id)
        if not stored:
            raise AppError("dialog_not_found", "Dialog session not found", ModuleName.BACKEND)
        if stored.rag_session_id != rag_session_id:
            raise AppError("dialog_rag_mismatch", "Dialog session does not belong to rag session", ModuleName.BACKEND)
        return dialog_session_id, rag_session_id

    def _resolve_legacy(self, session_id: str, project_id: str) -> tuple[str, str]:
        """Accept the session/project pair once the rag session is known to exist."""
        if not self._rag_session_exists(project_id):
            raise AppError("rag_session_not_found", "RAG session not found", ModuleName.RAG)
        return session_id, project_id
|
||||
@@ -90,6 +90,41 @@ sequenceDiagram
|
||||
Rag-->>Agent: items
|
||||
```
|
||||
|
||||
### Retrieval + project/qa reasoning
|
||||
Назначение: `RAG` вызывается не в начале runtime, а внутри отдельного graph-шага `context_retrieval` для `project/qa`.
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Agent as GraphAgentRuntime
|
||||
participant Orch as OrchestratorService
|
||||
participant G1 as conversation_understanding
|
||||
participant G2 as question_classification
|
||||
participant G3 as context_retrieval
|
||||
participant Rag as RagService
|
||||
participant G4 as context_analysis
|
||||
participant G5 as answer_composition
|
||||
|
||||
Agent->>Orch: run(task)
|
||||
Orch->>G1: execute
|
||||
G1-->>Orch: resolved_request
|
||||
Orch->>G2: execute
|
||||
G2-->>Orch: question_profile
|
||||
Orch->>G3: execute
|
||||
G3->>Rag: retrieve(query)
|
||||
Rag-->>G3: rag_items
|
||||
G3-->>Orch: source_bundle
|
||||
Orch->>G4: execute
|
||||
G4-->>Orch: analysis_brief
|
||||
Orch->>G5: execute
|
||||
G5-->>Orch: final_answer
|
||||
Orch-->>Agent: final_answer
|
||||
```
|
||||
|
||||
Для `project/qa` это означает:
|
||||
- ранний глобальный retrieval больше не нужен;
|
||||
- `RAG` возвращает записи только для конкретного шага `context_retrieval`;
|
||||
- оркестратор управляет цепочкой graph-шагов;
|
||||
- пользовательский ответ собирается после анализа, а не напрямую из сырого retrieval.
|
||||
|
||||
## 5. Слои, фиксируемые в RAG
|
||||
|
||||
### 5.1. Слои DOCS
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from importlib import import_module
|
||||
|
||||
__all__ = [
|
||||
"CodeExcerpt",
|
||||
"CodeExplainRetrieverV2",
|
||||
"CodeGraphRepository",
|
||||
"EvidenceItem",
|
||||
"ExplainIntent",
|
||||
"ExplainIntentBuilder",
|
||||
"ExplainPack",
|
||||
"LayeredRetrievalGateway",
|
||||
"PromptBudgeter",
|
||||
"TracePath",
|
||||
]
|
||||
|
||||
|
||||
def __getattr__(name: str):
    """Lazily resolve a public name of this package on first access.

    The submodule that defines ``name`` is imported only when the attribute
    is requested. The resolved object is then stored in the module globals,
    so later lookups find it directly and do not call this hook again.

    Raises:
        AttributeError: if ``name`` is not one of the lazily exported names.
    """
    module_map = {
        "CodeExcerpt": "app.modules.rag.explain.models",
        "EvidenceItem": "app.modules.rag.explain.models",
        "ExplainIntent": "app.modules.rag.explain.models",
        "ExplainPack": "app.modules.rag.explain.models",
        "TracePath": "app.modules.rag.explain.models",
        "ExplainIntentBuilder": "app.modules.rag.explain.intent_builder",
        "PromptBudgeter": "app.modules.rag.explain.budgeter",
        "LayeredRetrievalGateway": "app.modules.rag.explain.layered_gateway",
        "CodeGraphRepository": "app.modules.rag.explain.graph_repository",
        "CodeExplainRetrieverV2": "app.modules.rag.explain.retriever_v2",
    }
    module_name = module_map.get(name)
    if module_name is None:
        # Same wording as the interpreter's own missing-attribute error.
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    value = getattr(import_module(module_name), name)
    # Cache the resolved object so this hook runs at most once per name.
    globals()[name] = value
    return value
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,62 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from app.modules.rag.explain.models import ExplainPack
|
||||
|
||||
|
||||
class PromptBudgeter:
    """Serialise an explain pack into a size-limited JSON prompt payload."""

    def __init__(
        self,
        *,
        max_paths: int = 3,
        max_symbols: int = 25,
        max_excerpts: int = 40,
        max_chars: int = 30000,
    ) -> None:
        """Store the limits applied to trace paths, symbols and excerpts."""
        self._max_paths = max_paths
        self._max_symbols = max_symbols
        self._max_excerpts = max_excerpts
        self._max_chars = max_chars

    def build_prompt_input(self, question: str, pack: ExplainPack) -> str:
        """Return the prompt payload for ``question`` as an indented JSON string.

        Excerpts are limited by count and by total character budget; trace
        paths, seed symbols and entrypoints are cut to their own limits.
        """
        traced_ids = self._traced_symbol_ids(pack)
        excerpts = self._fit_excerpts(pack, traced_ids)
        payload = {
            "question": question,
            "intent": pack.intent.model_dump(mode="json"),
            "selected_entrypoints": [item.model_dump(mode="json") for item in pack.selected_entrypoints[:5]],
            "seed_symbols": [item.model_dump(mode="json") for item in pack.seed_symbols[: self._max_symbols]],
            "trace_paths": [path.model_dump(mode="json") for path in pack.trace_paths[: self._max_paths]],
            "evidence_index": {key: value.model_dump(mode="json") for key, value in pack.evidence_index.items()},
            "code_excerpts": excerpts,
            "missing": pack.missing,
            "conflicts": pack.conflicts,
        }
        return json.dumps(payload, ensure_ascii=False, indent=2)

    def _traced_symbol_ids(self, pack: ExplainPack) -> list[str]:
        """Collect distinct symbol ids from the leading trace paths, up to the symbol limit."""
        collected: list[str] = []
        for trace in pack.trace_paths[: self._max_paths]:
            for candidate in trace.symbol_ids:
                if not candidate or candidate in collected:
                    continue
                if len(collected) < self._max_symbols:
                    collected.append(candidate)
        return collected

    def _fit_excerpts(self, pack: ExplainPack, traced_ids: list[str]) -> list[dict]:
        """Pick excerpts in pack order until the count or character budget runs out."""
        chosen: list[dict] = []
        used_chars = 0
        for excerpt in pack.code_excerpts:
            linked_symbol = excerpt.symbol_id
            # Excerpts bound to a symbol outside the traced set are skipped;
            # excerpts without a symbol id are always eligible.
            if traced_ids and linked_symbol and linked_symbol not in traced_ids:
                continue
            text = excerpt.content.strip()
            budget = self._max_chars - used_chars
            if budget <= 0 or len(chosen) >= self._max_excerpts:
                break
            if len(text) > budget:
                text = f"{text[:budget].rstrip()}...[truncated]"
            chosen.append(
                {
                    "evidence_id": excerpt.evidence_id,
                    "title": excerpt.title,
                    "path": excerpt.path,
                    "start_line": excerpt.start_line,
                    "end_line": excerpt.end_line,
                    "focus": excerpt.focus,
                    "content": text,
                }
            )
            used_chars += len(text)
        return chosen
|
||||
@@ -0,0 +1,59 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.modules.rag.explain.models import CodeExcerpt, LayeredRetrievalItem
|
||||
|
||||
|
||||
class ExcerptPlanner:
    """Turn a retrieved source chunk into one or two code excerpts.

    Every located chunk yields an ``overview`` excerpt with its full content;
    a narrower ``focus`` excerpt is added around the first line that contains
    one of the focus tokens, unless that window would cover the whole chunk.
    """

    _FOCUS_TOKENS = ("raise", "except", "db", "select", "insert", "update", "delete", "http", "publish", "emit")

    def plan(self, chunk: LayeredRetrievalItem, *, evidence_id: str, symbol_id: str | None) -> list[CodeExcerpt]:
        """Return the excerpts for ``chunk``; empty when the chunk has no location."""
        where = chunk.location
        if where is None:
            return []
        overview = CodeExcerpt(
            evidence_id=evidence_id,
            symbol_id=symbol_id,
            title=chunk.title,
            path=where.path,
            start_line=where.start_line,
            end_line=where.end_line,
            content=chunk.content.strip(),
            focus="overview",
        )
        narrowed = self._focus_excerpt(chunk, evidence_id=evidence_id, symbol_id=symbol_id)
        return [overview] if narrowed is None else [overview, narrowed]

    def _focus_excerpt(
        self,
        chunk: LayeredRetrievalItem,
        *,
        evidence_id: str,
        symbol_id: str | None,
    ) -> CodeExcerpt | None:
        """Build the focus excerpt around the first line containing a focus token.

        Returns ``None`` when the chunk has no location, no line matches, or
        the surrounding window is as large as the chunk itself.
        """
        where = chunk.location
        if where is None:
            return None
        source_lines = chunk.content.splitlines()
        hit = next(
            (
                position
                for position, line in enumerate(source_lines)
                if any(token in line.lower() for token in self._FOCUS_TOKENS)
            ),
            None,
        )
        if hit is None:
            return None
        # Window: two lines before the hit through two lines after it.
        first = max(0, hit - 2)
        stop = min(len(source_lines), hit + 3)
        if stop - first >= len(source_lines):
            return None
        base_line = where.start_line or 1
        return CodeExcerpt(
            evidence_id=evidence_id,
            symbol_id=symbol_id,
            title=f"{chunk.title}:focus",
            path=where.path,
            start_line=base_line + first,
            end_line=base_line + stop - 1,
            content="\n".join(source_lines[first:stop]).strip(),
            focus="focus",
        )
|
||||
@@ -0,0 +1,216 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from app.modules.rag.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.modules.shared.db import get_engine
|
||||
|
||||
|
||||
class CodeGraphRepository:
    """Read-only queries over ``rag_chunks`` for the code-explain layers.

    Layers touched: ``C2_DEPENDENCY_GRAPH`` (edges), ``C1_SYMBOL_CATALOG``
    (symbols) and ``C0_SOURCE_CHUNKS`` (source text).
    """

    # Metadata keys accepted as the edge endpoint filter in ``_get_edges``.
    _EDGE_ENDPOINT_KEYS = ("src_symbol_id", "dst_symbol_id")

    def get_out_edges(
        self,
        rag_session_id: str,
        src_symbol_ids: list[str],
        edge_types: list[str],
        limit_per_src: int,
    ) -> list[LayeredRetrievalItem]:
        """Return edges whose source is one of ``src_symbol_ids``.

        At most ``limit_per_src`` edges are kept per source symbol, in
        ``path, span_start`` order. Empty ``src_symbol_ids`` yields ``[]``
        without touching the database.
        """
        return self._get_edges(
            rag_session_id,
            src_symbol_ids,
            edge_types,
            limit_per_src,
            endpoint_key="src_symbol_id",
        )

    def get_in_edges(
        self,
        rag_session_id: str,
        dst_symbol_ids: list[str],
        edge_types: list[str],
        limit_per_dst: int,
    ) -> list[LayeredRetrievalItem]:
        """Return edges whose destination is one of ``dst_symbol_ids``.

        At most ``limit_per_dst`` edges are kept per destination symbol, in
        ``path, span_start`` order. Empty ``dst_symbol_ids`` yields ``[]``
        without touching the database.
        """
        return self._get_edges(
            rag_session_id,
            dst_symbol_ids,
            edge_types,
            limit_per_dst,
            endpoint_key="dst_symbol_id",
        )

    def resolve_symbol_by_ref(
        self,
        rag_session_id: str,
        dst_ref: str,
        package_hint: str | None = None,
    ) -> LayeredRetrievalItem | None:
        """Find the catalog symbol that best matches a textual reference.

        Candidates match ``dst_ref`` exactly on ``qname`` or ``title``, or
        have a ``qname`` ending with it. Scoring: exact qname +3, exact title
        +2, package starting with ``package_hint`` +3, hint contained in the
        path +1. The first candidate with the highest score wins.

        Returns ``None`` for a blank reference or when nothing matches.
        """
        ref = (dst_ref or "").strip()
        if not ref:
            return None
        # Escape LIKE metacharacters so that "_" and "%" inside the reference
        # match literally; "_" is common in identifiers and would otherwise
        # match any single character.
        tail_pattern = f"%{self._escape_like(ref)}"
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(
                    """
                    SELECT path, content, layer, title, metadata_json, span_start, span_end, qname
                    FROM rag_chunks
                    WHERE rag_session_id = :sid
                      AND layer = 'C1_SYMBOL_CATALOG'
                      AND (qname = :ref OR title = :ref OR qname LIKE :tail ESCAPE '!')
                    ORDER BY path
                    LIMIT 12
                    """
                ),
                {"sid": rag_session_id, "ref": ref, "tail": tail_pattern},
            ).mappings().fetchall()
        best: LayeredRetrievalItem | None = None
        best_score = -1
        for row in rows:
            metadata = self._loads(row.get("metadata_json"))
            package = str(metadata.get("package_or_module") or "")
            score = 0
            if str(row.get("qname") or "") == ref:
                score += 3
            if str(row.get("title") or "") == ref:
                score += 2
            if package_hint and package.startswith(package_hint):
                score += 3
            if package_hint and package_hint in str(row.get("path") or ""):
                score += 1
            # Strict ">" keeps the earliest row on ties.
            if score > best_score:
                best = self._to_item(row, metadata)
                best_score = score
        return best

    def get_symbols_by_ids(self, rag_session_id: str, symbol_ids: list[str]) -> list[LayeredRetrievalItem]:
        """Return catalog entries for ``symbol_ids`` ordered by path and span start."""
        if not symbol_ids:
            return []
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(
                    """
                    SELECT path, content, layer, title, metadata_json, span_start, span_end
                    FROM rag_chunks
                    WHERE rag_session_id = :sid
                      AND layer = 'C1_SYMBOL_CATALOG'
                      AND symbol_id = ANY(:symbol_ids)
                    ORDER BY path, span_start
                    """
                ),
                {"sid": rag_session_id, "symbol_ids": symbol_ids},
            ).mappings().fetchall()
        return [self._to_item(row, self._loads(row.get("metadata_json"))) for row in rows]

    def get_chunks_by_symbol_ids(
        self,
        rag_session_id: str,
        symbol_ids: list[str],
        prefer_chunk_type: str = "symbol_block",
    ) -> list[LayeredRetrievalItem]:
        """Return one source chunk per resolved symbol, where one exists.

        Symbols are looked up in the catalog first; each located symbol is
        then matched to a single overlapping source chunk.
        """
        chunks: list[LayeredRetrievalItem] = []
        for symbol in self.get_symbols_by_ids(rag_session_id, symbol_ids):
            if symbol.location is None:
                continue
            chunk = self._chunk_for_symbol(rag_session_id, symbol, prefer_chunk_type=prefer_chunk_type)
            if chunk is not None:
                chunks.append(chunk)
        return chunks

    def _get_edges(
        self,
        rag_session_id: str,
        symbol_ids: list[str],
        edge_types: list[str],
        limit_per_symbol: int,
        *,
        endpoint_key: str,
    ) -> list[LayeredRetrievalItem]:
        """Fetch dependency edges filtered on one endpoint, capped per endpoint symbol."""
        # endpoint_key is interpolated into the SQL text, so only the two
        # fixed metadata keys are ever allowed through.
        if endpoint_key not in self._EDGE_ENDPOINT_KEYS:
            raise ValueError(f"unsupported edge endpoint key: {endpoint_key!r}")
        if not symbol_ids:
            return []
        sql = f"""
            SELECT path, content, layer, title, metadata_json, span_start, span_end
            FROM rag_chunks
            WHERE rag_session_id = :sid
              AND layer = 'C2_DEPENDENCY_GRAPH'
              AND CAST(metadata_json AS jsonb)->>'{endpoint_key}' = ANY(:symbol_ids)
              AND CAST(metadata_json AS jsonb)->>'edge_type' = ANY(:edge_types)
            ORDER BY path, span_start
        """
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(sql),
                {"sid": rag_session_id, "symbol_ids": symbol_ids, "edge_types": edge_types},
            ).mappings().fetchall()
        seen_per_symbol: dict[str, int] = {}
        items: list[LayeredRetrievalItem] = []
        for row in rows:
            metadata = self._loads(row.get("metadata_json"))
            endpoint = str(metadata.get(endpoint_key) or "")
            seen_per_symbol[endpoint] = seen_per_symbol.get(endpoint, 0) + 1
            if seen_per_symbol[endpoint] <= limit_per_symbol:
                items.append(self._to_item(row, metadata))
        return items

    def _chunk_for_symbol(
        self,
        rag_session_id: str,
        symbol: LayeredRetrievalItem,
        *,
        prefer_chunk_type: str,
    ) -> LayeredRetrievalItem | None:
        """Return the best source chunk overlapping the symbol's line span.

        Chunks of ``prefer_chunk_type`` rank first; ties are broken by how
        close the chunk start is to the symbol start.
        """
        location = symbol.location
        if location is None:
            return None
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(
                    """
                    SELECT path, content, layer, title, metadata_json, span_start, span_end
                    FROM rag_chunks
                    WHERE rag_session_id = :sid
                      AND layer = 'C0_SOURCE_CHUNKS'
                      AND path = :path
                      AND COALESCE(span_start, 0) <= :end_line
                      AND COALESCE(span_end, 999999) >= :start_line
                    ORDER BY
                      CASE WHEN CAST(metadata_json AS jsonb)->>'chunk_type' = :prefer_chunk_type THEN 0 ELSE 1 END,
                      ABS(COALESCE(span_start, 0) - :start_line)
                    LIMIT 1
                    """
                ),
                {
                    "sid": rag_session_id,
                    "path": location.path,
                    # A missing span bound widens the overlap test to the whole file.
                    "start_line": location.start_line or 0,
                    "end_line": location.end_line or 999999,
                    "prefer_chunk_type": prefer_chunk_type,
                },
            ).mappings().fetchall()
        if not rows:
            return None
        row = rows[0]
        return self._to_item(row, self._loads(row.get("metadata_json")))

    def _to_item(self, row, metadata: dict) -> LayeredRetrievalItem:
        """Convert a result row plus its decoded metadata into a retrieval item."""
        path = str(row.get("path") or "")
        return LayeredRetrievalItem(
            source=path,
            content=str(row.get("content") or ""),
            layer=str(row.get("layer") or ""),
            title=str(row.get("title") or ""),
            metadata=metadata,
            location=CodeLocation(
                path=path,
                start_line=row.get("span_start"),
                end_line=row.get("span_end"),
            ),
        )

    def _loads(self, value) -> dict:
        """Decode a ``metadata_json`` column value into a dict.

        Accepts JSON text or bytes, and passes through a value that is
        already a dict. Empty values and JSON that does not decode to an
        object both yield ``{}``.
        """
        if not value:
            return {}
        if isinstance(value, dict):
            return value
        raw = value if isinstance(value, (str, bytes, bytearray)) else str(value)
        parsed = json.loads(raw)
        return parsed if isinstance(parsed, dict) else {}

    @staticmethod
    def _escape_like(value: str) -> str:
        """Escape ``!``, ``%`` and ``_`` for a ``LIKE ... ESCAPE '!'`` pattern."""
        return value.replace("!", "!!").replace("%", "!%").replace("_", "!_")
|
||||
@@ -0,0 +1,102 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from app.modules.rag.explain.models import ExplainHints, ExplainIntent
|
||||
from app.modules.rag.retrieval.query_terms import extract_query_terms
|
||||
|
||||
|
||||
class ExplainIntentBuilder:
    """Derive an ``ExplainIntent`` from a free-text user question.

    Extraction is purely lexical: regular expressions pick out file paths,
    routes, symbol-like tokens and back-quoted commands, and substring
    checks choose the entry types, depth and whether tests are in scope.
    """

    _ROUTE_RE = re.compile(r"(/[A-Za-z0-9_./{}:-]+)")
    _FILE_RE = re.compile(r"([A-Za-z0-9_./-]+\.py)")
    _SYMBOL_RE = re.compile(r"\b([A-Z][A-Za-z0-9_]*\.[A-Za-z_][A-Za-z0-9_]*|[A-Z][A-Za-z0-9_]{2,}|[a-z_][A-Za-z0-9_]{2,})\b")
    _COMMAND_RE = re.compile(r"`([A-Za-z0-9:_-]+)`")
    _TEST_KEYWORDS = (
        "тест",
        "tests",
        "test ",
        "unit-test",
        "unit test",
        "юнит-тест",
        "pytest",
        "spec",
        "как покрыто тестами",
        "как проверяется",
        "how is it tested",
        "how it's tested",
    )

    def build(self, user_query: str) -> ExplainIntent:
        """Build the intent; whitespace runs in the query are collapsed first."""
        normalized = " ".join((user_query or "").split())
        lowered = normalized.lower()
        keywords = self._keywords(normalized)
        hints = ExplainHints(
            paths=self._dedupe(self._FILE_RE.findall(normalized)),
            symbols=self._symbols(normalized),
            endpoints=self._dedupe(self._ROUTE_RE.findall(normalized)),
            commands=self._commands(normalized, lowered),
        )
        return ExplainIntent(
            raw_query=user_query,
            normalized_query=normalized,
            keywords=keywords[:12],
            hints=hints,
            include_tests=self._include_tests(lowered),
            expected_entry_types=self._entry_types(lowered, hints),
            depth=self._depth(lowered),
        )

    def _keywords(self, text: str) -> list[str]:
        """Query terms, then symbol tokens, then routes, without duplicates."""
        combined = [
            *extract_query_terms(text),
            *self._symbols(text),
            *self._ROUTE_RE.findall(text),
        ]
        return self._dedupe(combined)

    def _symbols(self, text: str) -> list[str]:
        """Symbol-like tokens of three or more characters, excluding ``*.py`` names."""
        tokens = (found.strip() for found in self._SYMBOL_RE.findall(text))
        accepted = [token for token in tokens if len(token) >= 3 and not token.endswith(".py")]
        return self._dedupe(accepted)

    def _commands(self, text: str, lowered: str) -> list[str]:
        """Back-quoted tokens plus the word following ``command`` or ``cli``."""
        found = list(self._COMMAND_RE.findall(text))
        padded = f" {lowered} "
        # (standalone trigger word, pattern capturing the token after it)
        followers = (
            (" command ", r"command\s+([A-Za-z0-9:_-]+)"),
            (" cli ", r"cli\s+([A-Za-z0-9:_-]+)"),
        )
        for trigger, pattern in followers:
            if trigger in padded:
                found.extend(re.findall(pattern, lowered))
        return self._dedupe(found)

    def _entry_types(self, lowered: str, hints: ExplainHints) -> list[str]:
        """Pick ``http``, ``cli`` or both; HTTP indicators are checked first."""
        http_words = ("endpoint", "route", "handler", "http", "api")
        if hints.endpoints or any(word in lowered for word in http_words):
            return ["http"]
        cli_words = ("cli", "command", "click", "typer")
        if hints.commands or any(word in lowered for word in cli_words):
            return ["cli"]
        return ["http", "cli"]

    def _depth(self, lowered: str) -> str:
        """Return ``deep``, ``high`` or the default ``medium``."""
        # Order matters: "deep" markers take precedence over "high" ones.
        levels = (
            ("deep", ("deep", "подроб", "деталь", "full flow", "trace")),
            ("high", ("high level", "overview", "кратко", "summary")),
        )
        for label, markers in levels:
            if any(marker in lowered for marker in markers):
                return label
        return "medium"

    def _include_tests(self, lowered: str) -> bool:
        """True when the space-padded query contains any test keyword."""
        padded = f" {lowered} "
        return any(keyword in padded for keyword in self._TEST_KEYWORDS)

    def _dedupe(self, values: list[str]) -> list[str]:
        """Strip each value, drop empties and keep the first occurrence of each."""
        stripped = (value.strip() for value in values)
        return list(dict.fromkeys(item for item in stripped if item))
|
||||
@@ -0,0 +1,289 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING, Callable
|
||||
|
||||
from app.modules.rag.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.modules.rag.retrieval.test_filter import build_test_filters, debug_disable_test_filter
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.persistence.repository import RagRepository
|
||||
from app.modules.rag_session.embedding.gigachat_embedder import GigaChatEmbedder
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class LayerRetrievalResult:
|
||||
items: list[LayeredRetrievalItem]
|
||||
missing: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
class LayeredRetrievalGateway:
    """Retrieves rows from the RAG repository and reports failures as data.

    The public methods do not propagate retrieval errors: a failure is logged
    and surfaced through ``LayerRetrievalResult.missing``. When the failed
    query had the test filter applied, it is retried once without the filter.
    """

    def __init__(self, repository: RagRepository, embedder: GigaChatEmbedder) -> None:
        self._repository = repository
        self._embedder = embedder

    def retrieve_layer(
        self,
        rag_session_id: str,
        query: str,
        layer: str,
        *,
        limit: int,
        path_prefixes: list[str] | None = None,
        exclude_tests: bool = True,
        prefer_non_tests: bool = False,
        include_spans: bool = False,
    ) -> LayerRetrievalResult:
        """Embed ``query`` and retrieve up to ``limit`` rows of one layer.

        Args:
            rag_session_id: Session whose index is queried.
            query: Text that is embedded and also passed as ``query_text``.
            layer: Single layer to search.
            limit: Maximum number of rows requested from the repository.
            path_prefixes: Optional path prefixes forwarded to the repository.
            exclude_tests: Apply the test filters unless the debug switch
                disables them.
            prefer_non_tests: Forwarded to the repository; forced on when the
                test filter is not applied.
            include_spans: Attach a ``CodeLocation`` to every returned item.

        Returns:
            The retrieved items, or an empty result with a
            ``layer:<layer> retrieval_failed`` marker in ``missing``.
        """
        effective_exclude_tests = exclude_tests and not debug_disable_test_filter()
        filters = self._filter_args(effective_exclude_tests)
        log_context = {
            "rag_session_id": rag_session_id,
            "layer": layer,
            "exclude_tests": effective_exclude_tests,
            "path_prefixes": path_prefixes,
        }
        query_embedding: list[float] | None = None

        def unfiltered_query() -> list[dict]:
            # Same query with the test filters removed; used only for the retry.
            return self._repository.retrieve(
                rag_session_id,
                query_embedding,
                query_text=query,
                limit=limit,
                layers=[layer],
                path_prefixes=path_prefixes,
                exclude_path_prefixes=None,
                exclude_like_patterns=None,
                prefer_non_tests=True,
            )

        try:
            query_embedding = self._embedder.embed([query])[0]
            rows = self._repository.retrieve(
                rag_session_id,
                query_embedding,
                query_text=query,
                limit=limit,
                layers=[layer],
                path_prefixes=path_prefixes,
                exclude_path_prefixes=filters["exclude_path_prefixes"],
                exclude_like_patterns=filters["exclude_like_patterns"],
                prefer_non_tests=prefer_non_tests or not effective_exclude_tests,
            )
            return self._success_result(
                rows,
                label="layered retrieval",
                include_spans=include_spans,
                **log_context,
            )
        except Exception as exc:
            failure_marker = f"layer:{layer} retrieval_failed"
            if query_embedding is None:
                # No embedding is available, so there is nothing to retry with.
                self._log_failure(label="layered retrieval", exc=exc, **log_context)
                return LayerRetrievalResult(items=[], missing=[self._failure_missing(failure_marker, exc)])
            retried = self._retry_without_test_filter(
                operation=unfiltered_query,
                label="layered retrieval",
                include_spans=include_spans,
                exc=exc,
                missing_prefix=failure_marker,
                **log_context,
            )
            if retried is None:
                return LayerRetrievalResult(items=[], missing=[self._failure_missing(failure_marker, exc)])
            return retried

    def retrieve_lexical_code(
        self,
        rag_session_id: str,
        query: str,
        *,
        limit: int,
        path_prefixes: list[str] | None = None,
        exclude_tests: bool = True,
        include_spans: bool = False,
    ) -> LayerRetrievalResult:
        """Run the repository's lexical code search for ``query``.

        Returns:
            The retrieved items, or an empty result with a
            ``layer:C0 lexical_retrieval_failed`` marker in ``missing``.
        """
        effective_exclude_tests = exclude_tests and not debug_disable_test_filter()
        filters = self._filter_args(effective_exclude_tests)
        log_context = {
            "rag_session_id": rag_session_id,
            "exclude_tests": effective_exclude_tests,
            "path_prefixes": path_prefixes,
        }

        def unfiltered_query() -> list[dict]:
            # Same query with the test filters removed; used only for the retry.
            return self._repository.retrieve_lexical_code(
                rag_session_id,
                query_text=query,
                limit=limit,
                path_prefixes=path_prefixes,
                exclude_path_prefixes=None,
                exclude_like_patterns=None,
                prefer_non_tests=True,
            )

        try:
            rows = self._repository.retrieve_lexical_code(
                rag_session_id,
                query_text=query,
                limit=limit,
                path_prefixes=path_prefixes,
                exclude_path_prefixes=filters["exclude_path_prefixes"],
                exclude_like_patterns=filters["exclude_like_patterns"],
                prefer_non_tests=not effective_exclude_tests,
            )
            return self._success_result(
                rows,
                label="lexical retrieval",
                include_spans=include_spans,
                **log_context,
            )
        except Exception as exc:
            retried = self._retry_without_test_filter(
                operation=unfiltered_query,
                label="lexical retrieval",
                include_spans=include_spans,
                exc=exc,
                missing_prefix="layer:C0 lexical_retrieval_failed",
                **log_context,
            )
            if retried is None:
                return LayerRetrievalResult(items=[], missing=[self._failure_missing("layer:C0 lexical_retrieval_failed", exc)])
            return retried

    def _retry_without_test_filter(
        self,
        *,
        operation: Callable[[], list[dict]],
        label: str,
        rag_session_id: str,
        include_spans: bool,
        exclude_tests: bool,
        path_prefixes: list[str] | None,
        exc: Exception,
        missing_prefix: str,
        layer: str | None = None,
    ) -> LayerRetrievalResult | None:
        """Log the original failure and, if the test filter was on, retry once.

        Args:
            operation: Callable that re-runs the query without test filters.
            exclude_tests: Whether the failed attempt applied the test filter;
                when false no retry is made.
            exc: Exception raised by the failed attempt.
            missing_prefix: Prefix of the marker added to a successful retry.

        Returns:
            The retry result carrying a ``:retried_without_test_filter``
            marker, or ``None`` when no retry was made or the retry failed.
        """
        will_retry = bool(exclude_tests)
        self._log_failure(
            label=label,
            rag_session_id=rag_session_id,
            layer=layer,
            exclude_tests=exclude_tests,
            path_prefixes=path_prefixes,
            exc=exc,
            retried_without_test_filter=will_retry,
        )
        if not will_retry:
            return None
        retry_label = f"{label} retry"
        try:
            rows = operation()
        except Exception as retry_exc:
            self._log_failure(
                label=retry_label,
                rag_session_id=rag_session_id,
                layer=layer,
                exclude_tests=False,
                path_prefixes=path_prefixes,
                exc=retry_exc,
            )
            return None
        outcome = self._success_result(
            rows,
            rag_session_id=rag_session_id,
            label=retry_label,
            include_spans=include_spans,
            layer=layer,
            exclude_tests=False,
            path_prefixes=path_prefixes,
        )
        outcome.missing.append(f"{missing_prefix}:retried_without_test_filter")
        return outcome

    def _success_result(
        self,
        rows: list[dict],
        *,
        rag_session_id: str,
        label: str,
        include_spans: bool,
        exclude_tests: bool,
        path_prefixes: list[str] | None,
        layer: str | None = None,
    ) -> LayerRetrievalResult:
        """Convert rows into items, log a summary and wrap them in a result."""
        converted = [self._to_item(row, include_spans=include_spans) for row in rows]
        top_paths = [entry.source for entry in converted[:3]]
        # NOTE(review): a successful call is logged at WARNING level — presumably
        # so it stays visible under a WARNING-level logging setup; confirm.
        LOGGER.warning(
            "%s: rag_session_id=%s layer=%s exclude_tests=%s path_prefixes=%s returned_count=%s top_paths=%s",
            label,
            rag_session_id,
            layer,
            exclude_tests,
            path_prefixes or [],
            len(converted),
            top_paths,
        )
        return LayerRetrievalResult(items=converted)

    def _log_failure(
        self,
        *,
        label: str,
        rag_session_id: str,
        exclude_tests: bool,
        path_prefixes: list[str] | None,
        exc: Exception,
        layer: str | None = None,
        retried_without_test_filter: bool = False,
    ) -> None:
        """Log a failed retrieval together with the active exception's traceback."""
        summary = self._exception_summary(exc)
        LOGGER.warning(
            "%s failed: rag_session_id=%s layer=%s exclude_tests=%s path_prefixes=%s retried_without_test_filter=%s error=%s",
            label,
            rag_session_id,
            layer,
            exclude_tests,
            path_prefixes or [],
            retried_without_test_filter,
            summary,
            exc_info=True,
        )

    def _filter_args(self, exclude_tests: bool) -> dict[str, list[str] | None]:
        """Return the repository exclusion arguments; both are ``None`` when tests are kept."""
        if exclude_tests:
            test_filters = build_test_filters()
        else:
            test_filters = None
        return {
            "exclude_path_prefixes": test_filters.exclude_path_prefixes if test_filters else None,
            "exclude_like_patterns": test_filters.exclude_like_patterns if test_filters else None,
        }

    def _failure_missing(self, prefix: str, exc: Exception) -> str:
        """Build a ``missing`` marker of the form ``<prefix>:<exception summary>``."""
        return prefix + ":" + self._exception_summary(exc)

    def _exception_summary(self, exc: Exception) -> str:
        """Summarise an exception as ``<TypeName>:<message>`` on a single line.

        Whitespace runs collapse to single spaces, messages longer than 180
        characters are cut to 177 plus ``...``, and an empty message becomes
        ``no_message``.
        """
        text = " ".join(str(exc).split())
        if len(text) > 180:
            text = f"{text[:177]}..."
        if not text:
            text = "no_message"
        return f"{type(exc).__name__}:{text}"

    def _to_item(self, row: dict, *, include_spans: bool) -> LayeredRetrievalItem:
        """Map one repository row onto a ``LayeredRetrievalItem``.

        Missing or falsy text fields become empty strings; the location is
        built only when ``include_spans`` is set.
        """
        path = str(row.get("path") or "")
        span = (
            CodeLocation(
                path=path,
                start_line=row.get("span_start"),
                end_line=row.get("span_end"),
            )
            if include_spans
            else None
        )
        return LayeredRetrievalItem(
            source=path,
            content=str(row.get("content") or ""),
            layer=str(row.get("layer") or ""),
            title=str(row.get("title") or ""),
            metadata=dict(row.get("metadata", {}) or {}),
            score=row.get("distance"),
            location=span,
        )
|
||||
@@ -0,0 +1,91 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
||||
class ExplainHints(BaseModel):
    """Explicit hints attached to an explain intent; unknown fields are rejected."""

    model_config = ConfigDict(extra="forbid")

    # File paths mentioned by the user.
    paths: list[str] = Field(default_factory=list)
    # Symbol names mentioned by the user.
    symbols: list[str] = Field(default_factory=list)
    # Endpoint hints; a non-empty list marks the query as HTTP-related.
    endpoints: list[str] = Field(default_factory=list)
    # Command hints; a non-empty list marks the query as CLI-related.
    commands: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ExplainIntent(BaseModel):
    """Structured interpretation of a user's "explain this code" query."""

    model_config = ConfigDict(extra="forbid")

    # Query text as received.
    raw_query: str
    # Normalised query text.
    normalized_query: str
    # Search keywords derived from the query.
    keywords: list[str] = Field(default_factory=list)
    # Explicit path/symbol/endpoint/command hints.
    hints: ExplainHints = Field(default_factory=ExplainHints)
    # True when the query asks for test code to be included.
    include_tests: bool = False
    # Entrypoint kinds the query is expected to concern.
    expected_entry_types: list[Literal["http", "cli"]] = Field(default_factory=list)
    # Requested level of detail.
    depth: Literal["high", "medium", "deep"] = "medium"
|
||||
|
||||
|
||||
class CodeLocation(BaseModel):
    """A file path with an optional line span."""

    model_config = ConfigDict(extra="forbid")

    # Path of the file.
    path: str
    # First line of the span, when known.
    start_line: int | None = None
    # Last line of the span, when known.
    end_line: int | None = None
|
||||
|
||||
|
||||
class LayeredRetrievalItem(BaseModel):
    """One retrieved document from a RAG layer."""

    model_config = ConfigDict(extra="forbid")

    # Path of the document the item came from.
    source: str
    # Text content of the item.
    content: str
    # Name of the layer the item belongs to.
    layer: str
    # Title of the item.
    title: str
    # Free-form metadata (e.g. "symbol_id", "entry_type", "is_test").
    metadata: dict[str, Any] = Field(default_factory=dict)
    # Retrieval score, when provided.
    score: float | None = None
    # Source location, when spans were requested.
    location: CodeLocation | None = None
|
||||
|
||||
|
||||
class TracePath(BaseModel):
    """A path through the code graph expressed as an ordered list of symbol ids."""

    model_config = ConfigDict(extra="forbid")

    # Symbol ids in traversal order, starting at the seed symbol.
    symbol_ids: list[str] = Field(default_factory=list)
    # Accumulated score of the path.
    score: float = 0.0
    # Id of the entrypoint the path belongs to, if any.
    entrypoint_id: str | None = None
    # Free-form notes collected while building the path (e.g. "resolved:<ref>").
    notes: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class EvidenceItem(BaseModel):
    """A piece of evidence referenced by id from an explain pack."""

    model_config = ConfigDict(extra="forbid")

    # Identifier of the evidence entry (e.g. "entrypoint_1", "excerpt_3").
    evidence_id: str
    # Category of the evidence.
    kind: Literal["entrypoint", "symbol", "edge", "excerpt"]
    # Short human-readable summary.
    summary: str
    # Source location of the evidence, when known.
    location: CodeLocation | None = None
    # Symbol ids this evidence supports.
    supports: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class CodeExcerpt(BaseModel):
    """A snippet of source code tied to an evidence entry."""

    model_config = ConfigDict(extra="forbid")

    # Id of the evidence entry this excerpt belongs to.
    evidence_id: str
    # Symbol the excerpt was taken from, when known.
    symbol_id: str | None = None
    # Title of the excerpt.
    title: str
    # Path of the file containing the excerpt.
    path: str
    # First line of the excerpt, when known.
    start_line: int | None = None
    # Last line of the excerpt, when known.
    end_line: int | None = None
    # Text of the excerpt.
    content: str
    # Focus label; excerpts from test code carry a "test:" prefix.
    focus: str = "overview"
|
||||
|
||||
|
||||
class ExplainPack(BaseModel):
    """Everything gathered to answer one explain query."""

    model_config = ConfigDict(extra="forbid")

    # Intent the pack was built for.
    intent: ExplainIntent
    # Entrypoints chosen for the query.
    selected_entrypoints: list[LayeredRetrievalItem] = Field(default_factory=list)
    # Symbols used as starting points for tracing.
    seed_symbols: list[LayeredRetrievalItem] = Field(default_factory=list)
    # Paths traced through the code graph.
    trace_paths: list[TracePath] = Field(default_factory=list)
    # Evidence entries keyed by evidence id.
    evidence_index: dict[str, EvidenceItem] = Field(default_factory=dict)
    # Source excerpts backing the explanation.
    code_excerpts: list[CodeExcerpt] = Field(default_factory=list)
    # Markers describing what could not be retrieved.
    missing: list[str] = Field(default_factory=list)
    # Detected conflicts between pieces of evidence.
    conflicts: list[str] = Field(default_factory=list)
|
||||
@@ -0,0 +1,328 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.rag.contracts.enums import RagLayer
|
||||
from app.modules.rag.explain.intent_builder import ExplainIntentBuilder
|
||||
from app.modules.rag.explain.layered_gateway import LayerRetrievalResult, LayeredRetrievalGateway
|
||||
from app.modules.rag.explain.models import CodeExcerpt, EvidenceItem, ExplainPack, LayeredRetrievalItem
|
||||
from app.modules.rag.explain.source_excerpt_fetcher import SourceExcerptFetcher
|
||||
from app.modules.rag.explain.trace_builder import TraceBuilder
|
||||
from app.modules.rag.retrieval.test_filter import exclude_tests_default, is_test_path
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
_MIN_EXCERPTS = 2
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.explain.graph_repository import CodeGraphRepository
|
||||
from app.modules.rag.explain.models import ExplainIntent
|
||||
|
||||
|
||||
class CodeExplainRetrieverV2:
    """Builds an ``ExplainPack`` for a user query from layered retrieval.

    A pass retrieves entrypoints and seed symbols, traces paths from the
    seeds, fetches excerpts along those paths and falls back to lexical
    search when no excerpt was found. If tests were excluded and too few
    excerpts came back, a lexical search that includes tests is merged in.
    """

    def __init__(
        self,
        gateway: LayeredRetrievalGateway,
        graph_repository: CodeGraphRepository,
        intent_builder: ExplainIntentBuilder | None = None,
        trace_builder: TraceBuilder | None = None,
        excerpt_fetcher: SourceExcerptFetcher | None = None,
    ) -> None:
        self._gateway = gateway
        self._graph = graph_repository
        self._intent_builder = intent_builder or ExplainIntentBuilder()
        self._trace_builder = trace_builder or TraceBuilder(graph_repository)
        self._excerpt_fetcher = excerpt_fetcher or SourceExcerptFetcher(graph_repository)

    def build_pack(
        self,
        rag_session_id: str,
        user_query: str,
        *,
        file_candidates: list[dict] | None = None,
    ) -> ExplainPack:
        """Build the explain pack for ``user_query``.

        Args:
            rag_session_id: Session whose index is queried.
            user_query: Raw query text.
            file_candidates: Optional dicts with a ``path`` key; the
                directories of the first six are used as path prefixes.

        Returns:
            The assembled pack, including ``missing`` markers.
        """
        intent = self._intent_builder.build(user_query)
        path_prefixes = _path_prefixes(intent, file_candidates or [])
        exclude_tests = exclude_tests_default() and not intent.include_tests
        pack = self._run_pass(rag_session_id, intent, path_prefixes, exclude_tests=exclude_tests)
        if exclude_tests and len(pack.code_excerpts) < _MIN_EXCERPTS:
            self._merge_test_fallback(pack, rag_session_id, intent, path_prefixes)
        self._log_pack(rag_session_id, pack)
        return pack

    def _run_pass(
        self,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
        *,
        exclude_tests: bool,
    ) -> ExplainPack:
        """Run one retrieval pass and assemble the resulting pack."""
        missing: list[str] = []

        entrypoints_result = self._entrypoints(rag_session_id, intent, path_prefixes, exclude_tests=exclude_tests)
        missing.extend(entrypoints_result.missing)
        selected_entrypoints = self._filter_entrypoints(intent, entrypoints_result.items)
        if not selected_entrypoints:
            missing.append("layer:C3 empty")

        seed_result = self._seed_symbols(
            rag_session_id,
            intent,
            path_prefixes,
            selected_entrypoints,
            exclude_tests=exclude_tests,
        )
        missing.extend(seed_result.missing)
        seed_symbols = seed_result.items
        if not seed_symbols:
            missing.append("layer:C1 empty")

        # Trace depth per requested detail level: deep=4, medium=3, anything else=2.
        depth = {"deep": 4, "medium": 3}.get(intent.depth, 2)
        trace_paths = []
        if seed_symbols:
            trace_paths = self._trace_builder.build_paths(rag_session_id, seed_symbols, max_depth=depth)
        excerpts, excerpt_evidence = self._excerpt_fetcher.fetch(rag_session_id, trace_paths) if trace_paths else ([], {})

        if not excerpts:
            # Nothing came out of the graph walk: fall back to lexical search.
            lexical_result = self._gateway.retrieve_lexical_code(
                rag_session_id,
                intent.normalized_query,
                limit=6,
                path_prefixes=path_prefixes or None,
                exclude_tests=exclude_tests,
                include_spans=True,
            )
            missing.extend(lexical_result.missing)
            excerpts, excerpt_evidence = _lexical_excerpts(lexical_result.items)
            if not excerpts:
                missing.append("layer:C0 empty")

        evidence_index = _evidence_index(selected_entrypoints, seed_symbols)
        evidence_index.update(excerpt_evidence)
        missing.extend(_missing(selected_entrypoints, seed_symbols, trace_paths, excerpts))
        return ExplainPack(
            intent=intent,
            selected_entrypoints=selected_entrypoints,
            seed_symbols=seed_symbols,
            trace_paths=trace_paths,
            evidence_index=evidence_index,
            code_excerpts=excerpts,
            missing=_cleanup_missing(_dedupe(missing), has_excerpts=bool(excerpts)),
            conflicts=[],
        )

    def _merge_test_fallback(
        self,
        pack: ExplainPack,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
    ) -> None:
        """Add lexical excerpts retrieved with tests included to ``pack`` in place.

        Excerpts already present (same path, span and content) are skipped,
        and evidence is registered only for excerpts that were actually added,
        so the evidence index never references an excerpt missing from the pack.
        """
        lexical_result = self._gateway.retrieve_lexical_code(
            rag_session_id,
            intent.normalized_query,
            limit=6,
            path_prefixes=path_prefixes or None,
            exclude_tests=False,
            include_spans=True,
        )
        # Continue numbering after the excerpt evidence already in the pack.
        excerpt_offset = sum(1 for key in pack.evidence_index if key.startswith("excerpt_"))
        excerpts, evidence = _lexical_excerpts(
            lexical_result.items,
            start_index=excerpt_offset,
            is_test_fallback=True,
        )
        if not excerpts:
            pack.missing = _dedupe(pack.missing + lexical_result.missing)
            return
        seen = {(item.path, item.start_line, item.end_line, item.content) for item in pack.code_excerpts}
        for excerpt in excerpts:
            key = (excerpt.path, excerpt.start_line, excerpt.end_line, excerpt.content)
            if key in seen:
                continue
            seen.add(key)
            pack.code_excerpts.append(excerpt)
            entry = evidence.get(excerpt.evidence_id)
            if entry is not None:
                pack.evidence_index[excerpt.evidence_id] = entry
        pack.missing = _cleanup_missing(
            _dedupe(pack.missing + lexical_result.missing),
            has_excerpts=bool(pack.code_excerpts),
        )

    def _entrypoints(
        self,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
        *,
        exclude_tests: bool,
    ) -> LayerRetrievalResult:
        """Retrieve up to six candidates from the code-entrypoints layer."""
        return self._gateway.retrieve_layer(
            rag_session_id,
            intent.normalized_query,
            RagLayer.CODE_ENTRYPOINTS,
            limit=6,
            path_prefixes=path_prefixes or None,
            exclude_tests=exclude_tests,
            prefer_non_tests=True,
            include_spans=True,
        )

    def _filter_entrypoints(self, intent: ExplainIntent, items: list[LayeredRetrievalItem]) -> list[LayeredRetrievalItem]:
        """Keep at most three entrypoints, preferring the expected entry types.

        When no item matches the expected types (or none are expected), the
        first three items are returned unfiltered.
        """
        head = items[:3]
        if not intent.expected_entry_types:
            return head
        matching = [
            item
            for item in items
            if str(item.metadata.get("entry_type") or "") in intent.expected_entry_types
        ]
        return matching[:3] or head

    def _seed_symbols(
        self,
        rag_session_id: str,
        intent: ExplainIntent,
        path_prefixes: list[str],
        entrypoints: list[LayeredRetrievalItem],
        *,
        exclude_tests: bool,
    ) -> LayerRetrievalResult:
        """Pick up to eight seed symbols, entrypoint handlers first.

        Handlers referenced by the entrypoints come before symbols retrieved
        from the symbol catalog; items without a ``symbol_id`` and repeated
        ids are skipped.
        """
        symbol_result = self._gateway.retrieve_layer(
            rag_session_id,
            intent.normalized_query,
            RagLayer.CODE_SYMBOL_CATALOG,
            limit=12,
            path_prefixes=path_prefixes or None,
            exclude_tests=exclude_tests,
            prefer_non_tests=True,
            include_spans=True,
        )
        # Drop empty ids first so the graph is never queried with an empty id list.
        handler_ids = [
            handler_id
            for handler_id in (str(item.metadata.get("handler_symbol_id") or "") for item in entrypoints)
            if handler_id
        ]
        handlers: list[LayeredRetrievalItem] = []
        if handler_ids:
            handlers = self._graph.get_symbols_by_ids(rag_session_id, handler_ids)
        seeds: list[LayeredRetrievalItem] = []
        seen: set[str] = set()
        for item in handlers + symbol_result.items:
            symbol_id = str(item.metadata.get("symbol_id") or "")
            if not symbol_id or symbol_id in seen:
                continue
            seen.add(symbol_id)
            seeds.append(item)
            if len(seeds) >= 8:
                break
        return LayerRetrievalResult(items=seeds, missing=list(symbol_result.missing))

    def _log_pack(self, rag_session_id: str, pack: ExplainPack) -> None:
        """Log the pack's counts, split into production and test excerpts."""
        prod_excerpt_count = sum(1 for excerpt in pack.code_excerpts if not _is_test_excerpt(excerpt))
        test_excerpt_count = len(pack.code_excerpts) - prod_excerpt_count
        LOGGER.warning(
            "code explain pack: rag_session_id=%s entrypoints=%s seeds=%s paths=%s excerpts=%s prod_excerpt_count=%s test_excerpt_count=%s missing=%s",
            rag_session_id,
            len(pack.selected_entrypoints),
            len(pack.seed_symbols),
            len(pack.trace_paths),
            len(pack.code_excerpts),
            prod_excerpt_count,
            test_excerpt_count,
            pack.missing,
        )
|
||||
|
||||
|
||||
def _evidence_index(
|
||||
entrypoints: list[LayeredRetrievalItem],
|
||||
seed_symbols: list[LayeredRetrievalItem],
|
||||
) -> dict[str, EvidenceItem]:
|
||||
result: dict[str, EvidenceItem] = {}
|
||||
for index, item in enumerate(entrypoints, start=1):
|
||||
evidence_id = f"entrypoint_{index}"
|
||||
result[evidence_id] = EvidenceItem(
|
||||
evidence_id=evidence_id,
|
||||
kind="entrypoint",
|
||||
summary=item.title,
|
||||
location=item.location,
|
||||
supports=[str(item.metadata.get("handler_symbol_id") or "")],
|
||||
)
|
||||
for index, item in enumerate(seed_symbols, start=1):
|
||||
evidence_id = f"symbol_{index}"
|
||||
result[evidence_id] = EvidenceItem(
|
||||
evidence_id=evidence_id,
|
||||
kind="symbol",
|
||||
summary=item.title,
|
||||
location=item.location,
|
||||
supports=[str(item.metadata.get("symbol_id") or "")],
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def _missing(
|
||||
entrypoints: list[LayeredRetrievalItem],
|
||||
seed_symbols: list[LayeredRetrievalItem],
|
||||
trace_paths,
|
||||
excerpts,
|
||||
) -> list[str]:
|
||||
missing: list[str] = []
|
||||
if not entrypoints:
|
||||
missing.append("entrypoints")
|
||||
if not seed_symbols:
|
||||
missing.append("seed_symbols")
|
||||
if not trace_paths:
|
||||
missing.append("trace_paths")
|
||||
if not excerpts:
|
||||
missing.append("code_excerpts")
|
||||
return missing
|
||||
|
||||
|
||||
def _lexical_excerpts(
|
||||
items: list[LayeredRetrievalItem],
|
||||
*,
|
||||
start_index: int = 0,
|
||||
is_test_fallback: bool = False,
|
||||
) -> tuple[list[CodeExcerpt], dict[str, EvidenceItem]]:
|
||||
excerpts: list[CodeExcerpt] = []
|
||||
evidence_index: dict[str, EvidenceItem] = {}
|
||||
for item in items:
|
||||
evidence_id = f"excerpt_{start_index + len(evidence_index) + 1}"
|
||||
location = item.location
|
||||
evidence_index[evidence_id] = EvidenceItem(
|
||||
evidence_id=evidence_id,
|
||||
kind="excerpt",
|
||||
summary=item.title or item.source,
|
||||
location=location,
|
||||
supports=[],
|
||||
)
|
||||
focus = "lexical"
|
||||
if _item_is_test(item):
|
||||
focus = "test:lexical"
|
||||
elif is_test_fallback:
|
||||
focus = "lexical"
|
||||
excerpts.append(
|
||||
CodeExcerpt(
|
||||
evidence_id=evidence_id,
|
||||
symbol_id=str(item.metadata.get("symbol_id") or "") or None,
|
||||
title=item.title or item.source,
|
||||
path=item.source,
|
||||
start_line=location.start_line if location else None,
|
||||
end_line=location.end_line if location else None,
|
||||
content=item.content,
|
||||
focus=focus,
|
||||
)
|
||||
)
|
||||
return excerpts, evidence_index
|
||||
|
||||
|
||||
def _item_is_test(item: LayeredRetrievalItem) -> bool:
|
||||
return bool(item.metadata.get("is_test")) or is_test_path(item.source)
|
||||
|
||||
|
||||
def _is_test_excerpt(excerpt: CodeExcerpt) -> bool:
|
||||
return excerpt.focus.startswith("test:") or is_test_path(excerpt.path)
|
||||
|
||||
|
||||
def _path_prefixes(intent: ExplainIntent, file_candidates: list[dict]) -> list[str]:
|
||||
values: list[str] = []
|
||||
for path in intent.hints.paths:
|
||||
prefix = path.rsplit("/", 1)[0] if "/" in path else path
|
||||
if prefix and prefix not in values:
|
||||
values.append(prefix)
|
||||
for item in file_candidates[:6]:
|
||||
path = str(item.get("path") or "")
|
||||
prefix = path.rsplit("/", 1)[0] if "/" in path else ""
|
||||
if prefix and prefix not in values:
|
||||
values.append(prefix)
|
||||
return values
|
||||
|
||||
|
||||
def _cleanup_missing(values: list[str], *, has_excerpts: bool) -> list[str]:
|
||||
if not has_excerpts:
|
||||
return values
|
||||
return [value for value in values if value not in {"code_excerpts", "layer:C0 empty"}]
|
||||
|
||||
|
||||
def _dedupe(values: list[str]) -> list[str]:
|
||||
result: list[str] = []
|
||||
for value in values:
|
||||
item = value.strip()
|
||||
if item and item not in result:
|
||||
result.append(item)
|
||||
return result
|
||||
@@ -0,0 +1,53 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.rag.explain.excerpt_planner import ExcerptPlanner
|
||||
from app.modules.rag.explain.models import CodeExcerpt, EvidenceItem, TracePath
|
||||
from app.modules.rag.retrieval.test_filter import is_test_path
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.explain.graph_repository import CodeGraphRepository
|
||||
|
||||
|
||||
class SourceExcerptFetcher:
    """Fetches code chunks for traced symbols and plans excerpts from them."""

    def __init__(self, graph_repository: CodeGraphRepository, planner: ExcerptPlanner | None = None) -> None:
        self._graph = graph_repository
        self._planner = planner or ExcerptPlanner()

    def fetch(
        self,
        rag_session_id: str,
        trace_paths: list[TracePath],
        *,
        max_excerpts: int = 40,
    ) -> tuple[list[CodeExcerpt], dict[str, EvidenceItem]]:
        """Collect excerpts for every symbol that appears on the trace paths.

        Args:
            rag_session_id: Session whose graph is queried.
            trace_paths: Paths whose non-empty symbol ids are looked up, in
                first-seen order.
            max_excerpts: Upper bound on the number of excerpts returned.

        Returns:
            The excerpts and an evidence index keyed ``excerpt_<n>``, one
            entry per processed chunk. Both are empty, and the graph is not
            queried, when the paths contain no non-empty symbol id.
        """
        # dict keys preserve insertion order: ordered de-duplication in one pass.
        ordered_symbol_ids = list(
            dict.fromkeys(
                symbol_id
                for path in trace_paths
                for symbol_id in path.symbol_ids
                if symbol_id
            )
        )
        if not ordered_symbol_ids:
            return [], {}
        chunks = self._graph.get_chunks_by_symbol_ids(rag_session_id, ordered_symbol_ids)
        excerpts: list[CodeExcerpt] = []
        evidence_index: dict[str, EvidenceItem] = {}
        for chunk in chunks:
            symbol_id = str(chunk.metadata.get("symbol_id") or "")
            evidence_id = f"excerpt_{len(evidence_index) + 1}"
            location = chunk.location
            evidence_index[evidence_id] = EvidenceItem(
                evidence_id=evidence_id,
                kind="excerpt",
                summary=chunk.title,
                location=location,
                supports=[symbol_id] if symbol_id else [],
            )
            is_test_chunk = bool(chunk.metadata.get("is_test")) or is_test_path(location.path if location else chunk.source)
            for excerpt in self._planner.plan(chunk, evidence_id=evidence_id, symbol_id=symbol_id):
                if len(excerpts) >= max_excerpts:
                    break
                if is_test_chunk and not excerpt.focus.startswith("test:"):
                    # Mark excerpts that come from test code.
                    excerpt.focus = f"test:{excerpt.focus}"
                excerpts.append(excerpt)
            if len(excerpts) >= max_excerpts:
                break
        return excerpts, evidence_index
|
||||
@@ -0,0 +1,102 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.modules.rag.explain.models import LayeredRetrievalItem, TracePath
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.modules.rag.explain.graph_repository import CodeGraphRepository
|
||||
|
||||
|
||||
class TraceBuilder:
|
||||
def __init__(self, graph_repository: CodeGraphRepository) -> None:
|
||||
self._graph = graph_repository
|
||||
|
||||
def build_paths(
|
||||
self,
|
||||
rag_session_id: str,
|
||||
seed_symbols: list[LayeredRetrievalItem],
|
||||
*,
|
||||
max_depth: int,
|
||||
max_paths: int = 3,
|
||||
edge_types: list[str] | None = None,
|
||||
) -> list[TracePath]:
|
||||
edges_filter = edge_types or ["calls", "imports", "inherits"]
|
||||
symbol_map = self._symbol_map(seed_symbols)
|
||||
paths: list[TracePath] = []
|
||||
for seed in seed_symbols:
|
||||
seed_id = str(seed.metadata.get("symbol_id") or "")
|
||||
if not seed_id:
|
||||
continue
|
||||
queue: list[tuple[list[str], float, list[str]]] = [([seed_id], 0.0, [])]
|
||||
while queue and len(paths) < max_paths * 3:
|
||||
current_path, score, notes = queue.pop(0)
|
||||
src_symbol_id = current_path[-1]
|
||||
out_edges = self._graph.get_out_edges(rag_session_id, [src_symbol_id], edges_filter, limit_per_src=4)
|
||||
if not out_edges or len(current_path) >= max_depth:
|
||||
paths.append(TracePath(symbol_ids=current_path, score=score, notes=notes))
|
||||
continue
|
||||
for edge in out_edges:
|
||||
metadata = edge.metadata
|
||||
dst_symbol_id = str(metadata.get("dst_symbol_id") or "")
|
||||
next_notes = list(notes)
|
||||
next_score = score + self._edge_score(edge, symbol_map.get(src_symbol_id))
|
||||
if not dst_symbol_id:
|
||||
dst_ref = str(metadata.get("dst_ref") or "")
|
||||
package_hint = self._package_hint(symbol_map.get(src_symbol_id))
|
||||
resolved = self._graph.resolve_symbol_by_ref(rag_session_id, dst_ref, package_hint=package_hint)
|
||||
if resolved is not None:
|
||||
dst_symbol_id = str(resolved.metadata.get("symbol_id") or "")
|
||||
symbol_map[dst_symbol_id] = resolved
|
||||
next_score += 2.0
|
||||
next_notes.append(f"resolved:{dst_ref}")
|
||||
if not dst_symbol_id or dst_symbol_id in current_path:
|
||||
paths.append(TracePath(symbol_ids=current_path, score=next_score, notes=next_notes))
|
||||
continue
|
||||
if dst_symbol_id not in symbol_map:
|
||||
symbols = self._graph.get_symbols_by_ids(rag_session_id, [dst_symbol_id])
|
||||
if symbols:
|
||||
symbol_map[dst_symbol_id] = symbols[0]
|
||||
queue.append((current_path + [dst_symbol_id], next_score, next_notes))
|
||||
unique = self._unique_paths(paths)
|
||||
unique.sort(key=lambda item: item.score, reverse=True)
|
||||
return unique[:max_paths] or [TracePath(symbol_ids=[seed.metadata.get("symbol_id", "")], score=0.0) for seed in seed_symbols[:1]]
|
||||
|
||||
def _edge_score(self, edge: LayeredRetrievalItem, source_symbol: LayeredRetrievalItem | None) -> float:
|
||||
metadata = edge.metadata
|
||||
score = 1.0
|
||||
if str(metadata.get("resolution") or "") == "resolved":
|
||||
score += 2.0
|
||||
source_path = source_symbol.source if source_symbol is not None else ""
|
||||
if source_path and edge.source == source_path:
|
||||
score += 1.0
|
||||
if "tests/" in edge.source or "/tests/" in edge.source:
|
||||
score -= 3.0
|
||||
return score
|
||||
|
||||
def _package_hint(self, symbol: LayeredRetrievalItem | None) -> str | None:
|
||||
if symbol is None:
|
||||
return None
|
||||
package = str(symbol.metadata.get("package_or_module") or "")
|
||||
if not package:
|
||||
return None
|
||||
return ".".join(package.split(".")[:-1]) or package
|
||||
|
||||
def _symbol_map(self, items: list[LayeredRetrievalItem]) -> dict[str, LayeredRetrievalItem]:
|
||||
result: dict[str, LayeredRetrievalItem] = {}
|
||||
for item in items:
|
||||
symbol_id = str(item.metadata.get("symbol_id") or "")
|
||||
if symbol_id:
|
||||
result[symbol_id] = item
|
||||
return result
|
||||
|
||||
def _unique_paths(self, items: list[TracePath]) -> list[TracePath]:
    """Drop duplicate and empty paths while keeping the original order.

    Two paths count as duplicates when their ``symbol_ids`` sequences are
    equal once falsy ids are removed; the first occurrence wins. Paths with
    no truthy ids at all are discarded.
    """
    first_by_signature: dict[tuple[str, ...], TracePath] = {}
    for path in items:
        signature = tuple(filter(None, path.symbol_ids))
        if signature:
            # setdefault keeps the earliest path seen for each signature.
            first_by_signature.setdefault(signature, path)
    return list(first_by_signature.values())
|
||||
Binary file not shown.
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
from app.modules.rag.contracts import RagDocument, RagLayer, RagSource, RagSpan
|
||||
from app.modules.rag.indexing.code.code_text.chunker import CodeChunk
|
||||
from app.modules.rag.retrieval.test_filter import is_test_path
|
||||
|
||||
|
||||
class CodeTextDocumentBuilder:
|
||||
@@ -17,6 +18,7 @@ class CodeTextDocumentBuilder:
|
||||
"chunk_index": chunk_index,
|
||||
"chunk_type": chunk.chunk_type,
|
||||
"module_or_unit": source.path.replace("/", ".").removesuffix(".py"),
|
||||
"is_test": is_test_path(source.path),
|
||||
"artifact_type": "CODE",
|
||||
},
|
||||
)
|
||||
|
||||
Binary file not shown.
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
from app.modules.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource, RagSpan
|
||||
from app.modules.rag.indexing.code.edges.extractor import PyEdge
|
||||
from app.modules.rag.retrieval.test_filter import is_test_path
|
||||
|
||||
|
||||
class EdgeDocumentBuilder:
|
||||
@@ -22,6 +23,7 @@ class EdgeDocumentBuilder:
|
||||
"dst_symbol_id": edge.dst_symbol_id,
|
||||
"dst_ref": edge.dst_ref,
|
||||
"resolution": edge.resolution,
|
||||
"is_test": is_test_path(source.path),
|
||||
"lang_payload": edge.metadata,
|
||||
"artifact_type": "CODE",
|
||||
},
|
||||
|
||||
BIN
Binary file not shown.
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
from app.modules.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource, RagSpan
|
||||
from app.modules.rag.indexing.code.entrypoints.registry import Entrypoint
|
||||
from app.modules.rag.retrieval.test_filter import is_test_path
|
||||
|
||||
|
||||
class EntrypointDocumentBuilder:
|
||||
@@ -19,6 +20,7 @@ class EntrypointDocumentBuilder:
|
||||
"framework": entrypoint.framework,
|
||||
"route_or_command": entrypoint.route_or_command,
|
||||
"handler_symbol_id": entrypoint.handler_symbol_id,
|
||||
"is_test": is_test_path(source.path),
|
||||
"lang_payload": entrypoint.metadata,
|
||||
"artifact_type": "CODE",
|
||||
},
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user