Новый раг

This commit is contained in:
2026-03-01 14:21:33 +03:00
parent 2728c07ba9
commit 1ef0b4d68c
95 changed files with 3145 additions and 927 deletions

View File

@@ -0,0 +1,115 @@
from __future__ import annotations
from hashlib import sha256
from app.modules.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource
from app.modules.rag.indexing.docs.chunkers.markdown_chunker import SectionChunk
class DocsDocumentBuilder:
    """Build ``RagDocument`` records for the documentation index layers.

    Each ``build_*`` method assembles a metadata dict from the document
    frontmatter (and, where applicable, a section chunk) and wraps it in a
    ``RagDocument`` tagged with the matching ``RagLayer`` member.
    """

    # Maximum number of characters kept from summary and policy text.
    _MAX_TEXT_CHARS = 4000

    # Relationship names read from the frontmatter ``links`` mapping and
    # copied into top-level metadata keys of the module catalog document.
    _LINK_FIELDS = (
        "calls_api",
        "called_by",
        "uses_logic",
        "used_by",
        "reads_db",
        "writes_db",
        "integrates_with",
        "emits_events",
        "consumes_events",
    )

    def build_module_catalog(self, source: RagSource, frontmatter: dict, summary_text: str, doc_kind: str) -> RagDocument | None:
        """Build the module catalog document for one source file.

        Args:
            source: Source descriptor; its ``path`` is stored in the metadata.
            frontmatter: Frontmatter mapping. The keys read are ``id``,
                ``type``, ``domain``, ``status``, ``version``, ``tags``,
                ``owners`` and ``links``.
            summary_text: Summary of the document, truncated to
                ``_MAX_TEXT_CHARS`` characters.
            doc_kind: Stored verbatim under the ``doc_kind`` metadata key.

        Returns:
            A ``RagDocument`` in the ``DOCS_MODULE_CATALOG`` layer, or ``None``
            when ``id``, ``type`` or ``domain`` is missing or blank.
        """
        module_id = str(frontmatter.get("id") or "").strip()
        module_type = str(frontmatter.get("type") or "").strip()
        domain = str(frontmatter.get("domain") or "").strip()
        if not (module_id and module_type and domain):
            return None

        # Hand-written frontmatter may carry a non-mapping ``links`` value
        # (for example a list); treat it as absent instead of failing on
        # ``.get`` below.
        raw_links = frontmatter.get("links")
        links = raw_links if isinstance(raw_links, dict) else {}

        summary = summary_text[: self._MAX_TEXT_CHARS]
        metadata = {
            "module_id": module_id,
            "type": module_type,
            "domain": domain,
            "status": frontmatter.get("status"),
            "version": frontmatter.get("version"),
            "tags": frontmatter.get("tags") or [],
            "owners": frontmatter.get("owners") or [],
            "links": links,
            "source_path": source.path,
            "summary_text": summary,
            "doc_kind": doc_kind,
        }
        # ``or []`` also covers keys that are present with a null value, so
        # every relationship field holds a list-like value, matching how
        # ``tags`` and ``owners`` are normalized above.
        metadata.update({name: links.get(name) or [] for name in self._LINK_FIELDS})

        return RagDocument(
            layer=RagLayer.DOCS_MODULE_CATALOG,
            source=source,
            title=module_id,
            # Fall back to the id so the document text is never empty.
            text=summary or module_id,
            metadata=metadata,
        )

    def build_section(self, source: RagSource, chunk: SectionChunk, frontmatter: dict, doc_kind: str) -> RagDocument:
        """Build a section index document from one section chunk.

        Args:
            source: Source descriptor; its ``path`` is stored in the metadata
                and used as the module id when the frontmatter has none.
            chunk: Section chunk; ``section_path``, ``section_title``,
                ``order`` and ``content`` are read from it.
            frontmatter: Frontmatter mapping. The keys read are ``id``,
                ``type``, ``domain`` and ``tags``.
            doc_kind: Stored verbatim under the ``doc_kind`` metadata key.

        Returns:
            A ``RagDocument`` in the ``DOCS_SECTION_INDEX`` layer.
        """
        # Strip the id the same way ``build_module_catalog`` does so both
        # layers record the same ``module_id`` for a document; a missing or
        # blank id falls back to the source path.
        module_id = str(frontmatter.get("id") or "").strip() or str(source.path)
        metadata = {
            "module_id": module_id,
            "type": frontmatter.get("type"),
            "domain": frontmatter.get("domain"),
            "tags": frontmatter.get("tags") or [],
            "section_path": chunk.section_path,
            "section_title": chunk.section_title,
            "order": chunk.order,
            "doc_kind": doc_kind,
            "source_path": source.path,
            "artifact_type": "DOCS",
        }
        return RagDocument(
            layer=RagLayer.DOCS_SECTION_INDEX,
            source=source,
            title=f"{module_id}:{chunk.section_title}",
            text=chunk.content,
            metadata=metadata,
        )

    def build_policy(self, source: RagSource, frontmatter: dict, chunk: SectionChunk, doc_kind: str) -> RagDocument | None:
        """Build a policy index document from one section chunk.

        Args:
            source: Source descriptor; its ``path`` is stored in the metadata.
            frontmatter: Frontmatter mapping. The keys read are ``id``,
                ``applies_to``, ``type`` and ``default_behaviors``.
            chunk: Section chunk; ``content`` (truncated to
                ``_MAX_TEXT_CHARS`` characters) and ``section_path`` are read
                from it.
            doc_kind: Stored verbatim under the ``doc_kind`` metadata key.

        Returns:
            A ``RagDocument`` in the ``DOCS_POLICY_INDEX`` layer, or ``None``
            when the frontmatter ``id`` is missing or blank.
        """
        policy_id = str(frontmatter.get("id") or "").strip()
        if not policy_id:
            return None

        rules = chunk.content[: self._MAX_TEXT_CHARS]
        metadata = {
            "policy_id": policy_id,
            # ``type`` is used when ``applies_to`` is missing or falsy.
            "applies_to": frontmatter.get("applies_to") or frontmatter.get("type"),
            "rules": rules,
            "default_behaviors": frontmatter.get("default_behaviors") or [],
            "doc_kind": doc_kind,
            "section_path": chunk.section_path,
            "source_path": source.path,
        }
        return RagDocument(
            layer=RagLayer.DOCS_POLICY_INDEX,
            source=source,
            title=policy_id,
            text=rules,
            metadata=metadata,
        )

    def build_fact(
        self,
        source: RagSource,
        *,
        subject_id: str,
        predicate: str,
        obj: str,
        object_ref: str | None,
        anchor: str,
        tags: list[str] | None = None,
    ) -> RagDocument:
        """Build a fact index document for a subject/predicate/object triple.

        Args:
            source: Source descriptor; its ``path`` is part of the fact id
                and is stored in the metadata and the evidence link.
            subject_id: Subject of the fact.
            predicate: Predicate of the fact.
            obj: Object of the fact.
            object_ref: Stored verbatim under the ``object_ref`` metadata key.
            anchor: Part of the fact id; also stored in the metadata and as
                the evidence link note.
            tags: Optional tags; ``None`` is stored as an empty list.

        Returns:
            A ``RagDocument`` in the ``DOCS_FACT_INDEX`` layer carrying a
            single ``DOC_FACT`` evidence link that targets the fact id.
        """
        # The id is the SHA-256 hex digest of the pipe-joined identifying
        # fields. The input format is kept unchanged so ids stay stable.
        fact_key = f"{subject_id}|{predicate}|{obj}|{source.path}|{anchor}"
        fact_id = sha256(fact_key.encode("utf-8")).hexdigest()
        metadata = {
            "fact_id": fact_id,
            "subject_id": subject_id,
            "predicate": predicate,
            "object": obj,
            "object_ref": object_ref,
            "anchor": anchor,
            "tags": tags or [],
            "source_path": source.path,
        }
        evidence = EvidenceLink(
            type=EvidenceType.DOC_FACT,
            target_id=fact_id,
            path=source.path,
            note=anchor,
        )
        return RagDocument(
            layer=RagLayer.DOCS_FACT_INDEX,
            source=source,
            title=f"{subject_id}:{predicate}",
            text=f"{subject_id} {predicate} {obj}".strip(),
            metadata=metadata,
            links=[evidence],
        )