Новый раг
This commit is contained in:
115
app/modules/rag/indexing/docs/document_builder.py
Normal file
115
app/modules/rag/indexing/docs/document_builder.py
Normal file
@@ -0,0 +1,115 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from hashlib import sha256
|
||||
|
||||
from app.modules.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource
|
||||
from app.modules.rag.indexing.docs.chunkers.markdown_chunker import SectionChunk
|
||||
|
||||
|
||||
class DocsDocumentBuilder:
    """Build ``RagDocument`` records for the documentation RAG layers.

    Every ``build_*`` method assembles a metadata dict from parsed
    frontmatter and/or a section chunk and wraps it in a ``RagDocument``
    tagged with the matching ``RagLayer`` member.
    """

    # Character cap applied to summary / policy text copied into documents.
    _MAX_TEXT_CHARS = 4000

    # Keys of the frontmatter ``links`` mapping that are additionally copied
    # to the top level of the module-catalog metadata.
    _RELATION_KEYS = (
        "calls_api",
        "called_by",
        "uses_logic",
        "used_by",
        "reads_db",
        "writes_db",
        "integrates_with",
        "emits_events",
        "consumes_events",
    )

    @staticmethod
    def _clean_id(frontmatter: dict) -> str:
        """Return the frontmatter ``id`` as a stripped string ("" if absent)."""
        return str(frontmatter.get("id") or "").strip()

    @classmethod
    def _relation_fields(cls, links: object) -> dict[str, object]:
        """Return one entry per relation key, defaulting to an empty list.

        A ``links`` value that is not a dict is treated as empty, and a
        relation that is missing or falsy (e.g. an explicit null in the
        frontmatter) becomes ``[]`` so consumers always see a value.
        """
        mapping = links if isinstance(links, dict) else {}
        return {name: mapping.get(name) or [] for name in cls._RELATION_KEYS}

    def build_module_catalog(
        self,
        source: RagSource,
        frontmatter: dict,
        summary_text: str,
        doc_kind: str,
    ) -> RagDocument | None:
        """Build the module-catalog document for one documentation file.

        Args:
            source: Origin of the document; ``source.path`` is recorded.
            frontmatter: Parsed frontmatter of the file.
            summary_text: Summary text; truncated to ``_MAX_TEXT_CHARS``.
            doc_kind: Document kind label stored in the metadata.

        Returns:
            The catalog document, or ``None`` when ``id``, ``type`` or
            ``domain`` is missing or blank in the frontmatter.
        """
        module_id = self._clean_id(frontmatter)
        module_type = str(frontmatter.get("type") or "").strip()
        domain = str(frontmatter.get("domain") or "").strip()
        if not (module_id and module_type and domain):
            return None

        # Frontmatter is free-form; anything that is not a dict cannot be
        # queried with ``.get`` and is treated as "no links".
        raw_links = frontmatter.get("links")
        links = raw_links if isinstance(raw_links, dict) else {}
        summary = summary_text[: self._MAX_TEXT_CHARS]

        metadata = {
            "module_id": module_id,
            "type": module_type,
            "domain": domain,
            "status": frontmatter.get("status"),
            "version": frontmatter.get("version"),
            "tags": frontmatter.get("tags") or [],
            "owners": frontmatter.get("owners") or [],
            "links": links,
            "source_path": source.path,
            "summary_text": summary,
            "doc_kind": doc_kind,
        }
        metadata.update(self._relation_fields(links))

        return RagDocument(
            layer=RagLayer.DOCS_MODULE_CATALOG,
            source=source,
            title=module_id,
            # Fall back to the id so the document text is never empty.
            text=summary or module_id,
            metadata=metadata,
        )

    def build_section(
        self,
        source: RagSource,
        chunk: SectionChunk,
        frontmatter: dict,
        doc_kind: str,
    ) -> RagDocument:
        """Build the section-index document for one chunk.

        Args:
            source: Origin of the document; ``source.path`` is recorded and
                used as the module id when the frontmatter has no ``id``.
            chunk: Section chunk providing path, title, order and content.
            frontmatter: Parsed frontmatter of the file.
            doc_kind: Document kind label stored in the metadata.

        Returns:
            The section document titled ``"<module_id>:<section_title>"``.
        """
        # Strip the id exactly like the catalog builder does so both layers
        # agree on ``module_id`` for the same file.
        module_id = self._clean_id(frontmatter) or str(source.path)
        metadata = {
            "module_id": module_id,
            "type": frontmatter.get("type"),
            "domain": frontmatter.get("domain"),
            "tags": frontmatter.get("tags") or [],
            "section_path": chunk.section_path,
            "section_title": chunk.section_title,
            "order": chunk.order,
            "doc_kind": doc_kind,
            "source_path": source.path,
            "artifact_type": "DOCS",
        }
        return RagDocument(
            layer=RagLayer.DOCS_SECTION_INDEX,
            source=source,
            title=f"{module_id}:{chunk.section_title}",
            text=chunk.content,
            metadata=metadata,
        )

    def build_policy(
        self,
        source: RagSource,
        frontmatter: dict,
        chunk: SectionChunk,
        doc_kind: str,
    ) -> RagDocument | None:
        """Build the policy-index document for one chunk.

        Args:
            source: Origin of the document; ``source.path`` is recorded.
            frontmatter: Parsed frontmatter of the file.
            chunk: Section chunk whose content holds the policy rules.
            doc_kind: Document kind label stored in the metadata.

        Returns:
            The policy document, or ``None`` when the frontmatter ``id`` is
            missing or blank.
        """
        policy_id = self._clean_id(frontmatter)
        if not policy_id:
            return None

        rules = chunk.content[: self._MAX_TEXT_CHARS]
        metadata = {
            "policy_id": policy_id,
            # ``type`` stands in when ``applies_to`` is not given.
            "applies_to": frontmatter.get("applies_to") or frontmatter.get("type"),
            "rules": rules,
            "default_behaviors": frontmatter.get("default_behaviors") or [],
            "doc_kind": doc_kind,
            "section_path": chunk.section_path,
            "source_path": source.path,
        }
        return RagDocument(
            layer=RagLayer.DOCS_POLICY_INDEX,
            source=source,
            title=policy_id,
            text=rules,
            metadata=metadata,
        )

    def build_fact(
        self,
        source: RagSource,
        *,
        subject_id: str,
        predicate: str,
        obj: str,
        object_ref: str | None,
        anchor: str,
        tags: list[str] | None = None,
    ) -> RagDocument:
        """Build the fact-index document for one subject/predicate/object.

        Args:
            source: Origin of the fact; ``source.path`` is recorded and is
                part of the fact id.
            subject_id: Identifier of the fact's subject.
            predicate: Relation name.
            obj: Object of the relation, as text.
            object_ref: Optional reference for the object; stored as given.
            anchor: Location marker stored in the metadata and used as the
                evidence link note.
            tags: Optional tags; ``None`` is stored as an empty list.

        Returns:
            The fact document carrying a single ``DOC_FACT`` evidence link
            that targets the fact id.
        """
        # The id is the SHA-256 of the identifying fields, so identical
        # inputs always produce the same id. The "|"-joined layout must not
        # change, or previously generated ids would stop matching.
        identity = f"{subject_id}|{predicate}|{obj}|{source.path}|{anchor}"
        fact_id = sha256(identity.encode("utf-8")).hexdigest()
        metadata = {
            "fact_id": fact_id,
            "subject_id": subject_id,
            "predicate": predicate,
            "object": obj,
            "object_ref": object_ref,
            "anchor": anchor,
            "tags": tags or [],
            "source_path": source.path,
        }
        evidence = EvidenceLink(
            type=EvidenceType.DOC_FACT,
            target_id=fact_id,
            path=source.path,
            note=anchor,
        )
        return RagDocument(
            layer=RagLayer.DOCS_FACT_INDEX,
            source=source,
            title=f"{subject_id}:{predicate}",
            text=f"{subject_id} {predicate} {obj}".strip(),
            metadata=metadata,
            links=[evidence],
        )
|
||||
Reference in New Issue
Block a user