Новый раг
This commit is contained in:
77
app/modules/rag/contracts/documents.py
Normal file
77
app/modules/rag/contracts/documents.py
Normal file
@@ -0,0 +1,77 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from hashlib import sha256
|
||||
|
||||
from app.modules.rag.contracts.evidence import EvidenceLink
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RagSource:
|
||||
repo_id: str
|
||||
commit_sha: str | None
|
||||
path: str
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return asdict(self)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RagSpan:
|
||||
start_line: int | None = None
|
||||
end_line: int | None = None
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return asdict(self)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RagDocument:
|
||||
layer: str
|
||||
source: RagSource
|
||||
title: str
|
||||
text: str
|
||||
metadata: dict = field(default_factory=dict)
|
||||
links: list[EvidenceLink] = field(default_factory=list)
|
||||
span: RagSpan | None = None
|
||||
doc_id: str | None = None
|
||||
lang: str | None = None
|
||||
embedding: list[float] | None = None
|
||||
|
||||
def ensure_doc_id(self) -> str:
|
||||
if self.doc_id:
|
||||
return self.doc_id
|
||||
span_key = ""
|
||||
if self.span is not None:
|
||||
span_key = f":{self.span.start_line}:{self.span.end_line}"
|
||||
raw = "|".join(
|
||||
[
|
||||
self.layer,
|
||||
self.lang or "",
|
||||
self.source.repo_id,
|
||||
self.source.commit_sha or "",
|
||||
self.source.path,
|
||||
self.metadata.get("symbol_id", "") or self.metadata.get("module_id", ""),
|
||||
self.title,
|
||||
span_key,
|
||||
]
|
||||
)
|
||||
self.doc_id = sha256(raw.encode("utf-8")).hexdigest()
|
||||
return self.doc_id
|
||||
|
||||
def to_record(self) -> dict:
|
||||
return {
|
||||
"doc_id": self.ensure_doc_id(),
|
||||
"layer": self.layer,
|
||||
"lang": self.lang,
|
||||
"repo_id": self.source.repo_id,
|
||||
"commit_sha": self.source.commit_sha,
|
||||
"path": self.source.path,
|
||||
"title": self.title,
|
||||
"text": self.text,
|
||||
"metadata": dict(self.metadata),
|
||||
"links": [link.to_dict() for link in self.links],
|
||||
"span_start": self.span.start_line if self.span else None,
|
||||
"span_end": self.span.end_line if self.span else None,
|
||||
"embedding": self.embedding or [],
|
||||
}
|
||||
Reference in New Issue
Block a user