Новый раг
This commit is contained in:
17
app/modules/rag/contracts/__init__.py
Normal file
17
app/modules/rag/contracts/__init__.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from app.modules.rag.contracts.documents import RagDocument, RagSource, RagSpan
|
||||
from app.modules.rag.contracts.enums import DocKind, EvidenceType, RagLayer, RetrievalMode
|
||||
from app.modules.rag.contracts.evidence import EvidenceLink
|
||||
from app.modules.rag.contracts.retrieval import RetrievalItem, RetrievalQuery
|
||||
|
||||
# Public names re-exported by this package, listed alphabetically.
__all__ = [
    "DocKind",
    "EvidenceLink",
    "EvidenceType",
    "RagDocument",
    "RagLayer",
    "RagSource",
    "RagSpan",
    "RetrievalItem",
    "RetrievalMode",
    "RetrievalQuery",
]
|
||||
BIN
app/modules/rag/contracts/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
app/modules/rag/contracts/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
app/modules/rag/contracts/__pycache__/documents.cpython-312.pyc
Normal file
BIN
app/modules/rag/contracts/__pycache__/documents.cpython-312.pyc
Normal file
Binary file not shown.
BIN
app/modules/rag/contracts/__pycache__/enums.cpython-312.pyc
Normal file
BIN
app/modules/rag/contracts/__pycache__/enums.cpython-312.pyc
Normal file
Binary file not shown.
BIN
app/modules/rag/contracts/__pycache__/evidence.cpython-312.pyc
Normal file
BIN
app/modules/rag/contracts/__pycache__/evidence.cpython-312.pyc
Normal file
Binary file not shown.
BIN
app/modules/rag/contracts/__pycache__/retrieval.cpython-312.pyc
Normal file
BIN
app/modules/rag/contracts/__pycache__/retrieval.cpython-312.pyc
Normal file
Binary file not shown.
77
app/modules/rag/contracts/documents.py
Normal file
77
app/modules/rag/contracts/documents.py
Normal file
@@ -0,0 +1,77 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from hashlib import sha256
|
||||
|
||||
from app.modules.rag.contracts.evidence import EvidenceLink
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RagSource:
|
||||
repo_id: str
|
||||
commit_sha: str | None
|
||||
path: str
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return asdict(self)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RagSpan:
|
||||
start_line: int | None = None
|
||||
end_line: int | None = None
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return asdict(self)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RagDocument:
|
||||
layer: str
|
||||
source: RagSource
|
||||
title: str
|
||||
text: str
|
||||
metadata: dict = field(default_factory=dict)
|
||||
links: list[EvidenceLink] = field(default_factory=list)
|
||||
span: RagSpan | None = None
|
||||
doc_id: str | None = None
|
||||
lang: str | None = None
|
||||
embedding: list[float] | None = None
|
||||
|
||||
def ensure_doc_id(self) -> str:
|
||||
if self.doc_id:
|
||||
return self.doc_id
|
||||
span_key = ""
|
||||
if self.span is not None:
|
||||
span_key = f":{self.span.start_line}:{self.span.end_line}"
|
||||
raw = "|".join(
|
||||
[
|
||||
self.layer,
|
||||
self.lang or "",
|
||||
self.source.repo_id,
|
||||
self.source.commit_sha or "",
|
||||
self.source.path,
|
||||
self.metadata.get("symbol_id", "") or self.metadata.get("module_id", ""),
|
||||
self.title,
|
||||
span_key,
|
||||
]
|
||||
)
|
||||
self.doc_id = sha256(raw.encode("utf-8")).hexdigest()
|
||||
return self.doc_id
|
||||
|
||||
def to_record(self) -> dict:
|
||||
return {
|
||||
"doc_id": self.ensure_doc_id(),
|
||||
"layer": self.layer,
|
||||
"lang": self.lang,
|
||||
"repo_id": self.source.repo_id,
|
||||
"commit_sha": self.source.commit_sha,
|
||||
"path": self.source.path,
|
||||
"title": self.title,
|
||||
"text": self.text,
|
||||
"metadata": dict(self.metadata),
|
||||
"links": [link.to_dict() for link in self.links],
|
||||
"span_start": self.span.start_line if self.span else None,
|
||||
"span_end": self.span.end_line if self.span else None,
|
||||
"embedding": self.embedding or [],
|
||||
}
|
||||
35
app/modules/rag/contracts/enums.py
Normal file
35
app/modules/rag/contracts/enums.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class RagLayer:
    """String constants naming the layers of the RAG index."""

    # Documentation layers (values prefixed D1-D4).
    DOCS_MODULE_CATALOG = "D1_MODULE_CATALOG"
    DOCS_FACT_INDEX = "D2_FACT_INDEX"
    DOCS_SECTION_INDEX = "D3_SECTION_INDEX"
    DOCS_POLICY_INDEX = "D4_POLICY_INDEX"

    # Code layers (values prefixed C0-C6).
    CODE_SOURCE_CHUNKS = "C0_SOURCE_CHUNKS"
    CODE_SYMBOL_CATALOG = "C1_SYMBOL_CATALOG"
    CODE_DEPENDENCY_GRAPH = "C2_DEPENDENCY_GRAPH"
    CODE_ENTRYPOINTS = "C3_ENTRYPOINTS"
    CODE_PUBLIC_API = "C4_PUBLIC_API"
    CODE_BEHAVIOR_SUMMARIES = "C5_BEHAVIOR_SUMMARIES"
    CODE_RUNTIME_TRACES = "C6_RUNTIME_TRACES"
|
||||
|
||||
|
||||
class RetrievalMode:
    """String constants for the two retrieval modes: docs and code."""

    DOCS = "docs"
    CODE = "code"
|
||||
|
||||
|
||||
class DocKind:
    """String constants classifying a documentation file."""

    SPEC = "spec"
    RUNBOOK = "runbook"
    README = "readme"
    MISC = "misc"
|
||||
|
||||
|
||||
class EvidenceType:
    """String constants naming the kinds of evidence a link can point at."""

    # Code-side evidence.
    CODE_SPAN = "code_span"
    SYMBOL = "symbol"
    EDGE = "edge"

    # Documentation-side evidence.
    DOC_SECTION = "doc_section"
    DOC_FACT = "doc_fact"
|
||||
16
app/modules/rag/contracts/evidence.py
Normal file
16
app/modules/rag/contracts/evidence.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict, dataclass
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class EvidenceLink:
|
||||
type: str
|
||||
target_id: str
|
||||
path: str | None = None
|
||||
start_line: int | None = None
|
||||
end_line: int | None = None
|
||||
note: str | None = None
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return asdict(self)
|
||||
23
app/modules/rag/contracts/retrieval.py
Normal file
23
app/modules/rag/contracts/retrieval.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RetrievalQuery:
|
||||
text: str
|
||||
mode: str
|
||||
limit: int = 5
|
||||
layers: list[str] = field(default_factory=list)
|
||||
path_prefixes: list[str] = field(default_factory=list)
|
||||
doc_kind: str | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RetrievalItem:
|
||||
content: str
|
||||
path: str
|
||||
layer: str
|
||||
title: str
|
||||
score: float | None = None
|
||||
metadata: dict | None = None
|
||||
Reference in New Issue
Block a user