Files
agent/app/modules/rag/persistence/schema_repository.py
2026-03-01 14:21:33 +03:00

180 lines
10 KiB
Python

from __future__ import annotations
from sqlalchemy import text
from app.modules.shared.db import get_engine
class RagSchemaRepository:
def ensure_tables(self) -> None:
engine = get_engine()
with engine.connect() as conn:
conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector"))
conn.execute(
text(
"""
CREATE TABLE IF NOT EXISTS rag_sessions (
rag_session_id VARCHAR(64) PRIMARY KEY,
project_id VARCHAR(512) NOT NULL,
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
)
"""
)
)
conn.execute(
text(
"""
CREATE TABLE IF NOT EXISTS rag_index_jobs (
index_job_id VARCHAR(64) PRIMARY KEY,
rag_session_id VARCHAR(64) NOT NULL,
status VARCHAR(16) NOT NULL,
indexed_files INTEGER NOT NULL DEFAULT 0,
failed_files INTEGER NOT NULL DEFAULT 0,
cache_hit_files INTEGER NOT NULL DEFAULT 0,
cache_miss_files INTEGER NOT NULL DEFAULT 0,
error_code VARCHAR(128) NULL,
error_desc TEXT NULL,
error_module VARCHAR(64) NULL,
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
)
"""
)
)
conn.execute(
text(
"""
CREATE TABLE IF NOT EXISTS rag_chunks (
id BIGSERIAL PRIMARY KEY,
rag_session_id VARCHAR(64) NOT NULL,
path TEXT NOT NULL,
chunk_index INTEGER NOT NULL,
content TEXT NOT NULL,
embedding vector NULL,
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
)
"""
)
)
conn.execute(
text(
"""
CREATE TABLE IF NOT EXISTS rag_blob_cache (
id BIGSERIAL PRIMARY KEY,
repo_id VARCHAR(512) NOT NULL,
blob_sha VARCHAR(128) NOT NULL,
path TEXT NOT NULL,
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT uq_rag_blob_cache UNIQUE (repo_id, blob_sha, path)
)
"""
)
)
conn.execute(
text(
"""
CREATE TABLE IF NOT EXISTS rag_chunk_cache (
id BIGSERIAL PRIMARY KEY,
repo_id VARCHAR(512) NOT NULL,
blob_sha VARCHAR(128) NOT NULL,
chunk_index INTEGER NOT NULL,
content TEXT NOT NULL,
embedding vector NULL,
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT uq_rag_chunk_cache UNIQUE (repo_id, blob_sha, chunk_index)
)
"""
)
)
conn.execute(
text(
"""
CREATE TABLE IF NOT EXISTS rag_session_chunk_map (
id BIGSERIAL PRIMARY KEY,
rag_session_id VARCHAR(64) NOT NULL,
repo_id VARCHAR(512) NOT NULL,
blob_sha VARCHAR(128) NOT NULL,
chunk_index INTEGER NOT NULL,
path TEXT NOT NULL,
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
)
"""
)
)
self._ensure_columns(conn)
self._ensure_indexes(conn)
conn.commit()
def _ensure_columns(self, conn) -> None:
for statement in (
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS artifact_type VARCHAR(16) NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS section TEXT NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS doc_id TEXT NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS doc_version TEXT NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS owner TEXT NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS system_component TEXT NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS last_modified TIMESTAMPTZ NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS staleness_score DOUBLE PRECISION NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS rag_doc_id VARCHAR(128) NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS layer VARCHAR(64) NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS lang VARCHAR(32) NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS repo_id VARCHAR(512) NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS commit_sha VARCHAR(128) NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS title TEXT NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS metadata_json TEXT NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS links_json TEXT NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS span_start INTEGER NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS span_end INTEGER NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS symbol_id TEXT NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS qname TEXT NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS kind TEXT NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS framework TEXT NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS entrypoint_type TEXT NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS module_id TEXT NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS section_path TEXT NULL",
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS doc_kind TEXT NULL",
"ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS artifact_type VARCHAR(16) NULL",
"ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS section TEXT NULL",
"ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS doc_id TEXT NULL",
"ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS doc_version TEXT NULL",
"ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS owner TEXT NULL",
"ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS system_component TEXT NULL",
"ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS last_modified TIMESTAMPTZ NULL",
"ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS staleness_score DOUBLE PRECISION NULL",
"ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS layer VARCHAR(64) NULL",
"ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS lang VARCHAR(32) NULL",
"ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS metadata_json TEXT NULL",
"ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS section TEXT NULL",
"ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS layer VARCHAR(64) NULL",
"ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS lang VARCHAR(32) NULL",
"ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS path TEXT NULL",
"ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS title TEXT NULL",
"ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS metadata_json TEXT NULL",
"ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS links_json TEXT NULL",
"ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS span_start INTEGER NULL",
"ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS span_end INTEGER NULL",
"ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS commit_sha VARCHAR(128) NULL",
"ALTER TABLE rag_index_jobs ADD COLUMN IF NOT EXISTS cache_hit_files INTEGER NOT NULL DEFAULT 0",
"ALTER TABLE rag_index_jobs ADD COLUMN IF NOT EXISTS cache_miss_files INTEGER NOT NULL DEFAULT 0",
):
conn.execute(text(statement))
def _ensure_indexes(self, conn) -> None:
for statement in (
"CREATE INDEX IF NOT EXISTS idx_rag_chunks_session ON rag_chunks (rag_session_id)",
"CREATE INDEX IF NOT EXISTS idx_rag_chunks_layer ON rag_chunks (rag_session_id, layer)",
"CREATE INDEX IF NOT EXISTS idx_rag_chunks_layer_path ON rag_chunks (rag_session_id, layer, path)",
"CREATE INDEX IF NOT EXISTS idx_rag_chunks_qname ON rag_chunks (qname)",
"CREATE INDEX IF NOT EXISTS idx_rag_chunks_symbol_id ON rag_chunks (symbol_id)",
"CREATE INDEX IF NOT EXISTS idx_rag_chunks_module_id ON rag_chunks (module_id)",
"CREATE INDEX IF NOT EXISTS idx_rag_chunks_doc_kind ON rag_chunks (doc_kind)",
"CREATE INDEX IF NOT EXISTS idx_rag_chunks_entrypoint ON rag_chunks (entrypoint_type, framework)",
"CREATE INDEX IF NOT EXISTS idx_rag_blob_cache_repo_blob ON rag_blob_cache (repo_id, blob_sha)",
"CREATE INDEX IF NOT EXISTS idx_rag_chunk_cache_repo_blob ON rag_chunk_cache (repo_id, blob_sha, chunk_index)",
"CREATE INDEX IF NOT EXISTS idx_rag_session_chunk_map_session ON rag_session_chunk_map (rag_session_id, created_at DESC)",
):
conn.execute(text(statement))