180 lines
10 KiB
Python
180 lines
10 KiB
Python
from __future__ import annotations
|
|
|
|
from sqlalchemy import text
|
|
|
|
from app.modules.shared.db import get_engine
|
|
|
|
|
|
class RagSchemaRepository:
    """Create and extend the PostgreSQL tables used by the RAG module.

    All DDL issued here is guarded with ``IF NOT EXISTS``, so the statements
    are written to be re-runnable against a database that already has some
    or all of the objects.
    """

    def ensure_tables(self) -> None:
        """Create the extension, tables, extra columns and indexes, then commit.

        Statements run on a single connection obtained from ``get_engine()``
        and are committed once at the end.
        """
        create_table_statements = (
            """
            CREATE TABLE IF NOT EXISTS rag_sessions (
                rag_session_id VARCHAR(64) PRIMARY KEY,
                project_id VARCHAR(512) NOT NULL,
                created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
            )
            """,
            """
            CREATE TABLE IF NOT EXISTS rag_index_jobs (
                index_job_id VARCHAR(64) PRIMARY KEY,
                rag_session_id VARCHAR(64) NOT NULL,
                status VARCHAR(16) NOT NULL,
                indexed_files INTEGER NOT NULL DEFAULT 0,
                failed_files INTEGER NOT NULL DEFAULT 0,
                cache_hit_files INTEGER NOT NULL DEFAULT 0,
                cache_miss_files INTEGER NOT NULL DEFAULT 0,
                error_code VARCHAR(128) NULL,
                error_desc TEXT NULL,
                error_module VARCHAR(64) NULL,
                created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
            )
            """,
            """
            CREATE TABLE IF NOT EXISTS rag_chunks (
                id BIGSERIAL PRIMARY KEY,
                rag_session_id VARCHAR(64) NOT NULL,
                path TEXT NOT NULL,
                chunk_index INTEGER NOT NULL,
                content TEXT NOT NULL,
                embedding vector NULL,
                created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
            )
            """,
            """
            CREATE TABLE IF NOT EXISTS rag_blob_cache (
                id BIGSERIAL PRIMARY KEY,
                repo_id VARCHAR(512) NOT NULL,
                blob_sha VARCHAR(128) NOT NULL,
                path TEXT NOT NULL,
                created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
                CONSTRAINT uq_rag_blob_cache UNIQUE (repo_id, blob_sha, path)
            )
            """,
            """
            CREATE TABLE IF NOT EXISTS rag_chunk_cache (
                id BIGSERIAL PRIMARY KEY,
                repo_id VARCHAR(512) NOT NULL,
                blob_sha VARCHAR(128) NOT NULL,
                chunk_index INTEGER NOT NULL,
                content TEXT NOT NULL,
                embedding vector NULL,
                created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
                CONSTRAINT uq_rag_chunk_cache UNIQUE (repo_id, blob_sha, chunk_index)
            )
            """,
            """
            CREATE TABLE IF NOT EXISTS rag_session_chunk_map (
                id BIGSERIAL PRIMARY KEY,
                rag_session_id VARCHAR(64) NOT NULL,
                repo_id VARCHAR(512) NOT NULL,
                blob_sha VARCHAR(128) NOT NULL,
                chunk_index INTEGER NOT NULL,
                path TEXT NOT NULL,
                created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
            )
            """,
        )

        with get_engine().connect() as connection:
            # The extension comes first: rag_chunks and rag_chunk_cache
            # declare `embedding vector` columns.
            connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector"))
            for ddl in create_table_statements:
                connection.execute(text(ddl))
            # Columns before indexes: several indexes cover columns that are
            # only added by the ALTER TABLE statements (layer, qname, ...).
            self._ensure_columns(connection)
            self._ensure_indexes(connection)
            connection.commit()

    def _ensure_columns(self, conn) -> None:
        """Add the extra columns to the RAG tables if they are missing.

        Executes one ``ALTER TABLE ... ADD COLUMN IF NOT EXISTS`` per column
        on ``conn``; committing is left to the caller.
        """
        column_statements = (
            # rag_chunks
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS artifact_type VARCHAR(16) NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS section TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS doc_id TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS doc_version TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS owner TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS system_component TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS last_modified TIMESTAMPTZ NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS staleness_score DOUBLE PRECISION NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS rag_doc_id VARCHAR(128) NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS layer VARCHAR(64) NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS lang VARCHAR(32) NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS repo_id VARCHAR(512) NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS commit_sha VARCHAR(128) NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS title TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS metadata_json TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS links_json TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS span_start INTEGER NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS span_end INTEGER NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS symbol_id TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS qname TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS kind TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS framework TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS entrypoint_type TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS module_id TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS section_path TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS doc_kind TEXT NULL",
            # rag_blob_cache
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS artifact_type VARCHAR(16) NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS section TEXT NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS doc_id TEXT NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS doc_version TEXT NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS owner TEXT NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS system_component TEXT NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS last_modified TIMESTAMPTZ NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS staleness_score DOUBLE PRECISION NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS layer VARCHAR(64) NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS lang VARCHAR(32) NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS metadata_json TEXT NULL",
            # rag_chunk_cache
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS section TEXT NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS layer VARCHAR(64) NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS lang VARCHAR(32) NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS path TEXT NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS title TEXT NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS metadata_json TEXT NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS links_json TEXT NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS span_start INTEGER NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS span_end INTEGER NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS commit_sha VARCHAR(128) NULL",
            # rag_index_jobs -- these two also appear in its CREATE TABLE;
            # presumably kept for tables created before they were added there.
            "ALTER TABLE rag_index_jobs ADD COLUMN IF NOT EXISTS cache_hit_files INTEGER NOT NULL DEFAULT 0",
            "ALTER TABLE rag_index_jobs ADD COLUMN IF NOT EXISTS cache_miss_files INTEGER NOT NULL DEFAULT 0",
        )
        for ddl in column_statements:
            conn.execute(text(ddl))

    def _ensure_indexes(self, conn) -> None:
        """Create the secondary indexes on the RAG tables if they are missing.

        Executes one ``CREATE INDEX IF NOT EXISTS`` per index on ``conn``;
        committing is left to the caller.
        """
        index_statements = (
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_session ON rag_chunks (rag_session_id)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_layer ON rag_chunks (rag_session_id, layer)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_layer_path ON rag_chunks (rag_session_id, layer, path)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_qname ON rag_chunks (qname)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_symbol_id ON rag_chunks (symbol_id)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_module_id ON rag_chunks (module_id)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_doc_kind ON rag_chunks (doc_kind)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_entrypoint ON rag_chunks (entrypoint_type, framework)",
            "CREATE INDEX IF NOT EXISTS idx_rag_blob_cache_repo_blob ON rag_blob_cache (repo_id, blob_sha)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunk_cache_repo_blob ON rag_chunk_cache (repo_id, blob_sha, chunk_index)",
            "CREATE INDEX IF NOT EXISTS idx_rag_session_chunk_map_session ON rag_session_chunk_map (rag_session_id, created_at DESC)",
        )
        for ddl in index_statements:
            conn.execute(text(ddl))
|