from __future__ import annotations

from sqlalchemy import text

from app.modules.shared.db import get_engine

# Statements executed first, in this order: the `vector` extension (needed by
# the `embedding vector` columns below), then one CREATE TABLE per RAG table.
_BOOTSTRAP_STATEMENTS = (
    "CREATE EXTENSION IF NOT EXISTS vector",
    """
    CREATE TABLE IF NOT EXISTS rag_sessions (
        rag_session_id VARCHAR(64) PRIMARY KEY,
        project_id VARCHAR(512) NOT NULL,
        created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS rag_index_jobs (
        index_job_id VARCHAR(64) PRIMARY KEY,
        rag_session_id VARCHAR(64) NOT NULL,
        status VARCHAR(16) NOT NULL,
        indexed_files INTEGER NOT NULL DEFAULT 0,
        failed_files INTEGER NOT NULL DEFAULT 0,
        cache_hit_files INTEGER NOT NULL DEFAULT 0,
        cache_miss_files INTEGER NOT NULL DEFAULT 0,
        error_code VARCHAR(128) NULL,
        error_desc TEXT NULL,
        error_module VARCHAR(64) NULL,
        created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS rag_chunks (
        id BIGSERIAL PRIMARY KEY,
        rag_session_id VARCHAR(64) NOT NULL,
        path TEXT NOT NULL,
        chunk_index INTEGER NOT NULL,
        content TEXT NOT NULL,
        embedding vector NULL,
        created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS rag_blob_cache (
        id BIGSERIAL PRIMARY KEY,
        repo_id VARCHAR(512) NOT NULL,
        blob_sha VARCHAR(128) NOT NULL,
        path TEXT NOT NULL,
        created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
        CONSTRAINT uq_rag_blob_cache UNIQUE (repo_id, blob_sha, path)
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS rag_chunk_cache (
        id BIGSERIAL PRIMARY KEY,
        repo_id VARCHAR(512) NOT NULL,
        blob_sha VARCHAR(128) NOT NULL,
        chunk_index INTEGER NOT NULL,
        content TEXT NOT NULL,
        embedding vector NULL,
        created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
        CONSTRAINT uq_rag_chunk_cache UNIQUE (repo_id, blob_sha, chunk_index)
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS rag_session_chunk_map (
        id BIGSERIAL PRIMARY KEY,
        rag_session_id VARCHAR(64) NOT NULL,
        repo_id VARCHAR(512) NOT NULL,
        blob_sha VARCHAR(128) NOT NULL,
        chunk_index INTEGER NOT NULL,
        path TEXT NOT NULL,
        created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
    )
    """,
)

# Columns added after table creation, as (table, ((column, definition), ...)).
# Tables and columns are listed in the order their ALTER statements are run.
_ADDED_COLUMNS = (
    (
        "rag_chunks",
        (
            ("artifact_type", "VARCHAR(16) NULL"),
            ("section", "TEXT NULL"),
            ("doc_id", "TEXT NULL"),
            ("doc_version", "TEXT NULL"),
            ("owner", "TEXT NULL"),
            ("system_component", "TEXT NULL"),
            ("last_modified", "TIMESTAMPTZ NULL"),
            ("staleness_score", "DOUBLE PRECISION NULL"),
            ("layer", "VARCHAR(64) NULL"),
            ("lang", "VARCHAR(32) NULL"),
            ("repo_id", "VARCHAR(512) NULL"),
            ("commit_sha", "VARCHAR(128) NULL"),
            ("title", "TEXT NULL"),
            ("metadata_json", "TEXT NULL"),
            ("span_start", "INTEGER NULL"),
            ("span_end", "INTEGER NULL"),
            ("symbol_id", "TEXT NULL"),
            ("qname", "TEXT NULL"),
            ("kind", "TEXT NULL"),
            ("framework", "TEXT NULL"),
            ("entrypoint_type", "TEXT NULL"),
            ("module_id", "TEXT NULL"),
            ("section_path", "TEXT NULL"),
            ("doc_kind", "TEXT NULL"),
        ),
    ),
    (
        "rag_blob_cache",
        (
            ("artifact_type", "VARCHAR(16) NULL"),
            ("section", "TEXT NULL"),
            ("doc_id", "TEXT NULL"),
            ("doc_version", "TEXT NULL"),
            ("owner", "TEXT NULL"),
            ("system_component", "TEXT NULL"),
            ("last_modified", "TIMESTAMPTZ NULL"),
            ("staleness_score", "DOUBLE PRECISION NULL"),
            ("layer", "VARCHAR(64) NULL"),
            ("lang", "VARCHAR(32) NULL"),
            ("metadata_json", "TEXT NULL"),
        ),
    ),
    (
        "rag_chunk_cache",
        (
            ("section", "TEXT NULL"),
            ("layer", "VARCHAR(64) NULL"),
            ("lang", "VARCHAR(32) NULL"),
            ("path", "TEXT NULL"),
            ("title", "TEXT NULL"),
            ("metadata_json", "TEXT NULL"),
            ("links_json", "TEXT NULL"),
            ("span_start", "INTEGER NULL"),
            ("span_end", "INTEGER NULL"),
            ("commit_sha", "VARCHAR(128) NULL"),
        ),
    ),
    (
        "rag_index_jobs",
        (
            ("cache_hit_files", "INTEGER NOT NULL DEFAULT 0"),
            ("cache_miss_files", "INTEGER NOT NULL DEFAULT 0"),
        ),
    ),
)

# Columns removed from rag_chunks if they are still present.
_DROPPED_RAG_CHUNK_COLUMNS = ("rag_doc_id", "links_json")

_INDEX_STATEMENTS = (
    "CREATE INDEX IF NOT EXISTS idx_rag_chunks_session ON rag_chunks (rag_session_id)",
    "CREATE INDEX IF NOT EXISTS idx_rag_chunks_layer ON rag_chunks (rag_session_id, layer)",
    "CREATE INDEX IF NOT EXISTS idx_rag_chunks_layer_path ON rag_chunks (rag_session_id, layer, path)",
    "CREATE INDEX IF NOT EXISTS idx_rag_chunks_qname ON rag_chunks (qname)",
    "CREATE INDEX IF NOT EXISTS idx_rag_chunks_symbol_id ON rag_chunks (symbol_id)",
    "CREATE INDEX IF NOT EXISTS idx_rag_chunks_module_id ON rag_chunks (module_id)",
    "CREATE INDEX IF NOT EXISTS idx_rag_chunks_doc_kind ON rag_chunks (doc_kind)",
    "CREATE INDEX IF NOT EXISTS idx_rag_chunks_entrypoint ON rag_chunks (entrypoint_type, framework)",
    "CREATE INDEX IF NOT EXISTS idx_rag_blob_cache_repo_blob ON rag_blob_cache (repo_id, blob_sha)",
    "CREATE INDEX IF NOT EXISTS idx_rag_chunk_cache_repo_blob ON rag_chunk_cache (repo_id, blob_sha, chunk_index)",
    "CREATE INDEX IF NOT EXISTS idx_rag_session_chunk_map_session ON rag_session_chunk_map (rag_session_id, created_at DESC)",
)


class RagSchemaRepository:
    """Create and evolve the RAG tables, columns and indexes.

    Every statement issued by this class is guarded with ``IF NOT EXISTS`` or
    ``IF EXISTS``.
    """

    def ensure_tables(self) -> None:
        """Run the full schema bootstrap on one connection and commit it.

        Order of work: the ``vector`` extension and base tables, then added
        columns, then indexes, then removal of unused ``rag_chunks`` columns.
        The commit is issued once, after all statements have been executed.
        """
        with get_engine().connect() as conn:
            self._execute_all(conn, _BOOTSTRAP_STATEMENTS)
            self._ensure_columns(conn)
            self._ensure_indexes(conn)
            self._drop_unused_rag_chunk_columns(conn)
            conn.commit()

    @staticmethod
    def _execute_all(conn, statements) -> None:
        """Execute each SQL string in ``statements`` on ``conn``, in order."""
        for sql in statements:
            conn.execute(text(sql))

    def _ensure_columns(self, conn) -> None:
        """Add the later-introduced columns to the RAG tables if missing.

        Does not commit; the caller owns the transaction.
        """
        self._execute_all(
            conn,
            (
                f"ALTER TABLE {table} ADD COLUMN IF NOT EXISTS {column} {definition}"
                for table, columns in _ADDED_COLUMNS
                for column, definition in columns
            ),
        )

    def _drop_unused_rag_chunk_columns(self, conn) -> None:
        """Drop ``rag_doc_id`` and ``links_json`` from ``rag_chunks`` if present.

        Does not commit; the caller owns the transaction.
        """
        self._execute_all(
            conn,
            (
                f"ALTER TABLE rag_chunks DROP COLUMN IF EXISTS {column}"
                for column in _DROPPED_RAG_CHUNK_COLUMNS
            ),
        )

    def _ensure_indexes(self, conn) -> None:
        """Create the lookup indexes on the RAG tables if they do not exist.

        Does not commit; the caller owns the transaction.
        """
        self._execute_all(conn, _INDEX_STATEMENTS)