Фиксация изменений
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -46,7 +46,6 @@ class RagDocumentRepository:
|
||||
for doc in docs:
|
||||
row = doc.to_record()
|
||||
metadata = row["metadata"]
|
||||
links = row["links"]
|
||||
emb = row["embedding"] or []
|
||||
emb_str = "[" + ",".join(str(x) for x in emb) + "]" if emb else None
|
||||
conn.execute(
|
||||
@@ -55,15 +54,15 @@ class RagDocumentRepository:
|
||||
INSERT INTO rag_chunks (
|
||||
rag_session_id, path, chunk_index, content, embedding, artifact_type, section, doc_id,
|
||||
doc_version, owner, system_component, last_modified, staleness_score, created_at, updated_at,
|
||||
rag_doc_id, layer, lang, repo_id, commit_sha, title, metadata_json, links_json, span_start,
|
||||
span_end, symbol_id, qname, kind, framework, entrypoint_type, module_id, section_path, doc_kind
|
||||
layer, lang, repo_id, commit_sha, title, metadata_json, span_start, span_end, symbol_id,
|
||||
qname, kind, framework, entrypoint_type, module_id, section_path, doc_kind
|
||||
)
|
||||
VALUES (
|
||||
:sid, :path, :chunk_index, :content, CAST(:emb AS vector), :artifact_type, :section, :doc_id,
|
||||
:doc_version, :owner, :system_component, :last_modified, :staleness_score, CURRENT_TIMESTAMP,
|
||||
CURRENT_TIMESTAMP, :rag_doc_id, :layer, :lang, :repo_id, :commit_sha, :title, :metadata_json,
|
||||
:links_json, :span_start, :span_end, :symbol_id, :qname, :kind, :framework, :entrypoint_type,
|
||||
:module_id, :section_path, :doc_kind
|
||||
CURRENT_TIMESTAMP, :layer, :lang, :repo_id, :commit_sha, :title, :metadata_json,
|
||||
:span_start, :span_end, :symbol_id, :qname, :kind, :framework, :entrypoint_type, :module_id,
|
||||
:section_path, :doc_kind
|
||||
)
|
||||
"""
|
||||
),
|
||||
@@ -81,14 +80,12 @@ class RagDocumentRepository:
|
||||
"system_component": metadata.get("system_component"),
|
||||
"last_modified": metadata.get("last_modified"),
|
||||
"staleness_score": metadata.get("staleness_score"),
|
||||
"rag_doc_id": row["doc_id"],
|
||||
"layer": row["layer"],
|
||||
"lang": row["lang"],
|
||||
"repo_id": row["repo_id"],
|
||||
"commit_sha": row["commit_sha"],
|
||||
"title": row["title"],
|
||||
"metadata_json": json.dumps(metadata, ensure_ascii=True),
|
||||
"links_json": json.dumps(links, ensure_ascii=True),
|
||||
"span_start": row["span_start"],
|
||||
"span_end": row["span_end"],
|
||||
"symbol_id": metadata.get("symbol_id"),
|
||||
|
||||
@@ -4,11 +4,14 @@ import json
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from app.modules.rag.retrieval.query_terms import extract_query_terms
|
||||
from app.modules.rag.persistence.retrieval_statement_builder import RetrievalStatementBuilder
|
||||
from app.modules.shared.db import get_engine
|
||||
|
||||
|
||||
class RagQueryRepository:
|
||||
def __init__(self) -> None:
|
||||
self._builder = RetrievalStatementBuilder()
|
||||
|
||||
def retrieve(
|
||||
self,
|
||||
rag_session_id: str,
|
||||
@@ -18,89 +21,47 @@ class RagQueryRepository:
|
||||
limit: int = 5,
|
||||
layers: list[str] | None = None,
|
||||
path_prefixes: list[str] | None = None,
|
||||
exclude_path_prefixes: list[str] | None = None,
|
||||
exclude_like_patterns: list[str] | None = None,
|
||||
prefer_non_tests: bool = False,
|
||||
) -> list[dict]:
|
||||
emb = "[" + ",".join(str(x) for x in query_embedding) + "]"
|
||||
filters = ["rag_session_id = :sid"]
|
||||
params: dict = {"sid": rag_session_id, "emb": emb, "lim": limit}
|
||||
if layers:
|
||||
filters.append("layer = ANY(:layers)")
|
||||
params["layers"] = layers
|
||||
if path_prefixes:
|
||||
or_filters = []
|
||||
for idx, prefix in enumerate(path_prefixes):
|
||||
key = f"path_{idx}"
|
||||
params[key] = f"{prefix}%"
|
||||
or_filters.append(f"path LIKE :{key}")
|
||||
filters.append("(" + " OR ".join(or_filters) + ")")
|
||||
term_filters = []
|
||||
terms = extract_query_terms(query_text)
|
||||
for idx, term in enumerate(terms):
|
||||
exact_key = f"term_exact_{idx}"
|
||||
prefix_key = f"term_prefix_{idx}"
|
||||
contains_key = f"term_contains_{idx}"
|
||||
params[exact_key] = term
|
||||
params[prefix_key] = f"{term}%"
|
||||
params[contains_key] = f"%{term}%"
|
||||
term_filters.append(
|
||||
"CASE "
|
||||
f"WHEN lower(COALESCE(qname, '')) = :{exact_key} THEN 0 "
|
||||
f"WHEN lower(COALESCE(symbol_id, '')) = :{exact_key} THEN 1 "
|
||||
f"WHEN lower(COALESCE(title, '')) = :{exact_key} THEN 2 "
|
||||
f"WHEN lower(COALESCE(qname, '')) LIKE :{prefix_key} THEN 3 "
|
||||
f"WHEN lower(COALESCE(title, '')) LIKE :{prefix_key} THEN 4 "
|
||||
f"WHEN lower(COALESCE(path, '')) LIKE :{contains_key} THEN 5 "
|
||||
f"WHEN lower(COALESCE(content, '')) LIKE :{contains_key} THEN 6 "
|
||||
"ELSE 100 END"
|
||||
)
|
||||
lexical_sql = "LEAST(" + ", ".join(term_filters) + ")" if term_filters else "100"
|
||||
test_penalty_sql = (
|
||||
"CASE "
|
||||
"WHEN lower(path) LIKE 'tests/%' OR lower(path) LIKE '%/tests/%' OR lower(path) LIKE 'test_%' OR lower(path) LIKE '%/test_%' "
|
||||
"THEN 1 ELSE 0 END"
|
||||
if prefer_non_tests
|
||||
else "0"
|
||||
sql, params = self._builder.build_retrieve(
|
||||
rag_session_id,
|
||||
query_embedding,
|
||||
query_text=query_text,
|
||||
limit=limit,
|
||||
layers=layers,
|
||||
path_prefixes=path_prefixes,
|
||||
exclude_path_prefixes=exclude_path_prefixes,
|
||||
exclude_like_patterns=exclude_like_patterns,
|
||||
prefer_non_tests=prefer_non_tests,
|
||||
)
|
||||
layer_rank_sql = (
|
||||
"CASE "
|
||||
"WHEN layer = 'C3_ENTRYPOINTS' THEN 0 "
|
||||
"WHEN layer = 'C1_SYMBOL_CATALOG' THEN 1 "
|
||||
"WHEN layer = 'C2_DEPENDENCY_GRAPH' THEN 2 "
|
||||
"WHEN layer = 'C0_SOURCE_CHUNKS' THEN 3 "
|
||||
"WHEN layer = 'D1_MODULE_CATALOG' THEN 0 "
|
||||
"WHEN layer = 'D2_FACT_INDEX' THEN 1 "
|
||||
"WHEN layer = 'D3_SECTION_INDEX' THEN 2 "
|
||||
"WHEN layer = 'D4_POLICY_INDEX' THEN 3 "
|
||||
"ELSE 10 END"
|
||||
)
|
||||
sql = f"""
|
||||
SELECT path, content, layer, title, metadata_json, span_start, span_end,
|
||||
{lexical_sql} AS lexical_rank,
|
||||
{test_penalty_sql} AS test_penalty,
|
||||
{layer_rank_sql} AS layer_rank,
|
||||
(embedding <=> CAST(:emb AS vector)) AS distance
|
||||
FROM rag_chunks
|
||||
WHERE {' AND '.join(filters)}
|
||||
ORDER BY lexical_rank ASC, test_penalty ASC, layer_rank ASC, embedding <=> CAST(:emb AS vector)
|
||||
LIMIT :lim
|
||||
"""
|
||||
with get_engine().connect() as conn:
|
||||
rows = conn.execute(text(sql), params).mappings().fetchall()
|
||||
return [self._row_to_dict(row) for row in rows]
|
||||
|
||||
def fallback_chunks(self, rag_session_id: str, *, limit: int = 5, layers: list[str] | None = None) -> list[dict]:
|
||||
filters = ["rag_session_id = :sid"]
|
||||
params: dict = {"sid": rag_session_id, "lim": limit}
|
||||
if layers:
|
||||
filters.append("layer = ANY(:layers)")
|
||||
params["layers"] = layers
|
||||
sql = f"""
|
||||
SELECT path, content, layer, title, metadata_json, span_start, span_end
|
||||
FROM rag_chunks
|
||||
WHERE {' AND '.join(filters)}
|
||||
ORDER BY id DESC
|
||||
LIMIT :lim
|
||||
"""
|
||||
def retrieve_lexical_code(
    self,
    rag_session_id: str,
    *,
    query_text: str,
    limit: int = 5,
    path_prefixes: list[str] | None = None,
    exclude_path_prefixes: list[str] | None = None,
    exclude_like_patterns: list[str] | None = None,
    prefer_non_tests: bool = False,
) -> list[dict]:
    """Execute the lexical code-chunk query produced by the statement builder.

    All arguments are forwarded unchanged to ``self._builder.build_lexical_code``.

    Returns:
        One dict per fetched row (converted by ``self._row_to_dict``), or an
        empty list when the builder returns ``None`` instead of SQL text; in
        that case no database connection is opened.
    """
    statement, bind_params = self._builder.build_lexical_code(
        rag_session_id,
        query_text=query_text,
        limit=limit,
        path_prefixes=path_prefixes,
        exclude_path_prefixes=exclude_path_prefixes,
        exclude_like_patterns=exclude_like_patterns,
        prefer_non_tests=prefer_non_tests,
    )
    # The builder signals "nothing to search for" with a None statement.
    if statement is None:
        return []
    with get_engine().connect() as connection:
        records = connection.execute(text(statement), bind_params).mappings().fetchall()
    return [self._row_to_dict(record) for record in records]
|
||||
|
||||
@@ -67,6 +67,9 @@ class RagRepository:
|
||||
query_text: str = "",
|
||||
limit: int = 5,
|
||||
layers: list[str] | None = None,
|
||||
path_prefixes: list[str] | None = None,
|
||||
exclude_path_prefixes: list[str] | None = None,
|
||||
exclude_like_patterns: list[str] | None = None,
|
||||
prefer_non_tests: bool = False,
|
||||
) -> list[dict]:
|
||||
return self._query.retrieve(
|
||||
@@ -75,8 +78,29 @@ class RagRepository:
|
||||
query_text=query_text,
|
||||
limit=limit,
|
||||
layers=layers,
|
||||
path_prefixes=path_prefixes,
|
||||
exclude_path_prefixes=exclude_path_prefixes,
|
||||
exclude_like_patterns=exclude_like_patterns,
|
||||
prefer_non_tests=prefer_non_tests,
|
||||
)
|
||||
|
||||
def fallback_chunks(self, rag_session_id: str, limit: int = 5, layers: list[str] | None = None) -> list[dict]:
    """Return fallback chunks for a session by delegating to the query repository.

    ``limit`` and ``layers`` are passed on as keyword arguments to
    ``self._query.fallback_chunks``; its result is returned unchanged.
    """
    chunks = self._query.fallback_chunks(rag_session_id, limit=limit, layers=layers)
    return chunks
|
||||
def retrieve_lexical_code(
    self,
    rag_session_id: str,
    query_text: str,
    *,
    limit: int = 5,
    path_prefixes: list[str] | None = None,
    exclude_path_prefixes: list[str] | None = None,
    exclude_like_patterns: list[str] | None = None,
    prefer_non_tests: bool = False,
) -> list[dict]:
    """Delegate a lexical code lookup to the query repository.

    ``query_text`` is positional here but is handed to
    ``self._query.retrieve_lexical_code`` as a keyword argument, together with
    every other option; the delegate's result is returned unchanged.
    """
    forwarded = {
        "query_text": query_text,
        "limit": limit,
        "path_prefixes": path_prefixes,
        "exclude_path_prefixes": exclude_path_prefixes,
        "exclude_like_patterns": exclude_like_patterns,
        "prefer_non_tests": prefer_non_tests,
    }
    return self._query.retrieve_lexical_code(rag_session_id, **forwarded)
|
||||
|
||||
201
app/modules/rag/persistence/retrieval_statement_builder.py
Normal file
201
app/modules/rag/persistence/retrieval_statement_builder.py
Normal file
@@ -0,0 +1,201 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.modules.rag.retrieval.query_terms import extract_query_terms
|
||||
|
||||
# Appended to every LIKE predicate built in this module so that backslash is the
# explicit escape character. The Python literal evaluates to the SQL fragment
#  ESCAPE E'\\'  (note the leading space).
_LIKE_ESCAPE_SQL = " ESCAPE E'\\\\'"


class RetrievalStatementBuilder:
    """Build SQL text and bind parameters for queries against ``rag_chunks``.

    The builder performs no database I/O: every public method returns a
    ``(sql, params)`` pair for the caller to execute. All caller-supplied
    values travel as bind parameters; only fixed fragments and generated
    parameter names are interpolated into the SQL text.
    """

    def build_retrieve(
        self,
        rag_session_id: str,
        query_embedding: list[float],
        *,
        query_text: str = "",
        limit: int = 5,
        layers: list[str] | None = None,
        path_prefixes: list[str] | None = None,
        exclude_path_prefixes: list[str] | None = None,
        exclude_like_patterns: list[str] | None = None,
        prefer_non_tests: bool = False,
    ) -> tuple[str, dict]:
        """Build the combined lexical + vector-distance retrieval statement.

        Args:
            rag_session_id: Bound as ``:sid``; rows are restricted to this session.
            query_embedding: Serialized as ``[v1,v2,...]`` and bound as ``:emb``.
            query_text: Source of the lexical-rank terms (via ``extract_query_terms``).
            limit: Bound as ``:lim``.
            layers: When non-empty, adds ``layer = ANY(:layers)``.
            path_prefixes: When non-empty, ``path`` must start with one of them.
            exclude_path_prefixes: When non-empty, rows whose ``path`` starts with
                any of them are filtered out.
            exclude_like_patterns: Raw LIKE patterns matched against ``lower(path)``;
                matching rows are filtered out.
            prefer_non_tests: Enables the ``test_penalty`` ordering column.

        Returns:
            ``(sql, params)``. Rows are ordered by lexical rank, then test
            penalty, then layer rank, then embedding distance.
        """
        emb = "[" + ",".join(str(x) for x in query_embedding) + "]"
        filters = ["rag_session_id = :sid"]
        params: dict = {"sid": rag_session_id, "emb": emb, "lim": limit}
        self._append_prefix_group(filters, params, "path", path_prefixes)
        self._append_prefix_group(filters, params, "exclude_prefix", exclude_path_prefixes, negate=True)
        self._append_like_group(filters, params, "exclude_like", exclude_like_patterns, negate=True)
        if layers:
            filters.append("layer = ANY(:layers)")
            params["layers"] = layers
        lexical_sql = self._lexical_rank_sql(query_text, params)
        # NOTE(review): the penalty is derived from the same lists that the WHERE
        # clause already excludes, so it can only fire for rows that survive the
        # case-sensitive prefix exclusion - confirm this is the intended design.
        test_penalty_sql = self._test_penalty_sql(
            prefer_non_tests,
            params,
            base_key="penalty",
            path_prefixes=exclude_path_prefixes,
            like_patterns=exclude_like_patterns,
        )
        # Both the C* and D* layer families are mapped onto the same 0..3 scale.
        layer_rank_sql = (
            "CASE "
            "WHEN layer = 'C3_ENTRYPOINTS' THEN 0 "
            "WHEN layer = 'C1_SYMBOL_CATALOG' THEN 1 "
            "WHEN layer = 'C2_DEPENDENCY_GRAPH' THEN 2 "
            "WHEN layer = 'C0_SOURCE_CHUNKS' THEN 3 "
            "WHEN layer = 'D1_MODULE_CATALOG' THEN 0 "
            "WHEN layer = 'D2_FACT_INDEX' THEN 1 "
            "WHEN layer = 'D3_SECTION_INDEX' THEN 2 "
            "WHEN layer = 'D4_POLICY_INDEX' THEN 3 "
            "ELSE 10 END"
        )
        sql = f"""
            SELECT path, content, layer, title, metadata_json, span_start, span_end,
                   {lexical_sql} AS lexical_rank,
                   {test_penalty_sql} AS test_penalty,
                   {layer_rank_sql} AS layer_rank,
                   (embedding <=> CAST(:emb AS vector)) AS distance
            FROM rag_chunks
            WHERE {' AND '.join(filters)}
            ORDER BY lexical_rank ASC, test_penalty ASC, layer_rank ASC, embedding <=> CAST(:emb AS vector)
            LIMIT :lim
        """
        return sql, params

    def build_lexical_code(
        self,
        rag_session_id: str,
        *,
        query_text: str,
        limit: int = 5,
        path_prefixes: list[str] | None = None,
        exclude_path_prefixes: list[str] | None = None,
        exclude_like_patterns: list[str] | None = None,
        prefer_non_tests: bool = False,
    ) -> tuple[str | None, dict]:
        """Build a purely lexical statement restricted to ``C0_SOURCE_CHUNKS`` rows.

        A row qualifies when at least one query term matches its ``qname``,
        ``title``, ``path`` or ``content``.

        Returns:
            ``(sql, params)``, or ``(None, {})`` when ``extract_query_terms``
            yields no terms for ``query_text``.
        """
        terms = extract_query_terms(query_text)
        if not terms:
            return None, {}
        filters = ["rag_session_id = :sid", "layer = 'C0_SOURCE_CHUNKS'"]
        params: dict = {"sid": rag_session_id, "lim": limit}
        self._append_prefix_group(filters, params, "path", path_prefixes)
        self._append_prefix_group(filters, params, "exclude_prefix", exclude_path_prefixes, negate=True)
        self._append_like_group(filters, params, "exclude_like", exclude_like_patterns, negate=True)
        lexical_filters: list[str] = []
        lexical_ranks: list[str] = []
        for idx, term in enumerate(terms):
            exact_key, prefix_key, contains_key = self._bind_term(params, "lex", idx, term)
            lexical_filters.append(
                f"(lower(COALESCE(qname, '')) = :{exact_key} "
                f"OR lower(COALESCE(title, '')) = :{exact_key} "
                f"OR lower(COALESCE(path, '')) LIKE :{contains_key}{_LIKE_ESCAPE_SQL} "
                f"OR lower(COALESCE(title, '')) LIKE :{prefix_key}{_LIKE_ESCAPE_SQL} "
                f"OR lower(COALESCE(content, '')) LIKE :{contains_key}{_LIKE_ESCAPE_SQL})"
            )
            lexical_ranks.append(
                "CASE "
                f"WHEN lower(COALESCE(qname, '')) = :{exact_key} THEN 0 "
                f"WHEN lower(COALESCE(title, '')) = :{exact_key} THEN 1 "
                f"WHEN lower(COALESCE(title, '')) LIKE :{prefix_key}{_LIKE_ESCAPE_SQL} THEN 2 "
                f"WHEN lower(COALESCE(path, '')) LIKE :{contains_key}{_LIKE_ESCAPE_SQL} THEN 3 "
                f"WHEN lower(COALESCE(content, '')) LIKE :{contains_key}{_LIKE_ESCAPE_SQL} THEN 4 "
                "ELSE 100 END"
            )
        filters.append("(" + " OR ".join(lexical_filters) + ")")
        # The best (lowest) rank over all terms decides the row's position.
        lexical_sql = "LEAST(" + ", ".join(lexical_ranks) + ")"
        test_penalty_sql = self._test_penalty_sql(
            prefer_non_tests,
            params,
            base_key="lex_penalty",
            path_prefixes=exclude_path_prefixes,
            like_patterns=exclude_like_patterns,
        )
        sql = f"""
            SELECT path, content, layer, title, metadata_json, span_start, span_end,
                   {lexical_sql} AS lexical_rank,
                   {test_penalty_sql} AS test_penalty
            FROM rag_chunks
            WHERE {' AND '.join(filters)}
            ORDER BY lexical_rank ASC, test_penalty ASC, path ASC, span_start ASC
            LIMIT :lim
        """
        return sql, params

    def _lexical_rank_sql(self, query_text: str, params: dict) -> str:
        """Return an SQL expression ranking a row against the query terms.

        Registers the bind parameters for every term in ``params``. The result
        is ``LEAST(...)`` over one CASE per term, or the constant ``100`` when
        ``extract_query_terms`` yields nothing.
        """
        term_filters: list[str] = []
        for idx, term in enumerate(extract_query_terms(query_text)):
            exact_key, prefix_key, contains_key = self._bind_term(params, "term", idx, term)
            term_filters.append(
                "CASE "
                f"WHEN lower(COALESCE(qname, '')) = :{exact_key} THEN 0 "
                f"WHEN lower(COALESCE(symbol_id, '')) = :{exact_key} THEN 1 "
                f"WHEN lower(COALESCE(title, '')) = :{exact_key} THEN 2 "
                f"WHEN lower(COALESCE(qname, '')) LIKE :{prefix_key}{_LIKE_ESCAPE_SQL} THEN 3 "
                f"WHEN lower(COALESCE(title, '')) LIKE :{prefix_key}{_LIKE_ESCAPE_SQL} THEN 4 "
                f"WHEN lower(COALESCE(path, '')) LIKE :{contains_key}{_LIKE_ESCAPE_SQL} THEN 5 "
                f"WHEN lower(COALESCE(content, '')) LIKE :{contains_key}{_LIKE_ESCAPE_SQL} THEN 6 "
                "ELSE 100 END"
            )
        return "LEAST(" + ", ".join(term_filters) + ")" if term_filters else "100"

    def _bind_term(self, params: dict, base_key: str, idx: int, term) -> tuple[str, str, str]:
        """Register the exact / prefix / contains parameters for one query term.

        The LIKE variants are escaped so that ``_`` and ``%`` inside a term
        (for example a snake_case identifier) match literally instead of acting
        as wildcards; the predicates that use them must carry ``_LIKE_ESCAPE_SQL``.

        Returns:
            The three parameter names ``(exact_key, prefix_key, contains_key)``.
        """
        exact_key = f"{base_key}_exact_{idx}"
        prefix_key = f"{base_key}_prefix_{idx}"
        contains_key = f"{base_key}_contains_{idx}"
        # The columns are lowercased in SQL; terms are presumably already
        # lowercased by extract_query_terms - TODO confirm.
        escaped = self._escape_like_value(str(term))
        params[exact_key] = term
        params[prefix_key] = f"{escaped}%"
        params[contains_key] = f"%{escaped}%"
        return exact_key, prefix_key, contains_key

    def _append_prefix_group(self, filters: list[str], params: dict, base_key: str, prefixes: list[str] | None, *, negate: bool = False) -> None:
        """Append one filter matching ``path`` against any of ``prefixes``.

        Prefixes are escaped and matched literally and case-sensitively. With
        ``negate=True`` the whole OR-group is wrapped in ``NOT (...)``. Does
        nothing when ``prefixes`` is empty or ``None``.
        """
        if not prefixes:
            return
        items: list[str] = []
        for idx, prefix in enumerate(prefixes):
            key = f"{base_key}_{idx}"
            params[key] = self._escape_like_value(prefix) + "%"
            items.append(f"path LIKE :{key}{_LIKE_ESCAPE_SQL}")
        self._append_group(filters, items, negate=negate)

    def _append_like_group(self, filters: list[str], params: dict, base_key: str, patterns: list[str] | None, *, negate: bool = False) -> None:
        """Append one filter matching ``lower(path)`` against raw LIKE ``patterns``.

        Patterns are used as given (wildcards stay active) but are lowercased,
        because they are compared with ``lower(path)`` and a pattern containing
        an uppercase letter could otherwise never match.
        """
        if not patterns:
            return
        items: list[str] = []
        for idx, pattern in enumerate(patterns):
            key = f"{base_key}_{idx}"
            params[key] = pattern.lower()
            items.append(f"lower(path) LIKE :{key}{_LIKE_ESCAPE_SQL}")
        self._append_group(filters, items, negate=negate)

    def _append_group(self, filters: list[str], parts: list[str], *, negate: bool) -> None:
        """OR-join ``parts`` and append them to ``filters``, negated on request."""
        if not parts:
            return
        joined = " OR ".join(parts)
        filters.append(f"NOT ({joined})" if negate else f"({joined})")

    def _test_penalty_sql(
        self,
        enabled: bool,
        params: dict,
        *,
        base_key: str,
        path_prefixes: list[str] | None,
        like_patterns: list[str] | None,
    ) -> str:
        """Return an SQL expression that is 1 for penalized paths and 0 otherwise.

        Returns the constant ``"0"`` when the penalty is disabled or when no
        prefixes or patterns are given. Both kinds of value are lowercased
        because they are compared with ``lower(path)``.
        """
        if not enabled:
            return "0"
        parts: list[str] = []
        for idx, prefix in enumerate(path_prefixes or []):
            key = f"{base_key}_prefix_{idx}"
            params[key] = self._escape_like_value(prefix.lower()) + "%"
            parts.append(f"lower(path) LIKE :{key}{_LIKE_ESCAPE_SQL}")
        for idx, pattern in enumerate(like_patterns or []):
            key = f"{base_key}_like_{idx}"
            params[key] = pattern.lower()
            parts.append(f"lower(path) LIKE :{key}{_LIKE_ESCAPE_SQL}")
        if not parts:
            return "0"
        return "CASE WHEN " + " OR ".join(parts) + " THEN 1 ELSE 0 END"

    def _escape_like_value(self, value: str) -> str:
        """Escape backslash, percent and underscore so ``value`` matches literally in LIKE."""
        # Backslash must be doubled first, otherwise the escapes added for
        # percent and underscore would themselves be escaped again.
        return value.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
|
||||
@@ -106,6 +106,7 @@ class RagSchemaRepository:
|
||||
)
|
||||
self._ensure_columns(conn)
|
||||
self._ensure_indexes(conn)
|
||||
self._drop_unused_rag_chunk_columns(conn)
|
||||
conn.commit()
|
||||
|
||||
def _ensure_columns(self, conn) -> None:
|
||||
@@ -118,14 +119,12 @@ class RagSchemaRepository:
|
||||
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS system_component TEXT NULL",
|
||||
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS last_modified TIMESTAMPTZ NULL",
|
||||
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS staleness_score DOUBLE PRECISION NULL",
|
||||
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS rag_doc_id VARCHAR(128) NULL",
|
||||
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS layer VARCHAR(64) NULL",
|
||||
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS lang VARCHAR(32) NULL",
|
||||
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS repo_id VARCHAR(512) NULL",
|
||||
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS commit_sha VARCHAR(128) NULL",
|
||||
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS title TEXT NULL",
|
||||
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS metadata_json TEXT NULL",
|
||||
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS links_json TEXT NULL",
|
||||
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS span_start INTEGER NULL",
|
||||
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS span_end INTEGER NULL",
|
||||
"ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS symbol_id TEXT NULL",
|
||||
@@ -162,6 +161,13 @@ class RagSchemaRepository:
|
||||
):
|
||||
conn.execute(text(statement))
|
||||
|
||||
def _drop_unused_rag_chunk_columns(self, conn) -> None:
    """Drop the ``rag_doc_id`` and ``links_json`` columns from ``rag_chunks``.

    Each statement uses ``DROP COLUMN IF EXISTS`` and is executed on ``conn``
    in the order listed; this method does not commit.
    """
    drop_statements = (
        "ALTER TABLE rag_chunks DROP COLUMN IF EXISTS rag_doc_id",
        "ALTER TABLE rag_chunks DROP COLUMN IF EXISTS links_json",
    )
    for ddl in drop_statements:
        conn.execute(text(ddl))
|
||||
|
||||
def _ensure_indexes(self, conn) -> None:
|
||||
for statement in (
|
||||
"CREATE INDEX IF NOT EXISTS idx_rag_chunks_session ON rag_chunks (rag_session_id)",
|
||||
|
||||
Reference in New Issue
Block a user