"""Application configuration read from environment variables and an optional .env file."""
from __future__ import annotations
|
|
|
|
import os
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Iterable, Sequence
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
# Importing this module has a side effect: variables from a ".env" file
# located three directory levels above this file are loaded via
# python-dotenv (handy for local runs).
_repo_root = Path(__file__).resolve().parent.parent.parent
load_dotenv(_repo_root.joinpath(".env"))
|
|
|
|
|
|
@dataclass(frozen=True)
class AppConfig:
    """Immutable container for the application's settings.

    Instances are frozen, so attributes cannot be reassigned after
    construction. The declaration order of the fields below is also the
    positional order of the generated constructor.
    """

    # Path of the repository to work on (populated from RAG_REPO_PATH).
    repo_path: str
    # Database connection string (populated from RAG_DB_DSN).
    db_dsn: str
    # Chunk size and overlap; the unit is not stated in this file
    # (presumably characters or tokens -- confirm against the chunker).
    chunk_size: int
    chunk_overlap: int
    # Chunk size and overlap for line-based chunking.
    chunk_size_lines: int
    chunk_overlap_lines: int
    # Dimensionality of the embedding vectors.
    embeddings_dim: int
    # Identifier of the embeddings model.
    embeddings_model: str
    # Identifier of the LLM.
    llm_model: str
    # Extensions accepted for processing, e.g. ".md".
    allowed_extensions: Sequence[str]
|
|
|
|
|
|
def _env_int(name: str, default: int) -> int:
|
|
value = os.getenv(name, "").strip()
|
|
if not value:
|
|
return default
|
|
try:
|
|
return int(value)
|
|
except ValueError as exc:
|
|
raise ValueError(f"Invalid integer for {name}: {value}") from exc
|
|
|
|
|
|
def _env_list(name: str, default: Iterable[str]) -> list[str]:
|
|
value = os.getenv(name, "").strip()
|
|
if not value:
|
|
return list(default)
|
|
return [item.strip() for item in value.split(",") if item.strip()]
|
|
|
|
|
|
def load_config() -> AppConfig:
    """Build an ``AppConfig`` from ``RAG_*`` environment variables.

    ``RAG_REPO_PATH`` and ``RAG_DB_DSN`` are mandatory. Every other setting
    falls back to a built-in default when its variable is unset or blank.

    Returns:
        A populated, immutable ``AppConfig``.

    Raises:
        ValueError: If a mandatory variable is missing or blank, or if an
            integer setting cannot be parsed.
    """
    repo_path = os.getenv("RAG_REPO_PATH", "").strip()
    if not repo_path:
        raise ValueError("RAG_REPO_PATH is required")

    db_dsn = os.getenv("RAG_DB_DSN", "").strip()
    if not db_dsn:
        raise ValueError("RAG_DB_DSN is required")

    # os.getenv only applies its default when the variable is absent, so a
    # blank entry such as "RAG_LLM_MODEL=" would otherwise produce an empty
    # model name. Strip and fall back, matching the other settings.
    embeddings_model = os.getenv("RAG_EMBEDDINGS_MODEL", "").strip() or "stub-embeddings"
    llm_model = os.getenv("RAG_LLM_MODEL", "").strip() or "GigaChat"

    return AppConfig(
        repo_path=repo_path,
        db_dsn=db_dsn,
        chunk_size=_env_int("RAG_CHUNK_SIZE", 400),
        chunk_overlap=_env_int("RAG_CHUNK_OVERLAP", 50),
        chunk_size_lines=_env_int("RAG_CHUNK_SIZE_LINES", 40),
        chunk_overlap_lines=_env_int("RAG_CHUNK_OVERLAP_LINES", 8),
        embeddings_dim=_env_int("RAG_EMBEDDINGS_DIM", 1024),  # GigaChat Embeddings = 1024; OpenAI = 1536
        embeddings_model=embeddings_model,
        llm_model=llm_model,
        # Stored as a tuple so the frozen config holds an immutable sequence.
        allowed_extensions=tuple(
            _env_list("RAG_ALLOWED_EXTENSIONS", [".md", ".txt", ".rst"])
        ),
    )
|