"""Application configuration read from ``RAG_*`` environment variables.

Importing this module has a side effect: python-dotenv is asked to load the
``.env`` file located three directory levels above this file, so that local
runs can keep their settings in that file instead of the shell environment.
"""

from __future__ import annotations

import os
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, Sequence

from dotenv import load_dotenv

# Load .env from repo root when config is used (e.g. for local runs)
_repo_root = Path(__file__).resolve().parent.parent.parent
load_dotenv(_repo_root / ".env")


@dataclass(frozen=True)
class AppConfig:
    """Immutable bundle of settings produced by :func:`load_config`."""

    repo_path: str
    db_dsn: str
    chunk_size: int
    chunk_overlap: int
    chunk_size_lines: int
    chunk_overlap_lines: int
    embeddings_dim: int
    embeddings_model: str
    llm_model: str
    allowed_extensions: Sequence[str]


def _env_str(name: str, default: str) -> str:
    """Return the stripped value of env var *name*, or *default* if unset/blank.

    Mirrors ``_env_int`` / ``_env_list``: an empty or whitespace-only value
    (e.g. ``NAME=`` left blank in ``.env``) is treated the same as unset.
    """
    value = os.getenv(name, "").strip()
    return value or default


def _env_int(name: str, default: int) -> int:
    """Return env var *name* parsed as an int, or *default* if unset/blank.

    Raises:
        ValueError: if the variable is set to something ``int()`` rejects.
    """
    value = os.getenv(name, "").strip()
    if not value:
        return default
    try:
        return int(value)
    except ValueError as exc:
        raise ValueError(f"Invalid integer for {name}: {value}") from exc


def _env_list(name: str, default: Iterable[str]) -> list[str]:
    """Return env var *name* split on commas, or ``list(default)`` if unset/blank.

    Each item is stripped; empty items (e.g. from a trailing comma) are dropped.
    """
    value = os.getenv(name, "").strip()
    if not value:
        return list(default)
    return [item.strip() for item in value.split(",") if item.strip()]


def load_config() -> AppConfig:
    """Build an :class:`AppConfig` from the current process environment.

    ``RAG_REPO_PATH`` and ``RAG_DB_DSN`` are mandatory; every other setting
    falls back to the default given below when unset or blank.

    Raises:
        ValueError: if a mandatory variable is missing/blank, or an integer
            setting cannot be parsed.
    """
    repo_path = os.getenv("RAG_REPO_PATH", "").strip()
    if not repo_path:
        raise ValueError("RAG_REPO_PATH is required")

    db_dsn = os.getenv("RAG_DB_DSN", "").strip()
    if not db_dsn:
        raise ValueError("RAG_DB_DSN is required")

    return AppConfig(
        repo_path=repo_path,
        db_dsn=db_dsn,
        chunk_size=_env_int("RAG_CHUNK_SIZE", 400),
        chunk_overlap=_env_int("RAG_CHUNK_OVERLAP", 50),
        chunk_size_lines=_env_int("RAG_CHUNK_SIZE_LINES", 40),
        chunk_overlap_lines=_env_int("RAG_CHUNK_OVERLAP_LINES", 8),
        # GigaChat Embeddings = 1024; OpenAI = 1536
        embeddings_dim=_env_int("RAG_EMBEDDINGS_DIM", 1024),
        # String settings go through _env_str so a blank entry falls back to
        # the default instead of silently becoming an empty model name.
        embeddings_model=_env_str("RAG_EMBEDDINGS_MODEL", "stub-embeddings"),
        llm_model=_env_str("RAG_LLM_MODEL", "GigaChat"),
        # Stored as a tuple so the frozen config holds no mutable list.
        allowed_extensions=tuple(
            _env_list("RAG_ALLOWED_EXTENSIONS", [".md", ".txt", ".rst"])
        ),
    )