Files
RagAgent/src/rag_agent/config.py
2026-01-31 23:46:08 +03:00

69 lines
2.0 KiB
Python

from __future__ import annotations
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, Sequence
from dotenv import load_dotenv
# Import-time side effect: read the .env file located at the repo root (three
# directory levels above this resolved module path) so that local runs can
# keep their RAG_* settings in that file instead of exporting them by hand.
_repo_root = Path(__file__).resolve().parent.parent.parent
load_dotenv(dotenv_path=_repo_root / ".env")
@dataclass(frozen=True)
class AppConfig:
    """Read-only settings record for the RAG agent.

    The dataclass is frozen: fields cannot be reassigned after construction,
    and an instance is hashable as long as every field value is hashable.
    Field order is part of the positional constructor signature, so new
    fields should be appended rather than inserted.
    """

    # Path of the repository to work on.
    repo_path: str
    # Database connection string.
    db_dsn: str
    # Chunk size / overlap. The unit is not visible in this module
    # (presumably characters or tokens -- TODO confirm against the chunker).
    chunk_size: int
    chunk_overlap: int
    # Chunk size / overlap counted in lines.
    chunk_size_lines: int
    chunk_overlap_lines: int
    # Dimensionality of the embedding vectors; must match the embeddings model.
    embeddings_dim: int
    # Identifier of the embeddings model.
    embeddings_model: str
    # Identifier of the LLM.
    llm_model: str
    # File extensions accepted for processing, e.g. ".md".
    allowed_extensions: Sequence[str]
def _env_int(name: str, default: int) -> int:
    """Read environment variable *name* as an integer.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is unset, empty, or
            contains only whitespace.

    Returns:
        The parsed integer, or *default* when no usable value is present.

    Raises:
        ValueError: If the variable holds text that ``int()`` cannot parse.
            The original parsing error is chained as the cause.
    """
    raw = os.getenv(name, "").strip()
    if raw == "":
        return default

    try:
        parsed = int(raw)
    except ValueError as exc:
        # Re-raise with the variable name so the bad setting is easy to find.
        raise ValueError(f"Invalid integer for {name}: {raw}") from exc
    return parsed
def _env_list(name: str, default: Iterable[str]) -> list[str]:
    """Read environment variable *name* as a comma-separated list of strings.

    Args:
        name: Environment variable to look up.
        default: Items returned (copied into a new list) when the variable is
            unset, empty, or contains only whitespace.

    Returns:
        The items with surrounding whitespace removed and empty items
        dropped. A non-blank value made up solely of separators (e.g. ``","``)
        therefore yields an empty list, not *default*.
    """
    raw = os.getenv(name, "").strip()
    if raw:
        trimmed = (piece.strip() for piece in raw.split(","))
        return [piece for piece in trimmed if piece]
    return list(default)
def load_config() -> AppConfig:
    """Build the application configuration from ``RAG_*`` environment variables.

    Required variables:
        RAG_REPO_PATH, RAG_DB_DSN

    Optional variables (default used when unset or blank):
        RAG_CHUNK_SIZE (400), RAG_CHUNK_OVERLAP (50),
        RAG_CHUNK_SIZE_LINES (40), RAG_CHUNK_OVERLAP_LINES (8),
        RAG_EMBEDDINGS_DIM (1024), RAG_EMBEDDINGS_MODEL ("stub-embeddings"),
        RAG_LLM_MODEL ("GigaChat"),
        RAG_ALLOWED_EXTENSIONS (".md,.txt,.rst"; comma-separated).

    Every value is stripped of surrounding whitespace, and a blank value is
    treated the same as an unset variable.

    Returns:
        A frozen ``AppConfig`` populated from the environment.

    Raises:
        ValueError: If a required variable is missing or blank, or if an
            integer setting cannot be parsed.
    """
    repo_path = os.getenv("RAG_REPO_PATH", "").strip()
    if not repo_path:
        raise ValueError("RAG_REPO_PATH is required")

    db_dsn = os.getenv("RAG_DB_DSN", "").strip()
    if not db_dsn:
        raise ValueError("RAG_DB_DSN is required")

    # A line such as `RAG_LLM_MODEL=` in .env defines the variable as an empty
    # string, so os.getenv(name, default) alone would return "" rather than
    # the default. Strip and fall back, matching how the integer and list
    # settings already treat blank values.
    embeddings_model = (
        os.getenv("RAG_EMBEDDINGS_MODEL", "").strip() or "stub-embeddings"
    )
    llm_model = os.getenv("RAG_LLM_MODEL", "").strip() or "GigaChat"

    # NOTE(review): chunk sizes and overlaps are not range-checked here (e.g.
    # overlap >= size, or non-positive values) -- confirm the consumers cope.
    return AppConfig(
        repo_path=repo_path,
        db_dsn=db_dsn,
        chunk_size=_env_int("RAG_CHUNK_SIZE", 400),
        chunk_overlap=_env_int("RAG_CHUNK_OVERLAP", 50),
        chunk_size_lines=_env_int("RAG_CHUNK_SIZE_LINES", 40),
        chunk_overlap_lines=_env_int("RAG_CHUNK_OVERLAP_LINES", 8),
        # GigaChat Embeddings = 1024; OpenAI = 1536
        embeddings_dim=_env_int("RAG_EMBEDDINGS_DIM", 1024),
        embeddings_model=embeddings_model,
        llm_model=llm_model,
        allowed_extensions=tuple(
            _env_list("RAG_ALLOWED_EXTENSIONS", [".md", ".txt", ".rst"])
        ),
    )