Настройки развертывания

Добавлены эмбеддинги на базе гигачата
2026-01-30 22:53:16 +03:00 · 2026-01-30 22:53:01 +03:00
9 changed files with 203 additions and 7 deletions
--- a/Dockerfile
+++ b/Dockerfile
@@ -0,0 +1,21 @@
+# RAG Agent app. Build from repo root (clone git@git.lesha.spb.ru:alex/RagAgent.git then docker compose build).
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Install git for optional in-image clone; app is usually COPY'd from build context
+# The apt package lists are removed in the same layer to keep the image small.
+RUN apt-get update -qq && apt-get install -y --no-install-recommends git openssh-client \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy repo (when built from cloned repo: docker compose build)
+# Only the files needed to install the package are copied: metadata, sources and README.
+COPY pyproject.toml ./
+COPY src ./src
+COPY README.md ./
+
+# Editable install of the project from /app; this is expected to provide the
+# `rag-agent` command used as ENTRYPOINT below (see [project.scripts] in pyproject.toml).
+RUN pip install --no-cache-dir -e .
+
+# Default: run CLI (override in compose or when running)
+# RAG_DB_DSN is an empty placeholder here; docker-compose.yml supplies the real DSN.
+# RAG_REPO_PATH defaults to /data, the directory where compose mounts the knowledge-base repo.
+ENV RAG_DB_DSN=""
+ENV RAG_REPO_PATH="/data"
+ENTRYPOINT ["rag-agent"]
+CMD ["ask", "--help"]
--- a/README.md
+++ b/README.md
@@ -6,15 +6,21 @@ and answers queries using retrieval + LLM generation. Commits are tied to

 ## Quick start

-1. Configure environment variables:
+1. (Optional) Run Postgres and the app via Docker (clone the repo first):
+   - `git clone git@git.lesha.spb.ru:alex/RagAgent.git && cd RagAgent`
+   - `docker compose up -d` — starts Postgres and the RAG app in one network `rag_net`; app connects to DB at host `postgres`.
+   - On first start (empty DB), scripts in `docker/postgres-init/` run automatically (extension + tables). To disable, comment out the init volume in `docker-compose.yml`.
+   - Default DSN inside the app: `postgresql://rag:rag_secret@postgres:5432/rag`. Override with `POSTGRES_*` and `RAG_REPO_PATH` (path to your knowledge-base repo, mounted into the app container).
+   - Run commands: `docker compose run --rm app index --story my-branch`, `docker compose run --rm app ask "Question?"`.
+2. Configure environment variables:
   - `RAG_REPO_PATH` — path to git repo with text files
-   - `RAG_DB_DSN` — Postgres DSN (e.g. `postgresql://user:pass@localhost:5432/rag`)
+   - `RAG_DB_DSN` — Postgres DSN (e.g. `postgresql://rag:rag_secret@localhost:5432/rag`)
   - `RAG_EMBEDDINGS_DIM` — embedding vector dimension (e.g. `1536`)
-2. Create DB schema:
+3. Create DB schema (only if not using Docker, or if init was disabled):
   - `python scripts/create_db.py` (or `psql "$RAG_DB_DSN" -f scripts/schema.sql`)
-3. Index files for a story (e.g. branch name as story slug):
+4. Index files for a story (e.g. branch name as story slug):
   - `rag-agent index --story my-branch --changed --base-ref HEAD~1 --head-ref HEAD`
-4. Ask a question (optionally scoped to a story):
+5. Ask a question (optionally scoped to a story):
   - `rag-agent ask "What is covered?"`
   - `rag-agent ask "What is covered?" --story my-branch`

@@ -36,8 +42,11 @@ Story for the commit is taken from (in order): env `RAG_STORY`, file `.rag-story

 Scripts: `scripts/create_db.py` (Python, uses `ensure_schema` and `RAG_*` env), `scripts/schema.sql` (raw SQL).

+## Embeddings (GigaChat)
+
+If `GIGACHAT_CREDENTIALS` is set (e.g. in `.env` for local runs), embeddings use the GigaChat API; otherwise the stub client is used. Optional env: `GIGACHAT_EMBEDDINGS_MODEL` (default `Embeddings`), `GIGACHAT_VERIFY_SSL` (`true`/`false`, default `false`; `1` and `yes` are also accepted as true). Ensure `RAG_EMBEDDINGS_DIM` matches the model output (see GigaChat docs).
+
 ## Notes

- The default embedding/LLM clients are stubs. Replace them in
-  `src/rag_agent/index/embeddings.py` and `src/rag_agent/agent/pipeline.py`.
+- LLM client is still a stub; replace it in `src/rag_agent/agent/pipeline.py` for real answers.
 - This project requires Postgres with the `pgvector` extension.
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,56 @@
+# Postgres with pgvector + RAG Agent app (from repo git@git.lesha.spb.ru:alex/RagAgent.git).
+# Clone the repo, then: docker compose up -d
+# App and DB share network "rag_net"; app uses RAG_DB_DSN with host=postgres.
+# DB init: scripts in docker/postgres-init/ run on first start (empty volume); to disable, comment out the init volume.
+
+services:
+  # Database service: pgvector-enabled Postgres 16 image.
+  postgres:
+    image: pgvector/pgvector:pg16
+    container_name: rag-postgres
+    # Credentials and DB name come from the host environment / .env, with defaults after ":-".
+    environment:
+      POSTGRES_USER: ${POSTGRES_USER:-rag}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-rag_secret}
+      POSTGRES_DB: ${POSTGRES_DB:-rag}
+    # Published on the host at POSTGRES_PORT (default 5432); the container side is always 5432.
+    ports:
+      - "${POSTGRES_PORT:-5432}:5432"
+    volumes:
+      # Named volume that keeps the database files between container restarts.
+      - rag_pgdata:/var/lib/postgresql/data
+      # Init scripts run once on first start (create extension, tables). Optional: comment out to skip.
+      - ./docker/postgres-init:/docker-entrypoint-initdb.d:ro
+    # The app service waits for this check to pass (depends_on: condition: service_healthy).
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-rag} -d ${POSTGRES_DB:-rag}"]
+      interval: 5s
+      timeout: 5s
+      retries: 5
+    networks:
+      - rag_net
+
+  # RAG Agent CLI container; built from the Dockerfile in the repo root.
+  app:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    image: rag-agent:latest
+    container_name: rag-agent
+    restart: "no"
+    # Start only after the postgres healthcheck reports the DB as ready.
+    depends_on:
+      postgres:
+        condition: service_healthy
+    environment:
+      RAG_DB_DSN: "postgresql://${POSTGRES_USER:-rag}:${POSTGRES_PASSWORD:-rag_secret}@postgres:5432/${POSTGRES_DB:-rag}"
+      # Container-side path. It must equal the mount target in `volumes` below, not the
+      # host-side RAG_REPO_PATH: a host path such as /home/me/kb does not exist in the container.
+      RAG_REPO_PATH: /data
+      RAG_EMBEDDINGS_DIM: ${RAG_EMBEDDINGS_DIM:-1536}
+      GIGACHAT_CREDENTIALS: ${GIGACHAT_CREDENTIALS:-}
+      GIGACHAT_EMBEDDINGS_MODEL: ${GIGACHAT_EMBEDDINGS_MODEL:-Embeddings}
+      # Passed through so the README-documented switch also works under Docker.
+      GIGACHAT_VERIFY_SSL: ${GIGACHAT_VERIFY_SSL:-false}
+    volumes:
+      # Host-side RAG_REPO_PATH (default ./data) is mounted read-only at /data.
+      - ${RAG_REPO_PATH:-./data}:/data:ro
+    entrypoint: ["rag-agent"]
+    command: ["ask", "--help"]
+    networks:
+      - rag_net
+
+networks:
+  rag_net:
+    driver: bridge
+
+volumes:
+  rag_pgdata:
--- a/docker/postgres-init/00-example-extra-user.sql.example
+++ b/docker/postgres-init/00-example-extra-user.sql.example
@@ -0,0 +1,7 @@
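+-- Example: create an extra DB user (e.g. read-only). Not executed — rename to 00-create-extra-user.sql to enable.
+-- Scripts in this folder run in alphabetical order; 00-* runs before 01-schema.sql.
+-- NOTE: because 00-* runs before the tables exist, GRANT ... ON ALL TABLES below matches nothing at that point.
+-- Either name the script 02-*.sql so it runs after 01-schema.sql, or also enable the ALTER DEFAULT PRIVILEGES line.
+
+-- CREATE USER rag_readonly WITH PASSWORD 'change_me';
+-- GRANT CONNECT ON DATABASE rag TO rag_readonly;
+-- GRANT USAGE ON SCHEMA public TO rag_readonly;
+-- GRANT SELECT ON ALL TABLES IN SCHEMA public TO rag_readonly;
+-- ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO rag_readonly;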
--- a/docker/postgres-init/01-schema.sql
+++ b/docker/postgres-init/01-schema.sql
@@ -0,0 +1,32 @@
+-- RAG vector DB schema (runs automatically on first Postgres init).
+-- If RAG_EMBEDDINGS_DIM is not 1536, change vector(1536) below.
+
+CREATE EXTENSION IF NOT EXISTS vector;
+
+-- One row per story (e.g. a branch name used as the story slug).
+CREATE TABLE IF NOT EXISTS stories (
+    id SERIAL PRIMARY KEY,
+    slug TEXT UNIQUE NOT NULL,
+    -- Plain NOW() already returns an absolute TIMESTAMPTZ. "NOW() AT TIME ZONE 'utc'" would
+    -- produce a zone-less timestamp that is re-interpreted in the session time zone on insert,
+    -- storing a shifted instant whenever the session zone is not UTC.
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+-- Indexed files; each path is unique within its story.
+CREATE TABLE IF NOT EXISTS documents (
+    id SERIAL PRIMARY KEY,
+    story_id INTEGER NOT NULL REFERENCES stories(id) ON DELETE CASCADE,
+    path TEXT NOT NULL,
+    version TEXT NOT NULL,
+    updated_at TIMESTAMPTZ NOT NULL,
+    UNIQUE(story_id, path)
+);
+
+-- Text chunks of a document together with their embedding vectors.
+CREATE TABLE IF NOT EXISTS chunks (
+    id SERIAL PRIMARY KEY,
+    document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
+    chunk_index INTEGER NOT NULL,
+    hash TEXT NOT NULL,
+    content TEXT NOT NULL,
+    embedding vector(1536) NOT NULL
+);
+
+CREATE INDEX IF NOT EXISTS idx_documents_story_id ON documents(story_id);
+CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks(document_id);
+-- Approximate nearest-neighbour index for cosine-distance search over embeddings.
+CREATE INDEX IF NOT EXISTS idx_chunks_embedding ON chunks USING ivfflat (embedding vector_cosine_ops);
--- a/docker/postgres-init/README.md
+++ b/docker/postgres-init/README.md
@@ -0,0 +1,9 @@
+# Postgres init scripts (optional)
+
+Files here are mounted into the Postgres container at `/docker-entrypoint-initdb.d/` and run **only on first startup** (when the data volume is empty), in alphabetical order.
+
+- `01-schema.sql` — creates pgvector extension and RAG tables (stories, documents, chunks).
+- To add more users or other setup, add scripts with names like `00-create-user.sql` (they run before `01-schema.sql`).
+- To disable init: in `docker-compose.yml`, comment out the postgres volume that mounts this folder, or remove/rename the `.sql` files.
+
+After the first run, these scripts are not executed again. To re-run them, remove the volume: `docker compose down -v` (this deletes DB data), then `docker compose up -d`.
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,6 +8,8 @@ dependencies = [
  "psycopg[binary]>=3.1.18",
  "pgvector>=0.2.5",
  "pydantic>=2.7.0",
+  "python-dotenv>=1.0.0",
+  "gigachat>=0.2.0",
 ]

 [project.scripts]
--- a/src/rag_agent/config.py
+++ b/src/rag_agent/config.py
@@ -2,8 +2,15 @@ from __future__ import annotations

 import os
 from dataclasses import dataclass
+from pathlib import Path
 from typing import Iterable, Sequence

+from dotenv import load_dotenv
+
+# Load .env from repo root when config is used (e.g. for local runs)
+_repo_root = Path(__file__).resolve().parent.parent.parent
+load_dotenv(_repo_root / ".env")
+

@dataclass(frozen=True)
 class AppConfig:
--- a/src/rag_agent/index/embeddings.py
+++ b/src/rag_agent/index/embeddings.py
@@ -1,9 +1,17 @@
 from __future__ import annotations

 import hashlib
+import os
 from dataclasses import dataclass
+from pathlib import Path
 from typing import Iterable, Protocol

+from dotenv import load_dotenv
+
+# Ensure .env is loaded when resolving embedding client (e.g. GIGACHAT_CREDENTIALS)
+_repo_root = Path(__file__).resolve().parent.parent.parent.parent
+load_dotenv(_repo_root / ".env")
+

 class EmbeddingClient(Protocol):
    def embed_texts(self, texts: Iterable[str]) -> list[list[float]]:
@@ -25,5 +33,50 @@ class StubEmbeddingClient:
        return vectors


+_GIGACHAT_BATCH_SIZE = 50
+
+
+class GigaChatEmbeddingClient:
+    """Embedding client backed by the GigaChat API.
+
+    ``get_embedding_client`` builds it from the ``GIGACHAT_CREDENTIALS``
+    environment variable.
+
+    NOTE(review): the vector length is decided by the chosen model, not by
+    this class; confirm it equals ``RAG_EMBEDDINGS_DIM`` and the ``vector(N)``
+    column in the DB schema.
+    """
+
+    def __init__(
+        self,
+        credentials: str,
+        model: str = "Embeddings",
+        verify_ssl_certs: bool = False,
+    ) -> None:
+        """Store connection settings; no network call is made here.
+
+        Args:
+            credentials: GigaChat credentials string; surrounding whitespace
+                is stripped.
+            model: Name of the embeddings model to request.
+            verify_ssl_certs: Whether the SDK should verify TLS certificates.
+        """
+        self._credentials = credentials.strip()
+        self._model = model
+        self._verify_ssl_certs = verify_ssl_certs
+
+    def embed_texts(self, texts: Iterable[str]) -> list[list[float]]:
+        """Return one embedding per input text, in input order.
+
+        Texts are sent in batches of ``_GIGACHAT_BATCH_SIZE``. An empty input
+        returns ``[]`` without contacting the API.
+
+        Raises:
+            KeyError: If a response lacks an item for some position of a batch.
+        """
+        # Imported lazily: the SDK is only needed when this client is actually used.
+        from gigachat import GigaChat
+
+        texts_list = list(texts)
+        if not texts_list:
+            return []
+
+        result: list[list[float]] = []
+        # One client for all batches, so the session is opened and closed once
+        # instead of once per batch.
+        with GigaChat(
+            credentials=self._credentials,
+            verify_ssl_certs=self._verify_ssl_certs,
+        ) as giga:
+            for start in range(0, len(texts_list), _GIGACHAT_BATCH_SIZE):
+                batch = texts_list[start : start + _GIGACHAT_BATCH_SIZE]
+                # The SDK method takes the texts as its first parameter (there is
+                # no ``input`` keyword), so the batch is passed positionally.
+                response = giga.embeddings(batch, model=self._model)
+                # Preserve order by index
+                by_index = {item.index: item.embedding for item in response.data}
+                result.extend(by_index[j] for j in range(len(batch)))
+        return result
+
+
 def get_embedding_client(dim: int) -> EmbeddingClient:
+    """Return the embedding client selected by environment variables.
+
+    When ``GIGACHAT_CREDENTIALS`` is non-empty (after stripping whitespace) a
+    ``GigaChatEmbeddingClient`` is returned, configured from
+    ``GIGACHAT_EMBEDDINGS_MODEL`` (default ``"Embeddings"``) and
+    ``GIGACHAT_VERIFY_SSL`` (default ``"false"``; ``1``/``true``/``yes``,
+    case-insensitive, enable verification). Otherwise a
+    ``StubEmbeddingClient`` of dimension ``dim`` is returned.
+
+    NOTE: ``dim`` is only applied to the stub client; it is not passed to the
+    GigaChat client.
+    """
+    credentials = os.getenv("GIGACHAT_CREDENTIALS", "").strip()
+    if credentials:
+        return GigaChatEmbeddingClient(
+            credentials=credentials,
+            model=os.getenv("GIGACHAT_EMBEDDINGS_MODEL", "Embeddings"),
+            verify_ssl_certs=os.getenv("GIGACHAT_VERIFY_SSL", "false").lower()
+            in ("1", "true", "yes"),
+        )
     return StubEmbeddingClient(dim=dim)
Author	SHA1	Message	Date
zosimovaa	5ce6335ad8	Настройки развертывания	2026-01-30 22:53:16 +03:00
zosimovaa	e899f54f04	Добавлены эмбеддинги на базе гигачата	2026-01-30 22:53:01 +03:00