Настройки развертывания
This commit is contained in:
21
Dockerfile
Normal file
21
Dockerfile
Normal file
@@ -0,0 +1,21 @@
|
||||
# RAG Agent app. Build from repo root (clone git@git.lesha.spb.ru:alex/RagAgent.git then docker compose build).
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install git for optional in-image clone; app is usually COPY'd from build context
|
||||
RUN apt-get update -qq && apt-get install -y --no-install-recommends git openssh-client \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy repo (when built from cloned repo: docker compose build)
|
||||
COPY pyproject.toml ./
|
||||
COPY src ./src
|
||||
COPY README.md ./
|
||||
|
||||
RUN pip install --no-cache-dir -e .
|
||||
|
||||
# Default: run CLI (override in compose or when running)
|
||||
ENV RAG_DB_DSN=""
|
||||
ENV RAG_REPO_PATH="/data"
|
||||
ENTRYPOINT ["rag-agent"]
|
||||
CMD ["ask", "--help"]
|
||||
23
README.md
23
README.md
@@ -6,15 +6,21 @@ and answers queries using retrieval + LLM generation. Commits are tied to
|
||||
|
||||
## Quick start
|
||||
|
||||
1. Configure environment variables:
|
||||
1. (Optional) Run Postgres and the app via Docker (clone the repo first):
|
||||
- `git clone git@git.lesha.spb.ru:alex/RagAgent.git && cd RagAgent`
|
||||
- `docker compose up -d` — starts Postgres and the RAG app in one network `rag_net`; app connects to DB at host `postgres`.
|
||||
- On first start (empty DB), scripts in `docker/postgres-init/` run automatically (extension + tables). To disable, comment out the init volume in `docker-compose.yml`.
|
||||
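- Default DSN inside the app: `postgresql://rag:rag_secret@postgres:5432/rag`; change the user, password and database name with `POSTGRES_USER`, `POSTGRES_PASSWORD` and `POSTGRES_DB`. Set `RAG_REPO_PATH` to the host path of your knowledge-base repo (default `./data`); it is mounted read-only into the app container at `/data`.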
|
||||
- Run commands: `docker compose run --rm app index --story my-branch`, `docker compose run --rm app ask "Question?"`.
|
||||
2. Configure environment variables:
|
||||
- `RAG_REPO_PATH` — path to git repo with text files
|
||||
- `RAG_DB_DSN` — Postgres DSN (e.g. `postgresql://user:pass@localhost:5432/rag`)
|
||||
- `RAG_DB_DSN` — Postgres DSN (e.g. `postgresql://rag:rag_secret@localhost:5432/rag`)
|
||||
- `RAG_EMBEDDINGS_DIM` — embedding vector dimension (e.g. `1536`)
|
||||
2. Create DB schema:
|
||||
3. Create DB schema (only if not using Docker, or if init was disabled):
|
||||
- `python scripts/create_db.py` (or `psql "$RAG_DB_DSN" -f scripts/schema.sql`)
|
||||
3. Index files for a story (e.g. branch name as story slug):
|
||||
4. Index files for a story (e.g. branch name as story slug):
|
||||
- `rag-agent index --story my-branch --changed --base-ref HEAD~1 --head-ref HEAD`
|
||||
4. Ask a question (optionally scoped to a story):
|
||||
5. Ask a question (optionally scoped to a story):
|
||||
- `rag-agent ask "What is covered?"`
|
||||
- `rag-agent ask "What is covered?" --story my-branch`
|
||||
|
||||
@@ -36,8 +42,11 @@ Story for the commit is taken from (in order): env `RAG_STORY`, file `.rag-story
|
||||
|
||||
Scripts: `scripts/create_db.py` (Python, uses `ensure_schema` and `RAG_*` env), `scripts/schema.sql` (raw SQL).
|
||||
|
||||
## Embeddings (GigaChat)
|
||||
|
||||
If `GIGACHAT_CREDENTIALS` is set (e.g. in `.env` for local runs), embeddings use GigaChat API; otherwise the stub client is used. Optional env: `GIGACHAT_EMBEDDINGS_MODEL` (default `Embeddings`), `GIGACHAT_VERIFY_SSL` (`true`/`false`). Ensure `RAG_EMBEDDINGS_DIM` matches the model output (see GigaChat docs).
|
||||
|
||||
## Notes
|
||||
|
||||
- The default embedding/LLM clients are stubs. Replace them in
|
||||
`src/rag_agent/index/embeddings.py` and `src/rag_agent/agent/pipeline.py`.
|
||||
- LLM client is still a stub; replace it in `src/rag_agent/agent/pipeline.py` for real answers.
|
||||
- This project requires Postgres with the `pgvector` extension.
|
||||
|
||||
56
docker-compose.yml
Normal file
56
docker-compose.yml
Normal file
@@ -0,0 +1,56 @@
|
||||
# Postgres with pgvector + RAG Agent app (from repo git@git.lesha.spb.ru:alex/RagAgent.git).
|
||||
# Clone the repo, then: docker compose up -d
|
||||
# App and DB share network "rag_net"; app uses RAG_DB_DSN with host=postgres.
|
||||
# DB init: scripts in docker/postgres-init/ run on first start (empty volume); to disable, comment out the init volume.
|
||||
|
||||
services:
|
||||
postgres:
|
||||
image: pgvector/pgvector:pg16
|
||||
container_name: rag-postgres
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER:-rag}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-rag_secret}
|
||||
POSTGRES_DB: ${POSTGRES_DB:-rag}
|
||||
ports:
|
||||
- "${POSTGRES_PORT:-5432}:5432"
|
||||
volumes:
|
||||
- rag_pgdata:/var/lib/postgresql/data
|
||||
# Init scripts run once on first start (create extension, tables). Optional: comment out to skip.
|
||||
- ./docker/postgres-init:/docker-entrypoint-initdb.d:ro
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-rag} -d ${POSTGRES_DB:-rag}"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- rag_net
|
||||
|
||||
app:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
image: rag-agent:latest
|
||||
container_name: rag-agent
|
||||
restart: "no"
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
RAG_DB_DSN: "postgresql://${POSTGRES_USER:-rag}:${POSTGRES_PASSWORD:-rag_secret}@postgres:5432/${POSTGRES_DB:-rag}"
|
||||
# Path as seen INSIDE the container. The host-side RAG_REPO_PATH is only used as the
# bind-mount source in this service's volumes entry, which always mounts it at /data;
# forwarding the host path here would point the app at a directory that does not exist
# in the container.
RAG_REPO_PATH: /data
|
||||
RAG_EMBEDDINGS_DIM: ${RAG_EMBEDDINGS_DIM:-1536}
|
||||
GIGACHAT_CREDENTIALS: ${GIGACHAT_CREDENTIALS:-}
|
||||
GIGACHAT_EMBEDDINGS_MODEL: ${GIGACHAT_EMBEDDINGS_MODEL:-Embeddings}
|
||||
volumes:
|
||||
- ${RAG_REPO_PATH:-./data}:/data:ro
|
||||
entrypoint: ["rag-agent"]
|
||||
command: ["ask", "--help"]
|
||||
networks:
|
||||
- rag_net
|
||||
|
||||
networks:
|
||||
rag_net:
|
||||
driver: bridge
|
||||
|
||||
volumes:
|
||||
rag_pgdata:
|
||||
7
docker/postgres-init/00-example-extra-user.sql.example
Normal file
7
docker/postgres-init/00-example-extra-user.sql.example
Normal file
@@ -0,0 +1,7 @@
|
||||
-- Example: create an extra DB user (e.g. read-only). Not executed — rename to 00-create-extra-user.sql to enable.
|
||||
-- Scripts in this folder run in alphabetical order; 00-* runs before 01-schema.sql.
|
||||
|
||||
-- CREATE USER rag_readonly WITH PASSWORD 'change_me';
|
||||
-- GRANT CONNECT ON DATABASE rag TO rag_readonly;
|
||||
-- GRANT USAGE ON SCHEMA public TO rag_readonly;
|
||||
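-- GRANT SELECT ON ALL TABLES IN SCHEMA public TO rag_readonly;
-- NOTE: as a 00-* script this runs before 01-schema.sql creates the tables, so the GRANT above matches no tables.
-- Either rename this file so it runs after the schema (e.g. 02-create-extra-user.sql), or also set default privileges:
-- ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO rag_readonly;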
|
||||
32
docker/postgres-init/01-schema.sql
Normal file
32
docker/postgres-init/01-schema.sql
Normal file
@@ -0,0 +1,32 @@
|
||||
-- RAG vector DB schema (runs automatically on first Postgres init).
|
||||
-- If RAG_EMBEDDINGS_DIM is not 1536, change vector(1536) below.
|
||||
|
||||
CREATE EXTENSION IF NOT EXISTS vector;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS stories (
|
||||
id SERIAL PRIMARY KEY,
|
||||
slug TEXT UNIQUE NOT NULL,
|
||||
-- NOW() already returns timestamptz (an absolute instant). The previous default,
-- NOW() AT TIME ZONE 'utc', produced a timestamp WITHOUT time zone that was then
-- re-interpreted in the session TimeZone on assignment, shifting the stored value
-- whenever the session zone is not UTC.
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS documents (
|
||||
id SERIAL PRIMARY KEY,
|
||||
story_id INTEGER NOT NULL REFERENCES stories(id) ON DELETE CASCADE,
|
||||
path TEXT NOT NULL,
|
||||
version TEXT NOT NULL,
|
||||
updated_at TIMESTAMPTZ NOT NULL,
|
||||
UNIQUE(story_id, path)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS chunks (
|
||||
id SERIAL PRIMARY KEY,
|
||||
document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
|
||||
chunk_index INTEGER NOT NULL,
|
||||
hash TEXT NOT NULL,
|
||||
content TEXT NOT NULL,
|
||||
embedding vector(1536) NOT NULL
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_documents_story_id ON documents(story_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks(document_id);
|
||||
-- Approximate nearest-neighbour index over chunks.embedding using cosine distance (IVFFlat).
-- NOTE(review): this runs at first Postgres init, when chunks is still empty; pgvector's
-- documentation recommends building IVFFlat indexes after the table has data. Confirm recall
-- is acceptable, or rebuild the index (REINDEX) after the first indexing run.
CREATE INDEX IF NOT EXISTS idx_chunks_embedding ON chunks USING ivfflat (embedding vector_cosine_ops);
|
||||
9
docker/postgres-init/README.md
Normal file
9
docker/postgres-init/README.md
Normal file
@@ -0,0 +1,9 @@
|
||||
# Postgres init scripts (optional)
|
||||
|
||||
Files here are mounted into the Postgres container at `/docker-entrypoint-initdb.d/` and run **only on first startup** (when the data volume is empty), in alphabetical order.
|
||||
|
||||
- `01-schema.sql` — creates pgvector extension and RAG tables (stories, documents, chunks).
|
||||
- To add more users or other setup, add scripts with names like `00-create-user.sql` (they run before `01-schema.sql`).
|
||||
- To disable init: in `docker-compose.yml`, comment out the postgres volume that mounts this folder, or remove/rename the `.sql` files.
|
||||
|
||||
After the first run, these scripts are not executed again. To re-run them, remove the volume: `docker compose down -v` (this deletes DB data), then `docker compose up -d`.
|
||||
Reference in New Issue
Block a user