This repository has been archived on 2026-04-10. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
RagAgent/src/rag_agent/webhook.py
T
2026-01-31 20:19:44 +03:00

242 lines
8.2 KiB
Python

"""Webhook server: on push from remote repo, pull and run index --changed."""
from __future__ import annotations
import hmac
import hashlib
import json
import logging
import os
import subprocess
import threading
from pathlib import Path
from fastapi import FastAPI, Request, Response
from fastapi.responses import PlainTextResponse
logger = logging.getLogger(__name__)

# Give the "rag_agent" logger its own stream handler (once), so that records emitted
# from the background webhook work (pull_and_index, index) show up when run via uvicorn.
_rag_log = logging.getLogger("rag_agent")
if not _rag_log.handlers:
    _h = logging.StreamHandler()
    _h.setFormatter(logging.Formatter("%(levelname)s: %(name)s: %(message)s"))
    _rag_log.addHandler(_h)
    _rag_log.setLevel(logging.INFO)

# ASGI application object; the route handlers in this module register themselves on it.
app = FastAPI(title="RAG Agent Webhook", version="0.1.0")
def _branch_from_ref(ref: str) -> str | None:
    """Extract the branch name from a full git ref (``refs/heads/main`` -> ``main``).

    Args:
        ref: The ``ref`` value taken from a push payload. It originates from
            untrusted JSON, so anything that is not a string is tolerated.

    Returns:
        The branch name, or ``None`` when *ref* is not a string, does not start
        with ``refs/heads/`` (e.g. a tag ref), or names no branch at all.
    """
    # The value comes straight from a JSON body: a number or list must not crash here.
    if not isinstance(ref, str) or not ref.startswith("refs/heads/"):
        return None
    # A bare "refs/heads/" carries no branch name; report it as missing rather than "".
    return ref.removeprefix("refs/heads/") or None
def _verify_github_signature(body: bytes, secret: str, signature_header: str | None) -> bool:
    """Check a ``sha256=<hexdigest>`` signature header against the raw request body.

    Args:
        body: Raw request body, exactly as received.
        secret: Shared webhook secret. An empty secret disables verification.
        signature_header: Value of the signature header, or ``None`` if absent.

    Returns:
        ``True`` when no secret is configured, or when the header carries the
        HMAC-SHA256 hex digest of *body* keyed with *secret*; ``False`` otherwise
        (missing header, wrong scheme prefix, or digest mismatch).
    """
    if not secret:
        return True
    if not signature_header or not signature_header.startswith("sha256="):
        return False
    expected = hmac.new(
        secret.encode("utf-8"), body, digestmod=hashlib.sha256
    ).hexdigest()
    received = signature_header.removeprefix("sha256=").strip()
    # compare_digest() raises TypeError for str arguments containing non-ASCII
    # characters. Compare bytes instead so a malformed header is simply rejected;
    # unencodable characters become "?", which can never match a hex digest.
    return hmac.compare_digest(
        received.encode("utf-8", errors="replace"),
        expected.encode("ascii"),
    )
def _decode_stderr(stderr: str | bytes | None) -> str:
    """Normalise captured stderr to text.

    ``None`` becomes an empty string, ``bytes`` are decoded as UTF-8 with
    undecodable bytes replaced, and anything else is returned unchanged.
    """
    if isinstance(stderr, bytes):
        return stderr.decode("utf-8", errors="replace")
    return "" if stderr is None else stderr
def _run_index(repo_path: str, story: str, base_ref: str, head_ref: str) -> bool:
    """Run ``rag-agent index --changed`` for one commit range.

    The child process gets a copy of the current environment with
    ``RAG_REPO_PATH`` set to *repo_path*, and is given at most 600 seconds.

    Args:
        repo_path: Value exported to the child as ``RAG_REPO_PATH``.
        story: Passed to ``--story``.
        base_ref: Passed to ``--base-ref``.
        head_ref: Passed to ``--head-ref``.

    Returns:
        ``True`` if the command exited with status 0; ``False`` on a non-zero
        exit, a timeout, or any other error while launching it (all are logged).
    """
    command = [
        "rag-agent", "index",
        "--story", story,
        "--changed",
        "--base-ref", base_ref,
        "--head-ref", head_ref,
    ]
    child_env = {**os.environ, "RAG_REPO_PATH": repo_path}
    try:
        result = subprocess.run(
            command,
            env=child_env,
            capture_output=True,
            text=True,
            timeout=600,
        )
    except subprocess.TimeoutExpired:
        logger.error("index timeout for story=%s", story)
        return False
    except Exception as e:
        # Best-effort background job: log with traceback and report failure.
        logger.exception("index error: %s", e)
        return False

    if result.returncode == 0:
        logger.info("index completed for story=%s %s..%s", story, base_ref, head_ref)
        return True

    logger.error(
        "index failed (story=%s base=%s head=%s): stdout=%s stderr=%s",
        story, base_ref, head_ref, result.stdout, result.stderr,
    )
    return False
def _git_run(repo_path: str, *args: str, timeout: float) -> subprocess.CompletedProcess | None:
    """Run ``git -C repo_path <args>`` capturing output as bytes; None if it could not run."""
    try:
        return subprocess.run(
            ["git", "-C", repo_path, *args],
            capture_output=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        logger.error("git %s timeout", args[0])
        return None
    except OSError as e:
        # e.g. the git executable is missing or not runnable.
        logger.exception("git %s error: %s", args[0], e)
        return None


def _git_rev_parse(repo_path: str, rev: str) -> str | None:
    """Resolve *rev* to a commit id via ``git rev-parse``; None if it cannot be resolved."""
    proc = _git_run(repo_path, "rev-parse", rev, timeout=10)
    if proc is None or proc.returncode != 0:
        return None
    return (proc.stdout or b"").decode("utf-8", errors="replace").strip() or None


def _pull_and_index(
    repo_path: str,
    branch: str,
    *,
    payload_before: str | None = None,
    payload_after: str | None = None,
) -> None:
    """Fetch, checkout branch; index range from payload (before→after) or from merge result.

    Steps:
      1. ``git fetch origin <branch>`` and ``git checkout <branch>`` in *repo_path*.
      2. If both *payload_before* and *payload_after* are given, index that range
         (or skip when they are equal) and stop.
      3. Otherwise fast-forward the local branch to ``origin/<branch>`` and index
         the range between the old and the new ``HEAD``.

    Every failure is logged and ends the run; nothing is raised to the caller.

    Args:
        repo_path: Path of the local clone to update.
        branch: Branch name to fetch and check out; also used as the index story.
        payload_before: Commit id before the push, as reported by the webhook payload.
        payload_after: Commit id after the push, as reported by the webhook payload.
    """
    logger.info(
        "webhook: pull_and_index started branch=%s repo_path=%s payload_before=%s payload_after=%s",
        branch, repo_path, payload_before, payload_after,
    )

    # These values come from the request body and end up as positional arguments of
    # git / rag-agent. A leading "-" would be parsed as an option (for instance
    # `git fetch origin --upload-pack=<cmd>`), so refuse such values outright.
    from_payload = (branch, payload_before, payload_after)
    if not branch or any(isinstance(v, str) and v.startswith("-") for v in from_payload):
        logger.warning(
            "webhook: refusing option-like or empty value from payload (branch=%r before=%r after=%r)",
            branch, payload_before, payload_after,
        )
        return

    repo = Path(repo_path)
    if not repo.is_dir() or not (repo / ".git").exists():
        logger.warning("not a git repo or missing: %s", repo_path)
        return

    fetch = _git_run(repo_path, "fetch", "origin", branch, timeout=60)
    if fetch is None:
        return
    if fetch.returncode != 0:
        logger.warning("git fetch failed (branch=%s): %s", branch, _decode_stderr(fetch.stderr))
        return

    checkout = _git_run(repo_path, "checkout", branch, timeout=10)
    if checkout is None:
        return
    if checkout.returncode != 0:
        logger.warning("git checkout %s failed: %s", branch, _decode_stderr(checkout.stderr))
        return

    # Prefer commit range from webhook payload (GitHub/GitLab before/after) so we index every push
    # even when the clone is the same dir as the one that was pushed from (HEAD already at new commit).
    # NOTE(review): this path never merges origin/<branch> into the local branch, so the
    # work tree stays at whatever commit it was on — confirm `rag-agent index --changed`
    # reads content through the given refs rather than from the work tree.
    if payload_before and payload_after:
        if payload_before == payload_after:
            logger.info("webhook: payload before==after for branch=%s (e.g. force-push); skipping index", branch)
            return
        logger.info(
            "webhook: running index from payload story=%s %s..%s",
            branch, payload_before, payload_after,
        )
        _run_index(repo_path, story=branch, base_ref=payload_before, head_ref=payload_after)
        return

    # Fallback: no before/after in payload — infer from merge (original behaviour).
    origin_ref = f"origin/{branch}"
    origin_head = _git_rev_parse(repo_path, origin_ref)
    if not origin_head:
        logger.warning("after fetch: %s not found (wrong branch name?)", origin_ref)
        return

    old_head = _git_rev_parse(repo_path, "HEAD")
    if not old_head:
        # Without the starting commit the merged range could never be indexed afterwards,
        # so do not fast-forward; a later delivery can retry from the same state.
        logger.warning("cannot resolve HEAD for branch=%s; skipping merge and index", branch)
        return
    if old_head == origin_head:
        logger.info(
            "no new commits for branch=%s (already at %s); skipping index",
            branch, origin_head,
        )
        return

    merge = _git_run(repo_path, "merge", "--ff-only", origin_ref, timeout=60)
    if merge is None:
        return
    if merge.returncode != 0:
        logger.warning(
            "git merge --ff-only failed (branch=%s, non-fast-forward?). stderr=%s Skipping index.",
            branch, _decode_stderr(merge.stderr),
        )
        return

    new_head = _git_rev_parse(repo_path, "HEAD")
    if not new_head or old_head == new_head:
        logger.info("no new commits for branch=%s (old_head=%s new_head=%s)", branch, old_head, new_head)
        return

    logger.info("webhook: running index story=%s %s..%s", branch, old_head, new_head)
    _run_index(repo_path, story=branch, base_ref=old_head, head_ref=new_head)
def _optional_payload_str(value: object) -> str | None:
    """Return *value* stripped if it is a non-blank string, otherwise None."""
    if not isinstance(value, str):
        return None
    return value.strip() or None


@app.post("/webhook")
async def webhook(request: Request) -> Response:
    """Handle push webhook from GitHub/GitLab: pull repo and run index --changed.

    Responses:
        401: ``WEBHOOK_SECRET`` is set and the ``X-Hub-Signature-256`` check failed.
        400: the body is not a non-empty JSON object, or ``ref`` is missing or is
            not a ``refs/heads/...`` string.
        500: ``RAG_REPO_PATH`` is not set.
        202: the pull-and-index job was started on a background daemon thread.
    """
    body = await request.body()
    secret = os.getenv("WEBHOOK_SECRET", "").strip()
    sig = request.headers.get("X-Hub-Signature-256")
    # NOTE(review): only the X-Hub-Signature-256 header is checked; confirm how GitLab
    # deliveries are expected to authenticate when a secret is configured.
    if secret and not _verify_github_signature(body, secret, sig):
        return PlainTextResponse("Invalid signature", status_code=401)

    try:
        payload = json.loads(body.decode("utf-8"))
    except (ValueError, RecursionError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError;
        # RecursionError guards against a pathologically nested body.
        payload = None
    if not payload or not isinstance(payload, dict):
        return PlainTextResponse("Invalid JSON", status_code=400)

    # Payload fields are untrusted JSON: anything that is not a string counts as missing
    # instead of raising (which would surface as an HTTP 500).
    ref = payload.get("ref")
    branch = _branch_from_ref(ref) if isinstance(ref, str) else None
    if not branch:
        return PlainTextResponse("Missing or unsupported ref", status_code=400)

    # GitHub/GitLab push: before = previous commit, after = new tip (use for index range)
    before = _optional_payload_str(payload.get("before"))
    after = _optional_payload_str(payload.get("after"))

    repo_path = os.getenv("RAG_REPO_PATH", "").strip()
    if not repo_path:
        return PlainTextResponse("RAG_REPO_PATH not set", status_code=500)

    # Reply immediately; fetching and indexing continue on a daemon thread.
    threading.Thread(
        target=_pull_and_index,
        args=(repo_path, branch),
        kwargs={"payload_before": before, "payload_after": after},
        daemon=True,
    ).start()
    return PlainTextResponse("Accepted", status_code=202)
@app.get("/health")
async def health() -> str:
    """Health-check endpoint: unconditionally returns the string ``ok``."""
    status = "ok"
    return status