"""Webhook server: on push from remote repo, pull and run index --changed.

Configuration is read from the environment on every request:

* ``WEBHOOK_SECRET`` -- when non-empty, deliveries must carry a matching
  ``X-Hub-Signature-256`` HMAC header; when empty, no verification is done.
* ``RAG_REPO_PATH`` -- path of the local clone that is fetched and indexed.
"""

from __future__ import annotations

import hashlib
import hmac
import json
import logging
import os
import subprocess
import threading
from pathlib import Path

from fastapi import FastAPI, Request, Response
from fastapi.responses import PlainTextResponse

logger = logging.getLogger(__name__)

# So background webhook logs (pull_and_index, index) appear when run via uvicorn
_rag_log = logging.getLogger("rag_agent")
if not _rag_log.handlers:
    _h = logging.StreamHandler()
    _h.setFormatter(logging.Formatter("%(levelname)s: %(name)s: %(message)s"))
    _rag_log.setLevel(logging.INFO)
    _rag_log.addHandler(_h)

app = FastAPI(title="RAG Agent Webhook", version="0.1.0")

# Every delivery is handled in its own thread but they all operate on the same
# working tree; concurrent checkout/merge would collide on git's index.lock and
# make one of the pushes silently skip indexing. Serialise the runs instead.
_repo_lock = threading.Lock()


def _branch_from_ref(ref: str) -> str | None:
    """Return the branch name for a ``refs/heads/<branch>`` ref, else None.

    ``refs/heads/main`` -> ``main``. Anything that is not a string, is not a
    branch ref, or has an empty branch part yields None.

    Names starting with ``-`` are rejected as well: the branch is later passed
    as a positional argument to ``git fetch`` / ``git checkout``, where a
    leading dash would be parsed as a command-line option.
    """
    if not isinstance(ref, str) or not ref.startswith("refs/heads/"):
        return None
    branch = ref.removeprefix("refs/heads/")
    if not branch or branch.startswith("-"):
        return None
    return branch


def _payload_sha(value: object) -> str | None:
    """Return a stripped commit id taken from the payload, or None if unusable.

    Non-string values, blank strings and values starting with ``-`` (which
    would read as an option on the ``rag-agent`` command line) yield None.
    """
    if not isinstance(value, str):
        return None
    sha = value.strip()
    if not sha or sha.startswith("-"):
        return None
    return sha


def _verify_github_signature(body: bytes, secret: str, signature_header: str | None) -> bool:
    """Check a ``sha256=<hexdigest>`` signature header against *body*.

    Returns True when *secret* is empty (verification disabled) or when the
    header carries the HMAC-SHA256 of *body* keyed with *secret*. Returns
    False when a secret is set but the header is missing, has another scheme,
    or does not match.
    """
    if not secret:
        return True
    if not signature_header or not signature_header.startswith("sha256="):
        return False
    expected = hmac.new(
        secret.encode("utf-8"), body, digestmod=hashlib.sha256
    ).hexdigest()
    received = signature_header.removeprefix("sha256=").strip()
    # Compare as bytes: compare_digest() raises TypeError for non-ASCII str,
    # and the header content is controlled by the (unauthenticated) sender.
    return hmac.compare_digest(
        received.encode("utf-8", errors="replace"), expected.encode("ascii")
    )


def _decode_stderr(stderr: str | bytes | None) -> str:
    """Return *stderr* as text: '' for None, bytes decoded as UTF-8 (lossy)."""
    if stderr is None:
        return ""
    if isinstance(stderr, bytes):
        return stderr.decode("utf-8", errors="replace")
    return stderr


def _run_index(repo_path: str, story: str, base_ref: str, head_ref: str) -> bool:
    """Run ``rag-agent index --changed`` for the range *base_ref*..*head_ref*.

    The child process gets a copy of the current environment with
    ``RAG_REPO_PATH`` set to *repo_path*. Returns True when the command exits
    with status 0; a non-zero exit, a timeout (600 s) or any other error is
    logged and reported as False.
    """
    env = os.environ.copy()
    env["RAG_REPO_PATH"] = repo_path
    command = [
        "rag-agent", "index",
        "--story", story,
        "--changed",
        "--base-ref", base_ref,
        "--head-ref", head_ref,
    ]
    try:
        proc = subprocess.run(
            command,
            env=env,
            capture_output=True,
            text=True,
            timeout=600,
        )
    except subprocess.TimeoutExpired:
        logger.error("index timeout for story=%s", story)
        return False
    except Exception as e:
        # Best-effort background job: log whatever went wrong, never raise.
        logger.exception("index error: %s", e)
        return False

    if proc.returncode != 0:
        logger.error(
            "index failed (story=%s base=%s head=%s): stdout=%s stderr=%s",
            story, base_ref, head_ref, proc.stdout, proc.stderr,
        )
        return False
    logger.info("index completed for story=%s %s..%s", story, base_ref, head_ref)
    return True


def _git(
    repo_path: str, *args: str, timeout: float
) -> subprocess.CompletedProcess[str] | None:
    """Run ``git -C repo_path <args>`` and return the completed process.

    The exit status is NOT checked here; callers inspect ``returncode``.
    Returns None (after logging) when git timed out or could not be started,
    so that no exception escapes the background thread.
    """
    try:
        return subprocess.run(
            ["git", "-C", repo_path, *args],
            capture_output=True,
            encoding="utf-8",
            errors="replace",
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        logger.error("git %s timeout", args[0])
    except OSError as e:
        logger.exception("git %s error: %s", args[0], e)
    return None


def _rev_from(proc: subprocess.CompletedProcess[str]) -> str | None:
    """Return the stripped stdout of a successful ``git rev-parse``, else None."""
    if proc.returncode != 0:
        return None
    return (proc.stdout or "").strip() or None


def _sync_and_index(
    repo_path: str,
    branch: str,
    payload_before: str | None,
    payload_after: str | None,
) -> None:
    """Fetch and check out *branch*, then index the pushed commit range.

    Every failure is logged and ends the run early; nothing is raised.
    """
    repo = Path(repo_path)
    if not repo.is_dir() or not (repo / ".git").exists():
        logger.warning("not a git repo or missing: %s", repo_path)
        return

    fetch = _git(repo_path, "fetch", "origin", branch, timeout=60)
    if fetch is None:
        return
    if fetch.returncode != 0:
        logger.warning(
            "git fetch failed (branch=%s): %s", branch, _decode_stderr(fetch.stderr)
        )
        return

    checkout = _git(repo_path, "checkout", branch, timeout=10)
    if checkout is None:
        return
    if checkout.returncode != 0:
        logger.warning(
            "git checkout %s failed: %s", branch, _decode_stderr(checkout.stderr)
        )
        return

    # Prefer commit range from webhook payload (GitHub/GitLab before/after) so
    # we index every push even when the clone is the same dir as the one that
    # was pushed from (HEAD already at new commit).
    # NOTE(review): on this path the working tree is not fast-forwarded to
    # origin/<branch>; confirm that `rag-agent index --changed` does not rely
    # on the checkout being at payload_after.
    if payload_before and payload_after:
        if payload_before == payload_after:
            logger.info(
                "webhook: payload before==after for branch=%s (e.g. force-push); skipping index",
                branch,
            )
            return
        logger.info(
            "webhook: running index from payload story=%s %s..%s",
            branch, payload_before, payload_after,
        )
        _run_index(
            repo_path, story=branch, base_ref=payload_before, head_ref=payload_after
        )
        return

    # Fallback: no before/after in payload -- infer the range from a
    # fast-forward merge (original behaviour).
    origin_ref = f"origin/{branch}"
    origin_proc = _git(repo_path, "rev-parse", origin_ref, timeout=10)
    if origin_proc is None:
        return
    origin_head = _rev_from(origin_proc)
    if not origin_head:
        logger.warning("after fetch: %s not found (wrong branch name?)", origin_ref)
        return

    old_proc = _git(repo_path, "rev-parse", "HEAD", timeout=10)
    if old_proc is None:
        return
    old_head = _rev_from(old_proc)
    if old_head == origin_head:
        logger.info(
            "no new commits for branch=%s (already at %s); skipping index",
            branch, origin_head,
        )
        return

    merge = _git(repo_path, "merge", "--ff-only", origin_ref, timeout=60)
    if merge is None:
        return
    if merge.returncode != 0:
        logger.warning(
            "git merge --ff-only failed (branch=%s, non-fast-forward?). stderr=%s Skipping index.",
            branch, _decode_stderr(merge.stderr),
        )
        return

    new_proc = _git(repo_path, "rev-parse", "HEAD", timeout=10)
    if new_proc is None:
        return
    new_head = _rev_from(new_proc)
    if not old_head or not new_head or old_head == new_head:
        logger.info(
            "no new commits for branch=%s (old_head=%s new_head=%s)",
            branch, old_head, new_head,
        )
        return

    logger.info("webhook: running index story=%s %s..%s", branch, old_head, new_head)
    _run_index(repo_path, story=branch, base_ref=old_head, head_ref=new_head)


def _pull_and_index(
    repo_path: str,
    branch: str,
    *,
    payload_before: str | None = None,
    payload_after: str | None = None,
) -> None:
    """Fetch, checkout branch; index range from payload (before→after) or from merge result.

    Intended to run in a background thread: all failures are logged, none are
    raised. Runs are serialised process-wide because they share one working
    tree.

    Args:
        repo_path: Path of the local git clone.
        branch: Branch name to fetch from ``origin`` and check out.
        payload_before: Commit id before the push, if the payload had one.
        payload_after: Commit id after the push, if the payload had one.
    """
    logger.info(
        "webhook: pull_and_index started branch=%s repo_path=%s payload_before=%s payload_after=%s",
        branch, repo_path, payload_before, payload_after,
    )
    with _repo_lock:
        _sync_and_index(repo_path, branch, payload_before, payload_after)


@app.post("/webhook")
async def webhook(request: Request) -> Response:
    """Handle push webhook from GitHub/GitLab: pull repo and run index --changed.

    Responses:
        401 -- ``WEBHOOK_SECRET`` is set and the signature does not verify.
        400 -- body is not a JSON object, or ``ref`` is not a usable branch ref.
        500 -- ``RAG_REPO_PATH`` is not set.
        202 -- accepted; fetching and indexing continue in a daemon thread.
    """
    body = await request.body()

    # NOTE(review): only the GitHub-style X-Hub-Signature-256 header is read
    # here, so with WEBHOOK_SECRET set a delivery without that header gets 401.
    # GitLab authenticates with an X-Gitlab-Token header instead -- confirm
    # whether GitLab deliveries are expected to work with a secret configured.
    secret = os.getenv("WEBHOOK_SECRET", "").strip()
    sig = request.headers.get("X-Hub-Signature-256")
    if secret and not _verify_github_signature(body, secret, sig):
        return PlainTextResponse("Invalid signature", status_code=401)

    try:
        payload = json.loads(body.decode("utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError):
        payload = None
    if not payload or not isinstance(payload, dict):
        return PlainTextResponse("Invalid JSON", status_code=400)

    # _branch_from_ref also rejects non-string refs and option-like names.
    branch = _branch_from_ref(payload.get("ref"))
    if not branch:
        return PlainTextResponse("Missing or unsupported ref", status_code=400)

    # GitHub/GitLab push: before = previous commit, after = new tip (use for index range)
    before = _payload_sha(payload.get("before"))
    after = _payload_sha(payload.get("after"))

    repo_path = os.getenv("RAG_REPO_PATH", "").strip()
    if not repo_path:
        return PlainTextResponse("RAG_REPO_PATH not set", status_code=500)

    # Reply immediately; the git and index work can take minutes.
    threading.Thread(
        target=_pull_and_index,
        args=(repo_path, branch),
        kwargs={"payload_before": before, "payload_after": after},
        daemon=True,
    ).start()
    return PlainTextResponse("Accepted", status_code=202)


@app.get("/health")
async def health() -> str:
    """Liveness probe; always answers ``ok``."""
    return "ok"