242 lines
8.2 KiB
Python
242 lines
8.2 KiB
Python
"""Webhook server: on push from remote repo, pull and run index --changed."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import hmac
|
|
import hashlib
|
|
import json
|
|
import logging
|
|
import os
|
|
import subprocess
|
|
import threading
|
|
from pathlib import Path
|
|
|
|
from fastapi import FastAPI, Request, Response
|
|
from fastapi.responses import PlainTextResponse
|
|
|
|
# Logger used by the functions in this module.
logger = logging.getLogger(__name__)

# Give the "rag_agent" logger its own stream handler (only when it has none yet)
# so that records from the background webhook work (pull_and_index, index) are
# visible when the app is run via uvicorn.
_rag_log = logging.getLogger("rag_agent")
if not _rag_log.handlers:
    _rag_log.setLevel(logging.INFO)
    _h = logging.StreamHandler()
    _h.setFormatter(logging.Formatter(fmt="%(levelname)s: %(name)s: %(message)s"))
    _rag_log.addHandler(_h)
|
|
|
|
# ASGI application object; the route decorators in this module register on it.
app = FastAPI(
    title="RAG Agent Webhook",
    version="0.1.0",
)
|
|
|
|
|
|
def _branch_from_ref(ref: str) -> str | None:
|
|
"""refs/heads/main -> main."""
|
|
if not ref or not ref.startswith("refs/heads/"):
|
|
return None
|
|
return ref.removeprefix("refs/heads/")
|
|
|
|
|
|
def _verify_github_signature(body: bytes, secret: str, signature_header: str | None) -> bool:
|
|
if not secret or not signature_header or not signature_header.startswith("sha256="):
|
|
return not secret
|
|
expected = hmac.new(
|
|
secret.encode("utf-8"), body, digestmod=hashlib.sha256
|
|
).hexdigest()
|
|
received = signature_header.removeprefix("sha256=").strip()
|
|
return hmac.compare_digest(received, expected)
|
|
|
|
|
|
def _decode_stderr(stderr: str | bytes | None) -> str:
|
|
if stderr is None:
|
|
return ""
|
|
return stderr.decode("utf-8", errors="replace") if isinstance(stderr, bytes) else stderr
|
|
|
|
|
|
def _run_index(repo_path: str, story: str, base_ref: str, head_ref: str) -> bool:
    """Run ``rag-agent index --changed`` for a commit range.

    The child process inherits the current environment with ``RAG_REPO_PATH``
    set to ``repo_path`` and is given 600 seconds to finish.

    Args:
        repo_path: Value exported to the child as ``RAG_REPO_PATH``.
        story: Passed as ``--story``.
        base_ref: Passed as ``--base-ref``.
        head_ref: Passed as ``--head-ref``.

    Returns:
        ``True`` when the command exits with status 0. ``False`` on a non-zero
        exit status, a timeout, or any other error; the reason is logged.
    """
    child_env = {**os.environ, "RAG_REPO_PATH": repo_path}
    command = [
        "rag-agent", "index",
        "--story", story,
        "--changed",
        "--base-ref", base_ref,
        "--head-ref", head_ref,
    ]
    try:
        result = subprocess.run(
            command,
            env=child_env,
            capture_output=True,
            text=True,
            timeout=600,
        )
        if result.returncode == 0:
            logger.info("index completed for story=%s %s..%s", story, base_ref, head_ref)
            return True
        logger.error(
            "index failed (story=%s base=%s head=%s): stdout=%s stderr=%s",
            story, base_ref, head_ref, result.stdout, result.stderr,
        )
        return False
    except subprocess.TimeoutExpired:
        logger.error("index timeout for story=%s", story)
        return False
    except Exception as exc:
        # Background-thread boundary: log with traceback and report failure.
        logger.exception("index error: %s", exc)
        return False
|
|
|
|
|
|
def _pull_and_index(
|
|
repo_path: str,
|
|
branch: str,
|
|
*,
|
|
payload_before: str | None = None,
|
|
payload_after: str | None = None,
|
|
) -> None:
|
|
"""Fetch, checkout branch; index range from payload (before→after) or from merge result."""
|
|
logger.info(
|
|
"webhook: pull_and_index started branch=%s repo_path=%s payload_before=%s payload_after=%s",
|
|
branch, repo_path, payload_before, payload_after,
|
|
)
|
|
repo = Path(repo_path)
|
|
if not repo.is_dir() or not (repo / ".git").exists():
|
|
logger.warning("not a git repo or missing: %s", repo_path)
|
|
return
|
|
try:
|
|
subprocess.run(
|
|
["git", "-C", repo_path, "fetch", "origin", branch],
|
|
check=True,
|
|
capture_output=True,
|
|
timeout=60,
|
|
)
|
|
except subprocess.CalledProcessError as e:
|
|
logger.warning("git fetch failed (branch=%s): %s", branch, _decode_stderr(e.stderr))
|
|
return
|
|
except Exception as e:
|
|
logger.exception("git fetch error: %s", e)
|
|
return
|
|
|
|
try:
|
|
subprocess.run(
|
|
["git", "-C", repo_path, "checkout", branch],
|
|
check=True,
|
|
capture_output=True,
|
|
timeout=10,
|
|
)
|
|
except subprocess.CalledProcessError as e:
|
|
logger.warning("git checkout %s failed: %s", branch, _decode_stderr(e.stderr))
|
|
return
|
|
|
|
# Prefer commit range from webhook payload (GitHub/GitLab before/after) so we index every push
|
|
# even when the clone is the same dir as the one that was pushed from (HEAD already at new commit).
|
|
if payload_before and payload_after and payload_before != payload_after:
|
|
logger.info(
|
|
"webhook: running index from payload story=%s %s..%s",
|
|
branch, payload_before, payload_after,
|
|
)
|
|
_run_index(repo_path, story=branch, base_ref=payload_before, head_ref=payload_after)
|
|
return
|
|
|
|
if payload_before and payload_after and payload_before == payload_after:
|
|
logger.info("webhook: payload before==after for branch=%s (e.g. force-push); skipping index", branch)
|
|
return
|
|
|
|
# Fallback: no before/after in payload — infer from merge (original behaviour).
|
|
origin_ref = f"origin/{branch}"
|
|
rev_origin = subprocess.run(
|
|
["git", "-C", repo_path, "rev-parse", origin_ref],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=10,
|
|
)
|
|
origin_head = (rev_origin.stdout or "").strip() if rev_origin.returncode == 0 else None
|
|
if not origin_head:
|
|
logger.warning("after fetch: %s not found (wrong branch name?)", origin_ref)
|
|
return
|
|
|
|
try:
|
|
old_head = subprocess.run(
|
|
["git", "-C", repo_path, "rev-parse", "HEAD"],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=10,
|
|
)
|
|
old_head = (old_head.stdout or "").strip() if old_head.returncode == 0 else None
|
|
except Exception as e:
|
|
logger.exception("rev-parse HEAD: %s", e)
|
|
return
|
|
|
|
if old_head == origin_head:
|
|
logger.info(
|
|
"no new commits for branch=%s (already at %s); skipping index",
|
|
branch, origin_head,
|
|
)
|
|
return
|
|
|
|
try:
|
|
merge_proc = subprocess.run(
|
|
["git", "-C", repo_path, "merge", "--ff-only", origin_ref],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=60,
|
|
)
|
|
except subprocess.TimeoutExpired:
|
|
logger.error("git merge timeout")
|
|
return
|
|
if merge_proc.returncode != 0:
|
|
logger.warning(
|
|
"git merge --ff-only failed (branch=%s, non-fast-forward?). stderr=%s Skipping index.",
|
|
branch, _decode_stderr(merge_proc.stderr),
|
|
)
|
|
return
|
|
|
|
new_head = subprocess.run(
|
|
["git", "-C", repo_path, "rev-parse", "HEAD"],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=10,
|
|
)
|
|
new_head = (new_head.stdout or "").strip() if new_head.returncode == 0 else None
|
|
if not old_head or not new_head or old_head == new_head:
|
|
logger.info("no new commits for branch=%s (old_head=%s new_head=%s)", branch, old_head, new_head)
|
|
return
|
|
|
|
logger.info("webhook: running index story=%s %s..%s", branch, old_head, new_head)
|
|
_run_index(repo_path, story=branch, base_ref=old_head, head_ref=new_head)
|
|
|
|
|
|
def _payload_str(value: object) -> str | None:
    """Return *value* stripped if it is a non-blank string, otherwise ``None``."""
    if not isinstance(value, str):
        return None
    return value.strip() or None


@app.post("/webhook")
async def webhook(request: Request) -> Response:
    """Handle a push webhook from GitHub/GitLab: pull the repo and run index --changed.

    The work itself runs in a daemon thread; the response only acknowledges
    that the delivery was accepted.

    Environment:
        WEBHOOK_SECRET: when non-empty, ``X-Hub-Signature-256`` must match.
        RAG_REPO_PATH: local clone to update; required.

    Responses:
        401 -- a secret is configured and the signature check failed.
        400 -- body is not a non-empty JSON object, or ``ref`` is missing or
               not a ``refs/heads/...`` string.
        500 -- ``RAG_REPO_PATH`` is not set.
        202 -- accepted; the background thread was started.

    NOTE(review): only ``X-Hub-Signature-256`` is checked. GitLab is understood
    to send its secret in ``X-Gitlab-Token`` instead; confirm whether GitLab
    deliveries need separate handling when WEBHOOK_SECRET is set.
    """
    body = await request.body()
    secret = os.getenv("WEBHOOK_SECRET", "").strip()
    sig = request.headers.get("X-Hub-Signature-256")

    if secret and not _verify_github_signature(body, secret, sig):
        return PlainTextResponse("Invalid signature", status_code=401)

    try:
        payload = json.loads(body.decode("utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError):
        payload = None
    if not payload or not isinstance(payload, dict):
        return PlainTextResponse("Invalid JSON", status_code=400)

    # JSON values can be of any type; only a string can be a ref.
    ref = payload.get("ref")
    branch = _branch_from_ref(ref) if isinstance(ref, str) and ref else None
    if not branch:
        return PlainTextResponse("Missing or unsupported ref", status_code=400)

    # GitHub/GitLab push: before = previous commit, after = new tip (used as index range).
    # Non-string values are treated as absent rather than crashing on .strip().
    before = _payload_str(payload.get("before"))
    after = _payload_str(payload.get("after"))

    repo_path = os.getenv("RAG_REPO_PATH", "").strip()
    if not repo_path:
        return PlainTextResponse("RAG_REPO_PATH not set", status_code=500)

    threading.Thread(
        target=_pull_and_index,
        args=(repo_path, branch),
        kwargs={"payload_before": before, "payload_after": after},
        daemon=True,
    ).start()

    return PlainTextResponse("Accepted", status_code=202)
|
|
|
|
|
|
@app.get("/health")
async def health() -> str:
    """Liveness probe: always answers with the string ``ok``."""
    status = "ok"
    return status
|