первый коммит

This commit is contained in:
2026-02-27 21:28:09 +03:00
parent 43c404f958
commit 1bc57a7c25
171 changed files with 6400 additions and 556 deletions

View File

@@ -0,0 +1,56 @@
# Модуль rag_repo
## 1. Функции модуля
- Прием webhook-событий от Git-провайдеров (`gitea`, `bitbucket`).
- Нормализация payload в единый формат.
- Определение `story_id` и фиксация контекста, необходимого для определения изменений, произведенных в Story.
- Запись контекста коммита в Story-хранилище через `StoryContextRepository`.
- Подготовка данных, которые позволяют ускорять формирование чанков в `rag_session` за счет кэш-переиспользования.
Ускорение `rag_session` обеспечивается связкой полей:
- `project_id` (идентификатор репозитория/проекта),
- `commit_sha` (снимок состояния),
- `changed_files` (точный набор затронутых файлов),
- `story_id` (бизнес-контекст инкремента).
Эта связка позволяет в `rag_session` выполнять delta-индексацию и переиспользовать ранее рассчитанные чанки/эмбеддинги для неизмененных файлов.
## 2. Диаграмма классов и взаимосвязей
```mermaid
classDiagram
class RagRepoModule
class RepoWebhookService
class StoryContextRepository
RagRepoModule --> RepoWebhookService
RepoWebhookService --> StoryContextRepository
```
## 3. Описание классов
- `RagRepoModule`: точка входа модуля и публикация webhook endpoint'а.
Методы: `__init__` — создает сервис обработки webhook; `internal_router` — регистрирует internal route `webhook`.
- `RepoWebhookService`: нормализует payload, извлекает `story_id` и формирует запись commit-контекста.
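Методы: `process` — основной обработчик webhook; `_resolve_provider` — определение провайдера по явному параметру, заголовкам или структуре payload; `_normalize` — выбор нормализатора по провайдеру; `_normalize_gitea` — маппинг payload Gitea; `_normalize_bitbucket` — маппинг payload Bitbucket; `_extract_story_id` — извлечение `story_id` из commit message.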
- `StoryContextRepository`: persistence-слой Story-контекста.
Методы: `record_story_commit` — сохраняет commit, ветку и список измененных файлов в контекст Story.
## 4. Сиквенс-диаграммы API
### POST /internal/rag-repo/webhook
Назначение: принимает webhook коммита, автоматически определяет провайдера (`gitea`/`bitbucket`), извлекает `story_id` и сохраняет commit-контекст для трассировки изменений по Story.
```mermaid
sequenceDiagram
participant Router as RagRepoModule.APIRouter
participant Webhook as RepoWebhookService
participant StoryRepo as StoryContextRepository
Router->>Webhook: process(payload, headers)
Webhook->>Webhook: normalize payload + extract story_id
alt story_id found
Webhook->>StoryRepo: record_story_commit(...)
StoryRepo-->>Webhook: ok
Webhook-->>Router: accepted, story_bound=true
else story_id missing
Webhook-->>Router: accepted, story_bound=false
end
```

View File

@@ -0,0 +1 @@
# The package exports no public names: `from <package> import *` imports nothing.
__all__: list[str] = []

View File

@@ -0,0 +1,24 @@
from __future__ import annotations
from fastapi import APIRouter, Request
from app.modules.agent.story_context_repository import StoryContextRepository
from app.modules.rag_repo.webhook_service import RepoWebhookService
from app.modules.rag_session.repository import RagRepository
class RagRepoModule:
    """Entry point of the module: owns the webhook service and exposes its route."""

    def __init__(self, story_context_repository: StoryContextRepository, rag_repository: RagRepository) -> None:
        """Create the webhook service, passing both repositories to it positionally."""
        self._webhook = RepoWebhookService(story_context_repository, rag_repository)

    def internal_router(self) -> APIRouter:
        """Build a router that serves ``POST /internal/rag-repo/webhook``."""
        router = APIRouter(prefix="/internal/rag-repo", tags=["internal-rag-repo"])

        # No docstring on the endpoint on purpose: a comment keeps the generated
        # API description exactly as it was.
        # NOTE(review): `process` is a synchronous call inside an async endpoint;
        # if the writers perform blocking I/O this stalls the event loop — confirm.
        @router.post("/webhook")
        async def process_repo_webhook(request: Request, payload: dict) -> dict:
            # Copy the headers into a plain dict; for repeated header names the
            # last value wins, as with a pairwise dict comprehension.
            header_map = dict(request.headers.items())
            return self._webhook.process(payload=payload, headers=header_map)

        return router

View File

@@ -0,0 +1,217 @@
from __future__ import annotations
import re
from typing import Protocol
# Matches tracker-style ids inside commit messages: an uppercase letter followed
# by uppercase letters/digits/underscores, a hyphen and digits (e.g. "ABC-123").
# NOTE(review): the pattern also matches tokens such as "UTF-8" or "SHA-256";
# confirm that such false positives are acceptable.
_STORY_ID_RE = re.compile(r"\b[A-Z][A-Z0-9_]*-\d+\b")
class StoryCommitWriter(Protocol):
    """Structural interface of a sink that stores a commit under a Story."""

    def record_story_commit(
        self,
        *,
        story_id: str,
        project_id: str,
        title: str,
        commit_sha: str | None,
        branch: str | None,
        changed_files: list[str],
        summary: str,
        actor: str | None,
    ) -> None:
        """Record one commit for the given story.

        All arguments are keyword-only. ``commit_sha``, ``branch`` and
        ``actor`` may be ``None`` when the webhook did not provide them.
        """
        ...
class RepoCacheWriter(Protocol):
    """Structural interface of a sink that stores per-commit repository cache data."""

    def record_repo_cache(
        self,
        *,
        project_id: str,
        commit_sha: str | None,
        changed_files: list[str],
        summary: str,
    ) -> None:
        """Record the changed files of one commit for a project.

        All arguments are keyword-only. ``commit_sha`` may be ``None`` when
        the webhook carried no commit identifier.
        """
        ...
class RepoWebhookService:
    """Turn Gitea/Bitbucket push webhooks into normalized commit records.

    A payload is normalized into a provider-independent dict, handed to the
    optional cache writer, and — when a story id is found in the commit
    messages — recorded through the story writer.

    Webhook bodies are untrusted input, so every nested value is type-checked
    before use; malformed parts are ignored instead of raising.
    """

    _SUPPORTED_PROVIDERS = frozenset({"gitea", "bitbucket"})
    _FILE_KEYS = ("added", "modified", "removed")
    _BRANCH_REF_PREFIX = "refs/heads/"
    _UNKNOWN_PROJECT = "unknown_repo"
    _NO_MESSAGE_SUMMARY = "Webhook commit without message"

    def __init__(self, story_writer: StoryCommitWriter, cache_writer: RepoCacheWriter | None = None) -> None:
        """Store the writers; ``cache_writer`` is optional and may be ``None``."""
        self._story_writer = story_writer
        self._cache_writer = cache_writer

    def process(self, *, payload: dict, provider: str | None = None, headers: dict | None = None) -> dict:
        """Handle one webhook delivery.

        Args:
            payload: Decoded JSON body of the webhook.
            provider: Explicit provider name (``"gitea"`` or ``"bitbucket"``);
                when missing or unknown the provider is detected from
                ``headers`` and then from the payload structure.
            headers: Request headers used for provider detection.

        Returns:
            A result dict. ``accepted`` is ``False`` (with ``reason``) when the
            payload cannot be attributed to a supported provider. Otherwise it
            carries ``indexed``, ``story_bound`` and ``cache_recorded`` flags
            and, when a story id was found, the recorded commit fields.
        """
        if not isinstance(payload, dict):
            # A non-object body can never be normalized; reject it instead of
            # failing on attribute access further down.
            return {"accepted": False, "reason": "unsupported_or_invalid_payload"}

        resolved_provider = self._resolve_provider(provider=provider, payload=payload, headers=headers or {})
        normalized = self._normalize(provider=resolved_provider, payload=payload)
        if not normalized:
            return {"accepted": False, "reason": "unsupported_or_invalid_payload"}

        # The cache record is written regardless of whether a story id exists.
        cache_recorded = False
        if self._cache_writer is not None:
            self._cache_writer.record_repo_cache(
                project_id=normalized["project_id"],
                commit_sha=normalized["commit_sha"],
                changed_files=normalized["changed_files"],
                summary=normalized["summary"],
            )
            cache_recorded = True

        story_id = self._extract_story_id(normalized["messages"])
        if not story_id:
            return {
                "accepted": True,
                "indexed": False,
                "story_bound": False,
                "cache_recorded": cache_recorded,
                "reason": "story_id_not_found",
            }

        self._story_writer.record_story_commit(
            story_id=story_id,
            project_id=normalized["project_id"],
            title=f"Story {story_id}",
            commit_sha=normalized["commit_sha"],
            branch=normalized["branch"],
            changed_files=normalized["changed_files"],
            summary=normalized["summary"],
            actor=normalized["actor"],
        )
        return {
            "accepted": True,
            "indexed": False,
            "story_bound": True,
            "cache_recorded": cache_recorded,
            "story_id": story_id,
            "project_id": normalized["project_id"],
            "commit_sha": normalized["commit_sha"],
            "changed_files": normalized["changed_files"],
        }

    def _resolve_provider(self, *, provider: str | None, payload: dict, headers: dict[str, str]) -> str:
        """Return ``"gitea"``, ``"bitbucket"`` or ``""`` when undetermined.

        Precedence: explicit ``provider`` argument, then the provider-specific
        request headers, then the shape of the payload.
        """
        explicit = (provider or "").strip().lower()
        if explicit in self._SUPPORTED_PROVIDERS:
            return explicit

        # Header names are compared case-insensitively.
        header_names = {str(name).lower() for name in headers}
        if "x-gitea-event" in header_names:
            return "gitea"
        if "x-event-key" in header_names:
            return "bitbucket"

        if isinstance(payload.get("commits"), list) and ("ref" in payload or "pusher" in payload):
            return "gitea"
        push = payload.get("push")
        if isinstance(push, dict) and isinstance(push.get("changes"), list):
            return "bitbucket"
        return ""

    def _normalize(self, *, provider: str, payload: dict) -> dict | None:
        """Dispatch to the provider-specific normalizer; ``None`` if unsupported."""
        key = provider.lower().strip()
        if key == "gitea":
            return self._normalize_gitea(payload)
        if key == "bitbucket":
            return self._normalize_bitbucket(payload)
        return None

    @staticmethod
    def _as_dict(value: object) -> dict:
        """Return ``value`` when it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _as_list(value: object) -> list:
        """Return ``value`` when it is a list, otherwise an empty list."""
        return value if isinstance(value, list) else []

    @staticmethod
    def _clean(value: object) -> str:
        """Stringify ``value`` (falsy values become ``""``) and strip whitespace."""
        return str(value or "").strip()

    @classmethod
    def _collect_paths(cls, commit: dict) -> set[str]:
        """Gather the added/modified/removed paths of one commit entry.

        Items may be plain strings or dicts with a ``path`` key. Anything else
        (including a bare string in place of the list) is ignored, so a
        malformed entry cannot produce bogus one-character or ``"None"`` paths.
        """
        paths: set[str] = set()
        for key in cls._FILE_KEYS:
            for item in cls._as_list(commit.get(key)):
                if isinstance(item, dict):
                    raw = item.get("path")
                elif isinstance(item, str):
                    raw = item
                else:
                    continue
                path = cls._clean(raw)
                if path:
                    paths.add(path)
        return paths

    def _build_record(
        self,
        *,
        project_id: str,
        branch: str | None,
        commit_sha: str | None,
        changed_files: set[str],
        messages: list[str],
        actor: str | None,
    ) -> dict:
        """Assemble the provider-independent record shared by both normalizers."""
        return {
            "project_id": project_id,
            "branch": branch,
            "commit_sha": commit_sha,
            "changed_files": sorted(changed_files),
            "messages": messages,
            # The summary is the last collected message, or a fixed placeholder.
            "summary": messages[-1] if messages else self._NO_MESSAGE_SUMMARY,
            "actor": actor,
        }

    def _normalize_gitea(self, payload: dict) -> dict:
        """Map a Gitea push payload onto the normalized record.

        ``commit_sha`` is the pushed head taken from ``after`` when that field
        holds a real (non-zero) hash; otherwise it falls back to the id of the
        last commit listed in ``commits``.
        """
        repo = self._as_dict(payload.get("repository"))
        project_id = str(repo.get("full_name") or repo.get("name") or self._UNKNOWN_PROJECT)

        ref = str(payload.get("ref") or "")
        if ref.startswith(self._BRANCH_REF_PREFIX):
            # Strip the prefix only at the start of the ref.
            branch = ref[len(self._BRANCH_REF_PREFIX):] or None
        else:
            branch = ref or None

        pusher = self._as_dict(payload.get("pusher"))
        actor = str(pusher.get("username") or "") or None

        messages: list[str] = []
        changed_files: set[str] = set()
        last_listed_id: str | None = None
        for commit in self._as_list(payload.get("commits")):
            if not isinstance(commit, dict):
                continue
            commit_id = self._clean(commit.get("id"))
            if commit_id:
                last_listed_id = commit_id
            message = self._clean(commit.get("message"))
            if message:
                messages.append(message)
            changed_files |= self._collect_paths(commit)

        head = self._clean(payload.get("after"))
        # An all-zero hash is a placeholder, not a commit, so it is not used.
        commit_sha = head if head.strip("0") else last_listed_id

        return self._build_record(
            project_id=project_id,
            branch=branch,
            commit_sha=commit_sha,
            changed_files=changed_files,
            messages=messages,
            actor=actor,
        )

    def _normalize_bitbucket(self, payload: dict) -> dict:
        """Map a Bitbucket push payload onto the normalized record.

        For every change the head hash from ``new.target.hash`` wins; the
        hashes of the listed ``commits`` are only a fallback, so the listing
        order cannot replace the head with an older commit. When several
        changes are present, the branch and hash of the last one are kept.
        """
        repo = self._as_dict(payload.get("repository"))
        project_id = str(repo.get("full_name") or repo.get("name") or self._UNKNOWN_PROJECT)

        actor_raw = self._as_dict(payload.get("actor"))
        actor = str(actor_raw.get("display_name") or actor_raw.get("username") or "") or None

        messages: list[str] = []
        changed_files: set[str] = set()
        commit_sha: str | None = None
        branch: str | None = None

        push = self._as_dict(payload.get("push"))
        for change in self._as_list(push.get("changes")):
            if not isinstance(change, dict):
                continue

            new_ref = self._as_dict(change.get("new"))
            branch_name = self._clean(new_ref.get("name"))
            if branch_name:
                branch = branch_name

            target = self._as_dict(new_ref.get("target"))
            head_hash = self._clean(target.get("hash"))
            target_message = self._clean(target.get("message"))
            if target_message:
                messages.append(target_message)

            last_listed_hash = ""
            for commit in self._as_list(change.get("commits")):
                if not isinstance(commit, dict):
                    continue
                listed_hash = self._clean(commit.get("hash"))
                if listed_hash:
                    last_listed_hash = listed_hash
                message = self._clean(commit.get("message"))
                if message:
                    messages.append(message)
                changed_files |= self._collect_paths(commit)

            change_sha = head_hash or last_listed_hash
            if change_sha:
                commit_sha = change_sha

        return self._build_record(
            project_id=project_id,
            branch=branch,
            commit_sha=commit_sha,
            changed_files=changed_files,
            messages=messages,
            actor=actor,
        )

    def _extract_story_id(self, messages: list[str]) -> str | None:
        """Return the first story id found in ``messages``, or ``None``."""
        for msg in messages:
            match = _STORY_ID_RE.search(msg)
            if match:
                return match.group(0)
        return None