гит хук и сохранение изменений в контексте стори
This commit is contained in:
@@ -5,9 +5,15 @@ import hashlib
|
||||
from pathlib import Path
|
||||
|
||||
from rag_agent.config import load_config
|
||||
from rag_agent.ingest.chunker import chunk_text
|
||||
from rag_agent.ingest.chunker import chunk_text_by_lines
|
||||
from rag_agent.ingest.file_loader import iter_text_files
|
||||
from rag_agent.ingest.git_watcher import filter_existing, filter_removed, get_changed_files
|
||||
from rag_agent.ingest.git_watcher import (
|
||||
filter_existing,
|
||||
filter_removed,
|
||||
get_changed_files,
|
||||
get_merge_base,
|
||||
read_file_at_ref,
|
||||
)
|
||||
from rag_agent.index.embeddings import get_embedding_client
|
||||
from rag_agent.index.postgres import (
|
||||
connect,
|
||||
@@ -16,6 +22,7 @@ from rag_agent.index.postgres import (
|
||||
get_or_create_story,
|
||||
get_story_id,
|
||||
replace_chunks,
|
||||
update_story_indexed_range,
|
||||
upsert_document,
|
||||
)
|
||||
from rag_agent.agent.pipeline import StubLLMClient, answer_query
|
||||
@@ -34,26 +41,66 @@ def cmd_index(args: argparse.Namespace) -> None:
|
||||
story_id = get_or_create_story(conn, args.story)
|
||||
embedding_client = get_embedding_client(config.embeddings_dim)
|
||||
|
||||
base_ref = args.base_ref.strip()
|
||||
head_ref = args.head_ref.strip()
|
||||
if args.changed:
|
||||
changed_files = get_changed_files(config.repo_path, args.base_ref, args.head_ref)
|
||||
if base_ref.lower() == "auto":
|
||||
base_ref = get_merge_base(
|
||||
config.repo_path, args.default_branch, head_ref
|
||||
) or args.default_branch
|
||||
changed_files = get_changed_files(
|
||||
config.repo_path, base_ref, head_ref
|
||||
)
|
||||
removed = filter_removed(changed_files)
|
||||
existing = filter_existing(changed_files)
|
||||
else:
|
||||
removed = []
|
||||
existing = [path for path in Path(config.repo_path).rglob("*") if path.is_file()]
|
||||
existing = [
|
||||
p for p in Path(config.repo_path).rglob("*") if p.is_file()
|
||||
]
|
||||
|
||||
for path in removed:
|
||||
delete_document(conn, story_id, str(path))
|
||||
|
||||
for path, text in iter_text_files(existing, config.allowed_extensions):
|
||||
chunks = chunk_text(text, config.chunk_size, config.chunk_overlap)
|
||||
chunks = chunk_text_by_lines(
|
||||
text,
|
||||
config.chunk_size_lines,
|
||||
config.chunk_overlap_lines,
|
||||
)
|
||||
if not chunks:
|
||||
continue
|
||||
embeddings = embedding_client.embed_texts([chunk.text for chunk in chunks])
|
||||
base_chunks = None
|
||||
if args.changed:
|
||||
base_text = read_file_at_ref(config.repo_path, path, base_ref)
|
||||
if base_text is not None:
|
||||
base_chunks = chunk_text_by_lines(
|
||||
base_text,
|
||||
config.chunk_size_lines,
|
||||
config.chunk_overlap_lines,
|
||||
)
|
||||
embeddings = embedding_client.embed_texts(
|
||||
[chunk.text for chunk in chunks]
|
||||
)
|
||||
document_id = upsert_document(
|
||||
conn, story_id, str(path), _file_version(path)
|
||||
)
|
||||
replace_chunks(conn, document_id, chunks, embeddings)
|
||||
replace_chunks(
|
||||
conn, document_id, chunks, embeddings, base_chunks=base_chunks
|
||||
)
|
||||
|
||||
if args.changed:
|
||||
update_story_indexed_range(conn, story_id, base_ref, head_ref)
|
||||
|
||||
|
||||
def cmd_serve(args: argparse.Namespace) -> None:
    """Run the webhook server for the ``serve`` subcommand.

    Hands the ASGI application path ``rag_agent.webhook:app`` to uvicorn,
    binding to ``args.host`` and ``args.port`` with ``info`` log level.

    Args:
        args: Parsed CLI arguments; ``host`` and ``port`` are read.
    """
    # Function-scope import, as in the original; presumably so uvicorn is
    # only required when this subcommand is actually used.
    from uvicorn import run as run_server

    server_options = {
        "host": args.host,
        "port": args.port,
        "log_level": "info",
    }
    run_server("rag_agent.webhook:app", **server_options)
|
||||
|
||||
|
||||
def cmd_ask(args: argparse.Namespace) -> None:
|
||||
@@ -89,9 +136,26 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
required=True,
|
||||
help="Story slug (e.g. branch name or story id); documents are tied to this story",
|
||||
)
|
||||
index_parser.add_argument("--changed", action="store_true", help="Index only changed files")
|
||||
index_parser.add_argument("--base-ref", default="HEAD~1", help="Base git ref")
|
||||
index_parser.add_argument("--head-ref", default="HEAD", help="Head git ref")
|
||||
index_parser.add_argument(
|
||||
"--changed",
|
||||
action="store_true",
|
||||
help="Index only files changed in the story range (base-ref..head-ref); all commits in range belong to the story",
|
||||
)
|
||||
index_parser.add_argument(
|
||||
"--base-ref",
|
||||
default="main",
|
||||
help="Start of story range (e.g. main). Use 'auto' for merge-base(default-branch, head-ref). All commits from base to head are the story.",
|
||||
)
|
||||
index_parser.add_argument(
|
||||
"--head-ref",
|
||||
default="HEAD",
|
||||
help="End of story range (e.g. current branch tip)",
|
||||
)
|
||||
index_parser.add_argument(
|
||||
"--default-branch",
|
||||
default="main",
|
||||
help="Default branch name for --base-ref auto",
|
||||
)
|
||||
index_parser.set_defaults(func=cmd_index)
|
||||
|
||||
ask_parser = sub.add_parser("ask", help="Ask a question")
|
||||
@@ -104,6 +168,23 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
ask_parser.add_argument("--top-k", type=int, default=5, help="Top K chunks to retrieve")
|
||||
ask_parser.set_defaults(func=cmd_ask)
|
||||
|
||||
serve_parser = sub.add_parser(
|
||||
"serve",
|
||||
help="Run webhook server: on push to remote repo, pull and index changes",
|
||||
)
|
||||
serve_parser.add_argument(
|
||||
"--host",
|
||||
default="0.0.0.0",
|
||||
help="Bind host (default: 0.0.0.0)",
|
||||
)
|
||||
serve_parser.add_argument(
|
||||
"--port",
|
||||
type=int,
|
||||
default=8000,
|
||||
help="Bind port (default: 8000)",
|
||||
)
|
||||
serve_parser.set_defaults(func=cmd_serve)
|
||||
|
||||
return parser
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user