Files
hive/backend/app/services/git_repository_service.py
anthonyrawlins 268214d971 Major WHOOSH system refactoring and feature enhancements
- Migrated from HIVE branding to WHOOSH across all components
- Enhanced backend API with new services: AI models, BZZZ integration, templates, members
- Added comprehensive testing suite with security, performance, and integration tests
- Improved frontend with new components for project setup, AI models, and team management
- Updated MCP server implementation with WHOOSH-specific tools and resources
- Enhanced deployment configurations with production-ready Docker setups
- Added comprehensive documentation and setup guides
- Implemented age encryption service and UCXL integration

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-27 08:34:48 +10:00

513 lines
20 KiB
Python

#!/usr/bin/env python3
"""
Git Repository Service for WHOOSH
Handles git repository management, cloning, credentials, and project integration
"""
import asyncio
import git
import json
import logging
import aiofiles
import os
from typing import Dict, List, Optional, Any
from datetime import datetime
from dataclasses import dataclass, asdict
from pathlib import Path
import base64
import subprocess
from urllib.parse import urlparse
import tempfile
import shutil
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
@dataclass
class GitCredentials:
    """Git repository credentials.

    The generated dataclass ``repr`` would print ``password`` and
    ``ssh_key_content`` verbatim, so a custom ``__repr__`` masks them to keep
    secrets out of logs and tracebacks. Field access, equality and
    ``dataclasses.asdict`` are unaffected and still expose the real values.
    """
    repo_url: str
    username: Optional[str] = None
    password: Optional[str] = None  # token or password
    ssh_key_path: Optional[str] = None
    ssh_key_content: Optional[str] = None
    auth_type: str = "https"  # https, ssh, token

    def __repr__(self) -> str:
        """Return a debug representation with secret fields masked."""

        def masked(value: Optional[str]) -> str:
            # Keep None/empty visible so "is a secret set?" stays debuggable.
            return "'***'" if value else repr(value)

        return (
            f"{type(self).__name__}("
            f"repo_url={self.repo_url!r}, "
            f"username={self.username!r}, "
            f"password={masked(self.password)}, "
            f"ssh_key_path={self.ssh_key_path!r}, "
            f"ssh_key_content={masked(self.ssh_key_content)}, "
            f"auth_type={self.auth_type!r})"
        )
@dataclass
class GitRepository:
    """A tracked git repository: identity, credentials and clone state."""

    # --- identity -----------------------------------------------------
    id: str
    name: str
    url: str
    credentials: GitCredentials

    # --- placement ----------------------------------------------------
    project_id: Optional[str] = None
    local_path: Optional[str] = None
    default_branch: str = "main"

    # --- lifecycle ----------------------------------------------------
    # One of: pending, cloning, ready, error
    status: str = "pending"
    last_updated: Optional[datetime] = None

    # --- most recent commit / failure details -------------------------
    commit_hash: Optional[str] = None
    commit_message: Optional[str] = None
    error_message: Optional[str] = None
class GitRepositoryService:
    """
    Service for managing git repositories in WHOOSH projects.
    Handles cloning, credential management, and repository status tracking.

    Repository metadata, including credentials in plain text, is persisted to
    ``repositories.json`` under ``base_repos_path``. Each clone lives in a
    sub-directory of ``base_repos_path`` named after the repository ID.
    """

    def __init__(self):
        self.repositories: Dict[str, GitRepository] = {}
        self.base_repos_path = Path("/tmp/whoosh_repos")
        self.credentials_store = {}
        # Strong references to in-flight clone tasks. The event loop only
        # keeps weak references, so an unreferenced task may be collected
        # before it finishes.
        self._background_tasks = set()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_timestamp(value: Any) -> Optional[datetime]:
        """Turn a stored ISO-8601 string back into a datetime (None if unusable)."""
        if value is None or isinstance(value, datetime):
            return value
        try:
            return datetime.fromisoformat(str(value))
        except ValueError:
            return None

    @staticmethod
    def _format_timestamp(value: Any) -> Any:
        """Render a datetime as ISO-8601; pass strings and None through unchanged."""
        return value.isoformat() if isinstance(value, datetime) else value

    @staticmethod
    def _is_inside(root: Path, candidate: Path) -> bool:
        """Return True when *candidate*, fully resolved, lies at or under *root*.

        *root* must already be resolved. Symlinks in *candidate* are followed,
        so a link pointing outside the root is reported as outside.
        """
        try:
            candidate.resolve().relative_to(root)
        except ValueError:
            return False
        return True

    @classmethod
    def _resolve_inside(cls, root: Path, relative: str) -> Optional[Path]:
        """Resolve *relative* against *root*; return None if it escapes *root*."""
        resolved_root = root.resolve()
        # Joining with an absolute path discards the root entirely; the
        # containment check below rejects that case too.
        candidate = (resolved_root / relative).resolve()
        return candidate if cls._is_inside(resolved_root, candidate) else None

    @staticmethod
    def _build_auth_url(url: str, username: str, password: str) -> str:
        """Embed percent-encoded HTTP credentials into *url*.

        Encoding is required because characters such as ``@``, ``:`` or ``/``
        in a password would otherwise corrupt the URL. Any userinfo already
        present in *url* is replaced.
        """
        from urllib.parse import quote

        parsed = urlparse(url)
        host = parsed.netloc.rpartition("@")[2]
        user = quote(username, safe="")
        secret = quote(password, safe="")
        return f"{parsed.scheme}://{user}:{secret}@{host}{parsed.path}"

    @staticmethod
    def _redact(text: str, secret: Optional[str]) -> str:
        """Replace raw and percent-encoded occurrences of *secret* in *text*."""
        if not secret:
            return text
        from urllib.parse import quote

        for form in (quote(secret, safe=""), secret):
            text = text.replace(form, "***")
        return text

    @staticmethod
    def _ssh_key_file(repo_id: str) -> Path:
        """Location of the private key materialised for *repo_id*."""
        return Path.home() / ".ssh" / f"whoosh_key_{repo_id}"

    def _write_ssh_key(self, repo_id: str, key_content: str) -> Path:
        """Write *key_content* to the per-repository key file with mode 0600."""
        key_file = self._ssh_key_file(repo_id)
        key_file.parent.mkdir(exist_ok=True)
        # OpenSSH rejects private keys that lack a trailing newline.
        if not key_content.endswith("\n"):
            key_content += "\n"
        # Create the file with restrictive permissions from the start so the
        # key is never briefly world-readable.
        fd = os.open(str(key_file), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as handle:
            handle.write(key_content)
        os.chmod(key_file, 0o600)  # also covers a pre-existing file
        return key_file

    def _ssh_command(self, repo: Any) -> Optional[str]:
        """Build the GIT_SSH_COMMAND for *repo*, or None if no usable key exists."""
        import shlex

        creds = repo.credentials
        if creds.auth_type != "ssh":
            return None
        if creds.ssh_key_content:
            key_path = self._ssh_key_file(repo.id)
        elif creds.ssh_key_path:
            key_path = Path(creds.ssh_key_path).expanduser()
        else:
            return None
        if not key_path.exists():
            return None
        # NOTE(review): StrictHostKeyChecking=no disables host verification
        # and permits man-in-the-middle attacks; kept for compatibility.
        return f'ssh -i {shlex.quote(str(key_path))} -o StrictHostKeyChecking=no'

    def _make_repo_id(self, name: str) -> str:
        """Derive a unique, filesystem-safe repository ID from *name*.

        Characters outside ``[a-z0-9._-]`` become ``_`` so the ID is always a
        single path component. The numeric part is bumped until the ID is
        unused, which avoids the collision a bare ``len(...) + 1`` produces
        after a repository has been removed.
        """
        slug = "".join(
            ch if (ch.isalnum() or ch in "._-") else "_" for ch in name.lower()
        )
        index = len(self.repositories) + 1
        while f"repo_{index}_{slug}" in self.repositories:
            index += 1
        return f"repo_{index}_{slug}"

    def _public_view(self, repo: Any) -> Dict[str, Any]:
        """Serialise *repo* for API consumers with secrets stripped."""
        repo_dict = asdict(repo)
        creds = repo.credentials
        # Expose only which credential kinds are present, never the values.
        repo_dict['credentials'] = {
            'auth_type': creds.auth_type,
            'has_username': bool(creds.username),
            'has_password': bool(creds.password),
            'has_ssh_key': bool(creds.ssh_key_content or creds.ssh_key_path)
        }
        repo_dict['last_updated'] = self._format_timestamp(
            repo_dict.get('last_updated')
        )
        return repo_dict

    # ------------------------------------------------------------------
    # Lifecycle and persistence
    # ------------------------------------------------------------------

    async def initialize(self) -> bool:
        """Create the base directory and load persisted repositories.

        Returns:
            True on success, False if any step raised.
        """
        try:
            logger.info("🔧 Initializing Git Repository Service")
            # Create base repositories directory
            self.base_repos_path.mkdir(parents=True, exist_ok=True)
            # Load existing repositories if any
            await self._load_repositories()
            logger.info("✅ Git Repository Service initialized")
            return True
        except Exception as e:
            logger.error(f"❌ Failed to initialize git repository service: {e}")
            return False

    async def _load_repositories(self) -> None:
        """Load existing repositories from storage.

        Errors are logged and swallowed so a corrupt store never prevents the
        service from starting.
        """
        try:
            config_file = self.base_repos_path / "repositories.json"
            if config_file.exists():
                async with aiofiles.open(config_file, 'r') as f:
                    content = await f.read()
                repos_data = json.loads(content)
                for repo_data in repos_data:
                    credentials = GitCredentials(**repo_data['credentials'])
                    fields = {
                        k: v for k, v in repo_data.items() if k != 'credentials'
                    }
                    # JSON stores the timestamp as text; restore a datetime so
                    # later .isoformat() calls keep working.
                    fields['last_updated'] = self._parse_timestamp(
                        fields.get('last_updated')
                    )
                    repo = GitRepository(**fields, credentials=credentials)
                    self.repositories[repo.id] = repo
                logger.info(f"📂 Loaded {len(self.repositories)} existing repositories")
        except Exception as e:
            logger.error(f"❌ Error loading repositories: {e}")

    async def _save_repositories(self) -> None:
        """Save repositories to storage (errors are logged, not raised)."""
        try:
            config_file = self.base_repos_path / "repositories.json"
            repos_data = []
            for repo in self.repositories.values():
                repo_dict = asdict(repo)
                # Convert datetime to string
                repo_dict['last_updated'] = self._format_timestamp(
                    repo_dict.get('last_updated')
                )
                repos_data.append(repo_dict)
            # The file holds plain-text credentials: keep it owner-only.
            config_file.touch(mode=0o600, exist_ok=True)
            os.chmod(config_file, 0o600)
            async with aiofiles.open(config_file, 'w') as f:
                await f.write(json.dumps(repos_data, indent=2, default=str))
        except Exception as e:
            logger.error(f"❌ Error saving repositories: {e}")

    # ------------------------------------------------------------------
    # Repository management
    # ------------------------------------------------------------------

    async def add_repository(
        self,
        name: str,
        url: str,
        credentials: Dict[str, Any],
        project_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """Add a new git repository and start cloning it in the background.

        Args:
            name: Human-readable repository name.
            url: Clone URL.
            credentials: Mapping with optional keys ``username``, ``password``,
                ``ssh_key_path``, ``ssh_key_content`` and ``auth_type``.
            project_id: Optional owning project.

        Returns:
            ``{"success": True, "repository_id": ..., "message": ...}`` or
            ``{"success": False, "error": ...}``.
        """
        try:
            logger.info(f"📥 Adding repository: {name} ({url})")
            # Generate unique ID
            repo_id = self._make_repo_id(name)
            # Create credentials object
            git_credentials = GitCredentials(
                repo_url=url,
                username=credentials.get('username'),
                password=credentials.get('password'),
                ssh_key_path=credentials.get('ssh_key_path'),
                ssh_key_content=credentials.get('ssh_key_content'),
                auth_type=credentials.get('auth_type', 'https')
            )
            # Create repository object
            repository = GitRepository(
                id=repo_id,
                name=name,
                url=url,
                credentials=git_credentials,
                project_id=project_id,
                status="pending"
            )
            # Store repository
            self.repositories[repo_id] = repository
            await self._save_repositories()
            # Start cloning process in background, holding a reference until
            # the task completes.
            task = asyncio.create_task(self._clone_repository(repo_id))
            self._background_tasks.add(task)
            task.add_done_callback(self._background_tasks.discard)
            logger.info(f"✅ Repository {name} added with ID: {repo_id}")
            return {
                "success": True,
                "repository_id": repo_id,
                "message": f"Repository {name} added successfully, cloning started"
            }
        except Exception as e:
            logger.error(f"❌ Error adding repository: {e}")
            return {"success": False, "error": str(e)}

    async def _clone_repository(self, repo_id: str) -> None:
        """Clone a repository asynchronously and record the outcome on it."""
        secret = None
        try:
            repo = self.repositories.get(repo_id)
            if not repo:
                raise Exception(f"Repository {repo_id} not found")
            logger.info(f"🔄 Cloning repository: {repo.name}")
            repo.status = "cloning"
            # Create local path
            local_path = self.base_repos_path / repo_id
            repo.local_path = str(local_path)
            # Prepare clone command and environment
            env = os.environ.copy()
            # Fail fast instead of hanging on an interactive credential prompt.
            env.setdefault('GIT_TERMINAL_PROMPT', '0')
            creds = repo.credentials
            clone_url = repo.url
            # Handle authentication
            if creds.auth_type == "https" and creds.username and creds.password:
                secret = creds.password
                clone_url = self._build_auth_url(
                    repo.url, creds.username, creds.password
                )
            elif creds.auth_type == "ssh":
                if creds.ssh_key_content:
                    self._write_ssh_key(repo_id, creds.ssh_key_content)
                ssh_command = self._ssh_command(repo)
                if ssh_command:
                    env['GIT_SSH_COMMAND'] = ssh_command
            # NOTE(review): auth_type "token" falls through as a plain clone,
            # as before; the token is not applied.
            # "--" stops a URL beginning with "-" from being parsed as an option.
            clone_cmd = ["git", "clone", "--", clone_url, str(local_path)]
            # Execute clone command
            process = await asyncio.create_subprocess_exec(
                *clone_cmd,
                env=env,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            stdout, stderr = await process.communicate()
            if process.returncode == 0:
                # Clone successful
                repo.status = "ready"
                repo.last_updated = datetime.utcnow()
                # Get latest commit info
                git_repo = git.Repo(local_path)
                latest_commit = git_repo.head.commit
                repo.commit_hash = str(latest_commit.hexsha)
                repo.commit_message = latest_commit.message.strip()
                logger.info(f"✅ Repository {repo.name} cloned successfully")
            else:
                # Clone failed; git may echo the credential-bearing URL.
                repo.status = "error"
                raw_error = (
                    stderr.decode(errors="replace") if stderr else "Clone failed"
                )
                repo.error_message = self._redact(raw_error, secret)
                logger.error(f"❌ Failed to clone repository {repo.name}: {repo.error_message}")
            await self._save_repositories()
        except Exception as e:
            safe_error = self._redact(str(e), secret)
            logger.error(f"❌ Error cloning repository {repo_id}: {safe_error}")
            if repo_id in self.repositories:
                self.repositories[repo_id].status = "error"
                self.repositories[repo_id].error_message = safe_error
                await self._save_repositories()

    async def get_repositories(self, project_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """Get list of repositories, optionally filtered by project.

        Returns:
            Sanitised repository dicts; an empty list on error.
        """
        try:
            return [
                self._public_view(repo)
                for repo in self.repositories.values()
                if not project_id or repo.project_id == project_id
            ]
        except Exception as e:
            logger.error(f"❌ Error getting repositories: {e}")
            return []

    async def get_repository(self, repo_id: str) -> Optional[Dict[str, Any]]:
        """Get a specific repository as a sanitised dict, or None."""
        try:
            repo = self.repositories.get(repo_id)
            if not repo:
                return None
            return self._public_view(repo)
        except Exception as e:
            logger.error(f"❌ Error getting repository {repo_id}: {e}")
            return None

    async def update_repository(self, repo_id: str) -> Dict[str, Any]:
        """Pull latest changes from repository.

        The blocking GitPython calls run in the default executor so the event
        loop stays responsive. SSH repositories reuse the key used for cloning.
        """
        try:
            repo = self.repositories.get(repo_id)
            if not repo:
                return {"success": False, "error": "Repository not found"}
            if repo.status != "ready":
                return {"success": False, "error": "Repository not ready for updates"}
            logger.info(f"🔄 Updating repository: {repo.name}")
            # Pull latest changes
            local_path = Path(repo.local_path)
            if not local_path.exists():
                return {"success": False, "error": "Local repository path not found"}
            ssh_command = self._ssh_command(repo)

            def pull_latest():
                git_repo = git.Repo(local_path)

                def sync():
                    # Fetch and pull
                    git_repo.remotes.origin.fetch()
                    git_repo.git.pull()

                if ssh_command:
                    with git_repo.git.custom_environment(GIT_SSH_COMMAND=ssh_command):
                        sync()
                else:
                    sync()
                head = git_repo.head.commit
                return str(head.hexsha), head.message.strip()

            loop = asyncio.get_running_loop()
            commit_hash, commit_message = await loop.run_in_executor(
                None, pull_latest
            )
            # Update repository info
            repo.commit_hash = commit_hash
            repo.commit_message = commit_message
            repo.last_updated = datetime.utcnow()
            await self._save_repositories()
            logger.info(f"✅ Repository {repo.name} updated successfully")
            return {
                "success": True,
                "commit_hash": repo.commit_hash,
                "commit_message": repo.commit_message,
                "message": f"Repository {repo.name} updated successfully"
            }
        except Exception as e:
            logger.error(f"❌ Error updating repository {repo_id}: {e}")
            return {"success": False, "error": str(e)}

    async def remove_repository(self, repo_id: str) -> Dict[str, Any]:
        """Remove a repository, its local clone and any key written for it."""
        try:
            repo = self.repositories.get(repo_id)
            if not repo:
                return {"success": False, "error": "Repository not found"}
            logger.info(f"🗑️ Removing repository: {repo.name}")
            # Remove local files
            if repo.local_path and Path(repo.local_path).exists():
                clone_dir = Path(repo.local_path)
                base = self.base_repos_path.resolve()
                # local_path comes from a JSON file on disk; only delete
                # directories that really are clones under the base directory.
                if self._is_inside(base, clone_dir) and clone_dir.resolve() != base:
                    loop = asyncio.get_running_loop()
                    await loop.run_in_executor(None, shutil.rmtree, str(clone_dir))
                else:
                    logger.warning(
                        f"Refusing to delete path outside repository store: {clone_dir}"
                    )
            # Do not leave private key material behind.
            key_file = self._ssh_key_file(repo_id)
            if key_file.exists():
                key_file.unlink()
            # Remove from memory
            del self.repositories[repo_id]
            await self._save_repositories()
            logger.info(f"✅ Repository {repo.name} removed successfully")
            return {
                "success": True,
                "message": f"Repository {repo.name} removed successfully"
            }
        except Exception as e:
            logger.error(f"❌ Error removing repository {repo_id}: {e}")
            return {"success": False, "error": str(e)}

    # ------------------------------------------------------------------
    # Content access
    # ------------------------------------------------------------------

    async def get_repository_files(
        self,
        repo_id: str,
        path: str = "",
        max_depth: int = 2
    ) -> Dict[str, Any]:
        """Get file structure of a repository.

        Args:
            repo_id: Repository to inspect (must be in status ``ready``).
            path: Sub-directory relative to the clone root; "" means the root.
            max_depth: Directory levels to descend before marking a directory
                as ``truncated``.

        Returns:
            ``{"success": True, "structure": ...}`` or an error dict. Paths
            that resolve outside the clone are rejected.
        """
        try:
            repo = self.repositories.get(repo_id)
            if not repo or repo.status != "ready":
                return {"success": False, "error": "Repository not found or not ready"}
            local_path = Path(repo.local_path)
            if not local_path.exists():
                return {"success": False, "error": "Local repository path not found"}
            root = local_path.resolve()
            target_path = self._resolve_inside(root, path) if path else root
            if target_path is None:
                return {"success": False, "error": "Invalid path"}
            if not target_path.is_dir():
                return {"success": False, "error": "Path not found or not a directory"}

            def scan_directory(dir_path: Path, current_depth: int = 0) -> Dict[str, Any]:
                """Recursively scan directory structure"""
                if current_depth >= max_depth:
                    return {"type": "directory", "name": dir_path.name, "truncated": True}
                items = []
                try:
                    for item in sorted(dir_path.iterdir()):
                        # Skip hidden files and git directory
                        if item.name.startswith('.'):
                            continue
                        # Skip symlinks that point outside the clone.
                        if item.is_symlink() and not self._is_inside(root, item):
                            continue
                        if item.is_file():
                            items.append({
                                "type": "file",
                                "name": item.name,
                                "size": item.stat().st_size,
                                "path": str(item.relative_to(root))
                            })
                        elif item.is_dir():
                            items.append({
                                "type": "directory",
                                "name": item.name,
                                "path": str(item.relative_to(root)),
                                "children": scan_directory(item, current_depth + 1)
                            })
                except PermissionError:
                    # Best effort: report whatever was readable.
                    pass
                return {
                    "type": "directory",
                    "name": dir_path.name,
                    "children": items
                }

            file_structure = scan_directory(target_path)
            return {
                "success": True,
                "repository_id": repo_id,
                "path": path,
                "structure": file_structure
            }
        except Exception as e:
            logger.error(f"❌ Error getting repository files {repo_id}: {e}")
            return {"success": False, "error": str(e)}

    async def get_file_content(
        self,
        repo_id: str,
        file_path: str,
        max_size: int = 1024 * 1024  # 1MB limit
    ) -> Dict[str, Any]:
        """Get content of a specific file.

        Text files are returned as UTF-8; anything that fails to decode is
        returned base64-encoded. Paths that resolve outside the clone, or
        into its ``.git`` directory (which can hold the credential-bearing
        remote URL), are rejected.
        """
        try:
            repo = self.repositories.get(repo_id)
            if not repo or repo.status != "ready":
                return {"success": False, "error": "Repository not found or not ready"}
            root = Path(repo.local_path).resolve()
            target_file = self._resolve_inside(root, file_path)
            if target_file is None:
                return {"success": False, "error": "Invalid file path"}
            if ".git" in target_file.relative_to(root).parts:
                return {"success": False, "error": "File not found"}
            if not target_file.exists() or not target_file.is_file():
                return {"success": False, "error": "File not found"}
            # Check file size
            file_size = target_file.stat().st_size
            if file_size > max_size:
                return {
                    "success": False,
                    "error": f"File too large ({file_size} bytes), maximum {max_size} bytes"
                }
            # Read file content
            try:
                async with aiofiles.open(target_file, 'r', encoding='utf-8') as f:
                    content = await f.read()
                return {
                    "success": True,
                    "repository_id": repo_id,
                    "file_path": file_path,
                    "content": content,
                    "size": file_size,
                    "encoding": "utf-8"
                }
            except UnicodeDecodeError:
                # Try binary read for non-text files
                async with aiofiles.open(target_file, 'rb') as f:
                    content = await f.read()
                encoded_content = base64.b64encode(content).decode('utf-8')
                return {
                    "success": True,
                    "repository_id": repo_id,
                    "file_path": file_path,
                    "content": encoded_content,
                    "size": file_size,
                    "encoding": "base64"
                }
        except Exception as e:
            logger.error(f"❌ Error getting file content {repo_id}/{file_path}: {e}")
            return {"success": False, "error": str(e)}

    async def cleanup(self) -> None:
        """Cleanup git repository service resources"""
        try:
            logger.info("🧹 Git Repository Service cleanup completed")
        except Exception as e:
            logger.error(f"❌ Error during cleanup: {e}")
# Global service instance, constructed when this module is imported.
git_repository_service = GitRepositoryService()