Files
hive/backend/app/services/repository_service.py
anthonyrawlins 1e81daaf18
Some checks failed
Frontend Tests / unit-tests (push) Has been cancelled
Frontend Tests / e2e-tests (push) Has been cancelled
Fix frontend URLs for production deployment and resolve database issues
- Update API base URL from localhost to https://api.hive.home.deepblack.cloud
- Update WebSocket URL to https://hive.home.deepblack.cloud for proper TLS routing
- Remove metadata field from Project model to fix SQLAlchemy conflict
- Remove index from JSON expertise column in AgentRole to fix PostgreSQL indexing
- Update push script to use local registry instead of Docker Hub
- Add Gitea repository support and monitoring endpoints

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-28 09:16:22 +10:00

477 lines
20 KiB
Python

"""
Repository service for managing task monitoring across different providers (GitHub, Gitea)
"""
import asyncio
import json
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Tuple
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_
from ..core.database import get_db
from ..models.project import Project
from ..models.agent import Agent
from .agent_service import AgentService
# Module-level logger named after this module; every log call in this file goes through it.
logger = logging.getLogger(__name__)
class RepositoryService:
    """Fetch open tasks from project repositories and match them to agents.

    State is held in memory only:

    * ``_task_cache`` maps ``"<project_id>:<issue number>"`` to a dict with
      ``assigned_at``, ``agent_id`` and ``task``; it prevents a task from
      being assigned again on a later sync.
    * ``_last_sync`` maps a project id to the ``datetime`` of its last
      successful sync.
    """

    def __init__(self):
        self.agent_service = AgentService()
        self._task_cache = {}
        self._last_sync = {}

    async def sync_all_repositories(self, db: Session) -> Dict[str, Any]:
        """Sync tasks from all enabled repositories.

        Selects projects whose status is ``"active"`` and whose
        ``bzzz_enabled`` flag is set, and syncs them one after another.
        A failure in one project is logged and recorded in ``errors``; it
        does not stop the remaining projects.

        Returns:
            Dict with ``synced_projects``, ``new_tasks``, ``assigned_tasks``
            (running totals) and ``errors`` (list of message strings).
        """
        results = {
            "synced_projects": 0,
            "new_tasks": 0,
            "assigned_tasks": 0,
            "errors": []
        }

        # Get all active projects with bzzz enabled.
        # "== True" is intentional: it builds a SQL expression, not a Python bool.
        projects = db.query(Project).filter(
            and_(
                Project.status == "active",
                Project.bzzz_enabled == True  # noqa: E712
            )
        ).all()

        for project in projects:
            try:
                sync_result = await self.sync_project_tasks(db, project)
                results["synced_projects"] += 1
                results["new_tasks"] += sync_result.get("new_tasks", 0)
                results["assigned_tasks"] += sync_result.get("assigned_tasks", 0)
            except Exception as e:
                error_msg = f"Failed to sync project {project.name}: {str(e)}"
                logger.error(error_msg)
                results["errors"].append(error_msg)

        return results

    async def sync_project_tasks(self, db: Session, project: Project) -> Dict[str, Any]:
        """Sync tasks for a specific project.

        Builds a repository client for the project's provider, lists the
        available tasks and tries to auto-assign each one. A failure while
        processing a single task is logged and skipped. The client is always
        closed before returning so its HTTP session is not leaked.

        Returns:
            Dict with ``project_id``, ``project_name``, ``new_tasks`` (number
            of tasks returned by the provider), ``assigned_tasks`` and
            ``provider``.

        Raises:
            Exception: any error from client creation or task listing is
                logged and re-raised.
        """
        provider = project.provider or "github"
        result = {
            "project_id": project.id,
            "project_name": project.name,
            "new_tasks": 0,
            "assigned_tasks": 0,
            "provider": provider
        }

        repo_client = None
        try:
            # Get repository client based on provider
            repo_client = await self._get_repository_client(project)
            if not repo_client:
                # Report the resolved provider; project.provider may be None
                # when the "github" default was applied.
                raise RuntimeError(f"Could not create repository client for {provider}")

            # Fetch available tasks
            tasks = await repo_client.list_available_tasks()
            result["new_tasks"] = len(tasks)

            # Process each task for potential assignment
            for task in tasks:
                try:
                    if await self._process_task_for_assignment(db, project, task):
                        result["assigned_tasks"] += 1
                except Exception as e:
                    logger.error(f"Failed to process task {task.get('number', 'unknown')}: {str(e)}")

            # Update last sync time
            self._last_sync[project.id] = datetime.now()
        except Exception as e:
            logger.error(f"Error syncing project {project.name}: {str(e)}")
            raise
        finally:
            await self._close_repository_client(repo_client)

        return result

    async def _close_repository_client(self, repo_client) -> None:
        """Call the client's async ``close()`` if it has one; never raises."""
        close = getattr(repo_client, "close", None)
        if close is None:
            return
        try:
            await close()
        except Exception as e:
            # Cleanup is best-effort; a failed close must not mask the sync result.
            logger.warning(f"Failed to close repository client: {str(e)}")

    async def _get_repository_client(self, project: Project):
        """Get appropriate repository client based on project provider.

        A missing provider is treated as ``"github"``.

        Raises:
            ValueError: if the provider is neither ``"gitea"`` nor ``"github"``.
        """
        provider = project.provider or "github"
        if provider == "gitea":
            return await self._create_gitea_client(project)
        elif provider == "github":
            return await self._create_github_client(project)
        else:
            raise ValueError(f"Unsupported provider: {provider}")

    async def _create_gitea_client(self, project: Project):
        """Create Gitea API client.

        Returns:
            A ``GiteaClient`` bound to the project's owner/repository, or
            ``None`` if ``aiohttp`` is not installed or construction fails
            (the reason is logged).
        """
        try:
            import aiohttp

            class GiteaClient:
                """Minimal Gitea issues client with a lazily created HTTP session."""

                def __init__(self, base_url: str, owner: str, repo: str, token: Optional[str] = None):
                    self.base_url = base_url.rstrip('/')
                    self.owner = owner
                    self.repo = repo
                    self.token = token
                    self.session = None

                async def list_available_tasks(self) -> List[Dict]:
                    """List open issues with bzzz-task label.

                    Issues that already have an assignee are dropped. A
                    non-200 response is logged and yields an empty list.
                    """
                    if not self.session:
                        self.session = aiohttp.ClientSession()

                    url = f"{self.base_url}/api/v1/repos/{self.owner}/{self.repo}/issues"
                    params = {
                        "state": "open",
                        "labels": "bzzz-task",
                        "limit": 50
                    }
                    headers = {}
                    if self.token:
                        headers["Authorization"] = f"token {self.token}"

                    async with self.session.get(url, params=params, headers=headers) as response:
                        if response.status != 200:
                            logger.error(f"Gitea API error: {response.status}")
                            return []
                        issues = await response.json()
                        # Only unassigned tasks
                        return [
                            self._convert_issue_to_task(issue)
                            for issue in issues
                            if not issue.get("assignee")
                        ]

                def _convert_issue_to_task(self, issue: Dict) -> Dict:
                    """Convert Gitea issue to task format."""
                    # "or" guards against the key being present with a null value.
                    labels = [label["name"] for label in (issue.get("labels") or [])]
                    body = issue.get("body") or ""

                    return {
                        "id": issue["id"],
                        "number": issue["number"],
                        "title": issue["title"],
                        "description": body,
                        "state": issue["state"],
                        "labels": labels,
                        "created_at": issue["created_at"],
                        "updated_at": issue["updated_at"],
                        "provider": "gitea",
                        "repository": f"{self.owner}/{self.repo}",
                        # Role, expertise and priority are derived from labels
                        "required_role": self._extract_required_role(labels),
                        "required_expertise": self._extract_required_expertise(labels),
                        "priority": self._extract_priority(labels),
                        "task_type": self._extract_task_type(labels, body),
                        "url": issue.get("html_url", "")
                    }

                def _extract_required_role(self, labels: List[str]) -> str:
                    """Extract required role from labels (first matching label wins)."""
                    role_map = {
                        "frontend": "frontend_developer",
                        "backend": "backend_developer",
                        "security": "security_expert",
                        "design": "ui_ux_designer",
                        "devops": "devops_engineer",
                        "documentation": "technical_writer",
                        "bug": "qa_engineer",
                        "architecture": "senior_software_architect"
                    }
                    for label in labels:
                        label_lower = label.lower()
                        if label_lower in role_map:
                            return role_map[label_lower]
                    return "full_stack_engineer"  # Default

                def _extract_required_expertise(self, labels: List[str]) -> List[str]:
                    """Extract required expertise from labels.

                    Returns the de-duplicated union of the expertise tags of
                    every matching label, or ``["general_development"]`` when
                    no label matches. Order is not guaranteed.
                    """
                    expertise_map = {
                        "frontend": ["frontend", "javascript", "ui_development"],
                        "backend": ["backend", "api_development", "server_frameworks"],
                        "database": ["database", "sql", "data_modeling"],
                        "security": ["security", "cybersecurity", "vulnerability_analysis"],
                        "testing": ["testing", "qa_methodologies", "debugging"],
                        "devops": ["deployment", "infrastructure", "automation"],
                        "design": ["design", "user_experience", "prototyping"]
                    }
                    expertise = set()
                    for label in labels:
                        expertise.update(expertise_map.get(label.lower(), ()))
                    return list(expertise) if expertise else ["general_development"]

                def _extract_priority(self, labels: List[str]) -> int:
                    """Extract priority from labels.

                    The first label that yields a value wins: ``priority-<int>``
                    gives that integer, ``urgent``/``critical`` give 10,
                    ``high`` gives 8, ``low`` gives 3. Defaults to 5.
                    """
                    for label in labels:
                        label_lower = label.lower()
                        if "priority-" in label_lower:
                            try:
                                return int(label_lower.split("priority-")[1])
                            except (ValueError, IndexError):
                                # Non-numeric suffix: keep looking at later labels.
                                pass
                        elif label_lower in ["urgent", "critical"]:
                            return 10
                        elif label_lower in ["high"]:
                            return 8
                        elif label_lower in ["low"]:
                            return 3
                    return 5  # Default priority

                def _extract_task_type(self, labels: List[str], body: str) -> str:
                    """Extract task type from labels (``body`` is currently unused)."""
                    type_map = {
                        "bug": "bug_fix",
                        "bugfix": "bug_fix",
                        "enhancement": "feature",
                        "feature": "feature",
                        "documentation": "documentation",
                        "docs": "documentation",
                        "security": "security",
                        "refactor": "refactoring",
                        "refactoring": "refactoring",
                    }
                    for label in labels:
                        task_type = type_map.get(label.lower())
                        if task_type is not None:
                            return task_type
                    return "general"

                async def close(self):
                    """Close the HTTP session, if one was opened; safe to call twice."""
                    if self.session:
                        await self.session.close()
                        self.session = None

            # Create and return Gitea client
            base_url = project.provider_base_url or "http://192.168.1.113:3000"
            token = None  # TODO: Get from secure storage

            return GiteaClient(
                base_url=base_url,
                owner=project.git_owner,
                repo=project.git_repository,
                token=token
            )
        except ImportError:
            logger.error("aiohttp not available for Gitea client")
            return None
        except Exception as e:
            logger.error(f"Failed to create Gitea client: {str(e)}")
            return None

    async def _create_github_client(self, project: Project):
        """Create GitHub API client (placeholder for now); always returns ``None``."""
        # TODO: Implement GitHub client similar to Gitea
        logger.warning("GitHub client not yet implemented")
        return None

    async def _process_task_for_assignment(self, db: Session, project: Project, task: Dict) -> bool:
        """Process a task for automatic assignment to suitable agents.

        Returns:
            ``True`` if the task was assigned and cached. ``False`` if
            auto-assignment is disabled for the project, the task is already
            in the cache, no suitable agent exists, or an error occurred
            (errors are logged, not raised).
        """
        try:
            # Check if auto-assignment is enabled for this project
            if not getattr(project, 'auto_assignment', True):
                return False

            # Check if task was already processed recently
            task_key = f"{project.id}:{task['number']}"
            if task_key in self._task_cache:
                return False

            # Find suitable agents for this task
            suitable_agents = await self._find_suitable_agents(db, task)
            if not suitable_agents:
                logger.info(f"No suitable agents found for task {task['number']} in {project.name}")
                return False

            # Select best agent (first in sorted list)
            selected_agent = suitable_agents[0]

            # Log the assignment attempt
            await self._log_task_assignment(db, project, task, selected_agent, "auto_assigned")

            # Cache this task to avoid reprocessing
            self._task_cache[task_key] = {
                "assigned_at": datetime.now(),
                "agent_id": selected_agent["id"],
                "task": task
            }

            logger.info(f"Assigned task {task['number']} to agent {selected_agent['id']} ({selected_agent['role']})")
            return True
        except Exception as e:
            logger.error(f"Error processing task {task.get('number', 'unknown')} for assignment: {str(e)}")
            return False

    async def _find_suitable_agents(self, db: Session, task: Dict) -> List[Dict]:
        """Find agents suitable for a task based on role and expertise.

        Considers agents whose status is ``"online"`` or ``"ready"`` and that
        have a role, skips those at capacity, and keeps those scoring above
        0.3.

        Returns:
            Up to three agent dicts (each with an added ``score``), best
            first. Returns ``[]`` on any error (logged).
        """
        try:
            # Get all online agents
            agents = db.query(Agent).filter(
                and_(
                    Agent.status.in_(["online", "ready"]),
                    Agent.role.isnot(None)  # Only agents with assigned roles
                )
            ).all()

            if not agents:
                return []

            scored_agents = []
            for agent in agents:
                # Convert to dict format for scoring
                agent_info = {
                    "id": agent.id,
                    "role": agent.role,
                    "expertise": agent.expertise or [],
                    "current_tasks": agent.current_tasks or 0,
                    "max_tasks": agent.max_concurrent or 2,
                    "performance": 0.8,  # Default performance score
                    "availability": 1.0 if agent.status == "ready" else 0.7,
                    "last_seen": agent.last_seen or datetime.now()
                }

                # Skip if agent is at capacity
                if agent_info["current_tasks"] >= agent_info["max_tasks"]:
                    continue

                score = self._calculate_agent_task_score(task, agent_info)
                if score > 0.3:  # Minimum threshold
                    scored_agents.append({**agent_info, "score": score})

            # Sort by score (highest first)
            scored_agents.sort(key=lambda x: x["score"], reverse=True)
            return scored_agents[:3]  # Return top 3 candidates
        except Exception as e:
            logger.error(f"Error finding suitable agents: {str(e)}")
            return []

    def _calculate_agent_task_score(self, task: Dict, agent_info: Dict) -> float:
        """Calculate how suitable an agent is for a task.

        Score components, in order of application:

        * role: +0.5 exact match, +0.3 compatible role, +0.2 if the agent is
          a ``full_stack_engineer``;
        * expertise: up to +0.3, proportional to the share of required
          expertise the agent covers;
        * priority: ``priority / 10 * 0.1``;
        * the subtotal is multiplied by ``availability``;
        * workload: minus ``current_tasks / max_tasks * 0.2``.

        Returns:
            The score clamped to ``[0.0, 1.0]``.
        """
        score = 0.0

        # Role matching
        task_role = task.get("required_role", "")
        agent_role = agent_info.get("role", "")
        if task_role == agent_role:
            score += 0.5  # Perfect role match
        elif self._is_compatible_role(task_role, agent_role):
            score += 0.3  # Compatible role
        elif agent_role == "full_stack_engineer":
            score += 0.2  # Full-stack can handle most tasks

        # Expertise matching
        task_expertise = task.get("required_expertise", [])
        agent_expertise = agent_info.get("expertise", [])
        if task_expertise and agent_expertise:
            expertise_overlap = len(set(task_expertise) & set(agent_expertise))
            score += (expertise_overlap / len(task_expertise)) * 0.3

        # Priority bonus
        priority = task.get("priority", 5)
        score += (priority / 10.0) * 0.1

        # Availability bonus
        score *= agent_info.get("availability", 1.0)

        # Workload penalty
        current_tasks = agent_info.get("current_tasks", 0)
        max_tasks = agent_info.get("max_tasks", 2)
        if max_tasks > 0:
            workload_ratio = current_tasks / max_tasks
        else:
            # No capacity at all: treat as fully loaded instead of dividing by zero.
            workload_ratio = 1.0
        score -= workload_ratio * 0.2

        return max(0.0, min(1.0, score))

    def _is_compatible_role(self, required_role: str, agent_role: str) -> bool:
        """Check if agent role is compatible with required role.

        Compatibility is a fixed one-way lookup keyed by the required role;
        an unknown required role is compatible with nothing.
        """
        compatibility_map = {
            "frontend_developer": ["full_stack_engineer", "ui_ux_designer"],
            "backend_developer": ["full_stack_engineer", "database_engineer"],
            "qa_engineer": ["full_stack_engineer"],
            "devops_engineer": ["systems_engineer", "backend_developer"],
            "security_expert": ["backend_developer", "senior_software_architect"],
            "ui_ux_designer": ["frontend_developer"],
            "technical_writer": ["full_stack_engineer"],
            "database_engineer": ["backend_developer", "full_stack_engineer"],
        }
        return agent_role in compatibility_map.get(required_role, [])

    async def _log_task_assignment(self, db: Session, project: Project, task: Dict, agent: Dict, reason: str):
        """Log task assignment for tracking (log output only; nothing is persisted)."""
        try:
            # This would insert into task_assignments table
            # For now, just log it
            logger.info(f"Task assignment: Project={project.name}, Task={task['number']}, "
                        f"Agent={agent['id']}, Role={agent['role']}, Reason={reason}")
        except Exception as e:
            logger.error(f"Failed to log task assignment: {str(e)}")

    async def get_project_task_stats(self, db: Session, project_id: int) -> Dict[str, Any]:
        """Get task statistics for a project.

        Returns:
            Dict with ``project_id``, ``project_name``, ``provider``,
            ``last_sync`` (ISO string or ``None``), ``cached_tasks``,
            ``bzzz_enabled`` and ``auto_assignment``; or ``{"error": ...}``
            if the project does not exist or an exception occurs.
        """
        try:
            project = db.query(Project).filter(Project.id == project_id).first()
            if not project:
                return {"error": "Project not found"}

            # Get recent sync info
            last_sync = self._last_sync.get(project_id)

            # Count cached tasks for this project
            key_prefix = f"{project_id}:"
            cached_tasks = sum(1 for task_key in self._task_cache if task_key.startswith(key_prefix))

            return {
                "project_id": project_id,
                "project_name": project.name,
                "provider": project.provider or "github",
                "last_sync": last_sync.isoformat() if last_sync else None,
                "cached_tasks": cached_tasks,
                "bzzz_enabled": project.bzzz_enabled,
                "auto_assignment": getattr(project, "auto_assignment", True)
            }
        except Exception as e:
            logger.error(f"Error getting project task stats: {str(e)}")
            return {"error": str(e)}

    async def cleanup_old_cache(self, max_age_hours: int = 24):
        """Clean up old task cache entries.

        Removes every cache entry whose ``assigned_at`` is more than
        ``max_age_hours`` before now, then logs how many were removed.
        """
        cutoff_time = datetime.now() - timedelta(hours=max_age_hours)

        # Collect keys first: the dict must not change size while iterated.
        to_remove = [
            task_key
            for task_key, task_info in self._task_cache.items()
            if task_info["assigned_at"] < cutoff_time
        ]
        for key in to_remove:
            del self._task_cache[key]

        logger.info(f"Cleaned up {len(to_remove)} old task cache entries")
# Global instance, created when this module is imported. Its task cache and
# last-sync timestamps are plain in-memory dicts on this object.
repository_service = RepositoryService()