WIP: Save current work before CHORUS rebrand

- Agent roles integration progress
- Various backend and frontend updates
- Storybook cache cleanup

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
anthonyrawlins
2025-08-01 02:20:56 +10:00
parent 1e81daaf18
commit b6bff318d9
740 changed files with 90022 additions and 279523 deletions

View File

@@ -15,6 +15,8 @@ Key Features:
from fastapi import APIRouter, HTTPException, Request, Depends, status
from typing import List, Dict, Any
import time
import logging
from ..models.agent import Agent
from ..models.responses import (
AgentListResponse,
@@ -29,6 +31,9 @@ router = APIRouter()
from app.core.database import SessionLocal
from app.models.agent import Agent as ORMAgent
from ..services.agent_service import AgentType
logger = logging.getLogger(__name__)
@router.get(
@@ -384,4 +389,244 @@ async def unregister_agent(
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to unregister agent: {str(e)}"
)
@router.post(
    "/agents/heartbeat",
    status_code=status.HTTP_200_OK,
    summary="Agent heartbeat update",
    description="""
Update agent status and maintain registration through periodic heartbeat.
This endpoint allows agents to:
- Confirm they are still online and responsive
- Update their current status and metrics
- Report any capability or configuration changes
- Maintain their registration in the cluster
Agents should call this endpoint every 30-60 seconds to maintain
their active status in the Hive cluster.
""",
    responses={
        200: {"description": "Heartbeat received successfully"},
        404: {"model": ErrorResponse, "description": "Agent not registered"},
        400: {"model": ErrorResponse, "description": "Invalid heartbeat data"}
    }
)
async def agent_heartbeat(
    heartbeat_data: Dict[str, Any],
    request: Request
):
    """
    Process an agent heartbeat to keep its registration alive.

    Args:
        heartbeat_data: Request body sent by the agent. Must contain a truthy
            ``agent_id``. If it contains ``current_tasks``, that value is
            written to the ``agents`` table together with ``last_seen``.
        request: FastAPI request object; the coordinator is looked up on
            ``request.app.state.hive_coordinator``.

    Returns:
        A dict with ``status``, ``message`` and ``timestamp`` (``time.time()``).

    Raises:
        HTTPException: 400 when ``agent_id`` is missing, 503 when no
            coordinator is available, 500 on any unexpected failure.
            An ``HTTPException`` raised while processing is passed through
            unchanged instead of being re-wrapped as a 500.
    """
    agent_id = heartbeat_data.get("agent_id")
    if not agent_id:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Missing agent_id in heartbeat data"
        )

    # Access coordinator: prefer the one stored on the app state, otherwise
    # fall back to the module-level instance exposed by the main module.
    hive_coordinator = getattr(request.app.state, 'hive_coordinator', None)
    if not hive_coordinator:
        # Imported lazily; presumably to avoid a circular import with main
        # (TODO confirm).
        from ..main import unified_coordinator
        hive_coordinator = unified_coordinator

    if not hive_coordinator:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Coordinator service unavailable"
        )

    try:
        # Update agent heartbeat timestamp
        agent_service = hive_coordinator.agent_service
        if agent_service:
            agent_service.update_agent_heartbeat(agent_id)

        # Update current tasks if provided - use raw SQL to avoid role column
        if "current_tasks" in heartbeat_data:
            current_tasks = heartbeat_data["current_tasks"]
            try:
                with SessionLocal() as db:
                    from sqlalchemy import text
                    db.execute(text(
                        "UPDATE agents SET current_tasks = :current_tasks, last_seen = NOW() WHERE id = :agent_id"
                    ), {
                        "current_tasks": current_tasks,
                        "agent_id": agent_id
                    })
                    db.commit()
            except Exception as e:
                # Best effort: a failed task-count update must not fail the
                # heartbeat itself.
                logger.warning("Could not update agent tasks: %s", e)

        return {
            "status": "success",
            "message": f"Heartbeat received from agent '{agent_id}'",
            "timestamp": time.time()
        }
    except HTTPException:
        # Keep deliberate HTTP errors (status code and detail) intact.
        raise
    except Exception as e:
        # Record the traceback server-side; the client only gets the summary.
        logger.exception("Failed to process heartbeat for agent %s", agent_id)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to process heartbeat: {str(e)}"
        ) from e
def _infer_specialty(models, hostname):
    """Pick an AgentType from the advertised model names and the hostname."""
    model_text = str(models).lower()
    # hostname is optional in the request body, so tolerate None here.
    host_text = str(hostname or "").lower()
    if "codellama" in model_text or "code" in host_text:
        return AgentType.KERNEL_DEV
    if "gemma" in model_text:
        return AgentType.PYTORCH_DEV
    # Everything else (including plain llama models) uses the default.
    return AgentType.GENERAL_AI


@router.post(
    "/agents/auto-register",
    response_model=AgentRegistrationResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Automatic agent registration",
    description="""
Register an agent automatically with capability detection.
This endpoint is designed for Bzzz agents running as systemd services
to automatically register themselves with the Hive coordinator.
Features:
- Automatic capability detection based on available models
- Network discovery support
- Retry-friendly for service startup scenarios
- Health validation before registration
""",
    responses={
        201: {"description": "Agent auto-registered successfully"},
        400: {"model": ErrorResponse, "description": "Invalid agent configuration"},
        409: {"model": ErrorResponse, "description": "Agent already registered"},
        503: {"model": ErrorResponse, "description": "Agent endpoint unreachable"}
    }
)
async def auto_register_agent(
    agent_data: Dict[str, Any],
    request: Request
) -> AgentRegistrationResponse:
    """
    Automatically register a Bzzz agent with the Hive coordinator.

    If a row with the same ``agent_id`` already exists, its endpoint and
    ``last_seen`` are refreshed and the call returns early. Otherwise the
    agent's models are taken from the request (or fetched from
    ``{endpoint}/api/tags`` when none are supplied), a specialty is derived
    from the model names and hostname, and the agent is upserted into the
    ``agents`` table.

    Args:
        agent_data: Agent configuration. ``agent_id`` and ``endpoint`` are
            required; ``hostname``, ``models`` and ``max_concurrent``
            (default 2) are optional.
        request: FastAPI request object; the coordinator is looked up on
            ``request.app.state.hive_coordinator``.

    Returns:
        AgentRegistrationResponse: Registration confirmation.

    Raises:
        HTTPException: 400 when ``agent_id`` or ``endpoint`` is missing,
            503 when no coordinator is available, 500 when the database
            insert or any other step fails.

    NOTE(review): the 409 listed in ``responses`` is not raised by this
    handler; an already-registered agent is updated instead.
    """
    # Extract required fields
    agent_id = agent_data.get("agent_id")
    endpoint = agent_data.get("endpoint")
    hostname = agent_data.get("hostname")
    if not agent_id or not endpoint:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Missing required fields: agent_id, endpoint"
        )

    # Access coordinator: prefer the one stored on the app state, otherwise
    # fall back to the module-level instance exposed by the main module.
    hive_coordinator = getattr(request.app.state, 'hive_coordinator', None)
    if not hive_coordinator:
        from ..main import unified_coordinator
        hive_coordinator = unified_coordinator

    if not hive_coordinator:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Coordinator service unavailable"
        )

    try:
        from sqlalchemy import text

        # Check if agent already exists - use basic query to avoid role column
        try:
            with SessionLocal() as db:
                existing_agent = db.execute(text(
                    "SELECT id, endpoint FROM agents WHERE id = :agent_id LIMIT 1"
                ), {"agent_id": agent_id}).fetchone()
                if existing_agent:
                    # Update existing agent
                    db.execute(text(
                        "UPDATE agents SET endpoint = :endpoint, last_seen = NOW() WHERE id = :agent_id"
                    ), {"endpoint": endpoint, "agent_id": agent_id})
                    db.commit()
                    return AgentRegistrationResponse(
                        agent_id=agent_id,
                        endpoint=endpoint,
                        message=f"Agent '{agent_id}' registration updated successfully"
                    )
        except Exception as e:
            # Best effort: fall through to the upsert below, which also
            # handles an existing row via ON CONFLICT.
            logger.warning("Could not check existing agent: %s", e)

        # Detect capabilities and models. Normalise so that a missing/None
        # value or a single bare string does not break the logic below.
        models = agent_data.get("models") or []
        if isinstance(models, str):
            models = [models]
        if not models:
            # Try to detect models from endpoint
            try:
                import aiohttp
                async with aiohttp.ClientSession() as session:
                    async with session.get(
                        f"{endpoint}/api/tags",
                        timeout=aiohttp.ClientTimeout(total=5)
                    ) as response:
                        if response.status == 200:
                            tags_data = await response.json()
                            models = [model["name"] for model in tags_data.get("models", [])]
            except Exception as e:
                # Detection is optional; registration continues with no models.
                logger.warning("Could not detect models for %s: %s", agent_id, e)

        # Determine specialty based on models or hostname
        specialty = _infer_specialty(models, hostname)

        # Insert agent directly into database
        try:
            with SessionLocal() as db:
                # Insert new agent using raw SQL to avoid role column issues
                db.execute(text("""
                    INSERT INTO agents (id, name, endpoint, model, specialty, max_concurrent, current_tasks, status, created_at, last_seen)
                    VALUES (:agent_id, :name, :endpoint, :model, :specialty, :max_concurrent, 0, 'active', NOW(), NOW())
                    ON CONFLICT (id) DO UPDATE SET
                        endpoint = EXCLUDED.endpoint,
                        model = EXCLUDED.model,
                        specialty = EXCLUDED.specialty,
                        max_concurrent = EXCLUDED.max_concurrent,
                        last_seen = NOW()
                """), {
                    "agent_id": agent_id,
                    "name": agent_id,  # Use agent_id as name
                    "endpoint": endpoint,
                    "model": models[0] if models else "unknown",
                    "specialty": specialty.value,
                    "max_concurrent": agent_data.get("max_concurrent", 2)
                })
                db.commit()
                return AgentRegistrationResponse(
                    agent_id=agent_id,
                    endpoint=endpoint,
                    message=f"Agent '{agent_id}' auto-registered successfully with specialty '{specialty.value}'"
                )
        except Exception as e:
            logger.error("Database insert failed: %s", e)
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Failed to register agent in database: {str(e)}"
            ) from e
    except HTTPException:
        # Already a well-formed HTTP error (e.g. the insert failure above);
        # do not wrap it a second time.
        raise
    except Exception as e:
        logger.exception("Failed to auto-register agent %s", agent_id)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to auto-register agent: {str(e)}"
        ) from e