WIP: Save current work before CHORUS rebrand
- Agent roles integration progress
- Various backend and frontend updates
- Storybook cache cleanup

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,8 @@ Key Features:
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Request, Depends, status
|
||||
from typing import List, Dict, Any
|
||||
import time
|
||||
import logging
|
||||
from ..models.agent import Agent
|
||||
from ..models.responses import (
|
||||
AgentListResponse,
|
||||
@@ -29,6 +31,9 @@ router = APIRouter()
|
||||
|
||||
from app.core.database import SessionLocal
|
||||
from app.models.agent import Agent as ORMAgent
|
||||
from ..services.agent_service import AgentType
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@router.get(
|
||||
@@ -384,4 +389,244 @@ async def unregister_agent(
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to unregister agent: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.post(
    "/agents/heartbeat",
    status_code=status.HTTP_200_OK,
    summary="Agent heartbeat update",
    description="""
    Update agent status and maintain registration through periodic heartbeat.

    This endpoint allows agents to:
    - Confirm they are still online and responsive
    - Update their current status and metrics
    - Report any capability or configuration changes
    - Maintain their registration in the cluster

    Agents should call this endpoint every 30-60 seconds to maintain
    their active status in the Hive cluster.
    """,
    responses={
        200: {"description": "Heartbeat received successfully"},
        404: {"model": ErrorResponse, "description": "Agent not registered"},
        400: {"model": ErrorResponse, "description": "Invalid heartbeat data"}
    }
)
async def agent_heartbeat(
    heartbeat_data: Dict[str, Any],
    request: Request
):
    """
    Process agent heartbeat to maintain registration.

    Args:
        heartbeat_data: Agent status and metrics data. Must contain
            ``agent_id``; ``current_tasks`` is optional and, when present,
            is written to the ``agents`` table.
        request: FastAPI request object (used to reach the coordinator
            stored on ``app.state``).

    Returns:
        Dict with ``status``, a human-readable ``message`` and a Unix
        ``timestamp``.

    Raises:
        HTTPException: 400 if ``agent_id`` is missing, 503 if no coordinator
            is available, 500 on any unexpected failure.

    Note:
        A 404 response is declared on the route, but this handler never
        raises it: an unknown ``agent_id`` is not detected here.
    """
    agent_id = heartbeat_data.get("agent_id")
    if not agent_id:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Missing agent_id in heartbeat data"
        )

    # Access coordinator: prefer the instance attached to the app, and fall
    # back to the module-level one exported by the application entry point.
    hive_coordinator = getattr(request.app.state, 'hive_coordinator', None)
    if not hive_coordinator:
        from ..main import unified_coordinator
        hive_coordinator = unified_coordinator

    if not hive_coordinator:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Coordinator service unavailable"
        )

    try:
        # Update agent heartbeat timestamp
        agent_service = hive_coordinator.agent_service
        if agent_service:
            agent_service.update_agent_heartbeat(agent_id)

        # Update current tasks if provided - use raw SQL to avoid role column
        if "current_tasks" in heartbeat_data:
            current_tasks = heartbeat_data["current_tasks"]
            try:
                with SessionLocal() as db:
                    from sqlalchemy import text
                    db.execute(
                        text(
                            "UPDATE agents SET current_tasks = :current_tasks, last_seen = NOW() WHERE id = :agent_id"
                        ),
                        {
                            "current_tasks": current_tasks,
                            "agent_id": agent_id
                        }
                    )
                    db.commit()
            except Exception as e:
                # Best effort: a failed task-count update must not fail the
                # heartbeat itself.
                logger.warning("Could not update agent tasks: %s", e)

        return {
            "status": "success",
            "message": f"Heartbeat received from agent '{agent_id}'",
            "timestamp": time.time()
        }

    except HTTPException:
        # Never re-wrap a deliberate HTTP error as a generic 500.
        raise
    except Exception as e:
        # Record the traceback server-side; the client only gets the summary.
        logger.exception("Failed to process heartbeat for agent %s", agent_id)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to process heartbeat: {str(e)}"
        ) from e
|
||||
|
||||
|
||||
@router.post(
    "/agents/auto-register",
    response_model=AgentRegistrationResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Automatic agent registration",
    description="""
    Register an agent automatically with capability detection.

    This endpoint is designed for Bzzz agents running as systemd services
    to automatically register themselves with the Hive coordinator.

    Features:
    - Automatic capability detection based on available models
    - Network discovery support
    - Retry-friendly for service startup scenarios
    - Health validation before registration
    """,
    responses={
        201: {"description": "Agent auto-registered successfully"},
        400: {"model": ErrorResponse, "description": "Invalid agent configuration"},
        409: {"model": ErrorResponse, "description": "Agent already registered"},
        503: {"model": ErrorResponse, "description": "Agent endpoint unreachable"}
    }
)
async def auto_register_agent(
    agent_data: Dict[str, Any],
    request: Request
) -> AgentRegistrationResponse:
    """
    Automatically register a Bzzz agent with the Hive coordinator.

    Args:
        agent_data: Agent configuration. ``agent_id`` and ``endpoint`` are
            required; ``hostname``, ``models`` and ``max_concurrent``
            (default 2) are optional.
        request: FastAPI request object (used to reach the coordinator
            stored on ``app.state``).

    Returns:
        AgentRegistrationResponse: Registration confirmation

    Raises:
        HTTPException: 400 if a required field is missing, 503 if no
            coordinator is available, 500 if the database insert or any
            other step fails unexpectedly.

    Note:
        A 409 response is declared on the route, but this handler never
        raises it: an already-registered agent has its endpoint refreshed
        and a normal confirmation is returned.
    """
    # Extract required fields
    agent_id = agent_data.get("agent_id")
    endpoint = agent_data.get("endpoint")
    # hostname is optional; normalise a missing/None value to "" so the
    # specialty heuristic below can safely call .lower() on it.
    hostname = str(agent_data.get("hostname") or "")

    if not agent_id or not endpoint:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Missing required fields: agent_id, endpoint"
        )

    # Access coordinator: prefer the instance attached to the app, and fall
    # back to the module-level one exported by the application entry point.
    hive_coordinator = getattr(request.app.state, 'hive_coordinator', None)
    if not hive_coordinator:
        from ..main import unified_coordinator
        hive_coordinator = unified_coordinator

    if not hive_coordinator:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Coordinator service unavailable"
        )

    try:
        # Check if agent already exists - use basic query to avoid role column
        try:
            with SessionLocal() as db:
                from sqlalchemy import text
                existing_agent = db.execute(
                    text("SELECT id, endpoint FROM agents WHERE id = :agent_id LIMIT 1"),
                    {"agent_id": agent_id}
                ).fetchone()
                if existing_agent:
                    # Update existing agent
                    db.execute(
                        text("UPDATE agents SET endpoint = :endpoint, last_seen = NOW() WHERE id = :agent_id"),
                        {"endpoint": endpoint, "agent_id": agent_id}
                    )
                    db.commit()

                    return AgentRegistrationResponse(
                        agent_id=agent_id,
                        endpoint=endpoint,
                        message=f"Agent '{agent_id}' registration updated successfully"
                    )
        except Exception as e:
            # Best effort: if the lookup fails, fall through to the upsert
            # below, which handles an existing row via ON CONFLICT.
            logger.warning("Could not check existing agent: %s", e)

        # Detect capabilities and models. ``or []`` also covers an explicit
        # ``"models": null`` in the request body.
        models = agent_data.get("models") or []
        if not models:
            # Try to detect models from endpoint
            # NOTE(review): ``endpoint`` comes from the request body and is
            # fetched server-side - confirm this route is only reachable by
            # trusted agents.
            try:
                import aiohttp
                async with aiohttp.ClientSession() as session:
                    async with session.get(
                        f"{endpoint}/api/tags",
                        timeout=aiohttp.ClientTimeout(total=5)
                    ) as response:
                        if response.status == 200:
                            tags_data = await response.json()
                            models = [model["name"] for model in tags_data.get("models", [])]
            except Exception as e:
                logger.warning("Could not detect models for %s: %s", agent_id, e)

        # Determine specialty based on models or hostname. Anything that
        # matches neither rule (including plain llama models) keeps the
        # GENERAL_AI default.
        models_text = str(models).lower()
        specialty = AgentType.GENERAL_AI  # Default
        if "codellama" in models_text or "code" in hostname.lower():
            specialty = AgentType.KERNEL_DEV
        elif "gemma" in models_text:
            specialty = AgentType.PYTORCH_DEV

        # Insert agent directly into database
        try:
            with SessionLocal() as db:
                from sqlalchemy import text
                # Insert new agent using raw SQL to avoid role column issues
                db.execute(text("""
                    INSERT INTO agents (id, name, endpoint, model, specialty, max_concurrent, current_tasks, status, created_at, last_seen)
                    VALUES (:agent_id, :name, :endpoint, :model, :specialty, :max_concurrent, 0, 'active', NOW(), NOW())
                    ON CONFLICT (id) DO UPDATE SET
                        endpoint = EXCLUDED.endpoint,
                        model = EXCLUDED.model,
                        specialty = EXCLUDED.specialty,
                        max_concurrent = EXCLUDED.max_concurrent,
                        last_seen = NOW()
                """), {
                    "agent_id": agent_id,
                    "name": agent_id,  # Use agent_id as name
                    "endpoint": endpoint,
                    "model": models[0] if models else "unknown",
                    "specialty": specialty.value,
                    "max_concurrent": agent_data.get("max_concurrent", 2)
                })
                db.commit()

                return AgentRegistrationResponse(
                    agent_id=agent_id,
                    endpoint=endpoint,
                    message=f"Agent '{agent_id}' auto-registered successfully with specialty '{specialty.value}'"
                )
        except Exception as e:
            logger.error("Database insert failed: %s", e)
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Failed to register agent in database: {str(e)}"
            ) from e

    except HTTPException:
        # Let the specific error raised above reach the client unchanged
        # instead of re-wrapping it in the generic handler below.
        raise
    except Exception as e:
        # Record the traceback server-side; the client only gets the summary.
        logger.exception("Failed to auto-register agent %s", agent_id)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to auto-register agent: {str(e)}"
        ) from e
|
||||
Reference in New Issue
Block a user