WIP: Save current work before CHORUS rebrand

- Agent roles integration progress
- Various backend and frontend updates
- Storybook cache cleanup

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
anthonyrawlins
2025-08-01 02:20:56 +10:00
parent 1e81daaf18
commit b6bff318d9
740 changed files with 90022 additions and 279523 deletions

View File

@@ -15,6 +15,8 @@ Key Features:
from fastapi import APIRouter, HTTPException, Request, Depends, status
from typing import List, Dict, Any
import time
import logging
from ..models.agent import Agent
from ..models.responses import (
AgentListResponse,
@@ -29,6 +31,9 @@ router = APIRouter()
from app.core.database import SessionLocal
from app.models.agent import Agent as ORMAgent
from ..services.agent_service import AgentType
logger = logging.getLogger(__name__)
@router.get(
@@ -384,4 +389,244 @@ async def unregister_agent(
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to unregister agent: {str(e)}"
)
@router.post(
"/agents/heartbeat",
status_code=status.HTTP_200_OK,
summary="Agent heartbeat update",
description="""
Update agent status and maintain registration through periodic heartbeat.
This endpoint allows agents to:
- Confirm they are still online and responsive
- Update their current status and metrics
- Report any capability or configuration changes
- Maintain their registration in the cluster
Agents should call this endpoint every 30-60 seconds to maintain
their active status in the Hive cluster.
""",
responses={
200: {"description": "Heartbeat received successfully"},
404: {"model": ErrorResponse, "description": "Agent not registered"},
400: {"model": ErrorResponse, "description": "Invalid heartbeat data"}
}
)
async def agent_heartbeat(
heartbeat_data: Dict[str, Any],
request: Request
):
"""
Process agent heartbeat to maintain registration.
Args:
heartbeat_data: Agent status and metrics data
request: FastAPI request object
Returns:
Success confirmation and any coordinator updates
"""
agent_id = heartbeat_data.get("agent_id")
if not agent_id:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Missing agent_id in heartbeat data"
)
# Access coordinator
hive_coordinator = getattr(request.app.state, 'hive_coordinator', None)
if not hive_coordinator:
from ..main import unified_coordinator
hive_coordinator = unified_coordinator
if not hive_coordinator:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail="Coordinator service unavailable"
)
try:
# Update agent heartbeat timestamp
agent_service = hive_coordinator.agent_service
if agent_service:
agent_service.update_agent_heartbeat(agent_id)
# Update current tasks if provided - use raw SQL to avoid role column
if "current_tasks" in heartbeat_data:
current_tasks = heartbeat_data["current_tasks"]
try:
with SessionLocal() as db:
from sqlalchemy import text
db.execute(text(
"UPDATE agents SET current_tasks = :current_tasks, last_seen = NOW() WHERE id = :agent_id"
), {
"current_tasks": current_tasks,
"agent_id": agent_id
})
db.commit()
except Exception as e:
logger.warning(f"Could not update agent tasks: {e}")
return {
"status": "success",
"message": f"Heartbeat received from agent '{agent_id}'",
"timestamp": time.time()
}
except Exception as e:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to process heartbeat: {str(e)}"
)
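# Illustrative only (not part of this router): a minimal agent-side caller for the
# heartbeat endpoint above, assuming httpx is available and the agents router is
# mounted under /api as in main.py. The base URL and agent_id are placeholders.
async def example_agent_heartbeat_loop(hive_api: str = "http://localhost:8000", agent_id: str = "example-agent"):
    import asyncio
    import httpx  # assumed dependency for the example, not required by this module

    async with httpx.AsyncClient() as client:
        while True:
            try:
                resp = await client.post(
                    f"{hive_api}/api/agents/heartbeat",
                    json={"agent_id": agent_id, "current_tasks": 0},
                    timeout=10.0,
                )
                resp.raise_for_status()
            except httpx.HTTPError as exc:
                # Transient failures are expected; retry on the next tick.
                logger.warning(f"Heartbeat failed: {exc}")
            await asyncio.sleep(45)  # within the documented 30-60 second window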
@router.post(
"/agents/auto-register",
response_model=AgentRegistrationResponse,
status_code=status.HTTP_201_CREATED,
summary="Automatic agent registration",
description="""
Register an agent automatically with capability detection.
This endpoint is designed for Bzzz agents running as systemd services
to automatically register themselves with the Hive coordinator.
Features:
- Automatic capability detection based on available models
- Network discovery support
- Retry-friendly for service startup scenarios
- Health validation before registration
""",
responses={
201: {"description": "Agent auto-registered successfully"},
400: {"model": ErrorResponse, "description": "Invalid agent configuration"},
409: {"model": ErrorResponse, "description": "Agent already registered"},
503: {"model": ErrorResponse, "description": "Agent endpoint unreachable"}
}
)
async def auto_register_agent(
agent_data: Dict[str, Any],
request: Request
) -> AgentRegistrationResponse:
"""
Automatically register a Bzzz agent with the Hive coordinator.
Args:
agent_data: Agent configuration including endpoint, models, etc.
request: FastAPI request object
Returns:
AgentRegistrationResponse: Registration confirmation
"""
# Extract required fields
agent_id = agent_data.get("agent_id")
endpoint = agent_data.get("endpoint")
hostname = agent_data.get("hostname")
if not agent_id or not endpoint:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Missing required fields: agent_id, endpoint"
)
# Access coordinator
hive_coordinator = getattr(request.app.state, 'hive_coordinator', None)
if not hive_coordinator:
from ..main import unified_coordinator
hive_coordinator = unified_coordinator
if not hive_coordinator:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail="Coordinator service unavailable"
)
try:
# Check if agent already exists - use basic query to avoid role column
try:
with SessionLocal() as db:
from sqlalchemy import text
existing_agent = db.execute(text(
"SELECT id, endpoint FROM agents WHERE id = :agent_id LIMIT 1"
), {"agent_id": agent_id}).fetchone()
if existing_agent:
# Update existing agent
db.execute(text(
"UPDATE agents SET endpoint = :endpoint, last_seen = NOW() WHERE id = :agent_id"
), {"endpoint": endpoint, "agent_id": agent_id})
db.commit()
return AgentRegistrationResponse(
agent_id=agent_id,
endpoint=endpoint,
message=f"Agent '{agent_id}' registration updated successfully"
)
except Exception as e:
logger.warning(f"Could not check existing agent: {e}")
# Detect capabilities and models
models = agent_data.get("models", [])
if not models:
# Try to detect models from endpoint
try:
import aiohttp
async with aiohttp.ClientSession() as session:
async with session.get(f"{endpoint}/api/tags", timeout=aiohttp.ClientTimeout(total=5)) as response:
if response.status == 200:
tags_data = await response.json()
models = [model["name"] for model in tags_data.get("models", [])]
except Exception as e:
logger.warning(f"Could not detect models for {agent_id}: {e}")
# Determine specialty based on models or hostname
specialty = AgentType.GENERAL_AI # Default
if "codellama" in str(models).lower() or "code" in hostname.lower():
specialty = AgentType.KERNEL_DEV
elif "gemma" in str(models).lower():
specialty = AgentType.PYTORCH_DEV
elif any(model for model in models if "llama" in model.lower()):
specialty = AgentType.GENERAL_AI
# Insert agent directly into database
try:
with SessionLocal() as db:
from sqlalchemy import text
# Insert new agent using raw SQL to avoid role column issues
db.execute(text("""
INSERT INTO agents (id, name, endpoint, model, specialty, max_concurrent, current_tasks, status, created_at, last_seen)
VALUES (:agent_id, :name, :endpoint, :model, :specialty, :max_concurrent, 0, 'active', NOW(), NOW())
ON CONFLICT (id) DO UPDATE SET
endpoint = EXCLUDED.endpoint,
model = EXCLUDED.model,
specialty = EXCLUDED.specialty,
max_concurrent = EXCLUDED.max_concurrent,
last_seen = NOW()
"""), {
"agent_id": agent_id,
"name": agent_id, # Use agent_id as name
"endpoint": endpoint,
"model": models[0] if models else "unknown",
"specialty": specialty.value,
"max_concurrent": agent_data.get("max_concurrent", 2)
})
db.commit()
return AgentRegistrationResponse(
agent_id=agent_id,
endpoint=endpoint,
message=f"Agent '{agent_id}' auto-registered successfully with specialty '{specialty.value}'"
)
except Exception as e:
logger.error(f"Database insert failed: {e}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to register agent in database: {str(e)}"
)
except Exception as e:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to auto-register agent: {str(e)}"
)
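# Illustrative only: the shape of an auto-registration call a Bzzz agent might make
# at service startup. Field names mirror what auto_register_agent() reads above; the
# endpoint URL and model list are placeholder assumptions.
EXAMPLE_AUTO_REGISTRATION_PAYLOAD = {
    "agent_id": "walnut-bzzz",
    "endpoint": "http://walnut.local:11434",  # Ollama-style endpoint exposing /api/tags
    "hostname": "walnut",
    "models": ["codellama:13b"],              # optional; detected from /api/tags when omitted
    "max_concurrent": 2,
}
# POST this dict as JSON to /api/agents/auto-register: 201 on first registration, or a
# success message when the agent row already exists and is simply refreshed.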

View File

@@ -0,0 +1,287 @@
"""
Bzzz hypercore/hyperswarm log streaming API endpoints.
Provides real-time access to agent communication logs from the Bzzz network.
"""
from fastapi import APIRouter, WebSocket, WebSocketDisconnect, HTTPException, Query
from fastapi.responses import StreamingResponse
from typing import List, Optional, Dict, Any
import asyncio
import json
import logging
import httpx
import time
from datetime import datetime, timedelta
router = APIRouter()
logger = logging.getLogger(__name__)
# Keep track of active WebSocket connections
active_connections: List[WebSocket] = []
class BzzzLogEntry:
"""Represents a Bzzz hypercore log entry"""
def __init__(self, data: Dict[str, Any]):
self.index = data.get("index", 0)
self.timestamp = data.get("timestamp", "")
self.author = data.get("author", "")
self.log_type = data.get("type", "")
self.message_data = data.get("data", {})
self.hash_value = data.get("hash", "")
self.prev_hash = data.get("prev_hash", "")
def to_chat_message(self) -> Dict[str, Any]:
"""Convert hypercore log entry to chat message format"""
# Extract message details from the log data
msg_data = self.message_data
return {
"id": f"log-{self.index}",
"senderId": msg_data.get("from_short", self.author),
"senderName": msg_data.get("from_short", self.author),
"content": self._format_message_content(),
"timestamp": self.timestamp,
"messageType": self._determine_message_type(),
"channel": msg_data.get("topic", "unknown"),
"swarmId": f"swarm-{msg_data.get('topic', 'unknown')}",
"isDelivered": True,
"isRead": True,
"logType": self.log_type,
"hash": self.hash_value
}
def _format_message_content(self) -> str:
"""Format the log entry into a readable message"""
msg_data = self.message_data
message_type = msg_data.get("message_type", self.log_type)
if message_type == "availability_broadcast":
status = msg_data.get("data", {}).get("status", "unknown")
current_tasks = msg_data.get("data", {}).get("current_tasks", 0)
max_tasks = msg_data.get("data", {}).get("max_tasks", 0)
return f"Status: {status} ({current_tasks}/{max_tasks} tasks)"
elif message_type == "capability_broadcast":
capabilities = msg_data.get("data", {}).get("capabilities", [])
models = msg_data.get("data", {}).get("models", [])
return f"Updated capabilities: {', '.join(capabilities[:3])}{'...' if len(capabilities) > 3 else ''}"
elif message_type == "task_announced":
task_data = msg_data.get("data", {})
return f"Task announced: {task_data.get('title', 'Unknown task')}"
elif message_type == "task_claimed":
task_data = msg_data.get("data", {})
return f"Task claimed: {task_data.get('title', 'Unknown task')}"
elif message_type == "role_announcement":
role = msg_data.get("data", {}).get("role", "unknown")
return f"Role announcement: {role}"
elif message_type == "collaboration":
return f"Collaboration: {msg_data.get('data', {}).get('content', 'Agent discussion')}"
elif self.log_type == "peer_joined":
return "Agent joined the network"
elif self.log_type == "peer_left":
return "Agent left the network"
else:
# Generic fallback
return f"{message_type}: {json.dumps(msg_data.get('data', {}))[:100]}{'...' if len(str(msg_data.get('data', {}))) > 100 else ''}"
def _determine_message_type(self) -> str:
"""Determine if this is a sent, received, or system message"""
msg_data = self.message_data
# System messages
if self.log_type in ["peer_joined", "peer_left", "network_event"]:
return "system"
# For now, treat all as received since we're monitoring
# In a real implementation, you'd check if the author is the current node
return "received"
class BzzzLogStreamer:
"""Manages streaming of Bzzz hypercore logs"""
def __init__(self):
self.agent_endpoints = {}
self.last_indices = {} # Track last seen index per agent
async def discover_bzzz_agents(self) -> List[Dict[str, str]]:
"""Discover active Bzzz agents from the Hive agents API"""
try:
# This would typically query the actual agents database
# For now, return known endpoints based on cluster nodes
return [
{"agent_id": "acacia-bzzz", "endpoint": "http://acacia.local:8080"},
{"agent_id": "walnut-bzzz", "endpoint": "http://walnut.local:8080"},
{"agent_id": "ironwood-bzzz", "endpoint": "http://ironwood.local:8080"},
{"agent_id": "rosewood-bzzz", "endpoint": "http://rosewood.local:8080"},
]
except Exception as e:
logger.error(f"Failed to discover Bzzz agents: {e}")
return []
async def fetch_agent_logs(self, agent_endpoint: str, since_index: int = 0) -> List[BzzzLogEntry]:
"""Fetch hypercore logs from a specific Bzzz agent"""
try:
# This would call the actual Bzzz agent's HTTP API
# For now, return mock data structure that matches hypercore format
async with httpx.AsyncClient() as client:
response = await client.get(
f"{agent_endpoint}/api/hypercore/logs",
params={"since": since_index},
timeout=5.0
)
if response.status_code == 200:
logs_data = response.json()
return [BzzzLogEntry(log) for log in logs_data.get("entries", [])]
else:
logger.warning(f"Failed to fetch logs from {agent_endpoint}: {response.status_code}")
return []
except httpx.ConnectError:
logger.debug(f"Agent at {agent_endpoint} is not reachable")
return []
except Exception as e:
logger.error(f"Error fetching logs from {agent_endpoint}: {e}")
return []
async def get_recent_logs(self, limit: int = 100) -> List[Dict[str, Any]]:
"""Get recent logs from all agents"""
agents = await self.discover_bzzz_agents()
all_messages = []
for agent in agents:
logs = await self.fetch_agent_logs(agent["endpoint"])
for log in logs[-limit:]: # Get recent entries
message = log.to_chat_message()
message["agent_id"] = agent["agent_id"]
all_messages.append(message)
# Sort by timestamp
all_messages.sort(key=lambda x: x["timestamp"])
return all_messages[-limit:]
async def stream_new_logs(self):
"""Continuously stream new logs from all agents"""
while True:
try:
agents = await self.discover_bzzz_agents()
new_messages = []
for agent in agents:
agent_id = agent["agent_id"]
last_index = self.last_indices.get(agent_id, 0)
logs = await self.fetch_agent_logs(agent["endpoint"], last_index)
for log in logs:
if log.index > last_index:
message = log.to_chat_message()
message["agent_id"] = agent_id
new_messages.append(message)
self.last_indices[agent_id] = log.index
# Send new messages to all connected WebSocket clients
if new_messages and active_connections:
message_data = {
"type": "new_messages",
"messages": new_messages
}
# Remove disconnected clients
disconnected = []
for connection in active_connections:
try:
await connection.send_text(json.dumps(message_data))
except Exception:
disconnected.append(connection)
for conn in disconnected:
active_connections.remove(conn)
await asyncio.sleep(2) # Poll every 2 seconds
except Exception as e:
logger.error(f"Error in log streaming: {e}")
await asyncio.sleep(5)
# Global log streamer instance
log_streamer = BzzzLogStreamer()
@router.get("/bzzz/logs")
async def get_bzzz_logs(
limit: int = Query(default=100, le=1000),
agent_id: Optional[str] = None
):
"""Get recent Bzzz hypercore logs"""
try:
logs = await log_streamer.get_recent_logs(limit)
if agent_id:
logs = [log for log in logs if log.get("agent_id") == agent_id]
return {
"logs": logs,
"count": len(logs),
"timestamp": datetime.utcnow().isoformat()
}
except Exception as e:
logger.error(f"Error fetching Bzzz logs: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/bzzz/agents")
async def get_bzzz_agents():
"""Get list of discovered Bzzz agents"""
try:
agents = await log_streamer.discover_bzzz_agents()
return {"agents": agents}
except Exception as e:
logger.error(f"Error discovering Bzzz agents: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.websocket("/bzzz/logs/stream")
async def websocket_bzzz_logs(websocket: WebSocket):
"""WebSocket endpoint for real-time Bzzz log streaming"""
await websocket.accept()
active_connections.append(websocket)
try:
# Send initial recent logs
recent_logs = await log_streamer.get_recent_logs(50)
await websocket.send_text(json.dumps({
"type": "initial_logs",
"messages": recent_logs
}))
# Keep connection alive and handle client messages
while True:
try:
# Wait for client messages (ping, filters, etc.)
message = await asyncio.wait_for(websocket.receive_text(), timeout=30)
client_data = json.loads(message)
if client_data.get("type") == "ping":
await websocket.send_text(json.dumps({"type": "pong"}))
except asyncio.TimeoutError:
# Send periodic heartbeat
await websocket.send_text(json.dumps({"type": "heartbeat"}))
except WebSocketDisconnect:
active_connections.remove(websocket)
except Exception as e:
logger.error(f"WebSocket error: {e}")
if websocket in active_connections:
active_connections.remove(websocket)
# Start the log streaming background task
@router.on_event("startup")
async def start_log_streaming():
"""Start the background log streaming task"""
asyncio.create_task(log_streamer.stream_new_logs())
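# Illustrative only (not part of the API module): a minimal client for the
# /bzzz/logs/stream WebSocket above, assuming the `websockets` package and a Hive
# backend reachable at ws://localhost:8000 with the /api prefix from main.py.
async def example_log_stream_client(url: str = "ws://localhost:8000/api/bzzz/logs/stream"):
    import websockets  # assumed dependency for the example

    async with websockets.connect(url) as ws:
        while True:
            frame = json.loads(await ws.recv())
            if frame.get("type") == "initial_logs":
                print(f"received {len(frame['messages'])} historical messages")
            elif frame.get("type") == "new_messages":
                for msg in frame["messages"]:
                    print(f"[{msg['channel']}] {msg['senderName']}: {msg['content']}")
            elif frame.get("type") == "heartbeat":
                await ws.send(json.dumps({"type": "ping"}))  # server answers with pong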

View File

@@ -0,0 +1,434 @@
"""
Cluster Registration API endpoints
Handles registration-based cluster management for Hive-Bzzz integration.
"""
from fastapi import APIRouter, HTTPException, Request, Depends
from pydantic import BaseModel, Field
from typing import Dict, Any, List, Optional
import logging
import os
from ..services.cluster_registration_service import (
ClusterRegistrationService,
RegistrationRequest,
HeartbeatRequest
)
logger = logging.getLogger(__name__)
router = APIRouter()
# Initialize service
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://hive:hivepass@localhost:5432/hive")
cluster_registration_service = ClusterRegistrationService(DATABASE_URL)
# Pydantic models for API
class NodeRegistrationRequest(BaseModel):
token: str = Field(..., description="Cluster registration token")
node_id: str = Field(..., description="Unique node identifier")
hostname: str = Field(..., description="Node hostname")
system_info: Dict[str, Any] = Field(..., description="System hardware and OS information")
client_version: Optional[str] = Field(None, description="Bzzz client version")
services: Optional[Dict[str, Any]] = Field(None, description="Available services")
capabilities: Optional[Dict[str, Any]] = Field(None, description="Node capabilities")
ports: Optional[Dict[str, Any]] = Field(None, description="Service ports")
metadata: Optional[Dict[str, Any]] = Field(None, description="Additional metadata")
class NodeHeartbeatRequest(BaseModel):
node_id: str = Field(..., description="Node identifier")
status: str = Field("online", description="Node status")
cpu_usage: Optional[float] = Field(None, ge=0, le=100, description="CPU usage percentage")
memory_usage: Optional[float] = Field(None, ge=0, le=100, description="Memory usage percentage")
disk_usage: Optional[float] = Field(None, ge=0, le=100, description="Disk usage percentage")
gpu_usage: Optional[float] = Field(None, ge=0, le=100, description="GPU usage percentage")
services_status: Optional[Dict[str, Any]] = Field(None, description="Service status information")
network_metrics: Optional[Dict[str, Any]] = Field(None, description="Network metrics")
custom_metrics: Optional[Dict[str, Any]] = Field(None, description="Custom node metrics")
class TokenCreateRequest(BaseModel):
description: str = Field(..., description="Token description")
expires_in_days: Optional[int] = Field(None, gt=0, description="Token expiration in days")
max_registrations: Optional[int] = Field(None, gt=0, description="Maximum number of registrations")
allowed_ip_ranges: Optional[List[str]] = Field(None, description="Allowed IP CIDR ranges")
# Helper function to get client IP
def get_client_ip(request: Request) -> str:
"""Extract client IP address from request."""
# Check for X-Forwarded-For header (proxy/load balancer)
forwarded_for = request.headers.get("X-Forwarded-For")
if forwarded_for:
# Take the first IP in the chain (original client)
return forwarded_for.split(",")[0].strip()
# Check for X-Real-IP header (nginx)
real_ip = request.headers.get("X-Real-IP")
if real_ip:
return real_ip.strip()
# Fall back to direct connection IP
return request.client.host if request.client else "unknown"
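# For example (illustrative header values): with
#   X-Forwarded-For: "203.0.113.7, 10.0.0.2" and X-Real-IP: "10.0.0.2",
# get_client_ip() returns "203.0.113.7", the left-most entry, i.e. the original client
# as seen by the first proxy. Only when neither header is present does it fall back to
# request.client.host.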
# Registration endpoints
@router.post("/cluster/register")
async def register_node(
registration: NodeRegistrationRequest,
request: Request
) -> Dict[str, Any]:
"""
Register a new node in the cluster.
This endpoint allows Bzzz clients to register themselves with the Hive coordinator
using a valid cluster token. Similar to `docker swarm join`.
"""
try:
client_ip = get_client_ip(request)
logger.info(f"Node registration attempt: {registration.node_id} from {client_ip}")
# Convert to service request
reg_request = RegistrationRequest(
token=registration.token,
node_id=registration.node_id,
hostname=registration.hostname,
ip_address=client_ip,
system_info=registration.system_info,
client_version=registration.client_version,
services=registration.services,
capabilities=registration.capabilities,
ports=registration.ports,
metadata=registration.metadata
)
result = await cluster_registration_service.register_node(reg_request, client_ip)
logger.info(f"Node {registration.node_id} registered successfully")
return result
except ValueError as e:
logger.warning(f"Registration failed for {registration.node_id}: {e}")
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
logger.error(f"Registration error for {registration.node_id}: {e}")
raise HTTPException(status_code=500, detail="Registration failed")
@router.post("/cluster/heartbeat")
async def node_heartbeat(heartbeat: NodeHeartbeatRequest) -> Dict[str, Any]:
"""
Update node heartbeat and status.
Registered nodes should call this endpoint periodically (every 30 seconds)
to maintain their registration and report current status/metrics.
"""
try:
heartbeat_request = HeartbeatRequest(
node_id=heartbeat.node_id,
status=heartbeat.status,
cpu_usage=heartbeat.cpu_usage,
memory_usage=heartbeat.memory_usage,
disk_usage=heartbeat.disk_usage,
gpu_usage=heartbeat.gpu_usage,
services_status=heartbeat.services_status,
network_metrics=heartbeat.network_metrics,
custom_metrics=heartbeat.custom_metrics
)
result = await cluster_registration_service.update_heartbeat(heartbeat_request)
return result
except ValueError as e:
logger.warning(f"Heartbeat failed for {heartbeat.node_id}: {e}")
raise HTTPException(status_code=404, detail=str(e))
except Exception as e:
logger.error(f"Heartbeat error for {heartbeat.node_id}: {e}")
raise HTTPException(status_code=500, detail="Heartbeat update failed")
# Node management endpoints
@router.get("/cluster/nodes/registered")
async def get_registered_nodes(include_offline: bool = True) -> Dict[str, Any]:
"""
Get all registered cluster nodes.
Returns detailed information about all nodes that have registered
with the cluster, including their hardware specs and current status.
"""
try:
nodes = await cluster_registration_service.get_registered_nodes(include_offline)
# Convert to API response format
nodes_data = []
for node in nodes:
# Convert dataclass to dict and handle datetime serialization
node_dict = {
"id": node.id,
"node_id": node.node_id,
"hostname": node.hostname,
"ip_address": node.ip_address,
"status": node.status,
"hardware": {
"cpu": node.cpu_info or {},
"memory": node.memory_info or {},
"gpu": node.gpu_info or {},
"disk": node.disk_info or {},
"os": node.os_info or {},
"platform": node.platform_info or {}
},
"services": node.services or {},
"capabilities": node.capabilities or {},
"ports": node.ports or {},
"client_version": node.client_version,
"first_registered": node.first_registered.isoformat(),
"last_heartbeat": node.last_heartbeat.isoformat(),
"registration_metadata": node.registration_metadata or {}
}
nodes_data.append(node_dict)
return {
"nodes": nodes_data,
"total_count": len(nodes_data),
"online_count": len([n for n in nodes if n.status == "online"]),
"offline_count": len([n for n in nodes if n.status == "offline"])
}
except Exception as e:
logger.error(f"Failed to get registered nodes: {e}")
raise HTTPException(status_code=500, detail="Failed to retrieve registered nodes")
@router.get("/cluster/nodes/{node_id}")
async def get_node_details(node_id: str) -> Dict[str, Any]:
"""Get detailed information about a specific registered node."""
try:
node = await cluster_registration_service.get_node_details(node_id)
if not node:
raise HTTPException(status_code=404, detail="Node not found")
return {
"id": node.id,
"node_id": node.node_id,
"hostname": node.hostname,
"ip_address": node.ip_address,
"status": node.status,
"hardware": {
"cpu": node.cpu_info or {},
"memory": node.memory_info or {},
"gpu": node.gpu_info or {},
"disk": node.disk_info or {},
"os": node.os_info or {},
"platform": node.platform_info or {}
},
"services": node.services or {},
"capabilities": node.capabilities or {},
"ports": node.ports or {},
"client_version": node.client_version,
"first_registered": node.first_registered.isoformat(),
"last_heartbeat": node.last_heartbeat.isoformat(),
"registration_metadata": node.registration_metadata or {}
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to get node details for {node_id}: {e}")
raise HTTPException(status_code=500, detail="Failed to retrieve node details")
@router.delete("/cluster/nodes/{node_id}")
async def remove_node(node_id: str) -> Dict[str, Any]:
"""
Remove a node from the cluster.
This will unregister the node and stop accepting its heartbeats.
The node will need to re-register to rejoin the cluster.
"""
try:
success = await cluster_registration_service.remove_node(node_id)
if not success:
raise HTTPException(status_code=404, detail="Node not found")
return {
"node_id": node_id,
"status": "removed",
"message": "Node successfully removed from cluster"
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to remove node {node_id}: {e}")
raise HTTPException(status_code=500, detail="Failed to remove node")
# Token management endpoints
@router.post("/cluster/tokens")
async def create_cluster_token(token_request: TokenCreateRequest) -> Dict[str, Any]:
"""
Create a new cluster registration token.
Tokens are used by Bzzz clients to authenticate and register with the cluster.
Only administrators should have access to this endpoint.
"""
try:
# For now, use a default admin user ID
# TODO: Extract from JWT token or session
admin_user_id = "admin" # This should come from authentication
token = await cluster_registration_service.generate_cluster_token(
description=token_request.description,
created_by_user_id=admin_user_id,
expires_in_days=token_request.expires_in_days,
max_registrations=token_request.max_registrations,
allowed_ip_ranges=token_request.allowed_ip_ranges
)
return {
"id": token.id,
"token": token.token,
"description": token.description,
"created_at": token.created_at.isoformat(),
"expires_at": token.expires_at.isoformat() if token.expires_at else None,
"is_active": token.is_active,
"max_registrations": token.max_registrations,
"current_registrations": token.current_registrations,
"allowed_ip_ranges": token.allowed_ip_ranges
}
except Exception as e:
logger.error(f"Failed to create cluster token: {e}")
raise HTTPException(status_code=500, detail="Failed to create token")
@router.get("/cluster/tokens")
async def list_cluster_tokens() -> Dict[str, Any]:
"""
List all cluster registration tokens.
Returns information about all tokens including their usage statistics.
Only administrators should have access to this endpoint.
"""
try:
tokens = await cluster_registration_service.list_tokens()
tokens_data = []
for token in tokens:
tokens_data.append({
"id": token.id,
"token": token.token[:20] + "..." if len(token.token) > 20 else token.token, # Partial token for security
"description": token.description,
"created_at": token.created_at.isoformat(),
"expires_at": token.expires_at.isoformat() if token.expires_at else None,
"is_active": token.is_active,
"max_registrations": token.max_registrations,
"current_registrations": token.current_registrations,
"allowed_ip_ranges": token.allowed_ip_ranges
})
return {
"tokens": tokens_data,
"total_count": len(tokens_data)
}
except Exception as e:
logger.error(f"Failed to list cluster tokens: {e}")
raise HTTPException(status_code=500, detail="Failed to list tokens")
@router.delete("/cluster/tokens/{token}")
async def revoke_cluster_token(token: str) -> Dict[str, Any]:
"""
Revoke a cluster registration token.
This will prevent new registrations using this token, but won't affect
nodes that are already registered.
"""
try:
success = await cluster_registration_service.revoke_token(token)
if not success:
raise HTTPException(status_code=404, detail="Token not found")
return {
"token": token[:20] + "..." if len(token) > 20 else token,
"status": "revoked",
"message": "Token successfully revoked"
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to revoke token {token}: {e}")
raise HTTPException(status_code=500, detail="Failed to revoke token")
# Cluster statistics and monitoring
@router.get("/cluster/statistics")
async def get_cluster_statistics() -> Dict[str, Any]:
"""
Get cluster health and usage statistics.
Returns information about node counts, token usage, and overall cluster health.
"""
try:
stats = await cluster_registration_service.get_cluster_statistics()
return stats
except Exception as e:
logger.error(f"Failed to get cluster statistics: {e}")
raise HTTPException(status_code=500, detail="Failed to retrieve cluster statistics")
# Maintenance endpoints
@router.post("/cluster/maintenance/cleanup-offline")
async def cleanup_offline_nodes(offline_threshold_minutes: int = 10) -> Dict[str, Any]:
"""
Mark nodes as offline if they haven't sent heartbeats recently.
This maintenance endpoint should be called periodically to keep
the cluster status accurate.
"""
try:
count = await cluster_registration_service.cleanup_offline_nodes(offline_threshold_minutes)
return {
"nodes_marked_offline": count,
"threshold_minutes": offline_threshold_minutes,
"message": f"Marked {count} nodes as offline"
}
except Exception as e:
logger.error(f"Failed to cleanup offline nodes: {e}")
raise HTTPException(status_code=500, detail="Failed to cleanup offline nodes")
@router.post("/cluster/maintenance/cleanup-heartbeats")
async def cleanup_old_heartbeats(retention_days: int = 30) -> Dict[str, Any]:
"""
Remove old heartbeat data to manage database size.
This maintenance endpoint should be called periodically to prevent
the heartbeat table from growing too large.
"""
try:
count = await cluster_registration_service.cleanup_old_heartbeats(retention_days)
return {
"heartbeats_deleted": count,
"retention_days": retention_days,
"message": f"Deleted {count} old heartbeat records"
}
except Exception as e:
logger.error(f"Failed to cleanup old heartbeats: {e}")
raise HTTPException(status_code=500, detail="Failed to cleanup old heartbeats")
# Health check endpoint
@router.get("/cluster/health")
async def cluster_registration_health() -> Dict[str, Any]:
"""
Health check for the cluster registration system.
"""
try:
# Test database connection
stats = await cluster_registration_service.get_cluster_statistics()
return {
"status": "healthy",
"database_connected": True,
"cluster_health": stats.get("cluster_health", {}),
"timestamp": stats.get("last_updated")
}
except Exception as e:
logger.error(f"Cluster registration health check failed: {e}")
return {
"status": "unhealthy",
"database_connected": False,
"error": str(e),
"timestamp": None
}
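# Illustrative only: one way to drive the maintenance endpoints above on a schedule.
# Assumes httpx and a coordinator on localhost:8000; in production this could just as
# easily be a cron job issuing the same two POSTs.
async def example_maintenance_tick(base_url: str = "http://localhost:8000/api"):
    import httpx  # assumed dependency for the example

    async with httpx.AsyncClient() as client:
        # Mark nodes offline after 10 minutes without a heartbeat (endpoint default).
        await client.post(f"{base_url}/cluster/maintenance/cleanup-offline")
        # Trim heartbeat rows older than 30 days (endpoint default).
        await client.post(f"{base_url}/cluster/maintenance/cleanup-heartbeats")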

backend/app/api/feedback.py (new file, +474 lines)
View File

@@ -0,0 +1,474 @@
"""
Context Feedback API endpoints for RL Context Curator integration
"""
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
from sqlalchemy.orm import Session
from typing import List, Optional, Dict, Any
from datetime import datetime, timedelta
from pydantic import BaseModel, Field
from ..core.database import get_db
from ..models.context_feedback import ContextFeedback, AgentPermissions, PromotionRuleHistory
from ..models.task import Task
from ..models.agent import Agent
from ..services.auth import get_current_user
from ..models.responses import StatusResponse
router = APIRouter(prefix="/api/feedback", tags=["Context Feedback"])
# Pydantic models for API
class ContextFeedbackRequest(BaseModel):
"""Request model for context feedback"""
context_id: str = Field(..., description="HCFS context ID")
feedback_type: str = Field(..., description="Type of feedback: upvote, downvote, forgetfulness, task_success, task_failure")
confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence in feedback")
reason: Optional[str] = Field(None, description="Optional reason for feedback")
usage_context: Optional[str] = Field(None, description="Context of usage")
directory_scope: Optional[str] = Field(None, description="Directory where context was used")
task_type: Optional[str] = Field(None, description="Type of task being performed")
class TaskOutcomeFeedbackRequest(BaseModel):
"""Request model for task outcome feedback"""
task_id: str = Field(..., description="Task ID")
outcome: str = Field(..., description="Task outcome: completed, failed, abandoned")
completion_time: Optional[int] = Field(None, description="Time to complete in seconds")
errors_encountered: int = Field(0, description="Number of errors during execution")
follow_up_questions: int = Field(0, description="Number of follow-up questions")
context_used: Optional[List[str]] = Field(None, description="Context IDs used in task")
context_relevance_score: Optional[float] = Field(None, ge=0.0, le=1.0, description="Average relevance of used context")
outcome_confidence: Optional[float] = Field(None, ge=0.0, le=1.0, description="Confidence in outcome classification")
class AgentPermissionsRequest(BaseModel):
"""Request model for agent permissions"""
agent_id: str = Field(..., description="Agent ID")
role: str = Field(..., description="Agent role")
directory_patterns: List[str] = Field(..., description="Directory patterns for this role")
task_types: List[str] = Field(..., description="Task types this agent can handle")
context_weight: float = Field(1.0, ge=0.1, le=2.0, description="Weight for context relevance")
class ContextFeedbackResponse(BaseModel):
"""Response model for context feedback"""
id: int
context_id: str
agent_id: str
task_id: Optional[str]
feedback_type: str
role: str
confidence: float
reason: Optional[str]
usage_context: Optional[str]
directory_scope: Optional[str]
task_type: Optional[str]
timestamp: datetime
class FeedbackStatsResponse(BaseModel):
"""Response model for feedback statistics"""
total_feedback: int
feedback_by_type: Dict[str, int]
feedback_by_role: Dict[str, int]
average_confidence: float
recent_feedback_count: int
top_contexts: List[Dict[str, Any]]
@router.post("/context/{context_id}", response_model=StatusResponse)
async def submit_context_feedback(
context_id: str,
request: ContextFeedbackRequest,
background_tasks: BackgroundTasks,
db: Session = Depends(get_db),
current_user: dict = Depends(get_current_user)
):
"""
Submit feedback for a specific context
"""
try:
# Get agent information
agent = db.query(Agent).filter(Agent.id == current_user.get("agent_id", "unknown")).first()
if not agent:
raise HTTPException(status_code=404, detail="Agent not found")
# Validate feedback type
valid_types = ["upvote", "downvote", "forgetfulness", "task_success", "task_failure"]
if request.feedback_type not in valid_types:
raise HTTPException(status_code=400, detail=f"Invalid feedback type. Must be one of: {valid_types}")
# Create feedback record
feedback = ContextFeedback(
context_id=request.context_id,
agent_id=agent.id,
feedback_type=request.feedback_type,
role=agent.role if agent.role else "general",
confidence=request.confidence,
reason=request.reason,
usage_context=request.usage_context,
directory_scope=request.directory_scope,
task_type=request.task_type
)
db.add(feedback)
db.commit()
db.refresh(feedback)
# Send feedback to RL Context Curator in background
background_tasks.add_task(
send_feedback_to_rl_curator,
feedback.id,
request.context_id,
request.feedback_type,
agent.id,
agent.role if agent.role else "general",
request.confidence
)
return StatusResponse(
status="success",
message="Context feedback submitted successfully",
data={"feedback_id": feedback.id, "context_id": request.context_id}
)
except Exception as e:
db.rollback()
raise HTTPException(status_code=500, detail=f"Failed to submit feedback: {str(e)}")
@router.post("/task-outcome/{task_id}", response_model=StatusResponse)
async def submit_task_outcome_feedback(
task_id: str,
request: TaskOutcomeFeedbackRequest,
background_tasks: BackgroundTasks,
db: Session = Depends(get_db),
current_user: dict = Depends(get_current_user)
):
"""
Submit task outcome feedback for RL learning
"""
try:
# Get task
task = db.query(Task).filter(Task.id == task_id).first()
if not task:
raise HTTPException(status_code=404, detail="Task not found")
# Update task with outcome metrics
task.task_outcome = request.outcome
task.completion_time = request.completion_time
task.errors_encountered = request.errors_encountered
task.follow_up_questions = request.follow_up_questions
task.context_relevance_score = request.context_relevance_score
task.outcome_confidence = request.outcome_confidence
task.feedback_collected = True
if request.context_used:
task.context_used = request.context_used
if request.outcome in ["completed", "failed", "abandoned"] and not task.completed_at:
task.completed_at = datetime.utcnow()
# Calculate success rate
if request.outcome == "completed":
task.success_rate = 1.0 - (request.errors_encountered * 0.1) # Simple calculation
task.success_rate = max(0.0, min(1.0, task.success_rate))
else:
task.success_rate = 0.0
db.commit()
# Create feedback events for used contexts
if request.context_used and task.assigned_agent_id:
agent = db.query(Agent).filter(Agent.id == task.assigned_agent_id).first()
if agent:
feedback_type = "task_success" if request.outcome == "completed" else "task_failure"
for context_id in request.context_used:
feedback = ContextFeedback(
context_id=context_id,
agent_id=agent.id,
task_id=task.id,
feedback_type=feedback_type,
role=agent.role if agent.role else "general",
confidence=request.outcome_confidence or 0.8,
reason=f"Task {request.outcome}",
usage_context=f"task_execution_{request.outcome}",
task_type=request.task_type
)
db.add(feedback)
db.commit()
return StatusResponse(
status="success",
message="Task outcome feedback submitted successfully",
data={"task_id": task_id, "outcome": request.outcome}
)
except Exception as e:
db.rollback()
raise HTTPException(status_code=500, detail=f"Failed to submit task outcome: {str(e)}")
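# Worked example of the success_rate heuristic above (values are illustrative): a task
# reported as "completed" with errors_encountered=3 yields
#   success_rate = 1.0 - (3 * 0.1) = 0.7,
# clamped to the [0.0, 1.0] range, while any "failed" or "abandoned" outcome is
# recorded as 0.0 regardless of the error count.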
@router.get("/stats", response_model=FeedbackStatsResponse)
async def get_feedback_stats(
days: int = 7,
role: Optional[str] = None,
db: Session = Depends(get_db),
current_user: dict = Depends(get_current_user)
):
"""
Get feedback statistics for analysis
"""
try:
# Base query
query = db.query(ContextFeedback)
# Filter by date range
if days > 0:
since_date = datetime.utcnow() - timedelta(days=days)
query = query.filter(ContextFeedback.timestamp >= since_date)
# Filter by role if specified
if role:
query = query.filter(ContextFeedback.role == role)
feedback_records = query.all()
# Calculate statistics
total_feedback = len(feedback_records)
feedback_by_type = {}
feedback_by_role = {}
confidence_values = []
context_usage = {}
for feedback in feedback_records:
# Count by type
feedback_by_type[feedback.feedback_type] = feedback_by_type.get(feedback.feedback_type, 0) + 1
# Count by role
feedback_by_role[feedback.role] = feedback_by_role.get(feedback.role, 0) + 1
# Collect confidence values
confidence_values.append(feedback.confidence)
# Count context usage
context_usage[feedback.context_id] = context_usage.get(feedback.context_id, 0) + 1
# Calculate average confidence
average_confidence = sum(confidence_values) / len(confidence_values) if confidence_values else 0.0
# Get recent feedback count (last 24 hours)
recent_since = datetime.utcnow() - timedelta(days=1)
recent_count = db.query(ContextFeedback).filter(
ContextFeedback.timestamp >= recent_since
).count()
# Get top contexts by usage
top_contexts = [
{"context_id": ctx_id, "usage_count": count}
for ctx_id, count in sorted(context_usage.items(), key=lambda x: x[1], reverse=True)[:10]
]
return FeedbackStatsResponse(
total_feedback=total_feedback,
feedback_by_type=feedback_by_type,
feedback_by_role=feedback_by_role,
average_confidence=average_confidence,
recent_feedback_count=recent_count,
top_contexts=top_contexts
)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to get feedback stats: {str(e)}")
@router.get("/recent", response_model=List[ContextFeedbackResponse])
async def get_recent_feedback(
limit: int = 50,
feedback_type: Optional[str] = None,
role: Optional[str] = None,
db: Session = Depends(get_db),
current_user: dict = Depends(get_current_user)
):
"""
Get recent feedback events
"""
try:
query = db.query(ContextFeedback).order_by(ContextFeedback.timestamp.desc())
if feedback_type:
query = query.filter(ContextFeedback.feedback_type == feedback_type)
if role:
query = query.filter(ContextFeedback.role == role)
feedback_records = query.limit(limit).all()
return [
ContextFeedbackResponse(
id=fb.id,
context_id=fb.context_id,
agent_id=fb.agent_id,
task_id=str(fb.task_id) if fb.task_id else None,
feedback_type=fb.feedback_type,
role=fb.role,
confidence=fb.confidence,
reason=fb.reason,
usage_context=fb.usage_context,
directory_scope=fb.directory_scope,
task_type=fb.task_type,
timestamp=fb.timestamp
)
for fb in feedback_records
]
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to get recent feedback: {str(e)}")
@router.post("/agent-permissions", response_model=StatusResponse)
async def set_agent_permissions(
request: AgentPermissionsRequest,
db: Session = Depends(get_db),
current_user: dict = Depends(get_current_user)
):
"""
Set or update agent permissions for context filtering
"""
try:
# Check if permissions already exist
existing = db.query(AgentPermissions).filter(
AgentPermissions.agent_id == request.agent_id,
AgentPermissions.role == request.role
).first()
if existing:
# Update existing permissions
existing.directory_patterns = ",".join(request.directory_patterns)
existing.task_types = ",".join(request.task_types)
existing.context_weight = request.context_weight
existing.updated_at = datetime.utcnow()
else:
# Create new permissions
permissions = AgentPermissions(
agent_id=request.agent_id,
role=request.role,
directory_patterns=",".join(request.directory_patterns),
task_types=",".join(request.task_types),
context_weight=request.context_weight
)
db.add(permissions)
db.commit()
return StatusResponse(
status="success",
message="Agent permissions updated successfully",
data={"agent_id": request.agent_id, "role": request.role}
)
except Exception as e:
db.rollback()
raise HTTPException(status_code=500, detail=f"Failed to set agent permissions: {str(e)}")
@router.get("/agent-permissions/{agent_id}")
async def get_agent_permissions(
agent_id: str,
db: Session = Depends(get_db),
current_user: dict = Depends(get_current_user)
):
"""
Get agent permissions for context filtering
"""
try:
permissions = db.query(AgentPermissions).filter(
AgentPermissions.agent_id == agent_id,
AgentPermissions.active == "true"
).all()
return [
{
"id": perm.id,
"agent_id": perm.agent_id,
"role": perm.role,
"directory_patterns": perm.directory_patterns.split(",") if perm.directory_patterns else [],
"task_types": perm.task_types.split(",") if perm.task_types else [],
"context_weight": perm.context_weight,
"created_at": perm.created_at,
"updated_at": perm.updated_at
}
for perm in permissions
]
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to get agent permissions: {str(e)}")
async def send_feedback_to_rl_curator(
feedback_id: int,
context_id: str,
feedback_type: str,
agent_id: str,
role: str,
confidence: float
):
"""
Background task to send feedback to RL Context Curator
"""
try:
import httpx
import json
from datetime import datetime
# Prepare feedback event in Bzzz format
feedback_event = {
"bzzz_type": "feedback_event",
"timestamp": datetime.utcnow().isoformat(),
"origin": {
"node_id": "hive",
"agent_id": agent_id,
"task_id": f"hive-feedback-{feedback_id}",
"workspace": "hive://context-feedback",
"directory": "/feedback/"
},
"feedback": {
"type": feedback_type,
"category": "general", # Could be enhanced with category detection
"role": role,
"context_id": context_id,
"reason": f"Feedback from Hive agent {agent_id}",
"confidence": confidence,
"usage_context": "hive_platform"
},
"task_outcome": {
"completed": feedback_type in ["upvote", "task_success"],
"completion_time": 0,
"errors_encountered": 0,
"follow_up_questions": 0
}
}
# Send to HCFS RL Tuner Service
async with httpx.AsyncClient() as client:
try:
response = await client.post(
"http://localhost:8001/api/feedback",
json=feedback_event,
timeout=10.0
)
if response.status_code == 200:
print(f"✅ Feedback sent to RL Curator: {feedback_id}")
else:
print(f"⚠️ RL Curator responded with status {response.status_code}")
except httpx.ConnectError:
print(f"⚠️ Could not connect to RL Curator service (feedback {feedback_id})")
except Exception as e:
print(f"❌ Error sending feedback to RL Curator: {e}")
except Exception as e:
print(f"❌ Background feedback task failed: {e}")

View File

@@ -47,6 +47,37 @@ async def get_project_tasks(project_id: str, current_user: Dict[str, Any] = Depe
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.put("/projects/{project_id}")
async def update_project(project_id: str, project_data: Dict[str, Any], current_user: Dict[str, Any] = Depends(get_current_user_context)) -> Dict[str, Any]:
"""Update a project configuration."""
try:
updated_project = project_service.update_project(project_id, project_data)
if not updated_project:
raise HTTPException(status_code=404, detail="Project not found")
return updated_project
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.post("/projects")
async def create_project(project_data: Dict[str, Any], current_user: Dict[str, Any] = Depends(get_current_user_context)) -> Dict[str, Any]:
"""Create a new project."""
try:
new_project = project_service.create_project(project_data)
return new_project
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.delete("/projects/{project_id}")
async def delete_project(project_id: str, current_user: Dict[str, Any] = Depends(get_current_user_context)) -> Dict[str, Any]:
"""Delete a project."""
try:
result = project_service.delete_project(project_id)
if not result:
raise HTTPException(status_code=404, detail="Project not found")
return {"success": True, "message": "Project deleted successfully"}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
# === Bzzz Integration Endpoints ===
@bzzz_router.get("/active-repos")

View File

@@ -11,7 +11,7 @@ from typing import Dict, Any, Optional
from dataclasses import asdict
# Add CCLI source to path
ccli_path = os.path.join(os.path.dirname(__file__), '../../../ccli_src')
ccli_path = os.path.join(os.path.dirname(__file__), '../../ccli_src')
sys.path.insert(0, ccli_path)
from agents.gemini_cli_agent import GeminiCliAgent, GeminiCliConfig, TaskRequest as CliTaskRequest, TaskResult as CliTaskResult

View File

@@ -273,7 +273,6 @@ def create_token_response(user_id: int, user_data: Dict[str, Any]) -> Dict[str,
"refresh_token": refresh_token,
"token_type": "bearer",
"expires_in": ACCESS_TOKEN_EXPIRE_MINUTES * 60, # seconds
"user": user_data,
}

View File

@@ -174,6 +174,10 @@ app = FastAPI(
"name": "cluster",
"description": "Cluster-wide operations and coordination"
},
{
"name": "cluster-registration",
"description": "Dynamic cluster node registration and management"
},
{
"name": "distributed-workflows",
"description": "Advanced distributed workflow management"
@@ -206,7 +210,7 @@ def get_coordinator() -> UnifiedCoordinator:
return unified_coordinator
# Import API routers
from .api import agents, workflows, executions, monitoring, projects, tasks, cluster, distributed_workflows, cli_agents, auth
from .api import agents, workflows, executions, monitoring, projects, tasks, cluster, distributed_workflows, cli_agents, auth, bzzz_logs, cluster_registration
# Import error handlers and response models
from .core.error_handlers import (
@@ -239,8 +243,10 @@ app.include_router(projects.router, prefix="/api", tags=["projects"])
app.include_router(projects.bzzz_router, prefix="/api", tags=["bzzz-integration"])
app.include_router(tasks.router, prefix="/api", tags=["tasks"])
app.include_router(cluster.router, prefix="/api", tags=["cluster"])
app.include_router(cluster_registration.router, prefix="/api", tags=["cluster-registration"])
app.include_router(distributed_workflows.router, tags=["distributed-workflows"])
app.include_router(cli_agents.router, tags=["cli-agents"])
app.include_router(bzzz_logs.router, prefix="/api", tags=["bzzz-logs"])
# Override dependency functions in API modules with our coordinator instance
agents.get_coordinator = get_coordinator
@@ -528,16 +534,6 @@ async def root():
# Removed duplicate /health endpoint - using the enhanced one above
@app.get("/api/health", response_model=None)
async def health_check():
"""Simple health check endpoint"""
return {
"status": "healthy",
"timestamp": datetime.now().isoformat(),
"version": "1.0.0",
"message": "Hive API is operational"
}
@app.get("/api/status")
async def get_system_status():
"""Get comprehensive system status"""

View File

@@ -2,4 +2,5 @@ from . import agent
from . import agent_role
from . import project
from . import task
from . import context_feedback
from . import sqlalchemy_models

View File

@@ -34,6 +34,8 @@ class Agent(Base):
# Relationships
tasks = relationship("Task", back_populates="assigned_agent")
context_feedback = relationship("ContextFeedback", back_populates="agent")
permissions = relationship("AgentPermissions", back_populates="agent")
def to_dict(self):
return {

View File

@@ -0,0 +1,85 @@
"""
Context Feedback model for RL Context Curator integration
"""
from sqlalchemy import Column, String, Text, Integer, DateTime, ForeignKey, UUID as SqlUUID, Float
from sqlalchemy.sql import func
from sqlalchemy.orm import relationship
from ..core.database import Base
import uuid
class ContextFeedback(Base):
__tablename__ = "context_feedback"
# Primary identification
id = Column(Integer, primary_key=True, index=True)
# Context and agent information
context_id = Column(String(255), nullable=False, index=True) # HCFS context ID
agent_id = Column(String(255), ForeignKey("agents.id"), nullable=False)
task_id = Column(SqlUUID(as_uuid=True), ForeignKey("tasks.id"), nullable=True)
# Feedback details
feedback_type = Column(String(50), nullable=False) # upvote, downvote, forgetfulness, task_success, task_failure
role = Column(String(100), nullable=False) # Agent role when feedback was given
confidence = Column(Float, nullable=False) # Confidence in feedback (0.0 to 1.0)
reason = Column(Text, nullable=True) # Optional reason for feedback
usage_context = Column(String(255), nullable=True) # Context of usage (debugging, coding, etc.)
# Additional metadata
directory_scope = Column(String(500), nullable=True) # Directory where context was used
task_type = Column(String(100), nullable=True) # Type of task being performed
# Timestamps
timestamp = Column(DateTime(timezone=True), server_default=func.now())
# Relationships
agent = relationship("Agent", back_populates="context_feedback")
task = relationship("Task", backref="context_feedback")
class AgentPermissions(Base):
__tablename__ = "agent_permissions"
# Primary identification
id = Column(Integer, primary_key=True, index=True)
# Agent and role information
agent_id = Column(String(255), ForeignKey("agents.id"), nullable=False, index=True)
role = Column(String(100), nullable=False)
# Permission details
directory_patterns = Column(Text, nullable=True) # JSON array of path patterns
task_types = Column(Text, nullable=True) # JSON array of allowed task types
context_weight = Column(Float, default=1.0) # Weight for context relevance
# Status
active = Column(String(10), default='true') # String to match existing boolean patterns
# Timestamps
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())
# Relationships
agent = relationship("Agent", back_populates="permissions")
class PromotionRuleHistory(Base):
__tablename__ = "promotion_rule_history"
# Primary identification
id = Column(Integer, primary_key=True, index=True)
# Rule information
rule_version = Column(String(50), nullable=False)
category = Column(String(100), nullable=False)
role = Column(String(100), nullable=False)
weight_value = Column(Float, nullable=False)
# Change information
change_reason = Column(Text, nullable=True)
previous_value = Column(Float, nullable=True)
# Timestamps
timestamp = Column(DateTime(timezone=True), server_default=func.now())

View File

@@ -2,7 +2,7 @@
Task model for SQLAlchemy ORM
"""
from sqlalchemy import Column, String, Text, Integer, DateTime, ForeignKey, UUID as SqlUUID
from sqlalchemy import Column, String, Text, Integer, DateTime, ForeignKey, UUID as SqlUUID, Float, Boolean
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.sql import func
from sqlalchemy.orm import relationship
@@ -30,6 +30,17 @@ class Task(Base):
# Task metadata (includes context and payload)
task_metadata = Column("metadata", JSONB, nullable=True)
# RL Context Curator outcome tracking fields
completion_time = Column(Integer, nullable=True) # Time to complete in seconds
errors_encountered = Column(Integer, default=0) # Number of errors during execution
follow_up_questions = Column(Integer, default=0) # Number of follow-up questions
success_rate = Column(Float, nullable=True) # Success rate (0.0 to 1.0)
context_used = Column(JSONB, nullable=True) # Context IDs used in this task
context_relevance_score = Column(Float, nullable=True) # Average relevance of used context
feedback_collected = Column(Boolean, default=False) # Whether feedback was collected
task_outcome = Column(String(50), nullable=True) # completed, failed, abandoned
outcome_confidence = Column(Float, nullable=True) # Confidence in outcome classification
# Timestamps
created_at = Column(DateTime(timezone=True), server_default=func.now())
started_at = Column(DateTime(timezone=True), nullable=True)

View File

@@ -0,0 +1,522 @@
"""
Cluster Registration Service
Handles registration-based cluster management for Hive-Bzzz integration.
"""
import asyncpg
import secrets
import json
import socket
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
from ipaddress import IPv4Network, IPv6Network, ip_address
import logging
logger = logging.getLogger(__name__)
@dataclass
class ClusterToken:
id: int
token: str
description: str
created_at: datetime
expires_at: Optional[datetime]
is_active: bool
max_registrations: Optional[int]
current_registrations: int
allowed_ip_ranges: Optional[List[str]]
@dataclass
class ClusterNode:
id: int
node_id: str
hostname: str
ip_address: str
registration_token: str
cpu_info: Optional[Dict[str, Any]]
memory_info: Optional[Dict[str, Any]]
gpu_info: Optional[Dict[str, Any]]
disk_info: Optional[Dict[str, Any]]
os_info: Optional[Dict[str, Any]]
platform_info: Optional[Dict[str, Any]]
status: str
last_heartbeat: datetime
first_registered: datetime
services: Optional[Dict[str, Any]]
capabilities: Optional[Dict[str, Any]]
ports: Optional[Dict[str, Any]]
client_version: Optional[str]
registration_metadata: Optional[Dict[str, Any]]
@dataclass
class RegistrationRequest:
token: str
node_id: str
hostname: str
ip_address: str
system_info: Dict[str, Any]
client_version: Optional[str] = None
services: Optional[Dict[str, Any]] = None
capabilities: Optional[Dict[str, Any]] = None
ports: Optional[Dict[str, Any]] = None
metadata: Optional[Dict[str, Any]] = None
@dataclass
class HeartbeatRequest:
node_id: str
status: str = "online"
cpu_usage: Optional[float] = None
memory_usage: Optional[float] = None
disk_usage: Optional[float] = None
gpu_usage: Optional[float] = None
services_status: Optional[Dict[str, Any]] = None
network_metrics: Optional[Dict[str, Any]] = None
custom_metrics: Optional[Dict[str, Any]] = None
class ClusterRegistrationService:
def __init__(self, database_url: str):
self.database_url = database_url
self._conn_cache = None
async def get_connection(self) -> asyncpg.Connection:
"""Get database connection with caching."""
if not self._conn_cache or self._conn_cache.is_closed():
try:
self._conn_cache = await asyncpg.connect(self.database_url)
except Exception as e:
logger.error(f"Failed to connect to database: {e}")
raise
return self._conn_cache
async def close_connection(self):
"""Close database connection."""
if self._conn_cache and not self._conn_cache.is_closed():
await self._conn_cache.close()
# Token Management
async def generate_cluster_token(
self,
description: str,
created_by_user_id: str,
expires_in_days: Optional[int] = None,
max_registrations: Optional[int] = None,
allowed_ip_ranges: Optional[List[str]] = None
) -> ClusterToken:
"""Generate a new cluster registration token."""
conn = await self.get_connection()
# Generate secure token
token = f"hive_cluster_{secrets.token_urlsafe(32)}"
expires_at = datetime.now() + timedelta(days=expires_in_days) if expires_in_days else None
try:
result = await conn.fetchrow("""
INSERT INTO cluster_tokens (
token, description, created_by, expires_at,
max_registrations, allowed_ip_ranges
) VALUES ($1, $2, $3, $4, $5, $6)
RETURNING id, token, description, created_at, expires_at,
is_active, max_registrations, current_registrations, allowed_ip_ranges
""", token, description, created_by_user_id, expires_at, max_registrations, allowed_ip_ranges)
return ClusterToken(**dict(result))
except Exception as e:
logger.error(f"Failed to generate cluster token: {e}")
raise
async def validate_token(self, token: str, client_ip: str) -> Optional[ClusterToken]:
"""Validate a cluster registration token."""
conn = await self.get_connection()
try:
result = await conn.fetchrow("""
SELECT id, token, description, created_at, expires_at,
is_active, max_registrations, current_registrations, allowed_ip_ranges
FROM cluster_tokens
WHERE token = $1 AND is_active = true
""", token)
if not result:
return None
cluster_token = ClusterToken(**dict(result))
# Check expiration
if cluster_token.expires_at and datetime.now() > cluster_token.expires_at:
logger.warning(f"Token {token[:20]}... has expired")
return None
# Check registration limit
if (cluster_token.max_registrations and
cluster_token.current_registrations >= cluster_token.max_registrations):
logger.warning(f"Token {token[:20]}... has reached registration limit")
return None
# Check IP restrictions
if cluster_token.allowed_ip_ranges:
client_ip_obj = ip_address(client_ip)
allowed = False
for ip_range in cluster_token.allowed_ip_ranges:
try:
network = IPv4Network(ip_range, strict=False) if ':' not in ip_range else IPv6Network(ip_range, strict=False)
if client_ip_obj in network:
allowed = True
break
except Exception as e:
logger.warning(f"Invalid IP range {ip_range}: {e}")
if not allowed:
logger.warning(f"IP {client_ip} not allowed for token {token[:20]}...")
return None
return cluster_token
except Exception as e:
logger.error(f"Failed to validate token: {e}")
return None
async def list_tokens(self) -> List[ClusterToken]:
"""List all cluster tokens."""
conn = await self.get_connection()
try:
results = await conn.fetch("""
SELECT id, token, description, created_at, expires_at,
is_active, max_registrations, current_registrations, allowed_ip_ranges
FROM cluster_tokens
ORDER BY created_at DESC
""")
return [ClusterToken(**dict(result)) for result in results]
except Exception as e:
logger.error(f"Failed to list tokens: {e}")
raise
async def revoke_token(self, token: str) -> bool:
"""Revoke a cluster token."""
conn = await self.get_connection()
try:
result = await conn.execute("""
UPDATE cluster_tokens
SET is_active = false
WHERE token = $1
""", token)
return result != "UPDATE 0"
except Exception as e:
logger.error(f"Failed to revoke token: {e}")
return False
# Node Registration
async def register_node(self, request: RegistrationRequest, client_ip: str) -> Dict[str, Any]:
"""Register a new cluster node."""
conn = await self.get_connection()
try:
# Validate token
token_info = await self.validate_token(request.token, client_ip)
if not token_info:
await self._log_registration_attempt(
client_ip, request.token, request.node_id,
request.hostname, False, "Invalid or expired token", request.metadata
)
raise ValueError("Invalid or expired registration token")
# Extract system info components
system_info = request.system_info or {}
cpu_info = system_info.get('cpu', {})
memory_info = system_info.get('memory', {})
gpu_info = system_info.get('gpu', {})
disk_info = system_info.get('disk', {})
os_info = system_info.get('os', {})
platform_info = system_info.get('platform', {})
# Register or update node
result = await conn.fetchrow("""
INSERT INTO cluster_nodes (
node_id, hostname, ip_address, registration_token,
cpu_info, memory_info, gpu_info, disk_info, os_info, platform_info,
services, capabilities, ports, client_version, registration_metadata
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)
ON CONFLICT (node_id) DO UPDATE SET
hostname = EXCLUDED.hostname,
ip_address = EXCLUDED.ip_address,
cpu_info = EXCLUDED.cpu_info,
memory_info = EXCLUDED.memory_info,
gpu_info = EXCLUDED.gpu_info,
disk_info = EXCLUDED.disk_info,
os_info = EXCLUDED.os_info,
platform_info = EXCLUDED.platform_info,
services = EXCLUDED.services,
capabilities = EXCLUDED.capabilities,
ports = EXCLUDED.ports,
client_version = EXCLUDED.client_version,
registration_metadata = EXCLUDED.registration_metadata,
status = 'online',
last_heartbeat = NOW()
RETURNING id, node_id, hostname, ip_address, first_registered
""",
request.node_id, request.hostname, request.ip_address, request.token,
json.dumps(cpu_info) if cpu_info else None,
json.dumps(memory_info) if memory_info else None,
json.dumps(gpu_info) if gpu_info else None,
json.dumps(disk_info) if disk_info else None,
json.dumps(os_info) if os_info else None,
json.dumps(platform_info) if platform_info else None,
json.dumps(request.services) if request.services else None,
json.dumps(request.capabilities) if request.capabilities else None,
json.dumps(request.ports) if request.ports else None,
request.client_version,
json.dumps(request.metadata) if request.metadata else None
)
logger.info(f"Node {request.node_id} registered successfully from {client_ip}")
return {
"node_id": result["node_id"],
"registration_status": "success",
"heartbeat_interval": 30, # seconds
"registered_at": result["first_registered"].isoformat(),
"cluster_info": {
"coordinator_version": "1.0.0",
"features": ["heartbeat", "dynamic_scaling", "service_discovery"]
}
}
except Exception as e:
logger.error(f"Failed to register node {request.node_id}: {e}")
await self._log_registration_attempt(
client_ip, request.token, request.node_id,
request.hostname, False, str(e), request.metadata
)
raise
async def update_heartbeat(self, request: HeartbeatRequest) -> Dict[str, Any]:
"""Update node heartbeat and metrics."""
conn = await self.get_connection()
try:
# Update node status and heartbeat
result = await conn.fetchrow("""
UPDATE cluster_nodes
SET status = $2, last_heartbeat = NOW()
WHERE node_id = $1
RETURNING node_id, status, last_heartbeat
""", request.node_id, request.status)
if not result:
raise ValueError(f"Node {request.node_id} not found")
# Record heartbeat metrics
await conn.execute("""
INSERT INTO node_heartbeats (
node_id, cpu_usage, memory_usage, disk_usage, gpu_usage,
services_status, network_metrics, custom_metrics
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
""",
request.node_id, request.cpu_usage, request.memory_usage,
request.disk_usage, request.gpu_usage,
json.dumps(request.services_status) if request.services_status else None,
json.dumps(request.network_metrics) if request.network_metrics else None,
json.dumps(request.custom_metrics) if request.custom_metrics else None
)
return {
"node_id": result["node_id"],
"status": result["status"],
"heartbeat_received": result["last_heartbeat"].isoformat(),
"next_heartbeat_in": 30, # seconds
"commands": [] # Future: cluster management commands
}
except Exception as e:
logger.error(f"Failed to update heartbeat for {request.node_id}: {e}")
raise
async def get_registered_nodes(self, include_offline: bool = True) -> List[ClusterNode]:
"""Get all registered cluster nodes."""
conn = await self.get_connection()
try:
query = """
SELECT id, node_id, hostname, ip_address, registration_token,
cpu_info, memory_info, gpu_info, disk_info, os_info, platform_info,
status, last_heartbeat, first_registered,
services, capabilities, ports, client_version, registration_metadata
FROM cluster_nodes
"""
if not include_offline:
query += " WHERE status != 'offline'"
query += " ORDER BY first_registered DESC"
results = await conn.fetch(query)
nodes = []
for result in results:
node_dict = dict(result)
# Parse JSON fields
for json_field in ['cpu_info', 'memory_info', 'gpu_info', 'disk_info',
'os_info', 'platform_info', 'services', 'capabilities',
'ports', 'registration_metadata']:
if node_dict[json_field]:
try:
node_dict[json_field] = json.loads(node_dict[json_field])
except json.JSONDecodeError:
node_dict[json_field] = None
nodes.append(ClusterNode(**node_dict))
return nodes
except Exception as e:
logger.error(f"Failed to get registered nodes: {e}")
raise
async def get_node_details(self, node_id: str) -> Optional[ClusterNode]:
"""Get detailed information about a specific node."""
nodes = await self.get_registered_nodes()
return next((node for node in nodes if node.node_id == node_id), None)
async def remove_node(self, node_id: str) -> bool:
"""Remove a node from the cluster."""
conn = await self.get_connection()
try:
result = await conn.execute("""
DELETE FROM cluster_nodes WHERE node_id = $1
""", node_id)
if result != "DELETE 0":
logger.info(f"Node {node_id} removed from cluster")
return True
return False
except Exception as e:
logger.error(f"Failed to remove node {node_id}: {e}")
return False
# Maintenance and Monitoring
async def cleanup_offline_nodes(self, offline_threshold_minutes: int = 10) -> int:
"""Mark nodes as offline if they haven't sent heartbeats."""
conn = await self.get_connection()
try:
result = await conn.execute("""
UPDATE cluster_nodes
SET status = 'offline'
WHERE status = 'online'
AND last_heartbeat < NOW() - INTERVAL '%s minutes'
""" % offline_threshold_minutes)
# Extract number from result like "UPDATE 3"
count = int(result.split()[-1]) if result.split()[-1].isdigit() else 0
if count > 0:
logger.info(f"Marked {count} nodes as offline due to missing heartbeats")
return count
except Exception as e:
logger.error(f"Failed to cleanup offline nodes: {e}")
return 0
async def cleanup_old_heartbeats(self, retention_days: int = 30) -> int:
"""Remove old heartbeat data for storage management."""
conn = await self.get_connection()
try:
result = await conn.execute("""
DELETE FROM node_heartbeats
WHERE heartbeat_time < NOW() - INTERVAL '%s days'
""" % retention_days)
count = int(result.split()[-1]) if result.split()[-1].isdigit() else 0
if count > 0:
logger.info(f"Cleaned up {count} old heartbeat records")
return count
except Exception as e:
logger.error(f"Failed to cleanup old heartbeats: {e}")
return 0
async def _log_registration_attempt(
self,
ip_address: str,
token: str,
node_id: str,
hostname: str,
success: bool,
failure_reason: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None
):
"""Log registration attempts for security monitoring."""
conn = await self.get_connection()
try:
await conn.execute("""
INSERT INTO node_registration_attempts (
ip_address, token_used, node_id, hostname,
success, failure_reason, request_metadata
) VALUES ($1, $2, $3, $4, $5, $6, $7)
""", ip_address, token, node_id, hostname, success, failure_reason,
json.dumps(metadata) if metadata else None)
except Exception as e:
logger.error(f"Failed to log registration attempt: {e}")
async def get_cluster_statistics(self) -> Dict[str, Any]:
"""Get cluster statistics and health metrics."""
conn = await self.get_connection()
try:
# Node statistics
node_stats = await conn.fetchrow("""
SELECT
COUNT(*) as total_nodes,
COUNT(*) FILTER (WHERE status = 'online') as online_nodes,
COUNT(*) FILTER (WHERE status = 'offline') as offline_nodes,
COUNT(*) FILTER (WHERE status = 'maintenance') as maintenance_nodes
FROM cluster_nodes
""")
# Token statistics
token_stats = await conn.fetchrow("""
SELECT
COUNT(*) as total_tokens,
COUNT(*) FILTER (WHERE is_active = true) as active_tokens,
COUNT(*) FILTER (WHERE expires_at IS NOT NULL AND expires_at < NOW()) as expired_tokens
FROM cluster_tokens
""")
return {
"cluster_health": {
"total_nodes": node_stats["total_nodes"],
"online_nodes": node_stats["online_nodes"],
"offline_nodes": node_stats["offline_nodes"],
"maintenance_nodes": node_stats["maintenance_nodes"],
"health_percentage": (node_stats["online_nodes"] / max(node_stats["total_nodes"], 1)) * 100
},
"token_management": {
"total_tokens": token_stats["total_tokens"],
"active_tokens": token_stats["active_tokens"],
"expired_tokens": token_stats["expired_tokens"]
},
"last_updated": datetime.now().isoformat()
}
except Exception as e:
logger.error(f"Failed to get cluster statistics: {e}")
return {
"error": str(e),
"last_updated": datetime.now().isoformat()
}
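
For reference, a minimal end-to-end sketch of driving the service above: issue a token, register a node, and send one heartbeat. It assumes the cluster_tokens / cluster_nodes / node_registration_attempts / node_heartbeats tables already exist and that DATABASE_URL points at a reachable PostgreSQL instance; all literal values are placeholders.

import asyncio

DATABASE_URL = "postgresql://hive:hive@localhost:5432/hive"  # placeholder DSN


async def demo() -> None:
    service = ClusterRegistrationService(DATABASE_URL)
    try:
        # 1. Operator issues a scoped registration token
        token = await service.generate_cluster_token(
            description="lab onboarding",
            created_by_user_id="admin",
            expires_in_days=7,
            max_registrations=5,
            allowed_ip_ranges=["192.168.1.0/24"],
        )
        # 2. A node presents that token to join the cluster
        result = await service.register_node(
            RegistrationRequest(
                token=token.token,
                node_id="ironwood",
                hostname="ironwood",
                ip_address="192.168.1.113",
                system_info={"cpu": {"model": "AMD Threadripper 2920X"}},
            ),
            client_ip="192.168.1.113",
        )
        # 3. The node then reports in on the returned heartbeat_interval
        await service.update_heartbeat(
            HeartbeatRequest(node_id=result["node_id"], cpu_usage=12.5)
        )
    finally:
        await service.close_connection()

asyncio.run(demo())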

View File

@@ -26,7 +26,7 @@ class ClusterService:
"ip": "192.168.1.113",
"hostname": "ironwood",
"role": "worker",
"gpu": "NVIDIA RTX 3070",
"gpu": "NVIDIA RTX 2080S",
"memory": "128GB",
"cpu": "AMD Threadripper 2920X",
"ollama_port": 11434,
@@ -57,6 +57,66 @@ class ClusterService:
self.n8n_api_base = "https://n8n.home.deepblack.cloud/api/v1"
self.n8n_api_key = self._get_n8n_api_key()
def _get_live_hardware_info(self, hostname: str, ip: str) -> Dict[str, str]:
"""Get live hardware information from a remote node via SSH."""
hardware = {
"cpu": "Unknown",
"memory": "Unknown",
"gpu": "Unknown"
}
try:
# Try to get GPU info via SSH
print(f"🔍 SSH GPU command for {hostname}: ssh tony@{ip} 'nvidia-smi || lspci | grep -i vga'")
gpu_result = subprocess.run([
"ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=5",
f"tony@{ip}", "nvidia-smi --query-gpu=name --format=csv,noheader,nounits || lspci | grep -i 'vga\\|3d\\|display'"
], capture_output=True, text=True, timeout=10)
print(f"📊 GPU command result for {hostname}: returncode={gpu_result.returncode}, stdout='{gpu_result.stdout.strip()}', stderr='{gpu_result.stderr.strip()}'")
if gpu_result.returncode == 0 and gpu_result.stdout.strip():
gpu_info = gpu_result.stdout.strip().split('\n')[0]
if "NVIDIA" in gpu_info or "RTX" in gpu_info or "GTX" in gpu_info:
hardware["gpu"] = gpu_info.strip()
elif "VGA" in gpu_info or "Display" in gpu_info:
# Parse lspci output for GPU info
if "NVIDIA" in gpu_info:
parts = gpu_info.split("NVIDIA")
if len(parts) > 1:
gpu_name = "NVIDIA" + parts[1].split('[')[0].strip()
hardware["gpu"] = gpu_name
elif "AMD" in gpu_info or "Radeon" in gpu_info:
parts = gpu_info.split(":")
if len(parts) > 2:
gpu_name = parts[2].strip()
hardware["gpu"] = gpu_name
# Try to get memory info via SSH
mem_result = subprocess.run([
"ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=5",
f"tony@{ip}", "free -h | grep '^Mem:' | awk '{print $2}'"
], capture_output=True, text=True, timeout=10)
if mem_result.returncode == 0 and mem_result.stdout.strip():
memory_info = mem_result.stdout.strip()
hardware["memory"] = memory_info
# Try to get CPU info via SSH
cpu_result = subprocess.run([
"ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=5",
f"tony@{ip}", "lscpu | grep 'Model name:' | cut -d':' -f2- | xargs"
], capture_output=True, text=True, timeout=10)
if cpu_result.returncode == 0 and cpu_result.stdout.strip():
cpu_info = cpu_result.stdout.strip()
hardware["cpu"] = cpu_info
except Exception as e:
print(f"Error getting live hardware info for {hostname}: {e}")
return hardware
def _get_n8n_api_key(self) -> Optional[str]:
"""Get n8n API key from secrets."""
try:
@@ -136,17 +196,35 @@ class ClusterService:
except Exception:
pass
# Try to get live hardware info if node is online
hardware_info = {
"cpu": node_info["cpu"],
"memory": node_info["memory"],
"gpu": node_info["gpu"]
}
if status == "online":
try:
print(f"🔍 Getting live hardware info for {node_id} ({node_info['ip']})")
live_hardware = self._get_live_hardware_info(node_info["hostname"], node_info["ip"])
print(f"📊 Live hardware detected for {node_id}: {live_hardware}")
# Use live data if available, fallback to hardcoded values
for key in ["cpu", "memory", "gpu"]:
if live_hardware[key] != "Unknown":
print(f"✅ Using live {key} for {node_id}: {live_hardware[key]}")
hardware_info[key] = live_hardware[key]
else:
print(f"⚠️ Using fallback {key} for {node_id}: {hardware_info[key]}")
except Exception as e:
print(f"❌ Failed to get live hardware info for {node_id}: {e}")
return {
"id": node_id,
"hostname": node_info["hostname"],
"ip": node_info["ip"],
"status": status,
"role": node_info["role"],
"hardware": {
"cpu": node_info["cpu"],
"memory": node_info["memory"],
"gpu": node_info["gpu"]
},
"hardware": hardware_info,
"model_count": model_count,
"models": [{"name": m["name"], "size": m.get("size", 0)} for m in models],
"metrics": {

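The merge step above prefers live SSH readings but only when the probe actually returned a value; the hardcoded entries remain the fallback. A small standalone illustration of that precedence rule (merge_hardware is a hypothetical helper, not part of ClusterService):

# Hypothetical illustration of the fallback rule used above: live values win
# only when the SSH probe produced something other than "Unknown".
def merge_hardware(fallback: dict, live: dict) -> dict:
    merged = dict(fallback)
    for key in ("cpu", "memory", "gpu"):
        if live.get(key, "Unknown") != "Unknown":
            merged[key] = live[key]
    return merged

print(merge_hardware(
    {"cpu": "AMD Threadripper 2920X", "memory": "128GB", "gpu": "NVIDIA RTX 2080S"},
    {"cpu": "Unknown", "memory": "125Gi", "gpu": "NVIDIA GeForce RTX 2080 SUPER"},
))
# live memory and gpu override the fallbacks; cpu keeps the hardcoded value
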
View File

@@ -689,4 +689,58 @@ class ProjectService:
# Handle escalation status
if status == "escalated":
print(f"Task escalated for human review: {metadata}")
# TODO: Trigger N8N webhook for human escalation
# TODO: Trigger N8N webhook for human escalation
def update_project(self, project_id: str, project_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""Update a project configuration."""
try:
# For now, projects are read-only from the filesystem
# This could be extended to update project metadata files
project = self.get_project_by_id(project_id)
if not project:
return None
# Update project metadata in a local JSON file if needed
# For now, just return the existing project as projects are filesystem-based
print(f"Project update request for {project_id}: {project_data}")
return project
except Exception as e:
print(f"Error updating project {project_id}: {e}")
return None
def create_project(self, project_data: Dict[str, Any]) -> Dict[str, Any]:
"""Create a new project."""
try:
# For now, projects are filesystem-based and read-only
# This could be extended to create new project directories
print(f"Project creation request: {project_data}")
# Return a mock project for now
project_id = project_data.get("name", "new-project").lower().replace(" ", "-")
return {
"id": project_id,
"name": project_data.get("name", "New Project"),
"description": project_data.get("description", ""),
"status": "created",
"created_at": datetime.now().isoformat(),
"updated_at": datetime.now().isoformat()
}
except Exception as e:
print(f"Error creating project: {e}")
raise
def delete_project(self, project_id: str) -> bool:
"""Delete a project."""
try:
# For now, projects are filesystem-based and read-only
# This could be extended to archive or remove project directories
project = self.get_project_by_id(project_id)
if not project:
return False
print(f"Project deletion request for {project_id}")
# Return success for now (projects are read-only)
return True
except Exception as e:
print(f"Error deleting project {project_id}: {e}")
return False
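
The three methods above are intentionally thin while projects remain filesystem-backed: they log the request and return the existing record, a mock record, or a success flag. A short sketch of how a caller sees that behaviour today (constructor arguments, if any, are assumed):

# Illustrative only: exercises the placeholder CRUD methods added above.
service = ProjectService()  # assumes the service's usual constructor

created = service.create_project({"name": "Edge Telemetry", "description": "demo"})
print(created["id"])                         # "edge-telemetry"

updated = service.update_project(created["id"], {"description": "revised"})
print(updated)                               # existing project echoed back, or None if unknown

print(service.delete_project("missing-id"))  # False when the project cannot be found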