Integrate wave-based scaling system with WHOOSH server

- Add scaling system components to server initialization
- Register scaling API and assignment broker routes
- Start bootstrap pool manager in server lifecycle
- Add graceful shutdown for scaling controller
- Update API routing to use chi.Router instead of gorilla/mux
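- Fix Docker API compatibility issues (use swarm.Limit for resource limits, treat task status timestamps as values, log the full service update response, pass context to GetServiceStatus)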
- Configure health gates with placeholder URLs for KACHING and BACKBEAT

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Claude Code
2025-09-22 13:59:01 +10:00
parent 564852dc91
commit 28f02b61d1
10 changed files with 193 additions and 135 deletions

View File

@@ -121,7 +121,7 @@ func (sm *SwarmManager) ScaleService(ctx context.Context, serviceName string, re
Str("service_id", service.ID).
Uint64("current_replicas", currentReplicas).
Int("target_replicas", replicas).
Str("update_id", updateResponse.ID).
Interface("update_response", updateResponse).
Msg("Scaled service")
return nil
@@ -214,9 +214,7 @@ func (sm *SwarmManager) GetServiceStatus(ctx context.Context, serviceName string
UpdatedAt: task.UpdatedAt,
}
if task.Status.Timestamp != nil {
taskStatus.StatusTimestamp = *task.Status.Timestamp
}
taskStatus.StatusTimestamp = task.Status.Timestamp
status.Tasks = append(status.Tasks, taskStatus)
@@ -247,7 +245,7 @@ func (sm *SwarmManager) CreateCHORUSService(ctx context.Context, config *CHORUSS
Env: buildEnvironmentList(config.Environment),
},
Resources: &swarm.ResourceRequirements{
Limits: &swarm.Resources{
Limits: &swarm.Limit{
NanoCPUs: config.Resources.CPULimit,
MemoryBytes: config.Resources.MemoryLimit,
},
@@ -763,7 +761,7 @@ func (sm *SwarmManager) CleanupFailedServices() error {
}
for _, service := range services {
status, err := sm.GetServiceStatus(service.ID)
status, err := sm.GetServiceStatus(context.Background(), service.ID)
if err != nil {
log.Error().
Err(err).
@@ -771,13 +769,20 @@ func (sm *SwarmManager) CleanupFailedServices() error {
Msg("Failed to get service status")
continue
}
// Remove services with all failed tasks and no running tasks
if status.FailedTasks > 0 && status.RunningTasks == 0 {
failedTasks := 0
for _, task := range status.Tasks {
if task.State == "failed" {
failedTasks++
}
}
if failedTasks > 0 && status.RunningReplicas == 0 {
log.Warn().
Str("service_id", service.ID).
Str("service_name", service.Spec.Name).
Uint64("failed_tasks", status.FailedTasks).
Int("failed_tasks", failedTasks).
Msg("Removing failed service")
err = sm.RemoveAgent(service.ID)