Integrate wave-based scaling system with WHOOSH server
- Add scaling system components to server initialization
- Register scaling API and assignment broker routes
- Start bootstrap pool manager in server lifecycle
- Add graceful shutdown for scaling controller
- Update API routing to use chi.Router instead of gorilla/mux
- Fix Docker API compatibility issues
- Configure health gates with placeholder URLs for KACHING and BACKBEAT

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -121,7 +121,7 @@ func (sm *SwarmManager) ScaleService(ctx context.Context, serviceName string, re
|
||||
Str("service_id", service.ID).
|
||||
Uint64("current_replicas", currentReplicas).
|
||||
Int("target_replicas", replicas).
|
||||
Str("update_id", updateResponse.ID).
|
||||
Interface("update_response", updateResponse).
|
||||
Msg("Scaled service")
|
||||
|
||||
return nil
|
||||
@@ -214,9 +214,7 @@ func (sm *SwarmManager) GetServiceStatus(ctx context.Context, serviceName string
|
||||
UpdatedAt: task.UpdatedAt,
|
||||
}
|
||||
|
||||
if task.Status.Timestamp != nil {
|
||||
taskStatus.StatusTimestamp = *task.Status.Timestamp
|
||||
}
|
||||
taskStatus.StatusTimestamp = task.Status.Timestamp
|
||||
|
||||
status.Tasks = append(status.Tasks, taskStatus)
|
||||
|
||||
@@ -247,7 +245,7 @@ func (sm *SwarmManager) CreateCHORUSService(ctx context.Context, config *CHORUSS
|
||||
Env: buildEnvironmentList(config.Environment),
|
||||
},
|
||||
Resources: &swarm.ResourceRequirements{
|
||||
Limits: &swarm.Resources{
|
||||
Limits: &swarm.Limit{
|
||||
NanoCPUs: config.Resources.CPULimit,
|
||||
MemoryBytes: config.Resources.MemoryLimit,
|
||||
},
|
||||
@@ -763,7 +761,7 @@ func (sm *SwarmManager) CleanupFailedServices() error {
|
||||
}
|
||||
|
||||
for _, service := range services {
|
||||
status, err := sm.GetServiceStatus(service.ID)
|
||||
status, err := sm.GetServiceStatus(context.Background(), service.ID)
|
||||
if err != nil {
|
||||
log.Error().
|
||||
Err(err).
|
||||
@@ -771,13 +769,20 @@ func (sm *SwarmManager) CleanupFailedServices() error {
|
||||
Msg("Failed to get service status")
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
// Remove services with all failed tasks and no running tasks
|
||||
if status.FailedTasks > 0 && status.RunningTasks == 0 {
|
||||
failedTasks := 0
|
||||
for _, task := range status.Tasks {
|
||||
if task.State == "failed" {
|
||||
failedTasks++
|
||||
}
|
||||
}
|
||||
|
||||
if failedTasks > 0 && status.RunningReplicas == 0 {
|
||||
log.Warn().
|
||||
Str("service_id", service.ID).
|
||||
Str("service_name", service.Spec.Name).
|
||||
Uint64("failed_tasks", status.FailedTasks).
|
||||
Int("failed_tasks", failedTasks).
|
||||
Msg("Removing failed service")
|
||||
|
||||
err = sm.RemoveAgent(service.ID)
|
||||
|
||||
Reference in New Issue
Block a user