Major enhancements: - Added production-grade durability guarantees with fsync operations - Implemented BadgerDB WAL for crash recovery and persistence - Added comprehensive HTTP API (GET/POST /state, POST /command) - Exported ComputeStateHash for external use in genesis initialization - Enhanced snapshot system with atomic write-fsync-rename sequence - Added API integration documentation and durability guarantees docs New files: - api.go: HTTP server implementation with state and command endpoints - api_test.go: Comprehensive API test suite - badger_wal.go: BadgerDB-based write-ahead log - cmd/swoosh/main.go: CLI entry point with API server - API_INTEGRATION.md: API usage and integration guide - DURABILITY.md: Durability guarantees and recovery procedures - CHANGELOG.md: Version history and changes - RELEASE_NOTES.md: Release notes for v1.0.0 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
150 lines
4.0 KiB
Go
150 lines
4.0 KiB
Go
package swoosh
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
)
|
|
|
|
// Snapshot captures a serialized view of the orchestrator state and replay cursor.
|
|
type Snapshot struct {
|
|
State OrchestratorState `json:"state"`
|
|
LastAppliedHLC string `json:"last_applied_hlc"`
|
|
LastAppliedIndex uint64 `json:"last_applied_index"`
|
|
}
|
|
|
|
// SnapshotStore persists and loads orchestrator snapshots.
|
|
type SnapshotStore interface {
|
|
Save(s Snapshot) error
|
|
LoadLatest() (Snapshot, error)
|
|
}
|
|
|
|
// ErrSnapshotNotFound indicates there is no stored snapshot.
|
|
var ErrSnapshotNotFound = errors.New("snapshot not found")
|
|
|
|
// FileSnapshotStore stores snapshots using atomic file replacement.
|
|
type FileSnapshotStore struct {
|
|
path string
|
|
}
|
|
|
|
// NewFileSnapshotStore creates a snapshot store at the supplied path.
|
|
func NewFileSnapshotStore(path string) *FileSnapshotStore {
|
|
return &FileSnapshotStore{path: path}
|
|
}
|
|
|
|
// Save writes the snapshot with atomic replace semantics.
|
|
//
|
|
// Durability guarantees (production-grade):
|
|
// 1. Serialize snapshot to canonical JSON
|
|
// 2. Write to temporary file in same directory as target
|
|
// 3. Fsync temp file to ensure data reaches disk
|
|
// 4. Fsync parent directory to ensure rename is durable (Linux ext4/xfs)
|
|
// 5. Atomic rename temp → target
|
|
//
|
|
// If crash occurs between steps 3-5, temp file exists but is not "latest".
|
|
// LoadLatest() always reads canonical path, never temp files.
|
|
func (s *FileSnapshotStore) Save(snapshot Snapshot) error {
|
|
dir := filepath.Dir(s.path)
|
|
if err := os.MkdirAll(dir, 0o755); err != nil {
|
|
return fmt.Errorf("create snapshot directory: %w", err)
|
|
}
|
|
|
|
payload, err := canonicalJSON(snapshot)
|
|
if err != nil {
|
|
return fmt.Errorf("marshal snapshot: %w", err)
|
|
}
|
|
|
|
temp, err := os.CreateTemp(dir, "snapshot-*.tmp")
|
|
if err != nil {
|
|
return fmt.Errorf("create temp snapshot: %w", err)
|
|
}
|
|
tempName := temp.Name()
|
|
|
|
// Cleanup temp file if we fail before rename
|
|
defer func() {
|
|
if temp != nil {
|
|
temp.Close()
|
|
os.Remove(tempName)
|
|
}
|
|
}()
|
|
|
|
if _, err := temp.Write(payload); err != nil {
|
|
return fmt.Errorf("write snapshot: %w", err)
|
|
}
|
|
|
|
// DURABILITY POINT 1: fsync temp file
|
|
if err := temp.Sync(); err != nil {
|
|
return fmt.Errorf("fsync snapshot: %w", err)
|
|
}
|
|
|
|
if err := temp.Close(); err != nil {
|
|
return fmt.Errorf("close snapshot temp file: %w", err)
|
|
}
|
|
temp = nil // Prevent deferred cleanup from closing again
|
|
|
|
// DURABILITY POINT 2: fsync parent directory
|
|
// This ensures the upcoming rename is durable on Linux
|
|
if err := fsyncDir(dir); err != nil {
|
|
return fmt.Errorf("fsync snapshot directory: %w", err)
|
|
}
|
|
|
|
// DURABILITY POINT 3: atomic rename
|
|
if err := os.Rename(tempName, s.path); err != nil {
|
|
return fmt.Errorf("rename snapshot: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// fsyncDir opens and fsyncs a directory to ensure metadata (e.g., renames) is durable.
|
|
// On Linux, this is required for rename durability.
|
|
func fsyncDir(path string) error {
|
|
dir, err := os.Open(path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer dir.Close()
|
|
return dir.Sync()
|
|
}
|
|
|
|
// LoadLatest returns the persisted snapshot or ErrSnapshotNotFound if absent.
|
|
func (s *FileSnapshotStore) LoadLatest() (Snapshot, error) {
|
|
payload, err := os.ReadFile(s.path)
|
|
if err != nil {
|
|
if errors.Is(err, os.ErrNotExist) {
|
|
return Snapshot{}, ErrSnapshotNotFound
|
|
}
|
|
return Snapshot{}, fmt.Errorf("read snapshot: %w", err)
|
|
}
|
|
|
|
var snapshot Snapshot
|
|
if err := json.Unmarshal(payload, &snapshot); err != nil {
|
|
return Snapshot{}, fmt.Errorf("decode snapshot: %w", err)
|
|
}
|
|
|
|
return snapshot, nil
|
|
}
|
|
|
|
// InMemorySnapshotStore fulfills SnapshotStore in memory for testing.
|
|
type InMemorySnapshotStore struct {
|
|
snapshot Snapshot
|
|
has bool
|
|
}
|
|
|
|
// Save stores the snapshot in memory.
|
|
func (s *InMemorySnapshotStore) Save(snapshot Snapshot) error {
|
|
s.snapshot = snapshot
|
|
s.has = true
|
|
return nil
|
|
}
|
|
|
|
// LoadLatest retrieves the stored snapshot or ErrSnapshotNotFound.
|
|
func (s *InMemorySnapshotStore) LoadLatest() (Snapshot, error) {
|
|
if !s.has {
|
|
return Snapshot{}, ErrSnapshotNotFound
|
|
}
|
|
return s.snapshot, nil
|
|
}
|