Release v1.0.0: Production-ready SWOOSH with durability guarantees
Major enhancements: - Added production-grade durability guarantees with fsync operations - Implemented BadgerDB WAL for crash recovery and persistence - Added comprehensive HTTP API (GET/POST /state, POST /command) - Exported ComputeStateHash for external use in genesis initialization - Enhanced snapshot system with atomic write-fsync-rename sequence - Added API integration documentation and durability guarantees docs New files: - api.go: HTTP server implementation with state and command endpoints - api_test.go: Comprehensive API test suite - badger_wal.go: BadgerDB-based write-ahead log - cmd/swoosh/main.go: CLI entry point with API server - API_INTEGRATION.md: API usage and integration guide - DURABILITY.md: Durability guarantees and recovery procedures - CHANGELOG.md: Version history and changes - RELEASE_NOTES.md: Release notes for v1.0.0 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
46
snapshot.go
46
snapshot.go
@@ -35,6 +35,16 @@ func NewFileSnapshotStore(path string) *FileSnapshotStore {
|
||||
}
|
||||
|
||||
// Save writes the snapshot with atomic replace semantics.
|
||||
//
|
||||
// Durability guarantees (production-grade):
|
||||
// 1. Serialize snapshot to canonical JSON
|
||||
// 2. Write to temporary file in same directory as target
|
||||
// 3. Fsync temp file to ensure data reaches disk
|
||||
// 4. Fsync parent directory to ensure rename is durable (Linux ext4/xfs)
|
||||
// 5. Atomic rename temp → target
|
||||
//
|
||||
// If crash occurs between steps 3-5, temp file exists but is not "latest".
|
||||
// LoadLatest() always reads canonical path, never temp files.
|
||||
func (s *FileSnapshotStore) Save(snapshot Snapshot) error {
|
||||
dir := filepath.Dir(s.path)
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
@@ -52,31 +62,53 @@ func (s *FileSnapshotStore) Save(snapshot Snapshot) error {
|
||||
}
|
||||
tempName := temp.Name()
|
||||
|
||||
// Cleanup temp file if we fail before rename
|
||||
defer func() {
|
||||
if temp != nil {
|
||||
temp.Close()
|
||||
os.Remove(tempName)
|
||||
}
|
||||
}()
|
||||
|
||||
if _, err := temp.Write(payload); err != nil {
|
||||
temp.Close()
|
||||
os.Remove(tempName)
|
||||
return fmt.Errorf("write snapshot: %w", err)
|
||||
}
|
||||
|
||||
// DURABILITY POINT 1: fsync temp file
|
||||
if err := temp.Sync(); err != nil {
|
||||
temp.Close()
|
||||
os.Remove(tempName)
|
||||
return fmt.Errorf("sync snapshot: %w", err)
|
||||
return fmt.Errorf("fsync snapshot: %w", err)
|
||||
}
|
||||
|
||||
if err := temp.Close(); err != nil {
|
||||
os.Remove(tempName)
|
||||
return fmt.Errorf("close snapshot temp file: %w", err)
|
||||
}
|
||||
temp = nil // Prevent deferred cleanup from closing again
|
||||
|
||||
// DURABILITY POINT 2: fsync parent directory
|
||||
// This ensures the upcoming rename is durable on Linux
|
||||
if err := fsyncDir(dir); err != nil {
|
||||
return fmt.Errorf("fsync snapshot directory: %w", err)
|
||||
}
|
||||
|
||||
// DURABILITY POINT 3: atomic rename
|
||||
if err := os.Rename(tempName, s.path); err != nil {
|
||||
os.Remove(tempName)
|
||||
return fmt.Errorf("rename snapshot: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// fsyncDir opens and fsyncs a directory to ensure metadata (e.g., renames) is durable.
|
||||
// On Linux, this is required for rename durability.
|
||||
func fsyncDir(path string) error {
|
||||
dir, err := os.Open(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer dir.Close()
|
||||
return dir.Sync()
|
||||
}
|
||||
|
||||
// LoadLatest returns the persisted snapshot or ErrSnapshotNotFound if absent.
|
||||
func (s *FileSnapshotStore) LoadLatest() (Snapshot, error) {
|
||||
payload, err := os.ReadFile(s.path)
|
||||
|
||||
Reference in New Issue
Block a user