package swoosh import ( "encoding/json" "errors" "fmt" "os" "path/filepath" ) // Snapshot captures a serialized view of the orchestrator state and replay cursor. type Snapshot struct { State OrchestratorState `json:"state"` LastAppliedHLC string `json:"last_applied_hlc"` LastAppliedIndex uint64 `json:"last_applied_index"` } // SnapshotStore persists and loads orchestrator snapshots. type SnapshotStore interface { Save(s Snapshot) error LoadLatest() (Snapshot, error) } // ErrSnapshotNotFound indicates there is no stored snapshot. var ErrSnapshotNotFound = errors.New("snapshot not found") // FileSnapshotStore stores snapshots using atomic file replacement. type FileSnapshotStore struct { path string } // NewFileSnapshotStore creates a snapshot store at the supplied path. func NewFileSnapshotStore(path string) *FileSnapshotStore { return &FileSnapshotStore{path: path} } // Save writes the snapshot with atomic replace semantics. // // Durability guarantees (production-grade): // 1. Serialize snapshot to canonical JSON // 2. Write to temporary file in same directory as target // 3. Fsync temp file to ensure data reaches disk // 4. Fsync parent directory to ensure rename is durable (Linux ext4/xfs) // 5. Atomic rename temp → target // // If crash occurs between steps 3-5, temp file exists but is not "latest". // LoadLatest() always reads canonical path, never temp files. func (s *FileSnapshotStore) Save(snapshot Snapshot) error { dir := filepath.Dir(s.path) if err := os.MkdirAll(dir, 0o755); err != nil { return fmt.Errorf("create snapshot directory: %w", err) } payload, err := canonicalJSON(snapshot) if err != nil { return fmt.Errorf("marshal snapshot: %w", err) } temp, err := os.CreateTemp(dir, "snapshot-*.tmp") if err != nil { return fmt.Errorf("create temp snapshot: %w", err) } tempName := temp.Name() // Cleanup temp file if we fail before rename defer func() { if temp != nil { temp.Close() os.Remove(tempName) } }() if _, err := temp.Write(payload); err != nil { return fmt.Errorf("write snapshot: %w", err) } // DURABILITY POINT 1: fsync temp file if err := temp.Sync(); err != nil { return fmt.Errorf("fsync snapshot: %w", err) } if err := temp.Close(); err != nil { return fmt.Errorf("close snapshot temp file: %w", err) } temp = nil // Prevent deferred cleanup from closing again // DURABILITY POINT 2: fsync parent directory // This ensures the upcoming rename is durable on Linux if err := fsyncDir(dir); err != nil { return fmt.Errorf("fsync snapshot directory: %w", err) } // DURABILITY POINT 3: atomic rename if err := os.Rename(tempName, s.path); err != nil { return fmt.Errorf("rename snapshot: %w", err) } return nil } // fsyncDir opens and fsyncs a directory to ensure metadata (e.g., renames) is durable. // On Linux, this is required for rename durability. func fsyncDir(path string) error { dir, err := os.Open(path) if err != nil { return err } defer dir.Close() return dir.Sync() } // LoadLatest returns the persisted snapshot or ErrSnapshotNotFound if absent. func (s *FileSnapshotStore) LoadLatest() (Snapshot, error) { payload, err := os.ReadFile(s.path) if err != nil { if errors.Is(err, os.ErrNotExist) { return Snapshot{}, ErrSnapshotNotFound } return Snapshot{}, fmt.Errorf("read snapshot: %w", err) } var snapshot Snapshot if err := json.Unmarshal(payload, &snapshot); err != nil { return Snapshot{}, fmt.Errorf("decode snapshot: %w", err) } return snapshot, nil } // InMemorySnapshotStore fulfills SnapshotStore in memory for testing. type InMemorySnapshotStore struct { snapshot Snapshot has bool } // Save stores the snapshot in memory. func (s *InMemorySnapshotStore) Save(snapshot Snapshot) error { s.snapshot = snapshot s.has = true return nil } // LoadLatest retrieves the stored snapshot or ErrSnapshotNotFound. func (s *InMemorySnapshotStore) LoadLatest() (Snapshot, error) { if !s.has { return Snapshot{}, ErrSnapshotNotFound } return s.snapshot, nil }