mirror of
https://github.com/benbjohnson/litestream.git
synced 2026-01-25 05:06:30 +00:00
feat: Add optional post-compaction consistency verification (#1029)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -231,6 +231,10 @@ type Config struct {
|
|||||||
L0Retention *time.Duration `yaml:"l0-retention"`
|
L0Retention *time.Duration `yaml:"l0-retention"`
|
||||||
L0RetentionCheckInterval *time.Duration `yaml:"l0-retention-check-interval"`
|
L0RetentionCheckInterval *time.Duration `yaml:"l0-retention-check-interval"`
|
||||||
|
|
||||||
|
// Verify TXID consistency at destination level after each compaction.
|
||||||
|
// When enabled, logs warnings if gaps or overlaps are detected.
|
||||||
|
VerifyCompaction bool `yaml:"verify-compaction"`
|
||||||
|
|
||||||
// Heartbeat settings (global defaults)
|
// Heartbeat settings (global defaults)
|
||||||
HeartbeatURL string `yaml:"heartbeat-url"`
|
HeartbeatURL string `yaml:"heartbeat-url"`
|
||||||
HeartbeatInterval *time.Duration `yaml:"heartbeat-interval"`
|
HeartbeatInterval *time.Duration `yaml:"heartbeat-interval"`
|
||||||
|
|||||||
@@ -250,6 +250,9 @@ func (c *ReplicateCommand) Run(ctx context.Context) (err error) {
|
|||||||
if c.Config.ShutdownSyncInterval != nil {
|
if c.Config.ShutdownSyncInterval != nil {
|
||||||
c.Store.SetShutdownSyncInterval(*c.Config.ShutdownSyncInterval)
|
c.Store.SetShutdownSyncInterval(*c.Config.ShutdownSyncInterval)
|
||||||
}
|
}
|
||||||
|
if c.Config.VerifyCompaction {
|
||||||
|
c.Store.SetVerifyCompaction(true)
|
||||||
|
}
|
||||||
if c.Config.HeartbeatURL != "" {
|
if c.Config.HeartbeatURL != "" {
|
||||||
interval := litestream.DefaultHeartbeatInterval
|
interval := litestream.DefaultHeartbeatInterval
|
||||||
if c.Config.HeartbeatInterval != nil {
|
if c.Config.HeartbeatInterval != nil {
|
||||||
|
|||||||
63
compactor.go
63
compactor.go
@@ -8,6 +8,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/superfly/ltx"
|
"github.com/superfly/ltx"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -18,6 +19,15 @@ type Compactor struct {
|
|||||||
client ReplicaClient
|
client ReplicaClient
|
||||||
logger *slog.Logger
|
logger *slog.Logger
|
||||||
|
|
||||||
|
// VerifyCompaction enables post-compaction TXID consistency verification.
|
||||||
|
// When enabled, verifies that files at the destination level have
|
||||||
|
// contiguous TXID ranges after each compaction. Disabled by default.
|
||||||
|
VerifyCompaction bool
|
||||||
|
|
||||||
|
// CompactionVerifyErrorCounter is incremented when post-compaction
|
||||||
|
// verification fails. Optional; if nil, no metric is recorded.
|
||||||
|
CompactionVerifyErrorCounter prometheus.Counter
|
||||||
|
|
||||||
// LocalFileOpener optionally opens a local LTX file for compaction.
|
// LocalFileOpener optionally opens a local LTX file for compaction.
|
||||||
// If nil or returns os.ErrNotExist, falls back to remote.
|
// If nil or returns os.ErrNotExist, falls back to remote.
|
||||||
// This is used by DB to prefer local files over remote for consistency.
|
// This is used by DB to prefer local files over remote for consistency.
|
||||||
@@ -164,9 +174,62 @@ func (c *Compactor) Compact(ctx context.Context, dstLevel int) (*ltx.FileInfo, e
|
|||||||
c.CacheSetter(dstLevel, info)
|
c.CacheSetter(dstLevel, info)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verify level consistency if enabled
|
||||||
|
if c.VerifyCompaction {
|
||||||
|
if err := c.VerifyLevelConsistency(ctx, dstLevel); err != nil {
|
||||||
|
c.logger.Warn("post-compaction verification failed",
|
||||||
|
"level", dstLevel,
|
||||||
|
"error", err)
|
||||||
|
if c.CompactionVerifyErrorCounter != nil {
|
||||||
|
c.CompactionVerifyErrorCounter.Inc()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return info, nil
|
return info, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// VerifyLevelConsistency checks that LTX files at the given level have
|
||||||
|
// contiguous TXID ranges (prevMaxTXID + 1 == currMinTXID for consecutive files).
|
||||||
|
// Returns an error describing any gaps or overlaps found.
|
||||||
|
func (c *Compactor) VerifyLevelConsistency(ctx context.Context, level int) error {
|
||||||
|
itr, err := c.client.LTXFiles(ctx, level, 0, false)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("fetch ltx files: %w", err)
|
||||||
|
}
|
||||||
|
defer itr.Close()
|
||||||
|
|
||||||
|
var prevInfo *ltx.FileInfo
|
||||||
|
for itr.Next() {
|
||||||
|
info := itr.Item()
|
||||||
|
|
||||||
|
// Skip first file - nothing to compare against
|
||||||
|
if prevInfo == nil {
|
||||||
|
prevInfo = info
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for TXID contiguity: prev.MaxTXID + 1 should equal curr.MinTXID
|
||||||
|
expectedMinTXID := prevInfo.MaxTXID + 1
|
||||||
|
if info.MinTXID != expectedMinTXID {
|
||||||
|
if info.MinTXID > expectedMinTXID {
|
||||||
|
return fmt.Errorf("TXID gap detected: prev.MaxTXID=%s, next.MinTXID=%s (expected %s)",
|
||||||
|
prevInfo.MaxTXID, info.MinTXID, expectedMinTXID)
|
||||||
|
}
|
||||||
|
return fmt.Errorf("TXID overlap detected: prev.MaxTXID=%s, next.MinTXID=%s",
|
||||||
|
prevInfo.MaxTXID, info.MinTXID)
|
||||||
|
}
|
||||||
|
|
||||||
|
prevInfo = info
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := itr.Close(); err != nil {
|
||||||
|
return fmt.Errorf("close iterator: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// EnforceSnapshotRetention enforces retention of snapshot level files by timestamp.
|
// EnforceSnapshotRetention enforces retention of snapshot level files by timestamp.
|
||||||
// Files older than the retention duration are deleted (except the newest is always kept).
|
// Files older than the retention duration are deleted (except the newest is always kept).
|
||||||
// Returns the minimum snapshot TXID still retained (useful for cascading retention to lower levels).
|
// Returns the minimum snapshot TXID still retained (useful for cascading retention to lower levels).
|
||||||
|
|||||||
@@ -321,6 +321,112 @@ func TestCompactor_EnforceSnapshotRetention(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCompactor_VerifyLevelConsistency(t *testing.T) {
|
||||||
|
t.Run("ContiguousFiles", func(t *testing.T) {
|
||||||
|
client := file.NewReplicaClient(t.TempDir())
|
||||||
|
compactor := litestream.NewCompactor(client, slog.Default())
|
||||||
|
|
||||||
|
// Create contiguous files
|
||||||
|
createTestLTXFile(t, client, 1, 1, 2)
|
||||||
|
createTestLTXFile(t, client, 1, 3, 5)
|
||||||
|
createTestLTXFile(t, client, 1, 6, 10)
|
||||||
|
|
||||||
|
// Should pass verification
|
||||||
|
err := compactor.VerifyLevelConsistency(context.Background(), 1)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("expected nil error for contiguous files, got: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("GapDetected", func(t *testing.T) {
|
||||||
|
client := file.NewReplicaClient(t.TempDir())
|
||||||
|
compactor := litestream.NewCompactor(client, slog.Default())
|
||||||
|
|
||||||
|
// Create files with a gap (missing TXID 3-4)
|
||||||
|
createTestLTXFile(t, client, 1, 1, 2)
|
||||||
|
createTestLTXFile(t, client, 1, 5, 7) // gap: expected MinTXID=3, got 5
|
||||||
|
|
||||||
|
err := compactor.VerifyLevelConsistency(context.Background(), 1)
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error for gap in files, got nil")
|
||||||
|
}
|
||||||
|
if err != nil && !containsString(err.Error(), "gap") {
|
||||||
|
t.Errorf("expected gap error, got: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("OverlapDetected", func(t *testing.T) {
|
||||||
|
client := file.NewReplicaClient(t.TempDir())
|
||||||
|
compactor := litestream.NewCompactor(client, slog.Default())
|
||||||
|
|
||||||
|
// Create overlapping files
|
||||||
|
createTestLTXFile(t, client, 1, 1, 5)
|
||||||
|
createTestLTXFile(t, client, 1, 3, 7) // overlap: expected MinTXID=6, got 3
|
||||||
|
|
||||||
|
err := compactor.VerifyLevelConsistency(context.Background(), 1)
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error for overlapping files, got nil")
|
||||||
|
}
|
||||||
|
if err != nil && !containsString(err.Error(), "overlap") {
|
||||||
|
t.Errorf("expected overlap error, got: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("SingleFile", func(t *testing.T) {
|
||||||
|
client := file.NewReplicaClient(t.TempDir())
|
||||||
|
compactor := litestream.NewCompactor(client, slog.Default())
|
||||||
|
|
||||||
|
// Create single file - should pass
|
||||||
|
createTestLTXFile(t, client, 1, 1, 5)
|
||||||
|
|
||||||
|
err := compactor.VerifyLevelConsistency(context.Background(), 1)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("expected nil error for single file, got: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("EmptyLevel", func(t *testing.T) {
|
||||||
|
client := file.NewReplicaClient(t.TempDir())
|
||||||
|
compactor := litestream.NewCompactor(client, slog.Default())
|
||||||
|
|
||||||
|
// Empty level - should pass
|
||||||
|
err := compactor.VerifyLevelConsistency(context.Background(), 1)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("expected nil error for empty level, got: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCompactor_CompactWithVerification(t *testing.T) {
|
||||||
|
t.Run("VerificationEnabled", func(t *testing.T) {
|
||||||
|
client := file.NewReplicaClient(t.TempDir())
|
||||||
|
compactor := litestream.NewCompactor(client, slog.Default())
|
||||||
|
compactor.VerifyCompaction = true
|
||||||
|
|
||||||
|
// Create contiguous L0 files
|
||||||
|
createTestLTXFile(t, client, 0, 1, 1)
|
||||||
|
createTestLTXFile(t, client, 0, 2, 2)
|
||||||
|
createTestLTXFile(t, client, 0, 3, 3)
|
||||||
|
|
||||||
|
// Compact to L1 - should succeed with verification
|
||||||
|
info, err := compactor.Compact(context.Background(), 1)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if info.Level != 1 {
|
||||||
|
t.Errorf("Level=%d, want 1", info.Level)
|
||||||
|
}
|
||||||
|
if info.MinTXID != 1 || info.MaxTXID != 3 {
|
||||||
|
t.Errorf("TXID range=%d-%d, want 1-3", info.MinTXID, info.MaxTXID)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// containsString reports whether substr occurs anywhere within s.
// An empty substr is always considered present.
func containsString(s, substr string) bool {
	haystack, needle := []byte(s), []byte(substr)
	return bytes.Index(haystack, needle) >= 0
}
|
||||||
|
|
||||||
// createTestLTXFile creates a minimal LTX file for testing.
|
// createTestLTXFile creates a minimal LTX file for testing.
|
||||||
func createTestLTXFile(t testing.TB, client litestream.ReplicaClient, level int, minTXID, maxTXID ltx.TXID) {
|
func createTestLTXFile(t testing.TB, client litestream.ReplicaClient, level int, minTXID, maxTXID ltx.TXID) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
|
|||||||
14
db.go
14
db.go
@@ -148,6 +148,11 @@ type DB struct {
|
|||||||
// Minimum time to retain L0 files after they have been compacted into L1.
|
// Minimum time to retain L0 files after they have been compacted into L1.
|
||||||
L0Retention time.Duration
|
L0Retention time.Duration
|
||||||
|
|
||||||
|
// VerifyCompaction enables post-compaction TXID consistency verification.
|
||||||
|
// When enabled, verifies that files at the destination level have
|
||||||
|
// contiguous TXID ranges after each compaction.
|
||||||
|
VerifyCompaction bool
|
||||||
|
|
||||||
// Remote replica for the database.
|
// Remote replica for the database.
|
||||||
// Must be set before calling Open().
|
// Must be set before calling Open().
|
||||||
Replica *Replica
|
Replica *Replica
|
||||||
@@ -209,6 +214,7 @@ func NewDB(path string) *DB {
|
|||||||
db.compactor = NewCompactor(nil, db.Logger)
|
db.compactor = NewCompactor(nil, db.Logger)
|
||||||
db.compactor.LocalFileOpener = db.openLocalLTXFile
|
db.compactor.LocalFileOpener = db.openLocalLTXFile
|
||||||
db.compactor.LocalFileDeleter = db.deleteLocalLTXFile
|
db.compactor.LocalFileDeleter = db.deleteLocalLTXFile
|
||||||
|
db.compactor.CompactionVerifyErrorCounter = compactionVerifyErrorCounterVec.WithLabelValues(db.path)
|
||||||
db.compactor.CacheGetter = func(level int) (*ltx.FileInfo, bool) {
|
db.compactor.CacheGetter = func(level int) (*ltx.FileInfo, bool) {
|
||||||
db.maxLTXFileInfos.Lock()
|
db.maxLTXFileInfos.Lock()
|
||||||
defer db.maxLTXFileInfos.Unlock()
|
defer db.maxLTXFileInfos.Unlock()
|
||||||
@@ -433,6 +439,9 @@ func (db *DB) Open() (err error) {
|
|||||||
db.opened = true
|
db.opened = true
|
||||||
db.mu.Unlock()
|
db.mu.Unlock()
|
||||||
|
|
||||||
|
// Apply verify compaction setting to the compactor
|
||||||
|
db.compactor.VerifyCompaction = db.VerifyCompaction
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2275,4 +2284,9 @@ var (
|
|||||||
Name: "litestream_checkpoint_seconds",
|
Name: "litestream_checkpoint_seconds",
|
||||||
Help: "Time spent checkpointing WAL, in seconds",
|
Help: "Time spent checkpointing WAL, in seconds",
|
||||||
}, []string{"db", "mode"})
|
}, []string{"db", "mode"})
|
||||||
|
|
||||||
|
compactionVerifyErrorCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||||
|
Name: "litestream_compaction_verify_error_count",
|
||||||
|
Help: "Number of post-compaction verification failures",
|
||||||
|
}, []string{"db"})
|
||||||
)
|
)
|
||||||
|
|||||||
17
store.go
17
store.go
@@ -80,6 +80,9 @@ type Store struct {
|
|||||||
// If true, compaction is run in the background according to compaction levels.
|
// If true, compaction is run in the background according to compaction levels.
|
||||||
CompactionMonitorEnabled bool
|
CompactionMonitorEnabled bool
|
||||||
|
|
||||||
|
// If true, verify TXID consistency at destination level after each compaction.
|
||||||
|
VerifyCompaction bool
|
||||||
|
|
||||||
// Shutdown sync retry settings.
|
// Shutdown sync retry settings.
|
||||||
ShutdownSyncTimeout time.Duration
|
ShutdownSyncTimeout time.Duration
|
||||||
ShutdownSyncInterval time.Duration
|
ShutdownSyncInterval time.Duration
|
||||||
@@ -114,6 +117,7 @@ func NewStore(dbs []*DB, levels CompactionLevels) *Store {
|
|||||||
db.L0Retention = s.L0Retention
|
db.L0Retention = s.L0Retention
|
||||||
db.ShutdownSyncTimeout = s.ShutdownSyncTimeout
|
db.ShutdownSyncTimeout = s.ShutdownSyncTimeout
|
||||||
db.ShutdownSyncInterval = s.ShutdownSyncInterval
|
db.ShutdownSyncInterval = s.ShutdownSyncInterval
|
||||||
|
db.VerifyCompaction = s.VerifyCompaction
|
||||||
}
|
}
|
||||||
s.ctx, s.cancel = context.WithCancel(context.Background())
|
s.ctx, s.cancel = context.WithCancel(context.Background())
|
||||||
return s
|
return s
|
||||||
@@ -212,6 +216,7 @@ func (s *Store) AddDB(db *DB) error {
|
|||||||
db.L0Retention = s.L0Retention
|
db.L0Retention = s.L0Retention
|
||||||
db.ShutdownSyncTimeout = s.ShutdownSyncTimeout
|
db.ShutdownSyncTimeout = s.ShutdownSyncTimeout
|
||||||
db.ShutdownSyncInterval = s.ShutdownSyncInterval
|
db.ShutdownSyncInterval = s.ShutdownSyncInterval
|
||||||
|
db.VerifyCompaction = s.VerifyCompaction
|
||||||
|
|
||||||
// Open the database without holding the lock to avoid blocking other operations.
|
// Open the database without holding the lock to avoid blocking other operations.
|
||||||
// The double-check pattern below handles the race condition.
|
// The double-check pattern below handles the race condition.
|
||||||
@@ -366,6 +371,18 @@ func (s *Store) SetShutdownSyncInterval(d time.Duration) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetVerifyCompaction updates the verify compaction flag and propagates it to
|
||||||
|
// all managed databases.
|
||||||
|
func (s *Store) SetVerifyCompaction(v bool) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
s.VerifyCompaction = v
|
||||||
|
for _, db := range s.dbs {
|
||||||
|
db.VerifyCompaction = v
|
||||||
|
db.compactor.VerifyCompaction = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// SnapshotLevel returns a pseudo compaction level based on snapshot settings.
|
// SnapshotLevel returns a pseudo compaction level based on snapshot settings.
|
||||||
func (s *Store) SnapshotLevel() *CompactionLevel {
|
func (s *Store) SnapshotLevel() *CompactionLevel {
|
||||||
return &CompactionLevel{
|
return &CompactionLevel{
|
||||||
|
|||||||
Reference in New Issue
Block a user