Mirror of https://github.com/benbjohnson/litestream.git, synced 2026-01-25 05:06:30 +00:00
feat: Add optional post-compaction consistency verification (#1029)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
@@ -231,6 +231,10 @@ type Config struct {
	L0Retention              *time.Duration `yaml:"l0-retention"`
	L0RetentionCheckInterval *time.Duration `yaml:"l0-retention-check-interval"`

	// Verify TXID consistency at destination level after each compaction.
	// When enabled, logs warnings if gaps or overlaps are detected.
	VerifyCompaction bool `yaml:"verify-compaction"`

	// Heartbeat settings (global defaults)
	HeartbeatURL      string         `yaml:"heartbeat-url"`
	HeartbeatInterval *time.Duration `yaml:"heartbeat-interval"`
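For context, the new key maps directly onto litestream.yml. A minimal illustrative excerpt, assuming the usual top-level config layout (`verify-compaction` comes from the struct tag above; the `dbs` entry and path are placeholders):

# litestream.yml (illustrative excerpt)
verify-compaction: true        # warn on TXID gaps/overlaps after each compaction

dbs:
  - path: /var/lib/app/db.sqlite   # placeholder path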
@@ -250,6 +250,9 @@ func (c *ReplicateCommand) Run(ctx context.Context) (err error) {
	if c.Config.ShutdownSyncInterval != nil {
		c.Store.SetShutdownSyncInterval(*c.Config.ShutdownSyncInterval)
	}
	if c.Config.VerifyCompaction {
		c.Store.SetVerifyCompaction(true)
	}
	if c.Config.HeartbeatURL != "" {
		interval := litestream.DefaultHeartbeatInterval
		if c.Config.HeartbeatInterval != nil {
compactor.go (+63)
@@ -8,6 +8,7 @@ import (
	"os"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/superfly/ltx"
)
@@ -18,6 +19,15 @@ type Compactor struct {
	client ReplicaClient
	logger *slog.Logger

	// VerifyCompaction enables post-compaction TXID consistency verification.
	// When enabled, verifies that files at the destination level have
	// contiguous TXID ranges after each compaction. Disabled by default.
	VerifyCompaction bool

	// CompactionVerifyErrorCounter is incremented when post-compaction
	// verification fails. Optional; if nil, no metric is recorded.
	CompactionVerifyErrorCounter prometheus.Counter

	// LocalFileOpener optionally opens a local LTX file for compaction.
	// If nil or returns os.ErrNotExist, falls back to remote.
	// This is used by DB to prefer local files over remote for consistency.
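Taken together, these fields let a caller opt in per compactor. A rough sketch of wiring one up outside of DB, assuming the upstream module layout for import paths; the replica path and metric name are placeholders, while the constructor and field names come from this diff:

package example

import (
	"log/slog"

	"github.com/benbjohnson/litestream"
	"github.com/benbjohnson/litestream/file"
	"github.com/prometheus/client_golang/prometheus"
)

// newVerifyingCompactor wires up a compactor with verification enabled.
func newVerifyingCompactor() *litestream.Compactor {
	// Replica path is illustrative.
	client := file.NewReplicaClient("/var/lib/litestream/replica")

	c := litestream.NewCompactor(client, slog.Default())
	c.VerifyCompaction = true

	// Optional: count verification failures. Leaving this nil simply skips the metric.
	c.CompactionVerifyErrorCounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "example_compaction_verify_errors_total", // illustrative metric name
		Help: "Post-compaction verification failures (example metric).",
	})
	return c
}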
@@ -164,9 +174,62 @@ func (c *Compactor) Compact(ctx context.Context, dstLevel int) (*ltx.FileInfo, e
		c.CacheSetter(dstLevel, info)
	}

	// Verify level consistency if enabled
	if c.VerifyCompaction {
		if err := c.VerifyLevelConsistency(ctx, dstLevel); err != nil {
			c.logger.Warn("post-compaction verification failed",
				"level", dstLevel,
				"error", err)
			if c.CompactionVerifyErrorCounter != nil {
				c.CompactionVerifyErrorCounter.Inc()
			}
		}
	}

	return info, nil
}

// VerifyLevelConsistency checks that LTX files at the given level have
// contiguous TXID ranges (prevMaxTXID + 1 == currMinTXID for consecutive files).
// Returns an error describing any gaps or overlaps found.
func (c *Compactor) VerifyLevelConsistency(ctx context.Context, level int) error {
	itr, err := c.client.LTXFiles(ctx, level, 0, false)
	if err != nil {
		return fmt.Errorf("fetch ltx files: %w", err)
	}
	defer itr.Close()

	var prevInfo *ltx.FileInfo
	for itr.Next() {
		info := itr.Item()

		// Skip first file - nothing to compare against
		if prevInfo == nil {
			prevInfo = info
			continue
		}

		// Check for TXID contiguity: prev.MaxTXID + 1 should equal curr.MinTXID
		expectedMinTXID := prevInfo.MaxTXID + 1
		if info.MinTXID != expectedMinTXID {
			if info.MinTXID > expectedMinTXID {
				return fmt.Errorf("TXID gap detected: prev.MaxTXID=%s, next.MinTXID=%s (expected %s)",
					prevInfo.MaxTXID, info.MinTXID, expectedMinTXID)
			}
			return fmt.Errorf("TXID overlap detected: prev.MaxTXID=%s, next.MinTXID=%s",
				prevInfo.MaxTXID, info.MinTXID)
		}

		prevInfo = info
	}

	if err := itr.Close(); err != nil {
		return fmt.Errorf("close iterator: %w", err)
	}

	return nil
}
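The invariant itself is compact: with files ordered by TXID, each file must begin exactly one transaction after the previous file ends. A standalone sketch of the same check over bare (min, max) pairs, decoupled from the ReplicaClient iterator (the type and function names here are illustrative, not part of the change):

package example

import "fmt"

// txidRange is a stand-in for the MinTXID/MaxTXID pair carried by ltx.FileInfo.
type txidRange struct{ min, max uint64 }

// checkContiguous reports the first gap or overlap in ranges already ordered by min TXID.
func checkContiguous(ranges []txidRange) error {
	for i := 1; i < len(ranges); i++ {
		want := ranges[i-1].max + 1
		if got := ranges[i].min; got > want {
			return fmt.Errorf("gap: expected min TXID %d, got %d", want, got)
		} else if got < want {
			return fmt.Errorf("overlap: expected min TXID %d, got %d", want, got)
		}
	}
	return nil
}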

// EnforceSnapshotRetention enforces retention of snapshot level files by timestamp.
// Files older than the retention duration are deleted (except the newest is always kept).
// Returns the minimum snapshot TXID still retained (useful for cascading retention to lower levels).
@@ -321,6 +321,112 @@ func TestCompactor_EnforceSnapshotRetention(t *testing.T) {
	})
}

func TestCompactor_VerifyLevelConsistency(t *testing.T) {
	t.Run("ContiguousFiles", func(t *testing.T) {
		client := file.NewReplicaClient(t.TempDir())
		compactor := litestream.NewCompactor(client, slog.Default())

		// Create contiguous files
		createTestLTXFile(t, client, 1, 1, 2)
		createTestLTXFile(t, client, 1, 3, 5)
		createTestLTXFile(t, client, 1, 6, 10)

		// Should pass verification
		err := compactor.VerifyLevelConsistency(context.Background(), 1)
		if err != nil {
			t.Errorf("expected nil error for contiguous files, got: %v", err)
		}
	})

	t.Run("GapDetected", func(t *testing.T) {
		client := file.NewReplicaClient(t.TempDir())
		compactor := litestream.NewCompactor(client, slog.Default())

		// Create files with a gap (missing TXID 3-4)
		createTestLTXFile(t, client, 1, 1, 2)
		createTestLTXFile(t, client, 1, 5, 7) // gap: expected MinTXID=3, got 5

		err := compactor.VerifyLevelConsistency(context.Background(), 1)
		if err == nil {
			t.Error("expected error for gap in files, got nil")
		}
		if err != nil && !containsString(err.Error(), "gap") {
			t.Errorf("expected gap error, got: %v", err)
		}
	})

	t.Run("OverlapDetected", func(t *testing.T) {
		client := file.NewReplicaClient(t.TempDir())
		compactor := litestream.NewCompactor(client, slog.Default())

		// Create overlapping files
		createTestLTXFile(t, client, 1, 1, 5)
		createTestLTXFile(t, client, 1, 3, 7) // overlap: expected MinTXID=6, got 3

		err := compactor.VerifyLevelConsistency(context.Background(), 1)
		if err == nil {
			t.Error("expected error for overlapping files, got nil")
		}
		if err != nil && !containsString(err.Error(), "overlap") {
			t.Errorf("expected overlap error, got: %v", err)
		}
	})

	t.Run("SingleFile", func(t *testing.T) {
		client := file.NewReplicaClient(t.TempDir())
		compactor := litestream.NewCompactor(client, slog.Default())

		// Create single file - should pass
		createTestLTXFile(t, client, 1, 1, 5)

		err := compactor.VerifyLevelConsistency(context.Background(), 1)
		if err != nil {
			t.Errorf("expected nil error for single file, got: %v", err)
		}
	})

	t.Run("EmptyLevel", func(t *testing.T) {
		client := file.NewReplicaClient(t.TempDir())
		compactor := litestream.NewCompactor(client, slog.Default())

		// Empty level - should pass
		err := compactor.VerifyLevelConsistency(context.Background(), 1)
		if err != nil {
			t.Errorf("expected nil error for empty level, got: %v", err)
		}
	})
}

func TestCompactor_CompactWithVerification(t *testing.T) {
	t.Run("VerificationEnabled", func(t *testing.T) {
		client := file.NewReplicaClient(t.TempDir())
		compactor := litestream.NewCompactor(client, slog.Default())
		compactor.VerifyCompaction = true

		// Create contiguous L0 files
		createTestLTXFile(t, client, 0, 1, 1)
		createTestLTXFile(t, client, 0, 2, 2)
		createTestLTXFile(t, client, 0, 3, 3)

		// Compact to L1 - should succeed with verification
		info, err := compactor.Compact(context.Background(), 1)
		if err != nil {
			t.Fatal(err)
		}
		if info.Level != 1 {
			t.Errorf("Level=%d, want 1", info.Level)
		}
		if info.MinTXID != 1 || info.MaxTXID != 3 {
			t.Errorf("TXID range=%d-%d, want 1-3", info.MinTXID, info.MaxTXID)
		}
	})
}

// containsString checks if s contains substr.
func containsString(s, substr string) bool {
	return bytes.Contains([]byte(s), []byte(substr))
}

// createTestLTXFile creates a minimal LTX file for testing.
func createTestLTXFile(t testing.TB, client litestream.ReplicaClient, level int, minTXID, maxTXID ltx.TXID) {
	t.Helper()
db.go (+14)
@@ -148,6 +148,11 @@ type DB struct {
	// Minimum time to retain L0 files after they have been compacted into L1.
	L0Retention time.Duration

	// VerifyCompaction enables post-compaction TXID consistency verification.
	// When enabled, verifies that files at the destination level have
	// contiguous TXID ranges after each compaction.
	VerifyCompaction bool

	// Remote replica for the database.
	// Must be set before calling Open().
	Replica *Replica
@@ -209,6 +214,7 @@ func NewDB(path string) *DB {
	db.compactor = NewCompactor(nil, db.Logger)
	db.compactor.LocalFileOpener = db.openLocalLTXFile
	db.compactor.LocalFileDeleter = db.deleteLocalLTXFile
	db.compactor.CompactionVerifyErrorCounter = compactionVerifyErrorCounterVec.WithLabelValues(db.path)
	db.compactor.CacheGetter = func(level int) (*ltx.FileInfo, bool) {
		db.maxLTXFileInfos.Lock()
		defer db.maxLTXFileInfos.Unlock()
@@ -433,6 +439,9 @@ func (db *DB) Open() (err error) {
	db.opened = true
	db.mu.Unlock()

	// Apply verify compaction setting to the compactor
	db.compactor.VerifyCompaction = db.VerifyCompaction

	return nil
}
@@ -2275,4 +2284,9 @@ var (
		Name: "litestream_checkpoint_seconds",
		Help: "Time spent checkpointing WAL, in seconds",
	}, []string{"db", "mode"})

	compactionVerifyErrorCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "litestream_compaction_verify_error_count",
		Help: "Number of post-compaction verification failures",
	}, []string{"db"})
)
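Because the counter is registered through promauto and labeled by database path, it lands in the default Prometheus registry. A generic client_golang sketch of exposing it over HTTP; this wiring is illustrative and not part of Litestream itself, and the listen address is a placeholder:

package main

import (
	"net/http"

	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	// Serves all default-registry metrics, including
	// litestream_compaction_verify_error_count{db="..."}.
	http.Handle("/metrics", promhttp.Handler())
	_ = http.ListenAndServe(":9090", nil) // address is illustrative
}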
store.go (+17)
@@ -80,6 +80,9 @@ type Store struct {
	// If true, compaction is run in the background according to compaction levels.
	CompactionMonitorEnabled bool

	// If true, verify TXID consistency at destination level after each compaction.
	VerifyCompaction bool

	// Shutdown sync retry settings.
	ShutdownSyncTimeout  time.Duration
	ShutdownSyncInterval time.Duration
@@ -114,6 +117,7 @@ func NewStore(dbs []*DB, levels CompactionLevels) *Store {
		db.L0Retention = s.L0Retention
		db.ShutdownSyncTimeout = s.ShutdownSyncTimeout
		db.ShutdownSyncInterval = s.ShutdownSyncInterval
		db.VerifyCompaction = s.VerifyCompaction
	}
	s.ctx, s.cancel = context.WithCancel(context.Background())
	return s
@@ -212,6 +216,7 @@ func (s *Store) AddDB(db *DB) error {
	db.L0Retention = s.L0Retention
	db.ShutdownSyncTimeout = s.ShutdownSyncTimeout
	db.ShutdownSyncInterval = s.ShutdownSyncInterval
	db.VerifyCompaction = s.VerifyCompaction

	// Open the database without holding the lock to avoid blocking other operations.
	// The double-check pattern below handles the race condition.
@@ -366,6 +371,18 @@ func (s *Store) SetShutdownSyncInterval(d time.Duration) {
	}
}

// SetVerifyCompaction updates the verify compaction flag and propagates it to
// all managed databases.
func (s *Store) SetVerifyCompaction(v bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.VerifyCompaction = v
	for _, db := range s.dbs {
		db.VerifyCompaction = v
		db.compactor.VerifyCompaction = v
	}
}
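For code that embeds Litestream as a library rather than going through litestream.yml, the flag can be flipped through the store. A rough sketch under stated assumptions: the database path is a placeholder, NewDB/NewStore/SetVerifyCompaction appear in this diff, and nil is passed where a real setup would supply CompactionLevels:

package example

import "github.com/benbjohnson/litestream"

func enableVerification() *litestream.Store {
	db := litestream.NewDB("/var/lib/app/db.sqlite") // placeholder path

	// nil levels keep the sketch short; production code passes real compaction levels.
	store := litestream.NewStore([]*litestream.DB{db}, nil)
	store.SetVerifyCompaction(true) // propagates to every managed DB and its compactor
	return store
}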

// SnapshotLevel returns a pseudo compaction level based on snapshot settings.
func (s *Store) SnapshotLevel() *CompactionLevel {
	return &CompactionLevel{