test: migrate bash integration and soak tests to Go infrastructure (#799)

Co-authored-by: Claude <noreply@anthropic.com>
Cory LaNou
2025-11-03 11:17:21 -06:00
committed by GitHub
parent f2d217a6fd
commit 22b5ce1f51
29 changed files with 4998 additions and 4256 deletions

.github/workflows/integration-tests.yml (new file)

@@ -0,0 +1,148 @@
name: Integration Tests

on:
  pull_request:
    paths:
      - '**.go'
      - 'go.mod'
      - 'go.sum'
      - 'tests/integration/**'
      - '.github/workflows/integration-tests.yml'
  workflow_dispatch:
    inputs:
      test_type:
        description: 'Test type to run'
        required: false
        default: 'quick'
        type: choice
        options:
          - 'quick'
          - 'all'
          - 'long'

permissions:
  contents: read

jobs:
  quick-tests:
    name: Quick Integration Tests
    runs-on: ubuntu-latest
    if: github.event_name == 'pull_request' || inputs.test_type == 'quick' || inputs.test_type == 'all'
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version-file: "go.mod"
      - name: Build binaries
        run: |
          go build -o bin/litestream ./cmd/litestream
          go build -o bin/litestream-test ./cmd/litestream-test
      - name: Run quick integration tests
        run: |
          go test -v -tags=integration -timeout=30m ./tests/integration/... \
            -run="TestFreshStart|TestDatabaseIntegrity|TestRapidCheckpoints"
        env:
          CGO_ENABLED: 1
      - name: Upload test logs
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: quick-test-logs
          path: |
            /tmp/litestream-*/*.log
            /tmp/*-test.log

  scenario-tests:
    name: Scenario Integration Tests
    runs-on: ubuntu-latest
    if: github.event_name == 'workflow_dispatch' && (inputs.test_type == 'all' || inputs.test_type == 'long')
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version-file: "go.mod"
      - name: Build binaries
        run: |
          go build -o bin/litestream ./cmd/litestream
          go build -o bin/litestream-test ./cmd/litestream-test
      - name: Run all scenario tests
        run: |
          go test -v -tags=integration -timeout=1h ./tests/integration/... \
            -run="Test(FreshStart|DatabaseIntegrity|DatabaseDeletion|RapidCheckpoints|WALGrowth|ConcurrentOperations|BusyTimeout)"
        env:
          CGO_ENABLED: 1
      - name: Upload test logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: scenario-test-logs
          path: |
            /tmp/litestream-*/*.log
            /tmp/*-test.log

  long-running-tests:
    name: Long-Running Integration Tests
    runs-on: ubuntu-latest
    if: github.event_name == 'workflow_dispatch' && inputs.test_type == 'long'
    timeout-minutes: 600
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version-file: "go.mod"
      - name: Build binaries
        run: |
          go build -o bin/litestream ./cmd/litestream
          go build -o bin/litestream-test ./cmd/litestream-test
      - name: Run long tests
        run: |
          go test -v -tags="integration,long" -timeout=10h ./tests/integration/... \
            -run="TestOvernight|Test1GBBoundary"
        env:
          CGO_ENABLED: 1
      - name: Upload test logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: long-test-logs
          path: |
            /tmp/litestream-*/*.log
            /tmp/*-test.log

  summary:
    name: Test Summary
    runs-on: ubuntu-latest
    needs: [quick-tests]
    if: always() && (github.event_name == 'pull_request' || inputs.test_type == 'quick' || inputs.test_type == 'all')
    steps:
      - name: Generate summary
        run: |
          echo "## Integration Test Results" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          if [ "${{ needs.quick-tests.result }}" == "success" ]; then
            echo "✅ **Quick Tests:** Passed" >> $GITHUB_STEP_SUMMARY
          elif [ "${{ needs.quick-tests.result }}" == "failure" ]; then
            echo "❌ **Quick Tests:** Failed" >> $GITHUB_STEP_SUMMARY
          elif [ "${{ needs.quick-tests.result }}" == "skipped" ]; then
            echo "⏭️ **Quick Tests:** Skipped" >> $GITHUB_STEP_SUMMARY
          fi
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "---" >> $GITHUB_STEP_SUMMARY
          echo "**Triggered by:** @${{ github.actor }}" >> $GITHUB_STEP_SUMMARY

# Note: Scenario and long-running tests run independently on workflow_dispatch.
# Check individual job results for those test suites.
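
The scenario and long-running suites only run via manual dispatch. Assuming the GitHub CLI is available, a dispatch could look like:

```bash
# Hypothetical invocation; run from a checkout of the repository.
gh workflow run integration-tests.yml -f test_type=all
```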


@@ -13,18 +13,11 @@ go build -o bin/litestream-test ./cmd/litestream-test
## Quick Reference
> **Note:** Some tests have been migrated to Go integration tests in `tests/integration/`. See [tests/integration/README.md](../../tests/integration/README.md) for the Go-based test suite.
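For example, the migrated quick suite can be run locally with the same command the CI workflow uses (requires CGO and the `integration` build tag):
```bash
CGO_ENABLED=1 go test -v -tags=integration -timeout=30m ./tests/integration/... \
  -run="TestFreshStart|TestDatabaseIntegrity|TestRapidCheckpoints"
```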
| Script | Purpose | Duration | Status |
|--------|---------|----------|--------|
| verify-test-setup.sh | Environment validation | ~5s | ✅ Stable |
| test-fresh-start.sh | Fresh database creation | ~30s | ✅ Stable |
| test-rapid-checkpoints.sh | Checkpoint stress test | ~2min | ✅ Stable |
| test-wal-growth.sh | Large WAL handling (100MB+) | ~5min | ✅ Stable |
| test-concurrent-operations.sh | Multi-database concurrent replication | ~5min | ✅ Stable |
| test-database-integrity.sh | Complex data integrity validation | ~3min | ✅ Stable |
| test-database-deletion.sh | Database deletion scenarios | ~2min | ✅ Stable |
| test-replica-failover.sh | Replica failover testing | ~3min | ✅ Stable |
| test-busy-timeout.sh | Database busy timeout handling | ~2min | ✅ Stable |
| test-1gb-boundary.sh | SQLite 1GB lock page boundary | ~10min | ⚠️ Blocked by #754 |
| reproduce-critical-bug.sh | Checkpoint during downtime bug | ~2min | 🐛 Reproduces #752 |
| test-754-s3-scenarios.sh | Issue #754 S3 vs file replication | ~10min | 🐛 Tests #754 |
| test-754-restore-focus.sh | Issue #754 restore focus | ~5min | 🐛 Tests #754 |
@@ -58,148 +51,6 @@ Verifies that the test environment is properly configured with required binaries
- SQLite3 available
- Python dependencies for S3 mock
### Core Functionality Tests
#### test-fresh-start.sh
Tests replication with a fresh database that doesn't exist when Litestream starts.
```bash
./cmd/litestream-test/scripts/test-fresh-start.sh
```
**Tests:**
- Starting Litestream before database exists
- Database creation while Litestream is running
- Automatic detection of new database
- Replication and restore integrity
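Per the CI workflow above, the Go port of this scenario is selected by the `TestFreshStart` filter; a likely local invocation:
```bash
CGO_ENABLED=1 go test -v -tags=integration -run TestFreshStart ./tests/integration/...
```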
#### test-database-integrity.sh
Creates complex data patterns and verifies integrity after restore.
```bash
./cmd/litestream-test/scripts/test-database-integrity.sh
```
**Tests:**
- Complex data patterns (multiple tables, indexes)
- SQLite PRAGMA integrity_check
- Full database restoration
- Data consistency verification
#### test-database-deletion.sh
Tests scenarios where the source database is deleted during replication.
```bash
./cmd/litestream-test/scripts/test-database-deletion.sh
```
**Tests:**
- Database deletion during active replication
- Recovery behavior
- Replica consistency
#### test-replica-failover.sh
Tests replica failover scenarios with multiple replicas.
```bash
./cmd/litestream-test/scripts/test-replica-failover.sh
```
**Tests:**
- Multiple replica configuration
- Failover when primary replica fails
- Data consistency across replicas
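A minimal sketch of the multi-replica configuration this script generates (the temp paths are the script's own):
```yaml
dbs:
  - path: /tmp/failover-test.db
    replicas:
      - url: file:///tmp/failover-replica1
        sync-interval: 1s
      - url: file:///tmp/failover-replica2
        sync-interval: 1s
      - url: file:///tmp/failover-replica3
        sync-interval: 1s
```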
### Stress & Performance Tests
#### test-rapid-checkpoints.sh
Tests Litestream under rapid checkpoint pressure with continuous writes.
```bash
./cmd/litestream-test/scripts/test-rapid-checkpoints.sh
```
**Tests:**
- 100+ writes/second
- Forced rapid checkpoints
- Replication under checkpoint pressure
- Data integrity under stress
**Key Metrics:**
- Checkpoint frequency
- WAL file growth
- Replication lag
- Error rates
#### test-wal-growth.sh
Tests handling of large WAL files (100MB+) under sustained write load.
```bash
./cmd/litestream-test/scripts/test-wal-growth.sh
```
**Tests:**
- Sustained high write rates (400+ writes/sec)
- Large WAL file creation and handling
- Checkpoint behavior with large WALs
- Replication performance with large data
**Key Findings:**
- Successfully handles 100MB+ WAL files
- Maintains data integrity
- Handles 400+ writes/second
#### test-concurrent-operations.sh
Tests multiple databases replicating simultaneously with competing operations.
```bash
./cmd/litestream-test/scripts/test-concurrent-operations.sh
```
**Tests:**
- Multiple databases (3-5) replicating concurrently
- Mixed read/write operations
- Competing checkpoints
- Resource contention handling
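The script drives this through a generated config with one entry per database, shaped like:
```yaml
dbs:
  - path: /tmp/concurrent-test/db1.db
    replicas:
      - url: file:///tmp/concurrent-test/replica1
        sync-interval: 1s
  - path: /tmp/concurrent-test/db2.db
    replicas:
      - url: file:///tmp/concurrent-test/replica2
        sync-interval: 1s
  # ...repeated for each database under test
```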
#### test-busy-timeout.sh
Tests database busy timeout handling with concurrent access.
```bash
./cmd/litestream-test/scripts/test-busy-timeout.sh
```
**Tests:**
- Concurrent database access
- Busy timeout configuration
- Lock contention handling
- Recovery from busy states
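The timeout itself is a per-connection PRAGMA; the script sets it inline on each CLI invocation, e.g.:
```bash
sqlite3 /tmp/busy-test.db "PRAGMA busy_timeout = 5000; INSERT INTO test (data) VALUES (randomblob(1000));"
```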
### Boundary & Edge Case Tests
#### test-1gb-boundary.sh
Tests SQLite's 1GB lock page boundary handling.
```bash
./cmd/litestream-test/scripts/test-1gb-boundary.sh
```
**Tests:**
- Database growth beyond 1GB (with 4KB pages)
- Lock page at #262145 properly skipped
- Replication across lock page boundary
- Restoration integrity after crossing boundary
**Status:** ⚠️ Currently blocked by ltx v0.5.0 flag compatibility issue (#754)
**Lock Page Numbers by Page Size:**
| Page Size | Lock Page # |
|-----------|-------------|
| 4KB | 262145 |
| 8KB | 131073 |
| 16KB | 65537 |
| 32KB | 32769 |
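The lock page is the page containing byte offset 0x40000000 (1GB), so its 1-based number is `0x40000000 / page_size + 1`; the table values can be reproduced with:
```bash
for ps in 4096 8192 16384 32768; do
  echo "${ps}: page $((0x40000000 / ps + 1))"
done
```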
### Bug Reproduction Scripts
#### reproduce-critical-bug.sh


@@ -1,217 +0,0 @@
#!/bin/bash
# Test Script: SQLite 1GB Lock Page Boundary
#
# This test verifies that Litestream correctly handles the SQLite lock page
# at the 1GB boundary (0x40000000). This page is reserved by SQLite and
# cannot contain data - Litestream must skip it during replication.
#
# The lock page number varies by page size:
# - 4KB: page 262145
# - 8KB: page 131073
# - 16KB: page 65537
# - 32KB: page 32769
set -e
echo "=========================================="
echo "SQLite 1GB Lock Page Boundary Test"
echo "=========================================="
echo ""
echo "Testing Litestream's handling of SQLite's reserved lock page at 1GB"
echo ""
# Configuration
DB="/tmp/1gb-test.db"
REPLICA="/tmp/1gb-replica"
LITESTREAM_TEST="./bin/litestream-test"
LITESTREAM="./bin/litestream"
# Clean up any previous test
echo "[SETUP] Cleaning up previous test files..."
rm -f "$DB"*
rm -rf "$REPLICA"
# Check for required binaries
if [ ! -f "$LITESTREAM_TEST" ]; then
echo "ERROR: litestream-test not found at $LITESTREAM_TEST"
echo "Build with: go build -o bin/litestream-test ./cmd/litestream-test"
exit 1
fi
if [ ! -f "$LITESTREAM" ]; then
echo "ERROR: litestream not found at $LITESTREAM"
echo "Build with: go build -o bin/litestream ./cmd/litestream"
exit 1
fi
test_page_size() {
local PAGE_SIZE=$1
local LOCK_PGNO=$2
echo ""
echo "======================================="
echo "Testing with page size: $PAGE_SIZE bytes"
echo "Lock page should be at: $LOCK_PGNO"
echo "======================================="
# Clean up for this test
rm -f "$DB"*
rm -rf "$REPLICA"
# Create database with specific page size
echo "[1] Creating database with page_size=$PAGE_SIZE..."
sqlite3 "$DB" <<EOF
PRAGMA page_size=$PAGE_SIZE;
CREATE TABLE test_data (
id INTEGER PRIMARY KEY,
data BLOB
);
EOF
echo "[2] Populating database to cross 1GB boundary (target: 1.2GB)..."
# Use litestream-test to populate efficiently; 1.2GB comfortably crosses the 1GB lock page
$LITESTREAM_TEST populate -db "$DB" -target-size 1200MB -row-size $((PAGE_SIZE - 100))
# Get actual size and page count
DB_SIZE=$(stat -f%z "$DB" 2>/dev/null || stat -c%s "$DB")
PAGE_COUNT=$(sqlite3 "$DB" "PRAGMA page_count;")
echo " Database size: $(( DB_SIZE / 1024 / 1024 ))MB"
echo " Page count: $PAGE_COUNT"
echo " Lock page at: $LOCK_PGNO"
# Verify we've crossed the boundary
if [ "$PAGE_COUNT" -le "$LOCK_PGNO" ]; then
echo " WARNING: Database doesn't cross lock page boundary!"
echo " Need at least $LOCK_PGNO pages, have $PAGE_COUNT"
else
echo " ✓ Database crosses lock page boundary"
fi
# Start Litestream replication
echo "[3] Starting Litestream replication..."
$LITESTREAM replicate "$DB" "file://$REPLICA" > /tmp/litestream-1gb.log 2>&1 &
LITESTREAM_PID=$!
sleep 3
if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
echo "ERROR: Litestream failed to start"
cat /tmp/litestream-1gb.log
return 1
fi
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"
# Add more data to trigger replication across the boundary
echo "[4] Adding data around the lock page boundary..."
# Use litestream-test load to ensure continuous writes
$LITESTREAM_TEST load -db "$DB" -write-rate 10 -duration 10s -pattern constant &
LOAD_PID=$!
# Let it run and create multiple transactions
echo "[5] Running writes for 10 seconds to ensure multiple transactions..."
sleep 10
# Stop writes and let replication catch up
kill $LOAD_PID 2>/dev/null || true
sleep 5
# Check for errors in log
if grep -i "error\|panic\|fatal" /tmp/litestream-1gb.log > /dev/null 2>&1; then
echo " WARNING: Errors detected in Litestream log:"
grep -i "error\|panic\|fatal" /tmp/litestream-1gb.log | head -5
fi
# Stop Litestream
kill $LITESTREAM_PID 2>/dev/null || true
sleep 2
# Attempt restore
echo "[6] Testing restore..."
rm -f /tmp/restored-1gb.db
if $LITESTREAM restore -o /tmp/restored-1gb.db "file://$REPLICA" > /tmp/restore-1gb.log 2>&1; then
echo " ✓ Restore successful"
# Verify integrity
INTEGRITY=$(sqlite3 /tmp/restored-1gb.db "PRAGMA integrity_check;" 2>/dev/null || echo "FAILED")
if [ "$INTEGRITY" = "ok" ]; then
echo " ✓ Integrity check passed"
else
echo " ✗ Integrity check failed: $INTEGRITY"
return 1
fi
# Compare page counts (recompute first: steps 4-5 wrote more data after the count taken in step 2)
PAGE_COUNT=$(sqlite3 "$DB" "PRAGMA page_count;")
RESTORED_COUNT=$(sqlite3 /tmp/restored-1gb.db "PRAGMA page_count;" 2>/dev/null || echo "0")
echo " Original pages: $PAGE_COUNT"
echo " Restored pages: $RESTORED_COUNT"
if [ "$PAGE_COUNT" -eq "$RESTORED_COUNT" ]; then
echo " ✓ Page count matches"
else
echo " ✗ Page count mismatch!"
return 1
fi
# Check data integrity
ORIG_ROWS=$(sqlite3 "$DB" "SELECT COUNT(*) FROM test_data;")
REST_ROWS=$(sqlite3 /tmp/restored-1gb.db "SELECT COUNT(*) FROM test_data;")
echo " Original rows: $ORIG_ROWS"
echo " Restored rows: $REST_ROWS"
if [ "$ORIG_ROWS" -eq "$REST_ROWS" ]; then
echo " ✓ Data integrity verified"
echo ""
echo " TEST PASSED for page_size=$PAGE_SIZE"
else
echo " ✗ Row count mismatch!"
return 1
fi
else
echo " ✗ Restore FAILED!"
cat /tmp/restore-1gb.log
return 1
fi
# Clean up
rm -f /tmp/restored-1gb.db
}
# Test with different page sizes
echo "Testing SQLite lock page handling at 1GB boundary"
echo "This verifies Litestream correctly skips the reserved lock page"
echo ""
# Default 4KB page size (most common)
if ! test_page_size 4096 262145; then
echo "CRITICAL: Test failed for 4KB pages!"
exit 1
fi
# 8KB page size
if ! test_page_size 8192 131073; then
echo "CRITICAL: Test failed for 8KB pages!"
exit 1
fi
# 16KB page size (if time permits - these are large databases)
# Uncomment to test:
# if ! test_page_size 16384 65537; then
# echo "CRITICAL: Test failed for 16KB pages!"
# exit 1
# fi
echo ""
echo "=========================================="
echo "All 1GB boundary tests PASSED!"
echo "=========================================="
echo ""
echo "Litestream correctly handles the SQLite lock page at 1GB boundary"
echo "for all tested page sizes."
echo ""
# Clean up
pkill -f "litestream replicate" 2>/dev/null || true
echo "Test complete."


@@ -1,225 +0,0 @@
#!/bin/bash
set -e
# Test busy timeout handling with concurrent writes
# This test verifies proper handling of write lock conflicts between app and Litestream
echo "=========================================="
echo "Busy Timeout and Write Lock Conflict Test"
echo "=========================================="
echo ""
echo "Testing write lock conflict handling with various busy_timeout settings"
echo ""
# Configuration
DB="/tmp/busy-test.db"
REPLICA="/tmp/busy-replica"
LITESTREAM="./bin/litestream"
LITESTREAM_TEST="./bin/litestream-test"
# Cleanup function
cleanup() {
pkill -f "litestream replicate.*busy-test.db" 2>/dev/null || true
pkill -f "litestream-test load.*busy-test.db" 2>/dev/null || true
rm -f "$DB" "$DB-wal" "$DB-shm" "$DB-litestream"
rm -rf "$REPLICA"
rm -f /tmp/busy-*.log
}
trap cleanup EXIT
echo "[SETUP] Cleaning up previous test files..."
cleanup
echo ""
echo "[1] Creating test database..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode = WAL;
CREATE TABLE test (id INTEGER PRIMARY KEY, data BLOB, timestamp DATETIME DEFAULT CURRENT_TIMESTAMP);
INSERT INTO test (data) VALUES (randomblob(1000));
EOF
echo " ✓ Database created"
echo ""
echo "[2] Starting Litestream replication..."
"$LITESTREAM" replicate "$DB" "file://$REPLICA" > /tmp/busy-litestream.log 2>&1 &
LITESTREAM_PID=$!
sleep 2
if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
echo " ✗ Litestream failed to start"
cat /tmp/busy-litestream.log
exit 1
fi
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"
echo ""
echo "=========================================="
echo "Test 1: No busy_timeout (default behavior)"
echo "=========================================="
echo "[3] Starting aggressive writes without busy_timeout..."
ERRORS_NO_TIMEOUT=0
SUCCESS_NO_TIMEOUT=0
for i in {1..100}; do
if sqlite3 "$DB" "INSERT INTO test (data) VALUES (randomblob(1000));" 2>/dev/null; then
# Use VAR=$((VAR + 1)) rather than ((VAR++)): the latter exits non-zero when the
# pre-increment value is 0, which aborts the script under set -e.
SUCCESS_NO_TIMEOUT=$((SUCCESS_NO_TIMEOUT + 1))
else
ERRORS_NO_TIMEOUT=$((ERRORS_NO_TIMEOUT + 1))
fi
done
echo " Results without busy_timeout:"
echo " ✓ Successful writes: $SUCCESS_NO_TIMEOUT"
echo " ✗ Failed writes (SQLITE_BUSY): $ERRORS_NO_TIMEOUT"
if [ $ERRORS_NO_TIMEOUT -gt 0 ]; then
echo " ⚠️ Conflicts detected without busy_timeout (expected)"
else
echo " ✓ No conflicts (may indicate low checkpoint frequency)"
fi
echo ""
echo "=========================================="
echo "Test 2: With 5-second busy_timeout (recommended)"
echo "=========================================="
echo "[4] Testing with recommended 5-second timeout..."
ERRORS_WITH_TIMEOUT=0
SUCCESS_WITH_TIMEOUT=0
for i in {1..100}; do
if sqlite3 "$DB" "PRAGMA busy_timeout = 5000; INSERT INTO test (data) VALUES (randomblob(1000));" 2>/dev/null; then
SUCCESS_WITH_TIMEOUT=$((SUCCESS_WITH_TIMEOUT + 1))
else
ERRORS_WITH_TIMEOUT=$((ERRORS_WITH_TIMEOUT + 1))
fi
done
echo " Results with 5s busy_timeout:"
echo " ✓ Successful writes: $SUCCESS_WITH_TIMEOUT"
echo " ✗ Failed writes: $ERRORS_WITH_TIMEOUT"
if [ $ERRORS_WITH_TIMEOUT -eq 0 ]; then
echo " ✓ All writes succeeded with proper timeout!"
elif [ $ERRORS_WITH_TIMEOUT -lt $ERRORS_NO_TIMEOUT ]; then
echo " ✓ Timeout reduced conflicts significantly"
else
echo " ⚠️ Timeout didn't help (may need investigation)"
fi
echo ""
echo "=========================================="
echo "Test 3: Concurrent high-frequency writes"
echo "=========================================="
echo "[5] Starting 3 concurrent write processes..."
# Start multiple concurrent writers
(
for i in {1..50}; do
sqlite3 "$DB" "PRAGMA busy_timeout = 5000; INSERT INTO test (data) VALUES ('Writer1: ' || randomblob(500));" 2>/dev/null
sleep 0.01
done
) > /tmp/busy-writer1.log 2>&1 &
WRITER1_PID=$!
(
for i in {1..50}; do
sqlite3 "$DB" "PRAGMA busy_timeout = 5000; INSERT INTO test (data) VALUES ('Writer2: ' || randomblob(500));" 2>/dev/null
sleep 0.01
done
) > /tmp/busy-writer2.log 2>&1 &
WRITER2_PID=$!
(
for i in {1..50}; do
sqlite3 "$DB" "PRAGMA busy_timeout = 5000; INSERT INTO test (data) VALUES ('Writer3: ' || randomblob(500));" 2>/dev/null
sleep 0.01
done
) > /tmp/busy-writer3.log 2>&1 &
WRITER3_PID=$!
echo " Writers started: PID $WRITER1_PID, $WRITER2_PID, $WRITER3_PID"
# Monitor for conflicts
sleep 1
echo ""
echo "[6] Forcing checkpoints during concurrent writes..."
for i in {1..5}; do
sqlite3 "$DB" "PRAGMA busy_timeout = 5000; PRAGMA wal_checkpoint(PASSIVE);" 2>/dev/null || true
sleep 1
done
# Wait for writers to complete
wait $WRITER1_PID 2>/dev/null || true
wait $WRITER2_PID 2>/dev/null || true
wait $WRITER3_PID 2>/dev/null || true
echo " ✓ Concurrent writers completed"
echo ""
echo "[7] Checking for lock contention in Litestream log..."
# grep -c prints the count (including 0) itself; `|| echo "0"` would produce "0\n0"
# on no match because grep exits non-zero. Default only the missing-file case.
CHECKPOINT_ERRORS=$(grep -ci "checkpoint.*error" /tmp/busy-litestream.log 2>/dev/null || true); CHECKPOINT_ERRORS=${CHECKPOINT_ERRORS:-0}
SYNC_ERRORS=$(grep -c "database is locked" /tmp/busy-litestream.log 2>/dev/null || true); SYNC_ERRORS=${SYNC_ERRORS:-0}
echo " Litestream errors:"
echo " Checkpoint errors: $CHECKPOINT_ERRORS"
echo " Lock errors: $SYNC_ERRORS"
if [ "$SYNC_ERRORS" -eq "0" ]; then
echo " ✓ No lock errors in Litestream"
else
echo " ⚠️ Some lock contention detected (may be normal under high load)"
fi
echo ""
echo "=========================================="
echo "Test 4: Checkpoint during write transaction"
echo "=========================================="
echo "[8] Testing checkpoint during long transaction..."
# Hold an exclusive transaction open by feeding the CLI from a subshell.
# (A one-shot `sqlite3 "$DB" "BEGIN EXCLUSIVE;" &` releases the lock as soon as
# the process exits, so it would never actually block the checkpoint.)
( echo "PRAGMA busy_timeout = 5000; BEGIN EXCLUSIVE; SELECT 1;"; sleep 3; echo "COMMIT;" ) | sqlite3 "$DB" > /dev/null 2>&1 &
TRANS_PID=$!
sleep 0.5
# Try to checkpoint while the transaction is held
CHECKPOINT_RESULT=$(sqlite3 "$DB" "PRAGMA busy_timeout = 1000; PRAGMA wal_checkpoint(FULL);" 2>&1 || echo "FAILED")
if [[ "$CHECKPOINT_RESULT" == *"FAILED"* ]] || [[ "$CHECKPOINT_RESULT" == *"database is locked"* ]]; then
echo " ✓ Checkpoint correctly blocked by exclusive transaction"
else
echo " ⚠️ Unexpected checkpoint behavior: $CHECKPOINT_RESULT"
fi
# Let the transaction holder finish
wait $TRANS_PID 2>/dev/null || true
echo ""
echo "[9] Final statistics..."
TOTAL_ROWS=$(sqlite3 "$DB" "SELECT COUNT(*) FROM test;")
WAL_SIZE=$(du -h "$DB-wal" 2>/dev/null | cut -f1); WAL_SIZE=${WAL_SIZE:-0}
DB_SIZE=$(du -h "$DB" | cut -f1)
echo " Database stats:"
echo " Total rows inserted: $TOTAL_ROWS"
echo " Database size: $DB_SIZE"
echo " WAL size: $WAL_SIZE"
echo ""
echo "=========================================="
echo "Busy Timeout Test Summary:"
echo " Without timeout: $ERRORS_NO_TIMEOUT conflicts"
echo " With 5s timeout: $ERRORS_WITH_TIMEOUT conflicts"
echo " Concurrent writes: Completed successfully"
echo " Lock contention: Properly handled"
echo ""
if [ $ERRORS_WITH_TIMEOUT -lt $ERRORS_NO_TIMEOUT ] || [ $ERRORS_WITH_TIMEOUT -eq 0 ]; then
echo "✅ TEST PASSED: busy_timeout improves conflict handling"
else
echo "⚠️ TEST NOTICE: Timeout may need tuning for this workload"
fi
echo "=========================================="


@@ -1,300 +0,0 @@
#!/bin/bash
# Test Script: Concurrent Database Operations
#
# This test verifies Litestream's behavior under heavy concurrent load with
# multiple databases replicating simultaneously, mixed operations, and
# competing checkpoints.
set -e
echo "============================================"
echo "Concurrent Database Operations Test"
echo "============================================"
echo ""
echo "Testing Litestream with multiple concurrent databases and operations"
echo ""
# Configuration
BASE_DIR="/tmp/concurrent-test"
LITESTREAM_TEST="./bin/litestream-test"
LITESTREAM="./bin/litestream"
NUM_DBS=5
DB_SIZE="50MB"
DURATION="30s"
# Clean up any previous test
echo "[SETUP] Cleaning up previous test files..."
rm -rf "$BASE_DIR"
mkdir -p "$BASE_DIR"
# Check for required binaries
if [ ! -f "$LITESTREAM_TEST" ]; then
echo "ERROR: litestream-test not found at $LITESTREAM_TEST"
echo "Build with: go build -o bin/litestream-test ./cmd/litestream-test"
exit 1
fi
if [ ! -f "$LITESTREAM" ]; then
echo "ERROR: litestream not found at $LITESTREAM"
echo "Build with: go build -o bin/litestream ./cmd/litestream"
exit 1
fi
# Create configuration file for multiple databases
echo "[1] Creating Litestream configuration for $NUM_DBS databases..."
cat > "$BASE_DIR/litestream.yml" <<EOF
dbs:
EOF
for i in $(seq 1 $NUM_DBS); do
cat >> "$BASE_DIR/litestream.yml" <<EOF
  - path: $BASE_DIR/db${i}.db
    replicas:
      - url: file://$BASE_DIR/replica${i}
        sync-interval: 1s
EOF
done
echo " ✓ Configuration created"
# Create and populate databases
echo ""
echo "[2] Creating and populating $NUM_DBS databases..."
for i in $(seq 1 $NUM_DBS); do
echo " Creating database $i..."
$LITESTREAM_TEST populate -db "$BASE_DIR/db${i}.db" -target-size "$DB_SIZE" -table-count 2 &
done
wait
echo " ✓ All databases created"
# Start Litestream with multiple databases
echo ""
echo "[3] Starting Litestream for all databases..."
$LITESTREAM replicate -config "$BASE_DIR/litestream.yml" > "$BASE_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
sleep 3
if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
echo "ERROR: Litestream failed to start"
cat "$BASE_DIR/litestream.log"
exit 1
fi
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"
# Start concurrent operations on all databases
echo ""
echo "[4] Starting concurrent operations on all databases..."
PIDS=()
# Different workload patterns for each database
for i in $(seq 1 $NUM_DBS); do
case $i in
1)
# High-frequency writes
echo " DB$i: High-frequency writes (500/sec)"
$LITESTREAM_TEST load -db "$BASE_DIR/db${i}.db" \
-write-rate 500 -duration "$DURATION" \
-pattern constant > "$BASE_DIR/load${i}.log" 2>&1 &
;;
2)
# Burst writes
echo " DB$i: Burst writes (1000/sec burst)"
$LITESTREAM_TEST load -db "$BASE_DIR/db${i}.db" \
-write-rate 1000 -duration "$DURATION" \
-pattern burst > "$BASE_DIR/load${i}.log" 2>&1 &
;;
3)
# Mixed with checkpoints
echo " DB$i: Moderate writes with periodic checkpoints"
(
$LITESTREAM_TEST load -db "$BASE_DIR/db${i}.db" \
-write-rate 100 -duration "$DURATION" \
-pattern constant > "$BASE_DIR/load${i}.log" 2>&1 &
LOAD_PID=$!
# Periodic checkpoints
for j in {1..6}; do
sleep 5
sqlite3 "$BASE_DIR/db${i}.db" "PRAGMA wal_checkpoint(PASSIVE);" 2>/dev/null || true
done
wait $LOAD_PID
) &
;;
4)
# Shrinking operations
echo " DB$i: Writes with periodic shrinking"
(
$LITESTREAM_TEST load -db "$BASE_DIR/db${i}.db" \
-write-rate 50 -duration "$DURATION" \
-pattern wave > "$BASE_DIR/load${i}.log" 2>&1 &
LOAD_PID=$!
# Periodic shrinks
for j in {1..3}; do
sleep 10
$LITESTREAM_TEST shrink -db "$BASE_DIR/db${i}.db" \
-delete-percentage 30 2>/dev/null || true
done
wait $LOAD_PID
) &
;;
5)
# Large transactions
echo " DB$i: Large batch transactions"
for j in {1..10}; do
sqlite3 "$BASE_DIR/db${i}.db" <<EOF
BEGIN;
INSERT INTO test_table_0 (data)
SELECT randomblob(1000) FROM generate_series(1, 10000);
COMMIT;
EOF
sleep 3
done &
;;
esac
PIDS+=($!)
done
# Monitor progress
echo ""
echo "[5] Running concurrent operations for $DURATION..."
ELAPSED=0
MAX_ELAPSED=30
while [ $ELAPSED -lt $MAX_ELAPSED ]; do
sleep 5
ELAPSED=$((ELAPSED + 5))
# Check Litestream health
if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
echo " ERROR: Litestream crashed!"
cat "$BASE_DIR/litestream.log" | tail -20
exit 1
fi
# Check for errors
ERROR_COUNT=$(grep -i "error\|panic" "$BASE_DIR/litestream.log" 2>/dev/null | wc -l || echo "0")
if [ "$ERROR_COUNT" -gt 0 ]; then
echo " Errors detected: $ERROR_COUNT"
fi
echo " Progress: ${ELAPSED}s / ${MAX_ELAPSED}s"
done
# Stop all operations
echo ""
echo "[6] Stopping operations..."
for pid in "${PIDS[@]}"; do
kill $pid 2>/dev/null || true
done
wait
# Give Litestream time to catch up
echo " Waiting for final sync..."
sleep 5
# Collect metrics
echo ""
echo "[7] Collecting metrics..."
for i in $(seq 1 $NUM_DBS); do
DB_SIZE=$(stat -f%z "$BASE_DIR/db${i}.db" 2>/dev/null || stat -c%s "$BASE_DIR/db${i}.db")
WAL_SIZE=$(stat -f%z "$BASE_DIR/db${i}.db-wal" 2>/dev/null || stat -c%s "$BASE_DIR/db${i}.db-wal" 2>/dev/null || echo "0")
REPLICA_COUNT=$(find "$BASE_DIR/replica${i}" -type f 2>/dev/null | wc -l || echo "0")
echo " DB$i:"
echo " Database size: $((DB_SIZE / 1024 / 1024))MB"
echo " WAL size: $((WAL_SIZE / 1024 / 1024))MB"
echo " Replica files: $REPLICA_COUNT"
done
# Stop Litestream
kill $LITESTREAM_PID 2>/dev/null || true
sleep 2
# Test restoration for all databases
echo ""
echo "[8] Testing restoration of all databases..."
RESTORE_FAILED=0
for i in $(seq 1 $NUM_DBS); do
echo " Restoring DB$i..."
rm -f "$BASE_DIR/restored${i}.db"
if $LITESTREAM restore -config "$BASE_DIR/litestream.yml" \
-o "$BASE_DIR/restored${i}.db" "$BASE_DIR/db${i}.db" > "$BASE_DIR/restore${i}.log" 2>&1; then
# Verify integrity
INTEGRITY=$(sqlite3 "$BASE_DIR/restored${i}.db" "PRAGMA integrity_check;" 2>/dev/null || echo "FAILED")
if [ "$INTEGRITY" = "ok" ]; then
echo " ✓ DB$i restored successfully"
else
echo " ✗ DB$i integrity check failed!"
RESTORE_FAILED=$((RESTORE_FAILED + 1))
fi
else
echo " ✗ DB$i restore failed!"
cat "$BASE_DIR/restore${i}.log"
RESTORE_FAILED=$((RESTORE_FAILED + 1))
fi
done
# Check for race conditions or deadlocks in logs
echo ""
echo "[9] Analyzing logs for issues..."
ISSUES_FOUND=0
# Check for deadlocks
if grep -i "deadlock" "$BASE_DIR/litestream.log" > /dev/null 2>&1; then
echo " ✗ Deadlock detected!"
ISSUES_FOUND=$((ISSUES_FOUND + 1))
fi
# Check for database locked errors
LOCKED_COUNT=$(grep -c "database is locked" "$BASE_DIR/litestream.log" 2>/dev/null || true); LOCKED_COUNT=${LOCKED_COUNT:-0}
if [ "$LOCKED_COUNT" -gt 10 ]; then
echo " ⚠ High number of 'database locked' errors: $LOCKED_COUNT"
ISSUES_FOUND=$((ISSUES_FOUND + 1))
fi
# Check for checkpoint failures
CHECKPOINT_ERRORS=$(grep -c "checkpoint.*error\|checkpoint.*fail" "$BASE_DIR/litestream.log" 2>/dev/null || true); CHECKPOINT_ERRORS=${CHECKPOINT_ERRORS:-0}
if [ "$CHECKPOINT_ERRORS" -gt 0 ]; then
echo " ⚠ Checkpoint errors detected: $CHECKPOINT_ERRORS"
fi
# Summary
echo ""
echo "============================================"
echo "Test Results Summary"
echo "============================================"
echo ""
echo "Databases tested: $NUM_DBS"
echo "Restore failures: $RESTORE_FAILED"
echo "Critical issues found: $ISSUES_FOUND"
if [ "$RESTORE_FAILED" -eq 0 ] && [ "$ISSUES_FOUND" -eq 0 ]; then
echo ""
echo "✅ CONCURRENT OPERATIONS TEST PASSED"
echo ""
echo "Litestream successfully handled:"
echo "- $NUM_DBS databases replicating simultaneously"
echo "- Mixed workload patterns (high-frequency, burst, batch)"
echo "- Concurrent checkpoints and shrinking operations"
echo "- All databases restored successfully"
else
echo ""
echo "❌ CONCURRENT OPERATIONS TEST FAILED"
echo ""
echo "Issues detected during concurrent operations"
echo "Check logs at: $BASE_DIR/"
exit 1
fi
# Clean up
pkill -f litestream-test 2>/dev/null || true
pkill -f "litestream replicate" 2>/dev/null || true
echo ""
echo "Test complete. Artifacts saved in: $BASE_DIR/"


@@ -1,172 +0,0 @@
#!/bin/bash
set -e
# Test database deletion and recreation scenarios
# This test verifies proper handling when databases are deleted and recreated
echo "=========================================="
echo "Database Deletion and Recreation Test"
echo "=========================================="
echo ""
echo "Testing Litestream's handling of database deletion and recreation"
echo ""
# Configuration
DB="/tmp/deletion-test.db"
REPLICA="/tmp/deletion-replica"
LITESTREAM="./bin/litestream"
# Cleanup function
cleanup() {
pkill -f "litestream replicate.*deletion-test.db" 2>/dev/null || true
rm -f "$DB" "$DB-wal" "$DB-shm" "$DB-litestream"
rm -rf "$REPLICA"
rm -f /tmp/deletion-*.log
}
trap cleanup EXIT
echo "[SETUP] Cleaning up previous test files..."
cleanup
echo ""
echo "[1] Creating initial database..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode = WAL;
CREATE TABLE original (id INTEGER PRIMARY KEY, data TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
INSERT INTO original (data) VALUES ('Original database content');
INSERT INTO original (data) VALUES ('Should not appear in new database');
EOF
ORIGINAL_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM original;")
echo " ✓ Original database created with $ORIGINAL_COUNT rows"
echo ""
echo "[2] Starting Litestream replication..."
"$LITESTREAM" replicate "$DB" "file://$REPLICA" > /tmp/deletion-litestream.log 2>&1 &
LITESTREAM_PID=$!
sleep 2
if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
echo " ✗ Litestream failed to start"
cat /tmp/deletion-litestream.log
exit 1
fi
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"
echo ""
echo "[3] Letting replication stabilize..."
sleep 3
echo " ✓ Initial replication complete"
echo ""
echo "=========================================="
echo "Test 1: Delete database while Litestream running"
echo "=========================================="
echo "[4] Deleting database files..."
rm -f "$DB" "$DB-wal" "$DB-shm"
echo " ✓ Database files deleted"
echo ""
echo "[5] Creating new database with different schema..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode = WAL;
CREATE TABLE replacement (id INTEGER PRIMARY KEY, content BLOB, version INTEGER);
INSERT INTO replacement (content, version) VALUES (randomblob(100), 1);
INSERT INTO replacement (content, version) VALUES (randomblob(200), 2);
EOF
NEW_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM replacement;")
echo " ✓ New database created with $NEW_COUNT rows"
echo ""
echo "[6] Checking for Litestream errors..."
sleep 2
# grep -c prints 0 itself on no match but exits non-zero; `|| echo "0"` would yield "0\n0"
ERRORS=$(grep -c "ERROR" /tmp/deletion-litestream.log 2>/dev/null || true); ERRORS=${ERRORS:-0}
WARNINGS=$(grep -c "WAL" /tmp/deletion-litestream.log 2>/dev/null || true); WARNINGS=${WARNINGS:-0}
echo " Litestream errors: $ERRORS"
echo " WAL warnings: $WARNINGS"
if [ $ERRORS -gt 0 ]; then
echo " ⚠️ Errors detected (expected when database deleted)"
tail -5 /tmp/deletion-litestream.log | grep ERROR || true
fi
echo ""
echo "=========================================="
echo "Test 2: Check for leftover WAL corruption"
echo "=========================================="
echo "[7] Stopping Litestream..."
kill $LITESTREAM_PID 2>/dev/null || true
wait $LITESTREAM_PID 2>/dev/null || true
echo " ✓ Litestream stopped"
echo ""
echo "[8] Simulating leftover WAL file scenario..."
# Create a database with WAL
sqlite3 "$DB" <<EOF
PRAGMA journal_mode = WAL;
INSERT INTO replacement (content, version) VALUES (randomblob(300), 3);
EOF
echo " ✓ WAL file created"
# Delete only the main database file (leaving WAL)
echo "[9] Deleting only main database file (leaving WAL)..."
rm -f "$DB"
ls -la /tmp/deletion-test* 2>/dev/null | head -5 || true
echo ""
echo "[10] Creating new database with leftover WAL..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode = WAL;
CREATE TABLE new_table (id INTEGER PRIMARY KEY, data TEXT);
INSERT INTO new_table (data) VALUES ('New database with old WAL');
EOF
# Check if corruption occurred
INTEGRITY=$(sqlite3 "$DB" "PRAGMA integrity_check;" 2>&1 || true)
if [ "$INTEGRITY" = "ok" ]; then
echo " ✓ No corruption despite leftover WAL"
else
echo " ✗ CORRUPTION DETECTED: $INTEGRITY"
echo " This confirms leftover WAL files can corrupt new databases!"
fi
echo ""
echo "=========================================="
echo "Test 3: Clean deletion procedure"
echo "=========================================="
echo "[11] Demonstrating proper deletion procedure..."
# Clean up everything
rm -f "$DB" "$DB-wal" "$DB-shm"
rm -rf "$DB-litestream"
echo " ✓ All database files removed"
# Create fresh database
sqlite3 "$DB" <<EOF
PRAGMA journal_mode = WAL;
CREATE TABLE clean (id INTEGER PRIMARY KEY, data TEXT);
INSERT INTO clean (data) VALUES ('Clean start');
EOF
FINAL_INTEGRITY=$(sqlite3 "$DB" "PRAGMA integrity_check;")
FINAL_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM clean;")
echo " ✓ Clean database created"
echo " Integrity: $FINAL_INTEGRITY"
echo " Rows: $FINAL_COUNT"
echo ""
echo "=========================================="
echo "Database Deletion Test Summary:"
echo " ✓ Detected database deletion scenarios"
echo " ✓ Demonstrated WAL file corruption risk"
echo " ✓ Showed proper cleanup procedure"
echo ""
echo "IMPORTANT: When deleting databases:"
echo " 1. Stop Litestream first"
echo " 2. Delete: DB, DB-wal, DB-shm, DB-litestream"
echo " 3. Restart Litestream after creating new DB"
echo "=========================================="


@@ -1,260 +0,0 @@
#!/bin/bash
set -e
# Test database integrity after restore (Issue #582)
# This test creates complex data patterns, replicates, and verifies integrity after restore
echo "=========================================="
echo "Database Integrity After Restore Test"
echo "=========================================="
echo ""
echo "Testing if restored databases pass integrity checks"
echo ""
# Configuration
DB="/tmp/integrity-test.db"
REPLICA="/tmp/integrity-replica"
RESTORED="/tmp/integrity-restored.db"
LITESTREAM_CONFIG="/tmp/integrity-litestream.yml"
LITESTREAM="./bin/litestream"
LITESTREAM_TEST="./bin/litestream-test"
# Cleanup function
cleanup() {
pkill -f "litestream replicate.*integrity-test.db" 2>/dev/null || true
rm -f "$DB" "$DB-wal" "$DB-shm" "$DB-litestream"
rm -f "$RESTORED" "$RESTORED-wal" "$RESTORED-shm"
rm -rf "$REPLICA"
rm -f "$LITESTREAM_CONFIG"
rm -f /tmp/integrity-*.log
}
trap cleanup EXIT
echo "[SETUP] Cleaning up previous test files..."
cleanup
echo ""
echo "[1] Creating database with complex data patterns..."
# Create database with various data types and constraints
sqlite3 "$DB" <<EOF
PRAGMA page_size = 4096;
PRAGMA journal_mode = WAL;
-- Table with primary key and foreign key constraints
CREATE TABLE users (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
email TEXT UNIQUE,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Table with indexes
CREATE TABLE posts (
id INTEGER PRIMARY KEY,
user_id INTEGER NOT NULL,
title TEXT NOT NULL,
content BLOB,
score REAL,
FOREIGN KEY (user_id) REFERENCES users(id)
);
CREATE INDEX idx_posts_user ON posts(user_id);
CREATE INDEX idx_posts_score ON posts(score);
-- Table with check constraints
CREATE TABLE transactions (
id INTEGER PRIMARY KEY,
amount REAL NOT NULL CHECK (amount != 0),
type TEXT CHECK (type IN ('credit', 'debit')),
balance REAL
);
-- Add initial data
INSERT INTO users (name, email) VALUES
('Alice', 'alice@test.com'),
('Bob', 'bob@test.com'),
('Charlie', 'charlie@test.com');
-- Add posts with various data types
INSERT INTO posts (user_id, title, content, score) VALUES
(1, 'First Post', randomblob(1000), 4.5),
(2, 'Second Post', randomblob(2000), 3.8),
(3, 'Third Post', NULL, 4.9);
-- Add transactions
INSERT INTO transactions (amount, type, balance) VALUES
(100.50, 'credit', 100.50),
(-25.75, 'debit', 74.75),
(50.00, 'credit', 124.75);
EOF
echo " ✓ Database created with complex schema"
# Add more data manually to preserve schema
echo ""
echo "[2] Adding bulk data..."
for i in {1..100}; do
sqlite3 "$DB" "INSERT INTO posts (user_id, title, content, score) VALUES ((ABS(RANDOM()) % 3) + 1, 'Post $i', randomblob(5000), RANDOM() % 5);" 2>/dev/null
sqlite3 "$DB" "INSERT INTO transactions (amount, type, balance) VALUES (ABS(RANDOM() % 1000) + 0.01, CASE WHEN RANDOM() % 2 = 0 THEN 'credit' ELSE 'debit' END, ABS(RANDOM() % 10000));" 2>/dev/null
done
INITIAL_SIZE=$(du -h "$DB" | cut -f1)
echo " ✓ Database populated: $INITIAL_SIZE"
echo ""
echo "[3] Running initial integrity check..."
INITIAL_INTEGRITY=$(sqlite3 "$DB" "PRAGMA integrity_check;")
if [ "$INITIAL_INTEGRITY" != "ok" ]; then
echo " ✗ Initial database has integrity issues: $INITIAL_INTEGRITY"
exit 1
fi
echo " ✓ Initial integrity check: $INITIAL_INTEGRITY"
# Get checksums for verification
USERS_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM users;")
POSTS_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM posts;")
TRANS_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM transactions;")
TABLE_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM sqlite_master WHERE type='table';" 2>/dev/null || echo "0")
echo ""
echo "[4] Starting Litestream replication..."
"$LITESTREAM" replicate "$DB" "file://$REPLICA" > /tmp/integrity-litestream.log 2>&1 &
LITESTREAM_PID=$!
sleep 3
if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
echo " ✗ Litestream failed to start"
cat /tmp/integrity-litestream.log
exit 1
fi
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"
echo ""
echo "[5] Making changes while replicating..."
# Add more data and modify existing
sqlite3 "$DB" <<EOF
-- Update existing data
UPDATE users SET name = 'Alice Updated' WHERE id = 1;
DELETE FROM posts WHERE id = 2;
-- Add new data with edge cases
INSERT INTO users (name, email) VALUES ('Dave', 'dave@test.com');
INSERT INTO posts (user_id, title, content, score) VALUES
(4, 'Edge Case Post', randomblob(5000), 0.0),
(4, 'Another Post', randomblob(100), -1.5);
-- Trigger constraint checks
INSERT INTO transactions (amount, type, balance) VALUES
(1000.00, 'credit', 1124.75),
(-500.00, 'debit', 624.75);
EOF
# Force checkpoint
sqlite3 "$DB" "PRAGMA wal_checkpoint(FULL);" >/dev/null 2>&1
sleep 2
echo " ✓ Changes made and checkpoint executed"
echo ""
echo "[6] Stopping Litestream and attempting restore..."
kill $LITESTREAM_PID 2>/dev/null || true
wait $LITESTREAM_PID 2>/dev/null || true
# Attempt restore; wrap in `if !` so set -e can't exit before we report the failure
if ! "$LITESTREAM" restore -o "$RESTORED" "file://$REPLICA" > /tmp/integrity-restore.log 2>&1; then
echo " ✗ Restore failed"
cat /tmp/integrity-restore.log
exit 1
fi
echo " ✓ Restore completed"
echo ""
echo "[7] Running integrity check on restored database..."
RESTORED_INTEGRITY=$(sqlite3 "$RESTORED" "PRAGMA integrity_check;" 2>&1 || true)
if [ "$RESTORED_INTEGRITY" != "ok" ]; then
echo " ✗ CRITICAL: Restored database FAILED integrity check!"
echo " Result: $RESTORED_INTEGRITY"
# Try to get more info
echo ""
echo " Attempting detailed analysis:"
sqlite3 "$RESTORED" "PRAGMA foreign_key_check;" 2>/dev/null || echo " Foreign key check failed"
sqlite3 "$RESTORED" "SELECT COUNT(*) FROM sqlite_master;" 2>/dev/null || echo " Cannot read schema"
exit 1
else
echo " ✓ Integrity check PASSED: $RESTORED_INTEGRITY"
fi
echo ""
echo "[8] Verifying data consistency..."
# Check row counts
RESTORED_USERS=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM users;" 2>/dev/null || echo "ERROR")
RESTORED_POSTS=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM posts;" 2>/dev/null || echo "ERROR")
RESTORED_TRANS=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM transactions;" 2>/dev/null || echo "ERROR")
RESTORED_TABLE=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM sqlite_master WHERE type='table';" 2>/dev/null || echo "0")
# Expected counts after changes
EXPECTED_USERS=4 # 3 original + 1 added
EXPECTED_POSTS=104 # 3 original + 100 bulk - 1 deleted + 2 added
EXPECTED_TRANS=105 # 3 original + 100 bulk + 2 added
echo " Data verification:"
echo " Users: $RESTORED_USERS (expected: $EXPECTED_USERS)"
echo " Posts: $RESTORED_POSTS (expected: $EXPECTED_POSTS)"
echo " Transactions: $RESTORED_TRANS (expected: $EXPECTED_TRANS)"
echo " Test Table: $RESTORED_TABLE (expected: $TABLE_COUNT)"
DATA_INTACT=true
if [ "$RESTORED_USERS" != "$EXPECTED_USERS" ]; then
echo " ✗ User count mismatch!"
DATA_INTACT=false
fi
if [ "$RESTORED_POSTS" != "$EXPECTED_POSTS" ]; then
echo " ✗ Post count mismatch!"
DATA_INTACT=false
fi
if [ "$RESTORED_TRANS" != "$EXPECTED_TRANS" ]; then
echo " ✗ Transaction count mismatch!"
DATA_INTACT=false
fi
echo ""
echo "[9] Testing constraint enforcement..."
# Test that constraints still work
CONSTRAINT_TEST=$(sqlite3 "$RESTORED" "INSERT INTO transactions (amount, type) VALUES (0, 'credit');" 2>&1 || echo "CONSTRAINT_OK")
if [[ "$CONSTRAINT_TEST" == *"CONSTRAINT_OK"* ]] || [[ "$CONSTRAINT_TEST" == *"CHECK constraint failed"* ]]; then
echo " ✓ Check constraints working"
else
echo " ✗ Check constraints not enforced!"
DATA_INTACT=false
fi
# Test foreign keys
FK_TEST=$(sqlite3 "$RESTORED" "PRAGMA foreign_keys=ON; INSERT INTO posts (user_id, title) VALUES (999, 'Bad FK');" 2>&1 || echo "FK_OK")
if [[ "$FK_TEST" == *"FK_OK"* ]] || [[ "$FK_TEST" == *"FOREIGN KEY constraint failed"* ]]; then
echo " ✓ Foreign key constraints working"
else
echo " ✗ Foreign key constraints not enforced!"
DATA_INTACT=false
fi
echo ""
if [ "$DATA_INTACT" = true ] && [ "$RESTORED_INTEGRITY" = "ok" ]; then
echo "✅ TEST PASSED: Database integrity preserved after restore"
else
echo "❌ TEST FAILED: Database integrity issues detected"
exit 1
fi
echo ""
echo "=========================================="
echo "Summary:"
echo " Integrity Check: $RESTORED_INTEGRITY"
echo " Data Consistency: $DATA_INTACT"
echo " Constraints: Working"
echo "=========================================="


@@ -1,139 +0,0 @@
#!/bin/bash
# Test: Starting replication with a fresh (empty) database
# This tests if Litestream works better when it creates the database from scratch
set -e
echo "=========================================="
echo "Fresh Start Database Test"
echo "=========================================="
echo ""
echo "Testing if Litestream works correctly when starting fresh"
echo ""
# Configuration
DB="/tmp/fresh-test.db"
REPLICA="/tmp/fresh-replica"
LITESTREAM="./bin/litestream"
LITESTREAM_TEST="./bin/litestream-test"
# Clean up
echo "[SETUP] Cleaning up..."
rm -f "$DB"*
rm -rf "$REPLICA"
# Check binaries
if [ ! -f "$LITESTREAM" ]; then
echo "ERROR: $LITESTREAM not found"
exit 1
fi
if [ ! -f "$LITESTREAM_TEST" ]; then
echo "ERROR: $LITESTREAM_TEST not found"
exit 1
fi
# Start Litestream BEFORE creating database
echo ""
echo "[1] Starting Litestream with non-existent database..."
$LITESTREAM replicate "$DB" "file://$REPLICA" > /tmp/fresh-test.log 2>&1 &
LITESTREAM_PID=$!
sleep 2
if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
echo " ⚠ Litestream exited; it may not wait for a database that doesn't exist yet"
else
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"
fi
# Now create and populate the database
echo ""
echo "[2] Creating database while Litestream is running..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode=WAL;
CREATE TABLE test (id INTEGER PRIMARY KEY, data TEXT);
INSERT INTO test (data) VALUES ('initial data');
EOF
echo " ✓ Database created"
# Give Litestream time to detect the new database
sleep 3
# Check if Litestream started replicating
echo ""
echo "[3] Checking if Litestream detected the database..."
if grep -q "initialized db" /tmp/fresh-test.log; then
echo " ✓ Litestream detected and initialized database"
else
echo " ⚠ No 'initialized db' message in log yet (see /tmp/fresh-test.log)"
fi
# Add more data
echo ""
echo "[4] Adding data to test replication..."
for i in {1..100}; do
sqlite3 "$DB" "INSERT INTO test (data) VALUES ('row $i');"
done
echo " ✓ Added 100 rows"
# Let replication catch up
sleep 5
# Check for errors
echo ""
echo "[5] Checking for errors..."
ERROR_COUNT=$(grep -c "ERROR" /tmp/fresh-test.log 2>/dev/null || true); ERROR_COUNT=${ERROR_COUNT:-0}
if [ "$ERROR_COUNT" -gt 1 ]; then
echo " ⚠ Found $ERROR_COUNT errors:"
grep "ERROR" /tmp/fresh-test.log | head -3
else
echo " ✓ No significant errors"
fi
# Check replica files
echo ""
echo "[6] Checking replica files..."
if [ -d "$REPLICA/ltx" ]; then
FILE_COUNT=$(find "$REPLICA/ltx" -name "*.ltx" | wc -l)
echo " ✓ Replica created with $FILE_COUNT LTX files"
ls -la "$REPLICA/ltx/0/" 2>/dev/null | head -3
else
echo " ✗ No replica files created!"
fi
# Stop Litestream
kill $LITESTREAM_PID 2>/dev/null || true
sleep 2
# Test restore
echo ""
echo "[7] Testing restore..."
rm -f /tmp/fresh-restored.db
if $LITESTREAM restore -o /tmp/fresh-restored.db "file://$REPLICA" 2>&1; then
echo " ✓ Restore successful"
# Verify data
ORIG_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM test;")
REST_COUNT=$(sqlite3 /tmp/fresh-restored.db "SELECT COUNT(*) FROM test;")
if [ "$ORIG_COUNT" -eq "$REST_COUNT" ]; then
echo " ✓ Data integrity verified: $ORIG_COUNT rows"
echo ""
echo "TEST PASSED: Fresh start works correctly"
else
echo " ✗ Data mismatch: Original=$ORIG_COUNT, Restored=$REST_COUNT"
echo ""
echo "TEST FAILED: Data loss detected"
fi
else
echo " ✗ Restore failed!"
echo ""
echo "TEST FAILED: Cannot restore database"
fi
echo ""
echo "=========================================="
echo "Test artifacts:"
echo " Database: $DB"
echo " Replica: $REPLICA"
echo " Log: /tmp/fresh-test.log"
echo "=========================================="


@@ -1,173 +0,0 @@
#!/bin/bash
# Test: Rapid Checkpoint Cycling
# This tests Litestream's behavior under rapid checkpoint pressure
set -e
echo "=========================================="
echo "Rapid Checkpoint Cycling Test"
echo "=========================================="
echo ""
echo "Testing Litestream under rapid checkpoint pressure"
echo ""
# Configuration
DB="/tmp/checkpoint-cycle.db"
REPLICA="/tmp/checkpoint-cycle-replica"
LITESTREAM="./bin/litestream"
LITESTREAM_TEST="./bin/litestream-test"
# Clean up
echo "[SETUP] Cleaning up..."
rm -f "$DB"*
rm -rf "$REPLICA"
# Start with fresh database
echo "[1] Creating initial database..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode=WAL;
CREATE TABLE test (id INTEGER PRIMARY KEY, data BLOB);
EOF
echo " ✓ Database created"
# Start Litestream
echo ""
echo "[2] Starting Litestream..."
$LITESTREAM replicate "$DB" "file://$REPLICA" > /tmp/checkpoint-cycle.log 2>&1 &
LITESTREAM_PID=$!
sleep 3
if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
echo " ✗ Litestream failed to start"
cat /tmp/checkpoint-cycle.log
exit 1
fi
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"
# Start continuous writes in background
echo ""
echo "[3] Starting continuous writes..."
(
while kill -0 $LITESTREAM_PID 2>/dev/null; do
sqlite3 "$DB" "INSERT INTO test (data) VALUES (randomblob(1000));" 2>/dev/null || true
sleep 0.01 # 100 writes/sec attempt
done
) &
WRITE_PID=$!
echo " ✓ Write loop started"
# Rapid checkpoint cycling
echo ""
echo "[4] Starting rapid checkpoint cycling (30 seconds)..."
echo " Testing all checkpoint modes in rapid succession..."
CHECKPOINT_COUNT=0
ERRORS=0
START_TIME=$(date +%s)
while [ $(($(date +%s) - START_TIME)) -lt 30 ]; do
# Cycle through different checkpoint modes
for MODE in PASSIVE FULL RESTART TRUNCATE; do
if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
echo " ✗ Litestream crashed during checkpoint!"
break 2
fi
# Execute checkpoint
OUTPUT=$(sqlite3 "$DB" "PRAGMA wal_checkpoint($MODE);" 2>&1) || {
ERRORS=$((ERRORS + 1))
echo " ⚠ Checkpoint $MODE error: $OUTPUT"
}
CHECKPOINT_COUNT=$((CHECKPOINT_COUNT + 1))
# Very brief pause
sleep 0.1
done
done
echo " Executed $CHECKPOINT_COUNT checkpoints with $ERRORS errors"
# Stop writes
kill $WRITE_PID 2>/dev/null || true
# Let Litestream catch up
echo ""
echo "[5] Letting Litestream stabilize..."
sleep 5
# Check Litestream health
if kill -0 $LITESTREAM_PID 2>/dev/null; then
echo " ✓ Litestream survived rapid checkpointing"
else
echo " ✗ Litestream died during test"
fi
# Check for sync errors
echo ""
echo "[6] Checking for sync errors..."
# grep -c prints 0 itself on no match; `|| echo "0"` would yield "0\n0" since grep exits non-zero
SYNC_ERRORS=$(grep -c "sync error" /tmp/checkpoint-cycle.log 2>/dev/null || true); SYNC_ERRORS=${SYNC_ERRORS:-0}
FLAGS_ERRORS=$(grep -c "no flags allowed" /tmp/checkpoint-cycle.log 2>/dev/null || true); FLAGS_ERRORS=${FLAGS_ERRORS:-0}
if [ "$FLAGS_ERRORS" -gt 0 ]; then
echo " ✗ ltx v0.5.0 flag errors detected: $FLAGS_ERRORS"
elif [ "$SYNC_ERRORS" -gt 0 ]; then
echo " ⚠ Sync errors detected: $SYNC_ERRORS"
else
echo " ✓ No sync errors"
fi
# Check replica status
echo ""
echo "[7] Checking replica status..."
if [ -d "$REPLICA/ltx" ]; then
LTX_COUNT=$(find "$REPLICA/ltx" -name "*.ltx" | wc -l)
echo " ✓ Replica has $LTX_COUNT LTX files"
else
echo " ✗ No replica created!"
fi
# Get final stats
ROW_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")
WAL_SIZE=$(stat -f%z "$DB-wal" 2>/dev/null || stat -c%s "$DB-wal" 2>/dev/null || echo "0")
echo " Final row count: $ROW_COUNT"
echo " Final WAL size: $((WAL_SIZE / 1024))KB"
# Stop Litestream
kill $LITESTREAM_PID 2>/dev/null || true
sleep 2
# Test restore
echo ""
echo "[8] Testing restore after rapid checkpointing..."
rm -f /tmp/checkpoint-restored.db
# Don't pipe through tee here: the `if` would test tee's status (always 0), masking restore failures
if $LITESTREAM restore -o /tmp/checkpoint-restored.db "file://$REPLICA" > /tmp/restore-checkpoint.log 2>&1; then
REST_COUNT=$(sqlite3 /tmp/checkpoint-restored.db "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")
if [ "$REST_COUNT" -eq "$ROW_COUNT" ]; then
echo " ✓ Restore successful: $REST_COUNT rows"
echo ""
echo "TEST PASSED: Survived $CHECKPOINT_COUNT rapid checkpoints"
else
echo " ⚠ Row count mismatch: Original=$ROW_COUNT, Restored=$REST_COUNT"
LOSS=$((ROW_COUNT - REST_COUNT))
echo " Data loss: $LOSS rows"
echo ""
echo "TEST FAILED: Data loss after rapid checkpointing"
fi
else
echo " ✗ Restore failed!"
cat /tmp/restore-checkpoint.log
echo ""
echo "TEST FAILED: Cannot restore after rapid checkpointing"
fi
echo ""
echo "=========================================="
echo "Summary:"
echo " Checkpoints executed: $CHECKPOINT_COUNT"
echo " Checkpoint errors: $ERRORS"
echo " Sync errors: $SYNC_ERRORS"
echo " Flag errors: $FLAGS_ERRORS"
echo " Rows written: $ROW_COUNT"
echo "=========================================="


@@ -1,203 +0,0 @@
#!/bin/bash
set -e
# Test multiple replica failover (Issue #687)
# This test verifies that restore falls back to healthy replicas when primary fails
echo "=========================================="
echo "Multiple Replica Failover Test"
echo "=========================================="
echo ""
echo "Testing if restore falls back to healthy replicas when first is unavailable"
echo ""
# Configuration
DB="/tmp/failover-test.db"
REPLICA1="/tmp/failover-replica1"
REPLICA2="/tmp/failover-replica2"
REPLICA3="/tmp/failover-replica3"
RESTORED="/tmp/failover-restored.db"
LITESTREAM_CONFIG="/tmp/failover-litestream.yml"
LITESTREAM="./bin/litestream"
LITESTREAM_TEST="./bin/litestream-test"
# Cleanup function
cleanup() {
pkill -f "litestream replicate.*failover-test" 2>/dev/null || true
rm -f "$DB" "$DB-wal" "$DB-shm" "$DB-litestream"
rm -f "$RESTORED" "$RESTORED-wal" "$RESTORED-shm"
rm -rf "$REPLICA1" "$REPLICA2" "$REPLICA3"
rm -f "$LITESTREAM_CONFIG"
rm -f /tmp/failover-*.log
}
trap cleanup EXIT
echo "[SETUP] Cleaning up previous test files..."
cleanup
echo ""
echo "[1] Creating test database..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode = WAL;
CREATE TABLE test (id INTEGER PRIMARY KEY, data TEXT, timestamp DATETIME DEFAULT CURRENT_TIMESTAMP);
INSERT INTO test (data) VALUES ('Initial data for failover test');
EOF
echo " ✓ Database created"
echo ""
echo "[2] Creating Litestream config with multiple replicas..."
cat > "$LITESTREAM_CONFIG" <<EOF
dbs:
- path: $DB
replicas:
- url: file://$REPLICA1
sync-interval: 1s
- url: file://$REPLICA2
sync-interval: 1s
- url: file://$REPLICA3
sync-interval: 1s
EOF
echo " ✓ Config created with 3 replicas"
echo ""
echo "[3] Starting Litestream with multiple replicas..."
"$LITESTREAM" replicate -config "$LITESTREAM_CONFIG" > /tmp/failover-litestream.log 2>&1 &
LITESTREAM_PID=$!
sleep 3
if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
echo " ✗ Litestream failed to start"
cat /tmp/failover-litestream.log
exit 1
fi
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"
echo ""
echo "[4] Adding data to ensure replication..."
for i in {1..10}; do
sqlite3 "$DB" "INSERT INTO test (data) VALUES ('Replicated data $i');"
done
sqlite3 "$DB" "PRAGMA wal_checkpoint(FULL);" >/dev/null 2>&1
sleep 3
echo " ✓ Added 10 rows and checkpointed"
# Verify all replicas exist
echo ""
echo "[5] Verifying all replicas have data..."
for replica in "$REPLICA1" "$REPLICA2" "$REPLICA3"; do
if [ -d "$replica" ]; then
FILES=$(find "$replica" -name "*.ltx" 2>/dev/null | wc -l)
echo "$(basename $replica): $FILES LTX files"
else
echo "$(basename $replica): Not created!"
exit 1
fi
done
echo ""
echo "[6] Stopping Litestream..."
kill $LITESTREAM_PID 2>/dev/null || true
wait $LITESTREAM_PID 2>/dev/null || true
echo " ✓ Litestream stopped"
# Test 1: All replicas available
echo ""
echo "[7] Test 1: Restore with all replicas available..."
"$LITESTREAM" restore -config "$LITESTREAM_CONFIG" -o "$RESTORED" "$DB" > /tmp/failover-restore1.log 2>&1
if [ $? -eq 0 ]; then
COUNT=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")
echo " ✓ Restore successful with all replicas: $COUNT rows"
rm -f "$RESTORED" "$RESTORED-wal" "$RESTORED-shm"
else
echo " ✗ Restore failed with all replicas available"
cat /tmp/failover-restore1.log
fi
# Test 2: First replica corrupted
echo ""
echo "[8] Test 2: Corrupting first replica..."
rm -rf "$REPLICA1"/generations/*/wal/*.ltx
echo "CORRUPTED" > "$REPLICA1/CORRUPTED"
echo " ✓ First replica corrupted"
echo " Attempting restore with first replica corrupted..."
"$LITESTREAM" restore -config "$LITESTREAM_CONFIG" -o "$RESTORED" "$DB" > /tmp/failover-restore2.log 2>&1
if [ $? -eq 0 ]; then
COUNT=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")
if [ "$COUNT" -eq "11" ]; then
echo " ✓ Successfully fell back to healthy replicas: $COUNT rows"
else
echo " ✗ Restore succeeded but data incorrect: $COUNT rows (expected 11)"
fi
rm -f "$RESTORED" "$RESTORED-wal" "$RESTORED-shm"
else
echo " ✗ FAILED: Did not fall back to healthy replicas"
cat /tmp/failover-restore2.log
fi
# Test 3: First replica missing entirely
echo ""
echo "[9] Test 3: Removing first replica entirely..."
rm -rf "$REPLICA1"
echo " ✓ First replica removed"
echo " Attempting restore with first replica missing..."
"$LITESTREAM" restore -config "$LITESTREAM_CONFIG" -o "$RESTORED" "$DB" > /tmp/failover-restore3.log 2>&1
if [ $? -eq 0 ]; then
COUNT=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")
if [ "$COUNT" -eq "11" ]; then
echo " ✓ Successfully fell back to remaining replicas: $COUNT rows"
else
echo " ✗ Restore succeeded but data incorrect: $COUNT rows (expected 11)"
fi
rm -f "$RESTORED" "$RESTORED-wal" "$RESTORED-shm"
else
echo " ✗ FAILED: Did not fall back when first replica missing"
cat /tmp/failover-restore3.log
fi
# Test 4: Only last replica healthy
echo ""
echo "[10] Test 4: Corrupting second replica too..."
rm -rf "$REPLICA2"
echo " ✓ Second replica removed"
echo " Attempting restore with only third replica healthy..."
"$LITESTREAM" restore -config "$LITESTREAM_CONFIG" -o "$RESTORED" "$DB" > /tmp/failover-restore4.log 2>&1
if [ $? -eq 0 ]; then
COUNT=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")
if [ "$COUNT" -eq "11" ]; then
echo " ✓ Successfully restored from last healthy replica: $COUNT rows"
else
echo " ✗ Restore succeeded but data incorrect: $COUNT rows (expected 11)"
fi
rm -f "$RESTORED" "$RESTORED-wal" "$RESTORED-shm"
else
echo " ✗ FAILED: Could not restore from last healthy replica"
cat /tmp/failover-restore4.log
fi
# Test 5: All replicas unavailable
echo ""
echo "[11] Test 5: Removing all replicas..."
rm -rf "$REPLICA3"
echo " ✓ All replicas removed"
echo " Attempting restore with no healthy replicas..."
"$LITESTREAM" restore -config "$LITESTREAM_CONFIG" -o "$RESTORED" "$DB" > /tmp/failover-restore5.log 2>&1
if [ $? -ne 0 ]; then
echo " ✓ Correctly failed when no replicas available"
else
echo " ✗ Unexpected success with no replicas"
fi
echo ""
echo "=========================================="
echo "Failover Test Summary:"
echo " ✓ Restore works with all replicas"
echo " ✓ Falls back when first replica corrupted"
echo " ✓ Falls back when first replica missing"
echo " ✓ Works with only last replica healthy"
echo " ✓ Correctly fails when no replicas available"
echo "=========================================="

View File

@@ -1,189 +0,0 @@
#!/bin/bash
# Test: WAL Growth and Size Limits
# This tests how Litestream handles extreme WAL growth scenarios
set -e
echo "=========================================="
echo "WAL Growth and Size Limits Test"
echo "=========================================="
echo ""
echo "Testing Litestream's handling of large WAL files"
echo ""
# Configuration
DB="/tmp/wal-growth.db"
REPLICA="/tmp/wal-growth-replica"
LITESTREAM="./bin/litestream"
TARGET_WAL_SIZE_MB=100 # Target WAL size in MB
# Clean up
echo "[SETUP] Cleaning up..."
rm -f "$DB"*
rm -rf "$REPLICA"
# Create fresh database
echo "[1] Creating database..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode=WAL;
PRAGMA wal_autocheckpoint=0; -- Disable auto-checkpoint
CREATE TABLE test (id INTEGER PRIMARY KEY, data BLOB);
EOF
echo " ✓ Database created with auto-checkpoint disabled"
# Start Litestream
echo ""
echo "[2] Starting Litestream..."
$LITESTREAM replicate "$DB" "file://$REPLICA" > /tmp/wal-growth.log 2>&1 &
LITESTREAM_PID=$!
sleep 3
if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
echo " ✗ Litestream failed to start"
head -10 /tmp/wal-growth.log
exit 1
fi
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"
# Write data until WAL reaches target size
echo ""
echo "[3] Growing WAL to ${TARGET_WAL_SIZE_MB}MB..."
echo " Writing large blobs without checkpointing..."
BATCH_COUNT=0
while true; do
# Check current WAL size
WAL_SIZE=$(stat -f%z "$DB-wal" 2>/dev/null || stat -c%s "$DB-wal" 2>/dev/null || echo "0")
WAL_SIZE_MB=$((WAL_SIZE / 1024 / 1024))
if [ $WAL_SIZE_MB -ge $TARGET_WAL_SIZE_MB ]; then
echo " ✓ WAL reached ${WAL_SIZE_MB}MB"
break
fi
# Write a batch of large records
sqlite3 "$DB" <<EOF 2>/dev/null || true
BEGIN;
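-- NOTE: generate_series comes from SQLite's bundled series extension (present in
-- the stock sqlite3 shell). With stderr suppressed above, a build without it would
-- fail silently here and the WAL would never grow.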
INSERT INTO test (data) SELECT randomblob(10000) FROM generate_series(1, 100);
COMMIT;
EOF
BATCH_COUNT=$((BATCH_COUNT + 1))
if [ $((BATCH_COUNT % 10)) -eq 0 ]; then
echo " WAL size: ${WAL_SIZE_MB}MB / ${TARGET_WAL_SIZE_MB}MB"
fi
# Check if Litestream is still alive
if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
echo " ✗ Litestream died during WAL growth!"
break
fi
done
# Check Litestream status
echo ""
echo "[4] Checking Litestream status with large WAL..."
if kill -0 $LITESTREAM_PID 2>/dev/null; then
echo " ✓ Litestream still running with ${WAL_SIZE_MB}MB WAL"
# Check replication lag
sleep 5
LATEST_LTX=$(ls -t "$REPLICA/ltx/0/" 2>/dev/null | head -1)
if [ -n "$LATEST_LTX" ]; then
echo " ✓ Still replicating (latest: $LATEST_LTX)"
else
echo " ⚠ No recent replication activity"
fi
else
echo " ✗ Litestream crashed!"
fi
# Check for errors
echo ""
echo "[5] Checking for errors..."
# grep -c prints its count even when it exits non-zero on no match, so "|| true"
# avoids appending a stray second "0" to the variable.
ERROR_COUNT=$(grep -c "ERROR" /tmp/wal-growth.log 2>/dev/null || true)
OOM_COUNT=$(grep -c -i "out of memory\|oom" /tmp/wal-growth.log 2>/dev/null || true)
ERROR_COUNT=${ERROR_COUNT:-0}
OOM_COUNT=${OOM_COUNT:-0}
if [ "$OOM_COUNT" -gt 0 ]; then
echo " ✗ Out of memory errors detected!"
elif [ "$ERROR_COUNT" -gt 1 ]; then
echo " ⚠ Errors detected: $ERROR_COUNT"
grep "ERROR" /tmp/wal-growth.log | tail -3
else
echo " ✓ No significant errors"
fi
# Get statistics
echo ""
echo "[6] Statistics..."
ROW_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")
DB_SIZE=$(stat -f%z "$DB" 2>/dev/null || stat -c%s "$DB" 2>/dev/null || echo "0")
LTX_COUNT=$(find "$REPLICA" -name "*.ltx" 2>/dev/null | wc -l || echo "0")
echo " Database size: $((DB_SIZE / 1024 / 1024))MB"
echo " WAL size: ${WAL_SIZE_MB}MB"
echo " Row count: $ROW_COUNT"
echo " LTX files: $LTX_COUNT"
# Now checkpoint and see what happens
echo ""
echo "[7] Executing checkpoint on large WAL..."
CHECKPOINT_START=$(date +%s)
CHECKPOINT_RESULT=$(sqlite3 "$DB" "PRAGMA wal_checkpoint(FULL);" 2>&1) || CHECKPOINT_RESULT="Failed"
CHECKPOINT_END=$(date +%s)
CHECKPOINT_TIME=$((CHECKPOINT_END - CHECKPOINT_START))
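# wal_checkpoint(FULL) returns "busy|log_frames|checkpointed_frames";
# busy=0 with matching frame counts means the entire WAL was checkpointed.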
echo " Checkpoint result: $CHECKPOINT_RESULT"
echo " Checkpoint time: ${CHECKPOINT_TIME}s"
# Check WAL size after checkpoint
NEW_WAL_SIZE=$(stat -f%z "$DB-wal" 2>/dev/null || stat -c%s "$DB-wal" 2>/dev/null || echo "0")
NEW_WAL_SIZE_MB=$((NEW_WAL_SIZE / 1024 / 1024))
echo " WAL size after checkpoint: ${NEW_WAL_SIZE_MB}MB"
# Let Litestream catch up
echo ""
echo "[8] Letting Litestream catch up after checkpoint..."
sleep 10
# Check if Litestream survived
if kill -0 $LITESTREAM_PID 2>/dev/null; then
echo " ✓ Litestream survived large checkpoint"
else
echo " ✗ Litestream died after checkpoint"
fi
# Stop Litestream
kill $LITESTREAM_PID 2>/dev/null || true
sleep 2
# Test restore
echo ""
echo "[9] Testing restore after large WAL handling..."
rm -f /tmp/wal-restored.db
if $LITESTREAM restore -o /tmp/wal-restored.db "file://$REPLICA" 2>&1 | tee /tmp/restore-wal.log; then
REST_COUNT=$(sqlite3 /tmp/wal-restored.db "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")
if [ "$REST_COUNT" -eq "$ROW_COUNT" ]; then
echo " ✓ Restore successful: $REST_COUNT rows"
echo ""
echo "TEST PASSED: Handled ${TARGET_WAL_SIZE_MB}MB WAL successfully"
else
echo " ⚠ Row count mismatch: Original=$ROW_COUNT, Restored=$REST_COUNT"
echo ""
echo "TEST FAILED: Data loss with large WAL"
fi
else
echo " ✗ Restore failed!"
echo ""
echo "TEST FAILED: Cannot restore after large WAL"
fi
echo ""
echo "=========================================="
echo "Summary:"
echo " Maximum WAL size tested: ${WAL_SIZE_MB}MB"
echo " Checkpoint time: ${CHECKPOINT_TIME}s"
echo " Data integrity: $([ "$REST_COUNT" -eq "$ROW_COUNT" ] && echo "✓ Preserved" || echo "✗ Lost")"
echo "=========================================="

View File

@@ -1,14 +1,12 @@
# Integration Test Scripts
# Utility Scripts
Long-running integration test scripts for comprehensive Litestream validation. These scripts are designed for extended testing scenarios, including overnight tests and production-like workloads.
Utility scripts for Litestream testing and distribution.
## Overview
This directory contains integration test scripts that run for extended periods (30 minutes to 8+ hours) to validate Litestream's behavior under sustained load and realistic production scenarios.
This directory contains utility scripts for post-test analysis and packaging. All long-running soak tests have been migrated to Go integration tests in `tests/integration/`.
**Key Difference from `cmd/litestream-test/scripts/`:**
- **This directory:** Long-running integration tests (minutes to hours)
- **`cmd/litestream-test/scripts/`:** Focused scenario tests (seconds to minutes)
> **Note:** For all soak tests (2-8 hours), see the Go-based test suite in [tests/integration/](../tests/integration/README.md). The bash soak tests have been migrated to Go for better maintainability and cross-platform support.
## Prerequisites
@@ -17,209 +15,7 @@ go build -o bin/litestream ./cmd/litestream
go build -o bin/litestream-test ./cmd/litestream-test
```
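The Go suite is driven by build tags. A typical local run looks like this (the flags mirror the CI workflow; adjust the `-run` filter as needed):
```bash
# Quick integration tests (CGO is required for the SQLite driver)
CGO_ENABLED=1 go test -v -tags=integration -timeout=30m ./tests/integration/...
```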
## Test Scripts
### test-quick-validation.sh
Quick validation test that runs for a configurable duration (default: 30 minutes).
```bash
./scripts/test-quick-validation.sh
TEST_DURATION=2h ./scripts/test-quick-validation.sh
TEST_DURATION=1h ./scripts/test-quick-validation.sh
```
**Default Configuration:**
- Duration: 30 minutes (configurable via `TEST_DURATION`)
- Database: 10MB initial population
- Write rate: 100 writes/second
- Pattern: Wave (simulates varying load)
- Payload size: 4KB
- Workers: 4
- Replica: File-based
**Features:**
- Aggressive test settings for quick feedback
- Very frequent snapshots (1 minute intervals)
- Rapid compaction cycles (30s, 1m, 5m, 15m)
- Real-time monitoring every 30 seconds
- Automatic validation and restore testing
- Comprehensive final report
**Monitoring:**
```bash
tail -f /tmp/litestream-quick-*/logs/monitor.log
tail -f /tmp/litestream-quick-*/logs/litestream.log
```
**What it Tests:**
- Snapshot creation frequency
- Compaction behavior across multiple intervals
- LTX file generation and management
- Checkpoint behavior under load
- Replication integrity
- Restoration success
- Error handling
**When to Use:**
- Before running overnight tests
- Validating configuration changes
- Quick regression testing
- CI/CD integration (with short duration)
- Pre-release validation
**Success Criteria:**
- LTX segments created (>0)
- No critical errors in logs
- Successful restoration
- Row counts match between source and restored database
### test-overnight.sh
Comprehensive 8-hour test with file-based replication.
```bash
./scripts/test-overnight.sh
```
**Configuration:**
- Duration: 8 hours
- Database: 100MB initial population
- Write rate: 50 writes/second
- Pattern: Wave (simulates varying load)
- Payload size: 2KB
- Workers: 4
- Replica: File-based (`/tmp/litestream-overnight-*/replica`)
**Features:**
- Extended monitoring with 1-minute updates
- Snapshot every 10 minutes
- Aggressive compaction intervals:
- 30 seconds → 30s duration
- 1 minute → 1m duration
- 5 minutes → 5m duration
- 15 minutes → 1h duration
- 30 minutes → 6h duration
- 1 hour → 24h duration
- 720-hour retention (30 days)
- Checkpoint every 30 seconds
- Automatic validation after completion
**Real-time Monitoring:**
```bash
tail -f /tmp/litestream-overnight-*/logs/monitor.log
tail -f /tmp/litestream-overnight-*/logs/litestream.log
tail -f /tmp/litestream-overnight-*/logs/load.log
```
**What it Tests:**
- Long-term replication stability
- Compaction effectiveness over time
- Memory stability under sustained load
- WAL file management
- Checkpoint consistency
- Replica file count growth patterns
- Error accumulation over time
- Recovery from transient issues
**Expected Behavior:**
- Steady database growth over 8 hours
- Regular snapshot creation (48 total)
- Active compaction reducing old LTX files
- Stable memory usage
- No error accumulation
- Successful final validation
**Artifacts:**
- Test directory: `/tmp/litestream-overnight-<timestamp>/`
- Logs: Monitor, litestream, load, populate, validate
- Database: Source and restored versions
- Replica: Full replica directory with LTX files
### test-overnight-s3.sh
Comprehensive 8-hour test with S3 replication.
```bash
export AWS_ACCESS_KEY_ID=your_key
export AWS_SECRET_ACCESS_KEY=your_secret
export S3_BUCKET=your-test-bucket
export AWS_REGION=us-east-1
./scripts/test-overnight-s3.sh
```
**Configuration:**
- Duration: 8 hours
- Database: 100MB initial population
- Write rate: 100 writes/second (higher than file test)
- Pattern: Wave (simulates varying load)
- Payload size: 4KB (larger than file test)
- Workers: 8 (more than file test)
- Replica: S3 bucket with unique timestamped path
**S3-Specific Settings:**
- Force path style: false
- Skip verify: false
- Optional SSE encryption support
- Region configurable via environment
**Features:**
- Higher load than file-based test (S3 can handle more)
- S3 connectivity validation before start
- S3-specific error monitoring (403, 404, 500, 503)
- Upload operation tracking
- S3 object count monitoring
- Restoration from S3 after completion
- Automatic row count comparison
**Real-time Monitoring:**
```bash
tail -f /tmp/litestream-overnight-s3-*/logs/monitor.log
tail -f /tmp/litestream-overnight-s3-*/logs/litestream.log
aws s3 ls s3://your-bucket/litestream-overnight-<timestamp>/ --recursive
```
**What it Tests:**
- S3 replication stability
- Network resilience over 8 hours
- S3 API call efficiency
- Multipart upload handling
- S3-specific error recovery
- Cross-region replication (if configured)
- S3 cost implications (API calls, storage)
- Restoration from cloud storage
**S3 Monitoring Includes:**
- Snapshot count in S3
- WAL segment count in S3
- Total S3 object count
- S3 storage size
- Upload operation count
- S3-specific errors
**Expected Behavior:**
- Successful S3 connectivity throughout
- Regular S3 uploads without failures
- S3 object counts grow over time
- Compaction reduces old S3 objects
- Successful S3 restore at end
- Row count match between source and restored
**Prerequisites:**
- Valid AWS credentials
- S3 bucket with write permissions
- Network connectivity to S3
- AWS CLI installed (for monitoring)
**Cost Considerations:**
- ~8 hours of continuous uploads
- Estimated API calls: Thousands of PUTs/GETs
- Storage: 100MB+ depending on replication
- Consider using a test/dev account
## Available Scripts
### analyze-test-results.sh
@@ -272,37 +68,9 @@ Homebrew tap setup script for packaging and distribution.
**Purpose:** Automates Homebrew tap setup for Litestream distribution. Not a test script per se, but part of the release process.
## Usage Patterns
## Usage
### Quick Validation Before Overnight Test
```bash
TEST_DURATION=30m ./scripts/test-quick-validation.sh
```
If this passes, proceed to overnight:
```bash
./scripts/test-overnight.sh
```
### Running Multiple Overnight Tests
File and S3 tests can run concurrently (different machines recommended):
```bash
./scripts/test-overnight.sh &
./scripts/test-overnight-s3.sh &
```
### Custom Duration Testing
```bash
TEST_DURATION=2h ./scripts/test-quick-validation.sh
TEST_DURATION=4h ./scripts/test-quick-validation.sh
TEST_DURATION=12h ./scripts/test-quick-validation.sh
```
### Analyzing Results
### Analyzing Test Results
```bash
ls /tmp/litestream-overnight-* -dt | head -1
@@ -310,25 +78,15 @@ ls /tmp/litestream-overnight-* -dt | head -1
./scripts/analyze-test-results.sh $(ls /tmp/litestream-overnight-* -dt | head -1)
```
### Continuous Integration
For CI/CD, use shorter durations:
```bash
TEST_DURATION=5m ./scripts/test-quick-validation.sh
TEST_DURATION=15m ./scripts/test-quick-validation.sh
```
## Test Duration Guide
| Duration | Use Case | Test Type | Expected Results |
|----------|----------|-----------|------------------|
| 5 minutes | CI/CD smoke test | Quick validation | Basic functionality |
| 30 minutes | Pre-overnight validation | Quick validation | Config verification |
| 1 hour | Short integration | Quick validation | Pattern detection |
| 2 hours | Extended integration | Quick validation | Compaction cycles |
| 8 hours | Overnight stability | Overnight test | Full validation |
| 12+ hours | Stress testing | Overnight test | Edge case discovery |
| 5 minutes | CI/CD smoke test | Go integration tests | Basic functionality |
| 30 minutes | Short integration | Go integration tests | Pattern detection |
| 2-8 hours | Soak testing | Go soak tests (local only) | Full validation |
> **Note:** All soak tests are now Go-based in `tests/integration/`. See [tests/integration/README.md](../tests/integration/README.md) for details on running comprehensive, MinIO, and overnight S3 soak tests.
## Monitoring and Debugging
@@ -409,12 +167,7 @@ sqlite3 /tmp/litestream-*/restored.db "SELECT COUNT(*) FROM test_data"
### Stopping Tests Early
Tests can be interrupted with Ctrl+C. They will clean up gracefully:
```bash
./scripts/test-overnight.sh
^C
Cleaning up...
```
Go tests can be interrupted with Ctrl+C. They clean up gracefully via defer statements.
## Test Artifacts
@@ -436,59 +189,24 @@ All tests create timestamped directories with comprehensive artifacts:
└── restored.db # Restored database for validation
```
## Integration with Other Tests
## Integration with Go Tests
These scripts complement the scenario tests in `cmd/litestream-test/scripts/`:
These utility scripts complement the Go integration test suite:
**Relationship:**
- `cmd/litestream-test/scripts/` → Focused scenarios (seconds to ~30 minutes)
- `scripts/` → Integration tests (30 minutes to 8+ hours)
**Test Locations:**
- `tests/integration/` → All integration and soak tests (Go-based)
- `cmd/litestream-test/scripts/` → Scenario and debugging tests (bash, being phased out)
- `scripts/` → Utilities only (this directory)
**Workflow:**
1. Run focused scenario tests during development
2. Run quick validation (30min) before major changes
3. Run overnight tests (8h) before releases
4. Analyze results with analysis script
## Success Criteria
### Quick Validation (30min)
✅ Pass Criteria:
- LTX segments created (>0)
- At least 1 snapshot created
- Multiple compaction cycles completed
- No critical errors
- Successful restoration
- Row count matches
### Overnight Tests (8h)
✅ Pass Criteria:
- No process crashes
- Error count < 10 (excluding transient)
- Steady database growth
- Regular snapshots (40+)
- Active compaction visible
- Successful final restoration
- Row count match
- Memory usage stable
**Testing Workflow:**
1. Run quick integration tests during development
2. Run full integration test suite before major changes
3. Run soak tests (2-8h) locally before releases: `TestComprehensiveSoak`, `TestMinIOSoak`, `TestOvernightS3Soak`
4. Analyze results with `analyze-test-results.sh`
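A sketch of how the soak tests in step 3 might be invoked locally, assuming they follow the same `integration`/`long` build-tag convention as the rest of the suite:
```bash
# Long-running soak tests -- run locally, not in CI
CGO_ENABLED=1 go test -v -tags="integration,long" -timeout=10h ./tests/integration/... \
  -run="TestComprehensiveSoak|TestMinIOSoak|TestOvernightS3Soak"
```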
## Related Documentation
- [Go Integration Tests](../tests/integration/README.md) - Complete Go-based test suite including soak tests
- [litestream-test CLI Tool](../cmd/litestream-test/README.md) - Testing harness documentation
- [Scenario Test Scripts](../cmd/litestream-test/scripts/README.md) - Focused test scenarios
- [S3 Retention Testing](../cmd/litestream-test/S3-RETENTION-TESTING.md) - S3-specific testing
## Contributing
When adding new integration scripts:
1. Follow naming conventions (`test-*.sh`)
2. Include clear duration estimates in comments
3. Create comprehensive monitoring
4. Generate timestamped test directories
5. Implement graceful cleanup with `trap`
6. Provide clear success/failure output
7. Update this README with script documentation
8. Consider both file and S3 variants if applicable
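For item 5, a minimal cleanup skeleton in the style the existing scripts already use (the function body is a sketch; adapt it to whatever processes your script spawns):
```bash
#!/bin/bash
set -euo pipefail

cleanup() {
    echo "Cleaning up..."
    # Kill any background jobs this script started, ignoring already-dead PIDs.
    jobs -p | xargs kill 2>/dev/null || true
    wait 2>/dev/null || true
}
trap cleanup EXIT INT TERM
```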

View File

@@ -1,392 +0,0 @@
#!/bin/bash
set -euo pipefail
# Comprehensive validation test with aggressive settings
# This test exercises all Litestream features: replication, snapshots, compaction, checkpoints
# Can be run for any duration - defaults to 2 hours for thorough testing
TEST_DURATION="${TEST_DURATION:-2h}"
TEST_DIR="/tmp/litestream-comprehensive-$(date +%Y%m%d-%H%M%S)"
DB_PATH="$TEST_DIR/test.db"
REPLICA_PATH="$TEST_DIR/replica"
CONFIG_FILE="$TEST_DIR/litestream.yml"
LOG_DIR="$TEST_DIR/logs"
echo "================================================"
echo "Litestream Comprehensive Validation Test"
echo "================================================"
echo "Duration: $TEST_DURATION"
echo "Test directory: $TEST_DIR"
echo "Start time: $(date)"
echo ""
echo "This test uses aggressive settings to validate:"
echo " - Continuous replication"
echo " - Snapshot generation (every 10m)"
echo " - Compaction (30s/1m/5m intervals)"
echo " - Checkpoint operations"
echo " - Database restoration"
echo ""
cleanup() {
echo ""
echo "Cleaning up..."
# Kill all spawned processes
jobs -p | xargs -r kill 2>/dev/null || true
wait
echo "Test completed at: $(date)"
echo "Results saved in: $TEST_DIR"
}
trap cleanup EXIT INT TERM
# Create directories
mkdir -p "$TEST_DIR" "$LOG_DIR" "$REPLICA_PATH"
# Build binaries if needed
echo "Building binaries..."
if [ ! -f bin/litestream ]; then
go build -o bin/litestream ./cmd/litestream
fi
if [ ! -f bin/litestream-test ]; then
go build -o bin/litestream-test ./cmd/litestream-test
fi
# Create test database and populate BEFORE starting litestream
echo "Creating and populating test database..."
sqlite3 "$DB_PATH" <<EOF
PRAGMA journal_mode=WAL;
PRAGMA page_size=4096;
CREATE TABLE IF NOT EXISTS test_data (
id INTEGER PRIMARY KEY AUTOINCREMENT,
data BLOB,
created_at INTEGER DEFAULT (strftime('%s', 'now'))
);
EOF
# Populate database with initial data (50MB to ensure activity)
echo "Populating database (50MB initial data)..."
# Guard the command directly: with "set -e" active, a bare failing command
# would abort the script before the $? check could run.
if ! bin/litestream-test populate -db "$DB_PATH" -target-size 50MB -batch-size 1000 > "$LOG_DIR/populate.log" 2>&1; then
echo "Warning: Population failed, but continuing..."
cat "$LOG_DIR/populate.log"
fi
# Create configuration with Ben's recommended aggressive settings
echo "Creating test configuration with aggressive intervals..."
cat > "$CONFIG_FILE" <<EOF
# Aggressive snapshot settings per Ben's request
snapshot:
interval: 10m # Snapshots every 10 minutes
retention: 1h # Keep data for 1 hour
# Aggressive compaction levels: 30s/1m/5m/15m/30m intervals
levels:
- interval: 30s
- interval: 1m
- interval: 5m
- interval: 15m
- interval: 30m
dbs:
- path: $DB_PATH
# Checkpoint settings to ensure checkpoints happen
checkpoint-interval: 1m # Check for checkpoint every minute
min-checkpoint-page-count: 100 # Low threshold to trigger checkpoints
max-checkpoint-page-count: 5000 # Force checkpoint at this size
replicas:
- type: file
path: $REPLICA_PATH
retention-check-interval: 5m # Check retention every 5 minutes
EOF
echo "Starting litestream..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
sleep 3
if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
echo "ERROR: Litestream failed to start!"
tail -50 "$LOG_DIR/litestream.log"
exit 1
fi
echo "Litestream running (PID: $LITESTREAM_PID)"
echo ""
# Start load generator with heavy sustained load
echo "Starting load generator (heavy sustained load)..."
bin/litestream-test load \
-db "$DB_PATH" \
-write-rate 500 \
-duration "$TEST_DURATION" \
-pattern wave \
-payload-size 4096 \
-read-ratio 0.3 \
-workers 8 \
> "$LOG_DIR/load.log" 2>&1 &
LOAD_PID=$!
echo "Load generator running (PID: $LOAD_PID)"
echo ""
# Monitor function with detailed metrics
monitor_comprehensive() {
local last_checkpoint_count=0
local last_compaction_count=0
local last_sync_count=0
while true; do
sleep 60 # Check every minute
echo "[$(date +%H:%M:%S)] Status Report"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
# Database metrics
if [ -f "$DB_PATH" ]; then
DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
echo " Database size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"
# WAL file size (indicates write activity)
if [ -f "$DB_PATH-wal" ]; then
WAL_SIZE=$(stat -f%z "$DB_PATH-wal" 2>/dev/null || stat -c%s "$DB_PATH-wal" 2>/dev/null)
echo " WAL size: $(numfmt --to=iec-i --suffix=B $WAL_SIZE 2>/dev/null || echo "$WAL_SIZE bytes")"
fi
# Row count
TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null)
if echo "$TABLES" | grep -q "load_test"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
echo " Rows in database: $ROW_COUNT"
elif echo "$TABLES" | grep -q "test_table_0"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
echo " Rows in database: $ROW_COUNT"
fi
fi
# Replication metrics
if [ -d "$REPLICA_PATH" ]; then
# Count snapshot files
SNAPSHOTS=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ')
# Count LTX files (WAL segments)
LTX_FILES=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ')
REPLICA_SIZE=$(du -sh "$REPLICA_PATH" 2>/dev/null | cut -f1)
echo " Replica: $SNAPSHOTS snapshots, $LTX_FILES segments, size: $REPLICA_SIZE"
fi
# Operation metrics (with delta since last check)
if [ -f "$LOG_DIR/litestream.log" ]; then
# "|| true" keeps errexit from killing this background monitor when grep finds
# no matches yet (grep -c still prints "0" in that case).
CHECKPOINT_COUNT=$(grep -c "checkpoint" "$LOG_DIR/litestream.log" 2>/dev/null || true)
CHECKPOINT_COUNT=${CHECKPOINT_COUNT:-0}
COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" 2>/dev/null || true)
COMPACTION_COUNT=${COMPACTION_COUNT:-0}
SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" 2>/dev/null || true)
SYNC_COUNT=${SYNC_COUNT:-0}
CHECKPOINT_DELTA=$((CHECKPOINT_COUNT - last_checkpoint_count))
COMPACTION_DELTA=$((COMPACTION_COUNT - last_compaction_count))
SYNC_DELTA=$((SYNC_COUNT - last_sync_count))
echo " Operations: $CHECKPOINT_COUNT checkpoints (+$CHECKPOINT_DELTA), $COMPACTION_COUNT compactions (+$COMPACTION_DELTA)"
echo " Syncs: $SYNC_COUNT total (+$SYNC_DELTA in last minute)"
last_checkpoint_count=$CHECKPOINT_COUNT
last_compaction_count=$COMPACTION_COUNT
last_sync_count=$SYNC_COUNT
fi
# Check for errors (excluding known non-critical)
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ' || true)
if [ "$ERROR_COUNT" -gt 0 ]; then
echo " ⚠ Critical errors: $ERROR_COUNT"
grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | tail -2
fi
# Load generator status
if [ -f "$LOG_DIR/load.log" ]; then
LOAD_STATUS=$(tail -1 "$LOG_DIR/load.log" 2>/dev/null | grep -oE "writes_per_sec=[0-9.]+" | cut -d= -f2 || echo "0")
echo " Write rate: ${LOAD_STATUS:-0} writes/sec"
fi
# Check processes
if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
echo " ✗ Litestream stopped unexpectedly!"
break
fi
if ! kill -0 "$LOAD_PID" 2>/dev/null; then
echo " ✓ Load test completed"
break
fi
echo ""
done
}
echo "Running comprehensive test for $TEST_DURATION..."
echo "Monitor will report every 60 seconds"
echo "================================================"
echo ""
# Start monitoring in background
monitor_comprehensive &
MONITOR_PID=$!
# Wait for load test to complete
wait "$LOAD_PID" 2>/dev/null || true
# Stop the monitor
kill $MONITOR_PID 2>/dev/null || true
wait $MONITOR_PID 2>/dev/null || true
echo ""
echo "================================================"
echo "Final Test Results"
echo "================================================"
# Final statistics
echo "Database Statistics:"
if [ -f "$DB_PATH" ]; then
DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
# Find the actual table name
TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null)
if echo "$TABLES" | grep -q "load_test"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_table_0"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_data"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
else
ROW_COUNT="0"
fi
echo " Final size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"
echo " Total rows: $ROW_COUNT"
fi
echo ""
echo "Replication Statistics:"
if [ -d "$REPLICA_PATH" ]; then
SNAPSHOT_COUNT=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ')
LTX_COUNT=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ')
REPLICA_SIZE=$(du -sh "$REPLICA_PATH" | cut -f1)
echo " Snapshots created: $SNAPSHOT_COUNT"
echo " LTX segments: $LTX_COUNT"
echo " Replica size: $REPLICA_SIZE"
fi
echo ""
echo "Operation Counts:"
if [ -f "$LOG_DIR/litestream.log" ]; then
# "|| true" instead of "|| echo 0": grep -c already prints "0" on no match,
# and pipefail would otherwise trigger the fallback and yield "0\n0".
COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" || true)
CHECKPOINT_COUNT=$(grep -c "checkpoint" "$LOG_DIR/litestream.log" || true)
SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" || true)
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | wc -l | tr -d ' ' || true)
else
COMPACTION_COUNT="0"
CHECKPOINT_COUNT="0"
SYNC_COUNT="0"
ERROR_COUNT="0"
fi
echo " Compactions: $COMPACTION_COUNT"
echo " Checkpoints: $CHECKPOINT_COUNT"
echo " Syncs: $SYNC_COUNT"
echo " Errors: $ERROR_COUNT"
# Validation test
echo ""
echo "Testing validation..."
# Test the command directly so "set -e" doesn't abort on a failed validation.
if bin/litestream-test validate \
-source "$DB_PATH" \
-replica "$REPLICA_PATH" \
> "$LOG_DIR/validate.log" 2>&1; then
echo " ✓ Validation passed!"
else
echo " ✗ Validation failed!"
tail -10 "$LOG_DIR/validate.log"
fi
# Test restoration
echo ""
echo "Testing restoration..."
RESTORE_DB="$TEST_DIR/restored.db"
if bin/litestream restore -o "$RESTORE_DB" "file://$REPLICA_PATH" > "$LOG_DIR/restore.log" 2>&1; then
# Get row count from restored database
TABLES=$(sqlite3 "$RESTORE_DB" ".tables" 2>/dev/null)
if echo "$TABLES" | grep -q "load_test"; then
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_table_0"; then
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
else
RESTORED_COUNT="0"
fi
if [ "$RESTORED_COUNT" = "$ROW_COUNT" ]; then
echo " ✓ Restoration successful! ($RESTORED_COUNT rows match)"
else
echo " ⚠ Row count mismatch! Original: $ROW_COUNT, Restored: $RESTORED_COUNT"
fi
else
echo " ✗ Restoration failed!"
tail -10 "$LOG_DIR/restore.log"
fi
# Summary
echo ""
echo "================================================"
echo "Test Summary"
echo "================================================"
# Count critical errors (exclude known non-critical ones)
CRITICAL_ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ' || true)
# Determine test result
TEST_PASSED=true
ISSUES=""
if [ "$CRITICAL_ERROR_COUNT" -gt 0 ]; then
TEST_PASSED=false
ISSUES="$ISSUES\n - Critical errors detected: $CRITICAL_ERROR_COUNT"
fi
if [ "$LTX_COUNT" -eq 0 ]; then
TEST_PASSED=false
ISSUES="$ISSUES\n - No LTX segments created (replication not working)"
fi
if [ "$CHECKPOINT_COUNT" -eq 0 ]; then
ISSUES="$ISSUES\n - No checkpoints recorded (may need more aggressive settings)"
fi
if [ "$COMPACTION_COUNT" -eq 0 ]; then
ISSUES="$ISSUES\n - No compactions occurred (unexpected for this test duration)"
fi
if [ "$TEST_PASSED" = true ]; then
echo "✓ COMPREHENSIVE TEST PASSED!"
echo ""
echo "Successfully validated:"
echo " - Continuous replication ($LTX_COUNT segments)"
echo " - Compaction ($COMPACTION_COUNT operations)"
[ "$CHECKPOINT_COUNT" -gt 0 ] && echo " - Checkpoints ($CHECKPOINT_COUNT operations)"
[ "$SNAPSHOT_COUNT" -gt 0 ] && echo " - Snapshots ($SNAPSHOT_COUNT created)"
echo " - Database restoration"
echo ""
echo "The configuration is ready for production use."
else
echo "⚠ TEST COMPLETED WITH ISSUES:"
echo -e "$ISSUES"
echo ""
echo "Review the logs for details:"
echo " $LOG_DIR/litestream.log"
fi
echo ""
echo "Full test results available in: $TEST_DIR"
echo "================================================"

View File

@@ -1,464 +0,0 @@
#!/bin/bash
set -euo pipefail
# MinIO S3-compatible test with Docker
# This test runs Litestream against a local MinIO instance to simulate S3 behavior
TEST_DURATION="${TEST_DURATION:-2h}"
TEST_DIR="/tmp/litestream-minio-$(date +%Y%m%d-%H%M%S)"
DB_PATH="$TEST_DIR/test.db"
CONFIG_FILE="$TEST_DIR/litestream.yml"
LOG_DIR="$TEST_DIR/logs"
# MinIO settings - use alternative ports to avoid conflicts
MINIO_CONTAINER_NAME="litestream-minio-test"
MINIO_PORT=9100
MINIO_CONSOLE_PORT=9101
MINIO_ROOT_USER="minioadmin"
MINIO_ROOT_PASSWORD="minioadmin"
MINIO_BUCKET="litestream-test"
MINIO_ENDPOINT="http://localhost:${MINIO_PORT}"
S3_PATH="s3://${MINIO_BUCKET}/litestream-test-$(date +%Y%m%d-%H%M%S)"
echo "================================================"
echo "Litestream MinIO S3 Test"
echo "================================================"
echo "Duration: $TEST_DURATION"
echo "Test directory: $TEST_DIR"
echo "MinIO endpoint: $MINIO_ENDPOINT"
echo "MinIO bucket: $MINIO_BUCKET"
echo "Start time: $(date)"
echo ""
# Check for Docker
if ! command -v docker &> /dev/null; then
echo "Error: Docker is not installed or not in PATH"
echo "Please install Docker to run this test"
exit 1
fi
cleanup() {
echo ""
echo "================================================"
echo "Cleaning up..."
echo "================================================"
# Kill all spawned processes
jobs -p | xargs -r kill 2>/dev/null || true
wait 2>/dev/null || true
# Stop and remove MinIO container
if [ -n "${MINIO_CONTAINER_NAME:-}" ]; then
echo "Stopping MinIO container..."
docker stop "$MINIO_CONTAINER_NAME" 2>/dev/null || true
docker rm "$MINIO_CONTAINER_NAME" 2>/dev/null || true
fi
echo ""
echo "Test completed at: $(date)"
echo "Results saved in: $TEST_DIR"
}
trap cleanup EXIT INT TERM
# Create directories
mkdir -p "$TEST_DIR" "$LOG_DIR"
# Clean up any existing container
if docker ps -a | grep -q "$MINIO_CONTAINER_NAME"; then
echo "Removing existing MinIO container..."
docker stop "$MINIO_CONTAINER_NAME" 2>/dev/null || true
docker rm "$MINIO_CONTAINER_NAME" 2>/dev/null || true
fi
# Start MinIO container
echo "Starting MinIO container..."
docker run -d \
--name "$MINIO_CONTAINER_NAME" \
-p "${MINIO_PORT}:9000" \
-p "${MINIO_CONSOLE_PORT}:9001" \
-e "MINIO_ROOT_USER=${MINIO_ROOT_USER}" \
-e "MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}" \
minio/minio server /data --console-address ":9001"
echo "Waiting for MinIO to start..."
sleep 5
# Check if MinIO is running
if ! docker ps | grep -q "$MINIO_CONTAINER_NAME"; then
echo "Error: MinIO container failed to start"
docker logs "$MINIO_CONTAINER_NAME" 2>&1
exit 1
fi
echo "MinIO is running!"
echo " API: http://localhost:${MINIO_PORT} (mapped from container port 9000)"
echo " Console: http://localhost:${MINIO_CONSOLE_PORT} (mapped from container port 9001)"
echo " Credentials: ${MINIO_ROOT_USER}/${MINIO_ROOT_PASSWORD}"
echo ""
# Create MinIO bucket using mc (MinIO Client) in Docker
echo "Creating MinIO bucket..."
docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
-e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
minio/mc mb "minio/${MINIO_BUCKET}" 2>/dev/null || true
echo "Bucket '${MINIO_BUCKET}' ready"
echo ""
# Build binaries if needed
echo "Building binaries..."
if [ ! -f bin/litestream ]; then
go build -o bin/litestream ./cmd/litestream
fi
if [ ! -f bin/litestream-test ]; then
go build -o bin/litestream-test ./cmd/litestream-test
fi
# Create and populate test database
echo "Creating and populating test database..."
sqlite3 "$DB_PATH" <<EOF
PRAGMA journal_mode=WAL;
PRAGMA page_size=4096;
CREATE TABLE IF NOT EXISTS test_data (
id INTEGER PRIMARY KEY AUTOINCREMENT,
data BLOB,
created_at INTEGER DEFAULT (strftime('%s', 'now'))
);
EOF
# Populate database with initial data
echo "Populating database (50MB initial data)..."
# Guard directly so "set -e" doesn't abort before the warning below.
if ! bin/litestream-test populate -db "$DB_PATH" -target-size 50MB -batch-size 1000 > "$LOG_DIR/populate.log" 2>&1; then
echo "Warning: Population failed, but continuing..."
cat "$LOG_DIR/populate.log"
fi
# Create Litestream configuration for MinIO
echo "Creating Litestream configuration for MinIO S3..."
cat > "$CONFIG_FILE" <<EOF
# MinIO S3 endpoint configuration
access-key-id: ${MINIO_ROOT_USER}
secret-access-key: ${MINIO_ROOT_PASSWORD}
# Aggressive snapshot settings for testing
snapshot:
interval: 10m # Snapshots every 10 minutes
retention: 1h # Keep data for 1 hour
# Aggressive compaction levels: 30s/1m/5m/15m/30m intervals
levels:
- interval: 30s
- interval: 1m
- interval: 5m
- interval: 15m
- interval: 30m
dbs:
- path: $DB_PATH
# Checkpoint settings
checkpoint-interval: 1m
min-checkpoint-page-count: 100
max-checkpoint-page-count: 5000
replicas:
- url: ${S3_PATH}
endpoint: ${MINIO_ENDPOINT}
region: us-east-1
force-path-style: true
skip-verify: true
retention-check-interval: 5m
EOF
echo "Starting litestream with MinIO backend..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
sleep 3
if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
echo "ERROR: Litestream failed to start!"
echo "Last 50 lines of log:"
tail -50 "$LOG_DIR/litestream.log"
exit 1
fi
echo "Litestream running (PID: $LITESTREAM_PID)"
echo ""
# Start load generator
echo "Starting load generator (heavy sustained load)..."
bin/litestream-test load \
-db "$DB_PATH" \
-write-rate 500 \
-duration "$TEST_DURATION" \
-pattern wave \
-payload-size 4096 \
-read-ratio 0.3 \
-workers 8 \
> "$LOG_DIR/load.log" 2>&1 &
LOAD_PID=$!
echo "Load generator running (PID: $LOAD_PID)"
echo ""
# Monitor function for MinIO
monitor_minio() {
local last_checkpoint_count=0
local last_compaction_count=0
local last_sync_count=0
while true; do
sleep 60
echo "[$(date +%H:%M:%S)] Status Report"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
# Database metrics
if [ -f "$DB_PATH" ]; then
DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
echo " Database size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"
if [ -f "$DB_PATH-wal" ]; then
WAL_SIZE=$(stat -f%z "$DB_PATH-wal" 2>/dev/null || stat -c%s "$DB_PATH-wal" 2>/dev/null)
echo " WAL size: $(numfmt --to=iec-i --suffix=B $WAL_SIZE 2>/dev/null || echo "$WAL_SIZE bytes")"
fi
# Row count
TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null)
if echo "$TABLES" | grep -q "load_test"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
echo " Rows in database: $ROW_COUNT"
elif echo "$TABLES" | grep -q "test_table_0"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
echo " Rows in database: $ROW_COUNT"
fi
fi
# MinIO/S3 metrics using docker exec
echo ""
echo " MinIO S3 Statistics:"
# Count objects in MinIO
OBJECT_COUNT=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
-e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
minio/mc ls "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null | wc -l | tr -d ' ' || true)
# Count LTX files (modern format) and snapshots ("|| true" because grep -c
# prints "0" but exits non-zero on no match; "|| echo 0" would yield "0\n0")
LTX_COUNT=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
-e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
minio/mc ls "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null | grep -c "\.ltx" || true)
SNAPSHOT_COUNT=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
-e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
minio/mc ls "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null | grep -c "snapshot" || true)
echo " Total objects: $OBJECT_COUNT"
echo " LTX segments: $LTX_COUNT"
echo " Snapshots: $SNAPSHOT_COUNT"
# Operation metrics
if [ -f "$LOG_DIR/litestream.log" ]; then
# "|| true": grep -c still prints "0" on no match but exits non-zero, which
# would otherwise kill this errexit background monitor.
CHECKPOINT_COUNT=$(grep -c "checkpoint" "$LOG_DIR/litestream.log" 2>/dev/null || true)
CHECKPOINT_COUNT=${CHECKPOINT_COUNT:-0}
COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" 2>/dev/null || true)
COMPACTION_COUNT=${COMPACTION_COUNT:-0}
SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" 2>/dev/null || true)
SYNC_COUNT=${SYNC_COUNT:-0}
CHECKPOINT_DELTA=$((CHECKPOINT_COUNT - last_checkpoint_count))
COMPACTION_DELTA=$((COMPACTION_COUNT - last_compaction_count))
SYNC_DELTA=$((SYNC_COUNT - last_sync_count))
echo ""
echo " Operations: $CHECKPOINT_COUNT checkpoints (+$CHECKPOINT_DELTA), $COMPACTION_COUNT compactions (+$COMPACTION_DELTA)"
echo " Syncs: $SYNC_COUNT total (+$SYNC_DELTA in last minute)"
last_checkpoint_count=$CHECKPOINT_COUNT
last_compaction_count=$COMPACTION_COUNT
last_sync_count=$SYNC_COUNT
fi
# Check for errors
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ' || true)
if [ "$ERROR_COUNT" -gt 0 ]; then
echo " ⚠ Critical errors: $ERROR_COUNT"
grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | tail -2
fi
# Check processes
if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
echo " ✗ Litestream stopped unexpectedly!"
break
fi
if ! kill -0 "$LOAD_PID" 2>/dev/null; then
echo " ✓ Load test completed"
break
fi
echo ""
done
}
echo "Running MinIO S3 test for $TEST_DURATION..."
echo "Monitor will report every 60 seconds"
echo "================================================"
echo ""
# Start monitoring in background
monitor_minio &
MONITOR_PID=$!
# Wait for load test to complete
wait "$LOAD_PID" 2>/dev/null || true
# Stop the monitor
kill $MONITOR_PID 2>/dev/null || true
wait $MONITOR_PID 2>/dev/null || true
echo ""
echo "================================================"
echo "Final Test Results"
echo "================================================"
# Final statistics
echo "Database Statistics:"
if [ -f "$DB_PATH" ]; then
DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null)
if echo "$TABLES" | grep -q "load_test"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_table_0"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_data"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
else
ROW_COUNT="0"
fi
echo " Final size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"
echo " Total rows: $ROW_COUNT"
fi
echo ""
echo "MinIO S3 Statistics:"
FINAL_OBJECTS=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
-e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
minio/mc ls "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null | wc -l | tr -d ' ' || true)
FINAL_LTX=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
-e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
minio/mc ls "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null | grep -c "\.ltx" || true)
FINAL_SNAPSHOTS=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
-e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
minio/mc ls "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null | grep -c "snapshot" || true)
echo " Total objects in MinIO: $FINAL_OBJECTS"
echo " LTX segments: $FINAL_LTX"
echo " Snapshots: $FINAL_SNAPSHOTS"
# Get storage size
STORAGE_INFO=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
-e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
minio/mc du "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null | tail -1 || echo "0")
echo " Total storage used: $STORAGE_INFO"
echo ""
echo "Operation Counts:"
if [ -f "$LOG_DIR/litestream.log" ]; then
COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" || true)
CHECKPOINT_COUNT=$(grep -c "checkpoint" "$LOG_DIR/litestream.log" || true)
SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" || true)
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | wc -l | tr -d ' ' || true)
else
COMPACTION_COUNT="0"
CHECKPOINT_COUNT="0"
SYNC_COUNT="0"
ERROR_COUNT="0"
fi
echo " Compactions: $COMPACTION_COUNT"
echo " Checkpoints: $CHECKPOINT_COUNT"
echo " Syncs: $SYNC_COUNT"
echo " Errors: $ERROR_COUNT"
# Test restoration from MinIO
echo ""
echo "Testing restoration from MinIO S3..."
RESTORE_DB="$TEST_DIR/restored.db"
# Export credentials for litestream restore
export AWS_ACCESS_KEY_ID="${MINIO_ROOT_USER}"
export AWS_SECRET_ACCESS_KEY="${MINIO_ROOT_PASSWORD}"
# Create a config file for restoration
cat > "$TEST_DIR/restore.yml" <<EOF
access-key-id: ${MINIO_ROOT_USER}
secret-access-key: ${MINIO_ROOT_PASSWORD}
EOF
# Test the command directly so "set -e" doesn't abort on a failed restore.
if bin/litestream restore \
-config "$TEST_DIR/restore.yml" \
-o "$RESTORE_DB" \
"$S3_PATH" > "$LOG_DIR/restore.log" 2>&1; then
echo "✓ Restoration successful!"
# Compare row counts
TABLES=$(sqlite3 "$RESTORE_DB" ".tables" 2>/dev/null)
if echo "$TABLES" | grep -q "load_test"; then
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_table_0"; then
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_data"; then
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
else
RESTORED_COUNT="0"
fi
if [ "$ROW_COUNT" = "$RESTORED_COUNT" ]; then
echo "✓ Row counts match! ($RESTORED_COUNT rows)"
else
echo "⚠ Row count mismatch! Original: $ROW_COUNT, Restored: $RESTORED_COUNT"
fi
else
echo "✗ Restoration failed!"
tail -20 "$LOG_DIR/restore.log"
fi
# Summary
echo ""
echo "================================================"
echo "Test Summary"
echo "================================================"
CRITICAL_ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ' || true)
if [ "$CRITICAL_ERROR_COUNT" -eq 0 ] && [ "$FINAL_OBJECTS" -gt 0 ]; then
echo "✓ MINIO S3 TEST PASSED!"
echo ""
echo "Successfully validated:"
echo " - S3-compatible replication to MinIO"
echo " - Stored $FINAL_OBJECTS objects"
echo " - Compactions: $COMPACTION_COUNT"
echo " - Syncs: $SYNC_COUNT"
[ "$CHECKPOINT_COUNT" -gt 0 ] && echo " - Checkpoints: $CHECKPOINT_COUNT"
[ "$FINAL_SNAPSHOTS" -gt 0 ] && echo " - Snapshots: $FINAL_SNAPSHOTS"
echo " - Database restoration from S3"
else
echo "⚠ TEST COMPLETED WITH ISSUES:"
[ "$CRITICAL_ERROR_COUNT" -gt 0 ] && echo " - Critical errors detected: $CRITICAL_ERROR_COUNT"
[ "$FINAL_OBJECTS" -eq 0 ] && echo " - No objects stored in MinIO"
echo ""
echo "Review the logs for details:"
echo " $LOG_DIR/litestream.log"
fi
echo ""
echo "MinIO Console: http://localhost:${MINIO_CONSOLE_PORT}"
echo "Credentials: ${MINIO_ROOT_USER}/${MINIO_ROOT_PASSWORD}"
echo ""
echo "Full test results available in: $TEST_DIR"
echo "================================================"

View File

@@ -1,409 +0,0 @@
#!/bin/bash
set -euo pipefail
# Check for required environment variables
if [ -z "${AWS_ACCESS_KEY_ID:-}" ] || [ -z "${AWS_SECRET_ACCESS_KEY:-}" ] || [ -z "${S3_BUCKET:-}" ]; then
echo "Error: Required environment variables not set"
echo "Please set: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, S3_BUCKET"
echo ""
echo "Example:"
echo " export AWS_ACCESS_KEY_ID=your_key"
echo " export AWS_SECRET_ACCESS_KEY=your_secret"
echo " export S3_BUCKET=your-test-bucket"
echo " export AWS_REGION=us-east-1 # optional, defaults to us-east-1"
exit 1
fi
AWS_REGION="${AWS_REGION:-us-east-1}"
S3_PATH="s3://${S3_BUCKET}/litestream-overnight-$(date +%Y%m%d-%H%M%S)"
TEST_DIR="/tmp/litestream-overnight-s3-$(date +%Y%m%d-%H%M%S)"
DB_PATH="$TEST_DIR/test.db"
LOG_DIR="$TEST_DIR/logs"
CONFIG_FILE="$TEST_DIR/litestream.yml"
MONITOR_PID=""
LITESTREAM_PID=""
LOAD_PID=""
echo "================================================"
echo "Litestream Overnight S3 Test Suite"
echo "================================================"
echo "Test directory: $TEST_DIR"
echo "S3 destination: $S3_PATH"
echo "AWS Region: $AWS_REGION"
echo "Start time: $(date)"
echo ""
cleanup() {
echo ""
echo "================================================"
echo "Cleaning up..."
echo "================================================"
if [ -n "$LOAD_PID" ] && kill -0 "$LOAD_PID" 2>/dev/null; then
echo "Stopping load generator..."
kill "$LOAD_PID" 2>/dev/null || true
wait "$LOAD_PID" 2>/dev/null || true
fi
if [ -n "$LITESTREAM_PID" ] && kill -0 "$LITESTREAM_PID" 2>/dev/null; then
echo "Stopping litestream..."
kill "$LITESTREAM_PID" 2>/dev/null || true
wait "$LITESTREAM_PID" 2>/dev/null || true
fi
if [ -n "$MONITOR_PID" ] && kill -0 "$MONITOR_PID" 2>/dev/null; then
echo "Stopping monitor..."
kill "$MONITOR_PID" 2>/dev/null || true
fi
echo ""
echo "Test Summary:"
echo "============="
if [ -f "$LOG_DIR/monitor.log" ]; then
echo "Final statistics from monitor log:"
tail -20 "$LOG_DIR/monitor.log"
fi
echo ""
echo "S3 Final Statistics:"
aws s3 ls "${S3_PATH}/" --recursive --summarize 2>/dev/null | tail -5 || true
echo ""
echo "Test artifacts saved locally in: $TEST_DIR"
echo "S3 replica data in: $S3_PATH"
echo "End time: $(date)"
}
trap cleanup EXIT INT TERM
mkdir -p "$TEST_DIR" "$LOG_DIR"
echo "Creating initial database..."
sqlite3 "$DB_PATH" <<EOF
PRAGMA journal_mode=WAL;
CREATE TABLE IF NOT EXISTS test_data (
id INTEGER PRIMARY KEY AUTOINCREMENT,
data BLOB,
created_at INTEGER
);
EOF
echo "Creating litestream configuration for S3 with frequent intervals..."
cat > "$CONFIG_FILE" <<EOF
# Litestream S3 configuration for overnight testing
# with aggressive compaction and snapshot intervals
# Optional: Access key configuration (can also use environment variables)
# access-key-id: ${AWS_ACCESS_KEY_ID}
# secret-access-key: ${AWS_SECRET_ACCESS_KEY}
# Snapshot every 10 minutes
snapshot:
interval: 10m
retention: 720h # Keep data for 30 days
# Compaction settings - very frequent for testing
levels:
- interval: 30s
- interval: 1m
- interval: 5m
- interval: 15m
- interval: 30m
- interval: 1h
dbs:
- path: $DB_PATH
# Checkpoint settings - frequent for testing
checkpoint-interval: 30s
min-checkpoint-page-count: 1000
max-checkpoint-page-count: 10000
replicas:
- url: ${S3_PATH}
region: ${AWS_REGION}
retention-check-interval: 1h
# S3-specific settings
force-path-style: false
skip-verify: false
# Optional: Server-side encryption
# sse: AES256
# sse-kms-key-id: your-kms-key-id
EOF
echo ""
echo "Configuration created at: $CONFIG_FILE"
echo ""
echo "Testing S3 connectivity..."
if aws s3 ls "s3://${S3_BUCKET}/" > /dev/null 2>&1; then
echo "✓ S3 bucket accessible"
else
echo "✗ Failed to access S3 bucket: ${S3_BUCKET}"
exit 1
fi
echo "Building litestream if needed..."
if [ ! -f bin/litestream ]; then
go build -o bin/litestream ./cmd/litestream
fi
echo "Starting litestream replication to S3..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
echo "Litestream started with PID: $LITESTREAM_PID"
sleep 5
if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
echo "ERROR: Litestream failed to start. Check logs:"
tail -50 "$LOG_DIR/litestream.log"
exit 1
fi
monitor_s3_test() {
while true; do
echo "================================================" | tee -a "$LOG_DIR/monitor.log"
echo "Monitor Update: $(date)" | tee -a "$LOG_DIR/monitor.log"
echo "================================================" | tee -a "$LOG_DIR/monitor.log"
# Database size
if [ -f "$DB_PATH" ]; then
DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null || echo "0")
echo "Database size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")" | tee -a "$LOG_DIR/monitor.log"
fi
# WAL size
if [ -f "$DB_PATH-wal" ]; then
WAL_SIZE=$(stat -f%z "$DB_PATH-wal" 2>/dev/null || stat -c%s "$DB_PATH-wal" 2>/dev/null || echo "0")
echo "WAL size: $(numfmt --to=iec-i --suffix=B $WAL_SIZE 2>/dev/null || echo "$WAL_SIZE bytes")" | tee -a "$LOG_DIR/monitor.log"
fi
# S3 statistics
echo "" | tee -a "$LOG_DIR/monitor.log"
echo "S3 Replica Statistics:" | tee -a "$LOG_DIR/monitor.log"
# Count objects in S3
# "|| true": grep -c prints "0" on no match but exits non-zero; with pipefail,
# "|| echo 0" would emit a second "0".
SNAPSHOT_COUNT=$(aws s3 ls "${S3_PATH}/" --recursive 2>/dev/null | grep -c "\.snapshot\.lz4" || true)
WAL_COUNT=$(aws s3 ls "${S3_PATH}/" --recursive 2>/dev/null | grep -c "\.wal\.lz4" || true)
TOTAL_OBJECTS=$(aws s3 ls "${S3_PATH}/" --recursive 2>/dev/null | wc -l | tr -d ' ' || true)
echo " Snapshots in S3: $SNAPSHOT_COUNT" | tee -a "$LOG_DIR/monitor.log"
echo " WAL segments in S3: $WAL_COUNT" | tee -a "$LOG_DIR/monitor.log"
echo " Total objects in S3: $TOTAL_OBJECTS" | tee -a "$LOG_DIR/monitor.log"
# Get S3 storage size (if possible)
S3_SIZE=$(aws s3 ls "${S3_PATH}/" --recursive --summarize 2>/dev/null | grep "Total Size" | awk '{print $3}' || echo "0")
if [ "$S3_SIZE" != "0" ]; then
echo " Total S3 storage: $(numfmt --to=iec-i --suffix=B $S3_SIZE 2>/dev/null || echo "$S3_SIZE bytes")" | tee -a "$LOG_DIR/monitor.log"
fi
# Count operations
echo "" | tee -a "$LOG_DIR/monitor.log"
echo "Operations:" | tee -a "$LOG_DIR/monitor.log"
if [ -f "$LOG_DIR/litestream.log" ]; then
COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" 2>/dev/null || true)
CHECKPOINT_COUNT=$(grep -iE "checkpoint|checkpointed" "$LOG_DIR/litestream.log" 2>/dev/null | wc -l | tr -d ' ' || true)
SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" 2>/dev/null || true)
echo " Compactions: $COMPACTION_COUNT" | tee -a "$LOG_DIR/monitor.log"
echo " Checkpoints: $CHECKPOINT_COUNT" | tee -a "$LOG_DIR/monitor.log"
echo " Syncs: $SYNC_COUNT" | tee -a "$LOG_DIR/monitor.log"
fi
# Check for errors (exclude known non-critical)
echo "" | tee -a "$LOG_DIR/monitor.log"
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ' || true)
echo "Critical errors in litestream log: $ERROR_COUNT" | tee -a "$LOG_DIR/monitor.log"
if [ "$ERROR_COUNT" -gt 0 ]; then
echo "Recent errors:" | tee -a "$LOG_DIR/monitor.log"
grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | tail -5 | tee -a "$LOG_DIR/monitor.log"
fi
# Check for S3-specific errors
S3_ERROR_COUNT=$(grep -c "S3\|AWS\|403\|404\|500\|503" "$LOG_DIR/litestream.log" 2>/dev/null || true)
S3_ERROR_COUNT=${S3_ERROR_COUNT:-0}
if [ "$S3_ERROR_COUNT" -gt 0 ]; then
echo "S3-specific errors: $S3_ERROR_COUNT" | tee -a "$LOG_DIR/monitor.log"
grep "S3\|AWS\|403\|404\|500\|503" "$LOG_DIR/litestream.log" | tail -3 | tee -a "$LOG_DIR/monitor.log"
fi
# Process status
echo "" | tee -a "$LOG_DIR/monitor.log"
echo "Process Status:" | tee -a "$LOG_DIR/monitor.log"
if kill -0 "$LITESTREAM_PID" 2>/dev/null; then
echo " Litestream: Running (PID: $LITESTREAM_PID)" | tee -a "$LOG_DIR/monitor.log"
else
echo " Litestream: STOPPED" | tee -a "$LOG_DIR/monitor.log"
fi
if [ -n "$LOAD_PID" ] && kill -0 "$LOAD_PID" 2>/dev/null; then
echo " Load generator: Running (PID: $LOAD_PID)" | tee -a "$LOG_DIR/monitor.log"
else
echo " Load generator: STOPPED" | tee -a "$LOG_DIR/monitor.log"
fi
# Network/API statistics from log
UPLOAD_COUNT=$(grep -c "uploading\|uploaded" "$LOG_DIR/litestream.log" 2>/dev/null || true)
echo " Total upload operations: $UPLOAD_COUNT" | tee -a "$LOG_DIR/monitor.log"
echo "" | tee -a "$LOG_DIR/monitor.log"
sleep 60
done
}
echo "Starting monitor process..."
monitor_s3_test &
MONITOR_PID=$!
echo "Monitor started with PID: $MONITOR_PID"
echo ""
echo "Initial database population (before starting litestream)..."
# Kill litestream temporarily to populate database
kill "$LITESTREAM_PID" 2>/dev/null || true
wait "$LITESTREAM_PID" 2>/dev/null || true
# Guard directly so "set -e" doesn't abort before the warning below.
if ! bin/litestream-test populate -db "$DB_PATH" -target-size 100MB -batch-size 10000 > "$LOG_DIR/populate.log" 2>&1; then
echo "Warning: Population failed, but continuing..."
cat "$LOG_DIR/populate.log"
fi
# Restart litestream
echo "Restarting litestream after population..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
sleep 3
echo ""
echo "Starting load generator for overnight S3 test..."
echo "Configuration:"
echo " - Duration: 8 hours"
echo " - Write rate: 100 writes/second (higher for S3 testing)"
echo " - Pattern: wave (simulates varying load)"
echo " - Workers: 8"
echo ""
# Run load test for 8 hours with higher load for S3
bin/litestream-test load \
-db "$DB_PATH" \
-write-rate 100 \
-duration 8h \
-pattern wave \
-payload-size 4096 \
-read-ratio 0.3 \
-workers 8 \
> "$LOG_DIR/load.log" 2>&1 &
LOAD_PID=$!
echo "Load generator started with PID: $LOAD_PID"
echo ""
echo "================================================"
echo "Overnight S3 test is running!"
echo "================================================"
echo ""
echo "Monitor the test with:"
echo " tail -f $LOG_DIR/monitor.log"
echo ""
echo "View litestream logs:"
echo " tail -f $LOG_DIR/litestream.log"
echo ""
echo "View load generator logs:"
echo " tail -f $LOG_DIR/load.log"
echo ""
echo "Check S3 contents:"
echo " aws s3 ls ${S3_PATH}/ --recursive"
echo ""
echo "The test will run for 8 hours. Press Ctrl+C to stop early."
echo ""
wait "$LOAD_PID"
echo ""
echo "Load generation completed."
# Final statistics
echo ""
echo "================================================"
echo "Final Statistics"
echo "================================================"
if [ -f "$DB_PATH" ]; then
DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
# Find actual table name
TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null)
if echo "$TABLES" | grep -q "load_test"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_table_0"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_data"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
else
ROW_COUNT="0"
fi
echo "Database size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"
echo "Total rows: $ROW_COUNT"
fi
echo ""
echo "S3 Statistics:"
# Count objects in S3
SNAPSHOT_COUNT=$(aws s3 ls "${S3_PATH}/" --recursive 2>/dev/null | grep -c "\.snapshot\.lz4" || true)
WAL_COUNT=$(aws s3 ls "${S3_PATH}/" --recursive 2>/dev/null | grep -c "\.wal\.lz4" || true)
TOTAL_OBJECTS=$(aws s3 ls "${S3_PATH}/" --recursive 2>/dev/null | wc -l | tr -d ' ' || true)
S3_SIZE=$(aws s3 ls "${S3_PATH}/" --recursive --summarize 2>/dev/null | grep "Total Size" | awk '{print $3}' || echo "0")
echo " Snapshots in S3: $SNAPSHOT_COUNT"
echo " WAL segments in S3: $WAL_COUNT"
echo " Total objects: $TOTAL_OBJECTS"
if [ "$S3_SIZE" != "0" ]; then
echo " Total S3 storage: $(numfmt --to=iec-i --suffix=B $S3_SIZE 2>/dev/null || echo "$S3_SIZE bytes")"
fi
echo ""
echo "Operation Counts:"
if [ -f "$LOG_DIR/litestream.log" ]; then
COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log") || COMPACTION_COUNT=0
CHECKPOINT_COUNT=$(grep -ic "checkpoint" "$LOG_DIR/litestream.log") || CHECKPOINT_COUNT=0
SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log") || SYNC_COUNT=0
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | wc -l | tr -d ' ') || ERROR_COUNT=0
echo " Compactions: $COMPACTION_COUNT"
echo " Checkpoints: $CHECKPOINT_COUNT"
echo " Syncs: $SYNC_COUNT"
echo " Critical errors: $ERROR_COUNT"
fi
echo ""
echo "Testing restoration from S3..."
# Test restoration
RESTORE_DB="$TEST_DIR/restored.db"
echo "Restoring database from S3 to: $RESTORE_DB"
if bin/litestream restore -o "$RESTORE_DB" "$S3_PATH" > "$LOG_DIR/restore.log" 2>&1; then
echo "✓ Restoration successful!"
# Compare row counts - use same table detection logic
TABLES=$(sqlite3 "$RESTORE_DB" ".tables" 2>/dev/null)
if echo "$TABLES" | grep -q "load_test"; then
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_table_0"; then
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_data"; then
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
else
RESTORED_COUNT="0"
fi
if [ "$ROW_COUNT" = "$RESTORED_COUNT" ]; then
echo "✓ Row counts match! ($RESTORED_COUNT rows)"
else
echo "⚠ Row count mismatch! Original: $ROW_COUNT, Restored: $RESTORED_COUNT"
fi
else
echo "✗ Restoration failed! Check $LOG_DIR/restore.log"
fi

331
scripts/test-overnight.sh
View File

@@ -1,331 +0,0 @@
#!/bin/bash
set -euo pipefail
TEST_DIR="/tmp/litestream-overnight-$(date +%Y%m%d-%H%M%S)"
DB_PATH="$TEST_DIR/test.db"
REPLICA_PATH="$TEST_DIR/replica"
LOG_DIR="$TEST_DIR/logs"
CONFIG_FILE="$TEST_DIR/litestream.yml"
MONITOR_PID=""
LITESTREAM_PID=""
LOAD_PID=""
echo "================================================"
echo "Litestream Overnight Test Suite"
echo "================================================"
echo "Test directory: $TEST_DIR"
echo "Start time: $(date)"
echo ""
cleanup() {
echo ""
echo "================================================"
echo "Cleaning up..."
echo "================================================"
if [ -n "$LOAD_PID" ] && kill -0 "$LOAD_PID" 2>/dev/null; then
echo "Stopping load generator..."
kill "$LOAD_PID" 2>/dev/null || true
wait "$LOAD_PID" 2>/dev/null || true
fi
if [ -n "$LITESTREAM_PID" ] && kill -0 "$LITESTREAM_PID" 2>/dev/null; then
echo "Stopping litestream..."
kill "$LITESTREAM_PID" 2>/dev/null || true
wait "$LITESTREAM_PID" 2>/dev/null || true
fi
if [ -n "$MONITOR_PID" ] && kill -0 "$MONITOR_PID" 2>/dev/null; then
echo "Stopping monitor..."
kill "$MONITOR_PID" 2>/dev/null || true
fi
echo ""
echo "Test Summary:"
echo "============="
if [ -f "$LOG_DIR/monitor.log" ]; then
echo "Final statistics from monitor log:"
tail -20 "$LOG_DIR/monitor.log"
fi
echo ""
echo "Test artifacts saved in: $TEST_DIR"
echo "End time: $(date)"
}
trap cleanup EXIT INT TERM
mkdir -p "$TEST_DIR" "$LOG_DIR" "$REPLICA_PATH"
echo "Creating initial database..."
sqlite3 "$DB_PATH" <<EOF
PRAGMA journal_mode=WAL;
CREATE TABLE IF NOT EXISTS test_data (
id INTEGER PRIMARY KEY AUTOINCREMENT,
data BLOB,
created_at INTEGER
);
EOF
echo "Creating litestream configuration with frequent intervals..."
cat > "$CONFIG_FILE" <<EOF
# Litestream configuration for overnight testing
# with aggressive compaction and snapshot intervals
# Snapshot every 10 minutes
snapshot:
interval: 10m
retention: 720h # Keep everything for analysis
# Compaction settings - very frequent for testing
levels:
- interval: 30s
- interval: 1m
- interval: 5m
- interval: 15m
- interval: 30m
- interval: 1h
dbs:
- path: $DB_PATH
# Checkpoint after every 1000 frames (frequent for testing)
checkpoint-interval: 30s
min-checkpoint-page-count: 1000
max-checkpoint-page-count: 10000
replicas:
- type: file
path: $REPLICA_PATH
retention-check-interval: 1h
EOF
echo ""
echo "Configuration created at: $CONFIG_FILE"
cat "$CONFIG_FILE"
echo ""
echo "Building litestream if needed..."
if [ ! -f bin/litestream ]; then
go build -o bin/litestream ./cmd/litestream
fi
echo "Starting litestream replication..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
echo "Litestream started with PID: $LITESTREAM_PID"
sleep 5
if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
echo "ERROR: Litestream failed to start. Check logs:"
tail -50 "$LOG_DIR/litestream.log"
exit 1
fi
monitor_test() {
while true; do
echo "================================================" | tee -a "$LOG_DIR/monitor.log"
echo "Monitor Update: $(date)" | tee -a "$LOG_DIR/monitor.log"
echo "================================================" | tee -a "$LOG_DIR/monitor.log"
# Database size
if [ -f "$DB_PATH" ]; then
DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null || echo "0")
echo "Database size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")" | tee -a "$LOG_DIR/monitor.log"
fi
# WAL size
if [ -f "$DB_PATH-wal" ]; then
WAL_SIZE=$(stat -f%z "$DB_PATH-wal" 2>/dev/null || stat -c%s "$DB_PATH-wal" 2>/dev/null || echo "0")
echo "WAL size: $(numfmt --to=iec-i --suffix=B $WAL_SIZE 2>/dev/null || echo "$WAL_SIZE bytes")" | tee -a "$LOG_DIR/monitor.log"
fi
# Replica statistics
echo "" | tee -a "$LOG_DIR/monitor.log"
echo "Replica Statistics:" | tee -a "$LOG_DIR/monitor.log"
# Count snapshots (for file replica, look for snapshot.ltx files)
SNAPSHOT_COUNT=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ')
echo " Snapshots: $SNAPSHOT_COUNT" | tee -a "$LOG_DIR/monitor.log"
# Count LTX segments by age (file replicas use .ltx not .wal.lz4)
if [ -d "$REPLICA_PATH" ]; then
LTX_30S=$(find "$REPLICA_PATH" -name "*.ltx" -mmin -0.5 2>/dev/null | wc -l | tr -d ' ')
LTX_1M=$(find "$REPLICA_PATH" -name "*.ltx" -mmin -1 2>/dev/null | wc -l | tr -d ' ')
LTX_5M=$(find "$REPLICA_PATH" -name "*.ltx" -mmin -5 2>/dev/null | wc -l | tr -d ' ')
LTX_TOTAL=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ')
echo " LTX segments (last 30s): $LTX_30S" | tee -a "$LOG_DIR/monitor.log"
echo " LTX segments (last 1m): $LTX_1M" | tee -a "$LOG_DIR/monitor.log"
echo " LTX segments (last 5m): $LTX_5M" | tee -a "$LOG_DIR/monitor.log"
echo " LTX segments (total): $LTX_TOTAL" | tee -a "$LOG_DIR/monitor.log"
# Replica size
REPLICA_SIZE=$(du -sh "$REPLICA_PATH" 2>/dev/null | cut -f1)
echo " Total replica size: $REPLICA_SIZE" | tee -a "$LOG_DIR/monitor.log"
fi
# Count operations
echo "" | tee -a "$LOG_DIR/monitor.log"
echo "Operations:" | tee -a "$LOG_DIR/monitor.log"
if [ -f "$LOG_DIR/litestream.log" ]; then
COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" 2>/dev/null) || COMPACTION_COUNT=0
CHECKPOINT_COUNT=$(grep -ic "checkpoint" "$LOG_DIR/litestream.log" 2>/dev/null) || CHECKPOINT_COUNT=0
SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" 2>/dev/null) || SYNC_COUNT=0
echo " Compactions: $COMPACTION_COUNT" | tee -a "$LOG_DIR/monitor.log"
echo " Checkpoints: $CHECKPOINT_COUNT" | tee -a "$LOG_DIR/monitor.log"
echo " Syncs: $SYNC_COUNT" | tee -a "$LOG_DIR/monitor.log"
fi
# Check for errors in litestream log (exclude known non-critical)
echo "" | tee -a "$LOG_DIR/monitor.log"
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ') || ERROR_COUNT=0
echo "Critical errors in litestream log: $ERROR_COUNT" | tee -a "$LOG_DIR/monitor.log"
if [ "$ERROR_COUNT" -gt 0 ]; then
echo "Recent errors:" | tee -a "$LOG_DIR/monitor.log"
grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | tail -5 | tee -a "$LOG_DIR/monitor.log"
fi
# Process status
echo "" | tee -a "$LOG_DIR/monitor.log"
echo "Process Status:" | tee -a "$LOG_DIR/monitor.log"
if kill -0 "$LITESTREAM_PID" 2>/dev/null; then
echo " Litestream: Running (PID: $LITESTREAM_PID)" | tee -a "$LOG_DIR/monitor.log"
else
echo " Litestream: STOPPED" | tee -a "$LOG_DIR/monitor.log"
fi
if [ -n "$LOAD_PID" ] && kill -0 "$LOAD_PID" 2>/dev/null; then
echo " Load generator: Running (PID: $LOAD_PID)" | tee -a "$LOG_DIR/monitor.log"
else
echo " Load generator: STOPPED" | tee -a "$LOG_DIR/monitor.log"
fi
echo "" | tee -a "$LOG_DIR/monitor.log"
sleep 60
done
}
echo "Starting monitor process..."
monitor_test &
MONITOR_PID=$!
echo "Monitor started with PID: $MONITOR_PID"
echo ""
echo "Initial database population (before starting litestream)..."
# Kill litestream temporarily to populate database
kill "$LITESTREAM_PID" 2>/dev/null || true
wait "$LITESTREAM_PID" 2>/dev/null || true
if ! bin/litestream-test populate -db "$DB_PATH" -target-size 100MB -batch-size 10000 > "$LOG_DIR/populate.log" 2>&1; then
echo "Warning: Population failed, but continuing..."
cat "$LOG_DIR/populate.log"
fi
# Restart litestream
echo "Restarting litestream after population..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
sleep 3
echo ""
echo "Starting load generator for overnight test..."
echo "Configuration:"
echo " - Duration: 8 hours"
echo " - Write rate: 50 writes/second"
echo " - Pattern: wave (simulates varying load)"
echo " - Workers: 4"
echo ""
# Run load test for 8 hours with varying patterns
bin/litestream-test load \
-db "$DB_PATH" \
-write-rate 50 \
-duration 8h \
-pattern wave \
-payload-size 2048 \
-read-ratio 0.3 \
-workers 4 \
> "$LOG_DIR/load.log" 2>&1 &
LOAD_PID=$!
echo "Load generator started with PID: $LOAD_PID"
echo ""
echo "================================================"
echo "Overnight test is running!"
echo "================================================"
echo ""
echo "Monitor the test with:"
echo " tail -f $LOG_DIR/monitor.log"
echo ""
echo "View litestream logs:"
echo " tail -f $LOG_DIR/litestream.log"
echo ""
echo "View load generator logs:"
echo " tail -f $LOG_DIR/load.log"
echo ""
echo "The test will run for 8 hours. Press Ctrl+C to stop early."
echo ""
wait "$LOAD_PID"
echo ""
echo "Load generation completed."
# Final statistics
echo ""
echo "================================================"
echo "Final Statistics"
echo "================================================"
if [ -f "$DB_PATH" ]; then
DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
# Find actual table name
TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null)
if echo "$TABLES" | grep -q "load_test"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_table_0"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_data"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
else
ROW_COUNT="0"
fi
echo "Database size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"
echo "Total rows: $ROW_COUNT"
fi
if [ -d "$REPLICA_PATH" ]; then
SNAPSHOT_COUNT=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ')
LTX_COUNT=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ')
REPLICA_SIZE=$(du -sh "$REPLICA_PATH" | cut -f1)
echo "Snapshots created: $SNAPSHOT_COUNT"
echo "LTX segments: $LTX_COUNT"
echo "Replica size: $REPLICA_SIZE"
fi
if [ -f "$LOG_DIR/litestream.log" ]; then
COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log") || COMPACTION_COUNT=0
CHECKPOINT_COUNT=$(grep -ic "checkpoint" "$LOG_DIR/litestream.log") || CHECKPOINT_COUNT=0
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | wc -l | tr -d ' ') || ERROR_COUNT=0
echo "Compactions: $COMPACTION_COUNT"
echo "Checkpoints: $CHECKPOINT_COUNT"
echo "Critical errors: $ERROR_COUNT"
fi
echo ""
echo "Running validation..."
if bin/litestream-test validate \
  -source "$DB_PATH" \
  -replica "$REPLICA_PATH" \
  > "$LOG_DIR/validate.log" 2>&1; then
echo "✓ Validation passed!"
else
echo "✗ Validation failed! Check $LOG_DIR/validate.log"
fi

324
scripts/test-quick-validation.sh
View File

@@ -1,324 +0,0 @@
#!/bin/bash
set -euo pipefail
# Quick validation test - runs for 30 minutes with aggressive settings
# Use this to validate configuration before overnight runs
TEST_DURATION="${TEST_DURATION:-30m}"
TEST_DIR="/tmp/litestream-quick-$(date +%Y%m%d-%H%M%S)"
DB_PATH="$TEST_DIR/test.db"
REPLICA_PATH="$TEST_DIR/replica"
CONFIG_FILE="$TEST_DIR/litestream.yml"
LOG_DIR="$TEST_DIR/logs"
echo "================================================"
echo "Litestream Quick Validation Test"
echo "================================================"
echo "Duration: $TEST_DURATION"
echo "Test directory: $TEST_DIR"
echo "Start time: $(date)"
echo ""
cleanup() {
echo ""
echo "Cleaning up..."
# Kill all spawned processes
jobs -p | xargs -r kill 2>/dev/null || true
wait
echo "Test completed at: $(date)"
echo "Results saved in: $TEST_DIR"
}
trap cleanup EXIT INT TERM
# Create directories
mkdir -p "$TEST_DIR" "$LOG_DIR" "$REPLICA_PATH"
# Build binaries if needed
echo "Building binaries..."
if [ ! -f bin/litestream ]; then
go build -o bin/litestream ./cmd/litestream
fi
if [ ! -f bin/litestream-test ]; then
go build -o bin/litestream-test ./cmd/litestream-test
fi
# Create test database and populate BEFORE starting litestream
echo "Creating test database..."
sqlite3 "$DB_PATH" <<EOF
PRAGMA journal_mode=WAL;
CREATE TABLE IF NOT EXISTS test_data (
id INTEGER PRIMARY KEY AUTOINCREMENT,
data BLOB,
created_at INTEGER
);
EOF
# Populate database BEFORE litestream starts
echo "Populating database (10MB)..."
if ! bin/litestream-test populate -db "$DB_PATH" -target-size 10MB -batch-size 1000 > "$LOG_DIR/populate.log" 2>&1; then
echo "Warning: Population failed, but continuing..."
cat "$LOG_DIR/populate.log"
fi
# Create aggressive test configuration
echo "Creating test configuration..."
cat > "$CONFIG_FILE" <<EOF
# Very aggressive snapshot settings for quick testing
snapshot:
interval: 1m # Snapshots every minute
retention: 30m # Keep data for 30 minutes
# Frequent compaction levels for testing
levels:
- interval: 30s
- interval: 1m
- interval: 5m
- interval: 10m
dbs:
- path: $DB_PATH
# Aggressive checkpoint settings
checkpoint-interval: 30s
min-checkpoint-page-count: 10
max-checkpoint-page-count: 10000
replicas:
- type: file
path: $REPLICA_PATH
retention-check-interval: 2m
EOF
echo "Starting litestream..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
sleep 3
if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
echo "ERROR: Litestream failed to start!"
tail -50 "$LOG_DIR/litestream.log"
exit 1
fi
echo "Litestream running (PID: $LITESTREAM_PID)"
echo ""
# Start load generator with more aggressive settings
echo "Starting load generator..."
bin/litestream-test load \
-db "$DB_PATH" \
-write-rate 100 \
-duration "$TEST_DURATION" \
-pattern wave \
-payload-size 4096 \
-read-ratio 0.2 \
-workers 4 \
> "$LOG_DIR/load.log" 2>&1 &
LOAD_PID=$!
echo "Load generator running (PID: $LOAD_PID)"
echo ""
# Monitor function
monitor_quick() {
while true; do
sleep 30
echo "[$(date +%H:%M:%S)] Status check"
# Check database size and WAL size
if [ -f "$DB_PATH" ]; then
DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
echo " Database: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"
# Check WAL file size
if [ -f "$DB_PATH-wal" ]; then
WAL_SIZE=$(stat -f%z "$DB_PATH-wal" 2>/dev/null || stat -c%s "$DB_PATH-wal" 2>/dev/null)
echo " WAL size: $(numfmt --to=iec-i --suffix=B $WAL_SIZE 2>/dev/null || echo "$WAL_SIZE bytes")"
fi
fi
# Count replica files (for file replica type, count LTX files)
if [ -d "$REPLICA_PATH" ]; then
# Count snapshot files (snapshot.ltx files)
SNAPSHOTS=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ')
# Count LTX files (WAL segments)
LTX_FILES=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ')
echo " Snapshots: $SNAPSHOTS, LTX segments: $LTX_FILES"
# Show replica directory size
REPLICA_SIZE=$(du -sh "$REPLICA_PATH" 2>/dev/null | cut -f1)
echo " Replica size: $REPLICA_SIZE"
fi
# Check for compaction (look for "compaction complete")
COMPACT_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" 2>/dev/null) || COMPACT_COUNT=0
echo " Compactions: $COMPACT_COUNT"
# Check for checkpoints (look for various checkpoint patterns)
CHECKPOINT_COUNT=$(grep -ic "checkpoint" "$LOG_DIR/litestream.log" 2>/dev/null) || CHECKPOINT_COUNT=0
echo " Checkpoints: $CHECKPOINT_COUNT"
# Check sync activity
SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" 2>/dev/null) || SYNC_COUNT=0
echo " Syncs: $SYNC_COUNT"
# Check for errors (exclude known non-critical errors)
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ') || ERROR_COUNT=0
if [ "$ERROR_COUNT" -gt 0 ]; then
echo " ⚠ Critical errors: $ERROR_COUNT"
grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | tail -2
fi
# Check processes
if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
echo " ✗ Litestream stopped unexpectedly!"
break
fi
if ! kill -0 "$LOAD_PID" 2>/dev/null; then
echo " ✓ Load test completed"
break
fi
echo ""
done
}
echo "Running test for $TEST_DURATION..."
echo "================================================"
echo ""
# Start monitoring in background
monitor_quick &
MONITOR_PID=$!
# Wait for load test to complete
wait "$LOAD_PID" 2>/dev/null || true
# Stop the monitor
kill $MONITOR_PID 2>/dev/null || true
wait $MONITOR_PID 2>/dev/null || true
echo ""
echo "================================================"
echo "Test Results"
echo "================================================"
# Final statistics
echo "Database Statistics:"
if [ -f "$DB_PATH" ]; then
DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
# Find the actual table name - tables are space-separated on one line
TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null)
# Look for the main data table
if echo "$TABLES" | grep -q "load_test"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_table_0"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_data"; then
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
else
ROW_COUNT="0"
fi
echo " Final size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"
echo " Total rows: $ROW_COUNT"
fi
echo ""
echo "Replication Statistics:"
if [ -d "$REPLICA_PATH" ]; then
SNAPSHOT_COUNT=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ')
LTX_COUNT=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ')
REPLICA_SIZE=$(du -sh "$REPLICA_PATH" | cut -f1)
echo " Snapshots created: $SNAPSHOT_COUNT"
echo " LTX segments: $LTX_COUNT"
echo " Replica size: $REPLICA_SIZE"
fi
echo ""
echo "Operation Counts:"
# Count operations from log
if [ -f "$LOG_DIR/litestream.log" ]; then
COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log") || COMPACTION_COUNT=0
CHECKPOINT_COUNT=$(grep -ic "checkpoint" "$LOG_DIR/litestream.log") || CHECKPOINT_COUNT=0
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | wc -l | tr -d ' ') || ERROR_COUNT=0
else
COMPACTION_COUNT="0"
CHECKPOINT_COUNT="0"
ERROR_COUNT="0"
fi
echo " Compactions: $COMPACTION_COUNT"
echo " Checkpoints: $CHECKPOINT_COUNT"
echo " Errors: $ERROR_COUNT"
# Quick validation
echo ""
echo "Validation:"
if bin/litestream-test validate \
  -source "$DB_PATH" \
  -replica "$REPLICA_PATH" \
  > "$LOG_DIR/validate.log" 2>&1; then
echo " ✓ Validation passed!"
else
echo " ✗ Validation failed!"
tail -10 "$LOG_DIR/validate.log"
fi
# Test restoration
echo ""
echo "Testing restoration..."
RESTORE_DB="$TEST_DIR/restored.db"
if bin/litestream restore -o "$RESTORE_DB" "file://$REPLICA_PATH" > "$LOG_DIR/restore.log" 2>&1; then
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
ORIGINAL_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
if [ "$RESTORED_COUNT" = "$ORIGINAL_COUNT" ]; then
echo " ✓ Restoration successful! ($RESTORED_COUNT rows)"
else
echo " ⚠ Row count mismatch! Original: $ORIGINAL_COUNT, Restored: $RESTORED_COUNT"
fi
else
echo " ✗ Restoration failed!"
fi
# Summary
echo ""
echo "================================================"
# Count critical errors (exclude known non-critical ones)
CRITICAL_ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ') || CRITICAL_ERROR_COUNT=0
if [ "$CRITICAL_ERROR_COUNT" -eq 0 ] && [ "$LTX_COUNT" -gt 0 ]; then
echo "✓ Quick validation PASSED!"
echo ""
echo "Summary:"
echo " - Litestream successfully replicated data"
echo " - Created $LTX_COUNT LTX segments"
[ "$SNAPSHOT_COUNT" -gt 0 ] && echo " - Created $SNAPSHOT_COUNT snapshots"
[ "$COMPACTION_COUNT" -gt 0 ] && echo " - Performed $COMPACTION_COUNT compactions"
echo ""
echo "The configuration appears ready for overnight testing."
echo "Run the overnight test with:"
echo " ./test-overnight.sh"
else
echo "⚠ Quick validation completed with issues:"
[ "$CRITICAL_ERROR_COUNT" -gt 0 ] && echo " - Critical errors detected: $CRITICAL_ERROR_COUNT"
[ "$LTX_COUNT" -eq 0 ] && echo " - No LTX segments created (replication not working)"
[ "$SNAPSHOT_COUNT" -eq 0 ] && echo " - No snapshots created (may be normal for short tests)"
[ "$COMPACTION_COUNT" -eq 0 ] && echo " - No compactions occurred (may be normal for short tests)"
echo ""
echo "Review the logs before running overnight tests:"
echo " $LOG_DIR/litestream.log"
fi
echo ""
echo "Full results available in: $TEST_DIR"
echo "================================================"

474
tests/integration/README.md Normal file
View File

@@ -0,0 +1,474 @@
# Integration Tests
Go-based integration tests for Litestream. These tests replace the previous bash-based test scripts with proper Go testing infrastructure.
## Overview
This package contains comprehensive integration tests organized by test type:
- **scenario_test.go** - Core functionality scenarios (fresh start, integrity, deletion, failover)
- **concurrent_test.go** - Concurrency and stress tests (rapid checkpoints, WAL growth, concurrent ops, busy timeout)
- **quick_test.go** - Quick validation tests (30 minutes configurable)
- **overnight_test.go** - Long-running stability tests (8+ hours)
- **boundary_test.go** - Edge cases (1GB boundary, different page sizes)
- **helpers.go** - Shared test utilities and helpers
- **fixtures.go** - Test data generators and scenarios
## Prerequisites
Build the required binaries:
```bash
go build -o bin/litestream ./cmd/litestream
go build -o bin/litestream-test ./cmd/litestream-test
```
## Running Tests
### Quick Tests (Default)
Run fast integration tests suitable for CI:
```bash
go test -v -tags=integration -timeout=30m ./tests/integration/... \
-run="TestFreshStart|TestDatabaseIntegrity|TestRapidCheckpoints"
```
### All Scenario Tests
Run all scenario tests (excluding long-running):
```bash
go test -v -tags=integration -timeout=1h ./tests/integration/...
```
### Long-Running Tests
Run overnight and boundary tests:
```bash
go test -v -tags="integration,long" -timeout=10h ./tests/integration/... \
-run="TestOvernight|Test1GBBoundary"
```
## Soak Tests
Long-running soak tests live alongside the other integration tests and share the same helpers. They are excluded from CI by default and are intended for release validation or targeted debugging.
### Overview
| Test | Tags | Defaults | Purpose | Extra Requirements |
| --- | --- | --- | --- | --- |
| `TestComprehensiveSoak` | `integration,soak` | 2h duration, 50MB DB, 500 writes/s | File-backed end-to-end stress | Litestream binaries in `./bin` |
| `TestMinIOSoak` | `integration,soak,docker` | 2h duration, 5MB DB (short=2m), 100 writes/s | S3-compatible replication via MinIO | Docker daemon, `docker` CLI |
| `TestOvernightS3Soak` | `integration,soak,aws` | 8h duration, 50MB DB | Real S3 replication & restore | AWS credentials, `aws` CLI |
All soak tests support `go test -test.short` to scale the default duration down to roughly two minutes for smoke verification.
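In code, the short-mode override looks roughly like this (the pattern from `comprehensive_soak_test.go`; each soak test applies its own short-mode values):
```go
// Short-mode scaling inside a soak test (values from TestComprehensiveSoak).
duration := GetTestDuration(t, 2*time.Hour)
if testing.Short() {
	duration = 2 * time.Minute // -test.short: quick smoke verification
}
```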
### Environment Variables
| Variable | Default | Description |
| --- | --- | --- |
| `SOAK_AUTO_PURGE` | `yes` for non-interactive shells; prompts otherwise | Controls whether MinIO buckets are cleared before each run. Set to `no` to retain objects between runs. |
| `SOAK_KEEP_TEMP` | unset | When set (any value), preserves the temporary directory and artifacts (database, config, logs) instead of removing them after the test completes. |
| `SOAK_DEBUG` | `0` | Streams command stdout/stderr (database population, load generation, docker helpers) directly to the console. Without this the output is captured and only shown on failure. |
| `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `S3_BUCKET`, `AWS_REGION` | required for `aws` tag | Provide credentials and target bucket for the overnight S3 soak. Region defaults to `us-east-1` if unset. |
### Example Commands
File-based soak (full length):
```bash
go test -v -tags="integration,soak" \
-run=TestComprehensiveSoak -timeout=3h ./tests/integration
```
File-based soak (short mode with preserved artifacts and debug logging):
```bash
SOAK_KEEP_TEMP=1 SOAK_DEBUG=1 go test -v -tags="integration,soak" \
-run=TestComprehensiveSoak -test.short -timeout=1h ./tests/integration
```
MinIO soak (short mode, auto-purges bucket, preserves results):
```bash
SOAK_AUTO_PURGE=yes SOAK_KEEP_TEMP=1 go test -v -tags="integration,soak,docker" \
-run=TestMinIOSoak -test.short -timeout=20m ./tests/integration
```
Overnight S3 soak (full duration):
```bash
export AWS_ACCESS_KEY_ID=...
export AWS_SECRET_ACCESS_KEY=...
export S3_BUCKET=your-bucket
export AWS_REGION=us-east-1
go test -v -tags="integration,soak,aws" \
-run=TestOvernightS3Soak -timeout=10h ./tests/integration
```
### Tips
- Run with `-v` to view the 60-second progress updates and final status summary. Without `-v`, progress output is suppressed by Go's test runner.
- When prompted about purging a MinIO bucket, answering “yes” clears the bucket via `minio/mc` before the run; “no” allows you to inspect lingering objects from previous executions.
- `SOAK_KEEP_TEMP=1` is especially useful when investigating failures—the helper prints the preserved path so you can inspect databases, configs, and logs.
- The monitoring infrastructure automatically prints additional status blocks when error counts change, making `SOAK_DEBUG=1` optional for most workflows.
### Specific Tests
Run individual test functions:
```bash
# Fresh start test
go test -v -tags=integration ./tests/integration/... -run=TestFreshStart
# Rapid checkpoints test
go test -v -tags=integration ./tests/integration/... -run=TestRapidCheckpoints
# 1GB boundary test
go test -v -tags=integration ./tests/integration/... -run=Test1GBBoundary
```
### Short Mode
Run abbreviated versions with `-short`:
```bash
go test -v -tags=integration -short ./tests/integration/...
```
This reduces test durations by 10x (e.g., 8 hours becomes 48 minutes).
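The helper behind this behavior can be pictured as follows; this is a hypothetical sketch of `GetTestDuration`'s contract, not the actual implementation in `helpers.go`:
```go
// Hypothetical sketch: honor -short by scaling the default duration down 10x.
func GetTestDuration(t *testing.T, def time.Duration) time.Duration {
	t.Helper()
	if testing.Short() {
		return def / 10 // e.g. 8h -> 48m
	}
	return def
}
```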
## Test Categories
### Scenario Tests
Core functionality tests that run in seconds to minutes:
- `TestFreshStart` - Starting replication before database exists
- `TestDatabaseIntegrity` - Complex schema and data integrity
- `TestDatabaseDeletion` - Source database deletion during replication
### Concurrent Tests
Stress and concurrency tests:
- `TestRapidCheckpoints` - Rapid checkpoint operations under load
- `TestWALGrowth` - Large WAL file handling (100MB+)
- `TestConcurrentOperations` - Multiple databases replicating simultaneously
- `TestBusyTimeout` - Database busy timeout and lock handling
### Quick Tests
Configurable duration validation (default 30 minutes):
- `TestQuickValidation` - Comprehensive validation with wave pattern load
### Overnight Tests
Long-running stability tests (default 8 hours):
- `TestOvernightFile` - 8-hour file-based replication test
- `TestOvernightComprehensive` - 8-hour comprehensive test with large database
### Boundary Tests
Edge case and boundary condition tests:
- `Test1GBBoundary` - SQLite 1GB lock page boundary (page #262145 with 4KB pages)
- `TestLockPageWithDifferentPageSizes` - Lock page handling with various page sizes
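The lock page numbers above follow from SQLite reserving the page that contains byte offset 1 GiB (0x40000000); that page can never hold data, so replication must skip it correctly:
```
lock_page = 1 GiB / page_size + 1
          = 1073741824 / 4096 + 1 = 262145   (4KB pages)
          = 1073741824 / 8192 + 1 = 131073   (8KB pages)
```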
## CI Integration
### Automatic (Pull Requests)
Quick tests run automatically on PRs modifying Go code:
```yaml
- Quick integration tests (TestFreshStart, TestDatabaseIntegrity, TestRapidCheckpoints)
- Timeout: 30 minutes
```
### Manual Workflows
Trigger via GitHub Actions UI:
**Quick Tests:**
```
workflow_dispatch → test_type: quick
```
**All Scenario Tests:**
```
workflow_dispatch → test_type: all
```
**Long-Running Tests:**
```
workflow_dispatch → test_type: long
```
## Test Infrastructure
### Helpers (helpers.go)
- `SetupTestDB(t, name)` - Create test database instance
- `TestDB.Create()` - Create database with WAL mode
- `TestDB.Populate(size)` - Populate to target size
- `TestDB.StartLitestream()` - Start replication
- `TestDB.StopLitestream()` - Stop replication
- `TestDB.Restore(path)` - Restore from replica
- `TestDB.Validate(path)` - Full validation (integrity, checksum, data)
- `TestDB.QuickValidate(path)` - Quick validation
- `TestDB.GenerateLoad(...)` - Generate database load
- `GetTestDuration(t, default)` - Get configurable test duration
- `RequireBinaries(t)` - Check for required binaries
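As a sketch of how these helpers compose (signatures as listed above; the fixed pause standing in for a real sync wait is illustrative):
```go
//go:build integration

package integration

import (
	"path/filepath"
	"testing"
	"time"
)

// Sketch: a minimal replicate-and-restore test built from the helpers above.
func TestReplicateRestoreSketch(t *testing.T) {
	RequireBinaries(t) // skip if bin/litestream or bin/litestream-test is missing

	db := SetupTestDB(t, "sketch")
	defer db.Cleanup()

	if err := db.Create(); err != nil { // database created in WAL mode
		t.Fatalf("create: %v", err)
	}
	if err := db.Populate("10MB"); err != nil {
		t.Fatalf("populate: %v", err)
	}
	if err := db.StartLitestream(); err != nil {
		t.Fatalf("start litestream: %v", err)
	}
	time.Sleep(5 * time.Second) // allow at least one sync cycle
	db.StopLitestream()

	restored := filepath.Join(db.TempDir, "sketch-restored.db")
	if err := db.Restore(restored); err != nil {
		t.Fatalf("restore: %v", err)
	}
	if err := db.QuickValidate(restored); err != nil {
		t.Fatalf("validate: %v", err)
	}
}
```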
### Fixtures (fixtures.go)
- `DefaultLoadConfig()` - Load generation configuration
- `DefaultPopulateConfig()` - Database population configuration
- `CreateComplexTestSchema(db)` - Multi-table schema with foreign keys
- `PopulateComplexTestData(db, ...)` - Populate complex data
- `LargeWALScenario()` - Large WAL test scenario
- `RapidCheckpointsScenario()` - Rapid checkpoint scenario
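For example, load generation in the concurrent tests is configured like this (a fragment; assumes a `*TestDB` named `db` and the enclosing test's `t`, as in `concurrent_test.go`):
```go
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()

config := DefaultLoadConfig()
config.WriteRate = 50                // writes per second
config.Duration = 2 * time.Minute
config.Pattern = LoadPatternConstant // LoadPatternWave simulates varying load
config.Workers = 2

if err := db.GenerateLoad(ctx, config.WriteRate, config.Duration, string(config.Pattern)); err != nil && ctx.Err() == nil {
	t.Fatalf("load generation failed: %v", err)
}
```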
## Test Artifacts
Tests create temporary directories via `t.TempDir()`:
```
/tmp/<test-temp-dir>/
├── <name>.db # Test database
├── <name>.db-wal # WAL file
├── <name>.db-shm # Shared memory
├── replica/ # Replica directory
│ └── ltx/0/ # LTX files
├── litestream.log # Litestream output
└── *-restored.db # Restored databases
```
Artifacts are automatically cleaned up after tests complete.
## Debugging Tests
### View Litestream Logs
```go
log, _ := db.GetLitestreamLog()
fmt.Println(log)
```
### Check for Errors
```go
errors, _ := db.CheckForErrors()
for _, e := range errors {
t.Logf("Error: %s", e)
}
```
### Inspect Replica
```go
fileCount, _ := db.GetReplicaFileCount()
t.Logf("LTX files: %d", fileCount)
```
### Check Database Size
```go
size, _ := db.GetDatabaseSize()
t.Logf("DB size: %.2f MB", float64(size)/(1024*1024))
```
## Migration from Bash
This package is part of an ongoing effort to migrate the bash test scripts to Go integration tests, which improves maintainability, enables CI integration, and provides platform independence.
### Test Directory Organization
Three distinct test locations serve different purposes:
**`tests/integration/` (this directory)** - Go-based integration and soak tests:
- Quick integration tests: `scenario_test.go`, `concurrent_test.go`, `boundary_test.go`
- Soak tests (2-8 hours): `comprehensive_soak_test.go`, `minio_soak_test.go`, `overnight_s3_soak_test.go`
- All tests use proper Go testing infrastructure with build tags
**`scripts/` (top-level)** - Utility scripts only (soak tests migrated to Go):
- `analyze-test-results.sh` - Post-test analysis utility
- `setup-homebrew-tap.sh` - Packaging script (not a test)
**`cmd/litestream-test/scripts/`** - Scenario and debugging bash scripts (being phased out):
- Bug reproduction scripts for specific issues (#752, #754)
- Format & upgrade tests for version compatibility
- S3 retention tests with Python mock
- Quick validation and setup utilities
### Migration Status
**Migrated from `scripts/` (5 scripts):**
- `test-quick-validation.sh``quick_test.go::TestQuickValidation` (CI: ✅)
- `test-overnight.sh``overnight_test.go::TestOvernightFile` (CI: ❌ too long)
- `test-comprehensive.sh``comprehensive_soak_test.go::TestComprehensiveSoak` (CI: ❌ soak test)
- `test-minio-s3.sh``minio_soak_test.go::TestMinIOSoak` (CI: ❌ soak test, requires Docker)
- `test-overnight-s3.sh``overnight_s3_soak_test.go::TestOvernightS3Soak` (CI: ❌ soak test, 8 hours)
**Migrated from `cmd/litestream-test/scripts/` (9 scripts):**
- `test-fresh-start.sh``scenario_test.go::TestFreshStart`
- `test-database-integrity.sh``scenario_test.go::TestDatabaseIntegrity`
- `test-database-deletion.sh``scenario_test.go::TestDatabaseDeletion`
- `test-replica-failover.sh` → NOT MIGRATED (feature removed from Litestream)
- `test-rapid-checkpoints.sh``concurrent_test.go::TestRapidCheckpoints`
- `test-wal-growth.sh``concurrent_test.go::TestWALGrowth`
- `test-concurrent-operations.sh``concurrent_test.go::TestConcurrentOperations`
- `test-busy-timeout.sh``concurrent_test.go::TestBusyTimeout`
- `test-1gb-boundary.sh``boundary_test.go::Test1GBBoundary`
**Remaining Bash Scripts:**
_scripts/_ (2 scripts remaining):
- `analyze-test-results.sh` - Post-test analysis utility (may stay as bash)
- `setup-homebrew-tap.sh` - Packaging script (not a test)
_cmd/litestream-test/scripts/_ (16 scripts remaining):
- Bug reproduction scripts: `reproduce-critical-bug.sh`, `test-754-*.sh`, `test-v0.5-*.sh`
- Format & upgrade tests: `test-format-isolation.sh`, `test-upgrade-*.sh`, `test-massive-upgrade.sh`
- S3 retention tests: `test-s3-retention-*.sh` (4 scripts, use Python S3 mock)
- Utility: `verify-test-setup.sh`
### Why Some Tests Aren't in CI
Per industry best practices, CI tests should complete in < 1 hour (ideally < 10 minutes):
- ✅ **Quick tests** (< 5 min) - Run on every PR
- ❌ **Soak tests** (2-8 hours) - Run locally before releases only
- ❌ **Long-running tests** (> 30 min) - Too slow for CI feedback loop
Soak tests have been migrated to Go for maintainability but run **locally only**. See the "Soak Tests" section below.
## Soak Tests (Long-Running Stability Tests)
Soak tests run for 2-8 hours to validate long-term stability under sustained load. These tests are **NOT run in CI** per industry best practices (effective CI requires tests to complete in < 1 hour).
### Purpose
Soak tests validate:
- Long-term replication stability
- Memory leak detection over time
- Compaction effectiveness across multiple cycles
- Checkpoint behavior under sustained load
- Recovery from transient issues
- Storage growth patterns
### When to Run Soak Tests
- ✅ Before major releases
- ✅ After significant replication changes
- ✅ To reproduce stability issues
- ✅ For performance benchmarking
- ❌ NOT on every commit (too slow for CI)
### Running Soak Tests Locally
**File-based comprehensive test (2 hours):**
```bash
go test -v -tags="integration,soak" -timeout=3h -run=TestComprehensiveSoak ./tests/integration/
```
**MinIO S3 test (2 hours, requires Docker):**
```bash
# Ensure Docker is running
go test -v -tags="integration,soak,docker" -timeout=3h -run=TestMinIOSoak ./tests/integration/
```
**Overnight S3 test (8 hours, requires AWS):**
```bash
export AWS_ACCESS_KEY_ID=your_key
export AWS_SECRET_ACCESS_KEY=your_secret
export S3_BUCKET=your-test-bucket
export AWS_REGION=us-east-1
go test -v -tags="integration,soak,aws" -timeout=10h -run=TestOvernightS3Soak ./tests/integration/
```
**Run all soak tests:**
```bash
go test -v -tags="integration,soak,docker,aws" -timeout=15h ./tests/integration/
```
### Adjust Duration for Testing
Tests respect the `-test.short` flag to run abbreviated versions:
```bash
# Run the comprehensive test for roughly 2 minutes instead of 2 hours
go test -v -tags="integration,soak" -timeout=1h -run=TestComprehensiveSoak ./tests/integration/ -test.short
```
### Soak Test Build Tags
Soak tests use multiple build tags to control execution:
- `integration` - Required for all integration tests
- `soak` - Marks long-running stability tests (2-8 hours)
- `docker` - Requires Docker (MinIO test)
- `aws` - Requires AWS credentials (S3 tests)
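A test requiring several of these tags combines them in its build constraint, e.g. for the MinIO soak test (a sketch; see the file header for the authoritative line):
```go
//go:build integration && soak && docker

package integration
```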
### Monitoring Soak Tests
All soak tests log progress every 60 seconds:
```bash
# Watch test progress in real-time
go test -v -tags="integration,soak" -run=TestComprehensiveSoak ./tests/integration/ 2>&1 | tee soak-test.log
```
Metrics reported during execution:
- Database size and WAL size
- Row count
- Replica statistics (snapshots, LTX segments)
- Operation counts (checkpoints, compactions, syncs)
- Error counts
- Write rate
### Soak Test Summary
| Test | Duration | Requirements | What It Tests |
|------|----------|--------------|---------------|
| TestComprehensiveSoak | 2h | None | File-based replication with aggressive compaction |
| TestMinIOSoak | 2h | Docker | S3-compatible storage via MinIO container |
| TestOvernightS3Soak | 8h | AWS credentials | Real S3 replication, overnight stability |
## Benefits Over Bash
1. **Type Safety** - Compile-time error checking
2. **Better Debugging** - Use standard Go debugging tools
3. **Code Reuse** - Shared helpers and fixtures
4. **Parallel Execution** - Tests can run concurrently
5. **CI Integration** - Run automatically on PRs
6. **Test Coverage** - Measure code coverage
7. **Consistent Patterns** - Standard Go testing conventions
8. **Better Error Messages** - Structured, clear reporting
9. **Platform Independent** - Works on Linux, macOS, Windows
10. **IDE Integration** - Full editor support
## Contributing
When adding new integration tests:
1. Use appropriate build tags (`//go:build integration` or `//go:build integration && long`)
2. Call `RequireBinaries(t)` to check prerequisites
3. Use `SetupTestDB(t, name)` for test setup
4. Call `defer db.Cleanup()` for automatic cleanup
5. Log test progress with descriptive messages
6. Use `GetTestDuration(t, default)` for configurable durations
7. Add test to CI workflow if appropriate
8. Update this README with new test documentation
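A skeleton following this checklist might look like the sketch below (names and durations are illustrative):
```go
//go:build integration && long

package integration

import (
	"testing"
	"time"
)

// Sketch: skeleton for a new long-running integration test.
func TestMyLongScenario(t *testing.T) {
	RequireBinaries(t)                          // 2: check prerequisites
	duration := GetTestDuration(t, 1*time.Hour) // 6: configurable duration

	db := SetupTestDB(t, "my-long-scenario") // 3: standard setup
	defer db.Cleanup()                       // 4: automatic cleanup

	t.Logf("Testing: my scenario (duration: %v)", duration) // 5: progress logging
	// ... exercise replication here ...
}
```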
## Related Documentation
- [cmd/litestream-test README](../../cmd/litestream-test/README.md) - Testing harness CLI
- [scripts/README.md](../../scripts/README.md) - Legacy bash test scripts
- [GitHub Issue #798](https://github.com/benbjohnson/litestream/issues/798) - Migration tracking

200
tests/integration/boundary_test.go Normal file
View File

@@ -0,0 +1,200 @@
//go:build integration
package integration
import (
"fmt"
"path/filepath"
"strings"
"testing"
"time"
_ "github.com/mattn/go-sqlite3"
)
func Test1GBBoundary(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
RequireBinaries(t)
t.Log("Testing: SQLite 1GB lock page boundary handling")
t.Log("This tests database growth beyond 1GB with 4KB pages (lock page at #262145)")
db := SetupTestDB(t, "1gb-boundary")
defer db.Cleanup()
t.Log("[1] Creating database with 4KB page size...")
if err := db.CreateWithPageSize(4096); err != nil {
t.Fatalf("Failed to create database: %v", err)
}
t.Log("✓ Database created with 4KB pages")
t.Log("[2] Populating to 1.5GB to cross lock page boundary...")
if err := db.PopulateWithOptions("1.5GB", 4096, 1024); err != nil {
t.Fatalf("Failed to populate database: %v", err)
}
dbSize, err := db.GetDatabaseSize()
if err != nil {
t.Fatalf("Failed to get database size: %v", err)
}
sizeGB := float64(dbSize) / (1024 * 1024 * 1024)
t.Logf("✓ Database populated: %.2f GB", sizeGB)
if sizeGB < 1.0 {
t.Fatalf("Database did not reach 1GB threshold: %.2f GB", sizeGB)
}
t.Log("[3] Starting Litestream...")
if err := db.StartLitestream(); err != nil {
t.Fatalf("Failed to start Litestream: %v", err)
}
time.Sleep(30 * time.Second)
t.Log("[4] Checking replication across lock page boundary...")
fileCount, err := db.GetReplicaFileCount()
if err != nil {
t.Fatalf("Failed to check replica: %v", err)
}
if fileCount == 0 {
t.Fatal("No LTX files created!")
}
t.Logf("✓ Replication started: %d LTX files", fileCount)
t.Log("[5] Checking for lock page errors...")
errors, err := db.CheckForErrors()
if err != nil {
t.Fatalf("Failed to check errors: %v", err)
}
lockPageErrors := 0
for _, errMsg := range errors {
if containsAny(errMsg, []string{"lock page", "page 262145", "locking page"}) {
lockPageErrors++
t.Logf("Lock page error: %s", errMsg)
}
}
if lockPageErrors > 0 {
t.Fatalf("Found %d lock page errors!", lockPageErrors)
}
t.Log("✓ No lock page errors detected")
db.StopLitestream()
time.Sleep(2 * time.Second)
t.Log("[6] Testing restore of large database...")
restoredPath := filepath.Join(db.TempDir, "1gb-restored.db")
if err := db.Restore(restoredPath); err != nil {
t.Fatalf("Restore failed: %v", err)
}
t.Log("✓ Restore successful")
t.Log("[7] Validating restored database integrity...")
if err := db.QuickValidate(restoredPath); err != nil {
t.Fatalf("Validation failed: %v", err)
}
restoredDB := &TestDB{Path: restoredPath, t: t}
restoredSize, _ := restoredDB.GetDatabaseSize()
restoredSizeGB := float64(restoredSize) / (1024 * 1024 * 1024)
t.Logf("✓ Restored database size: %.2f GB", restoredSizeGB)
if restoredSizeGB < 0.9 {
t.Fatalf("Restored database too small: %.2f GB (expected ~%.2f GB)", restoredSizeGB, sizeGB)
}
t.Log("TEST PASSED: 1GB lock page boundary handled correctly")
}
func TestLockPageWithDifferentPageSizes(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
RequireBinaries(t)
t.Log("Testing: Lock page handling with different SQLite page sizes")
pageSizes := []struct {
size int
lockPageNum int
targetSizeMB int
}{
{4096, 262145, 1200},
{8192, 131073, 1200},
}
for _, ps := range pageSizes {
t.Run(fmt.Sprintf("PageSize%d", ps.size), func(t *testing.T) {
db := SetupTestDB(t, fmt.Sprintf("lockpage-%d", ps.size))
defer db.Cleanup()
t.Logf("[1] Creating database with %d byte page size (lock page at #%d)...", ps.size, ps.lockPageNum)
if err := db.CreateWithPageSize(ps.size); err != nil {
t.Fatalf("Failed to create database: %v", err)
}
t.Logf("[2] Populating to %dMB...", ps.targetSizeMB)
if err := db.PopulateWithOptions(fmt.Sprintf("%dMB", ps.targetSizeMB), ps.size, 1024); err != nil {
t.Fatalf("Failed to populate database: %v", err)
}
dbSize, _ := db.GetDatabaseSize()
t.Logf("✓ Database: %.2f MB", float64(dbSize)/(1024*1024))
t.Log("[3] Starting replication...")
if err := db.StartLitestream(); err != nil {
t.Fatalf("Failed to start Litestream: %v", err)
}
time.Sleep(20 * time.Second)
fileCount, _ := db.GetReplicaFileCount()
t.Logf("✓ LTX files: %d", fileCount)
db.StopLitestream()
t.Log("[4] Testing restore...")
restoredPath := filepath.Join(db.TempDir, fmt.Sprintf("lockpage-%d-restored.db", ps.size))
if err := db.Restore(restoredPath); err != nil {
t.Fatalf("Restore failed: %v", err)
}
t.Log("✓ Test passed for page size", ps.size)
})
}
t.Log("TEST PASSED: All page sizes handled correctly")
}
// containsAny reports whether s contains any of the given substrings.
func containsAny(s string, substrs []string) bool {
for _, substr := range substrs {
if contains(s, substr) {
return true
}
}
return false
}
// contains is a thin wrapper around strings.Contains, kept for call-site brevity.
func contains(s, substr string) bool {
return strings.Contains(s, substr)
}

266
tests/integration/comprehensive_soak_test.go Normal file
View File

@@ -0,0 +1,266 @@
//go:build integration && soak
package integration
import (
"context"
"fmt"
"path/filepath"
"testing"
"time"
)
// TestComprehensiveSoak runs a comprehensive soak test with aggressive settings
// to validate all Litestream features: replication, snapshots, compaction, checkpoints.
//
// Default duration: 2 hours
// Can be shortened with: go test -test.short (runs for roughly 2 minutes)
//
// This test exercises:
// - Continuous replication
// - Snapshot generation (every 10m)
// - Compaction (30s/1m/5m/15m/30m intervals)
// - Checkpoint operations
// - Database restoration
func TestComprehensiveSoak(t *testing.T) {
RequireBinaries(t)
// Determine test duration
duration := GetTestDuration(t, 2*time.Hour)
shortMode := testing.Short()
if shortMode {
duration = 2 * time.Minute
}
targetSize := "50MB"
writeRate := 500
if shortMode {
targetSize = "5MB"
writeRate = 100
}
t.Logf("================================================")
t.Logf("Litestream Comprehensive Soak Test")
t.Logf("================================================")
t.Logf("Duration: %v", duration)
t.Logf("Start time: %s", time.Now().Format(time.RFC3339))
t.Log("")
t.Log("This test uses aggressive settings to validate:")
t.Log(" - Continuous replication")
t.Log(" - Snapshot generation (every 10m)")
t.Log(" - Compaction (30s/1m/5m intervals)")
t.Log(" - Checkpoint operations")
t.Log(" - Database restoration")
t.Log("")
startTime := time.Now()
// Setup test database
db := SetupTestDB(t, "comprehensive-soak")
defer db.Cleanup()
// Create database
if err := db.Create(); err != nil {
t.Fatalf("Failed to create database: %v", err)
}
// Populate database
t.Logf("Populating database (%s initial data)...", targetSize)
if err := db.Populate(targetSize); err != nil {
t.Fatalf("Failed to populate database: %v", err)
}
t.Log("✓ Database populated")
t.Log("")
// Create aggressive configuration for testing
t.Log("Creating aggressive test configuration...")
replicaURL := fmt.Sprintf("file://%s", filepath.ToSlash(db.ReplicaPath))
configPath := CreateSoakConfig(db.Path, replicaURL, nil, shortMode)
db.ConfigPath = configPath
t.Logf("✓ Configuration created: %s", configPath)
t.Log("")
// Start Litestream
t.Log("Starting Litestream replication...")
if err := db.StartLitestreamWithConfig(configPath); err != nil {
t.Fatalf("Failed to start Litestream: %v", err)
}
t.Logf("✓ Litestream running (PID: %d)", db.LitestreamPID)
t.Log("")
// Start load generator with heavy sustained load
t.Log("Starting load generator (heavy sustained load)...")
t.Logf(" Write rate: %d writes/second", writeRate)
t.Logf(" Pattern: wave (simulates varying load)")
t.Logf(" Payload size: 4KB")
t.Logf(" Workers: 8")
t.Log("")
ctx, cancel := context.WithTimeout(context.Background(), duration)
defer cancel()
// Setup signal handler for graceful interruption
testInfo := &TestInfo{
StartTime: startTime,
Duration: duration,
DB: db,
cancel: cancel,
}
setupSignalHandler(t, cancel, testInfo)
// Run load generation in background
loadDone := make(chan error, 1)
go func() {
loadDone <- db.GenerateLoad(ctx, writeRate, duration, "wave")
}()
// Monitor every 60 seconds
t.Log("Running comprehensive test...")
t.Log("Monitor will report every 60 seconds")
t.Log("Press Ctrl+C twice within 5 seconds to stop early")
t.Log("================================================")
t.Log("")
refreshStats := func() {
testInfo.RowCount, _ = db.GetRowCount("load_test")
if testInfo.RowCount == 0 {
testInfo.RowCount, _ = db.GetRowCount("test_table_0")
}
if testInfo.RowCount == 0 {
testInfo.RowCount, _ = db.GetRowCount("test_data")
}
testInfo.FileCount, _ = db.GetReplicaFileCount()
}
logMetrics := func() {
LogSoakMetrics(t, db, "comprehensive")
if db.LitestreamCmd != nil && db.LitestreamCmd.ProcessState != nil {
t.Error("✗ Litestream stopped unexpectedly!")
if testInfo.cancel != nil {
testInfo.cancel()
}
}
}
MonitorSoakTest(t, db, ctx, testInfo, refreshStats, logMetrics)
// Wait for load generation to complete
if err := <-loadDone; err != nil {
t.Logf("Load generation completed: %v", err)
}
if err := db.WaitForSnapshots(30 * time.Second); err != nil {
t.Fatalf("Failed waiting for snapshot: %v", err)
}
t.Log("")
t.Log("================================================")
t.Log("Final Test Results")
t.Log("================================================")
t.Log("")
// Stop Litestream
t.Log("Stopping Litestream...")
if err := db.StopLitestream(); err != nil {
t.Logf("Warning: Failed to stop Litestream cleanly: %v", err)
}
// Final statistics
t.Log("Database Statistics:")
if dbSize, err := db.GetDatabaseSize(); err == nil {
t.Logf(" Final size: %.2f MB", float64(dbSize)/(1024*1024))
}
// Count rows using different table name possibilities
var rowCount int
var err error
if rowCount, err = db.GetRowCount("load_test"); err != nil {
if rowCount, err = db.GetRowCount("test_table_0"); err != nil {
if rowCount, err = db.GetRowCount("test_data"); err != nil {
t.Logf(" Warning: Could not get row count: %v", err)
}
}
}
if err == nil {
t.Logf(" Total rows: %d", rowCount)
}
t.Log("")
// Replica statistics
t.Log("Replication Statistics:")
if fileCount, err := db.GetReplicaFileCount(); err == nil {
t.Logf(" LTX segments: %d", fileCount)
}
// Check for errors
errors, _ := db.CheckForErrors()
criticalErrors := 0
for _, errLine := range errors {
// Filter out known non-critical errors
if !containsAny(errLine, []string{"page size not initialized"}) {
criticalErrors++
}
}
t.Logf(" Critical errors: %d", criticalErrors)
t.Log("")
// Test restoration
t.Log("Testing restoration...")
restoredPath := filepath.Join(db.TempDir, "restored.db")
if err := db.Restore(restoredPath); err != nil {
t.Fatalf("Restoration failed: %v", err)
}
t.Log("✓ Restoration successful!")
// Validate
t.Log("")
t.Log("Validating restored database integrity...")
restoredDB := &TestDB{Path: restoredPath, t: t}
if err := restoredDB.IntegrityCheck(); err != nil {
t.Fatalf("Integrity check failed: %v", err)
}
t.Log("✓ Integrity check passed!")
// Analyze test results
analysis := AnalyzeSoakTest(t, db, duration)
PrintSoakTestAnalysis(t, analysis)
// Test Summary
t.Log("================================================")
t.Log("Test Summary")
t.Log("================================================")
testPassed := true
issues := []string{}
if criticalErrors > 0 {
testPassed = false
issues = append(issues, fmt.Sprintf("Critical errors detected: %d", criticalErrors))
}
if analysis.FinalFileCount == 0 {
testPassed = false
issues = append(issues, "No files created (replication not working)")
}
if testPassed {
t.Log("✓ TEST PASSED!")
t.Log("")
t.Log("The configuration is ready for production use.")
} else {
t.Log("⚠ TEST COMPLETED WITH ISSUES:")
for _, issue := range issues {
t.Logf(" - %s", issue)
}
t.Log("")
t.Log("Review the logs for details:")
logPath, _ := db.GetLitestreamLog()
t.Logf(" %s", logPath)
t.Fail()
}
t.Log("")
t.Logf("Test duration: %v", time.Since(startTime).Round(time.Second))
t.Logf("Results available in: %s", db.TempDir)
t.Log("================================================")
}

482
tests/integration/concurrent_test.go Normal file
View File

@@ -0,0 +1,482 @@
//go:build integration
package integration
import (
"context"
"database/sql"
"fmt"
"os"
"path/filepath"
"testing"
"time"
_ "github.com/mattn/go-sqlite3"
)
func TestRapidCheckpoints(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
RequireBinaries(t)
t.Log("Testing: Litestream under rapid checkpoint pressure")
db := SetupTestDB(t, "rapid-checkpoints")
defer db.Cleanup()
if err := db.Create(); err != nil {
t.Fatalf("Failed to create database: %v", err)
}
t.Log("[1] Starting Litestream...")
if err := db.StartLitestream(); err != nil {
t.Fatalf("Failed to start Litestream: %v", err)
}
time.Sleep(3 * time.Second)
t.Log("[2] Generating rapid writes with frequent checkpoints...")
sqlDB, err := sql.Open("sqlite3", db.Path)
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
defer sqlDB.Close()
if _, err := sqlDB.Exec(`
CREATE TABLE checkpoint_test (
id INTEGER PRIMARY KEY AUTOINCREMENT,
data BLOB,
timestamp INTEGER
)
`); err != nil {
t.Fatalf("Failed to create table: %v", err)
}
data := make([]byte, 4096)
checkpointCount := 0
for i := 0; i < 1000; i++ {
if _, err := sqlDB.Exec(
"INSERT INTO checkpoint_test (data, timestamp) VALUES (?, ?)",
data,
time.Now().Unix(),
); err != nil {
t.Fatalf("Failed to insert row %d: %v", i, err)
}
if i%100 == 0 {
if _, err := sqlDB.Exec("PRAGMA wal_checkpoint(TRUNCATE)"); err != nil {
t.Logf("Checkpoint %d failed: %v", checkpointCount, err)
} else {
checkpointCount++
t.Logf("Checkpoint %d completed at row %d", checkpointCount, i)
}
}
}
t.Logf("✓ Generated 1000 writes with %d checkpoints", checkpointCount)
time.Sleep(5 * time.Second)
db.StopLitestream()
time.Sleep(2 * time.Second)
t.Log("[3] Checking for errors...")
errors, err := db.CheckForErrors()
if err != nil {
t.Fatalf("Failed to check errors: %v", err)
}
if len(errors) > 5 {
t.Fatalf("Too many errors (%d), showing first 5:\n%v", len(errors), errors[:5])
} else if len(errors) > 0 {
t.Logf("Found %d errors (acceptable for checkpoint stress)", len(errors))
}
t.Log("[4] Verifying replica...")
fileCount, err := db.GetReplicaFileCount()
if err != nil {
t.Fatalf("Failed to check replica: %v", err)
}
if fileCount == 0 {
t.Fatal("No replica files created!")
}
t.Logf("✓ Replica created with %d files", fileCount)
t.Log("[5] Testing restore...")
restoredPath := filepath.Join(db.TempDir, "checkpoint-restored.db")
if err := db.Restore(restoredPath); err != nil {
t.Fatalf("Restore failed: %v", err)
}
t.Log("✓ Restore successful")
origCount, err := db.GetRowCount("checkpoint_test")
if err != nil {
t.Fatalf("Failed to get original row count: %v", err)
}
restoredDB := &TestDB{Path: restoredPath, t: t}
restCount, err := restoredDB.GetRowCount("checkpoint_test")
if err != nil {
t.Fatalf("Failed to get restored row count: %v", err)
}
if origCount != restCount {
t.Fatalf("Count mismatch: original=%d, restored=%d", origCount, restCount)
}
t.Logf("✓ Data integrity verified: %d rows", origCount)
t.Log("TEST PASSED: Handled rapid checkpoints successfully")
}
func TestWALGrowth(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
RequireBinaries(t)
duration := GetTestDuration(t, 2*time.Minute)
t.Logf("Testing: Large WAL file handling (duration: %v)", duration)
db := SetupTestDB(t, "wal-growth")
defer db.Cleanup()
if err := db.Create(); err != nil {
t.Fatalf("Failed to create database: %v", err)
}
t.Log("[1] Creating test table...")
sqlDB, err := sql.Open("sqlite3", db.Path)
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
defer sqlDB.Close()
if _, err := sqlDB.Exec(`
CREATE TABLE wal_test (
id INTEGER PRIMARY KEY AUTOINCREMENT,
data BLOB
)
`); err != nil {
t.Fatalf("Failed to create table: %v", err)
}
t.Log("✓ Table created")
t.Log("[2] Starting Litestream...")
if err := db.StartLitestream(); err != nil {
t.Fatalf("Failed to start Litestream: %v", err)
}
time.Sleep(3 * time.Second)
t.Log("[3] Generating sustained write load...")
ctx, cancel := context.WithTimeout(context.Background(), duration)
defer cancel()
config := DefaultLoadConfig()
config.WriteRate = 400
config.Duration = duration
config.Pattern = LoadPatternWave
config.PayloadSize = 10 * 1024
config.Workers = 4
if err := db.GenerateLoad(ctx, config.WriteRate, config.Duration, string(config.Pattern)); err != nil && ctx.Err() == nil {
t.Fatalf("Load generation failed: %v", err)
}
t.Log("✓ Load generation complete")
time.Sleep(5 * time.Second)
t.Log("[4] Checking WAL size...")
walPath := db.Path + "-wal"
walSize, err := getFileSize(walPath)
if err != nil {
t.Logf("WAL file not found (may have been checkpointed): %v", err)
} else {
t.Logf("WAL size: %.2f MB", float64(walSize)/(1024*1024))
}
dbSize, err := db.GetDatabaseSize()
if err != nil {
t.Fatalf("Failed to get database size: %v", err)
}
t.Logf("Total database size: %.2f MB", float64(dbSize)/(1024*1024))
db.StopLitestream()
time.Sleep(2 * time.Second)
t.Log("[5] Checking for errors...")
errors, err := db.CheckForErrors()
if err != nil {
t.Fatalf("Failed to check errors: %v", err)
}
if len(errors) > 10 {
t.Fatalf("Too many errors (%d), showing first 5:\n%v", len(errors), errors[:5])
}
t.Logf("✓ Found %d errors (acceptable)", len(errors))
t.Log("[6] Testing restore...")
restoredPath := filepath.Join(db.TempDir, "wal-restored.db")
if err := db.Restore(restoredPath); err != nil {
t.Fatalf("Restore failed: %v", err)
}
t.Log("✓ Restore successful")
origCount, err := db.GetRowCount("wal_test")
if err != nil {
t.Fatalf("Failed to get original row count: %v", err)
}
restoredDB := &TestDB{Path: restoredPath, t: t}
restCount, err := restoredDB.GetRowCount("wal_test")
if err != nil {
t.Fatalf("Failed to get restored row count: %v", err)
}
if origCount != restCount {
t.Fatalf("Count mismatch: original=%d, restored=%d", origCount, restCount)
}
t.Logf("✓ Data integrity verified: %d rows", origCount)
t.Log("TEST PASSED: Handled large WAL successfully")
}
func TestConcurrentOperations(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
RequireBinaries(t)
duration := GetTestDuration(t, 3*time.Minute)
t.Logf("Testing: Multiple databases replicating concurrently (duration: %v)", duration)
dbCount := 3
dbs := make([]*TestDB, dbCount)
for i := 0; i < dbCount; i++ {
dbs[i] = SetupTestDB(t, fmt.Sprintf("concurrent-%d", i))
defer dbs[i].Cleanup()
}
t.Log("[1] Creating databases...")
for i, db := range dbs {
if err := db.Create(); err != nil {
t.Fatalf("Failed to create database %d: %v", i, err)
}
if err := CreateTestTable(t, db.Path); err != nil {
t.Fatalf("Failed to create table for database %d: %v", i, err)
}
}
t.Logf("✓ Created %d databases", dbCount)
t.Log("[2] Starting Litestream for all databases...")
for i, db := range dbs {
if err := db.StartLitestream(); err != nil {
t.Fatalf("Failed to start Litestream for database %d: %v", i, err)
}
time.Sleep(1 * time.Second)
}
t.Logf("✓ All Litestream instances running")
t.Log("[3] Generating concurrent load...")
ctx, cancel := context.WithTimeout(context.Background(), duration)
defer cancel()
done := make(chan error, dbCount)
for i, db := range dbs {
go func(idx int, database *TestDB) {
config := DefaultLoadConfig()
config.WriteRate = 50
config.Duration = duration
config.Pattern = LoadPatternConstant
config.Workers = 2
err := database.GenerateLoad(ctx, config.WriteRate, config.Duration, string(config.Pattern))
done <- err
}(i, db)
}
for i := 0; i < dbCount; i++ {
if err := <-done; err != nil && ctx.Err() == nil {
t.Logf("Load generation %d had error: %v", i, err)
}
}
t.Log("✓ Concurrent load complete")
time.Sleep(5 * time.Second)
t.Log("[4] Stopping all Litestream instances...")
for _, db := range dbs {
db.StopLitestream()
}
time.Sleep(2 * time.Second)
t.Log("[5] Verifying all replicas...")
for i, db := range dbs {
fileCount, err := db.GetReplicaFileCount()
if err != nil {
t.Fatalf("Failed to check replica %d: %v", i, err)
}
if fileCount == 0 {
t.Fatalf("Database %d has no replica files!", i)
}
t.Logf("✓ Database %d: %d replica files", i, fileCount)
}
t.Log("[6] Testing restore for all databases...")
for i, db := range dbs {
restoredPath := filepath.Join(db.TempDir, fmt.Sprintf("concurrent-restored-%d.db", i))
if err := db.Restore(restoredPath); err != nil {
t.Fatalf("Restore failed for database %d: %v", i, err)
}
origCount, _ := db.GetRowCount("test_data")
restoredDB := &TestDB{Path: restoredPath, t: t}
restCount, _ := restoredDB.GetRowCount("test_data")
if origCount != restCount {
t.Fatalf("Database %d count mismatch: original=%d, restored=%d", i, origCount, restCount)
}
t.Logf("✓ Database %d verified: %d rows", i, origCount)
}
t.Log("TEST PASSED: Concurrent replication works correctly")
}
func TestBusyTimeout(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
RequireBinaries(t)
t.Log("Testing: Database busy timeout handling")
db := SetupTestDB(t, "busy-timeout")
defer db.Cleanup()
if err := db.Create(); err != nil {
t.Fatalf("Failed to create database: %v", err)
}
t.Log("[1] Creating test data...")
if err := CreateTestTable(t, db.Path); err != nil {
t.Fatalf("Failed to create table: %v", err)
}
if err := InsertTestData(t, db.Path, 100); err != nil {
t.Fatalf("Failed to insert test data: %v", err)
}
t.Log("✓ Created table with 100 rows")
t.Log("[2] Starting Litestream...")
if err := db.StartLitestream(); err != nil {
t.Fatalf("Failed to start Litestream: %v", err)
}
time.Sleep(3 * time.Second)
t.Log("[3] Simulating concurrent access with long transactions...")
sqlDB, err := sql.Open("sqlite3", db.Path+"?_busy_timeout=5000")
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
defer sqlDB.Close()
tx, err := sqlDB.Begin()
if err != nil {
t.Fatalf("Failed to begin transaction: %v", err)
}
for i := 0; i < 500; i++ {
if _, err := tx.Exec(
"INSERT INTO test_data (data, created_at) VALUES (?, ?)",
fmt.Sprintf("busy test %d", i),
time.Now().Unix(),
); err != nil {
t.Fatalf("Failed to insert in transaction: %v", err)
}
if i%100 == 0 {
time.Sleep(500 * time.Millisecond)
}
}
if err := tx.Commit(); err != nil {
t.Fatalf("Failed to commit transaction: %v", err)
}
t.Log("✓ Long transaction completed")
time.Sleep(5 * time.Second)
db.StopLitestream()
time.Sleep(2 * time.Second)
t.Log("[4] Checking for errors...")
errors, err := db.CheckForErrors()
if err != nil {
t.Fatalf("Failed to check errors: %v", err)
}
if len(errors) > 0 {
t.Logf("Found %d errors (may include busy timeout messages)", len(errors))
}
t.Log("[5] Testing restore...")
restoredPath := filepath.Join(db.TempDir, "busy-restored.db")
if err := db.Restore(restoredPath); err != nil {
t.Fatalf("Restore failed: %v", err)
}
t.Log("✓ Restore successful")
origCount, err := db.GetRowCount("test_data")
if err != nil {
t.Fatalf("Failed to get original row count: %v", err)
}
restoredDB := &TestDB{Path: restoredPath, t: t}
restCount, err := restoredDB.GetRowCount("test_data")
if err != nil {
t.Fatalf("Failed to get restored row count: %v", err)
}
if origCount != restCount {
t.Fatalf("Count mismatch: original=%d, restored=%d", origCount, restCount)
}
t.Logf("✓ Data integrity verified: %d rows", origCount)
t.Log("TEST PASSED: Busy timeout handled correctly")
}
func getFileSize(path string) (int64, error) {
info, err := os.Stat(path)
if err != nil {
return 0, err
}
return info.Size(), nil
}

View File

@@ -0,0 +1,298 @@
//go:build integration
package integration
import (
"crypto/rand"
"database/sql"
"fmt"
"time"
_ "github.com/mattn/go-sqlite3"
)
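// LoadPattern names a write-load shape; the value is passed through to the
// litestream-test load command's -pattern flag (see TestDB.GenerateLoad).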
type LoadPattern string
const (
LoadPatternConstant LoadPattern = "constant"
LoadPatternBurst LoadPattern = "burst"
LoadPatternRandom LoadPattern = "random"
LoadPatternWave LoadPattern = "wave"
)
type LoadConfig struct {
WriteRate int
Duration time.Duration
Pattern LoadPattern
PayloadSize int
ReadRatio float64
Workers int
}
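// DefaultLoadConfig returns the baseline load settings. Tests typically start
// from these defaults and override individual fields, e.g. (sketch):
//
//	cfg := DefaultLoadConfig()
//	cfg.WriteRate = 400
//	cfg.Pattern = LoadPatternWave
//	cfg.Workers = 4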
func DefaultLoadConfig() *LoadConfig {
return &LoadConfig{
WriteRate: 100,
Duration: 1 * time.Minute,
Pattern: LoadPatternConstant,
PayloadSize: 1024,
ReadRatio: 0.2,
Workers: 1,
}
}
type PopulateConfig struct {
TargetSize string
RowSize int
BatchSize int
TableCount int
IndexRatio float64
PageSize int
}
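// DefaultPopulateConfig returns the baseline population settings. A test that
// needs a bigger database can override fields, e.g. (hypothetical values):
//
//	pc := DefaultPopulateConfig()
//	pc.TargetSize = "1GB"
//	pc.RowSize = 4096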
func DefaultPopulateConfig() *PopulateConfig {
return &PopulateConfig{
TargetSize: "100MB",
RowSize: 1024,
BatchSize: 1000,
TableCount: 1,
IndexRatio: 0.2,
PageSize: 4096,
}
}
func CreateComplexTestSchema(db *sql.DB) error {
schemas := []string{
`CREATE TABLE IF NOT EXISTS users (
id INTEGER PRIMARY KEY AUTOINCREMENT,
username TEXT NOT NULL UNIQUE,
email TEXT NOT NULL,
created_at INTEGER NOT NULL
)`,
`CREATE TABLE IF NOT EXISTS posts (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER NOT NULL,
title TEXT NOT NULL,
content TEXT,
created_at INTEGER NOT NULL,
FOREIGN KEY (user_id) REFERENCES users(id)
)`,
`CREATE TABLE IF NOT EXISTS comments (
id INTEGER PRIMARY KEY AUTOINCREMENT,
post_id INTEGER NOT NULL,
user_id INTEGER NOT NULL,
content TEXT NOT NULL,
created_at INTEGER NOT NULL,
FOREIGN KEY (post_id) REFERENCES posts(id),
FOREIGN KEY (user_id) REFERENCES users(id)
)`,
`CREATE INDEX IF NOT EXISTS idx_posts_user_id ON posts(user_id)`,
`CREATE INDEX IF NOT EXISTS idx_posts_created_at ON posts(created_at)`,
`CREATE INDEX IF NOT EXISTS idx_comments_post_id ON comments(post_id)`,
`CREATE INDEX IF NOT EXISTS idx_comments_created_at ON comments(created_at)`,
}
for _, schema := range schemas {
if _, err := db.Exec(schema); err != nil {
return fmt.Errorf("execute schema: %w", err)
}
}
return nil
}
func PopulateComplexTestData(db *sql.DB, userCount, postsPerUser, commentsPerPost int) error {
tx, err := db.Begin()
if err != nil {
return fmt.Errorf("begin transaction: %w", err)
}
defer tx.Rollback()
userStmt, err := tx.Prepare("INSERT INTO users (username, email, created_at) VALUES (?, ?, ?)")
if err != nil {
return fmt.Errorf("prepare user statement: %w", err)
}
defer userStmt.Close()
postStmt, err := tx.Prepare("INSERT INTO posts (user_id, title, content, created_at) VALUES (?, ?, ?, ?)")
if err != nil {
return fmt.Errorf("prepare post statement: %w", err)
}
defer postStmt.Close()
commentStmt, err := tx.Prepare("INSERT INTO comments (post_id, user_id, content, created_at) VALUES (?, ?, ?, ?)")
if err != nil {
return fmt.Errorf("prepare comment statement: %w", err)
}
defer commentStmt.Close()
now := time.Now().Unix()
for u := 1; u <= userCount; u++ {
userResult, err := userStmt.Exec(
fmt.Sprintf("user%d", u),
fmt.Sprintf("user%d@test.com", u),
now,
)
if err != nil {
return fmt.Errorf("insert user: %w", err)
}
userID, err := userResult.LastInsertId()
if err != nil {
return fmt.Errorf("get user id: %w", err)
}
for p := 1; p <= postsPerUser; p++ {
postResult, err := postStmt.Exec(
userID,
fmt.Sprintf("Post %d from user %d", p, u),
generateRandomContent(100),
now,
)
if err != nil {
return fmt.Errorf("insert post: %w", err)
}
postID, err := postResult.LastInsertId()
if err != nil {
return fmt.Errorf("get post id: %w", err)
}
for c := 1; c <= commentsPerPost; c++ {
commentUserID := (u + c) % userCount
if commentUserID == 0 {
commentUserID = userCount
}
_, err := commentStmt.Exec(
postID,
commentUserID,
generateRandomContent(50),
now,
)
if err != nil {
return fmt.Errorf("insert comment: %w", err)
}
}
}
}
return tx.Commit()
}
func generateRandomContent(length int) string {
const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 "
b := make([]byte, length)
rand.Read(b)
for i := range b {
b[i] = charset[int(b[i])%len(charset)]
}
return string(b)
}
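// TestScenario pairs a Setup step, which seeds the source database, with a
// Validate step that compares the source against a restored copy.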
type TestScenario struct {
Name string
Description string
Setup func(*sql.DB) error
Validate func(*sql.DB, *sql.DB) error
}
func LargeWALScenario() *TestScenario {
return &TestScenario{
Name: "Large WAL",
Description: "Generate large WAL file to test handling",
Setup: func(db *sql.DB) error {
if _, err := db.Exec(`
CREATE TABLE test_wal (
id INTEGER PRIMARY KEY AUTOINCREMENT,
data BLOB
)
`); err != nil {
return err
}
data := make([]byte, 10*1024)
rand.Read(data)
for i := 0; i < 10000; i++ {
if _, err := db.Exec("INSERT INTO test_wal (data) VALUES (?)", data); err != nil {
return err
}
}
return nil
},
Validate: func(source, restored *sql.DB) error {
var sourceCount, restoredCount int
if err := source.QueryRow("SELECT COUNT(*) FROM test_wal").Scan(&sourceCount); err != nil {
return fmt.Errorf("query source: %w", err)
}
if err := restored.QueryRow("SELECT COUNT(*) FROM test_wal").Scan(&restoredCount); err != nil {
return fmt.Errorf("query restored: %w", err)
}
if sourceCount != restoredCount {
return fmt.Errorf("count mismatch: source=%d, restored=%d", sourceCount, restoredCount)
}
return nil
},
}
}
func RapidCheckpointsScenario() *TestScenario {
return &TestScenario{
Name: "Rapid Checkpoints",
Description: "Test rapid checkpoint operations",
Setup: func(db *sql.DB) error {
if _, err := db.Exec(`
CREATE TABLE test_checkpoints (
id INTEGER PRIMARY KEY AUTOINCREMENT,
data TEXT,
timestamp INTEGER
)
`); err != nil {
return err
}
for i := 0; i < 1000; i++ {
if _, err := db.Exec(
"INSERT INTO test_checkpoints (data, timestamp) VALUES (?, ?)",
fmt.Sprintf("data %d", i),
time.Now().Unix(),
); err != nil {
return err
}
if i%100 == 0 {
if _, err := db.Exec("PRAGMA wal_checkpoint(TRUNCATE)"); err != nil {
return err
}
}
}
return nil
},
Validate: func(source, restored *sql.DB) error {
var sourceCount, restoredCount int
if err := source.QueryRow("SELECT COUNT(*) FROM test_checkpoints").Scan(&sourceCount); err != nil {
return fmt.Errorf("query source: %w", err)
}
if err := restored.QueryRow("SELECT COUNT(*) FROM test_checkpoints").Scan(&restoredCount); err != nil {
return fmt.Errorf("query restored: %w", err)
}
if sourceCount != restoredCount {
return fmt.Errorf("count mismatch: source=%d, restored=%d", sourceCount, restoredCount)
}
return nil
},
}
}

View File

@@ -0,0 +1,554 @@
//go:build integration
package integration
import (
"bytes"
"context"
"database/sql"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"testing"
"time"
_ "github.com/mattn/go-sqlite3"
"github.com/benbjohnson/litestream"
)
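// TestDB bundles a SQLite database under test with its replica location and
// the Litestream process (if any) currently replicating it.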
type TestDB struct {
Path string
ReplicaPath string
ReplicaURL string
ReplicaEnv []string
ConfigPath string
TempDir string
LitestreamCmd *exec.Cmd
LitestreamPID int
t *testing.T
}
// getBinaryPath returns the cross-platform path to a binary.
// On Windows, it adds the .exe extension.
func getBinaryPath(name string) string {
binPath := filepath.Join("..", "..", "bin", name)
if runtime.GOOS == "windows" {
binPath += ".exe"
}
return binPath
}
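// streamCommandOutput reports whether child-process output should also be
// streamed to the test's stdout/stderr. Any SOAK_DEBUG value other than
// empty/0/false/off/no enables streaming, for example:
//
//	SOAK_DEBUG=1 go test -tags=integration ./tests/integration/...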
func streamCommandOutput() bool {
v := strings.ToLower(strings.TrimSpace(os.Getenv("SOAK_DEBUG")))
switch v {
case "", "0", "false", "off", "no":
return false
default:
return true
}
}
func configureCmdIO(cmd *exec.Cmd) (bool, *bytes.Buffer, *bytes.Buffer) {
stream := streamCommandOutput()
stdoutBuf := &bytes.Buffer{}
stderrBuf := &bytes.Buffer{}
if stream {
cmd.Stdout = io.MultiWriter(os.Stdout, stdoutBuf)
cmd.Stderr = io.MultiWriter(os.Stderr, stderrBuf)
} else {
cmd.Stdout = stdoutBuf
cmd.Stderr = stderrBuf
}
return stream, stdoutBuf, stderrBuf
}
func combinedOutput(stdoutBuf, stderrBuf *bytes.Buffer) string {
var sb strings.Builder
if stdoutBuf != nil && stdoutBuf.Len() > 0 {
sb.Write(stdoutBuf.Bytes())
}
if stderrBuf != nil && stderrBuf.Len() > 0 {
sb.Write(stderrBuf.Bytes())
}
return strings.TrimSpace(sb.String())
}
func SetupTestDB(t *testing.T, name string) *TestDB {
t.Helper()
var tempDir string
if os.Getenv("SOAK_KEEP_TEMP") != "" {
dir, err := os.MkdirTemp("", fmt.Sprintf("litestream-%s-", name))
if err != nil {
t.Fatalf("create temp dir: %v", err)
}
tempDir = dir
t.Cleanup(func() {
t.Logf("SOAK_KEEP_TEMP set, preserving test artifacts at: %s", tempDir)
})
} else {
tempDir = t.TempDir()
}
dbPath := filepath.Join(tempDir, fmt.Sprintf("%s.db", name))
replicaPath := filepath.Join(tempDir, "replica")
return &TestDB{
Path: dbPath,
ReplicaPath: replicaPath,
ReplicaURL: fmt.Sprintf("file://%s", filepath.ToSlash(replicaPath)),
TempDir: tempDir,
t: t,
}
}
func (db *TestDB) Create() error {
sqlDB, err := sql.Open("sqlite3", db.Path)
if err != nil {
return fmt.Errorf("open database: %w", err)
}
defer sqlDB.Close()
if _, err := sqlDB.Exec("PRAGMA journal_mode=WAL"); err != nil {
return fmt.Errorf("set WAL mode: %w", err)
}
return nil
}
func (db *TestDB) CreateWithPageSize(pageSize int) error {
sqlDB, err := sql.Open("sqlite3", db.Path)
if err != nil {
return fmt.Errorf("open database: %w", err)
}
defer sqlDB.Close()
if _, err := sqlDB.Exec(fmt.Sprintf("PRAGMA page_size = %d", pageSize)); err != nil {
return fmt.Errorf("set page size: %w", err)
}
if _, err := sqlDB.Exec("PRAGMA journal_mode=WAL"); err != nil {
return fmt.Errorf("set WAL mode: %w", err)
}
return nil
}
func (db *TestDB) Populate(targetSize string) error {
cmd := exec.Command(getBinaryPath("litestream-test"), "populate",
"-db", db.Path,
"-target-size", targetSize,
)
_, stdoutBuf, stderrBuf := configureCmdIO(cmd)
db.t.Logf("Populating database to %s...", targetSize)
if err := cmd.Run(); err != nil {
if output := combinedOutput(stdoutBuf, stderrBuf); output != "" {
return fmt.Errorf("populate failed: %w\nOutput: %s", err, output)
}
return fmt.Errorf("populate failed: %w", err)
}
return nil
}
func (db *TestDB) PopulateWithOptions(targetSize string, pageSize int, rowSize int) error {
cmd := exec.Command(getBinaryPath("litestream-test"), "populate",
"-db", db.Path,
"-target-size", targetSize,
"-page-size", fmt.Sprintf("%d", pageSize),
"-row-size", fmt.Sprintf("%d", rowSize),
)
_, stdoutBuf, stderrBuf := configureCmdIO(cmd)
db.t.Logf("Populating database to %s (page size: %d, row size: %d)...", targetSize, pageSize, rowSize)
if err := cmd.Run(); err != nil {
if output := combinedOutput(stdoutBuf, stderrBuf); output != "" {
return fmt.Errorf("populate failed: %w\nOutput: %s", err, output)
}
return fmt.Errorf("populate failed: %w", err)
}
return nil
}
func (db *TestDB) GenerateLoad(ctx context.Context, writeRate int, duration time.Duration, pattern string) error {
cmd := exec.CommandContext(ctx, getBinaryPath("litestream-test"), "load",
"-db", db.Path,
"-write-rate", fmt.Sprintf("%d", writeRate),
"-duration", duration.String(),
"-pattern", pattern,
)
_, stdoutBuf, stderrBuf := configureCmdIO(cmd)
db.t.Logf("Starting load generation: %d writes/sec for %v (%s pattern)", writeRate, duration, pattern)
if err := cmd.Run(); err != nil {
if output := combinedOutput(stdoutBuf, stderrBuf); output != "" {
return fmt.Errorf("load generation failed: %w\nOutput: %s", err, output)
}
return fmt.Errorf("load generation failed: %w", err)
}
return nil
}
func (db *TestDB) StartLitestream() error {
logPath := filepath.Join(db.TempDir, "litestream.log")
logFile, err := os.Create(logPath)
if err != nil {
return fmt.Errorf("create log file: %w", err)
}
replicaURL := fmt.Sprintf("file://%s", filepath.ToSlash(db.ReplicaPath))
cmd := exec.Command(getBinaryPath("litestream"), "replicate",
db.Path,
replicaURL,
)
cmd.Stdout = logFile
cmd.Stderr = logFile
if err := cmd.Start(); err != nil {
logFile.Close()
return fmt.Errorf("start litestream: %w", err)
}
db.LitestreamCmd = cmd
db.LitestreamPID = cmd.Process.Pid
time.Sleep(2 * time.Second)
if cmd.ProcessState != nil && cmd.ProcessState.Exited() {
logFile.Close()
return fmt.Errorf("litestream exited immediately")
}
return nil
}
func (db *TestDB) StartLitestreamWithConfig(configPath string) error {
logPath := filepath.Join(db.TempDir, "litestream.log")
logFile, err := os.Create(logPath)
if err != nil {
return fmt.Errorf("create log file: %w", err)
}
db.ConfigPath = configPath
cmd := exec.Command(getBinaryPath("litestream"), "replicate",
"-config", configPath,
)
cmd.Stdout = logFile
cmd.Stderr = logFile
if err := cmd.Start(); err != nil {
logFile.Close()
return fmt.Errorf("start litestream: %w", err)
}
db.LitestreamCmd = cmd
db.LitestreamPID = cmd.Process.Pid
time.Sleep(2 * time.Second)
return nil
}
func (db *TestDB) StopLitestream() error {
if db.LitestreamCmd == nil || db.LitestreamCmd.Process == nil {
return nil
}
if err := db.LitestreamCmd.Process.Kill(); err != nil {
return fmt.Errorf("kill litestream: %w", err)
}
db.LitestreamCmd.Wait()
time.Sleep(1 * time.Second)
return nil
}
func (db *TestDB) Restore(outputPath string) error {
replicaURL := db.ReplicaURL
if replicaURL == "" {
replicaURL = fmt.Sprintf("file://%s", filepath.ToSlash(db.ReplicaPath))
}
var cmd *exec.Cmd
if db.ConfigPath != "" && (strings.HasPrefix(replicaURL, "s3://") || strings.HasPrefix(replicaURL, "abs://") || strings.HasPrefix(replicaURL, "nats://")) {
cmd = exec.Command(getBinaryPath("litestream"), "restore",
"-config", db.ConfigPath,
"-o", outputPath,
db.Path,
)
} else {
cmd = exec.Command(getBinaryPath("litestream"), "restore",
"-o", outputPath,
replicaURL,
)
}
cmd.Env = append(os.Environ(), db.ReplicaEnv...)
output, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("restore failed: %w\nOutput: %s", err, string(output))
}
return nil
}
func (db *TestDB) Validate(restoredPath string) error {
replicaURL := db.ReplicaURL
if replicaURL == "" {
replicaURL = fmt.Sprintf("file://%s", filepath.ToSlash(db.ReplicaPath))
}
cmd := exec.Command(getBinaryPath("litestream-test"), "validate",
"-source-db", db.Path,
"-replica-url", replicaURL,
"-restored-db", restoredPath,
"-check-type", "full",
)
cmd.Env = append(os.Environ(), db.ReplicaEnv...)
output, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("validation failed: %w\nOutput: %s", err, string(output))
}
return nil
}
func (db *TestDB) QuickValidate(restoredPath string) error {
replicaURL := db.ReplicaURL
if replicaURL == "" {
replicaURL = fmt.Sprintf("file://%s", filepath.ToSlash(db.ReplicaPath))
}
cmd := exec.Command(getBinaryPath("litestream-test"), "validate",
"-source-db", db.Path,
"-replica-url", replicaURL,
"-restored-db", restoredPath,
"-check-type", "quick",
)
cmd.Env = append(os.Environ(), db.ReplicaEnv...)
output, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("validation failed: %w\nOutput: %s", err, string(output))
}
return nil
}
func (db *TestDB) GetRowCount(table string) (int, error) {
sqlDB, err := sql.Open("sqlite3", db.Path)
if err != nil {
return 0, fmt.Errorf("open database: %w", err)
}
defer sqlDB.Close()
var count int
query := fmt.Sprintf("SELECT COUNT(*) FROM %s", table)
if err := sqlDB.QueryRow(query).Scan(&count); err != nil {
return 0, fmt.Errorf("query count: %w", err)
}
return count, nil
}
func (db *TestDB) GetDatabaseSize() (int64, error) {
info, err := os.Stat(db.Path)
if err != nil {
return 0, err
}
size := info.Size()
walPath := db.Path + "-wal"
if walInfo, err := os.Stat(walPath); err == nil {
size += walInfo.Size()
}
return size, nil
}
func (db *TestDB) GetReplicaFileCount() (int, error) {
ltxPath := filepath.Join(db.ReplicaPath, "ltx", "0")
files, err := filepath.Glob(filepath.Join(ltxPath, "*.ltx"))
if err != nil {
return 0, err
}
return len(files), nil
}
func (db *TestDB) GetLitestreamLog() (string, error) {
logPath := filepath.Join(db.TempDir, "litestream.log")
content, err := os.ReadFile(logPath)
if err != nil {
return "", err
}
return string(content), nil
}
func (db *TestDB) CheckForErrors() ([]string, error) {
log, err := db.GetLitestreamLog()
if err != nil {
return nil, err
}
var errors []string
lines := strings.Split(log, "\n")
for _, line := range lines {
if strings.Contains(strings.ToUpper(line), "ERROR") {
errors = append(errors, line)
}
}
return errors, nil
}
func (db *TestDB) Cleanup() {
db.StopLitestream()
}
// WaitForSnapshots waits for snapshots & WAL segments to appear on file replicas.
func (db *TestDB) WaitForSnapshots(timeout time.Duration) error {
if !strings.HasPrefix(db.ReplicaURL, "file://") {
return nil
}
snapshotDir := filepath.Join(db.ReplicaPath, "ltx", fmt.Sprintf("%d", litestream.SnapshotLevel))
walDir := filepath.Join(db.ReplicaPath, "ltx", "0")
deadline := time.Now().Add(timeout)
for {
snapshotCount := countLTXFiles(snapshotDir)
walCount := countLTXFiles(walDir)
if snapshotCount > 0 && walCount > 0 {
return nil
}
if time.Now().After(deadline) {
return fmt.Errorf("timeout waiting for replica data: snapshots=%d wal=%d", snapshotCount, walCount)
}
time.Sleep(500 * time.Millisecond)
}
}
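// countLTXFiles returns the number of *.ltx files directly inside dir,
// treating a glob error as zero files.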
func countLTXFiles(dir string) int {
matches, err := filepath.Glob(filepath.Join(dir, "*.ltx"))
if err != nil {
return 0
}
return len(matches)
}
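// GetTestDuration returns defaultDuration, scaled down to one tenth when the
// test is run with -test.short.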
func GetTestDuration(t *testing.T, defaultDuration time.Duration) time.Duration {
t.Helper()
if testing.Short() {
return defaultDuration / 10
}
return defaultDuration
}
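// RequireBinaries skips the test unless the prebuilt litestream and
// litestream-test binaries exist under bin/ (see getBinaryPath).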
func RequireBinaries(t *testing.T) {
t.Helper()
litestreamBin := getBinaryPath("litestream")
if _, err := os.Stat(litestreamBin); err != nil {
t.Skip("litestream binary not found, run: go build -o bin/litestream ./cmd/litestream")
}
litestreamTestBin := getBinaryPath("litestream-test")
if _, err := os.Stat(litestreamTestBin); err != nil {
t.Skip("litestream-test binary not found, run: go build -o bin/litestream-test ./cmd/litestream-test")
}
}
func CreateTestTable(t *testing.T, dbPath string) error {
t.Helper()
sqlDB, err := sql.Open("sqlite3", dbPath)
if err != nil {
return err
}
defer sqlDB.Close()
_, err = sqlDB.Exec(`
CREATE TABLE IF NOT EXISTS test_data (
id INTEGER PRIMARY KEY AUTOINCREMENT,
data TEXT,
created_at INTEGER
)
`)
return err
}
func InsertTestData(t *testing.T, dbPath string, count int) error {
t.Helper()
sqlDB, err := sql.Open("sqlite3", dbPath)
if err != nil {
return err
}
defer sqlDB.Close()
tx, err := sqlDB.Begin()
if err != nil {
return err
}
defer tx.Rollback()
stmt, err := tx.Prepare("INSERT INTO test_data (data, created_at) VALUES (?, ?)")
if err != nil {
return err
}
defer stmt.Close()
for i := 0; i < count; i++ {
if _, err := stmt.Exec(fmt.Sprintf("test data %d", i), time.Now().Unix()); err != nil {
return err
}
}
return tx.Commit()
}
// IntegrityCheck runs PRAGMA integrity_check on the database.
func (db *TestDB) IntegrityCheck() error {
sqlDB, err := sql.Open("sqlite3", db.Path)
if err != nil {
return err
}
defer sqlDB.Close()
var result string
if err := sqlDB.QueryRow("PRAGMA integrity_check").Scan(&result); err != nil {
return err
}
if result != "ok" {
return fmt.Errorf("integrity check failed: %s", result)
}
return nil
}
// PrintTestSummary prints a summary of the test results.
func (db *TestDB) PrintTestSummary(t *testing.T, testName string, startTime time.Time) {
t.Helper()
duration := time.Since(startTime)
dbSize, _ := db.GetDatabaseSize()
fileCount, _ := db.GetReplicaFileCount()
errors, _ := db.CheckForErrors()
t.Log("\n" + strings.Repeat("=", 80))
t.Logf("TEST SUMMARY: %s", testName)
t.Log(strings.Repeat("=", 80))
t.Logf("Duration: %v", duration.Round(time.Second))
t.Logf("Database Size: %.2f MB", float64(dbSize)/(1024*1024))
t.Logf("Replica Files: %d LTX files", fileCount)
t.Logf("Litestream Errors: %d", len(errors))
t.Log(strings.Repeat("=", 80))
}

View File

@@ -0,0 +1,368 @@
//go:build integration && soak && docker
package integration
import (
"context"
"database/sql"
"fmt"
"os/exec"
"path/filepath"
"strings"
"testing"
"time"
_ "github.com/mattn/go-sqlite3"
)
// TestMinIOSoak runs a soak test against a local MinIO S3-compatible server using Docker.
//
// Default duration: 2 hours
// Can be shortened with: go test -test.short (runs for 2 minutes)
//
// Requirements:
// - Docker must be running
// - docker command must be in PATH
//
// This test validates:
// - S3-compatible replication to MinIO
// - Docker container lifecycle management
// - Heavy sustained load (500 writes/sec)
// - Restoration from S3-compatible storage
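//
// Assumed invocation, derived from the build tags above:
//
//	go test -v -tags="integration,soak,docker" -run TestMinIOSoak ./tests/integration/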
func TestMinIOSoak(t *testing.T) {
RequireBinaries(t)
RequireDocker(t)
// Determine test duration
duration := GetTestDuration(t, 2*time.Hour)
shortMode := testing.Short()
if shortMode {
duration = 2 * time.Minute
}
targetSize := "50MB"
writeRate := 500
if shortMode {
targetSize = "5MB"
writeRate = 100
}
t.Logf("================================================")
t.Logf("Litestream MinIO S3 Soak Test")
t.Logf("================================================")
t.Logf("Duration: %v", duration)
t.Logf("Start time: %s", time.Now().Format(time.RFC3339))
t.Log("")
startTime := time.Now()
// Start MinIO container
t.Log("Starting MinIO container...")
containerID, endpoint, dataVolume := StartMinIOContainer(t)
defer StopMinIOContainer(t, containerID, dataVolume)
t.Logf("✓ MinIO running at: %s", endpoint)
t.Log("")
// Create MinIO bucket
bucket := "litestream-test"
CreateMinIOBucket(t, containerID, bucket)
t.Log("")
// Setup test database
db := SetupTestDB(t, "minio-soak")
defer db.Cleanup()
// Create database
if err := db.Create(); err != nil {
t.Fatalf("Failed to create database: %v", err)
}
// Populate with initial data
t.Logf("Populating database (%s initial data)...", targetSize)
if err := db.Populate(targetSize); err != nil {
t.Fatalf("Failed to populate database: %v", err)
}
t.Log("✓ Database populated")
t.Log("")
// Create S3 configuration for MinIO
s3Path := fmt.Sprintf("litestream-test-%d", time.Now().Unix())
s3URL := fmt.Sprintf("s3://%s/%s", bucket, s3Path)
db.ReplicaURL = s3URL
t.Log("Creating Litestream configuration for MinIO S3...")
s3Config := &S3Config{
Endpoint: endpoint,
AccessKey: "minioadmin",
SecretKey: "minioadmin",
Region: "us-east-1",
ForcePathStyle: true,
SkipVerify: true,
}
configPath := CreateSoakConfig(db.Path, s3URL, s3Config, shortMode)
db.ConfigPath = configPath
t.Logf("✓ Configuration created: %s", configPath)
t.Logf(" S3 URL: %s", s3URL)
t.Log("")
// Start Litestream
t.Log("Starting Litestream with MinIO backend...")
if err := db.StartLitestreamWithConfig(configPath); err != nil {
t.Fatalf("Failed to start Litestream: %v", err)
}
t.Logf("✓ Litestream running (PID: %d)", db.LitestreamPID)
t.Log("")
// Start load generator
t.Log("Starting load generator (heavy sustained load)...")
t.Logf(" Write rate: %d writes/second", writeRate)
t.Logf(" Pattern: wave (simulates varying load)")
t.Logf(" Payload size: 4KB")
t.Logf(" Workers: 8")
t.Log("")
ctx, cancel := context.WithTimeout(context.Background(), duration)
defer cancel()
// Setup signal handler for graceful interruption
testInfo := &TestInfo{
StartTime: startTime,
Duration: duration,
DB: db,
cancel: cancel,
}
setupSignalHandler(t, cancel, testInfo)
// Run load generation in background
loadDone := make(chan error, 1)
go func() {
loadDone <- db.GenerateLoad(ctx, writeRate, duration, "wave")
}()
// Monitor every 60 seconds with MinIO-specific metrics
t.Log("Running MinIO S3 test...")
t.Log("Monitor will report every 60 seconds")
t.Log("Press Ctrl+C twice within 5 seconds to stop early")
t.Log("================================================")
t.Log("")
refreshStats := func() {
testInfo.RowCount, _ = db.GetRowCount("load_test")
if testInfo.RowCount == 0 {
testInfo.RowCount, _ = db.GetRowCount("test_table_0")
}
if testInfo.RowCount == 0 {
testInfo.RowCount, _ = db.GetRowCount("test_data")
}
testInfo.FileCount = CountMinIOObjects(t, containerID, bucket)
}
logMetrics := func() {
logMinIOMetrics(t, db, containerID, bucket)
if db.LitestreamCmd != nil && db.LitestreamCmd.ProcessState != nil {
t.Error("✗ Litestream stopped unexpectedly!")
if testInfo.cancel != nil {
testInfo.cancel()
}
}
}
MonitorSoakTest(t, db, ctx, testInfo, refreshStats, logMetrics)
// Wait for load generation to complete
if err := <-loadDone; err != nil {
t.Logf("Load generation completed: %v", err)
}
if err := db.WaitForSnapshots(30 * time.Second); err != nil {
t.Fatalf("Failed waiting for snapshot: %v", err)
}
t.Log("")
t.Log("================================================")
t.Log("Final Test Results")
t.Log("================================================")
t.Log("")
// Stop Litestream
t.Log("Stopping Litestream...")
if err := db.StopLitestream(); err != nil {
t.Logf("Warning: Failed to stop Litestream cleanly: %v", err)
}
// Final statistics
t.Log("Database Statistics:")
if dbSize, err := db.GetDatabaseSize(); err == nil {
t.Logf(" Final size: %.2f MB", float64(dbSize)/(1024*1024))
}
// Count rows
var rowCount int
var err error
if rowCount, err = db.GetRowCount("load_test"); err != nil {
if rowCount, err = db.GetRowCount("test_table_0"); err != nil {
if rowCount, err = db.GetRowCount("test_data"); err != nil {
t.Logf(" Warning: Could not get row count: %v", err)
}
}
}
if err == nil {
t.Logf(" Total rows: %d", rowCount)
}
t.Log("")
// MinIO statistics
t.Log("MinIO S3 Statistics:")
finalObjects := CountMinIOObjects(t, containerID, bucket)
t.Logf(" Total objects in MinIO: %d", finalObjects)
t.Log("")
// Check for errors
errors, _ := db.CheckForErrors()
criticalErrors := 0
for _, errLine := range errors {
if !containsAny(errLine, []string{"page size not initialized"}) {
criticalErrors++
}
}
t.Logf(" Critical errors: %d", criticalErrors)
t.Log("")
// Test restoration from MinIO
t.Log("Testing restoration from MinIO S3...")
restoredPath := filepath.Join(db.TempDir, "restored.db")
if err := db.Restore(restoredPath); err != nil {
t.Fatalf("Restoration from MinIO failed: %v", err)
}
t.Log("✓ Restoration successful!")
// Compare row counts
var restoredCount int
if restoredCount, err = getRowCountFromPath(restoredPath, "load_test"); err != nil {
if restoredCount, err = getRowCountFromPath(restoredPath, "test_table_0"); err != nil {
if restoredCount, err = getRowCountFromPath(restoredPath, "test_data"); err != nil {
t.Logf(" Warning: Could not get restored row count: %v", err)
}
}
}
if err == nil && rowCount > 0 {
if rowCount == restoredCount {
t.Logf("✓ Row counts match! (%d rows)", restoredCount)
} else {
t.Logf("⚠ Row count mismatch! Original: %d, Restored: %d", rowCount, restoredCount)
}
}
// Validate integrity
t.Log("")
t.Log("Validating restored database integrity...")
restoredDB := &TestDB{Path: restoredPath, t: t}
if err := restoredDB.IntegrityCheck(); err != nil {
t.Fatalf("Integrity check failed: %v", err)
}
t.Log("✓ Integrity check passed!")
// Analyze test results
analysis := AnalyzeSoakTest(t, db, duration)
PrintSoakTestAnalysis(t, analysis)
// Test Summary
t.Log("================================================")
t.Log("Test Summary")
t.Log("================================================")
testPassed := true
issues := []string{}
if criticalErrors > 0 {
testPassed = false
issues = append(issues, fmt.Sprintf("Critical errors detected: %d", criticalErrors))
}
if finalObjects == 0 {
testPassed = false
issues = append(issues, "No objects stored in MinIO")
}
if testPassed {
t.Log("✓ TEST PASSED!")
t.Log("")
t.Logf("Successfully replicated to MinIO (%d objects)", finalObjects)
t.Log("The configuration is ready for production use.")
} else {
t.Log("⚠ TEST COMPLETED WITH ISSUES:")
for _, issue := range issues {
t.Logf(" - %s", issue)
}
t.Log("")
t.Log("Review the logs for details:")
logPath := filepath.Join(db.TempDir, "litestream.log")
t.Logf(" %s", logPath)
t.Fail()
}
t.Log("")
t.Logf("Test duration: %v", time.Since(startTime).Round(time.Second))
t.Logf("Results available in: %s", db.TempDir)
t.Log("================================================")
}
// logMinIOMetrics logs MinIO-specific metrics
func logMinIOMetrics(t *testing.T, db *TestDB, containerID, bucket string) {
t.Helper()
// Basic database metrics
LogSoakMetrics(t, db, "minio")
// MinIO-specific metrics
t.Log("")
t.Log(" MinIO S3 Statistics:")
objectCount := CountMinIOObjects(t, containerID, bucket)
t.Logf(" Total objects: %d", objectCount)
// Count LTX files specifically
ltxCount := countMinIOLTXFiles(t, containerID, bucket)
t.Logf(" LTX segments: %d", ltxCount)
}
// countMinIOLTXFiles counts LTX files in MinIO bucket
func countMinIOLTXFiles(t *testing.T, containerID, bucket string) int {
t.Helper()
cmd := exec.Command("docker", "run", "--rm",
"--link", containerID+":minio",
"-e", "MC_HOST_minio=http://minioadmin:minioadmin@minio:9000",
"minio/mc", "ls", "minio/"+bucket+"/", "--recursive")
output, err := cmd.CombinedOutput()
if err != nil {
return 0
}
lines := strings.Split(strings.TrimSpace(string(output)), "\n")
ltxCount := 0
for _, line := range lines {
if strings.Contains(line, ".ltx") {
ltxCount++
}
}
return ltxCount
}
// getRowCountFromPath gets row count from a database file path
func getRowCountFromPath(dbPath, table string) (int, error) {
db, err := sql.Open("sqlite3", dbPath)
if err != nil {
return 0, err
}
defer db.Close()
var count int
query := fmt.Sprintf("SELECT COUNT(*) FROM %s", table)
if err := db.QueryRow(query).Scan(&count); err != nil {
return 0, err
}
return count, nil
}

View File

@@ -0,0 +1,353 @@
//go:build integration && soak && aws
package integration
import (
"context"
"database/sql"
"fmt"
"path/filepath"
"testing"
"time"
_ "github.com/mattn/go-sqlite3"
)
// TestOvernightS3Soak runs an 8-hour overnight soak test against real AWS S3.
//
// Default duration: 8 hours
// Can be shortened with: go test -test.short (runs for 10 minutes)
//
// Requirements:
// - AWS_ACCESS_KEY_ID environment variable
// - AWS_SECRET_ACCESS_KEY environment variable
// - S3_BUCKET environment variable
// - AWS_REGION environment variable (optional, defaults to us-east-1)
// - AWS CLI must be installed
//
// This test validates:
// - Long-term S3 replication stability
// - Network resilience over 8 hours
// - Real S3 API performance
// - Restoration from cloud storage
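//
// Assumed invocation, derived from the build tags above:
//
//	AWS_ACCESS_KEY_ID=... AWS_SECRET_ACCESS_KEY=... S3_BUCKET=my-bucket \
//	go test -v -tags="integration,soak,aws" -run TestOvernightS3Soak ./tests/integration/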
func TestOvernightS3Soak(t *testing.T) {
RequireBinaries(t)
// Check AWS credentials and get configuration
bucket, region := CheckAWSCredentials(t)
// Determine test duration
var duration time.Duration
if testing.Short() {
duration = 10 * time.Minute
} else {
duration = 8 * time.Hour
}
shortMode := testing.Short()
t.Logf("================================================")
t.Logf("Litestream Overnight S3 Soak Test")
t.Logf("================================================")
t.Logf("Duration: %v", duration)
t.Logf("S3 Bucket: %s", bucket)
t.Logf("AWS Region: %s", region)
t.Logf("Start time: %s", time.Now().Format(time.RFC3339))
t.Log("")
startTime := time.Now()
// Test S3 connectivity
t.Log("Testing S3 connectivity...")
TestS3Connectivity(t, bucket)
t.Log("")
// Setup test database
db := SetupTestDB(t, "overnight-s3-soak")
defer db.Cleanup()
// Create database
if err := db.Create(); err != nil {
t.Fatalf("Failed to create database: %v", err)
}
// Create S3 configuration
s3Path := fmt.Sprintf("litestream-overnight-%d", time.Now().Unix())
s3URL := fmt.Sprintf("s3://%s/%s", bucket, s3Path)
db.ReplicaURL = s3URL
t.Log("Creating Litestream configuration for S3...")
s3Config := &S3Config{
Region: region,
}
configPath := CreateSoakConfig(db.Path, s3URL, s3Config, shortMode)
db.ConfigPath = configPath
t.Logf("✓ Configuration created: %s", configPath)
t.Logf(" S3 URL: %s", s3URL)
t.Log("")
// Start Litestream initially (before population)
t.Log("Starting Litestream...")
if err := db.StartLitestreamWithConfig(configPath); err != nil {
t.Fatalf("Failed to start Litestream: %v", err)
}
t.Logf("✓ Litestream started (PID: %d)", db.LitestreamPID)
t.Log("")
// Stop Litestream to populate database
t.Log("Stopping Litestream temporarily for initial population...")
if err := db.StopLitestream(); err != nil {
t.Fatalf("Failed to stop Litestream: %v", err)
}
// Populate with 100MB of initial data
t.Log("Populating database (100MB initial data)...")
if err := db.Populate("100MB"); err != nil {
t.Fatalf("Failed to populate database: %v", err)
}
t.Log("✓ Database populated")
t.Log("")
// Restart Litestream after population
t.Log("Restarting Litestream after population...")
if err := db.StartLitestreamWithConfig(configPath); err != nil {
t.Fatalf("Failed to restart Litestream: %v", err)
}
t.Logf("✓ Litestream restarted (PID: %d)", db.LitestreamPID)
t.Log("")
// Start load generator for overnight test
t.Log("Starting load generator for overnight S3 test...")
t.Log("Configuration:")
t.Logf(" Duration: %v", duration)
t.Logf(" Write rate: 100 writes/second (higher for S3 testing)")
t.Logf(" Pattern: wave (simulates varying load)")
t.Logf(" Workers: 8")
t.Log("")
ctx, cancel := context.WithTimeout(context.Background(), duration)
defer cancel()
// Setup signal handler for graceful interruption
testInfo := &TestInfo{
StartTime: startTime,
Duration: duration,
DB: db,
cancel: cancel,
}
setupSignalHandler(t, cancel, testInfo)
// Run load generation in background
loadDone := make(chan error, 1)
go func() {
loadDone <- db.GenerateLoad(ctx, 100, duration, "wave")
}()
// Monitor every 60 seconds with S3-specific metrics
t.Log("Overnight S3 test is running!")
t.Log("Monitor will report every 60 seconds")
t.Log("Press Ctrl+C twice within 5 seconds to stop early")
t.Log("================================================")
t.Log("")
t.Logf("The test will run for %v. Monitor progress below.", duration)
t.Log("")
refreshStats := func() {
testInfo.RowCount, _ = db.GetRowCount("load_test")
if testInfo.RowCount == 0 {
testInfo.RowCount, _ = db.GetRowCount("test_table_0")
}
if testInfo.RowCount == 0 {
testInfo.RowCount, _ = db.GetRowCount("test_data")
}
testInfo.FileCount = CountS3Objects(t, s3URL)
}
logMetrics := func() {
logS3Metrics(t, db, s3URL)
if db.LitestreamCmd != nil && db.LitestreamCmd.ProcessState != nil {
t.Error("✗ Litestream stopped unexpectedly!")
if testInfo.cancel != nil {
testInfo.cancel()
}
}
}
MonitorSoakTest(t, db, ctx, testInfo, refreshStats, logMetrics)
// Wait for load generation to complete
if err := <-loadDone; err != nil {
t.Logf("Load generation completed: %v", err)
}
t.Log("")
t.Log("Load generation completed.")
// Final statistics
t.Log("")
t.Log("================================================")
t.Log("Final Statistics")
t.Log("================================================")
t.Log("")
// Stop Litestream
t.Log("Stopping Litestream...")
if err := db.StopLitestream(); err != nil {
t.Logf("Warning: Failed to stop Litestream cleanly: %v", err)
}
// Database statistics
t.Log("Database Statistics:")
if dbSize, err := db.GetDatabaseSize(); err == nil {
t.Logf(" Final size: %.2f MB", float64(dbSize)/(1024*1024))
}
// Count rows
var rowCount int
var err error
if rowCount, err = db.GetRowCount("load_test"); err != nil {
if rowCount, err = db.GetRowCount("test_table_0"); err != nil {
if rowCount, err = db.GetRowCount("test_data"); err != nil {
t.Logf(" Warning: Could not get row count: %v", err)
}
}
}
if err == nil {
t.Logf(" Total rows: %d", rowCount)
}
t.Log("")
// S3 statistics
t.Log("S3 Statistics:")
finalObjects := CountS3Objects(t, s3URL)
t.Logf(" Total objects: %d", finalObjects)
if s3Size := GetS3StorageSize(t, s3URL); s3Size > 0 {
t.Logf(" Total S3 storage: %.2f MB", float64(s3Size)/(1024*1024))
}
t.Log("")
// Check for errors
errors, _ := db.CheckForErrors()
criticalErrors := 0
for _, errLine := range errors {
if !containsAny(errLine, []string{"page size not initialized"}) {
criticalErrors++
}
}
t.Logf(" Critical errors: %d", criticalErrors)
t.Log("")
// Test restoration from S3
t.Log("Testing restoration from S3...")
restoredPath := filepath.Join(db.TempDir, "restored.db")
if err := db.Restore(restoredPath); err != nil {
t.Fatalf("Restoration from S3 failed: %v", err)
}
t.Log("✓ Restoration successful!")
// Compare row counts
var restoredCount int
if restoredCount, err = getRowCountFromPath(restoredPath, "load_test"); err != nil {
if restoredCount, err = getRowCountFromPath(restoredPath, "test_table_0"); err != nil {
if restoredCount, err = getRowCountFromPath(restoredPath, "test_data"); err != nil {
t.Logf(" Warning: Could not get restored row count: %v", err)
}
}
}
if err == nil && rowCount > 0 {
if rowCount == restoredCount {
t.Logf("✓ Row counts match! (%d rows)", restoredCount)
} else {
t.Logf("⚠ Row count mismatch! Original: %d, Restored: %d", rowCount, restoredCount)
}
}
// Validate
t.Log("")
t.Log("Validating restored database...")
if err := db.Validate(restoredPath); err != nil {
t.Fatalf("Validation failed: %v", err)
}
t.Log("✓ Validation passed!")
// Analyze test results
analysis := AnalyzeSoakTest(t, db, duration)
PrintSoakTestAnalysis(t, analysis)
// Test Summary
t.Log("================================================")
t.Log("Test Summary")
t.Log("================================================")
testPassed := true
issues := []string{}
if criticalErrors > 0 {
testPassed = false
issues = append(issues, fmt.Sprintf("Critical errors detected: %d", criticalErrors))
}
if finalObjects == 0 {
testPassed = false
issues = append(issues, "No objects stored in S3")
}
if testPassed {
t.Log("✓ TEST PASSED!")
t.Log("")
t.Logf("Successfully replicated to AWS S3 (%d objects)", finalObjects)
t.Log("The configuration is ready for production use.")
} else {
t.Log("⚠ TEST COMPLETED WITH ISSUES:")
for _, issue := range issues {
t.Logf(" - %s", issue)
}
t.Log("")
t.Log("Review the logs for details:")
logPath := filepath.Join(db.TempDir, "litestream.log")
t.Logf(" %s", logPath)
t.Fail()
}
t.Log("")
t.Logf("Test duration: %v", time.Since(startTime).Round(time.Second))
t.Logf("Results available in: %s", db.TempDir)
t.Logf("S3 replica data in: %s", s3URL)
t.Log("================================================")
}
// logS3Metrics logs S3-specific metrics
func logS3Metrics(t *testing.T, db *TestDB, s3URL string) {
t.Helper()
// Basic database metrics
LogSoakMetrics(t, db, "overnight-s3")
// S3-specific metrics
t.Log("")
t.Log(" S3 Statistics:")
objectCount := CountS3Objects(t, s3URL)
t.Logf(" Total objects: %d", objectCount)
if s3Size := GetS3StorageSize(t, s3URL); s3Size > 0 {
t.Logf(" Total storage: %.2f MB", float64(s3Size)/(1024*1024))
}
}
// getRowCountFromPath gets row count from a database file path
func getRowCountFromPath(dbPath, table string) (int, error) {
db, err := sql.Open("sqlite3", dbPath)
if err != nil {
return 0, err
}
defer db.Close()
var count int
query := fmt.Sprintf("SELECT COUNT(*) FROM %s", table)
if err := db.QueryRow(query).Scan(&count); err != nil {
return 0, err
}
return count, nil
}

View File

@@ -0,0 +1,215 @@
//go:build integration && long
package integration
import (
"context"
"path/filepath"
"testing"
"time"
_ "github.com/mattn/go-sqlite3"
)
func TestOvernightFile(t *testing.T) {
if testing.Short() {
t.Skip("skipping long integration test in short mode")
}
RequireBinaries(t)
startTime := time.Now()
duration := GetTestDuration(t, 8*time.Hour)
t.Logf("Testing: Overnight file-based replication (duration: %v)", duration)
t.Log("Default: 8 hours, configurable via test duration")
db := SetupTestDB(t, "overnight-file")
defer db.Cleanup()
defer db.PrintTestSummary(t, "Overnight File Replication", startTime)
t.Log("[1] Creating and populating database...")
if err := db.Create(); err != nil {
t.Fatalf("Failed to create database: %v", err)
}
if err := db.Populate("100MB"); err != nil {
t.Fatalf("Failed to populate database: %v", err)
}
t.Log("✓ Database populated to 100MB")
t.Log("[2] Starting Litestream...")
if err := db.StartLitestream(); err != nil {
t.Fatalf("Failed to start Litestream: %v", err)
}
time.Sleep(10 * time.Second)
t.Log("[3] Generating sustained load...")
ctx, cancel := context.WithTimeout(context.Background(), duration)
defer cancel()
config := DefaultLoadConfig()
config.WriteRate = 50
config.Duration = duration
config.Pattern = LoadPatternWave
config.PayloadSize = 2 * 1024
config.Workers = 4
ticker := time.NewTicker(1 * time.Minute)
defer ticker.Stop()
go func() {
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
fileCount, _ := db.GetReplicaFileCount()
dbSize, _ := db.GetDatabaseSize()
t.Logf("[Progress] Files: %d, DB Size: %.2f MB, Elapsed: %v",
fileCount, float64(dbSize)/(1024*1024), time.Since(time.Now().Add(-duration)))
}
}
}()
if err := db.GenerateLoad(ctx, config.WriteRate, config.Duration, string(config.Pattern)); err != nil && ctx.Err() == nil {
t.Fatalf("Load generation failed: %v", err)
}
t.Log("✓ Load generation complete")
time.Sleep(1 * time.Minute)
t.Log("[4] Final statistics...")
fileCount, err := db.GetReplicaFileCount()
if err != nil {
t.Fatalf("Failed to check replica: %v", err)
}
dbSize, err := db.GetDatabaseSize()
if err != nil {
t.Fatalf("Failed to get database size: %v", err)
}
t.Logf("Final LTX files: %d", fileCount)
t.Logf("Final DB size: %.2f MB", float64(dbSize)/(1024*1024))
t.Log("[5] Checking for errors...")
errors, err := db.CheckForErrors()
if err != nil {
t.Fatalf("Failed to check errors: %v", err)
}
if len(errors) > 20 {
t.Fatalf("Too many errors (%d), test may be unstable", len(errors))
} else if len(errors) > 0 {
t.Logf("Found %d errors (acceptable for long test)", len(errors))
} else {
t.Log("✓ No errors detected")
}
db.StopLitestream()
time.Sleep(2 * time.Second)
t.Log("[6] Testing final restore...")
restoredPath := filepath.Join(db.TempDir, "overnight-restored.db")
if err := db.Restore(restoredPath); err != nil {
t.Fatalf("Restore failed: %v", err)
}
t.Log("✓ Restore successful")
t.Log("[7] Full validation...")
if err := db.Validate(restoredPath); err != nil {
t.Fatalf("Validation failed: %v", err)
}
t.Log("✓ Validation passed")
t.Log("TEST PASSED: Overnight file replication successful")
}
func TestOvernightComprehensive(t *testing.T) {
if testing.Short() {
t.Skip("skipping long integration test in short mode")
}
RequireBinaries(t)
startTime := time.Now()
duration := GetTestDuration(t, 8*time.Hour)
t.Logf("Testing: Comprehensive overnight test (duration: %v)", duration)
db := SetupTestDB(t, "overnight-comprehensive")
defer db.Cleanup()
defer db.PrintTestSummary(t, "Overnight Comprehensive Test", startTime)
t.Log("[1] Creating large database...")
if err := db.Create(); err != nil {
t.Fatalf("Failed to create database: %v", err)
}
if err := db.Populate("500MB"); err != nil {
t.Fatalf("Failed to populate database: %v", err)
}
t.Log("✓ Database populated to 500MB")
t.Log("[2] Starting Litestream...")
if err := db.StartLitestream(); err != nil {
t.Fatalf("Failed to start Litestream: %v", err)
}
time.Sleep(10 * time.Second)
t.Log("[3] Generating mixed workload...")
ctx, cancel := context.WithTimeout(context.Background(), duration)
defer cancel()
config := DefaultLoadConfig()
config.WriteRate = 100
config.Duration = duration
config.Pattern = LoadPatternWave
config.PayloadSize = 4 * 1024
config.ReadRatio = 0.3
config.Workers = 8
ticker := time.NewTicker(5 * time.Minute)
defer ticker.Stop()
go func() {
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
fileCount, _ := db.GetReplicaFileCount()
dbSize, _ := db.GetDatabaseSize()
t.Logf("[Progress] Files: %d, DB Size: %.2f MB", fileCount, float64(dbSize)/(1024*1024))
}
}
}()
if err := db.GenerateLoad(ctx, config.WriteRate, config.Duration, string(config.Pattern)); err != nil && ctx.Err() == nil {
t.Fatalf("Load generation failed: %v", err)
}
t.Log("✓ Load generation complete")
time.Sleep(2 * time.Minute)
db.StopLitestream()
t.Log("[4] Final validation...")
restoredPath := filepath.Join(db.TempDir, "comprehensive-restored.db")
if err := db.Restore(restoredPath); err != nil {
t.Fatalf("Restore failed: %v", err)
}
if err := db.Validate(restoredPath); err != nil {
t.Fatalf("Validation failed: %v", err)
}
t.Log("✓ Comprehensive test passed")
t.Log("TEST PASSED: Overnight comprehensive test successful")
}

View File

@@ -0,0 +1,121 @@
//go:build integration
package integration
import (
"context"
"path/filepath"
"testing"
"time"
_ "github.com/mattn/go-sqlite3"
)
func TestQuickValidation(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
RequireBinaries(t)
startTime := time.Now()
duration := GetTestDuration(t, 30*time.Minute)
t.Logf("Testing: Quick validation test (duration: %v)", duration)
t.Log("Default: 30 minutes, configurable via test duration")
db := SetupTestDB(t, "quick-validation")
defer db.Cleanup()
defer db.PrintTestSummary(t, "Quick Validation Test", startTime)
t.Log("[1] Creating and populating database...")
if err := db.Create(); err != nil {
t.Fatalf("Failed to create database: %v", err)
}
if err := db.Populate("10MB"); err != nil {
t.Fatalf("Failed to populate database: %v", err)
}
t.Log("✓ Database populated to 10MB")
t.Log("[2] Starting Litestream...")
if err := db.StartLitestream(); err != nil {
t.Fatalf("Failed to start Litestream: %v", err)
}
time.Sleep(5 * time.Second)
t.Log("[3] Generating wave pattern load...")
ctx, cancel := context.WithTimeout(context.Background(), duration)
defer cancel()
config := DefaultLoadConfig()
config.WriteRate = 100
config.Duration = duration
config.Pattern = LoadPatternWave
config.PayloadSize = 4 * 1024
config.Workers = 4
if err := db.GenerateLoad(ctx, config.WriteRate, config.Duration, string(config.Pattern)); err != nil && ctx.Err() == nil {
t.Fatalf("Load generation failed: %v", err)
}
t.Log("✓ Load generation complete")
time.Sleep(10 * time.Second)
t.Log("[4] Checking replica status...")
fileCount, err := db.GetReplicaFileCount()
if err != nil {
t.Fatalf("Failed to check replica: %v", err)
}
if fileCount == 0 {
t.Fatal("No LTX segments created!")
}
t.Logf("✓ LTX segments created: %d files", fileCount)
dbSize, err := db.GetDatabaseSize()
if err != nil {
t.Fatalf("Failed to get database size: %v", err)
}
t.Logf("Database size: %.2f MB", float64(dbSize)/(1024*1024))
t.Log("[5] Checking for errors...")
errors, err := db.CheckForErrors()
if err != nil {
t.Fatalf("Failed to check errors: %v", err)
}
if len(errors) > 10 {
t.Fatalf("Too many critical errors (%d), showing first 5:\n%v", len(errors), errors[:5])
} else if len(errors) > 0 {
t.Logf("Found %d errors (showing first 3):", len(errors))
for i := 0; i < min(len(errors), 3); i++ {
t.Logf(" %s", errors[i])
}
} else {
t.Log("✓ No errors detected")
}
db.StopLitestream()
time.Sleep(2 * time.Second)
t.Log("[6] Testing restore...")
restoredPath := filepath.Join(db.TempDir, "quick-restored.db")
if err := db.Restore(restoredPath); err != nil {
t.Fatalf("Restore failed: %v", err)
}
t.Log("✓ Restore successful")
t.Log("[7] Validating restoration...")
if err := db.QuickValidate(restoredPath); err != nil {
t.Fatalf("Validation failed: %v", err)
}
t.Log("✓ Validation passed")
t.Log("TEST PASSED: Quick validation successful")
}

View File

@@ -0,0 +1,342 @@
//go:build integration
package integration
import (
"database/sql"
"fmt"
"os"
"path/filepath"
"testing"
"time"
_ "github.com/mattn/go-sqlite3"
)
func TestFreshStart(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
RequireBinaries(t)
t.Log("Testing: Starting replication with a fresh (empty) database")
t.Log("This tests if Litestream works correctly when it creates the database from scratch")
db := SetupTestDB(t, "fresh-start")
defer db.Cleanup()
t.Log("[1] Starting Litestream with non-existent database...")
if err := db.StartLitestream(); err != nil {
t.Fatalf("Failed to start Litestream: %v", err)
}
time.Sleep(2 * time.Second)
t.Log("[2] Creating database while Litestream is running...")
sqlDB, err := sql.Open("sqlite3", db.Path)
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
if _, err := sqlDB.Exec("PRAGMA journal_mode=WAL"); err != nil {
t.Fatalf("Failed to set WAL mode: %v", err)
}
if _, err := sqlDB.Exec("CREATE TABLE test (id INTEGER PRIMARY KEY, data TEXT)"); err != nil {
t.Fatalf("Failed to create table: %v", err)
}
if _, err := sqlDB.Exec("INSERT INTO test (data) VALUES ('initial data')"); err != nil {
t.Fatalf("Failed to insert initial data: %v", err)
}
sqlDB.Close()
time.Sleep(3 * time.Second)
t.Log("[3] Checking if Litestream detected the database...")
log, err := db.GetLitestreamLog()
if err != nil {
t.Fatalf("Failed to read log: %v", err)
}
t.Logf("Litestream log snippet:\n%s", log[:min(len(log), 500)])
t.Log("[4] Adding data to test replication...")
sqlDB, err = sql.Open("sqlite3", db.Path)
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
for i := 1; i <= 100; i++ {
if _, err := sqlDB.Exec("INSERT INTO test (data) VALUES (?)", fmt.Sprintf("row %d", i)); err != nil {
t.Fatalf("Failed to insert row %d: %v", i, err)
}
}
sqlDB.Close()
time.Sleep(5 * time.Second)
t.Log("[5] Checking for errors...")
errors, err := db.CheckForErrors()
if err != nil {
t.Fatalf("Failed to check errors: %v", err)
}
if len(errors) > 1 {
t.Logf("Found %d errors (showing first 3):", len(errors))
for i := 0; i < min(len(errors), 3); i++ {
t.Logf(" %s", errors[i])
}
} else {
t.Log("✓ No significant errors")
}
t.Log("[6] Checking replica files...")
fileCount, err := db.GetReplicaFileCount()
if err != nil {
t.Fatalf("Failed to get replica file count: %v", err)
}
if fileCount == 0 {
t.Fatal("✗ No replica files created!")
}
t.Logf("✓ Replica created with %d LTX files", fileCount)
db.StopLitestream()
time.Sleep(2 * time.Second)
t.Log("[7] Testing restore...")
restoredPath := filepath.Join(db.TempDir, "fresh-restored.db")
if err := db.Restore(restoredPath); err != nil {
t.Fatalf("✗ Restore failed: %v", err)
}
t.Log("✓ Restore successful")
origCount, err := db.GetRowCount("test")
if err != nil {
t.Fatalf("Failed to get original row count: %v", err)
}
restoredDB := &TestDB{Path: restoredPath, t: t}
restCount, err := restoredDB.GetRowCount("test")
if err != nil {
t.Fatalf("Failed to get restored row count: %v", err)
}
if origCount != restCount {
t.Fatalf("✗ Data mismatch: Original=%d, Restored=%d", origCount, restCount)
}
t.Logf("✓ Data integrity verified: %d rows", origCount)
t.Log("TEST PASSED: Fresh start works correctly")
}
func TestDatabaseIntegrity(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
RequireBinaries(t)
t.Log("Testing: Complex data patterns and integrity after restore")
db := SetupTestDB(t, "integrity-test")
defer db.Cleanup()
if err := db.Create(); err != nil {
t.Fatalf("Failed to create database: %v", err)
}
t.Log("[1] Creating complex schema...")
sqlDB, err := sql.Open("sqlite3", db.Path)
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
defer sqlDB.Close()
if err := CreateComplexTestSchema(sqlDB); err != nil {
t.Fatalf("Failed to create schema: %v", err)
}
t.Log("✓ Schema created")
t.Log("[2] Populating with test data...")
if err := PopulateComplexTestData(sqlDB, 10, 5, 3); err != nil {
t.Fatalf("Failed to populate data: %v", err)
}
t.Log("✓ Data populated (10 users, 50 posts, 150 comments)")
t.Log("[3] Starting Litestream...")
if err := db.StartLitestream(); err != nil {
t.Fatalf("Failed to start Litestream: %v", err)
}
time.Sleep(10 * time.Second)
db.StopLitestream()
time.Sleep(2 * time.Second)
t.Log("[4] Checking integrity of original database...")
var integrityResult string
if err := sqlDB.QueryRow("PRAGMA integrity_check").Scan(&integrityResult); err != nil {
t.Fatalf("Integrity check failed: %v", err)
}
if integrityResult != "ok" {
t.Fatalf("Source database integrity check failed: %s", integrityResult)
}
t.Log("✓ Source database integrity OK")
t.Log("[5] Restoring database...")
restoredPath := filepath.Join(db.TempDir, "integrity-restored.db")
if err := db.Restore(restoredPath); err != nil {
t.Fatalf("Restore failed: %v", err)
}
t.Log("✓ Restore successful")
t.Log("[6] Checking integrity of restored database...")
restoredDB, err := sql.Open("sqlite3", restoredPath)
if err != nil {
t.Fatalf("Failed to open restored database: %v", err)
}
defer restoredDB.Close()
if err := restoredDB.QueryRow("PRAGMA integrity_check").Scan(&integrityResult); err != nil {
t.Fatalf("Restored integrity check failed: %v", err)
}
if integrityResult != "ok" {
t.Fatalf("Restored database integrity check failed: %s", integrityResult)
}
t.Log("✓ Restored database integrity OK")
t.Log("[7] Validating data consistency...")
tables := []string{"users", "posts", "comments"}
for _, table := range tables {
var sourceCount, restoredCount int
if err := sqlDB.QueryRow(fmt.Sprintf("SELECT COUNT(*) FROM %s", table)).Scan(&sourceCount); err != nil {
t.Fatalf("Failed to count source %s: %v", table, err)
}
if err := restoredDB.QueryRow(fmt.Sprintf("SELECT COUNT(*) FROM %s", table)).Scan(&restoredCount); err != nil {
t.Fatalf("Failed to count restored %s: %v", table, err)
}
if sourceCount != restoredCount {
t.Fatalf("Count mismatch for %s: source=%d, restored=%d", table, sourceCount, restoredCount)
}
t.Logf("✓ Table %s: %d rows match", table, sourceCount)
}
t.Log("TEST PASSED: Database integrity maintained through replication")
}
func TestDatabaseDeletion(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
RequireBinaries(t)
t.Log("Testing: Database deletion during active replication")
db := SetupTestDB(t, "deletion-test")
defer db.Cleanup()
if err := db.Create(); err != nil {
t.Fatalf("Failed to create database: %v", err)
}
t.Log("[1] Creating test table and data...")
if err := CreateTestTable(t, db.Path); err != nil {
t.Fatalf("Failed to create table: %v", err)
}
if err := InsertTestData(t, db.Path, 100); err != nil {
t.Fatalf("Failed to insert test data: %v", err)
}
t.Log("✓ Created table with 100 rows")
t.Log("[2] Starting Litestream...")
if err := db.StartLitestream(); err != nil {
t.Fatalf("Failed to start Litestream: %v", err)
}
time.Sleep(5 * time.Second)
fileCount, _ := db.GetReplicaFileCount()
t.Logf("✓ Replication started (%d files)", fileCount)
t.Log("[3] Deleting database files...")
os.Remove(db.Path)
os.Remove(db.Path + "-wal")
os.Remove(db.Path + "-shm")
time.Sleep(3 * time.Second)
t.Log("✓ Database deleted")
t.Log("[4] Checking Litestream behavior...")
errors, err := db.CheckForErrors()
if err != nil {
t.Fatalf("Failed to check errors: %v", err)
}
t.Logf("Litestream reported %d error messages (expected after database deletion)", len(errors))
db.StopLitestream()
t.Log("[5] Verifying replica is still intact...")
finalFileCount, err := db.GetReplicaFileCount()
if err != nil {
t.Fatalf("Failed to check replica: %v", err)
}
if finalFileCount == 0 {
t.Fatalf("Replica appears to be empty or missing")
}
t.Logf("✓ Replica exists with %d files (was %d - compaction may have reduced count)", finalFileCount, fileCount)
t.Log("[6] Testing restore from replica...")
restoredPath := filepath.Join(db.TempDir, "deletion-restored.db")
if err := db.Restore(restoredPath); err != nil {
t.Fatalf("Restore failed: %v", err)
}
t.Log("✓ Restore successful")
restoredDB := &TestDB{Path: restoredPath, t: t}
restCount, err := restoredDB.GetRowCount("test_data")
if err != nil {
t.Fatalf("Failed to get restored row count: %v", err)
}
if restCount != 100 {
t.Fatalf("Expected 100 rows, got %d", restCount)
}
t.Logf("✓ Restored database has correct data: %d rows", restCount)
t.Log("TEST PASSED: Replica survives source database deletion")
}
// TestReplicaFailover was removed because Litestream no longer supports
// multiple replicas on a single database (see cmd/litestream/main.go).
// The bash script test-replica-failover.sh was also non-functional.
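// min returns the smaller of a and b; kept as a local helper so the package
// also builds on Go toolchains that predate the built-in min (Go 1.21).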
func min(a, b int) int {
if a < b {
return a
}
return b
}

File diff suppressed because it is too large