Mirror of https://github.com/benbjohnson/litestream.git, synced 2026-01-25 05:06:30 +00:00
test: migrate bash integration and soak tests to Go infrastructure (#799)
Co-authored-by: Claude <noreply@anthropic.com>
148 additions: .github/workflows/integration-tests.yml (vendored, new file)
@@ -0,0 +1,148 @@
name: Integration Tests

on:
  pull_request:
    paths:
      - '**.go'
      - 'go.mod'
      - 'go.sum'
      - 'tests/integration/**'
      - '.github/workflows/integration-tests.yml'
  workflow_dispatch:
    inputs:
      test_type:
        description: 'Test type to run'
        required: false
        default: 'quick'
        type: choice
        options:
          - 'quick'
          - 'all'
          - 'long'

permissions:
  contents: read

jobs:
  quick-tests:
    name: Quick Integration Tests
    runs-on: ubuntu-latest
    if: github.event_name == 'pull_request' || inputs.test_type == 'quick' || inputs.test_type == 'all'
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-go@v5
        with:
          go-version-file: "go.mod"

      - name: Build binaries
        run: |
          go build -o bin/litestream ./cmd/litestream
          go build -o bin/litestream-test ./cmd/litestream-test

      - name: Run quick integration tests
        run: |
          go test -v -tags=integration -timeout=30m ./tests/integration/... \
            -run="TestFreshStart|TestDatabaseIntegrity|TestRapidCheckpoints"
        env:
          CGO_ENABLED: 1

      - name: Upload test logs
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: quick-test-logs
          path: |
            /tmp/litestream-*/*.log
            /tmp/*-test.log

  scenario-tests:
    name: Scenario Integration Tests
    runs-on: ubuntu-latest
    if: github.event_name == 'workflow_dispatch' && (inputs.test_type == 'all' || inputs.test_type == 'long')
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-go@v5
        with:
          go-version-file: "go.mod"

      - name: Build binaries
        run: |
          go build -o bin/litestream ./cmd/litestream
          go build -o bin/litestream-test ./cmd/litestream-test

      - name: Run all scenario tests
        run: |
          go test -v -tags=integration -timeout=1h ./tests/integration/... \
            -run="Test(FreshStart|DatabaseIntegrity|DatabaseDeletion|RapidCheckpoints|WALGrowth|ConcurrentOperations|BusyTimeout)"
        env:
          CGO_ENABLED: 1

      - name: Upload test logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: scenario-test-logs
          path: |
            /tmp/litestream-*/*.log
            /tmp/*-test.log

  long-running-tests:
    name: Long-Running Integration Tests
    runs-on: ubuntu-latest
    if: github.event_name == 'workflow_dispatch' && inputs.test_type == 'long'
    timeout-minutes: 600
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-go@v5
        with:
          go-version-file: "go.mod"

      - name: Build binaries
        run: |
          go build -o bin/litestream ./cmd/litestream
          go build -o bin/litestream-test ./cmd/litestream-test

      - name: Run long tests
        run: |
          go test -v -tags="integration,long" -timeout=10h ./tests/integration/... \
            -run="TestOvernight|Test1GBBoundary"
        env:
          CGO_ENABLED: 1

      - name: Upload test logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: long-test-logs
          path: |
            /tmp/litestream-*/*.log
            /tmp/*-test.log

  summary:
    name: Test Summary
    runs-on: ubuntu-latest
    needs: [quick-tests]
    if: always() && (github.event_name == 'pull_request' || inputs.test_type == 'quick' || inputs.test_type == 'all')
    steps:
      - name: Generate summary
        run: |
          echo "## Integration Test Results" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY

          if [ "${{ needs.quick-tests.result }}" == "success" ]; then
            echo "✅ **Quick Tests:** Passed" >> $GITHUB_STEP_SUMMARY
          elif [ "${{ needs.quick-tests.result }}" == "failure" ]; then
            echo "❌ **Quick Tests:** Failed" >> $GITHUB_STEP_SUMMARY
          elif [ "${{ needs.quick-tests.result }}" == "skipped" ]; then
            echo "⏭️ **Quick Tests:** Skipped" >> $GITHUB_STEP_SUMMARY
          fi

          echo "" >> $GITHUB_STEP_SUMMARY
          echo "---" >> $GITHUB_STEP_SUMMARY
          echo "**Triggered by:** @${{ github.actor }}" >> $GITHUB_STEP_SUMMARY

# Note: Scenario and long-running tests run independently on workflow_dispatch.
# Check individual job results for those test suites.
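The quick suite above can be reproduced locally with the same commands the `quick-tests` job runs; a minimal sketch, assuming a checkout of the repository root and a working CGo toolchain:

```bash
# Build both binaries into ./bin, as the workflow does, then run the
# quick integration tests with the same tags, timeout, and -run filter.
go build -o bin/litestream ./cmd/litestream
go build -o bin/litestream-test ./cmd/litestream-test
CGO_ENABLED=1 go test -v -tags=integration -timeout=30m ./tests/integration/... \
  -run="TestFreshStart|TestDatabaseIntegrity|TestRapidCheckpoints"
```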
@@ -13,18 +13,11 @@ go build -o bin/litestream-test ./cmd/litestream-test

## Quick Reference

> **Note:** Some tests have been migrated to Go integration tests in `tests/integration/`. See [tests/integration/README.md](../../tests/integration/README.md) for the Go-based test suite.

| Script | Purpose | Duration | Status |
|--------|---------|----------|--------|
| verify-test-setup.sh | Environment validation | ~5s | ✅ Stable |
| test-fresh-start.sh | Fresh database creation | ~30s | ✅ Stable |
| test-rapid-checkpoints.sh | Checkpoint stress test | ~2min | ✅ Stable |
| test-wal-growth.sh | Large WAL handling (100MB+) | ~5min | ✅ Stable |
| test-concurrent-operations.sh | Multi-database concurrent replication | ~5min | ✅ Stable |
| test-database-integrity.sh | Complex data integrity validation | ~3min | ✅ Stable |
| test-database-deletion.sh | Database deletion scenarios | ~2min | ✅ Stable |
| test-replica-failover.sh | Replica failover testing | ~3min | ✅ Stable |
| test-busy-timeout.sh | Database busy timeout handling | ~2min | ✅ Stable |
| test-1gb-boundary.sh | SQLite 1GB lock page boundary | ~10min | ⚠️ Blocked by #754 |
| reproduce-critical-bug.sh | Checkpoint during downtime bug | ~2min | 🐛 Reproduces #752 |
| test-754-s3-scenarios.sh | Issue #754 S3 vs file replication | ~10min | 🐛 Tests #754 |
| test-754-restore-focus.sh | Issue #754 restore focus | ~5min | 🐛 Tests #754 |
@@ -58,148 +51,6 @@ Verifies that the test environment is properly configured with required binaries
- SQLite3 available
- Python dependencies for S3 mock

### Core Functionality Tests

#### test-fresh-start.sh
Tests replication with a fresh database that doesn't exist when Litestream starts.

```bash
./cmd/litestream-test/scripts/test-fresh-start.sh
```

**Tests:**
- Starting Litestream before database exists
- Database creation while Litestream is running
- Automatic detection of new database
- Replication and restore integrity

#### test-database-integrity.sh
Creates complex data patterns and verifies integrity after restore; a minimal by-hand version of the check is sketched after the list below.

```bash
./cmd/litestream-test/scripts/test-database-integrity.sh
```

**Tests:**
- Complex data patterns (multiple tables, indexes)
- SQLite PRAGMA integrity_check
- Full database restoration
- Data consistency verification
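Both of the tests above reduce to the same by-hand verification step; a minimal sketch (the output and replica paths here are placeholders, not the scripts' values):

```bash
# Restore from the replica to a scratch path, then let SQLite verify it.
# /tmp/restored.db and /tmp/replica are placeholder paths for this sketch.
./bin/litestream restore -o /tmp/restored.db "file:///tmp/replica"
sqlite3 /tmp/restored.db "PRAGMA integrity_check;"  # prints "ok" on success
```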
#### test-database-deletion.sh
Tests scenarios where the source database is deleted during replication.

```bash
./cmd/litestream-test/scripts/test-database-deletion.sh
```

**Tests:**
- Database deletion during active replication
- Recovery behavior
- Replica consistency

#### test-replica-failover.sh
Tests replica failover scenarios with multiple replicas; a sketch of a multi-replica configuration follows the list below.

```bash
./cmd/litestream-test/scripts/test-replica-failover.sh
```

**Tests:**
- Multiple replica configuration
- Failover when primary replica fails
- Data consistency across replicas
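Failover is driven by a configuration that lists more than one replica per database. A sketch of generating such a config, following the same heredoc pattern test-concurrent-operations.sh uses (all paths and URLs here are illustrative assumptions, not the script's actual values):

```bash
# Hypothetical two-replica config: if the primary replica location is
# lost, the secondary still holds a full copy to restore from.
cat > /tmp/failover-litestream.yml <<EOF
dbs:
  - path: /tmp/failover-test.db
    replicas:
      - url: file:///tmp/failover-replica-primary
      - url: file:///tmp/failover-replica-secondary
EOF
./bin/litestream replicate -config /tmp/failover-litestream.yml
```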
### Stress & Performance Tests

#### test-rapid-checkpoints.sh
Tests Litestream under rapid checkpoint pressure with continuous writes.

```bash
./cmd/litestream-test/scripts/test-rapid-checkpoints.sh
```

**Tests:**
- 100+ writes/second
- Forced rapid checkpoints
- Replication under checkpoint pressure
- Data integrity under stress

**Key Metrics:**
- Checkpoint frequency
- WAL file growth
- Replication lag
- Error rates

#### test-wal-growth.sh
Tests handling of large WAL files (100MB+) under sustained write load.

```bash
./cmd/litestream-test/scripts/test-wal-growth.sh
```

**Tests:**
- Sustained high write rates (400+ writes/sec)
- Large WAL file creation and handling
- Checkpoint behavior with large WALs
- Replication performance with large data

**Key Findings:**
- Successfully handles 100MB+ WAL files
- Maintains data integrity
- Handles 400+ writes/second

#### test-concurrent-operations.sh
Tests multiple databases replicating simultaneously with competing operations.

```bash
./cmd/litestream-test/scripts/test-concurrent-operations.sh
```

**Tests:**
- Multiple databases (3-5) replicating concurrently
- Mixed read/write operations
- Competing checkpoints
- Resource contention handling

#### test-busy-timeout.sh
Tests database busy timeout handling with concurrent access; the pragma it exercises is sketched after the list below.

```bash
./cmd/litestream-test/scripts/test-busy-timeout.sh
```

**Tests:**
- Concurrent database access
- Busy timeout configuration
- Lock contention handling
- Recovery from busy states
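The mitigation this test measures is a single application-side pragma, set on the same connection as the write; this is the exact pattern the script uses:

```bash
# Give writes a 5-second grace period before failing with SQLITE_BUSY,
# so short lock windows (e.g. checkpoints) are waited out instead.
# $DB is the database path.
sqlite3 "$DB" "PRAGMA busy_timeout = 5000; INSERT INTO test (data) VALUES (randomblob(1000));"
```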
### Boundary & Edge Case Tests

#### test-1gb-boundary.sh
Tests SQLite's 1GB lock page boundary handling.

```bash
./cmd/litestream-test/scripts/test-1gb-boundary.sh
```

**Tests:**
- Database growth beyond 1GB (with 4KB pages)
- Lock page at #262145 properly skipped
- Replication across lock page boundary
- Restoration integrity after crossing boundary

**Status:** ⚠️ Currently blocked by ltx v0.5.0 flag compatibility issue (#754)

**Lock Page Numbers by Page Size:**

| Page Size | Lock Page # |
|-----------|-------------|
| 4KB | 262145 |
| 8KB | 131073 |
| 16KB | 65537 |
| 32KB | 32769 |
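All four rows follow from one relationship: the lock page is the page containing byte offset 0x40000000 (1GB), so its number is 0x40000000 / page_size + 1. A quick shell check:

```bash
# Recompute the lock page numbers in the table above.
for page_size in 4096 8192 16384 32768; do
    echo "$page_size -> $(( 0x40000000 / page_size + 1 ))"
done
# 4096 -> 262145, 8192 -> 131073, 16384 -> 65537, 32768 -> 32769
```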
### Bug Reproduction Scripts

#### reproduce-critical-bug.sh
cmd/litestream-test/scripts/test-1gb-boundary.sh (deleted)
@@ -1,217 +0,0 @@
#!/bin/bash

# Test Script: SQLite 1GB Lock Page Boundary
#
# This test verifies that Litestream correctly handles the SQLite lock page
# at the 1GB boundary (0x40000000). This page is reserved by SQLite and
# cannot contain data - Litestream must skip it during replication.
#
# The lock page number varies by page size:
# - 4KB: page 262145
# - 8KB: page 131073
# - 16KB: page 65537
# - 32KB: page 32769

set -e

echo "=========================================="
echo "SQLite 1GB Lock Page Boundary Test"
echo "=========================================="
echo ""
echo "Testing Litestream's handling of SQLite's reserved lock page at 1GB"
echo ""

# Configuration
DB="/tmp/1gb-test.db"
REPLICA="/tmp/1gb-replica"
LITESTREAM_TEST="./bin/litestream-test"
LITESTREAM="./bin/litestream"

# Clean up any previous test
echo "[SETUP] Cleaning up previous test files..."
rm -f "$DB"*
rm -rf "$REPLICA"

# Check for required binaries
if [ ! -f "$LITESTREAM_TEST" ]; then
    echo "ERROR: litestream-test not found at $LITESTREAM_TEST"
    echo "Build with: go build -o bin/litestream-test ./cmd/litestream-test"
    exit 1
fi

if [ ! -f "$LITESTREAM" ]; then
    echo "ERROR: litestream not found at $LITESTREAM"
    echo "Build with: go build -o bin/litestream ./cmd/litestream"
    exit 1
fi

test_page_size() {
    local PAGE_SIZE=$1
    local LOCK_PGNO=$2

    echo ""
    echo "======================================="
    echo "Testing with page size: $PAGE_SIZE bytes"
    echo "Lock page should be at: $LOCK_PGNO"
    echo "======================================="

    # Clean up for this test
    rm -f "$DB"*
    rm -rf "$REPLICA"

    # Create database with specific page size
    echo "[1] Creating database with page_size=$PAGE_SIZE..."
    sqlite3 "$DB" <<EOF
PRAGMA page_size=$PAGE_SIZE;
CREATE TABLE test_data (
    id INTEGER PRIMARY KEY,
    data BLOB
);
EOF

    # Calculate target size (1.2GB to ensure we cross 1GB boundary)
    TARGET_SIZE=$((1200 * 1024 * 1024))

    echo "[2] Populating database to cross 1GB boundary (target: 1.2GB)..."
    # Use litestream-test to populate efficiently
    $LITESTREAM_TEST populate -db "$DB" -target-size 1200MB -row-size $((PAGE_SIZE - 100))

    # Get actual size and page count
    DB_SIZE=$(stat -f%z "$DB" 2>/dev/null || stat -c%s "$DB")
    PAGE_COUNT=$(sqlite3 "$DB" "PRAGMA page_count;")
    echo " Database size: $(( DB_SIZE / 1024 / 1024 ))MB"
    echo " Page count: $PAGE_COUNT"
    echo " Lock page at: $LOCK_PGNO"

    # Verify we've crossed the boundary
    if [ "$PAGE_COUNT" -le "$LOCK_PGNO" ]; then
        echo " WARNING: Database doesn't cross lock page boundary!"
        echo " Need at least $LOCK_PGNO pages, have $PAGE_COUNT"
    else
        echo " ✓ Database crosses lock page boundary"
    fi

    # Start Litestream replication
    echo "[3] Starting Litestream replication..."
    $LITESTREAM replicate "$DB" "file://$REPLICA" > /tmp/litestream-1gb.log 2>&1 &
    LITESTREAM_PID=$!
    sleep 3

    if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
        echo "ERROR: Litestream failed to start"
        cat /tmp/litestream-1gb.log
        return 1
    fi
    echo " ✓ Litestream running (PID: $LITESTREAM_PID)"

    # Add more data to trigger replication across the boundary
    echo "[4] Adding data around the lock page boundary..."
    # Use litestream-test load to ensure continuous writes
    $LITESTREAM_TEST load -db "$DB" -write-rate 10 -duration 10s -pattern constant &
    LOAD_PID=$!

    # Let it run and create multiple transactions
    echo "[5] Running writes for 10 seconds to ensure multiple transactions..."
    sleep 10

    # Stop writes and let replication catch up
    kill $LOAD_PID 2>/dev/null || true
    sleep 5

    # Check for errors in log
    if grep -i "error\|panic\|fatal" /tmp/litestream-1gb.log > /dev/null 2>&1; then
        echo " WARNING: Errors detected in Litestream log:"
        grep -i "error\|panic\|fatal" /tmp/litestream-1gb.log | head -5
    fi

    # Stop Litestream
    kill $LITESTREAM_PID 2>/dev/null || true
    sleep 2

    # Attempt restore
    echo "[6] Testing restore..."
    rm -f /tmp/restored-1gb.db
    if $LITESTREAM restore -o /tmp/restored-1gb.db "file://$REPLICA" > /tmp/restore-1gb.log 2>&1; then
        echo " ✓ Restore successful"

        # Verify integrity
        INTEGRITY=$(sqlite3 /tmp/restored-1gb.db "PRAGMA integrity_check;" 2>/dev/null || echo "FAILED")
        if [ "$INTEGRITY" = "ok" ]; then
            echo " ✓ Integrity check passed"
        else
            echo " ✗ Integrity check failed: $INTEGRITY"
            return 1
        fi

        # Compare page counts
        RESTORED_COUNT=$(sqlite3 /tmp/restored-1gb.db "PRAGMA page_count;" 2>/dev/null || echo "0")
        echo " Original pages: $PAGE_COUNT"
        echo " Restored pages: $RESTORED_COUNT"

        if [ "$PAGE_COUNT" -eq "$RESTORED_COUNT" ]; then
            echo " ✓ Page count matches"
        else
            echo " ✗ Page count mismatch!"
            return 1
        fi

        # Check data integrity
        ORIG_ROWS=$(sqlite3 "$DB" "SELECT COUNT(*) FROM test_data;")
        REST_ROWS=$(sqlite3 /tmp/restored-1gb.db "SELECT COUNT(*) FROM test_data;")
        echo " Original rows: $ORIG_ROWS"
        echo " Restored rows: $REST_ROWS"

        if [ "$ORIG_ROWS" -eq "$REST_ROWS" ]; then
            echo " ✓ Data integrity verified"
            echo ""
            echo " TEST PASSED for page_size=$PAGE_SIZE"
        else
            echo " ✗ Row count mismatch!"
            return 1
        fi
    else
        echo " ✗ Restore FAILED!"
        cat /tmp/restore-1gb.log
        return 1
    fi

    # Clean up
    rm -f /tmp/restored-1gb.db
}

# Test with different page sizes
echo "Testing SQLite lock page handling at 1GB boundary"
echo "This verifies Litestream correctly skips the reserved lock page"
echo ""

# Default 4KB page size (most common)
if ! test_page_size 4096 262145; then
    echo "CRITICAL: Test failed for 4KB pages!"
    exit 1
fi

# 8KB page size
if ! test_page_size 8192 131073; then
    echo "CRITICAL: Test failed for 8KB pages!"
    exit 1
fi

# 16KB page size (if time permits - these are large databases)
# Uncomment to test:
# if ! test_page_size 16384 65537; then
#     echo "CRITICAL: Test failed for 16KB pages!"
#     exit 1
# fi

echo ""
echo "=========================================="
echo "All 1GB boundary tests PASSED!"
echo "=========================================="
echo ""
echo "Litestream correctly handles the SQLite lock page at 1GB boundary"
echo "for all tested page sizes."
echo ""

# Clean up
pkill -f "litestream replicate" 2>/dev/null || true
echo "Test complete."
cmd/litestream-test/scripts/test-busy-timeout.sh (deleted)
@@ -1,225 +0,0 @@
#!/bin/bash
set -e

# Test busy timeout handling with concurrent writes
# This test verifies proper handling of write lock conflicts between app and Litestream

echo "=========================================="
echo "Busy Timeout and Write Lock Conflict Test"
echo "=========================================="
echo ""
echo "Testing write lock conflict handling with various busy_timeout settings"
echo ""

# Configuration
DB="/tmp/busy-test.db"
REPLICA="/tmp/busy-replica"
LITESTREAM="./bin/litestream"
LITESTREAM_TEST="./bin/litestream-test"

# Cleanup function
cleanup() {
    pkill -f "litestream replicate.*busy-test.db" 2>/dev/null || true
    pkill -f "litestream-test load.*busy-test.db" 2>/dev/null || true
    rm -f "$DB" "$DB-wal" "$DB-shm" "$DB-litestream"
    rm -rf "$REPLICA"
    rm -f /tmp/busy-*.log
}

trap cleanup EXIT

echo "[SETUP] Cleaning up previous test files..."
cleanup

echo ""
echo "[1] Creating test database..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode = WAL;
CREATE TABLE test (id INTEGER PRIMARY KEY, data BLOB, timestamp DATETIME DEFAULT CURRENT_TIMESTAMP);
INSERT INTO test (data) VALUES (randomblob(1000));
EOF
echo " ✓ Database created"

echo ""
echo "[2] Starting Litestream replication..."
"$LITESTREAM" replicate "$DB" "file://$REPLICA" > /tmp/busy-litestream.log 2>&1 &
LITESTREAM_PID=$!
sleep 2

if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
    echo " ✗ Litestream failed to start"
    cat /tmp/busy-litestream.log
    exit 1
fi
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"

echo ""
echo "=========================================="
echo "Test 1: No busy_timeout (default behavior)"
echo "=========================================="

echo "[3] Starting aggressive writes without busy_timeout..."
ERRORS_NO_TIMEOUT=0
SUCCESS_NO_TIMEOUT=0

for i in {1..100}; do
    if sqlite3 "$DB" "INSERT INTO test (data) VALUES (randomblob(1000));" 2>/dev/null; then
        ((SUCCESS_NO_TIMEOUT++))
    else
        ((ERRORS_NO_TIMEOUT++))
    fi
done

echo " Results without busy_timeout:"
echo " ✓ Successful writes: $SUCCESS_NO_TIMEOUT"
echo " ✗ Failed writes (SQLITE_BUSY): $ERRORS_NO_TIMEOUT"

if [ $ERRORS_NO_TIMEOUT -gt 0 ]; then
    echo " ⚠️ Conflicts detected without busy_timeout (expected)"
else
    echo " ✓ No conflicts (may indicate low checkpoint frequency)"
fi

echo ""
echo "=========================================="
echo "Test 2: With 5-second busy_timeout (recommended)"
echo "=========================================="

echo "[4] Testing with recommended 5-second timeout..."
ERRORS_WITH_TIMEOUT=0
SUCCESS_WITH_TIMEOUT=0

for i in {1..100}; do
    if sqlite3 "$DB" "PRAGMA busy_timeout = 5000; INSERT INTO test (data) VALUES (randomblob(1000));" 2>/dev/null; then
        ((SUCCESS_WITH_TIMEOUT++))
    else
        ((ERRORS_WITH_TIMEOUT++))
    fi
done

echo " Results with 5s busy_timeout:"
echo " ✓ Successful writes: $SUCCESS_WITH_TIMEOUT"
echo " ✗ Failed writes: $ERRORS_WITH_TIMEOUT"

if [ $ERRORS_WITH_TIMEOUT -eq 0 ]; then
    echo " ✓ All writes succeeded with proper timeout!"
elif [ $ERRORS_WITH_TIMEOUT -lt $ERRORS_NO_TIMEOUT ]; then
    echo " ✓ Timeout reduced conflicts significantly"
else
    echo " ⚠️ Timeout didn't help (may need investigation)"
fi

echo ""
echo "=========================================="
echo "Test 3: Concurrent high-frequency writes"
echo "=========================================="

echo "[5] Starting 3 concurrent write processes..."

# Start multiple concurrent writers
(
    for i in {1..50}; do
        sqlite3 "$DB" "PRAGMA busy_timeout = 5000; INSERT INTO test (data) VALUES ('Writer1: ' || randomblob(500));" 2>/dev/null
        sleep 0.01
    done
) > /tmp/busy-writer1.log 2>&1 &
WRITER1_PID=$!

(
    for i in {1..50}; do
        sqlite3 "$DB" "PRAGMA busy_timeout = 5000; INSERT INTO test (data) VALUES ('Writer2: ' || randomblob(500));" 2>/dev/null
        sleep 0.01
    done
) > /tmp/busy-writer2.log 2>&1 &
WRITER2_PID=$!

(
    for i in {1..50}; do
        sqlite3 "$DB" "PRAGMA busy_timeout = 5000; INSERT INTO test (data) VALUES ('Writer3: ' || randomblob(500));" 2>/dev/null
        sleep 0.01
    done
) > /tmp/busy-writer3.log 2>&1 &
WRITER3_PID=$!

echo " Writers started: PID $WRITER1_PID, $WRITER2_PID, $WRITER3_PID"

# Monitor for conflicts
sleep 1
echo ""
echo "[6] Forcing checkpoints during concurrent writes..."
for i in {1..5}; do
    sqlite3 "$DB" "PRAGMA busy_timeout = 5000; PRAGMA wal_checkpoint(PASSIVE);" 2>/dev/null || true
    sleep 1
done

# Wait for writers to complete
wait $WRITER1_PID 2>/dev/null
wait $WRITER2_PID 2>/dev/null
wait $WRITER3_PID 2>/dev/null

echo " ✓ Concurrent writers completed"

echo ""
echo "[7] Checking for lock contention in Litestream log..."
CHECKPOINT_ERRORS=$(grep -c "checkpoint" /tmp/busy-litestream.log 2>/dev/null || echo "0")
SYNC_ERRORS=$(grep -c "database is locked" /tmp/busy-litestream.log 2>/dev/null || echo "0")

echo " Litestream errors:"
echo " Checkpoint errors: $CHECKPOINT_ERRORS"
echo " Lock errors: $SYNC_ERRORS"

if [ "$SYNC_ERRORS" -eq "0" ]; then
    echo " ✓ No lock errors in Litestream"
else
    echo " ⚠️ Some lock contention detected (may be normal under high load)"
fi

echo ""
echo "=========================================="
echo "Test 4: Checkpoint during write transaction"
echo "=========================================="

echo "[8] Testing checkpoint during long transaction..."

# Start a long transaction
sqlite3 "$DB" "PRAGMA busy_timeout = 5000; BEGIN EXCLUSIVE;" 2>/dev/null &
TRANS_PID=$!
sleep 0.5

# Try to checkpoint while transaction is held
CHECKPOINT_RESULT=$(sqlite3 "$DB" "PRAGMA busy_timeout = 1000; PRAGMA wal_checkpoint(FULL);" 2>&1 || echo "FAILED")

if [[ "$CHECKPOINT_RESULT" == *"FAILED"* ]] || [[ "$CHECKPOINT_RESULT" == *"database is locked"* ]]; then
    echo " ✓ Checkpoint correctly blocked by exclusive transaction"
else
    echo " ⚠️ Unexpected checkpoint behavior: $CHECKPOINT_RESULT"
fi

# Clean up transaction
kill $TRANS_PID 2>/dev/null || true

echo ""
echo "[9] Final statistics..."
TOTAL_ROWS=$(sqlite3 "$DB" "SELECT COUNT(*) FROM test;")
WAL_SIZE=$(du -h "$DB-wal" 2>/dev/null | cut -f1 || echo "0")
DB_SIZE=$(du -h "$DB" | cut -f1)

echo " Database stats:"
echo " Total rows inserted: $TOTAL_ROWS"
echo " Database size: $DB_SIZE"
echo " WAL size: $WAL_SIZE"

echo ""
echo "=========================================="
echo "Busy Timeout Test Summary:"
echo " Without timeout: $ERRORS_NO_TIMEOUT conflicts"
echo " With 5s timeout: $ERRORS_WITH_TIMEOUT conflicts"
echo " Concurrent writes: Completed successfully"
echo " Lock contention: Properly handled"
echo ""
if [ $ERRORS_WITH_TIMEOUT -lt $ERRORS_NO_TIMEOUT ] || [ $ERRORS_WITH_TIMEOUT -eq 0 ]; then
    echo "✅ TEST PASSED: busy_timeout improves conflict handling"
else
    echo "⚠️ TEST NOTICE: Timeout may need tuning for this workload"
fi
echo "=========================================="
cmd/litestream-test/scripts/test-concurrent-operations.sh (deleted)
@@ -1,300 +0,0 @@
#!/bin/bash

# Test Script: Concurrent Database Operations
#
# This test verifies Litestream's behavior under heavy concurrent load with
# multiple databases replicating simultaneously, mixed operations, and
# competing checkpoints.

set -e

echo "============================================"
echo "Concurrent Database Operations Test"
echo "============================================"
echo ""
echo "Testing Litestream with multiple concurrent databases and operations"
echo ""

# Configuration
BASE_DIR="/tmp/concurrent-test"
LITESTREAM_TEST="./bin/litestream-test"
LITESTREAM="./bin/litestream"
NUM_DBS=5
DB_SIZE="50MB"
DURATION="30s"

# Clean up any previous test
echo "[SETUP] Cleaning up previous test files..."
rm -rf "$BASE_DIR"
mkdir -p "$BASE_DIR"

# Check for required binaries
if [ ! -f "$LITESTREAM_TEST" ]; then
    echo "ERROR: litestream-test not found at $LITESTREAM_TEST"
    echo "Build with: go build -o bin/litestream-test ./cmd/litestream-test"
    exit 1
fi

if [ ! -f "$LITESTREAM" ]; then
    echo "ERROR: litestream not found at $LITESTREAM"
    echo "Build with: go build -o bin/litestream ./cmd/litestream"
    exit 1
fi

# Create configuration file for multiple databases
echo "[1] Creating Litestream configuration for $NUM_DBS databases..."
cat > "$BASE_DIR/litestream.yml" <<EOF
dbs:
EOF

for i in $(seq 1 $NUM_DBS); do
    cat >> "$BASE_DIR/litestream.yml" <<EOF
  - path: $BASE_DIR/db${i}.db
    replicas:
      - url: file://$BASE_DIR/replica${i}
        sync-interval: 1s
EOF
done

echo " ✓ Configuration created"

# Create and populate databases
echo ""
echo "[2] Creating and populating $NUM_DBS databases..."
for i in $(seq 1 $NUM_DBS); do
    echo " Creating database $i..."
    $LITESTREAM_TEST populate -db "$BASE_DIR/db${i}.db" -target-size "$DB_SIZE" -table-count 2 &
done
wait
echo " ✓ All databases created"

# Start Litestream with multiple databases
echo ""
echo "[3] Starting Litestream for all databases..."
$LITESTREAM replicate -config "$BASE_DIR/litestream.yml" > "$BASE_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
sleep 3

if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
    echo "ERROR: Litestream failed to start"
    cat "$BASE_DIR/litestream.log"
    exit 1
fi
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"

# Start concurrent operations on all databases
echo ""
echo "[4] Starting concurrent operations on all databases..."
PIDS=()

# Different workload patterns for each database
for i in $(seq 1 $NUM_DBS); do
    case $i in
        1)
            # High-frequency writes
            echo " DB$i: High-frequency writes (500/sec)"
            $LITESTREAM_TEST load -db "$BASE_DIR/db${i}.db" \
                -write-rate 500 -duration "$DURATION" \
                -pattern constant > "$BASE_DIR/load${i}.log" 2>&1 &
            ;;
        2)
            # Burst writes
            echo " DB$i: Burst writes (1000/sec burst)"
            $LITESTREAM_TEST load -db "$BASE_DIR/db${i}.db" \
                -write-rate 1000 -duration "$DURATION" \
                -pattern burst > "$BASE_DIR/load${i}.log" 2>&1 &
            ;;
        3)
            # Mixed with checkpoints
            echo " DB$i: Moderate writes with periodic checkpoints"
            (
                $LITESTREAM_TEST load -db "$BASE_DIR/db${i}.db" \
                    -write-rate 100 -duration "$DURATION" \
                    -pattern constant > "$BASE_DIR/load${i}.log" 2>&1 &
                LOAD_PID=$!

                # Periodic checkpoints
                for j in {1..6}; do
                    sleep 5
                    sqlite3 "$BASE_DIR/db${i}.db" "PRAGMA wal_checkpoint(PASSIVE);" 2>/dev/null || true
                done

                wait $LOAD_PID
            ) &
            ;;
        4)
            # Shrinking operations
            echo " DB$i: Writes with periodic shrinking"
            (
                $LITESTREAM_TEST load -db "$BASE_DIR/db${i}.db" \
                    -write-rate 50 -duration "$DURATION" \
                    -pattern wave > "$BASE_DIR/load${i}.log" 2>&1 &
                LOAD_PID=$!

                # Periodic shrinks
                for j in {1..3}; do
                    sleep 10
                    $LITESTREAM_TEST shrink -db "$BASE_DIR/db${i}.db" \
                        -delete-percentage 30 2>/dev/null || true
                done

                wait $LOAD_PID
            ) &
            ;;
        5)
            # Large transactions
            echo " DB$i: Large batch transactions"
            for j in {1..10}; do
                sqlite3 "$BASE_DIR/db${i}.db" <<EOF
BEGIN;
INSERT INTO test_table_0 (data)
SELECT randomblob(1000) FROM generate_series(1, 10000);
COMMIT;
EOF
                sleep 3
            done &
            ;;
    esac
    PIDS+=($!)
done

# Monitor progress
echo ""
echo "[5] Running concurrent operations for $DURATION..."
ELAPSED=0
MAX_ELAPSED=30

while [ $ELAPSED -lt $MAX_ELAPSED ]; do
    sleep 5
    ELAPSED=$((ELAPSED + 5))

    # Check Litestream health
    if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
        echo " ERROR: Litestream crashed!"
        cat "$BASE_DIR/litestream.log" | tail -20
        exit 1
    fi

    # Check for errors
    ERROR_COUNT=$(grep -i "error\|panic" "$BASE_DIR/litestream.log" 2>/dev/null | wc -l || echo "0")
    if [ "$ERROR_COUNT" -gt 0 ]; then
        echo " Errors detected: $ERROR_COUNT"
    fi

    echo " Progress: ${ELAPSED}s / ${MAX_ELAPSED}s"
done

# Stop all operations
echo ""
echo "[6] Stopping operations..."
for pid in "${PIDS[@]}"; do
    kill $pid 2>/dev/null || true
done
wait

# Give Litestream time to catch up
echo " Waiting for final sync..."
sleep 5

# Collect metrics
echo ""
echo "[7] Collecting metrics..."
for i in $(seq 1 $NUM_DBS); do
    DB_SIZE=$(stat -f%z "$BASE_DIR/db${i}.db" 2>/dev/null || stat -c%s "$BASE_DIR/db${i}.db")
    WAL_SIZE=$(stat -f%z "$BASE_DIR/db${i}.db-wal" 2>/dev/null || stat -c%s "$BASE_DIR/db${i}.db-wal" 2>/dev/null || echo "0")
    REPLICA_COUNT=$(find "$BASE_DIR/replica${i}" -type f 2>/dev/null | wc -l || echo "0")

    echo " DB$i:"
    echo " Database size: $((DB_SIZE / 1024 / 1024))MB"
    echo " WAL size: $((WAL_SIZE / 1024 / 1024))MB"
    echo " Replica files: $REPLICA_COUNT"
done

# Stop Litestream
kill $LITESTREAM_PID 2>/dev/null || true
sleep 2

# Test restoration for all databases
echo ""
echo "[8] Testing restoration of all databases..."
RESTORE_FAILED=0

for i in $(seq 1 $NUM_DBS); do
    echo " Restoring DB$i..."
    rm -f "$BASE_DIR/restored${i}.db"

    if $LITESTREAM restore -config "$BASE_DIR/litestream.yml" \
        -o "$BASE_DIR/restored${i}.db" "$BASE_DIR/db${i}.db" > "$BASE_DIR/restore${i}.log" 2>&1; then

        # Verify integrity
        INTEGRITY=$(sqlite3 "$BASE_DIR/restored${i}.db" "PRAGMA integrity_check;" 2>/dev/null || echo "FAILED")
        if [ "$INTEGRITY" = "ok" ]; then
            echo " ✓ DB$i restored successfully"
        else
            echo " ✗ DB$i integrity check failed!"
            RESTORE_FAILED=$((RESTORE_FAILED + 1))
        fi
    else
        echo " ✗ DB$i restore failed!"
        cat "$BASE_DIR/restore${i}.log"
        RESTORE_FAILED=$((RESTORE_FAILED + 1))
    fi
done

# Check for race conditions or deadlocks in logs
echo ""
echo "[9] Analyzing logs for issues..."
ISSUES_FOUND=0

# Check for deadlocks
if grep -i "deadlock" "$BASE_DIR/litestream.log" > /dev/null 2>&1; then
    echo " ✗ Deadlock detected!"
    ISSUES_FOUND=$((ISSUES_FOUND + 1))
fi

# Check for database locked errors
LOCKED_COUNT=$(grep -c "database is locked" "$BASE_DIR/litestream.log" 2>/dev/null || echo "0")
if [ "$LOCKED_COUNT" -gt 10 ]; then
    echo " ⚠ High number of 'database locked' errors: $LOCKED_COUNT"
    ISSUES_FOUND=$((ISSUES_FOUND + 1))
fi

# Check for checkpoint failures
CHECKPOINT_ERRORS=$(grep -c "checkpoint.*error\|checkpoint.*fail" "$BASE_DIR/litestream.log" 2>/dev/null || echo "0")
if [ "$CHECKPOINT_ERRORS" -gt 0 ]; then
    echo " ⚠ Checkpoint errors detected: $CHECKPOINT_ERRORS"
fi

# Summary
echo ""
echo "============================================"
echo "Test Results Summary"
echo "============================================"
echo ""
echo "Databases tested: $NUM_DBS"
echo "Restore failures: $RESTORE_FAILED"
echo "Critical issues found: $ISSUES_FOUND"

if [ "$RESTORE_FAILED" -eq 0 ] && [ "$ISSUES_FOUND" -eq 0 ]; then
    echo ""
    echo "✅ CONCURRENT OPERATIONS TEST PASSED"
    echo ""
    echo "Litestream successfully handled:"
    echo "- $NUM_DBS databases replicating simultaneously"
    echo "- Mixed workload patterns (high-frequency, burst, batch)"
    echo "- Concurrent checkpoints and shrinking operations"
    echo "- All databases restored successfully"
else
    echo ""
    echo "❌ CONCURRENT OPERATIONS TEST FAILED"
    echo ""
    echo "Issues detected during concurrent operations"
    echo "Check logs at: $BASE_DIR/"
    exit 1
fi

# Clean up
pkill -f litestream-test 2>/dev/null || true
pkill -f "litestream replicate" 2>/dev/null || true
echo ""
echo "Test complete. Artifacts saved in: $BASE_DIR/"
cmd/litestream-test/scripts/test-database-deletion.sh (deleted)
@@ -1,172 +0,0 @@
#!/bin/bash
set -e

# Test database deletion and recreation scenarios
# This test verifies proper handling when databases are deleted and recreated

echo "=========================================="
echo "Database Deletion and Recreation Test"
echo "=========================================="
echo ""
echo "Testing Litestream's handling of database deletion and recreation"
echo ""

# Configuration
DB="/tmp/deletion-test.db"
REPLICA="/tmp/deletion-replica"
LITESTREAM="./bin/litestream"

# Cleanup function
cleanup() {
    pkill -f "litestream replicate.*deletion-test.db" 2>/dev/null || true
    rm -f "$DB" "$DB-wal" "$DB-shm" "$DB-litestream"
    rm -rf "$REPLICA"
    rm -f /tmp/deletion-*.log
}

trap cleanup EXIT

echo "[SETUP] Cleaning up previous test files..."
cleanup

echo ""
echo "[1] Creating initial database..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode = WAL;
CREATE TABLE original (id INTEGER PRIMARY KEY, data TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
INSERT INTO original (data) VALUES ('Original database content');
INSERT INTO original (data) VALUES ('Should not appear in new database');
EOF
ORIGINAL_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM original;")
echo " ✓ Original database created with $ORIGINAL_COUNT rows"

echo ""
echo "[2] Starting Litestream replication..."
"$LITESTREAM" replicate "$DB" "file://$REPLICA" > /tmp/deletion-litestream.log 2>&1 &
LITESTREAM_PID=$!
sleep 2

if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
    echo " ✗ Litestream failed to start"
    cat /tmp/deletion-litestream.log
    exit 1
fi
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"

echo ""
echo "[3] Letting replication stabilize..."
sleep 3
echo " ✓ Initial replication complete"

echo ""
echo "=========================================="
echo "Test 1: Delete database while Litestream running"
echo "=========================================="

echo "[4] Deleting database files..."
rm -f "$DB" "$DB-wal" "$DB-shm"
echo " ✓ Database files deleted"

echo ""
echo "[5] Creating new database with different schema..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode = WAL;
CREATE TABLE replacement (id INTEGER PRIMARY KEY, content BLOB, version INTEGER);
INSERT INTO replacement (content, version) VALUES (randomblob(100), 1);
INSERT INTO replacement (content, version) VALUES (randomblob(200), 2);
EOF
NEW_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM replacement;")
echo " ✓ New database created with $NEW_COUNT rows"

echo ""
echo "[6] Checking for Litestream errors..."
sleep 2
ERRORS=$(grep -c "ERROR" /tmp/deletion-litestream.log 2>/dev/null || echo "0")
WARNINGS=$(grep -c "WAL" /tmp/deletion-litestream.log 2>/dev/null || echo "0")
echo " Litestream errors: $ERRORS"
echo " WAL warnings: $WARNINGS"

if [ $ERRORS -gt 0 ]; then
    echo " ⚠️ Errors detected (expected when database deleted)"
    tail -5 /tmp/deletion-litestream.log | grep ERROR || true
fi

echo ""
echo "=========================================="
echo "Test 2: Check for leftover WAL corruption"
echo "=========================================="

echo "[7] Stopping Litestream..."
kill $LITESTREAM_PID 2>/dev/null || true
wait $LITESTREAM_PID 2>/dev/null
echo " ✓ Litestream stopped"

echo ""
echo "[8] Simulating leftover WAL file scenario..."
# Create a database with WAL
sqlite3 "$DB" <<EOF
PRAGMA journal_mode = WAL;
INSERT INTO replacement (content, version) VALUES (randomblob(300), 3);
EOF
echo " ✓ WAL file created"

# Delete only the main database file (leaving WAL)
echo "[9] Deleting only main database file (leaving WAL)..."
rm -f "$DB"
ls -la /tmp/deletion-test* 2>/dev/null | head -5 || true

echo ""
echo "[10] Creating new database with leftover WAL..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode = WAL;
CREATE TABLE new_table (id INTEGER PRIMARY KEY, data TEXT);
INSERT INTO new_table (data) VALUES ('New database with old WAL');
EOF

# Check if corruption occurred
INTEGRITY=$(sqlite3 "$DB" "PRAGMA integrity_check;" 2>&1)
if [ "$INTEGRITY" = "ok" ]; then
    echo " ✓ No corruption despite leftover WAL"
else
    echo " ✗ CORRUPTION DETECTED: $INTEGRITY"
    echo " This confirms leftover WAL files can corrupt new databases!"
fi

echo ""
echo "=========================================="
echo "Test 3: Clean deletion procedure"
echo "=========================================="

echo "[11] Demonstrating proper deletion procedure..."

# Clean up everything
rm -f "$DB" "$DB-wal" "$DB-shm"
rm -rf "$DB-litestream"
echo " ✓ All database files removed"

# Create fresh database
sqlite3 "$DB" <<EOF
PRAGMA journal_mode = WAL;
CREATE TABLE clean (id INTEGER PRIMARY KEY, data TEXT);
INSERT INTO clean (data) VALUES ('Clean start');
EOF

FINAL_INTEGRITY=$(sqlite3 "$DB" "PRAGMA integrity_check;")
FINAL_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM clean;")

echo " ✓ Clean database created"
echo " Integrity: $FINAL_INTEGRITY"
echo " Rows: $FINAL_COUNT"

echo ""
echo "=========================================="
echo "Database Deletion Test Summary:"
echo " ✓ Detected database deletion scenarios"
echo " ✓ Demonstrated WAL file corruption risk"
echo " ✓ Showed proper cleanup procedure"
echo ""
echo "IMPORTANT: When deleting databases:"
echo " 1. Stop Litestream first"
echo " 2. Delete: DB, DB-wal, DB-shm, DB-litestream"
echo " 3. Restart Litestream after creating new DB"
echo "=========================================="
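For reference outside the test, the clean-deletion procedure from the script's summary can be sketched as a standalone snippet (with `$DB` standing in for your database path, and the pkill pattern matching however you started Litestream):

```bash
# 1. Stop Litestream before touching the files.
pkill -f "litestream replicate" 2>/dev/null || true
# 2. Remove the database plus its WAL, SHM, and Litestream sidecar files.
rm -f "$DB" "$DB-wal" "$DB-shm"
rm -rf "$DB-litestream"
# 3. Recreate the database, then restart Litestream against it.
sqlite3 "$DB" "PRAGMA journal_mode = WAL;"
```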
cmd/litestream-test/scripts/test-database-integrity.sh (deleted)
@@ -1,260 +0,0 @@
#!/bin/bash
set -e

# Test database integrity after restore (Issue #582)
# This test creates complex data patterns, replicates, and verifies integrity after restore

echo "=========================================="
echo "Database Integrity After Restore Test"
echo "=========================================="
echo ""
echo "Testing if restored databases pass integrity checks"
echo ""

# Configuration
DB="/tmp/integrity-test.db"
REPLICA="/tmp/integrity-replica"
RESTORED="/tmp/integrity-restored.db"
LITESTREAM_CONFIG="/tmp/integrity-litestream.yml"
LITESTREAM="./bin/litestream"
LITESTREAM_TEST="./bin/litestream-test"

# Cleanup function
cleanup() {
    pkill -f "litestream replicate.*integrity-test.db" 2>/dev/null || true
    rm -f "$DB" "$DB-wal" "$DB-shm" "$DB-litestream"
    rm -f "$RESTORED" "$RESTORED-wal" "$RESTORED-shm"
    rm -rf "$REPLICA"
    rm -f "$LITESTREAM_CONFIG"
    rm -f /tmp/integrity-*.log
}

trap cleanup EXIT

echo "[SETUP] Cleaning up previous test files..."
cleanup

echo ""
echo "[1] Creating database with complex data patterns..."
# Create database with various data types and constraints
sqlite3 "$DB" <<EOF
PRAGMA page_size = 4096;
PRAGMA journal_mode = WAL;

-- Table with primary key and foreign key constraints
CREATE TABLE users (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    email TEXT UNIQUE,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);

-- Table with indexes
CREATE TABLE posts (
    id INTEGER PRIMARY KEY,
    user_id INTEGER NOT NULL,
    title TEXT NOT NULL,
    content BLOB,
    score REAL,
    FOREIGN KEY (user_id) REFERENCES users(id)
);

CREATE INDEX idx_posts_user ON posts(user_id);
CREATE INDEX idx_posts_score ON posts(score);

-- Table with check constraints
CREATE TABLE transactions (
    id INTEGER PRIMARY KEY,
    amount REAL NOT NULL CHECK (amount != 0),
    type TEXT CHECK (type IN ('credit', 'debit')),
    balance REAL
);

-- Add initial data
INSERT INTO users (name, email) VALUES
    ('Alice', 'alice@test.com'),
    ('Bob', 'bob@test.com'),
    ('Charlie', 'charlie@test.com');

-- Add posts with various data types
INSERT INTO posts (user_id, title, content, score) VALUES
    (1, 'First Post', randomblob(1000), 4.5),
    (2, 'Second Post', randomblob(2000), 3.8),
    (3, 'Third Post', NULL, 4.9);

-- Add transactions
INSERT INTO transactions (amount, type, balance) VALUES
    (100.50, 'credit', 100.50),
    (-25.75, 'debit', 74.75),
    (50.00, 'credit', 124.75);
EOF

echo " ✓ Database created with complex schema"

# Add more data manually to preserve schema
echo ""
echo "[2] Adding bulk data..."
for i in {1..100}; do
    sqlite3 "$DB" "INSERT INTO posts (user_id, title, content, score) VALUES ((ABS(RANDOM()) % 3) + 1, 'Post $i', randomblob(5000), RANDOM() % 5);" 2>/dev/null
    sqlite3 "$DB" "INSERT INTO transactions (amount, type, balance) VALUES (ABS(RANDOM() % 1000) + 0.01, CASE WHEN RANDOM() % 2 = 0 THEN 'credit' ELSE 'debit' END, ABS(RANDOM() % 10000));" 2>/dev/null
done
INITIAL_SIZE=$(du -h "$DB" | cut -f1)
echo " ✓ Database populated: $INITIAL_SIZE"

echo ""
echo "[3] Running initial integrity check..."
INITIAL_INTEGRITY=$(sqlite3 "$DB" "PRAGMA integrity_check;")
if [ "$INITIAL_INTEGRITY" != "ok" ]; then
    echo " ✗ Initial database has integrity issues: $INITIAL_INTEGRITY"
    exit 1
fi
echo " ✓ Initial integrity check: $INITIAL_INTEGRITY"

# Get checksums for verification
USERS_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM users;")
POSTS_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM posts;")
TRANS_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM transactions;")
TABLE_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM posts;" 2>/dev/null || echo "0")

echo ""
echo "[4] Starting Litestream replication..."
"$LITESTREAM" replicate "$DB" "file://$REPLICA" > /tmp/integrity-litestream.log 2>&1 &
LITESTREAM_PID=$!
sleep 3

if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
    echo " ✗ Litestream failed to start"
    cat /tmp/integrity-litestream.log
    exit 1
fi
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"

echo ""
echo "[5] Making changes while replicating..."
# Add more data and modify existing
sqlite3 "$DB" <<EOF
-- Update existing data
UPDATE users SET name = 'Alice Updated' WHERE id = 1;
DELETE FROM posts WHERE id = 2;

-- Add new data with edge cases
INSERT INTO users (name, email) VALUES ('Dave', 'dave@test.com');
INSERT INTO posts (user_id, title, content, score) VALUES
    (4, 'Edge Case Post', randomblob(5000), 0.0),
    (4, 'Another Post', randomblob(100), -1.5);

-- Trigger constraint checks
INSERT INTO transactions (amount, type, balance) VALUES
    (1000.00, 'credit', 1124.75),
    (-500.00, 'debit', 624.75);
EOF

# Force checkpoint
sqlite3 "$DB" "PRAGMA wal_checkpoint(FULL);" >/dev/null 2>&1
sleep 2

echo " ✓ Changes made and checkpoint executed"

echo ""
echo "[6] Stopping Litestream and attempting restore..."
kill $LITESTREAM_PID
wait $LITESTREAM_PID 2>/dev/null

# Attempt restore
"$LITESTREAM" restore -o "$RESTORED" "file://$REPLICA" > /tmp/integrity-restore.log 2>&1
RESTORE_EXIT=$?

if [ $RESTORE_EXIT -ne 0 ]; then
    echo " ✗ Restore failed with exit code: $RESTORE_EXIT"
    cat /tmp/integrity-restore.log
    exit 1
fi
echo " ✓ Restore completed"

echo ""
echo "[7] Running integrity check on restored database..."
RESTORED_INTEGRITY=$(sqlite3 "$RESTORED" "PRAGMA integrity_check;" 2>&1)

if [ "$RESTORED_INTEGRITY" != "ok" ]; then
    echo " ✗ CRITICAL: Restored database FAILED integrity check!"
    echo " Result: $RESTORED_INTEGRITY"

    # Try to get more info
    echo ""
    echo " Attempting detailed analysis:"
    sqlite3 "$RESTORED" "PRAGMA foreign_key_check;" 2>/dev/null || echo " Foreign key check failed"
    sqlite3 "$RESTORED" "SELECT COUNT(*) FROM sqlite_master;" 2>/dev/null || echo " Cannot read schema"

    exit 1
else
    echo " ✓ Integrity check PASSED: $RESTORED_INTEGRITY"
fi

echo ""
echo "[8] Verifying data consistency..."
# Check row counts
RESTORED_USERS=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM users;" 2>/dev/null || echo "ERROR")
RESTORED_POSTS=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM posts;" 2>/dev/null || echo "ERROR")
RESTORED_TRANS=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM transactions;" 2>/dev/null || echo "ERROR")
RESTORED_TABLE=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM posts;" 2>/dev/null || echo "0")

# Expected counts after changes
EXPECTED_USERS=4    # 3 original + 1 added
EXPECTED_POSTS=104  # 3 original + 100 bulk - 1 deleted + 2 added
EXPECTED_TRANS=105  # 3 original + 100 bulk + 2 added

echo " Data verification:"
echo " Users: $RESTORED_USERS (expected: $EXPECTED_USERS)"
echo " Posts: $RESTORED_POSTS (expected: $EXPECTED_POSTS)"
echo " Transactions: $RESTORED_TRANS (expected: $EXPECTED_TRANS)"
echo " Test Table: $RESTORED_TABLE (expected: $TABLE_COUNT)"

DATA_INTACT=true
if [ "$RESTORED_USERS" != "$EXPECTED_USERS" ]; then
    echo " ✗ User count mismatch!"
    DATA_INTACT=false
fi
if [ "$RESTORED_POSTS" != "$EXPECTED_POSTS" ]; then
    echo " ✗ Post count mismatch!"
    DATA_INTACT=false
fi
if [ "$RESTORED_TRANS" != "$EXPECTED_TRANS" ]; then
    echo " ✗ Transaction count mismatch!"
    DATA_INTACT=false
fi

echo ""
echo "[9] Testing constraint enforcement..."
# Test that constraints still work
CONSTRAINT_TEST=$(sqlite3 "$RESTORED" "INSERT INTO transactions (amount, type) VALUES (0, 'credit');" 2>&1 || echo "CONSTRAINT_OK")
if [[ "$CONSTRAINT_TEST" == *"CONSTRAINT_OK"* ]] || [[ "$CONSTRAINT_TEST" == *"CHECK constraint failed"* ]]; then
    echo " ✓ Check constraints working"
else
    echo " ✗ Check constraints not enforced!"
    DATA_INTACT=false
fi

# Test foreign keys
FK_TEST=$(sqlite3 "$RESTORED" "PRAGMA foreign_keys=ON; INSERT INTO posts (user_id, title) VALUES (999, 'Bad FK');" 2>&1 || echo "FK_OK")
if [[ "$FK_TEST" == *"FK_OK"* ]] || [[ "$FK_TEST" == *"FOREIGN KEY constraint failed"* ]]; then
    echo " ✓ Foreign key constraints working"
else
    echo " ✗ Foreign key constraints not enforced!"
    DATA_INTACT=false
fi

echo ""
if [ "$DATA_INTACT" = true ] && [ "$RESTORED_INTEGRITY" = "ok" ]; then
    echo "✅ TEST PASSED: Database integrity preserved after restore"
else
    echo "❌ TEST FAILED: Database integrity issues detected"
    exit 1
fi

echo ""
echo "=========================================="
echo "Summary:"
echo " Integrity Check: $RESTORED_INTEGRITY"
echo " Data Consistency: $DATA_INTACT"
echo " Constraints: Working"
echo "=========================================="
@@ -1,139 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Test: Starting replication with a fresh (empty) database
|
||||
# This tests whether Litestream works correctly when it starts before the
# database exists and has to pick it up from scratch

set -e

echo "=========================================="
echo "Fresh Start Database Test"
echo "=========================================="
echo ""
echo "Testing if Litestream works correctly when starting fresh"
echo ""

# Configuration
DB="/tmp/fresh-test.db"
REPLICA="/tmp/fresh-replica"
LITESTREAM="./bin/litestream"
LITESTREAM_TEST="./bin/litestream-test"

# Clean up
echo "[SETUP] Cleaning up..."
rm -f "$DB"*
rm -rf "$REPLICA"

# Check binaries
if [ ! -f "$LITESTREAM" ]; then
    echo "ERROR: $LITESTREAM not found"
    exit 1
fi

if [ ! -f "$LITESTREAM_TEST" ]; then
    echo "ERROR: $LITESTREAM_TEST not found"
    exit 1
fi

# Start Litestream BEFORE creating the database
echo ""
echo "[1] Starting Litestream with non-existent database..."
$LITESTREAM replicate "$DB" "file://$REPLICA" > /tmp/fresh-test.log 2>&1 &
LITESTREAM_PID=$!
sleep 2

if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
    echo " ✓ Expected: Litestream waiting for database to be created"
else
    echo " ✓ Litestream running (PID: $LITESTREAM_PID)"
fi

# Now create and populate the database
echo ""
echo "[2] Creating database while Litestream is running..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode=WAL;
CREATE TABLE test (id INTEGER PRIMARY KEY, data TEXT);
INSERT INTO test (data) VALUES ('initial data');
EOF
echo " ✓ Database created"

# Give Litestream time to detect the new database
sleep 3

# Check if Litestream started replicating
echo ""
echo "[3] Checking if Litestream detected the database..."
if grep -q "initialized db" /tmp/fresh-test.log; then
    echo " ✓ Litestream detected and initialized database"
fi

# Add more data
echo ""
echo "[4] Adding data to test replication..."
for i in {1..100}; do
    sqlite3 "$DB" "INSERT INTO test (data) VALUES ('row $i');"
done
echo " ✓ Added 100 rows"

# Let replication catch up
sleep 5

# Check for errors
echo ""
echo "[5] Checking for errors..."
# grep -c prints 0 but exits non-zero on no match, so guard the assignment
# instead of appending a second "0" with "|| echo".
ERROR_COUNT=$(grep -c "ERROR" /tmp/fresh-test.log 2>/dev/null) || ERROR_COUNT=0
if [ "$ERROR_COUNT" -gt 1 ]; then
    echo " ⚠ Found $ERROR_COUNT errors:"
    grep "ERROR" /tmp/fresh-test.log | head -3
else
    echo " ✓ No significant errors"
fi

# Check replica files
echo ""
echo "[6] Checking replica files..."
if [ -d "$REPLICA/ltx" ]; then
    FILE_COUNT=$(find "$REPLICA/ltx" -name "*.ltx" | wc -l)
    echo " ✓ Replica created with $FILE_COUNT LTX files"
    ls -la "$REPLICA/ltx/0/" 2>/dev/null | head -3
else
    echo " ✗ No replica files created!"
fi

# Stop Litestream
kill $LITESTREAM_PID 2>/dev/null || true
sleep 2

# Test restore
echo ""
echo "[7] Testing restore..."
rm -f /tmp/fresh-restored.db
if $LITESTREAM restore -o /tmp/fresh-restored.db "file://$REPLICA" 2>&1; then
    echo " ✓ Restore successful"

    # Verify data
    ORIG_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM test;")
    REST_COUNT=$(sqlite3 /tmp/fresh-restored.db "SELECT COUNT(*) FROM test;")

    if [ "$ORIG_COUNT" -eq "$REST_COUNT" ]; then
        echo " ✓ Data integrity verified: $ORIG_COUNT rows"
        echo ""
        echo "TEST PASSED: Fresh start works correctly"
    else
        echo " ✗ Data mismatch: Original=$ORIG_COUNT, Restored=$REST_COUNT"
        echo ""
        echo "TEST FAILED: Data loss detected"
    fi
else
    echo " ✗ Restore failed!"
    echo ""
    echo "TEST FAILED: Cannot restore database"
fi
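# Optional deeper check (an added sketch, not part of the original pass/fail
# logic): PRAGMA integrity_check verifies the restored file's internal
# structure, which a plain row count cannot catch.
if [ -f /tmp/fresh-restored.db ]; then
    echo " Integrity: $(sqlite3 /tmp/fresh-restored.db "PRAGMA integrity_check;")"
fi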
echo ""
|
||||
echo "=========================================="
|
||||
echo "Test artifacts:"
|
||||
echo " Database: $DB"
|
||||
echo " Replica: $REPLICA"
|
||||
echo " Log: /tmp/fresh-test.log"
|
||||
echo "=========================================="
|
||||
@@ -1,173 +0,0 @@
#!/bin/bash

# Test: Rapid Checkpoint Cycling
# This tests Litestream's behavior under rapid checkpoint pressure

set -e

echo "=========================================="
echo "Rapid Checkpoint Cycling Test"
echo "=========================================="
echo ""
echo "Testing Litestream under rapid checkpoint pressure"
echo ""

# Configuration
DB="/tmp/checkpoint-cycle.db"
REPLICA="/tmp/checkpoint-cycle-replica"
LITESTREAM="./bin/litestream"
LITESTREAM_TEST="./bin/litestream-test"

# Clean up
echo "[SETUP] Cleaning up..."
rm -f "$DB"*
rm -rf "$REPLICA"

# Start with fresh database
echo "[1] Creating initial database..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode=WAL;
CREATE TABLE test (id INTEGER PRIMARY KEY, data BLOB);
EOF
echo " ✓ Database created"

# Start Litestream
echo ""
echo "[2] Starting Litestream..."
$LITESTREAM replicate "$DB" "file://$REPLICA" > /tmp/checkpoint-cycle.log 2>&1 &
LITESTREAM_PID=$!
sleep 3

if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
    echo " ✗ Litestream failed to start"
    cat /tmp/checkpoint-cycle.log
    exit 1
fi
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"

# Start continuous writes in background
echo ""
echo "[3] Starting continuous writes..."
(
    while kill -0 $LITESTREAM_PID 2>/dev/null; do
        sqlite3 "$DB" "INSERT INTO test (data) VALUES (randomblob(1000));" 2>/dev/null || true
        sleep 0.01 # 100 writes/sec attempt
    done
) &
WRITE_PID=$!
echo " ✓ Write loop started"

# Rapid checkpoint cycling
echo ""
echo "[4] Starting rapid checkpoint cycling (30 seconds)..."
echo " Testing all checkpoint modes in rapid succession..."

CHECKPOINT_COUNT=0
ERRORS=0
START_TIME=$(date +%s)
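# SQLite checkpoint modes, in increasing aggressiveness:
#   PASSIVE  - checkpoint what it can without blocking readers or writers
#   FULL     - wait for writers, then checkpoint the entire WAL
#   RESTART  - like FULL, and also wait so the next writer restarts the WAL
#   TRUNCATE - like RESTART, then truncate the WAL file to zero bytes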
while [ $(($(date +%s) - START_TIME)) -lt 30 ]; do
    # Cycle through different checkpoint modes
    for MODE in PASSIVE FULL RESTART TRUNCATE; do
        if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
            echo " ✗ Litestream crashed during checkpoint!"
            break 2
        fi

        # Execute checkpoint
        OUTPUT=$(sqlite3 "$DB" "PRAGMA wal_checkpoint($MODE);" 2>&1) || {
            ERRORS=$((ERRORS + 1))
            echo " ⚠ Checkpoint $MODE error: $OUTPUT"
        }
        CHECKPOINT_COUNT=$((CHECKPOINT_COUNT + 1))

        # Very brief pause
        sleep 0.1
    done
done

echo " Executed $CHECKPOINT_COUNT checkpoints with $ERRORS errors"

# Stop writes
kill $WRITE_PID 2>/dev/null || true

# Let Litestream catch up
echo ""
echo "[5] Letting Litestream stabilize..."
sleep 5

# Check Litestream health
if kill -0 $LITESTREAM_PID 2>/dev/null; then
    echo " ✓ Litestream survived rapid checkpointing"
else
    echo " ✗ Litestream died during test"
fi

# Check for sync errors
echo ""
echo "[6] Checking for sync errors..."
# Guard the assignments: grep -c prints 0 but exits non-zero on no match.
SYNC_ERRORS=$(grep -c "sync error" /tmp/checkpoint-cycle.log 2>/dev/null) || SYNC_ERRORS=0
FLAGS_ERRORS=$(grep -c "no flags allowed" /tmp/checkpoint-cycle.log 2>/dev/null) || FLAGS_ERRORS=0

if [ "$FLAGS_ERRORS" -gt 0 ]; then
    echo " ✗ ltx v0.5.0 flag errors detected: $FLAGS_ERRORS"
elif [ "$SYNC_ERRORS" -gt 0 ]; then
    echo " ⚠ Sync errors detected: $SYNC_ERRORS"
else
    echo " ✓ No sync errors"
fi

# Check replica status
echo ""
echo "[7] Checking replica status..."
if [ -d "$REPLICA/ltx" ]; then
    LTX_COUNT=$(find "$REPLICA/ltx" -name "*.ltx" | wc -l)
    echo " ✓ Replica has $LTX_COUNT LTX files"
else
    echo " ✗ No replica created!"
fi

# Get final stats
ROW_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")
WAL_SIZE=$(stat -f%z "$DB-wal" 2>/dev/null || stat -c%s "$DB-wal" 2>/dev/null || echo "0")
echo " Final row count: $ROW_COUNT"
echo " Final WAL size: $((WAL_SIZE / 1024))KB"

# Stop Litestream
kill $LITESTREAM_PID 2>/dev/null || true
sleep 2

# Test restore
echo ""
echo "[8] Testing restore after rapid checkpointing..."
rm -f /tmp/checkpoint-restored.db
# Redirect instead of piping through tee: a pipeline's exit status would be
# tee's, which masks restore failures.
if $LITESTREAM restore -o /tmp/checkpoint-restored.db "file://$REPLICA" > /tmp/restore-checkpoint.log 2>&1; then
    REST_COUNT=$(sqlite3 /tmp/checkpoint-restored.db "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")

    if [ "$REST_COUNT" -eq "$ROW_COUNT" ]; then
        echo " ✓ Restore successful: $REST_COUNT rows"
        echo ""
        echo "TEST PASSED: Survived $CHECKPOINT_COUNT rapid checkpoints"
    else
        echo " ⚠ Row count mismatch: Original=$ROW_COUNT, Restored=$REST_COUNT"
        LOSS=$((ROW_COUNT - REST_COUNT))
        echo " Data loss: $LOSS rows"
        echo ""
        echo "TEST FAILED: Data loss after rapid checkpointing"
    fi
else
    echo " ✗ Restore failed!"
    cat /tmp/restore-checkpoint.log
    echo ""
    echo "TEST FAILED: Cannot restore after rapid checkpointing"
fi

echo ""
echo "=========================================="
echo "Summary:"
echo " Checkpoints executed: $CHECKPOINT_COUNT"
echo " Checkpoint errors: $ERRORS"
echo " Sync errors: $SYNC_ERRORS"
echo " Flag errors: $FLAGS_ERRORS"
echo " Rows written: $ROW_COUNT"
echo "=========================================="
@@ -1,203 +0,0 @@
#!/bin/bash
set -e

# Test multiple replica failover (Issue #687)
# This test verifies that restore falls back to healthy replicas when the primary fails

echo "=========================================="
echo "Multiple Replica Failover Test"
echo "=========================================="
echo ""
echo "Testing if restore falls back to healthy replicas when the first is unavailable"
echo ""

# Configuration
DB="/tmp/failover-test.db"
REPLICA1="/tmp/failover-replica1"
REPLICA2="/tmp/failover-replica2"
REPLICA3="/tmp/failover-replica3"
RESTORED="/tmp/failover-restored.db"
LITESTREAM_CONFIG="/tmp/failover-litestream.yml"
LITESTREAM="./bin/litestream"
LITESTREAM_TEST="./bin/litestream-test"

# Cleanup function
cleanup() {
    pkill -f "litestream replicate.*failover-test" 2>/dev/null || true
    rm -f "$DB" "$DB-wal" "$DB-shm" "$DB-litestream"
    rm -f "$RESTORED" "$RESTORED-wal" "$RESTORED-shm"
    rm -rf "$REPLICA1" "$REPLICA2" "$REPLICA3"
    rm -f "$LITESTREAM_CONFIG"
    rm -f /tmp/failover-*.log
}

trap cleanup EXIT

echo "[SETUP] Cleaning up previous test files..."
cleanup

echo ""
echo "[1] Creating test database..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode = WAL;
CREATE TABLE test (id INTEGER PRIMARY KEY, data TEXT, timestamp DATETIME DEFAULT CURRENT_TIMESTAMP);
INSERT INTO test (data) VALUES ('Initial data for failover test');
EOF
echo " ✓ Database created"

echo ""
echo "[2] Creating Litestream config with multiple replicas..."
cat > "$LITESTREAM_CONFIG" <<EOF
dbs:
  - path: $DB
    replicas:
      - url: file://$REPLICA1
        sync-interval: 1s
      - url: file://$REPLICA2
        sync-interval: 1s
      - url: file://$REPLICA3
        sync-interval: 1s
EOF
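# Note (added): this test assumes restore consults replicas in the order
# they appear in the config above, so damaging replica1 first exercises
# the fallback path from issue #687.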
echo " ✓ Config created with 3 replicas"
|
||||
|
||||
echo ""
|
||||
echo "[3] Starting Litestream with multiple replicas..."
|
||||
"$LITESTREAM" replicate -config "$LITESTREAM_CONFIG" > /tmp/failover-litestream.log 2>&1 &
|
||||
LITESTREAM_PID=$!
|
||||
sleep 3
|
||||
|
||||
if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
|
||||
echo " ✗ Litestream failed to start"
|
||||
cat /tmp/failover-litestream.log
|
||||
exit 1
|
||||
fi
|
||||
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"
|
||||
|
||||
echo ""
|
||||
echo "[4] Adding data to ensure replication..."
|
||||
for i in {1..10}; do
|
||||
sqlite3 "$DB" "INSERT INTO test (data) VALUES ('Replicated data $i');"
|
||||
done
|
||||
sqlite3 "$DB" "PRAGMA wal_checkpoint(FULL);" >/dev/null 2>&1
|
||||
sleep 3
|
||||
echo " ✓ Added 10 rows and checkpointed"
|
||||
|
||||
# Verify all replicas exist
|
||||
echo ""
|
||||
echo "[5] Verifying all replicas have data..."
|
||||
for replica in "$REPLICA1" "$REPLICA2" "$REPLICA3"; do
|
||||
if [ -d "$replica" ]; then
|
||||
FILES=$(ls -1 "$replica"/generations/*/wal/*.ltx 2>/dev/null | wc -l)
|
||||
echo " ✓ $(basename $replica): $FILES LTX files"
|
||||
else
|
||||
echo " ✗ $(basename $replica): Not created!"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "[6] Stopping Litestream..."
|
||||
kill $LITESTREAM_PID
|
||||
wait $LITESTREAM_PID 2>/dev/null
|
||||
echo " ✓ Litestream stopped"
|
||||
|
||||
# Test 1: All replicas available
|
||||
echo ""
|
||||
echo "[7] Test 1: Restore with all replicas available..."
|
||||
"$LITESTREAM" restore -config "$LITESTREAM_CONFIG" -o "$RESTORED" "$DB" > /tmp/failover-restore1.log 2>&1
|
||||
if [ $? -eq 0 ]; then
|
||||
COUNT=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")
|
||||
echo " ✓ Restore successful with all replicas: $COUNT rows"
|
||||
rm -f "$RESTORED" "$RESTORED-wal" "$RESTORED-shm"
|
||||
else
|
||||
echo " ✗ Restore failed with all replicas available"
|
||||
cat /tmp/failover-restore1.log
|
||||
fi
|
||||
|
||||
# Test 2: First replica corrupted
|
||||
echo ""
|
||||
echo "[8] Test 2: Corrupting first replica..."
|
||||
rm -rf "$REPLICA1"/generations/*/wal/*.ltx
|
||||
echo "CORRUPTED" > "$REPLICA1/CORRUPTED"
|
||||
echo " ✓ First replica corrupted"
|
||||
|
||||
echo " Attempting restore with first replica corrupted..."
|
||||
"$LITESTREAM" restore -config "$LITESTREAM_CONFIG" -o "$RESTORED" "$DB" > /tmp/failover-restore2.log 2>&1
|
||||
if [ $? -eq 0 ]; then
|
||||
COUNT=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")
|
||||
if [ "$COUNT" -eq "11" ]; then
|
||||
echo " ✓ Successfully fell back to healthy replicas: $COUNT rows"
|
||||
else
|
||||
echo " ✗ Restore succeeded but data incorrect: $COUNT rows (expected 11)"
|
||||
fi
|
||||
rm -f "$RESTORED" "$RESTORED-wal" "$RESTORED-shm"
|
||||
else
|
||||
echo " ✗ FAILED: Did not fall back to healthy replicas"
|
||||
cat /tmp/failover-restore2.log
|
||||
fi
|
||||
|
||||
# Test 3: First replica missing entirely
|
||||
echo ""
|
||||
echo "[9] Test 3: Removing first replica entirely..."
|
||||
rm -rf "$REPLICA1"
|
||||
echo " ✓ First replica removed"
|
||||
|
||||
echo " Attempting restore with first replica missing..."
|
||||
"$LITESTREAM" restore -config "$LITESTREAM_CONFIG" -o "$RESTORED" "$DB" > /tmp/failover-restore3.log 2>&1
|
||||
if [ $? -eq 0 ]; then
|
||||
COUNT=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")
|
||||
if [ "$COUNT" -eq "11" ]; then
|
||||
echo " ✓ Successfully fell back to remaining replicas: $COUNT rows"
|
||||
else
|
||||
echo " ✗ Restore succeeded but data incorrect: $COUNT rows (expected 11)"
|
||||
fi
|
||||
rm -f "$RESTORED" "$RESTORED-wal" "$RESTORED-shm"
|
||||
else
|
||||
echo " ✗ FAILED: Did not fall back when first replica missing"
|
||||
cat /tmp/failover-restore3.log
|
||||
fi
|
||||
|
||||
# Test 4: Only last replica healthy
|
||||
echo ""
|
||||
echo "[10] Test 4: Corrupting second replica too..."
|
||||
rm -rf "$REPLICA2"
|
||||
echo " ✓ Second replica removed"
|
||||
|
||||
echo " Attempting restore with only third replica healthy..."
|
||||
"$LITESTREAM" restore -config "$LITESTREAM_CONFIG" -o "$RESTORED" "$DB" > /tmp/failover-restore4.log 2>&1
|
||||
if [ $? -eq 0 ]; then
|
||||
COUNT=$(sqlite3 "$RESTORED" "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")
|
||||
if [ "$COUNT" -eq "11" ]; then
|
||||
echo " ✓ Successfully restored from last healthy replica: $COUNT rows"
|
||||
else
|
||||
echo " ✗ Restore succeeded but data incorrect: $COUNT rows (expected 11)"
|
||||
fi
|
||||
rm -f "$RESTORED" "$RESTORED-wal" "$RESTORED-shm"
|
||||
else
|
||||
echo " ✗ FAILED: Could not restore from last healthy replica"
|
||||
cat /tmp/failover-restore4.log
|
||||
fi
|
||||
|
||||
# Test 5: All replicas unavailable
|
||||
echo ""
|
||||
echo "[11] Test 5: Removing all replicas..."
|
||||
rm -rf "$REPLICA3"
|
||||
echo " ✓ All replicas removed"
|
||||
|
||||
echo " Attempting restore with no healthy replicas..."
|
||||
"$LITESTREAM" restore -config "$LITESTREAM_CONFIG" -o "$RESTORED" "$DB" > /tmp/failover-restore5.log 2>&1
|
||||
if [ $? -ne 0 ]; then
|
||||
echo " ✓ Correctly failed when no replicas available"
|
||||
else
|
||||
echo " ✗ Unexpected success with no replicas"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Failover Test Summary:"
|
||||
echo " ✓ Restore works with all replicas"
|
||||
echo " ✓ Falls back when first replica corrupted"
|
||||
echo " ✓ Falls back when first replica missing"
|
||||
echo " ✓ Works with only last replica healthy"
|
||||
echo " ✓ Correctly fails when no replicas available"
|
||||
echo "=========================================="
|
||||
@@ -1,189 +0,0 @@
#!/bin/bash

# Test: WAL Growth and Size Limits
# This tests how Litestream handles extreme WAL growth scenarios

set -e

echo "=========================================="
echo "WAL Growth and Size Limits Test"
echo "=========================================="
echo ""
echo "Testing Litestream's handling of large WAL files"
echo ""

# Configuration
DB="/tmp/wal-growth.db"
REPLICA="/tmp/wal-growth-replica"
LITESTREAM="./bin/litestream"
TARGET_WAL_SIZE_MB=100 # Target WAL size in MB

# Clean up
echo "[SETUP] Cleaning up..."
rm -f "$DB"*
rm -rf "$REPLICA"

# Create fresh database
echo "[1] Creating database..."
sqlite3 "$DB" <<EOF
PRAGMA journal_mode=WAL;
PRAGMA wal_autocheckpoint=0; -- Disable auto-checkpoint
CREATE TABLE test (id INTEGER PRIMARY KEY, data BLOB);
EOF
echo " ✓ Database created with auto-checkpoint disabled"

# Start Litestream
echo ""
echo "[2] Starting Litestream..."
$LITESTREAM replicate "$DB" "file://$REPLICA" > /tmp/wal-growth.log 2>&1 &
LITESTREAM_PID=$!
sleep 3

if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
    echo " ✗ Litestream failed to start"
    head -10 /tmp/wal-growth.log
    exit 1
fi
echo " ✓ Litestream running (PID: $LITESTREAM_PID)"

# Write data until WAL reaches target size
echo ""
echo "[3] Growing WAL to ${TARGET_WAL_SIZE_MB}MB..."
echo " Writing large blobs without checkpointing..."

BATCH_COUNT=0
while true; do
    # Check current WAL size
    WAL_SIZE=$(stat -f%z "$DB-wal" 2>/dev/null || stat -c%s "$DB-wal" 2>/dev/null || echo "0")
    WAL_SIZE_MB=$((WAL_SIZE / 1024 / 1024))

    if [ $WAL_SIZE_MB -ge $TARGET_WAL_SIZE_MB ]; then
        echo " ✓ WAL reached ${WAL_SIZE_MB}MB"
        break
    fi

    # Write a batch of large records
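    # Note (added): generate_series is a table-valued function that needs a
    # sqlite3 shell built with the series extension (bundled with recent
    # SQLite releases); swap in a WITH RECURSIVE loop if it is unavailable.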
sqlite3 "$DB" <<EOF 2>/dev/null || true
|
||||
BEGIN;
|
||||
INSERT INTO test (data) SELECT randomblob(10000) FROM generate_series(1, 100);
|
||||
COMMIT;
|
||||
EOF
|
||||
|
||||
BATCH_COUNT=$((BATCH_COUNT + 1))
|
||||
if [ $((BATCH_COUNT % 10)) -eq 0 ]; then
|
||||
echo " WAL size: ${WAL_SIZE_MB}MB / ${TARGET_WAL_SIZE_MB}MB"
|
||||
fi
|
||||
|
||||
# Check if Litestream is still alive
|
||||
if ! kill -0 $LITESTREAM_PID 2>/dev/null; then
|
||||
echo " ✗ Litestream died during WAL growth!"
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
# Check Litestream status
|
||||
echo ""
|
||||
echo "[4] Checking Litestream status with large WAL..."
|
||||
if kill -0 $LITESTREAM_PID 2>/dev/null; then
|
||||
echo " ✓ Litestream still running with ${WAL_SIZE_MB}MB WAL"
|
||||
|
||||
# Check replication lag
|
||||
sleep 5
|
||||
LATEST_LTX=$(ls -t "$REPLICA/ltx/0/" 2>/dev/null | head -1)
|
||||
if [ -n "$LATEST_LTX" ]; then
|
||||
echo " ✓ Still replicating (latest: $LATEST_LTX)"
|
||||
else
|
||||
echo " ⚠ No recent replication activity"
|
||||
fi
|
||||
else
|
||||
echo " ✗ Litestream crashed!"
|
||||
fi
|
||||
|
||||
# Check for errors
|
||||
echo ""
|
||||
echo "[5] Checking for errors..."
|
||||
ERROR_COUNT=$(grep -c "ERROR" /tmp/wal-growth.log 2>/dev/null || echo "0")
|
||||
OOM_COUNT=$(grep -c -i "out of memory\|oom" /tmp/wal-growth.log 2>/dev/null || echo "0")
|
||||
|
||||
if [ "$OOM_COUNT" -gt 0 ]; then
|
||||
echo " ✗ Out of memory errors detected!"
|
||||
elif [ "$ERROR_COUNT" -gt 1 ]; then
|
||||
echo " ⚠ Errors detected: $ERROR_COUNT"
|
||||
grep "ERROR" /tmp/wal-growth.log | tail -3
|
||||
else
|
||||
echo " ✓ No significant errors"
|
||||
fi
|
||||
|
||||
# Get statistics
|
||||
echo ""
|
||||
echo "[6] Statistics..."
|
||||
ROW_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")
|
||||
DB_SIZE=$(stat -f%z "$DB" 2>/dev/null || stat -c%s "$DB" 2>/dev/null || echo "0")
|
||||
LTX_COUNT=$(find "$REPLICA" -name "*.ltx" 2>/dev/null | wc -l || echo "0")
|
||||
|
||||
echo " Database size: $((DB_SIZE / 1024 / 1024))MB"
|
||||
echo " WAL size: ${WAL_SIZE_MB}MB"
|
||||
echo " Row count: $ROW_COUNT"
|
||||
echo " LTX files: $LTX_COUNT"
|
||||
|
||||
# Now checkpoint and see what happens
|
||||
echo ""
|
||||
echo "[7] Executing checkpoint on large WAL..."
|
||||
CHECKPOINT_START=$(date +%s)
|
||||
CHECKPOINT_RESULT=$(sqlite3 "$DB" "PRAGMA wal_checkpoint(FULL);" 2>&1) || echo "Failed"
|
||||
CHECKPOINT_END=$(date +%s)
|
||||
CHECKPOINT_TIME=$((CHECKPOINT_END - CHECKPOINT_START))
|
||||
|
||||
echo " Checkpoint result: $CHECKPOINT_RESULT"
|
||||
echo " Checkpoint time: ${CHECKPOINT_TIME}s"
|
||||
|
||||
# Check WAL size after checkpoint
|
||||
NEW_WAL_SIZE=$(stat -f%z "$DB-wal" 2>/dev/null || stat -c%s "$DB-wal" 2>/dev/null || echo "0")
|
||||
NEW_WAL_SIZE_MB=$((NEW_WAL_SIZE / 1024 / 1024))
|
||||
echo " WAL size after checkpoint: ${NEW_WAL_SIZE_MB}MB"
|
||||
|
||||
# Let Litestream catch up
|
||||
echo ""
|
||||
echo "[8] Letting Litestream catch up after checkpoint..."
|
||||
sleep 10
|
||||
|
||||
# Check if Litestream survived
|
||||
if kill -0 $LITESTREAM_PID 2>/dev/null; then
|
||||
echo " ✓ Litestream survived large checkpoint"
|
||||
else
|
||||
echo " ✗ Litestream died after checkpoint"
|
||||
fi
|
||||
|
||||
# Stop Litestream
|
||||
kill $LITESTREAM_PID 2>/dev/null || true
|
||||
sleep 2
|
||||
|
||||
# Test restore
|
||||
echo ""
|
||||
echo "[9] Testing restore after large WAL handling..."
|
||||
rm -f /tmp/wal-restored.db
|
||||
if $LITESTREAM restore -o /tmp/wal-restored.db "file://$REPLICA" 2>&1 | tee /tmp/restore-wal.log; then
|
||||
REST_COUNT=$(sqlite3 /tmp/wal-restored.db "SELECT COUNT(*) FROM test;" 2>/dev/null || echo "0")
|
||||
|
||||
if [ "$REST_COUNT" -eq "$ROW_COUNT" ]; then
|
||||
echo " ✓ Restore successful: $REST_COUNT rows"
|
||||
echo ""
|
||||
echo "TEST PASSED: Handled ${TARGET_WAL_SIZE_MB}MB WAL successfully"
|
||||
else
|
||||
echo " ⚠ Row count mismatch: Original=$ROW_COUNT, Restored=$REST_COUNT"
|
||||
echo ""
|
||||
echo "TEST FAILED: Data loss with large WAL"
|
||||
fi
|
||||
else
|
||||
echo " ✗ Restore failed!"
|
||||
echo ""
|
||||
echo "TEST FAILED: Cannot restore after large WAL"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Summary:"
|
||||
echo " Maximum WAL size tested: ${WAL_SIZE_MB}MB"
|
||||
echo " Checkpoint time: ${CHECKPOINT_TIME}s"
|
||||
echo " Data integrity: $([ "$REST_COUNT" -eq "$ROW_COUNT" ] && echo "✓ Preserved" || echo "✗ Lost")"
|
||||
echo "=========================================="
|
||||
@@ -1,14 +1,12 @@
# Integration Test Scripts
# Utility Scripts

Long-running integration test scripts for comprehensive Litestream validation. These scripts are designed for extended testing scenarios, including overnight tests and production-like workloads.
Utility scripts for Litestream testing and distribution.

## Overview

This directory contains integration test scripts that run for extended periods (30 minutes to 8+ hours) to validate Litestream's behavior under sustained load and realistic production scenarios.
This directory contains utility scripts for post-test analysis and packaging. All long-running soak tests have been migrated to Go integration tests in `tests/integration/`.

**Key Difference from `cmd/litestream-test/scripts/`:**
- **This directory:** Long-running integration tests (minutes to hours)
- **`cmd/litestream-test/scripts/`:** Focused scenario tests (seconds to minutes)
> **Note:** For all soak tests (2-8 hours), see the Go-based test suite in [tests/integration/](../tests/integration/README.md). The bash soak tests have been migrated to Go for better maintainability and cross-platform support.

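For example, a local soak run might look like this (a sketch only; the exact build tags and `-run` patterns are defined by the Go suite, so check its README first):

```bash
# Build the binaries the tests drive, then run one soak test locally.
go build -o bin/litestream ./cmd/litestream
go build -o bin/litestream-test ./cmd/litestream-test
go test -v -tags=integration -timeout=4h ./tests/integration/... -run TestComprehensiveSoak
```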
## Prerequisites

@@ -17,209 +15,7 @@ go build -o bin/litestream ./cmd/litestream
go build -o bin/litestream-test ./cmd/litestream-test
```

## Test Scripts

### test-quick-validation.sh

Quick validation test that runs for a configurable duration (default: 30 minutes).

```bash
./scripts/test-quick-validation.sh

TEST_DURATION=2h ./scripts/test-quick-validation.sh

TEST_DURATION=1h ./scripts/test-quick-validation.sh
```

**Default Configuration:**
- Duration: 30 minutes (configurable via `TEST_DURATION`)
- Database: 10MB initial population
- Write rate: 100 writes/second
- Pattern: Wave (simulates varying load)
- Payload size: 4KB
- Workers: 4
- Replica: File-based

**Features:**
- Aggressive test settings for quick feedback
- Very frequent snapshots (1 minute intervals)
- Rapid compaction cycles (30s, 1m, 5m, 15m)
- Real-time monitoring every 30 seconds
- Automatic validation and restore testing
- Comprehensive final report

**Monitoring:**
```bash
tail -f /tmp/litestream-quick-*/logs/monitor.log
tail -f /tmp/litestream-quick-*/logs/litestream.log
```

**What it Tests:**
- Snapshot creation frequency
- Compaction behavior across multiple intervals
- LTX file generation and management
- Checkpoint behavior under load
- Replication integrity
- Restoration success
- Error handling

**When to Use:**
- Before running overnight tests
- Validating configuration changes
- Quick regression testing
- CI/CD integration (with short duration)
- Pre-release validation

**Success Criteria** (a spot-check sketch follows this list):
- LTX segments created (>0)
- No critical errors in logs
- Successful restoration
- Row counts match between source and restored database

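A quick way to spot-check these criteria after a run (a sketch; the directory layout and `test_data` table name are assumed from the artifact conventions described below):

```bash
# Locate the most recent quick-validation run and check the pass criteria.
DIR=$(ls -dt /tmp/litestream-quick-* | head -1)
echo "LTX segments: $(find "$DIR/replica" -name "*.ltx" | wc -l)"             # should be > 0
echo "Errors logged: $(grep -ci "ERROR" "$DIR/logs/litestream.log" || true)"  # should be 0
sqlite3 "$DIR/test.db" "SELECT COUNT(*) FROM test_data;"      # compare with...
sqlite3 "$DIR/restored.db" "SELECT COUNT(*) FROM test_data;"  # ...this count
```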
### test-overnight.sh

Comprehensive 8-hour test with file-based replication.

```bash
./scripts/test-overnight.sh
```

**Configuration:**
- Duration: 8 hours
- Database: 100MB initial population
- Write rate: 50 writes/second
- Pattern: Wave (simulates varying load)
- Payload size: 2KB
- Workers: 4
- Replica: File-based (`/tmp/litestream-overnight-*/replica`)

**Features:**
- Extended monitoring with 1-minute updates
- Snapshot every 10 minutes
- Aggressive compaction intervals:
  - 30 seconds → 30s duration
  - 1 minute → 1m duration
  - 5 minutes → 5m duration
  - 15 minutes → 1h duration
  - 30 minutes → 6h duration
  - 1 hour → 24h duration
- 720-hour retention (30 days)
- Checkpoint every 30 seconds
- Automatic validation after completion

**Real-time Monitoring:**
```bash
tail -f /tmp/litestream-overnight-*/logs/monitor.log
tail -f /tmp/litestream-overnight-*/logs/litestream.log
tail -f /tmp/litestream-overnight-*/logs/load.log
```

**What it Tests:**
- Long-term replication stability
- Compaction effectiveness over time
- Memory stability under sustained load
- WAL file management
- Checkpoint consistency
- Replica file count growth patterns
- Error accumulation over time
- Recovery from transient issues

**Expected Behavior:**
- Steady database growth over 8 hours
- Regular snapshot creation (48 total)
- Active compaction reducing old LTX files
- Stable memory usage
- No error accumulation
- Successful final validation

**Artifacts:**
- Test directory: `/tmp/litestream-overnight-<timestamp>/`
- Logs: Monitor, litestream, load, populate, validate
- Database: Source and restored versions
- Replica: Full replica directory with LTX files

### test-overnight-s3.sh

Comprehensive 8-hour test with S3 replication.

```bash
export AWS_ACCESS_KEY_ID=your_key
export AWS_SECRET_ACCESS_KEY=your_secret
export S3_BUCKET=your-test-bucket
export AWS_REGION=us-east-1

./scripts/test-overnight-s3.sh
```

**Configuration:**
- Duration: 8 hours
- Database: 100MB initial population
- Write rate: 100 writes/second (higher than file test)
- Pattern: Wave (simulates varying load)
- Payload size: 4KB (larger than file test)
- Workers: 8 (more than file test)
- Replica: S3 bucket with unique timestamped path

**S3-Specific Settings:**
- Force path style: false
- Skip verify: false
- Optional SSE encryption support
- Region configurable via environment

**Features:**
- Higher load than file-based test (S3 can handle more)
- S3 connectivity validation before start
- S3-specific error monitoring (403, 404, 500, 503)
- Upload operation tracking
- S3 object count monitoring
- Restoration from S3 after completion
- Automatic row count comparison

**Real-time Monitoring:**
```bash
tail -f /tmp/litestream-overnight-s3-*/logs/monitor.log
tail -f /tmp/litestream-overnight-s3-*/logs/litestream.log

aws s3 ls s3://your-bucket/litestream-overnight-<timestamp>/ --recursive
```

**What it Tests:**
- S3 replication stability
- Network resilience over 8 hours
- S3 API call efficiency
- Multipart upload handling
- S3-specific error recovery
- Cross-region replication (if configured)
- S3 cost implications (API calls, storage)
- Restoration from cloud storage

**S3 Monitoring Includes:**
- Snapshot count in S3
- WAL segment count in S3
- Total S3 object count
- S3 storage size
- Upload operation count
- S3-specific errors

**Expected Behavior:**
- Successful S3 connectivity throughout
- Regular S3 uploads without failures
- S3 object counts grow over time
- Compaction reduces old S3 objects
- Successful S3 restore at end
- Row count match between source and restored

**Prerequisites:**
- Valid AWS credentials
- S3 bucket with write permissions
- Network connectivity to S3
- AWS CLI installed (for monitoring)

**Cost Considerations:**
- ~8 hours of continuous uploads
- Estimated API calls: Thousands of PUTs/GETs
- Storage: 100MB+ depending on replication
- Consider using a test/dev account (see the usage check below)
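To gauge actual object counts and storage during or after a run, the AWS CLI's summary mode works well (bucket and prefix are placeholders, as in the monitoring example above):

```bash
# Total object count and size under the test prefix.
aws s3 ls "s3://your-bucket/litestream-overnight-<timestamp>/" --recursive --summarize | tail -2
```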
## Available Scripts

### analyze-test-results.sh

@@ -272,37 +68,9 @@ Homebrew tap setup script for packaging and distribution.

**Purpose:** Automates Homebrew tap setup for Litestream distribution. Not a test script per se, but part of the release process.

## Usage Patterns
## Usage

### Quick Validation Before Overnight Test

```bash
TEST_DURATION=30m ./scripts/test-quick-validation.sh
```

If this passes, proceed to overnight:
```bash
./scripts/test-overnight.sh
```

### Running Multiple Overnight Tests

File and S3 tests can run concurrently (different machines recommended):

```bash
./scripts/test-overnight.sh &
./scripts/test-overnight-s3.sh &
```

### Custom Duration Testing

```bash
TEST_DURATION=2h ./scripts/test-quick-validation.sh
TEST_DURATION=4h ./scripts/test-quick-validation.sh
TEST_DURATION=12h ./scripts/test-quick-validation.sh
```

### Analyzing Results
### Analyzing Test Results

```bash
ls /tmp/litestream-overnight-* -dt | head -1
@@ -310,25 +78,15 @@ ls /tmp/litestream-overnight-* -dt | head -1
./scripts/analyze-test-results.sh $(ls /tmp/litestream-overnight-* -dt | head -1)
```

### Continuous Integration

For CI/CD, use shorter durations:

```bash
TEST_DURATION=5m ./scripts/test-quick-validation.sh
TEST_DURATION=15m ./scripts/test-quick-validation.sh
```

## Test Duration Guide

| Duration | Use Case | Test Type | Expected Results |
|----------|----------|-----------|------------------|
| 5 minutes | CI/CD smoke test | Quick validation | Basic functionality |
| 30 minutes | Pre-overnight validation | Quick validation | Config verification |
| 1 hour | Short integration | Quick validation | Pattern detection |
| 2 hours | Extended integration | Quick validation | Compaction cycles |
| 8 hours | Overnight stability | Overnight test | Full validation |
| 12+ hours | Stress testing | Overnight test | Edge case discovery |
| 5 minutes | CI/CD smoke test | Go integration tests | Basic functionality |
| 30 minutes | Short integration | Go integration tests | Pattern detection |
| 2-8 hours | Soak testing | Go soak tests (local only) | Full validation |

> **Note:** All soak tests are now Go-based in `tests/integration/`. See [tests/integration/README.md](../tests/integration/README.md) for details on running comprehensive, MinIO, and overnight S3 soak tests.

## Monitoring and Debugging

@@ -409,12 +167,7 @@ sqlite3 /tmp/litestream-*/restored.db "SELECT COUNT(*) FROM test_data"

### Stopping Tests Early

Tests can be interrupted with Ctrl+C. They will clean up gracefully:
```bash
./scripts/test-overnight.sh
^C
Cleaning up...
```
Go tests can be interrupted with Ctrl+C. They will clean up gracefully via defer statements.

## Test Artifacts

@@ -436,59 +189,24 @@ All tests create timestamped directories with comprehensive artifacts:
└── restored.db # Restored database for validation
```

## Integration with Other Tests
## Integration with Go Tests

These scripts complement the scenario tests in `cmd/litestream-test/scripts/`:
These utility scripts complement the Go integration test suite:

**Relationship:**
- `cmd/litestream-test/scripts/` → Focused scenarios (seconds to ~30 minutes)
- `scripts/` → Integration tests (30 minutes to 8+ hours)
**Test Locations:**
- `tests/integration/` → All integration and soak tests (Go-based)
- `cmd/litestream-test/scripts/` → Scenario and debugging tests (bash, being phased out)
- `scripts/` → Utilities only (this directory)

**Workflow:**
1. Run focused scenario tests during development
2. Run quick validation (30min) before major changes
3. Run overnight tests (8h) before releases
4. Analyze results with analysis script

## Success Criteria

### Quick Validation (30min)

✅ Pass Criteria:
- LTX segments created (>0)
- At least 1 snapshot created
- Multiple compaction cycles completed
- No critical errors
- Successful restoration
- Row count matches

### Overnight Tests (8h)

✅ Pass Criteria:
- No process crashes
- Error count < 10 (excluding transient)
- Steady database growth
- Regular snapshots (40+)
- Active compaction visible
- Successful final restoration
- Row count match
- Memory usage stable
**Testing Workflow:**
1. Run quick integration tests during development
2. Run full integration test suite before major changes
3. Run soak tests (2-8h) locally before releases: `TestComprehensiveSoak`, `TestMinIOSoak`, `TestOvernightS3Soak`
4. Analyze results with `analyze-test-results.sh`

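A typical pre-release pass, condensed into one session (a sketch; adjust timeouts and `-run` patterns to the suite's README):

```bash
# 1-2. Quick, then full, integration tests against the current build.
go test -v -tags=integration -timeout=30m ./tests/integration/...
# 3. One long soak run, local only.
go test -v -tags=integration -timeout=10h ./tests/integration/... -run TestOvernightS3Soak
# 4. Post-run analysis of the newest artifact directory.
./scripts/analyze-test-results.sh "$(ls -dt /tmp/litestream-* | head -1)"
```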
## Related Documentation

- [Go Integration Tests](../tests/integration/README.md) - Complete Go-based test suite including soak tests
- [litestream-test CLI Tool](../cmd/litestream-test/README.md) - Testing harness documentation
- [Scenario Test Scripts](../cmd/litestream-test/scripts/README.md) - Focused test scenarios
- [S3 Retention Testing](../cmd/litestream-test/S3-RETENTION-TESTING.md) - S3-specific testing

## Contributing

When adding new integration scripts:

1. Follow naming conventions (`test-*.sh`)
2. Include clear duration estimates in comments
3. Create comprehensive monitoring
4. Generate timestamped test directories
5. Implement graceful cleanup with `trap`
6. Provide clear success/failure output
7. Update this README with script documentation
8. Consider both file and S3 variants if applicable

@@ -1,392 +0,0 @@
#!/bin/bash
set -euo pipefail

# Comprehensive validation test with aggressive settings
# This test exercises all Litestream features: replication, snapshots, compaction, checkpoints
# Can be run for any duration - defaults to 2 hours for thorough testing

TEST_DURATION="${TEST_DURATION:-2h}"
TEST_DIR="/tmp/litestream-comprehensive-$(date +%Y%m%d-%H%M%S)"
DB_PATH="$TEST_DIR/test.db"
REPLICA_PATH="$TEST_DIR/replica"
CONFIG_FILE="$TEST_DIR/litestream.yml"
LOG_DIR="$TEST_DIR/logs"

echo "================================================"
echo "Litestream Comprehensive Validation Test"
echo "================================================"
echo "Duration: $TEST_DURATION"
echo "Test directory: $TEST_DIR"
echo "Start time: $(date)"
echo ""
echo "This test uses aggressive settings to validate:"
echo " - Continuous replication"
echo " - Snapshot generation (every 10m)"
echo " - Compaction (30s/1m/5m intervals)"
echo " - Checkpoint operations"
echo " - Database restoration"
echo ""

cleanup() {
    echo ""
    echo "Cleaning up..."

    # Kill all spawned processes
    jobs -p | xargs -r kill 2>/dev/null || true
    wait

    echo "Test completed at: $(date)"
    echo "Results saved in: $TEST_DIR"
}

trap cleanup EXIT INT TERM

# Create directories
mkdir -p "$TEST_DIR" "$LOG_DIR" "$REPLICA_PATH"

# Build binaries if needed
echo "Building binaries..."
if [ ! -f bin/litestream ]; then
    go build -o bin/litestream ./cmd/litestream
fi
if [ ! -f bin/litestream-test ]; then
    go build -o bin/litestream-test ./cmd/litestream-test
fi

# Create test database and populate BEFORE starting litestream
echo "Creating and populating test database..."
sqlite3 "$DB_PATH" <<EOF
PRAGMA journal_mode=WAL;
PRAGMA page_size=4096;
CREATE TABLE IF NOT EXISTS test_data (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    data BLOB,
    created_at INTEGER DEFAULT (strftime('%s', 'now'))
);
EOF

# Populate database with initial data (50MB to ensure activity)
echo "Populating database (50MB initial data)..."
# Run inside the `if` so a failure doesn't trip set -e before the warning.
if ! bin/litestream-test populate -db "$DB_PATH" -target-size 50MB -batch-size 1000 > "$LOG_DIR/populate.log" 2>&1; then
    echo "Warning: Population failed, but continuing..."
    cat "$LOG_DIR/populate.log"
fi

# Create configuration with Ben's recommended aggressive settings
echo "Creating test configuration with aggressive intervals..."
cat > "$CONFIG_FILE" <<EOF
# Aggressive snapshot settings per Ben's request
snapshot:
  interval: 10m   # Snapshots every 10 minutes
  retention: 1h   # Keep data for 1 hour

# Aggressive compaction levels: 30s/1m/5m/15m/30m intervals
levels:
  - interval: 30s
  - interval: 1m
  - interval: 5m
  - interval: 15m
  - interval: 30m

dbs:
  - path: $DB_PATH
    # Checkpoint settings to ensure checkpoints happen
    checkpoint-interval: 1m          # Check for checkpoint every minute
    min-checkpoint-page-count: 100   # Low threshold to trigger checkpoints
    max-checkpoint-page-count: 5000  # Force checkpoint at this size

    replicas:
      - type: file
        path: $REPLICA_PATH
        retention-check-interval: 5m # Check retention every 5 minutes
EOF
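# Optional sanity check before starting (added sketch; assumes this build
# provides the `databases` subcommand - drop the line if it does not):
bin/litestream databases -config "$CONFIG_FILE" || echo "Warning: config check failed"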
echo "Starting litestream..."
|
||||
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
|
||||
LITESTREAM_PID=$!
|
||||
|
||||
sleep 3
|
||||
|
||||
if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
|
||||
echo "ERROR: Litestream failed to start!"
|
||||
tail -50 "$LOG_DIR/litestream.log"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Litestream running (PID: $LITESTREAM_PID)"
|
||||
echo ""
|
||||
|
||||
# Start load generator with heavy sustained load
|
||||
echo "Starting load generator (heavy sustained load)..."
|
||||
bin/litestream-test load \
|
||||
-db "$DB_PATH" \
|
||||
-write-rate 500 \
|
||||
-duration "$TEST_DURATION" \
|
||||
-pattern wave \
|
||||
-payload-size 4096 \
|
||||
-read-ratio 0.3 \
|
||||
-workers 8 \
|
||||
> "$LOG_DIR/load.log" 2>&1 &
|
||||
LOAD_PID=$!
|
||||
|
||||
echo "Load generator running (PID: $LOAD_PID)"
|
||||
echo ""
|
||||
|
||||
# Monitor function with detailed metrics
|
||||
monitor_comprehensive() {
|
||||
local last_checkpoint_count=0
|
||||
local last_compaction_count=0
|
||||
local last_sync_count=0
|
||||
|
||||
while true; do
|
||||
sleep 60 # Check every minute
|
||||
|
||||
echo "[$(date +%H:%M:%S)] Status Report"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
|
||||
# Database metrics
|
||||
if [ -f "$DB_PATH" ]; then
|
||||
DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
|
||||
echo " Database size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"
|
||||
|
||||
# WAL file size (indicates write activity)
|
||||
if [ -f "$DB_PATH-wal" ]; then
|
||||
WAL_SIZE=$(stat -f%z "$DB_PATH-wal" 2>/dev/null || stat -c%s "$DB_PATH-wal" 2>/dev/null)
|
||||
echo " WAL size: $(numfmt --to=iec-i --suffix=B $WAL_SIZE 2>/dev/null || echo "$WAL_SIZE bytes")"
|
||||
fi
|
||||
|
||||
# Row count
|
||||
TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null)
|
||||
if echo "$TABLES" | grep -q "load_test"; then
|
||||
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
|
||||
echo " Rows in database: $ROW_COUNT"
|
||||
elif echo "$TABLES" | grep -q "test_table_0"; then
|
||||
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
|
||||
echo " Rows in database: $ROW_COUNT"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Replication metrics
|
||||
if [ -d "$REPLICA_PATH" ]; then
|
||||
# Count snapshot files
|
||||
SNAPSHOTS=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ')
|
||||
# Count LTX files (WAL segments)
|
||||
LTX_FILES=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ')
|
||||
REPLICA_SIZE=$(du -sh "$REPLICA_PATH" 2>/dev/null | cut -f1)
|
||||
echo " Replica: $SNAPSHOTS snapshots, $LTX_FILES segments, size: $REPLICA_SIZE"
|
||||
fi
|
||||
|
||||
# Operation metrics (with delta since last check)
|
||||
if [ -f "$LOG_DIR/litestream.log" ]; then
|
||||
CHECKPOINT_COUNT=$(grep -c "checkpoint" "$LOG_DIR/litestream.log" 2>/dev/null)
|
||||
CHECKPOINT_COUNT=${CHECKPOINT_COUNT:-0}
|
||||
COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" 2>/dev/null)
|
||||
COMPACTION_COUNT=${COMPACTION_COUNT:-0}
|
||||
SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" 2>/dev/null)
|
||||
SYNC_COUNT=${SYNC_COUNT:-0}
|
||||
|
||||
CHECKPOINT_DELTA=$((CHECKPOINT_COUNT - last_checkpoint_count))
|
||||
COMPACTION_DELTA=$((COMPACTION_COUNT - last_compaction_count))
|
||||
SYNC_DELTA=$((SYNC_COUNT - last_sync_count))
|
||||
|
||||
echo " Operations: $CHECKPOINT_COUNT checkpoints (+$CHECKPOINT_DELTA), $COMPACTION_COUNT compactions (+$COMPACTION_DELTA)"
|
||||
echo " Syncs: $SYNC_COUNT total (+$SYNC_DELTA in last minute)"
|
||||
|
||||
last_checkpoint_count=$CHECKPOINT_COUNT
|
||||
last_compaction_count=$COMPACTION_COUNT
|
||||
last_sync_count=$SYNC_COUNT
|
||||
fi
|
||||
|
||||
# Check for errors (excluding known non-critical)
|
||||
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ')
|
||||
if [ "$ERROR_COUNT" -gt 0 ]; then
|
||||
echo " ⚠ Critical errors: $ERROR_COUNT"
|
||||
grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | tail -2
|
||||
fi
|
||||
|
||||
# Load generator status
|
||||
if [ -f "$LOG_DIR/load.log" ]; then
|
||||
LOAD_STATUS=$(tail -1 "$LOG_DIR/load.log" 2>/dev/null | grep -oE "writes_per_sec=[0-9.]+" | cut -d= -f2 || echo "0")
|
||||
echo " Write rate: ${LOAD_STATUS:-0} writes/sec"
|
||||
fi
|
||||
|
||||
# Check processes
|
||||
if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
|
||||
echo " ✗ Litestream stopped unexpectedly!"
|
||||
break
|
||||
fi
|
||||
|
||||
if ! kill -0 "$LOAD_PID" 2>/dev/null; then
|
||||
echo " ✓ Load test completed"
|
||||
break
|
||||
fi
|
||||
|
||||
echo ""
|
||||
done
|
||||
}
|
||||
|
||||
echo "Running comprehensive test for $TEST_DURATION..."
|
||||
echo "Monitor will report every 60 seconds"
|
||||
echo "================================================"
|
||||
echo ""
|
||||
|
||||
# Start monitoring in background
|
||||
monitor_comprehensive &
|
||||
MONITOR_PID=$!
|
||||
|
||||
# Wait for load test to complete
|
||||
wait "$LOAD_PID" 2>/dev/null || true
|
||||
|
||||
# Stop the monitor
|
||||
kill $MONITOR_PID 2>/dev/null || true
|
||||
wait $MONITOR_PID 2>/dev/null || true
|
||||
|
||||
echo ""
|
||||
echo "================================================"
|
||||
echo "Final Test Results"
|
||||
echo "================================================"
|
||||
|
||||
# Final statistics
|
||||
echo "Database Statistics:"
|
||||
if [ -f "$DB_PATH" ]; then
|
||||
DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
|
||||
# Find the actual table name
|
||||
TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null)
|
||||
if echo "$TABLES" | grep -q "load_test"; then
|
||||
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
|
||||
elif echo "$TABLES" | grep -q "test_table_0"; then
|
||||
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
|
||||
elif echo "$TABLES" | grep -q "test_data"; then
|
||||
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
|
||||
else
|
||||
ROW_COUNT="0"
|
||||
fi
|
||||
echo " Final size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"
|
||||
echo " Total rows: $ROW_COUNT"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "Replication Statistics:"
|
||||
if [ -d "$REPLICA_PATH" ]; then
|
||||
SNAPSHOT_COUNT=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ')
|
||||
LTX_COUNT=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ')
|
||||
REPLICA_SIZE=$(du -sh "$REPLICA_PATH" | cut -f1)
|
||||
echo " Snapshots created: $SNAPSHOT_COUNT"
|
||||
echo " LTX segments: $LTX_COUNT"
|
||||
echo " Replica size: $REPLICA_SIZE"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "Operation Counts:"
|
||||
if [ -f "$LOG_DIR/litestream.log" ]; then
|
||||
COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" || echo "0")
|
||||
CHECKPOINT_COUNT=$(grep -c "checkpoint" "$LOG_DIR/litestream.log" || echo "0")
|
||||
SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" || echo "0")
|
||||
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | wc -l | tr -d ' ' || echo "0")
|
||||
else
|
||||
COMPACTION_COUNT="0"
|
||||
CHECKPOINT_COUNT="0"
|
||||
SYNC_COUNT="0"
|
||||
ERROR_COUNT="0"
|
||||
fi
|
||||
echo " Compactions: $COMPACTION_COUNT"
|
||||
echo " Checkpoints: $CHECKPOINT_COUNT"
|
||||
echo " Syncs: $SYNC_COUNT"
|
||||
echo " Errors: $ERROR_COUNT"
|
||||
|
||||
# Validation test
|
||||
echo ""
|
||||
echo "Testing validation..."
|
||||
bin/litestream-test validate \
|
||||
-source "$DB_PATH" \
|
||||
-replica "$REPLICA_PATH" \
|
||||
> "$LOG_DIR/validate.log" 2>&1
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
echo " ✓ Validation passed!"
|
||||
else
|
||||
echo " ✗ Validation failed!"
|
||||
tail -10 "$LOG_DIR/validate.log"
|
||||
fi
|
||||
|
||||
# Test restoration
|
||||
echo ""
|
||||
echo "Testing restoration..."
|
||||
RESTORE_DB="$TEST_DIR/restored.db"
|
||||
bin/litestream restore -o "$RESTORE_DB" "file://$REPLICA_PATH" > "$LOG_DIR/restore.log" 2>&1
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
# Get row count from restored database
|
||||
TABLES=$(sqlite3 "$RESTORE_DB" ".tables" 2>/dev/null)
|
||||
if echo "$TABLES" | grep -q "load_test"; then
|
||||
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
|
||||
elif echo "$TABLES" | grep -q "test_table_0"; then
|
||||
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
|
||||
else
|
||||
RESTORED_COUNT="0"
|
||||
fi
|
||||
|
||||
if [ "$RESTORED_COUNT" = "$ROW_COUNT" ]; then
|
||||
echo " ✓ Restoration successful! ($RESTORED_COUNT rows match)"
|
||||
else
|
||||
echo " ⚠ Row count mismatch! Original: $ROW_COUNT, Restored: $RESTORED_COUNT"
|
||||
fi
|
||||
else
|
||||
echo " ✗ Restoration failed!"
|
||||
tail -10 "$LOG_DIR/restore.log"
|
||||
fi
|
||||
|
||||
# Summary
|
||||
echo ""
|
||||
echo "================================================"
|
||||
echo "Test Summary"
|
||||
echo "================================================"
|
||||
|
||||
# Count critical errors (exclude known non-critical ones)
|
||||
CRITICAL_ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ')
|
||||
|
||||
# Determine test result
|
||||
TEST_PASSED=true
|
||||
ISSUES=""
|
||||
|
||||
if [ "$CRITICAL_ERROR_COUNT" -gt 0 ]; then
|
||||
TEST_PASSED=false
|
||||
ISSUES="$ISSUES\n - Critical errors detected: $CRITICAL_ERROR_COUNT"
|
||||
fi
|
||||
|
||||
if [ "$LTX_COUNT" -eq 0 ]; then
|
||||
TEST_PASSED=false
|
||||
ISSUES="$ISSUES\n - No LTX segments created (replication not working)"
|
||||
fi
|
||||
|
||||
if [ "$CHECKPOINT_COUNT" -eq 0 ]; then
|
||||
ISSUES="$ISSUES\n - No checkpoints recorded (may need more aggressive settings)"
|
||||
fi
|
||||
|
||||
if [ "$COMPACTION_COUNT" -eq 0 ]; then
|
||||
ISSUES="$ISSUES\n - No compactions occurred (unexpected for this test duration)"
|
||||
fi
|
||||
|
||||
if [ "$TEST_PASSED" = true ]; then
|
||||
echo "✓ COMPREHENSIVE TEST PASSED!"
|
||||
echo ""
|
||||
echo "Successfully validated:"
|
||||
echo " - Continuous replication ($LTX_COUNT segments)"
|
||||
echo " - Compaction ($COMPACTION_COUNT operations)"
|
||||
[ "$CHECKPOINT_COUNT" -gt 0 ] && echo " - Checkpoints ($CHECKPOINT_COUNT operations)"
|
||||
[ "$SNAPSHOT_COUNT" -gt 0 ] && echo " - Snapshots ($SNAPSHOT_COUNT created)"
|
||||
echo " - Database restoration"
|
||||
echo ""
|
||||
echo "The configuration is ready for production use."
|
||||
else
|
||||
echo "⚠ TEST COMPLETED WITH ISSUES:"
|
||||
echo -e "$ISSUES"
|
||||
echo ""
|
||||
echo "Review the logs for details:"
|
||||
echo " $LOG_DIR/litestream.log"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "Full test results available in: $TEST_DIR"
|
||||
echo "================================================"
|
||||
@@ -1,464 +0,0 @@
#!/bin/bash
set -euo pipefail

# MinIO S3-compatible test with Docker
# This test runs Litestream against a local MinIO instance to simulate S3 behavior

TEST_DURATION="${TEST_DURATION:-2h}"
TEST_DIR="/tmp/litestream-minio-$(date +%Y%m%d-%H%M%S)"
DB_PATH="$TEST_DIR/test.db"
CONFIG_FILE="$TEST_DIR/litestream.yml"
LOG_DIR="$TEST_DIR/logs"

# MinIO settings - use alternative ports to avoid conflicts
MINIO_CONTAINER_NAME="litestream-minio-test"
MINIO_PORT=9100
MINIO_CONSOLE_PORT=9101
MINIO_ROOT_USER="minioadmin"
MINIO_ROOT_PASSWORD="minioadmin"
MINIO_BUCKET="litestream-test"
MINIO_ENDPOINT="http://localhost:${MINIO_PORT}"
S3_PATH="s3://${MINIO_BUCKET}/litestream-test-$(date +%Y%m%d-%H%M%S)"

echo "================================================"
echo "Litestream MinIO S3 Test"
echo "================================================"
echo "Duration: $TEST_DURATION"
echo "Test directory: $TEST_DIR"
echo "MinIO endpoint: $MINIO_ENDPOINT"
echo "MinIO bucket: $MINIO_BUCKET"
echo "Start time: $(date)"
echo ""

# Check for Docker
if ! command -v docker &> /dev/null; then
    echo "Error: Docker is not installed or not in PATH"
    echo "Please install Docker to run this test"
    exit 1
fi

cleanup() {
    echo ""
    echo "================================================"
    echo "Cleaning up..."
    echo "================================================"

    # Kill all spawned processes
    jobs -p | xargs -r kill 2>/dev/null || true
    wait 2>/dev/null || true

    # Stop and remove MinIO container
    if [ -n "${MINIO_CONTAINER_NAME:-}" ]; then
        echo "Stopping MinIO container..."
        docker stop "$MINIO_CONTAINER_NAME" 2>/dev/null || true
        docker rm "$MINIO_CONTAINER_NAME" 2>/dev/null || true
    fi

    echo ""
    echo "Test completed at: $(date)"
    echo "Results saved in: $TEST_DIR"
}

trap cleanup EXIT INT TERM

# Create directories
mkdir -p "$TEST_DIR" "$LOG_DIR"

# Clean up any existing container
if docker ps -a | grep -q "$MINIO_CONTAINER_NAME"; then
    echo "Removing existing MinIO container..."
    docker stop "$MINIO_CONTAINER_NAME" 2>/dev/null || true
    docker rm "$MINIO_CONTAINER_NAME" 2>/dev/null || true
fi

# Start MinIO container
echo "Starting MinIO container..."
docker run -d \
    --name "$MINIO_CONTAINER_NAME" \
    -p "${MINIO_PORT}:9000" \
    -p "${MINIO_CONSOLE_PORT}:9001" \
    -e "MINIO_ROOT_USER=${MINIO_ROOT_USER}" \
    -e "MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}" \
    minio/minio server /data --console-address ":9001"

echo "Waiting for MinIO to start..."
sleep 5

# Check if MinIO is running
if ! docker ps | grep -q "$MINIO_CONTAINER_NAME"; then
    echo "Error: MinIO container failed to start"
    docker logs "$MINIO_CONTAINER_NAME" 2>&1
    exit 1
fi

echo "MinIO is running!"
echo " API: http://localhost:${MINIO_PORT} (mapped from container port 9000)"
echo " Console: http://localhost:${MINIO_CONSOLE_PORT} (mapped from container port 9001)"
echo " Credentials: ${MINIO_ROOT_USER}/${MINIO_ROOT_PASSWORD}"
echo ""

# Create MinIO bucket using mc (MinIO Client) in Docker
echo "Creating MinIO bucket..."
docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
    -e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
    minio/mc mb "minio/${MINIO_BUCKET}" 2>/dev/null || true

echo "Bucket '${MINIO_BUCKET}' ready"
echo ""

# Build binaries if needed
echo "Building binaries..."
if [ ! -f bin/litestream ]; then
    go build -o bin/litestream ./cmd/litestream
fi
if [ ! -f bin/litestream-test ]; then
    go build -o bin/litestream-test ./cmd/litestream-test
fi

# Create and populate test database
echo "Creating and populating test database..."
sqlite3 "$DB_PATH" <<EOF
PRAGMA journal_mode=WAL;
PRAGMA page_size=4096;
CREATE TABLE IF NOT EXISTS test_data (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    data BLOB,
    created_at INTEGER DEFAULT (strftime('%s', 'now'))
);
EOF

# Populate database with initial data
echo "Populating database (50MB initial data)..."
# Run inside the `if` so a failure doesn't trip set -e before the warning.
if ! bin/litestream-test populate -db "$DB_PATH" -target-size 50MB -batch-size 1000 > "$LOG_DIR/populate.log" 2>&1; then
    echo "Warning: Population failed, but continuing..."
    cat "$LOG_DIR/populate.log"
fi

# Create Litestream configuration for MinIO
echo "Creating Litestream configuration for MinIO S3..."
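# Note (added): force-path-style is required because MinIO addresses buckets
# by path rather than virtual-hosted subdomains; skip-verify only matters for
# self-signed TLS and is harmless over the plain-HTTP endpoint used here.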
cat > "$CONFIG_FILE" <<EOF
|
||||
# MinIO S3 endpoint configuration
|
||||
access-key-id: ${MINIO_ROOT_USER}
|
||||
secret-access-key: ${MINIO_ROOT_PASSWORD}
|
||||
|
||||
# Aggressive snapshot settings for testing
|
||||
snapshot:
|
||||
interval: 10m # Snapshots every 10 minutes
|
||||
retention: 1h # Keep data for 1 hour
|
||||
|
||||
# Aggressive compaction levels: 30s/1m/5m/15m/30m intervals
|
||||
levels:
|
||||
- interval: 30s
|
||||
- interval: 1m
|
||||
- interval: 5m
|
||||
- interval: 15m
|
||||
- interval: 30m
|
||||
|
||||
dbs:
|
||||
- path: $DB_PATH
|
||||
# Checkpoint settings
|
||||
checkpoint-interval: 1m
|
||||
min-checkpoint-page-count: 100
|
||||
max-checkpoint-page-count: 5000
|
||||
|
||||
replicas:
|
||||
- url: ${S3_PATH}
|
||||
endpoint: ${MINIO_ENDPOINT}
|
||||
region: us-east-1
|
||||
force-path-style: true
|
||||
skip-verify: true
|
||||
retention-check-interval: 5m
|
||||
EOF
|
||||
|
||||
echo "Starting litestream with MinIO backend..."
|
||||
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
|
||||
LITESTREAM_PID=$!
|
||||
|
||||
sleep 3
|
||||
|
||||
if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
|
||||
echo "ERROR: Litestream failed to start!"
|
||||
echo "Last 50 lines of log:"
|
||||
tail -50 "$LOG_DIR/litestream.log"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Litestream running (PID: $LITESTREAM_PID)"
|
||||
echo ""
|
||||
|
||||
# Start load generator
|
||||
echo "Starting load generator (heavy sustained load)..."
|
||||
bin/litestream-test load \
|
||||
-db "$DB_PATH" \
|
||||
-write-rate 500 \
|
||||
-duration "$TEST_DURATION" \
|
||||
-pattern wave \
|
||||
-payload-size 4096 \
|
||||
-read-ratio 0.3 \
|
||||
-workers 8 \
|
||||
> "$LOG_DIR/load.log" 2>&1 &
|
||||
LOAD_PID=$!
|
||||
|
||||
echo "Load generator running (PID: $LOAD_PID)"
|
||||
echo ""
|
||||
|
||||
# Monitor function for MinIO
|
||||
monitor_minio() {
|
||||
local last_checkpoint_count=0
|
||||
local last_compaction_count=0
|
||||
local last_sync_count=0
|
||||
|
||||
while true; do
|
||||
sleep 60
|
||||
|
||||
echo "[$(date +%H:%M:%S)] Status Report"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
|
||||
# Database metrics
|
||||
if [ -f "$DB_PATH" ]; then
|
||||
DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
|
||||
echo " Database size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"
|
||||
|
||||
if [ -f "$DB_PATH-wal" ]; then
|
||||
WAL_SIZE=$(stat -f%z "$DB_PATH-wal" 2>/dev/null || stat -c%s "$DB_PATH-wal" 2>/dev/null)
|
||||
echo " WAL size: $(numfmt --to=iec-i --suffix=B $WAL_SIZE 2>/dev/null || echo "$WAL_SIZE bytes")"
|
||||
fi
|
||||
|
||||
# Row count
|
||||
TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null)
|
||||
if echo "$TABLES" | grep -q "load_test"; then
|
||||
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
|
||||
echo " Rows in database: $ROW_COUNT"
|
||||
elif echo "$TABLES" | grep -q "test_table_0"; then
|
||||
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
|
||||
echo " Rows in database: $ROW_COUNT"
|
||||
fi
|
||||
fi
|
||||
|
||||
# MinIO/S3 metrics using docker exec
|
||||
echo ""
|
||||
echo " MinIO S3 Statistics:"
|
||||
|
||||
# Count objects in MinIO
|
||||
OBJECT_COUNT=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
|
||||
-e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
|
||||
minio/mc ls "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null | wc -l | tr -d ' ' || echo "0")
|
||||
|
||||
# Count LTX files (modern format) and snapshots
|
||||
LTX_COUNT=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
|
||||
-e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
|
||||
minio/mc ls "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null | grep -c "\.ltx" || echo "0")
|
||||
|
||||
SNAPSHOT_COUNT=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
|
||||
-e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
|
||||
minio/mc ls "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null | grep -c "snapshot" || echo "0")
|
||||
|
||||
echo " Total objects: $OBJECT_COUNT"
|
||||
echo " LTX segments: $LTX_COUNT"
|
||||
echo " Snapshots: $SNAPSHOT_COUNT"
|
||||
|
||||
# Operation metrics
|
||||
if [ -f "$LOG_DIR/litestream.log" ]; then
|
||||
CHECKPOINT_COUNT=$(grep -c "checkpoint" "$LOG_DIR/litestream.log" 2>/dev/null)
|
||||
CHECKPOINT_COUNT=${CHECKPOINT_COUNT:-0}
|
||||
COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" 2>/dev/null)
|
||||
COMPACTION_COUNT=${COMPACTION_COUNT:-0}
|
||||
SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" 2>/dev/null)
|
||||
SYNC_COUNT=${SYNC_COUNT:-0}
|
||||
|
||||
CHECKPOINT_DELTA=$((CHECKPOINT_COUNT - last_checkpoint_count))
|
||||
COMPACTION_DELTA=$((COMPACTION_COUNT - last_compaction_count))
|
||||
SYNC_DELTA=$((SYNC_COUNT - last_sync_count))
|
||||
|
||||
echo ""
|
||||
echo " Operations: $CHECKPOINT_COUNT checkpoints (+$CHECKPOINT_DELTA), $COMPACTION_COUNT compactions (+$COMPACTION_DELTA)"
|
||||
echo " Syncs: $SYNC_COUNT total (+$SYNC_DELTA in last minute)"
|
||||
|
||||
last_checkpoint_count=$CHECKPOINT_COUNT
|
||||
last_compaction_count=$COMPACTION_COUNT
|
||||
last_sync_count=$SYNC_COUNT
|
||||
fi
|
||||
|
||||
# Check for errors
|
||||
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ')
|
||||
if [ "$ERROR_COUNT" -gt 0 ]; then
|
||||
echo " ⚠ Critical errors: $ERROR_COUNT"
|
||||
grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | tail -2
|
||||
fi
|
||||
|
||||
# Check processes
|
||||
if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
|
||||
echo " ✗ Litestream stopped unexpectedly!"
|
||||
break
|
||||
fi
|
||||
|
||||
if ! kill -0 "$LOAD_PID" 2>/dev/null; then
|
||||
echo " ✓ Load test completed"
|
||||
break
|
||||
fi
|
||||
|
||||
echo ""
|
||||
done
|
||||
}
|
||||
|
||||
echo "Running MinIO S3 test for $TEST_DURATION..."
|
||||
echo "Monitor will report every 60 seconds"
|
||||
echo "================================================"
|
||||
echo ""
|
||||
|
||||
# Start monitoring in background
|
||||
monitor_minio &
|
||||
MONITOR_PID=$!
|
||||
|
||||
# Wait for load test to complete
|
||||
wait "$LOAD_PID" 2>/dev/null || true
|
||||
|
||||
# Stop the monitor
|
||||
kill $MONITOR_PID 2>/dev/null || true
|
||||
wait $MONITOR_PID 2>/dev/null || true
|
||||
|
||||
echo ""
|
||||
echo "================================================"
|
||||
echo "Final Test Results"
|
||||
echo "================================================"
|
||||
|
||||
# Final statistics
|
||||
echo "Database Statistics:"
|
||||
if [ -f "$DB_PATH" ]; then
|
||||
DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
|
||||
TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null)
|
||||
if echo "$TABLES" | grep -q "load_test"; then
|
||||
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
|
||||
elif echo "$TABLES" | grep -q "test_table_0"; then
|
||||
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
|
||||
elif echo "$TABLES" | grep -q "test_data"; then
|
||||
ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
|
||||
else
|
||||
ROW_COUNT="0"
|
||||
fi
|
||||
echo " Final size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"
|
||||
echo " Total rows: $ROW_COUNT"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "MinIO S3 Statistics:"
|
||||
FINAL_OBJECTS=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
|
||||
-e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
|
||||
minio/mc ls "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null | wc -l | tr -d ' ' || echo "0")
|
||||
|
||||
FINAL_LTX=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
|
||||
-e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
|
||||
minio/mc ls "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null | grep -c "\.ltx" || echo "0")
|
||||
|
||||
FINAL_SNAPSHOTS=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
|
||||
-e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
|
||||
minio/mc ls "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null | grep -c "snapshot" || echo "0")
|
||||
|
||||
echo " Total objects in MinIO: $FINAL_OBJECTS"
|
||||
echo " LTX segments: $FINAL_LTX"
|
||||
echo " Snapshots: $FINAL_SNAPSHOTS"
|
||||
|
||||
# Get storage size
|
||||
STORAGE_INFO=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
|
||||
-e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
|
||||
minio/mc du "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null | tail -1 || echo "0")
|
||||
echo " Total storage used: $STORAGE_INFO"
|
||||
|
||||
echo ""
|
||||
echo "Operation Counts:"
|
||||
if [ -f "$LOG_DIR/litestream.log" ]; then
|
||||
COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" || echo "0")
|
||||
CHECKPOINT_COUNT=$(grep -c "checkpoint" "$LOG_DIR/litestream.log" || echo "0")
|
||||
SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" || echo "0")
|
||||
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | wc -l | tr -d ' ' || echo "0")
|
||||
else
|
||||
COMPACTION_COUNT="0"
|
||||
CHECKPOINT_COUNT="0"
|
||||
SYNC_COUNT="0"
|
||||
ERROR_COUNT="0"
|
||||
fi
|
||||
echo " Compactions: $COMPACTION_COUNT"
|
||||
echo " Checkpoints: $CHECKPOINT_COUNT"
|
||||
echo " Syncs: $SYNC_COUNT"
|
||||
echo " Errors: $ERROR_COUNT"
|
||||
|
||||
# Test restoration from MinIO
|
||||
echo ""
|
||||
echo "Testing restoration from MinIO S3..."
|
||||
RESTORE_DB="$TEST_DIR/restored.db"
|
||||
|
||||
# Export credentials for litestream restore
|
||||
export AWS_ACCESS_KEY_ID="${MINIO_ROOT_USER}"
|
||||
export AWS_SECRET_ACCESS_KEY="${MINIO_ROOT_PASSWORD}"
|
||||
|
||||
# Create a config file for restoration
|
||||
cat > "$TEST_DIR/restore.yml" <<EOF
|
||||
access-key-id: ${MINIO_ROOT_USER}
|
||||
secret-access-key: ${MINIO_ROOT_PASSWORD}
|
||||
EOF
|
||||
|
||||
bin/litestream restore \
|
||||
-config "$TEST_DIR/restore.yml" \
|
||||
-o "$RESTORE_DB" \
|
||||
"$S3_PATH" > "$LOG_DIR/restore.log" 2>&1
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
echo "✓ Restoration successful!"
|
||||
|
||||
# Compare row counts
|
||||
TABLES=$(sqlite3 "$RESTORE_DB" ".tables" 2>/dev/null)
|
||||
if echo "$TABLES" | grep -q "load_test"; then
|
||||
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
|
||||
elif echo "$TABLES" | grep -q "test_table_0"; then
|
||||
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
|
||||
elif echo "$TABLES" | grep -q "test_data"; then
|
||||
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
|
||||
else
|
||||
RESTORED_COUNT="0"
|
||||
fi
|
||||
|
||||
if [ "$ROW_COUNT" = "$RESTORED_COUNT" ]; then
|
||||
echo "✓ Row counts match! ($RESTORED_COUNT rows)"
|
||||
else
|
||||
echo "⚠ Row count mismatch! Original: $ROW_COUNT, Restored: $RESTORED_COUNT"
|
||||
fi
|
||||
else
|
||||
echo "✗ Restoration failed!"
|
||||
tail -20 "$LOG_DIR/restore.log"
|
||||
fi
|
||||
|
||||
# Summary
|
||||
echo ""
|
||||
echo "================================================"
|
||||
echo "Test Summary"
|
||||
echo "================================================"
|
||||
|
||||
CRITICAL_ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ')
|
||||
|
||||
if [ "$CRITICAL_ERROR_COUNT" -eq 0 ] && [ "$FINAL_OBJECTS" -gt 0 ]; then
|
||||
echo "✓ MINIO S3 TEST PASSED!"
|
||||
echo ""
|
||||
echo "Successfully validated:"
|
||||
echo " - S3-compatible replication to MinIO"
|
||||
echo " - Stored $FINAL_OBJECTS objects"
|
||||
echo " - Compactions: $COMPACTION_COUNT"
|
||||
echo " - Syncs: $SYNC_COUNT"
|
||||
[ "$CHECKPOINT_COUNT" -gt 0 ] && echo " - Checkpoints: $CHECKPOINT_COUNT"
|
||||
[ "$FINAL_SNAPSHOTS" -gt 0 ] && echo " - Snapshots: $FINAL_SNAPSHOTS"
|
||||
echo " - Database restoration from S3"
|
||||
else
|
||||
echo "⚠ TEST COMPLETED WITH ISSUES:"
|
||||
[ "$CRITICAL_ERROR_COUNT" -gt 0 ] && echo " - Critical errors detected: $CRITICAL_ERROR_COUNT"
|
||||
[ "$FINAL_OBJECTS" -eq 0 ] && echo " - No objects stored in MinIO"
|
||||
echo ""
|
||||
echo "Review the logs for details:"
|
||||
echo " $LOG_DIR/litestream.log"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "MinIO Console: http://localhost:${MINIO_CONSOLE_PORT}"
|
||||
echo "Credentials: ${MINIO_ROOT_USER}/${MINIO_ROOT_PASSWORD}"
|
||||
echo ""
|
||||
echo "Full test results available in: $TEST_DIR"
|
||||
echo "================================================"
|
||||
@@ -1,409 +0,0 @@
#!/bin/bash
set -euo pipefail

# Check for required environment variables
if [ -z "${AWS_ACCESS_KEY_ID:-}" ] || [ -z "${AWS_SECRET_ACCESS_KEY:-}" ] || [ -z "${S3_BUCKET:-}" ]; then
    echo "Error: Required environment variables not set"
    echo "Please set: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, S3_BUCKET"
    echo ""
    echo "Example:"
    echo "  export AWS_ACCESS_KEY_ID=your_key"
    echo "  export AWS_SECRET_ACCESS_KEY=your_secret"
    echo "  export S3_BUCKET=your-test-bucket"
    echo "  export AWS_REGION=us-east-1  # optional, defaults to us-east-1"
    exit 1
fi

AWS_REGION="${AWS_REGION:-us-east-1}"
S3_PATH="s3://${S3_BUCKET}/litestream-overnight-$(date +%Y%m%d-%H%M%S)"

TEST_DIR="/tmp/litestream-overnight-s3-$(date +%Y%m%d-%H%M%S)"
DB_PATH="$TEST_DIR/test.db"
LOG_DIR="$TEST_DIR/logs"
CONFIG_FILE="$TEST_DIR/litestream.yml"
MONITOR_PID=""
LITESTREAM_PID=""
LOAD_PID=""

echo "================================================"
echo "Litestream Overnight S3 Test Suite"
echo "================================================"
echo "Test directory: $TEST_DIR"
echo "S3 destination: $S3_PATH"
echo "AWS Region: $AWS_REGION"
echo "Start time: $(date)"
echo ""

cleanup() {
    echo ""
    echo "================================================"
    echo "Cleaning up..."
    echo "================================================"

    if [ -n "$LOAD_PID" ] && kill -0 "$LOAD_PID" 2>/dev/null; then
        echo "Stopping load generator..."
        kill "$LOAD_PID" 2>/dev/null || true
        wait "$LOAD_PID" 2>/dev/null || true
    fi

    if [ -n "$LITESTREAM_PID" ] && kill -0 "$LITESTREAM_PID" 2>/dev/null; then
        echo "Stopping litestream..."
        kill "$LITESTREAM_PID" 2>/dev/null || true
        wait "$LITESTREAM_PID" 2>/dev/null || true
    fi

    if [ -n "$MONITOR_PID" ] && kill -0 "$MONITOR_PID" 2>/dev/null; then
        echo "Stopping monitor..."
        kill "$MONITOR_PID" 2>/dev/null || true
    fi

    echo ""
    echo "Test Summary:"
    echo "============="
    if [ -f "$LOG_DIR/monitor.log" ]; then
        echo "Final statistics from monitor log:"
        tail -20 "$LOG_DIR/monitor.log"
    fi

    echo ""
    echo "S3 Final Statistics:"
    aws s3 ls "${S3_PATH}/" --recursive --summarize 2>/dev/null | tail -5 || true

    echo ""
    echo "Test artifacts saved locally in: $TEST_DIR"
    echo "S3 replica data in: $S3_PATH"
    echo "End time: $(date)"
}

trap cleanup EXIT INT TERM

mkdir -p "$TEST_DIR" "$LOG_DIR"

echo "Creating initial database..."
sqlite3 "$DB_PATH" <<EOF
PRAGMA journal_mode=WAL;
CREATE TABLE IF NOT EXISTS test_data (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    data BLOB,
    created_at INTEGER
);
EOF

echo "Creating litestream configuration for S3 with frequent intervals..."
cat > "$CONFIG_FILE" <<EOF
# Litestream S3 configuration for overnight testing
# with aggressive compaction and snapshot intervals

# Optional: Access key configuration (can also use environment variables)
# access-key-id: ${AWS_ACCESS_KEY_ID}
# secret-access-key: ${AWS_SECRET_ACCESS_KEY}

# Snapshot every 10 minutes
snapshot:
  interval: 10m
  retention: 720h  # Keep data for 30 days

# Compaction settings - very frequent for testing
levels:
  - interval: 30s
  - interval: 1m
  - interval: 5m
  - interval: 15m
  - interval: 30m
  - interval: 1h

dbs:
  - path: $DB_PATH
    # Checkpoint settings - frequent for testing
    checkpoint-interval: 30s
    min-checkpoint-page-count: 1000
    max-checkpoint-page-count: 10000

    replicas:
      - url: ${S3_PATH}
        region: ${AWS_REGION}
        retention-check-interval: 1h

        # S3-specific settings
        force-path-style: false
        skip-verify: false

        # Optional: Server-side encryption
        # sse: AES256
        # sse-kms-key-id: your-kms-key-id
EOF

echo ""
echo "Configuration created at: $CONFIG_FILE"
echo ""

echo "Testing S3 connectivity..."
if aws s3 ls "s3://${S3_BUCKET}/" > /dev/null 2>&1; then
    echo "✓ S3 bucket accessible"
else
    echo "✗ Failed to access S3 bucket: ${S3_BUCKET}"
    exit 1
fi

echo "Building litestream if needed..."
if [ ! -f bin/litestream ]; then
    go build -o bin/litestream ./cmd/litestream
fi

echo "Starting litestream replication to S3..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
echo "Litestream started with PID: $LITESTREAM_PID"

sleep 5

if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
    echo "ERROR: Litestream failed to start. Check logs:"
    tail -50 "$LOG_DIR/litestream.log"
    exit 1
fi

monitor_s3_test() {
    while true; do
        echo "================================================" | tee -a "$LOG_DIR/monitor.log"
        echo "Monitor Update: $(date)" | tee -a "$LOG_DIR/monitor.log"
        echo "================================================" | tee -a "$LOG_DIR/monitor.log"

        # Database size
        if [ -f "$DB_PATH" ]; then
            DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null || echo "0")
            echo "Database size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")" | tee -a "$LOG_DIR/monitor.log"
        fi

        # WAL size
        if [ -f "$DB_PATH-wal" ]; then
            WAL_SIZE=$(stat -f%z "$DB_PATH-wal" 2>/dev/null || stat -c%s "$DB_PATH-wal" 2>/dev/null || echo "0")
            echo "WAL size: $(numfmt --to=iec-i --suffix=B $WAL_SIZE 2>/dev/null || echo "$WAL_SIZE bytes")" | tee -a "$LOG_DIR/monitor.log"
        fi

        # S3 statistics
        echo "" | tee -a "$LOG_DIR/monitor.log"
        echo "S3 Replica Statistics:" | tee -a "$LOG_DIR/monitor.log"

        # Count objects in S3
        SNAPSHOT_COUNT=$(aws s3 ls "${S3_PATH}/" --recursive 2>/dev/null | grep -c "\.snapshot\.lz4" || echo "0")
        WAL_COUNT=$(aws s3 ls "${S3_PATH}/" --recursive 2>/dev/null | grep -c "\.wal\.lz4" || echo "0")
        TOTAL_OBJECTS=$(aws s3 ls "${S3_PATH}/" --recursive 2>/dev/null | wc -l | tr -d ' ' || echo "0")

        echo "  Snapshots in S3: $SNAPSHOT_COUNT" | tee -a "$LOG_DIR/monitor.log"
        echo "  WAL segments in S3: $WAL_COUNT" | tee -a "$LOG_DIR/monitor.log"
        echo "  Total objects in S3: $TOTAL_OBJECTS" | tee -a "$LOG_DIR/monitor.log"

        # Get S3 storage size (if possible)
        S3_SIZE=$(aws s3 ls "${S3_PATH}/" --recursive --summarize 2>/dev/null | grep "Total Size" | awk '{print $3}' || echo "0")
        if [ "$S3_SIZE" != "0" ]; then
            echo "  Total S3 storage: $(numfmt --to=iec-i --suffix=B $S3_SIZE 2>/dev/null || echo "$S3_SIZE bytes")" | tee -a "$LOG_DIR/monitor.log"
        fi

        # Count operations
        echo "" | tee -a "$LOG_DIR/monitor.log"
        echo "Operations:" | tee -a "$LOG_DIR/monitor.log"
        if [ -f "$LOG_DIR/litestream.log" ]; then
            COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" 2>/dev/null || echo "0")
            CHECKPOINT_COUNT=$(grep -iE "checkpoint|checkpointed" "$LOG_DIR/litestream.log" 2>/dev/null | wc -l | tr -d ' ' || echo "0")
            SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" 2>/dev/null || echo "0")
            echo "  Compactions: $COMPACTION_COUNT" | tee -a "$LOG_DIR/monitor.log"
            echo "  Checkpoints: $CHECKPOINT_COUNT" | tee -a "$LOG_DIR/monitor.log"
            echo "  Syncs: $SYNC_COUNT" | tee -a "$LOG_DIR/monitor.log"
        fi

        # Check for errors (exclude known non-critical)
        echo "" | tee -a "$LOG_DIR/monitor.log"
        ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ' || echo "0")
        echo "Critical errors in litestream log: $ERROR_COUNT" | tee -a "$LOG_DIR/monitor.log"

        if [ "$ERROR_COUNT" -gt 0 ]; then
            echo "Recent errors:" | tee -a "$LOG_DIR/monitor.log"
            grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | tail -5 | tee -a "$LOG_DIR/monitor.log"
        fi

        # Check for S3-specific errors
        S3_ERROR_COUNT=$(grep -c "S3\|AWS\|403\|404\|500\|503" "$LOG_DIR/litestream.log" 2>/dev/null || echo "0")
        if [ "$S3_ERROR_COUNT" -gt 0 ]; then
            echo "S3-specific errors: $S3_ERROR_COUNT" | tee -a "$LOG_DIR/monitor.log"
            grep "S3\|AWS\|403\|404\|500\|503" "$LOG_DIR/litestream.log" | tail -3 | tee -a "$LOG_DIR/monitor.log"
        fi

        # Process status
        echo "" | tee -a "$LOG_DIR/monitor.log"
        echo "Process Status:" | tee -a "$LOG_DIR/monitor.log"

        if kill -0 "$LITESTREAM_PID" 2>/dev/null; then
            echo "  Litestream: Running (PID: $LITESTREAM_PID)" | tee -a "$LOG_DIR/monitor.log"
        else
            echo "  Litestream: STOPPED" | tee -a "$LOG_DIR/monitor.log"
        fi

        if [ -n "$LOAD_PID" ] && kill -0 "$LOAD_PID" 2>/dev/null; then
            echo "  Load generator: Running (PID: $LOAD_PID)" | tee -a "$LOG_DIR/monitor.log"
        else
            echo "  Load generator: STOPPED" | tee -a "$LOG_DIR/monitor.log"
        fi

        # Network/API statistics from log
        UPLOAD_COUNT=$(grep -c "uploading\|uploaded" "$LOG_DIR/litestream.log" 2>/dev/null || echo "0")
        echo "  Total upload operations: $UPLOAD_COUNT" | tee -a "$LOG_DIR/monitor.log"

        echo "" | tee -a "$LOG_DIR/monitor.log"
        sleep 60
    done
}

echo "Starting monitor process..."
monitor_s3_test &
MONITOR_PID=$!
echo "Monitor started with PID: $MONITOR_PID"

echo ""
echo "Initial database population (before starting litestream)..."
# Kill litestream temporarily to populate database
kill "$LITESTREAM_PID" 2>/dev/null || true
wait "$LITESTREAM_PID" 2>/dev/null || true

bin/litestream-test populate -db "$DB_PATH" -target-size 100MB -batch-size 10000 > "$LOG_DIR/populate.log" 2>&1
if [ $? -ne 0 ]; then
    echo "Warning: Population failed, but continuing..."
    cat "$LOG_DIR/populate.log"
fi

# Restart litestream
echo "Restarting litestream after population..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
sleep 3

echo ""
echo "Starting load generator for overnight S3 test..."
echo "Configuration:"
echo "  - Duration: 8 hours"
echo "  - Write rate: 100 writes/second (higher for S3 testing)"
echo "  - Pattern: wave (simulates varying load)"
echo "  - Workers: 8"
echo ""

# Run load test for 8 hours with higher load for S3
bin/litestream-test load \
    -db "$DB_PATH" \
    -write-rate 100 \
    -duration 8h \
    -pattern wave \
    -payload-size 4096 \
    -read-ratio 0.3 \
    -workers 8 \
    > "$LOG_DIR/load.log" 2>&1 &

LOAD_PID=$!
echo "Load generator started with PID: $LOAD_PID"

echo ""
echo "================================================"
echo "Overnight S3 test is running!"
echo "================================================"
echo ""
echo "Monitor the test with:"
echo "  tail -f $LOG_DIR/monitor.log"
echo ""
echo "View litestream logs:"
echo "  tail -f $LOG_DIR/litestream.log"
echo ""
echo "View load generator logs:"
echo "  tail -f $LOG_DIR/load.log"
echo ""
echo "Check S3 contents:"
echo "  aws s3 ls ${S3_PATH}/ --recursive"
echo ""
echo "The test will run for 8 hours. Press Ctrl+C to stop early."
echo ""

wait "$LOAD_PID"

echo ""
echo "Load generation completed."

# Final statistics
echo ""
echo "================================================"
echo "Final Statistics"
echo "================================================"

if [ -f "$DB_PATH" ]; then
    DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
    # Find actual table name
    TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null)
    if echo "$TABLES" | grep -q "load_test"; then
        ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
    elif echo "$TABLES" | grep -q "test_table_0"; then
        ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
    elif echo "$TABLES" | grep -q "test_data"; then
        ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
    else
        ROW_COUNT="0"
    fi
    echo "Database size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"
    echo "Total rows: $ROW_COUNT"
fi

echo ""
echo "S3 Statistics:"
# Count objects in S3
SNAPSHOT_COUNT=$(aws s3 ls "${S3_PATH}/" --recursive 2>/dev/null | grep -c "\.snapshot\.lz4" || echo "0")
WAL_COUNT=$(aws s3 ls "${S3_PATH}/" --recursive 2>/dev/null | grep -c "\.wal\.lz4" || echo "0")
TOTAL_OBJECTS=$(aws s3 ls "${S3_PATH}/" --recursive 2>/dev/null | wc -l | tr -d ' ' || echo "0")
S3_SIZE=$(aws s3 ls "${S3_PATH}/" --recursive --summarize 2>/dev/null | grep "Total Size" | awk '{print $3}' || echo "0")

echo "  Snapshots in S3: $SNAPSHOT_COUNT"
echo "  WAL segments in S3: $WAL_COUNT"
echo "  Total objects: $TOTAL_OBJECTS"
if [ "$S3_SIZE" != "0" ]; then
    echo "  Total S3 storage: $(numfmt --to=iec-i --suffix=B $S3_SIZE 2>/dev/null || echo "$S3_SIZE bytes")"
fi

echo ""
echo "Operation Counts:"
if [ -f "$LOG_DIR/litestream.log" ]; then
    COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" || echo "0")
    CHECKPOINT_COUNT=$(grep -iE "checkpoint|checkpointed" "$LOG_DIR/litestream.log" | wc -l | tr -d ' ' || echo "0")
    SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" || echo "0")
    ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | wc -l | tr -d ' ' || echo "0")
    echo "  Compactions: $COMPACTION_COUNT"
    echo "  Checkpoints: $CHECKPOINT_COUNT"
    echo "  Syncs: $SYNC_COUNT"
    echo "  Critical errors: $ERROR_COUNT"
fi

echo ""
echo "Testing restoration from S3..."

# Test restoration
RESTORE_DB="$TEST_DIR/restored.db"
echo "Restoring database from S3 to: $RESTORE_DB"
bin/litestream restore -o "$RESTORE_DB" "$S3_PATH" > "$LOG_DIR/restore.log" 2>&1

if [ $? -eq 0 ]; then
    echo "✓ Restoration successful!"

    # Compare row counts - use same table detection logic
    TABLES=$(sqlite3 "$RESTORE_DB" ".tables" 2>/dev/null)
    if echo "$TABLES" | grep -q "load_test"; then
        RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
    elif echo "$TABLES" | grep -q "test_table_0"; then
        RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
    elif echo "$TABLES" | grep -q "test_data"; then
        RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
    else
        RESTORED_COUNT="0"
    fi

    if [ "$ROW_COUNT" = "$RESTORED_COUNT" ]; then
        echo "✓ Row counts match! ($RESTORED_COUNT rows)"
    else
        echo "⚠ Row count mismatch! Original: $ROW_COUNT, Restored: $RESTORED_COUNT"
    fi
else
    echo "✗ Restoration failed! Check $LOG_DIR/restore.log"
fi
@@ -1,331 +0,0 @@
#!/bin/bash
set -euo pipefail

TEST_DIR="/tmp/litestream-overnight-$(date +%Y%m%d-%H%M%S)"
DB_PATH="$TEST_DIR/test.db"
REPLICA_PATH="$TEST_DIR/replica"
LOG_DIR="$TEST_DIR/logs"
CONFIG_FILE="$TEST_DIR/litestream.yml"
MONITOR_PID=""
LITESTREAM_PID=""
LOAD_PID=""

echo "================================================"
echo "Litestream Overnight Test Suite"
echo "================================================"
echo "Test directory: $TEST_DIR"
echo "Start time: $(date)"
echo ""

cleanup() {
    echo ""
    echo "================================================"
    echo "Cleaning up..."
    echo "================================================"

    if [ -n "$LOAD_PID" ] && kill -0 "$LOAD_PID" 2>/dev/null; then
        echo "Stopping load generator..."
        kill "$LOAD_PID" 2>/dev/null || true
        wait "$LOAD_PID" 2>/dev/null || true
    fi

    if [ -n "$LITESTREAM_PID" ] && kill -0 "$LITESTREAM_PID" 2>/dev/null; then
        echo "Stopping litestream..."
        kill "$LITESTREAM_PID" 2>/dev/null || true
        wait "$LITESTREAM_PID" 2>/dev/null || true
    fi

    if [ -n "$MONITOR_PID" ] && kill -0 "$MONITOR_PID" 2>/dev/null; then
        echo "Stopping monitor..."
        kill "$MONITOR_PID" 2>/dev/null || true
    fi

    echo ""
    echo "Test Summary:"
    echo "============="
    if [ -f "$LOG_DIR/monitor.log" ]; then
        echo "Final statistics from monitor log:"
        tail -20 "$LOG_DIR/monitor.log"
    fi

    echo ""
    echo "Test artifacts saved in: $TEST_DIR"
    echo "End time: $(date)"
}

trap cleanup EXIT INT TERM

mkdir -p "$TEST_DIR" "$LOG_DIR" "$REPLICA_PATH"

echo "Creating initial database..."
sqlite3 "$DB_PATH" <<EOF
PRAGMA journal_mode=WAL;
CREATE TABLE IF NOT EXISTS test_data (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    data BLOB,
    created_at INTEGER
);
EOF

echo "Creating litestream configuration with frequent intervals..."
cat > "$CONFIG_FILE" <<EOF
# Litestream configuration for overnight testing
# with aggressive compaction and snapshot intervals

# Snapshot every 10 minutes
snapshot:
  interval: 10m
  retention: 720h  # Keep everything for analysis

# Compaction settings - very frequent for testing
levels:
  - interval: 30s
  - interval: 1m
  - interval: 5m
  - interval: 15m
  - interval: 30m
  - interval: 1h

dbs:
  - path: $DB_PATH
    # Checkpoint settings - frequent for testing (30s interval, 1000-10000 pages)
    checkpoint-interval: 30s
    min-checkpoint-page-count: 1000
    max-checkpoint-page-count: 10000

    replicas:
      - type: file
        path: $REPLICA_PATH
        retention-check-interval: 1h
EOF

echo ""
echo "Configuration created at: $CONFIG_FILE"
cat "$CONFIG_FILE"
echo ""

echo "Building litestream if needed..."
if [ ! -f bin/litestream ]; then
    go build -o bin/litestream ./cmd/litestream
fi

echo "Starting litestream replication..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
echo "Litestream started with PID: $LITESTREAM_PID"

sleep 5

if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
    echo "ERROR: Litestream failed to start. Check logs:"
    tail -50 "$LOG_DIR/litestream.log"
    exit 1
fi

monitor_test() {
    while true; do
        echo "================================================" | tee -a "$LOG_DIR/monitor.log"
        echo "Monitor Update: $(date)" | tee -a "$LOG_DIR/monitor.log"
        echo "================================================" | tee -a "$LOG_DIR/monitor.log"

        # Database size
        if [ -f "$DB_PATH" ]; then
            DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null || echo "0")
            echo "Database size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")" | tee -a "$LOG_DIR/monitor.log"
        fi

        # WAL size
        if [ -f "$DB_PATH-wal" ]; then
            WAL_SIZE=$(stat -f%z "$DB_PATH-wal" 2>/dev/null || stat -c%s "$DB_PATH-wal" 2>/dev/null || echo "0")
            echo "WAL size: $(numfmt --to=iec-i --suffix=B $WAL_SIZE 2>/dev/null || echo "$WAL_SIZE bytes")" | tee -a "$LOG_DIR/monitor.log"
        fi

        # Replica statistics
        echo "" | tee -a "$LOG_DIR/monitor.log"
        echo "Replica Statistics:" | tee -a "$LOG_DIR/monitor.log"

        # Count snapshots (for file replica, look for snapshot.ltx files)
        SNAPSHOT_COUNT=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ')
        echo "  Snapshots: $SNAPSHOT_COUNT" | tee -a "$LOG_DIR/monitor.log"

        # Count LTX segments by age (file replicas use .ltx not .wal.lz4)
        if [ -d "$REPLICA_PATH" ]; then
            LTX_30S=$(find "$REPLICA_PATH" -name "*.ltx" -mmin -0.5 2>/dev/null | wc -l | tr -d ' ')
            LTX_1M=$(find "$REPLICA_PATH" -name "*.ltx" -mmin -1 2>/dev/null | wc -l | tr -d ' ')
            LTX_5M=$(find "$REPLICA_PATH" -name "*.ltx" -mmin -5 2>/dev/null | wc -l | tr -d ' ')
            LTX_TOTAL=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ')

            echo "  LTX segments (last 30s): $LTX_30S" | tee -a "$LOG_DIR/monitor.log"
            echo "  LTX segments (last 1m): $LTX_1M" | tee -a "$LOG_DIR/monitor.log"
            echo "  LTX segments (last 5m): $LTX_5M" | tee -a "$LOG_DIR/monitor.log"
            echo "  LTX segments (total): $LTX_TOTAL" | tee -a "$LOG_DIR/monitor.log"

            # Replica size
            REPLICA_SIZE=$(du -sh "$REPLICA_PATH" 2>/dev/null | cut -f1)
            echo "  Total replica size: $REPLICA_SIZE" | tee -a "$LOG_DIR/monitor.log"
        fi

        # Count operations
        echo "" | tee -a "$LOG_DIR/monitor.log"
        echo "Operations:" | tee -a "$LOG_DIR/monitor.log"
        if [ -f "$LOG_DIR/litestream.log" ]; then
            COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" 2>/dev/null || echo "0")
            CHECKPOINT_COUNT=$(grep -iE "checkpoint|checkpointed" "$LOG_DIR/litestream.log" 2>/dev/null | wc -l | tr -d ' ' || echo "0")
            SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" 2>/dev/null || echo "0")
            echo "  Compactions: $COMPACTION_COUNT" | tee -a "$LOG_DIR/monitor.log"
            echo "  Checkpoints: $CHECKPOINT_COUNT" | tee -a "$LOG_DIR/monitor.log"
            echo "  Syncs: $SYNC_COUNT" | tee -a "$LOG_DIR/monitor.log"
        fi

        # Check for errors in litestream log (exclude known non-critical)
        echo "" | tee -a "$LOG_DIR/monitor.log"
        ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ' || echo "0")
        echo "Critical errors in litestream log: $ERROR_COUNT" | tee -a "$LOG_DIR/monitor.log"

        if [ "$ERROR_COUNT" -gt 0 ]; then
            echo "Recent errors:" | tee -a "$LOG_DIR/monitor.log"
            grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | tail -5 | tee -a "$LOG_DIR/monitor.log"
        fi

        # Process status
        echo "" | tee -a "$LOG_DIR/monitor.log"
        echo "Process Status:" | tee -a "$LOG_DIR/monitor.log"

        if kill -0 "$LITESTREAM_PID" 2>/dev/null; then
            echo "  Litestream: Running (PID: $LITESTREAM_PID)" | tee -a "$LOG_DIR/monitor.log"
        else
            echo "  Litestream: STOPPED" | tee -a "$LOG_DIR/monitor.log"
        fi

        if [ -n "$LOAD_PID" ] && kill -0 "$LOAD_PID" 2>/dev/null; then
            echo "  Load generator: Running (PID: $LOAD_PID)" | tee -a "$LOG_DIR/monitor.log"
        else
            echo "  Load generator: STOPPED" | tee -a "$LOG_DIR/monitor.log"
        fi

        echo "" | tee -a "$LOG_DIR/monitor.log"
        sleep 60
    done
}

echo "Starting monitor process..."
monitor_test &
MONITOR_PID=$!
echo "Monitor started with PID: $MONITOR_PID"

echo ""
echo "Initial database population (before starting litestream)..."
# Kill litestream temporarily to populate database
kill "$LITESTREAM_PID" 2>/dev/null || true
wait "$LITESTREAM_PID" 2>/dev/null || true

bin/litestream-test populate -db "$DB_PATH" -target-size 100MB -batch-size 10000 > "$LOG_DIR/populate.log" 2>&1
if [ $? -ne 0 ]; then
    echo "Warning: Population failed, but continuing..."
    cat "$LOG_DIR/populate.log"
fi

# Restart litestream
echo "Restarting litestream after population..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
sleep 3

echo ""
echo "Starting load generator for overnight test..."
echo "Configuration:"
echo "  - Duration: 8 hours"
echo "  - Write rate: 50 writes/second"
echo "  - Pattern: wave (simulates varying load)"
echo "  - Workers: 4"
echo ""

# Run load test for 8 hours with varying patterns
bin/litestream-test load \
    -db "$DB_PATH" \
    -write-rate 50 \
    -duration 8h \
    -pattern wave \
    -payload-size 2048 \
    -read-ratio 0.3 \
    -workers 4 \
    > "$LOG_DIR/load.log" 2>&1 &

LOAD_PID=$!
echo "Load generator started with PID: $LOAD_PID"

echo ""
echo "================================================"
echo "Overnight test is running!"
echo "================================================"
echo ""
echo "Monitor the test with:"
echo "  tail -f $LOG_DIR/monitor.log"
echo ""
echo "View litestream logs:"
echo "  tail -f $LOG_DIR/litestream.log"
echo ""
echo "View load generator logs:"
echo "  tail -f $LOG_DIR/load.log"
echo ""
echo "The test will run for 8 hours. Press Ctrl+C to stop early."
echo ""

wait "$LOAD_PID"

echo ""
echo "Load generation completed."

# Final statistics
echo ""
echo "================================================"
echo "Final Statistics"
echo "================================================"

if [ -f "$DB_PATH" ]; then
    DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
    # Find actual table name
    TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null)
    if echo "$TABLES" | grep -q "load_test"; then
        ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
    elif echo "$TABLES" | grep -q "test_table_0"; then
        ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
    elif echo "$TABLES" | grep -q "test_data"; then
        ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
    else
        ROW_COUNT="0"
    fi
    echo "Database size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"
    echo "Total rows: $ROW_COUNT"
fi

if [ -d "$REPLICA_PATH" ]; then
    SNAPSHOT_COUNT=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ')
    LTX_COUNT=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ')
    REPLICA_SIZE=$(du -sh "$REPLICA_PATH" | cut -f1)
    echo "Snapshots created: $SNAPSHOT_COUNT"
    echo "LTX segments: $LTX_COUNT"
    echo "Replica size: $REPLICA_SIZE"
fi

if [ -f "$LOG_DIR/litestream.log" ]; then
    COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" || echo "0")
    CHECKPOINT_COUNT=$(grep -iE "checkpoint|checkpointed" "$LOG_DIR/litestream.log" | wc -l | tr -d ' ' || echo "0")
    ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | wc -l | tr -d ' ' || echo "0")
    echo "Compactions: $COMPACTION_COUNT"
    echo "Checkpoints: $CHECKPOINT_COUNT"
    echo "Critical errors: $ERROR_COUNT"
fi

echo ""
echo "Running validation..."
bin/litestream-test validate \
    -source "$DB_PATH" \
    -replica "$REPLICA_PATH" \
    > "$LOG_DIR/validate.log" 2>&1

if [ $? -eq 0 ]; then
    echo "✓ Validation passed!"
else
    echo "✗ Validation failed! Check $LOG_DIR/validate.log"
fi
@@ -1,324 +0,0 @@
#!/bin/bash
set -euo pipefail

# Quick validation test - runs for 30 minutes with aggressive settings
# Use this to validate configuration before overnight runs

TEST_DURATION="${TEST_DURATION:-30m}"
TEST_DIR="/tmp/litestream-quick-$(date +%Y%m%d-%H%M%S)"
DB_PATH="$TEST_DIR/test.db"
REPLICA_PATH="$TEST_DIR/replica"
CONFIG_FILE="$TEST_DIR/litestream.yml"
LOG_DIR="$TEST_DIR/logs"

echo "================================================"
echo "Litestream Quick Validation Test"
echo "================================================"
echo "Duration: $TEST_DURATION"
echo "Test directory: $TEST_DIR"
echo "Start time: $(date)"
echo ""

cleanup() {
    echo ""
    echo "Cleaning up..."

    # Kill all spawned processes
    jobs -p | xargs -r kill 2>/dev/null || true
    wait

    echo "Test completed at: $(date)"
    echo "Results saved in: $TEST_DIR"
}

trap cleanup EXIT INT TERM

# Create directories
mkdir -p "$TEST_DIR" "$LOG_DIR" "$REPLICA_PATH"

# Build binaries if needed
echo "Building binaries..."
if [ ! -f bin/litestream ]; then
    go build -o bin/litestream ./cmd/litestream
fi
if [ ! -f bin/litestream-test ]; then
    go build -o bin/litestream-test ./cmd/litestream-test
fi

# Create test database and populate BEFORE starting litestream
echo "Creating test database..."
sqlite3 "$DB_PATH" <<EOF
PRAGMA journal_mode=WAL;
CREATE TABLE IF NOT EXISTS test_data (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    data BLOB,
    created_at INTEGER
);
EOF

# Populate database BEFORE litestream starts
echo "Populating database (10MB)..."
bin/litestream-test populate -db "$DB_PATH" -target-size 10MB -batch-size 1000 > "$LOG_DIR/populate.log" 2>&1
if [ $? -ne 0 ]; then
    echo "Warning: Population failed, but continuing..."
    cat "$LOG_DIR/populate.log"
fi

# Create aggressive test configuration
echo "Creating test configuration..."
cat > "$CONFIG_FILE" <<EOF
# Very aggressive snapshot settings for quick testing
snapshot:
  interval: 1m    # Snapshots every minute
  retention: 30m  # Keep data for 30 minutes

# Frequent compaction levels for testing
levels:
  - interval: 30s
  - interval: 1m
  - interval: 5m
  - interval: 10m

dbs:
  - path: $DB_PATH
    # Aggressive checkpoint settings
    checkpoint-interval: 30s
    min-checkpoint-page-count: 10
    max-checkpoint-page-count: 10000

    replicas:
      - type: file
        path: $REPLICA_PATH
        retention-check-interval: 2m
EOF

echo "Starting litestream..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!

sleep 3

if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
    echo "ERROR: Litestream failed to start!"
    tail -50 "$LOG_DIR/litestream.log"
    exit 1
fi

echo "Litestream running (PID: $LITESTREAM_PID)"
echo ""

# Start load generator with more aggressive settings
echo "Starting load generator..."
bin/litestream-test load \
    -db "$DB_PATH" \
    -write-rate 100 \
    -duration "$TEST_DURATION" \
    -pattern wave \
    -payload-size 4096 \
    -read-ratio 0.2 \
    -workers 4 \
    > "$LOG_DIR/load.log" 2>&1 &
LOAD_PID=$!

echo "Load generator running (PID: $LOAD_PID)"
echo ""

# Monitor function
monitor_quick() {
    while true; do
        sleep 30

        echo "[$(date +%H:%M:%S)] Status check"

        # Check database size and WAL size
        if [ -f "$DB_PATH" ]; then
            DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
            echo "  Database: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"

            # Check WAL file size
            if [ -f "$DB_PATH-wal" ]; then
                WAL_SIZE=$(stat -f%z "$DB_PATH-wal" 2>/dev/null || stat -c%s "$DB_PATH-wal" 2>/dev/null)
                echo "  WAL size: $(numfmt --to=iec-i --suffix=B $WAL_SIZE 2>/dev/null || echo "$WAL_SIZE bytes")"
            fi
        fi

        # Count replica files (for file replica type, count LTX files)
        if [ -d "$REPLICA_PATH" ]; then
            # Count snapshot files (snapshot.ltx files)
            SNAPSHOTS=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ')
            # Count LTX files (WAL segments)
            LTX_FILES=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ')
            echo "  Snapshots: $SNAPSHOTS, LTX segments: $LTX_FILES"

            # Show replica directory size
            REPLICA_SIZE=$(du -sh "$REPLICA_PATH" 2>/dev/null | cut -f1)
            echo "  Replica size: $REPLICA_SIZE"
        fi

        # Check for compaction (look for "compaction complete")
        COMPACT_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" 2>/dev/null || echo "0")
        echo "  Compactions: $COMPACT_COUNT"

        # Check for checkpoints (look for various checkpoint patterns)
        CHECKPOINT_COUNT=$(grep -iE "checkpoint|checkpointed" "$LOG_DIR/litestream.log" 2>/dev/null | wc -l | tr -d ' ')
        echo "  Checkpoints: $CHECKPOINT_COUNT"

        # Check sync activity
        SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" 2>/dev/null || echo "0")
        echo "  Syncs: $SYNC_COUNT"

        # Check for errors (exclude known non-critical errors)
        ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ')
        if [ "$ERROR_COUNT" -gt 0 ]; then
            echo "  ⚠ Critical errors: $ERROR_COUNT"
            grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | tail -2
        fi

        # Check processes
        if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
            echo "  ✗ Litestream stopped unexpectedly!"
            break
        fi

        if ! kill -0 "$LOAD_PID" 2>/dev/null; then
            echo "  ✓ Load test completed"
            break
        fi

        echo ""
    done
}

echo "Running test for $TEST_DURATION..."
echo "================================================"
echo ""

# Start monitoring in background
monitor_quick &
MONITOR_PID=$!

# Wait for load test to complete
wait "$LOAD_PID" 2>/dev/null || true

# Stop the monitor
kill $MONITOR_PID 2>/dev/null || true
wait $MONITOR_PID 2>/dev/null || true

echo ""
echo "================================================"
echo "Test Results"
echo "================================================"

# Final statistics
echo "Database Statistics:"
if [ -f "$DB_PATH" ]; then
    DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
    # Find the actual table name - tables are space-separated on one line
    TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null)
    # Look for the main data table
    if echo "$TABLES" | grep -q "load_test"; then
        ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
    elif echo "$TABLES" | grep -q "test_table_0"; then
        ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
    elif echo "$TABLES" | grep -q "test_data"; then
        ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
    else
        ROW_COUNT="0"
    fi
    echo "  Final size: $(numfmt --to=iec-i --suffix=B $DB_SIZE 2>/dev/null || echo "$DB_SIZE bytes")"
    echo "  Total rows: $ROW_COUNT"
fi

echo ""
echo "Replication Statistics:"
if [ -d "$REPLICA_PATH" ]; then
    SNAPSHOT_COUNT=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ')
    LTX_COUNT=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ')
    REPLICA_SIZE=$(du -sh "$REPLICA_PATH" | cut -f1)
    echo "  Snapshots created: $SNAPSHOT_COUNT"
    echo "  LTX segments: $LTX_COUNT"
    echo "  Replica size: $REPLICA_SIZE"
fi

echo ""
echo "Operation Counts:"
# Count operations from log
if [ -f "$LOG_DIR/litestream.log" ]; then
    COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" || echo "0")
    CHECKPOINT_COUNT=$(grep -iE "checkpoint|checkpointed" "$LOG_DIR/litestream.log" | wc -l | tr -d ' ' || echo "0")
    ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | wc -l | tr -d ' ' || echo "0")
else
    COMPACTION_COUNT="0"
    CHECKPOINT_COUNT="0"
    ERROR_COUNT="0"
fi
echo "  Compactions: $COMPACTION_COUNT"
echo "  Checkpoints: $CHECKPOINT_COUNT"
echo "  Errors: $ERROR_COUNT"

# Quick validation
echo ""
echo "Validation:"
bin/litestream-test validate \
    -source "$DB_PATH" \
    -replica "$REPLICA_PATH" \
    > "$LOG_DIR/validate.log" 2>&1

if [ $? -eq 0 ]; then
    echo "  ✓ Validation passed!"
else
    echo "  ✗ Validation failed!"
    tail -10 "$LOG_DIR/validate.log"
fi

# Test restoration
echo ""
echo "Testing restoration..."
RESTORE_DB="$TEST_DIR/restored.db"
bin/litestream restore -o "$RESTORE_DB" "file://$REPLICA_PATH" > "$LOG_DIR/restore.log" 2>&1

if [ $? -eq 0 ]; then
    RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
    ORIGINAL_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")

    if [ "$RESTORED_COUNT" = "$ORIGINAL_COUNT" ]; then
        echo "  ✓ Restoration successful! ($RESTORED_COUNT rows)"
    else
        echo "  ⚠ Row count mismatch! Original: $ORIGINAL_COUNT, Restored: $RESTORED_COUNT"
    fi
else
    echo "  ✗ Restoration failed!"
fi

# Summary
echo ""
echo "================================================"
# Count critical errors (exclude known non-critical ones)
CRITICAL_ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ')

if [ "$CRITICAL_ERROR_COUNT" -eq 0 ] && [ "$LTX_COUNT" -gt 0 ]; then
    echo "✓ Quick validation PASSED!"
    echo ""
    echo "Summary:"
    echo "  - Litestream successfully replicated data"
    echo "  - Created $LTX_COUNT LTX segments"
    [ "$SNAPSHOT_COUNT" -gt 0 ] && echo "  - Created $SNAPSHOT_COUNT snapshots"
    [ "$COMPACTION_COUNT" -gt 0 ] && echo "  - Performed $COMPACTION_COUNT compactions"
    echo ""
    echo "The configuration appears ready for overnight testing."
    echo "Run the overnight test with:"
    echo "  ./test-overnight.sh"
else
    echo "⚠ Quick validation completed with issues:"
    [ "$CRITICAL_ERROR_COUNT" -gt 0 ] && echo "  - Critical errors detected: $CRITICAL_ERROR_COUNT"
    [ "$LTX_COUNT" -eq 0 ] && echo "  - No LTX segments created (replication not working)"
    [ "$SNAPSHOT_COUNT" -eq 0 ] && echo "  - No snapshots created (may be normal for short tests)"
    [ "$COMPACTION_COUNT" -eq 0 ] && echo "  - No compactions occurred (may be normal for short tests)"
    echo ""
    echo "Review the logs before running overnight tests:"
    echo "  $LOG_DIR/litestream.log"
fi

echo ""
echo "Full results available in: $TEST_DIR"
echo "================================================"
474
tests/integration/README.md
Normal file
@@ -0,0 +1,474 @@
# Integration Tests

Go-based integration tests for Litestream. These tests replace the previous bash-based test scripts with proper Go testing infrastructure.

## Overview

This package contains comprehensive integration tests organized by test type:

- **scenario_test.go** - Core functionality scenarios (fresh start, integrity, deletion, failover)
- **concurrent_test.go** - Concurrency and stress tests (rapid checkpoints, WAL growth, concurrent ops, busy timeout)
- **quick_test.go** - Quick validation tests (30 minutes by default, configurable)
- **overnight_test.go** - Long-running stability tests (8+ hours)
- **boundary_test.go** - Edge cases (1GB boundary, different page sizes)
- **helpers.go** - Shared test utilities and helpers
- **fixtures.go** - Test data generators and scenarios

## Prerequisites

Build the required binaries:

```bash
go build -o bin/litestream ./cmd/litestream
go build -o bin/litestream-test ./cmd/litestream-test
```

## Running Tests

### Quick Tests (Default)

Run fast integration tests suitable for CI:

```bash
go test -v -tags=integration -timeout=30m ./tests/integration/... \
  -run="TestFreshStart|TestDatabaseIntegrity|TestRapidCheckpoints"
```

### All Scenario Tests

Run all scenario tests (excluding long-running):

```bash
go test -v -tags=integration -timeout=1h ./tests/integration/...
```

### Long-Running Tests

Run overnight and boundary tests:

```bash
go test -v -tags="integration,long" -timeout=10h ./tests/integration/... \
  -run="TestOvernight|Test1GBBoundary"
```

## Soak Tests

Long-running soak tests live alongside the other integration tests and share the same helpers. They are excluded from CI by default and are intended for release validation or targeted debugging.

### Overview

| Test | Tags | Defaults | Purpose | Extra Requirements |
| --- | --- | --- | --- | --- |
| `TestComprehensiveSoak` | `integration,soak` | 2h duration, 50 MB DB, 500 writes/s | File-backed end-to-end stress | Litestream binaries in `./bin` |
| `TestMinIOSoak` | `integration,soak,docker` | 2h duration (2m in short mode), 5 MB DB, 100 writes/s | S3-compatible replication via MinIO | Docker daemon, `docker` CLI |
| `TestOvernightS3Soak` | `integration,soak,aws` | 8h duration, 50 MB DB | Real S3 replication & restore | AWS credentials, `aws` CLI |

All soak tests support `go test -test.short` to scale the default duration down to roughly two minutes for smoke verification.
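
As a rough sketch, that scaling can be as simple as checking `testing.Short()` when the test computes its run length. The `soakDuration` helper below is illustrative only; the package's real helpers live in helpers.go:

```go
package integration

import (
	"testing"
	"time"
)

// soakDuration picks the soak run length: the full duration normally,
// or roughly two minutes when -test.short is set. Illustrative sketch.
func soakDuration(full time.Duration) time.Duration {
	if testing.Short() {
		return 2 * time.Minute
	}
	return full
}

func TestComprehensiveSoakSketch(t *testing.T) {
	d := soakDuration(2 * time.Hour)
	t.Logf("running soak workload for %s", d)
	// ... drive writes against the test database for d ...
}
```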

### Environment Variables

| Variable | Default | Description |
| --- | --- | --- |
| `SOAK_AUTO_PURGE` | `yes` for non-interactive shells; prompts otherwise | Controls whether MinIO buckets are cleared before each run. Set to `no` to retain objects between runs. |
| `SOAK_KEEP_TEMP` | unset | When set (any value), preserves the temporary directory and artifacts (database, config, logs) instead of removing them after the test completes. |
| `SOAK_DEBUG` | `0` | Streams command stdout/stderr (database population, load generation, docker helpers) directly to the console. Without this the output is captured and only shown on failure. |
| `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `S3_BUCKET`, `AWS_REGION` | required for `aws` tag | Provide credentials and target bucket for the overnight S3 soak. Region defaults to `us-east-1` if unset. |
|
||||
|
||||
### Example Commands
|
||||
|
||||
File-based soak (full length):
|
||||
|
||||
```bash
|
||||
go test -v -tags="integration,soak" \
|
||||
-run=TestComprehensiveSoak -timeout=3h ./tests/integration
|
||||
```
|
||||
|
||||
File-based soak (short mode with preserved artifacts and debug logging):
|
||||
|
||||
```bash
|
||||
SOAK_KEEP_TEMP=1 SOAK_DEBUG=1 go test -v -tags="integration,soak" \
|
||||
-run=TestComprehensiveSoak -test.short -timeout=1h ./tests/integration
|
||||
```
|
||||
|
||||
MinIO soak (short mode, auto-purges bucket, preserves results):
|
||||
|
||||
```bash
|
||||
SOAK_AUTO_PURGE=yes SOAK_KEEP_TEMP=1 go test -v -tags="integration,soak,docker" \
|
||||
-run=TestMinIOSoak -test.short -timeout=20m ./tests/integration
|
||||
```
|
||||
|
||||
Overnight S3 soak (full duration):
|
||||
|
||||
```bash
|
||||
export AWS_ACCESS_KEY_ID=...
|
||||
export AWS_SECRET_ACCESS_KEY=...
|
||||
export S3_BUCKET=your-bucket
|
||||
export AWS_REGION=us-east-1
|
||||
|
||||
go test -v -tags="integration,soak,aws" \
|
||||
-run=TestOvernightS3Soak -timeout=10h ./tests/integration
|
||||
```
|
||||
|
||||
### Tips
|
||||
|
||||
- Run with `-v` to view the 60-second progress updates and final status summary. Without `-v`, progress output is suppressed by Go’s test runner.
|
||||
- When prompted about purging a MinIO bucket, answering “yes” clears the bucket via `minio/mc` before the run; “no” allows you to inspect lingering objects from previous executions.
|
||||
- `SOAK_KEEP_TEMP=1` is especially useful when investigating failures—the helper prints the preserved path so you can inspect databases, configs, and logs.
|
||||
- The monitoring infrastructure automatically prints additional status blocks when error counts change, making `SOAK_DEBUG=1` optional for most workflows.
|
||||
|
||||
### Specific Tests
|
||||
|
||||
Run individual test functions:
|
||||
|
||||
```bash
|
||||
# Fresh start test
|
||||
go test -v -tags=integration ./tests/integration/... -run=TestFreshStart
|
||||
|
||||
# Rapid checkpoints test
|
||||
go test -v -tags=integration ./tests/integration/... -run=TestRapidCheckpoints
|
||||
|
||||
# 1GB boundary test
|
||||
go test -v -tags=integration ./tests/integration/... -run=Test1GBBoundary
|
||||
```
|
||||
|
||||
### Short Mode
|
||||
|
||||
Run abbreviated versions with `-short`:
|
||||
|
||||
```bash
|
||||
go test -v -tags=integration -short ./tests/integration/...
|
||||
```
|
||||
|
||||
This reduces test durations by 10x (e.g., 8 hours becomes 48 minutes).
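
The 10x figure comes from the `GetTestDuration` helper that duration-based tests call. A minimal sketch of the pattern (illustrative; the actual helper in `helpers.go` may differ in detail):

```go
// Sketch of the -short scaling used by duration-based tests.
// Assumption: the default duration is simply divided by 10.
func GetTestDuration(t *testing.T, def time.Duration) time.Duration {
	t.Helper()
	if testing.Short() {
		return def / 10
	}
	return def
}
```

Note that many of the fast scenario tests skip entirely under `-short` (they call `t.Skip`), so the flag mainly affects the duration-based tests.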
## Test Categories

### Scenario Tests

Core functionality tests that run in seconds to minutes:

- `TestFreshStart` - Starting replication before database exists
- `TestDatabaseIntegrity` - Complex schema and data integrity
- `TestDatabaseDeletion` - Source database deletion during replication

### Concurrent Tests

Stress and concurrency tests:

- `TestRapidCheckpoints` - Rapid checkpoint operations under load
- `TestWALGrowth` - Large WAL file handling (100MB+)
- `TestConcurrentOperations` - Multiple databases replicating simultaneously
- `TestBusyTimeout` - Database busy timeout and lock handling

### Quick Tests

Configurable duration validation (default 30 minutes):

- `TestQuickValidation` - Comprehensive validation with wave pattern load

### Overnight Tests

Long-running stability tests (default 8 hours):

- `TestOvernightFile` - 8-hour file-based replication test
- `TestOvernightComprehensive` - 8-hour comprehensive test with large database

### Boundary Tests

Edge case and boundary condition tests:

- `Test1GBBoundary` - SQLite 1GB lock page boundary (page #262145 with 4KB pages)
- `TestLockPageWithDifferentPageSizes` - Lock page handling with various page sizes

## CI Integration

### Automatic (Pull Requests)

Quick tests run automatically on PRs modifying Go code:

```yaml
- Quick integration tests (TestFreshStart, TestDatabaseIntegrity, TestRapidCheckpoints)
- Timeout: 30 minutes
```

### Manual Workflows

Trigger via GitHub Actions UI:

**Quick Tests:**
```
workflow_dispatch → test_type: quick
```

**All Scenario Tests:**
```
workflow_dispatch → test_type: all
```

**Long-Running Tests:**
```
workflow_dispatch → test_type: long
```

## Test Infrastructure

### Helpers (helpers.go)

- `SetupTestDB(t, name)` - Create test database instance
- `TestDB.Create()` - Create database with WAL mode
- `TestDB.Populate(size)` - Populate to target size
- `TestDB.StartLitestream()` - Start replication
- `TestDB.StopLitestream()` - Stop replication
- `TestDB.Restore(path)` - Restore from replica
- `TestDB.Validate(path)` - Full validation (integrity, checksum, data)
- `TestDB.QuickValidate(path)` - Quick validation
- `TestDB.GenerateLoad(...)` - Generate database load
- `GetTestDuration(t, default)` - Get configurable test duration
- `RequireBinaries(t)` - Check for required binaries
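
A typical test wires these helpers together roughly like this (an illustrative sketch using the names above; the target size and sleep are arbitrary choices, not fixed conventions):

```go
//go:build integration

package integration

import (
	"path/filepath"
	"testing"
	"time"
)

func TestExampleRoundTrip(t *testing.T) {
	RequireBinaries(t)

	db := SetupTestDB(t, "example")
	defer db.Cleanup()

	if err := db.Create(); err != nil {
		t.Fatalf("create database: %v", err)
	}
	if err := db.Populate("10MB"); err != nil {
		t.Fatalf("populate: %v", err)
	}
	if err := db.StartLitestream(); err != nil {
		t.Fatalf("start litestream: %v", err)
	}

	// Give replication a moment to pick up the initial state.
	time.Sleep(3 * time.Second)
	db.StopLitestream()

	restored := filepath.Join(db.TempDir, "example-restored.db")
	if err := db.Restore(restored); err != nil {
		t.Fatalf("restore: %v", err)
	}
	if err := db.QuickValidate(restored); err != nil {
		t.Fatalf("validate: %v", err)
	}
}
```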
### Fixtures (fixtures.go)

- `DefaultLoadConfig()` - Load generation configuration
- `DefaultPopulateConfig()` - Database population configuration
- `CreateComplexTestSchema(db)` - Multi-table schema with foreign keys
- `PopulateComplexTestData(db, ...)` - Populate complex data
- `LargeWALScenario()` - Large WAL test scenario
- `RapidCheckpointsScenario()` - Rapid checkpoint scenario
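
For example, the complex-schema fixtures can seed a database opened with `database/sql` inside a test that already has a `TestDB` (a sketch; the row counts are arbitrary):

```go
sqlDB, err := sql.Open("sqlite3", db.Path)
if err != nil {
	t.Fatalf("open database: %v", err)
}
defer sqlDB.Close()

if err := CreateComplexTestSchema(sqlDB); err != nil {
	t.Fatalf("create schema: %v", err)
}
// 10 users, 5 posts per user, 3 comments per post.
if err := PopulateComplexTestData(sqlDB, 10, 5, 3); err != nil {
	t.Fatalf("populate complex data: %v", err)
}
```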
## Test Artifacts

Tests create temporary directories via `t.TempDir()`:

```
/tmp/<test-temp-dir>/
├── <name>.db          # Test database
├── <name>.db-wal      # WAL file
├── <name>.db-shm      # Shared memory
├── replica/           # Replica directory
│   └── ltx/0/         # LTX files
├── litestream.log     # Litestream output
└── *-restored.db      # Restored databases
```

Artifacts are automatically cleaned up after tests complete (unless `SOAK_KEEP_TEMP` is set, in which case the directory is preserved).

## Debugging Tests

### View Litestream Logs

```go
log, err := db.GetLitestreamLog()
if err != nil {
	t.Fatalf("get litestream log: %v", err)
}
fmt.Println(log)
```

### Check for Errors

```go
errors, err := db.CheckForErrors()
if err != nil {
	t.Fatalf("check for errors: %v", err)
}
for _, e := range errors {
	t.Logf("Error: %s", e)
}
```

### Inspect Replica

```go
fileCount, _ := db.GetReplicaFileCount()
t.Logf("LTX files: %d", fileCount)
```

### Check Database Size

```go
size, _ := db.GetDatabaseSize()
t.Logf("DB size: %.2f MB", float64(size)/(1024*1024))
```

## Migration from Bash

This is part of an ongoing effort to migrate bash test scripts to Go integration tests. This migration improves maintainability, enables CI integration, and provides platform independence.

### Test Directory Organization

Three distinct test locations serve different purposes:

**`tests/integration/` (this directory)** - Go-based integration and soak tests:
- Quick integration tests: `scenario_test.go`, `concurrent_test.go`, `boundary_test.go`
- Soak tests (2-8 hours): `comprehensive_soak_test.go`, `minio_soak_test.go`, `overnight_s3_soak_test.go`
- All tests use proper Go testing infrastructure with build tags

**`scripts/` (top-level)** - Utility scripts only (soak tests migrated to Go):
- `analyze-test-results.sh` - Post-test analysis utility
- `setup-homebrew-tap.sh` - Packaging script (not a test)

**`cmd/litestream-test/scripts/`** - Scenario and debugging bash scripts (being phased out):
- Bug reproduction scripts for specific issues (#752, #754)
- Format & upgrade tests for version compatibility
- S3 retention tests with Python mock
- Quick validation and setup utilities

### Migration Status

**Migrated from `scripts/` (5 scripts):**
- `test-quick-validation.sh` → `quick_test.go::TestQuickValidation` (CI: ✅)
- `test-overnight.sh` → `overnight_test.go::TestOvernightFile` (CI: ❌ too long)
- `test-comprehensive.sh` → `comprehensive_soak_test.go::TestComprehensiveSoak` (CI: ❌ soak test)
- `test-minio-s3.sh` → `minio_soak_test.go::TestMinIOSoak` (CI: ❌ soak test, requires Docker)
- `test-overnight-s3.sh` → `overnight_s3_soak_test.go::TestOvernightS3Soak` (CI: ❌ soak test, 8 hours)

**Migrated from `cmd/litestream-test/scripts/` (9 scripts):**
- `test-fresh-start.sh` → `scenario_test.go::TestFreshStart`
- `test-database-integrity.sh` → `scenario_test.go::TestDatabaseIntegrity`
- `test-database-deletion.sh` → `scenario_test.go::TestDatabaseDeletion`
- `test-replica-failover.sh` → NOT MIGRATED (feature removed from Litestream)
- `test-rapid-checkpoints.sh` → `concurrent_test.go::TestRapidCheckpoints`
- `test-wal-growth.sh` → `concurrent_test.go::TestWALGrowth`
- `test-concurrent-operations.sh` → `concurrent_test.go::TestConcurrentOperations`
- `test-busy-timeout.sh` → `concurrent_test.go::TestBusyTimeout`
- `test-1gb-boundary.sh` → `boundary_test.go::Test1GBBoundary`

**Remaining Bash Scripts:**

_scripts/_ (2 scripts remaining):
- `analyze-test-results.sh` - Post-test analysis utility (may stay as bash)
- `setup-homebrew-tap.sh` - Packaging script (not a test)

_cmd/litestream-test/scripts/_ (16 scripts remaining):
- Bug reproduction scripts: `reproduce-critical-bug.sh`, `test-754-*.sh`, `test-v0.5-*.sh`
- Format & upgrade tests: `test-format-isolation.sh`, `test-upgrade-*.sh`, `test-massive-upgrade.sh`
- S3 retention tests: `test-s3-retention-*.sh` (4 scripts, use Python S3 mock)
- Utility: `verify-test-setup.sh`

### Why Some Tests Aren't in CI

Per industry best practices, CI tests should complete in < 1 hour (ideally < 10 minutes):
- ✅ **Quick tests** (< 5 min) - Run on every PR
- ❌ **Soak tests** (2-8 hours) - Run locally before releases only
- ❌ **Long-running tests** (> 30 min) - Too slow for CI feedback loop

Soak tests are migrated to Go for maintainability but run **locally only**. See the "Soak Tests (Long-Running Stability Tests)" section below.

## Soak Tests (Long-Running Stability Tests)

Soak tests run for 2-8 hours to validate long-term stability under sustained load. These tests are **NOT run in CI** per industry best practices (effective CI requires tests to complete in < 1 hour).

### Purpose

Soak tests validate:
- Long-term replication stability
- Memory leak detection over time
- Compaction effectiveness across multiple cycles
- Checkpoint behavior under sustained load
- Recovery from transient issues
- Storage growth patterns

### When to Run Soak Tests

- ✅ Before major releases
- ✅ After significant replication changes
- ✅ To reproduce stability issues
- ✅ For performance benchmarking
- ❌ NOT on every commit (too slow for CI)

### Running Soak Tests Locally

**File-based comprehensive test (2 hours):**
```bash
go test -v -tags="integration,soak" -timeout=3h -run=TestComprehensiveSoak ./tests/integration/
```

**MinIO S3 test (2 hours, requires Docker):**
```bash
# Ensure Docker is running
go test -v -tags="integration,soak,docker" -timeout=3h -run=TestMinIOSoak ./tests/integration/
```

**Overnight S3 test (8 hours, requires AWS):**
```bash
export AWS_ACCESS_KEY_ID=your_key
export AWS_SECRET_ACCESS_KEY=your_secret
export S3_BUCKET=your-test-bucket
export AWS_REGION=us-east-1

go test -v -tags="integration,soak,aws" -timeout=10h -run=TestOvernightS3Soak ./tests/integration/
```

**Run all soak tests:**
```bash
go test -v -tags="integration,soak,docker,aws" -timeout=15h ./tests/integration/
```

### Adjust Duration for Testing

Tests respect the `-test.short` flag to run abbreviated versions:

```bash
# Run an abbreviated comprehensive test (about 2 minutes) instead of 2 hours
go test -v -tags="integration,soak" -timeout=1h -run=TestComprehensiveSoak ./tests/integration/ -test.short
```

### Soak Test Build Tags

Soak tests use multiple build tags to control execution:

- `integration` - Required for all integration tests
- `soak` - Marks long-running stability tests (2-8 hours)
- `docker` - Requires Docker (MinIO test)
- `aws` - Requires AWS credentials (S3 tests)
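
For example, a file that should build only for the MinIO soak run opens with a combined constraint (standard Go build-tag syntax; the tag set matches the soak overview table above):

```go
//go:build integration && soak && docker

package integration
```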
### Monitoring Soak Tests

All soak tests log progress every 60 seconds:

```bash
# Watch test progress in real-time
go test -v -tags="integration,soak" -run=TestComprehensiveSoak ./tests/integration/ 2>&1 | tee soak-test.log
```

Metrics reported during execution:
- Database size and WAL size
- Row count
- Replica statistics (snapshots, LTX segments)
- Operation counts (checkpoints, compactions, syncs)
- Error counts
- Write rate

### Soak Test Summary

| Test | Duration | Requirements | What It Tests |
|------|----------|--------------|---------------|
| TestComprehensiveSoak | 2h | None (just the binaries in `./bin`) | File-based replication with aggressive compaction |
| TestMinIOSoak | 2h | Docker | S3-compatible storage via MinIO container |
| TestOvernightS3Soak | 8h | AWS credentials | Real S3 replication, overnight stability |

## Benefits Over Bash

1. **Type Safety** - Compile-time error checking
2. **Better Debugging** - Use standard Go debugging tools
3. **Code Reuse** - Shared helpers and fixtures
4. **Parallel Execution** - Tests can run concurrently
5. **CI Integration** - Run automatically on PRs
6. **Test Coverage** - Measure code coverage
7. **Consistent Patterns** - Standard Go testing conventions
8. **Better Error Messages** - Structured, clear reporting
9. **Platform Independent** - Works on Linux, macOS, Windows
10. **IDE Integration** - Full editor support

## Contributing

When adding new integration tests (a starter skeleton follows this list):

1. Use appropriate build tags (`//go:build integration` or `//go:build integration && long`)
2. Call `RequireBinaries(t)` to check prerequisites
3. Use `SetupTestDB(t, name)` for test setup
4. Call `defer db.Cleanup()` for automatic cleanup
5. Log test progress with descriptive messages
6. Use `GetTestDuration(t, default)` for configurable durations
7. Add test to CI workflow if appropriate
8. Update this README with new test documentation
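
A starter skeleton following these conventions (illustrative; adjust the name, duration, and assertions for your scenario):

```go
//go:build integration

package integration

import (
	"testing"
	"time"
)

func TestMyNewScenario(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	RequireBinaries(t) // check prerequisites (convention 2)

	duration := GetTestDuration(t, 2*time.Minute) // configurable duration (convention 6)
	t.Logf("Testing: my new scenario (duration: %v)", duration)

	db := SetupTestDB(t, "my-new-scenario") // test setup (convention 3)
	defer db.Cleanup()                      // automatic cleanup (convention 4)

	if err := db.Create(); err != nil {
		t.Fatalf("create database: %v", err)
	}

	// ...start Litestream, generate load, restore, and validate here...
}
```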
## Related Documentation

- [cmd/litestream-test README](../../cmd/litestream-test/README.md) - Testing harness CLI
- [scripts/README.md](../../scripts/README.md) - Legacy bash test scripts
- [GitHub Issue #798](https://github.com/benbjohnson/litestream/issues/798) - Migration tracking
200
tests/integration/boundary_test.go
Normal file
@@ -0,0 +1,200 @@
//go:build integration

package integration

import (
	"fmt"
	"path/filepath"
	"strings"
	"testing"
	"time"

	_ "github.com/mattn/go-sqlite3"
)

func Test1GBBoundary(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	RequireBinaries(t)

	t.Log("Testing: SQLite 1GB lock page boundary handling")
	t.Log("This tests database growth beyond 1GB with 4KB pages (lock page at #262145)")

	db := SetupTestDB(t, "1gb-boundary")
	defer db.Cleanup()

	t.Log("[1] Creating database with 4KB page size...")
	if err := db.CreateWithPageSize(4096); err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}

	t.Log("✓ Database created with 4KB pages")

	t.Log("[2] Populating to 1.5GB to cross lock page boundary...")
	if err := db.PopulateWithOptions("1.5GB", 4096, 1024); err != nil {
		t.Fatalf("Failed to populate database: %v", err)
	}

	dbSize, err := db.GetDatabaseSize()
	if err != nil {
		t.Fatalf("Failed to get database size: %v", err)
	}

	sizeGB := float64(dbSize) / (1024 * 1024 * 1024)
	t.Logf("✓ Database populated: %.2f GB", sizeGB)

	if sizeGB < 1.0 {
		t.Fatalf("Database did not reach 1GB threshold: %.2f GB", sizeGB)
	}

	t.Log("[3] Starting Litestream...")
	if err := db.StartLitestream(); err != nil {
		t.Fatalf("Failed to start Litestream: %v", err)
	}

	time.Sleep(30 * time.Second)

	t.Log("[4] Checking replication across lock page boundary...")
	fileCount, err := db.GetReplicaFileCount()
	if err != nil {
		t.Fatalf("Failed to check replica: %v", err)
	}

	if fileCount == 0 {
		t.Fatal("No LTX files created!")
	}

	t.Logf("✓ Replication started: %d LTX files", fileCount)

	t.Log("[5] Checking for lock page errors...")
	errors, err := db.CheckForErrors()
	if err != nil {
		t.Fatalf("Failed to check errors: %v", err)
	}

	lockPageErrors := 0
	for _, errMsg := range errors {
		if containsAny(errMsg, []string{"lock page", "page 262145", "locking page"}) {
			lockPageErrors++
			t.Logf("Lock page error: %s", errMsg)
		}
	}

	if lockPageErrors > 0 {
		t.Fatalf("Found %d lock page errors!", lockPageErrors)
	}

	t.Log("✓ No lock page errors detected")

	db.StopLitestream()
	time.Sleep(2 * time.Second)

	t.Log("[6] Testing restore of large database...")
	restoredPath := filepath.Join(db.TempDir, "1gb-restored.db")
	if err := db.Restore(restoredPath); err != nil {
		t.Fatalf("Restore failed: %v", err)
	}

	t.Log("✓ Restore successful")

	t.Log("[7] Validating restored database integrity...")
	if err := db.QuickValidate(restoredPath); err != nil {
		t.Fatalf("Validation failed: %v", err)
	}

	restoredDB := &TestDB{Path: restoredPath, t: t}
	restoredSize, _ := restoredDB.GetDatabaseSize()
	restoredSizeGB := float64(restoredSize) / (1024 * 1024 * 1024)

	t.Logf("✓ Restored database size: %.2f GB", restoredSizeGB)

	if restoredSizeGB < 0.9 {
		t.Fatalf("Restored database too small: %.2f GB (expected ~%.2f GB)", restoredSizeGB, sizeGB)
	}

	t.Log("TEST PASSED: 1GB lock page boundary handled correctly")
}

func TestLockPageWithDifferentPageSizes(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	RequireBinaries(t)

	t.Log("Testing: Lock page handling with different SQLite page sizes")

	pageSizes := []struct {
		size         int
		lockPageNum  int
		targetSizeMB int
	}{
		{4096, 262145, 1200},
		{8192, 131073, 1200},
	}

	for _, ps := range pageSizes {
		t.Run(fmt.Sprintf("PageSize%d", ps.size), func(t *testing.T) {
			db := SetupTestDB(t, fmt.Sprintf("lockpage-%d", ps.size))
			defer db.Cleanup()

			t.Logf("[1] Creating database with %d byte page size (lock page at #%d)...", ps.size, ps.lockPageNum)
			if err := db.CreateWithPageSize(ps.size); err != nil {
				t.Fatalf("Failed to create database: %v", err)
			}

			t.Logf("[2] Populating to %dMB...", ps.targetSizeMB)
			if err := db.PopulateWithOptions(fmt.Sprintf("%dMB", ps.targetSizeMB), ps.size, 1024); err != nil {
				t.Fatalf("Failed to populate database: %v", err)
			}

			dbSize, _ := db.GetDatabaseSize()
			t.Logf("✓ Database: %.2f MB", float64(dbSize)/(1024*1024))

			t.Log("[3] Starting replication...")
			if err := db.StartLitestream(); err != nil {
				t.Fatalf("Failed to start Litestream: %v", err)
			}

			time.Sleep(20 * time.Second)

			fileCount, _ := db.GetReplicaFileCount()
			t.Logf("✓ LTX files: %d", fileCount)

			db.StopLitestream()

			t.Log("[4] Testing restore...")
			restoredPath := filepath.Join(db.TempDir, fmt.Sprintf("lockpage-%d-restored.db", ps.size))
			if err := db.Restore(restoredPath); err != nil {
				t.Fatalf("Restore failed: %v", err)
			}

			t.Log("✓ Test passed for page size", ps.size)
		})
	}

	t.Log("TEST PASSED: All page sizes handled correctly")
}

// containsAny reports whether s contains any of the given substrings.
func containsAny(s string, substrs []string) bool {
	for _, substr := range substrs {
		if strings.Contains(s, substr) {
			return true
		}
	}
	return false
}
266
tests/integration/comprehensive_soak_test.go
Normal file
@@ -0,0 +1,266 @@
//go:build integration && soak

package integration

import (
	"context"
	"fmt"
	"path/filepath"
	"testing"
	"time"
)

// TestComprehensiveSoak runs a comprehensive soak test with aggressive settings
// to validate all Litestream features: replication, snapshots, compaction, checkpoints.
//
// Default duration: 2 hours
// Can be shortened with: go test -test.short (runs for about 2 minutes)
//
// This test exercises:
// - Continuous replication
// - Snapshot generation (every 10m)
// - Compaction (30s/1m/5m/15m/30m intervals)
// - Checkpoint operations
// - Database restoration
func TestComprehensiveSoak(t *testing.T) {
	RequireBinaries(t)

	// Determine test duration
	duration := GetTestDuration(t, 2*time.Hour)
	shortMode := testing.Short()
	if shortMode {
		duration = 2 * time.Minute
	}

	targetSize := "50MB"
	writeRate := 500
	if shortMode {
		targetSize = "5MB"
		writeRate = 100
	}

	t.Logf("================================================")
	t.Logf("Litestream Comprehensive Soak Test")
	t.Logf("================================================")
	t.Logf("Duration: %v", duration)
	t.Logf("Start time: %s", time.Now().Format(time.RFC3339))
	t.Log("")
	t.Log("This test uses aggressive settings to validate:")
	t.Log("  - Continuous replication")
	t.Log("  - Snapshot generation (every 10m)")
	t.Log("  - Compaction (30s/1m/5m intervals)")
	t.Log("  - Checkpoint operations")
	t.Log("  - Database restoration")
	t.Log("")

	startTime := time.Now()

	// Setup test database
	db := SetupTestDB(t, "comprehensive-soak")
	defer db.Cleanup()

	// Create database
	if err := db.Create(); err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}

	// Populate database
	t.Logf("Populating database (%s initial data)...", targetSize)
	if err := db.Populate(targetSize); err != nil {
		t.Fatalf("Failed to populate database: %v", err)
	}
	t.Log("✓ Database populated")
	t.Log("")

	// Create aggressive configuration for testing
	t.Log("Creating aggressive test configuration...")
	replicaURL := fmt.Sprintf("file://%s", filepath.ToSlash(db.ReplicaPath))
	configPath := CreateSoakConfig(db.Path, replicaURL, nil, shortMode)
	db.ConfigPath = configPath
	t.Logf("✓ Configuration created: %s", configPath)
	t.Log("")

	// Start Litestream
	t.Log("Starting Litestream replication...")
	if err := db.StartLitestreamWithConfig(configPath); err != nil {
		t.Fatalf("Failed to start Litestream: %v", err)
	}
	t.Logf("✓ Litestream running (PID: %d)", db.LitestreamPID)
	t.Log("")

	// Start load generator with heavy sustained load
	t.Log("Starting load generator (heavy sustained load)...")
	t.Logf("  Write rate: %d writes/second", writeRate)
	t.Logf("  Pattern: wave (simulates varying load)")
	t.Logf("  Payload size: 4KB")
	t.Logf("  Workers: 8")
	t.Log("")

	ctx, cancel := context.WithTimeout(context.Background(), duration)
	defer cancel()

	// Setup signal handler for graceful interruption
	testInfo := &TestInfo{
		StartTime: startTime,
		Duration:  duration,
		DB:        db,
		cancel:    cancel,
	}
	setupSignalHandler(t, cancel, testInfo)

	// Run load generation in background
	loadDone := make(chan error, 1)
	go func() {
		loadDone <- db.GenerateLoad(ctx, writeRate, duration, "wave")
	}()

	// Monitor every 60 seconds
	t.Log("Running comprehensive test...")
	t.Log("Monitor will report every 60 seconds")
	t.Log("Press Ctrl+C twice within 5 seconds to stop early")
	t.Log("================================================")
	t.Log("")

	refreshStats := func() {
		testInfo.RowCount, _ = db.GetRowCount("load_test")
		if testInfo.RowCount == 0 {
			testInfo.RowCount, _ = db.GetRowCount("test_table_0")
		}
		if testInfo.RowCount == 0 {
			testInfo.RowCount, _ = db.GetRowCount("test_data")
		}
		testInfo.FileCount, _ = db.GetReplicaFileCount()
	}

	logMetrics := func() {
		LogSoakMetrics(t, db, "comprehensive")
		if db.LitestreamCmd != nil && db.LitestreamCmd.ProcessState != nil {
			t.Error("✗ Litestream stopped unexpectedly!")
			if testInfo.cancel != nil {
				testInfo.cancel()
			}
		}
	}

	MonitorSoakTest(t, db, ctx, testInfo, refreshStats, logMetrics)

	// Wait for load generation to complete
	if err := <-loadDone; err != nil {
		t.Logf("Load generation completed: %v", err)
	}

	if err := db.WaitForSnapshots(30 * time.Second); err != nil {
		t.Fatalf("Failed waiting for snapshot: %v", err)
	}

	t.Log("")
	t.Log("================================================")
	t.Log("Final Test Results")
	t.Log("================================================")
	t.Log("")

	// Stop Litestream
	t.Log("Stopping Litestream...")
	if err := db.StopLitestream(); err != nil {
		t.Logf("Warning: Failed to stop Litestream cleanly: %v", err)
	}

	// Final statistics
	t.Log("Database Statistics:")
	if dbSize, err := db.GetDatabaseSize(); err == nil {
		t.Logf("  Final size: %.2f MB", float64(dbSize)/(1024*1024))
	}

	// Count rows using different table name possibilities
	var rowCount int
	var err error
	if rowCount, err = db.GetRowCount("load_test"); err != nil {
		if rowCount, err = db.GetRowCount("test_table_0"); err != nil {
			if rowCount, err = db.GetRowCount("test_data"); err != nil {
				t.Logf("  Warning: Could not get row count: %v", err)
			}
		}
	}
	if err == nil {
		t.Logf("  Total rows: %d", rowCount)
	}
	t.Log("")

	// Replica statistics
	t.Log("Replication Statistics:")
	if fileCount, err := db.GetReplicaFileCount(); err == nil {
		t.Logf("  LTX segments: %d", fileCount)
	}

	// Check for errors
	errors, _ := db.CheckForErrors()
	criticalErrors := 0
	for _, errLine := range errors {
		// Filter out known non-critical errors
		if !containsAny(errLine, []string{"page size not initialized"}) {
			criticalErrors++
		}
	}
	t.Logf("  Critical errors: %d", criticalErrors)
	t.Log("")

	// Test restoration
	t.Log("Testing restoration...")
	restoredPath := filepath.Join(db.TempDir, "restored.db")
	if err := db.Restore(restoredPath); err != nil {
		t.Fatalf("Restoration failed: %v", err)
	}
	t.Log("✓ Restoration successful!")

	// Validate
	t.Log("")
	t.Log("Validating restored database integrity...")
	restoredDB := &TestDB{Path: restoredPath, t: t}
	if err := restoredDB.IntegrityCheck(); err != nil {
		t.Fatalf("Integrity check failed: %v", err)
	}
	t.Log("✓ Integrity check passed!")

	// Analyze test results
	analysis := AnalyzeSoakTest(t, db, duration)
	PrintSoakTestAnalysis(t, analysis)

	// Test Summary
	t.Log("================================================")
	t.Log("Test Summary")
	t.Log("================================================")

	testPassed := true
	issues := []string{}

	if criticalErrors > 0 {
		testPassed = false
		issues = append(issues, fmt.Sprintf("Critical errors detected: %d", criticalErrors))
	}

	if analysis.FinalFileCount == 0 {
		testPassed = false
		issues = append(issues, "No files created (replication not working)")
	}

	if testPassed {
		t.Log("✓ TEST PASSED!")
		t.Log("")
		t.Log("The configuration is ready for production use.")
	} else {
		t.Log("⚠ TEST COMPLETED WITH ISSUES:")
		for _, issue := range issues {
			t.Logf("  - %s", issue)
		}
		t.Log("")
		t.Log("Review the logs for details:")
		logPath, _ := db.GetLitestreamLog()
		t.Logf("  %s", logPath)
		t.Fail()
	}

	t.Log("")
	t.Logf("Test duration: %v", time.Since(startTime).Round(time.Second))
	t.Logf("Results available in: %s", db.TempDir)
	t.Log("================================================")
}
482
tests/integration/concurrent_test.go
Normal file
@@ -0,0 +1,482 @@
//go:build integration

package integration

import (
	"context"
	"database/sql"
	"fmt"
	"os"
	"path/filepath"
	"testing"
	"time"

	_ "github.com/mattn/go-sqlite3"
)

func TestRapidCheckpoints(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	RequireBinaries(t)

	t.Log("Testing: Litestream under rapid checkpoint pressure")

	db := SetupTestDB(t, "rapid-checkpoints")
	defer db.Cleanup()

	if err := db.Create(); err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}

	t.Log("[1] Starting Litestream...")
	if err := db.StartLitestream(); err != nil {
		t.Fatalf("Failed to start Litestream: %v", err)
	}

	time.Sleep(3 * time.Second)

	t.Log("[2] Generating rapid writes with frequent checkpoints...")
	sqlDB, err := sql.Open("sqlite3", db.Path)
	if err != nil {
		t.Fatalf("Failed to open database: %v", err)
	}
	defer sqlDB.Close()

	if _, err := sqlDB.Exec(`
		CREATE TABLE checkpoint_test (
			id INTEGER PRIMARY KEY AUTOINCREMENT,
			data BLOB,
			timestamp INTEGER
		)
	`); err != nil {
		t.Fatalf("Failed to create table: %v", err)
	}

	data := make([]byte, 4096)
	checkpointCount := 0

	for i := 0; i < 1000; i++ {
		if _, err := sqlDB.Exec(
			"INSERT INTO checkpoint_test (data, timestamp) VALUES (?, ?)",
			data,
			time.Now().Unix(),
		); err != nil {
			t.Fatalf("Failed to insert row %d: %v", i, err)
		}

		if i%100 == 0 {
			if _, err := sqlDB.Exec("PRAGMA wal_checkpoint(TRUNCATE)"); err != nil {
				t.Logf("Checkpoint %d failed: %v", checkpointCount, err)
			} else {
				checkpointCount++
				t.Logf("Checkpoint %d completed at row %d", checkpointCount, i)
			}
		}
	}

	t.Logf("✓ Generated 1000 writes with %d checkpoints", checkpointCount)

	time.Sleep(5 * time.Second)

	db.StopLitestream()
	time.Sleep(2 * time.Second)

	t.Log("[3] Checking for errors...")
	errors, err := db.CheckForErrors()
	if err != nil {
		t.Fatalf("Failed to check errors: %v", err)
	}

	if len(errors) > 5 {
		t.Fatalf("Too many errors (%d), showing first 5:\n%v", len(errors), errors[:5])
	} else if len(errors) > 0 {
		t.Logf("Found %d errors (acceptable for checkpoint stress)", len(errors))
	}

	t.Log("[4] Verifying replica...")
	fileCount, err := db.GetReplicaFileCount()
	if err != nil {
		t.Fatalf("Failed to check replica: %v", err)
	}

	if fileCount == 0 {
		t.Fatal("No replica files created!")
	}

	t.Logf("✓ Replica created with %d files", fileCount)

	t.Log("[5] Testing restore...")
	restoredPath := filepath.Join(db.TempDir, "checkpoint-restored.db")
	if err := db.Restore(restoredPath); err != nil {
		t.Fatalf("Restore failed: %v", err)
	}

	t.Log("✓ Restore successful")

	origCount, err := db.GetRowCount("checkpoint_test")
	if err != nil {
		t.Fatalf("Failed to get original row count: %v", err)
	}

	restoredDB := &TestDB{Path: restoredPath, t: t}
	restCount, err := restoredDB.GetRowCount("checkpoint_test")
	if err != nil {
		t.Fatalf("Failed to get restored row count: %v", err)
	}

	if origCount != restCount {
		t.Fatalf("Count mismatch: original=%d, restored=%d", origCount, restCount)
	}

	t.Logf("✓ Data integrity verified: %d rows", origCount)
	t.Log("TEST PASSED: Handled rapid checkpoints successfully")
}

func TestWALGrowth(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	RequireBinaries(t)

	duration := GetTestDuration(t, 2*time.Minute)
	t.Logf("Testing: Large WAL file handling (duration: %v)", duration)

	db := SetupTestDB(t, "wal-growth")
	defer db.Cleanup()

	if err := db.Create(); err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}

	t.Log("[1] Creating test table...")
	sqlDB, err := sql.Open("sqlite3", db.Path)
	if err != nil {
		t.Fatalf("Failed to open database: %v", err)
	}
	defer sqlDB.Close()

	if _, err := sqlDB.Exec(`
		CREATE TABLE wal_test (
			id INTEGER PRIMARY KEY AUTOINCREMENT,
			data BLOB
		)
	`); err != nil {
		t.Fatalf("Failed to create table: %v", err)
	}

	t.Log("✓ Table created")

	t.Log("[2] Starting Litestream...")
	if err := db.StartLitestream(); err != nil {
		t.Fatalf("Failed to start Litestream: %v", err)
	}

	time.Sleep(3 * time.Second)

	t.Log("[3] Generating sustained write load...")
	ctx, cancel := context.WithTimeout(context.Background(), duration)
	defer cancel()

	config := DefaultLoadConfig()
	config.WriteRate = 400
	config.Duration = duration
	config.Pattern = LoadPatternWave
	config.PayloadSize = 10 * 1024
	config.Workers = 4

	if err := db.GenerateLoad(ctx, config.WriteRate, config.Duration, string(config.Pattern)); err != nil && ctx.Err() == nil {
		t.Fatalf("Load generation failed: %v", err)
	}

	t.Log("✓ Load generation complete")

	time.Sleep(5 * time.Second)

	t.Log("[4] Checking WAL size...")
	walPath := db.Path + "-wal"
	walSize, err := getFileSize(walPath)
	if err != nil {
		t.Logf("WAL file not found (may have been checkpointed): %v", err)
	} else {
		t.Logf("WAL size: %.2f MB", float64(walSize)/(1024*1024))
	}

	dbSize, err := db.GetDatabaseSize()
	if err != nil {
		t.Fatalf("Failed to get database size: %v", err)
	}

	t.Logf("Total database size: %.2f MB", float64(dbSize)/(1024*1024))

	db.StopLitestream()
	time.Sleep(2 * time.Second)

	t.Log("[5] Checking for errors...")
	errors, err := db.CheckForErrors()
	if err != nil {
		t.Fatalf("Failed to check errors: %v", err)
	}

	if len(errors) > 10 {
		t.Fatalf("Too many errors (%d), showing first 5:\n%v", len(errors), errors[:5])
	}

	t.Logf("✓ Found %d errors (acceptable)", len(errors))

	t.Log("[6] Testing restore...")
	restoredPath := filepath.Join(db.TempDir, "wal-restored.db")
	if err := db.Restore(restoredPath); err != nil {
		t.Fatalf("Restore failed: %v", err)
	}

	t.Log("✓ Restore successful")

	origCount, err := db.GetRowCount("wal_test")
	if err != nil {
		t.Fatalf("Failed to get original row count: %v", err)
	}

	restoredDB := &TestDB{Path: restoredPath, t: t}
	restCount, err := restoredDB.GetRowCount("wal_test")
	if err != nil {
		t.Fatalf("Failed to get restored row count: %v", err)
	}

	if origCount != restCount {
		t.Fatalf("Count mismatch: original=%d, restored=%d", origCount, restCount)
	}

	t.Logf("✓ Data integrity verified: %d rows", origCount)
	t.Log("TEST PASSED: Handled large WAL successfully")
}

func TestConcurrentOperations(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	RequireBinaries(t)

	duration := GetTestDuration(t, 3*time.Minute)
	t.Logf("Testing: Multiple databases replicating concurrently (duration: %v)", duration)

	dbCount := 3
	dbs := make([]*TestDB, dbCount)

	for i := 0; i < dbCount; i++ {
		dbs[i] = SetupTestDB(t, fmt.Sprintf("concurrent-%d", i))
		defer dbs[i].Cleanup()
	}

	t.Log("[1] Creating databases...")
	for i, db := range dbs {
		if err := db.Create(); err != nil {
			t.Fatalf("Failed to create database %d: %v", i, err)
		}

		if err := CreateTestTable(t, db.Path); err != nil {
			t.Fatalf("Failed to create table for database %d: %v", i, err)
		}
	}

	t.Logf("✓ Created %d databases", dbCount)

	t.Log("[2] Starting Litestream for all databases...")
	for i, db := range dbs {
		if err := db.StartLitestream(); err != nil {
			t.Fatalf("Failed to start Litestream for database %d: %v", i, err)
		}
		time.Sleep(1 * time.Second)
	}

	t.Logf("✓ All Litestream instances running")

	t.Log("[3] Generating concurrent load...")
	ctx, cancel := context.WithTimeout(context.Background(), duration)
	defer cancel()

	done := make(chan error, dbCount)

	for i, db := range dbs {
		go func(idx int, database *TestDB) {
			config := DefaultLoadConfig()
			config.WriteRate = 50
			config.Duration = duration
			config.Pattern = LoadPatternConstant
			config.Workers = 2

			err := database.GenerateLoad(ctx, config.WriteRate, config.Duration, string(config.Pattern))
			done <- err
		}(i, db)
	}

	for i := 0; i < dbCount; i++ {
		if err := <-done; err != nil && ctx.Err() == nil {
			t.Logf("Load generation %d had error: %v", i, err)
		}
	}

	t.Log("✓ Concurrent load complete")

	time.Sleep(5 * time.Second)

	t.Log("[4] Stopping all Litestream instances...")
	for _, db := range dbs {
		db.StopLitestream()
	}

	time.Sleep(2 * time.Second)

	t.Log("[5] Verifying all replicas...")
	for i, db := range dbs {
		fileCount, err := db.GetReplicaFileCount()
		if err != nil {
			t.Fatalf("Failed to check replica %d: %v", i, err)
		}

		if fileCount == 0 {
			t.Fatalf("Database %d has no replica files!", i)
		}

		t.Logf("✓ Database %d: %d replica files", i, fileCount)
	}

	t.Log("[6] Testing restore for all databases...")
	for i, db := range dbs {
		restoredPath := filepath.Join(db.TempDir, fmt.Sprintf("concurrent-restored-%d.db", i))
		if err := db.Restore(restoredPath); err != nil {
			t.Fatalf("Restore failed for database %d: %v", i, err)
		}

		origCount, _ := db.GetRowCount("test_data")
		restoredDB := &TestDB{Path: restoredPath, t: t}
		restCount, _ := restoredDB.GetRowCount("test_data")

		if origCount != restCount {
			t.Fatalf("Database %d count mismatch: original=%d, restored=%d", i, origCount, restCount)
		}

		t.Logf("✓ Database %d verified: %d rows", i, origCount)
	}

	t.Log("TEST PASSED: Concurrent replication works correctly")
}

func TestBusyTimeout(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	RequireBinaries(t)

	t.Log("Testing: Database busy timeout handling")

	db := SetupTestDB(t, "busy-timeout")
	defer db.Cleanup()

	if err := db.Create(); err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}

	t.Log("[1] Creating test data...")
	if err := CreateTestTable(t, db.Path); err != nil {
		t.Fatalf("Failed to create table: %v", err)
	}

	if err := InsertTestData(t, db.Path, 100); err != nil {
		t.Fatalf("Failed to insert test data: %v", err)
	}

	t.Log("✓ Created table with 100 rows")

	t.Log("[2] Starting Litestream...")
	if err := db.StartLitestream(); err != nil {
		t.Fatalf("Failed to start Litestream: %v", err)
	}

	time.Sleep(3 * time.Second)

	t.Log("[3] Simulating concurrent access with long transactions...")
	sqlDB, err := sql.Open("sqlite3", db.Path+"?_busy_timeout=5000")
	if err != nil {
		t.Fatalf("Failed to open database: %v", err)
	}
	defer sqlDB.Close()

	tx, err := sqlDB.Begin()
	if err != nil {
		t.Fatalf("Failed to begin transaction: %v", err)
	}

	for i := 0; i < 500; i++ {
		if _, err := tx.Exec(
			"INSERT INTO test_data (data, created_at) VALUES (?, ?)",
			fmt.Sprintf("busy test %d", i),
			time.Now().Unix(),
		); err != nil {
			t.Fatalf("Failed to insert in transaction: %v", err)
		}

		if i%100 == 0 {
			time.Sleep(500 * time.Millisecond)
		}
	}

	if err := tx.Commit(); err != nil {
		t.Fatalf("Failed to commit transaction: %v", err)
	}

	t.Log("✓ Long transaction completed")

	time.Sleep(5 * time.Second)

	db.StopLitestream()
	time.Sleep(2 * time.Second)

	t.Log("[4] Checking for errors...")
	errors, err := db.CheckForErrors()
	if err != nil {
		t.Fatalf("Failed to check errors: %v", err)
	}

	if len(errors) > 0 {
		t.Logf("Found %d errors (may include busy timeout messages)", len(errors))
	}

	t.Log("[5] Testing restore...")
	restoredPath := filepath.Join(db.TempDir, "busy-restored.db")
	if err := db.Restore(restoredPath); err != nil {
		t.Fatalf("Restore failed: %v", err)
	}

	t.Log("✓ Restore successful")

	origCount, err := db.GetRowCount("test_data")
	if err != nil {
		t.Fatalf("Failed to get original row count: %v", err)
	}

	restoredDB := &TestDB{Path: restoredPath, t: t}
	restCount, err := restoredDB.GetRowCount("test_data")
	if err != nil {
		t.Fatalf("Failed to get restored row count: %v", err)
	}

	if origCount != restCount {
		t.Fatalf("Count mismatch: original=%d, restored=%d", origCount, restCount)
	}

	t.Logf("✓ Data integrity verified: %d rows", origCount)
	t.Log("TEST PASSED: Busy timeout handled correctly")
}

func getFileSize(path string) (int64, error) {
	info, err := os.Stat(path)
	if err != nil {
		return 0, err
	}
	return info.Size(), nil
}
298
tests/integration/fixtures.go
Normal file
@@ -0,0 +1,298 @@
//go:build integration

package integration

import (
	"crypto/rand"
	"database/sql"
	"fmt"
	"time"

	_ "github.com/mattn/go-sqlite3"
)

type LoadPattern string

const (
	LoadPatternConstant LoadPattern = "constant"
	LoadPatternBurst    LoadPattern = "burst"
	LoadPatternRandom   LoadPattern = "random"
	LoadPatternWave     LoadPattern = "wave"
)

type LoadConfig struct {
	WriteRate   int
	Duration    time.Duration
	Pattern     LoadPattern
	PayloadSize int
	ReadRatio   float64
	Workers     int
}

func DefaultLoadConfig() *LoadConfig {
	return &LoadConfig{
		WriteRate:   100,
		Duration:    1 * time.Minute,
		Pattern:     LoadPatternConstant,
		PayloadSize: 1024,
		ReadRatio:   0.2,
		Workers:     1,
	}
}

type PopulateConfig struct {
	TargetSize string
	RowSize    int
	BatchSize  int
	TableCount int
	IndexRatio float64
	PageSize   int
}

func DefaultPopulateConfig() *PopulateConfig {
	return &PopulateConfig{
		TargetSize: "100MB",
		RowSize:    1024,
		BatchSize:  1000,
		TableCount: 1,
		IndexRatio: 0.2,
		PageSize:   4096,
	}
}

func CreateComplexTestSchema(db *sql.DB) error {
	schemas := []string{
		`CREATE TABLE IF NOT EXISTS users (
			id INTEGER PRIMARY KEY AUTOINCREMENT,
			username TEXT NOT NULL UNIQUE,
			email TEXT NOT NULL,
			created_at INTEGER NOT NULL
		)`,
		`CREATE TABLE IF NOT EXISTS posts (
			id INTEGER PRIMARY KEY AUTOINCREMENT,
			user_id INTEGER NOT NULL,
			title TEXT NOT NULL,
			content TEXT,
			created_at INTEGER NOT NULL,
			FOREIGN KEY (user_id) REFERENCES users(id)
		)`,
		`CREATE TABLE IF NOT EXISTS comments (
			id INTEGER PRIMARY KEY AUTOINCREMENT,
			post_id INTEGER NOT NULL,
			user_id INTEGER NOT NULL,
			content TEXT NOT NULL,
			created_at INTEGER NOT NULL,
			FOREIGN KEY (post_id) REFERENCES posts(id),
			FOREIGN KEY (user_id) REFERENCES users(id)
		)`,
		`CREATE INDEX IF NOT EXISTS idx_posts_user_id ON posts(user_id)`,
		`CREATE INDEX IF NOT EXISTS idx_posts_created_at ON posts(created_at)`,
		`CREATE INDEX IF NOT EXISTS idx_comments_post_id ON comments(post_id)`,
		`CREATE INDEX IF NOT EXISTS idx_comments_created_at ON comments(created_at)`,
	}

	for _, schema := range schemas {
		if _, err := db.Exec(schema); err != nil {
			return fmt.Errorf("execute schema: %w", err)
		}
	}

	return nil
}

func PopulateComplexTestData(db *sql.DB, userCount, postsPerUser, commentsPerPost int) error {
	tx, err := db.Begin()
	if err != nil {
		return fmt.Errorf("begin transaction: %w", err)
	}
	defer tx.Rollback()

	userStmt, err := tx.Prepare("INSERT INTO users (username, email, created_at) VALUES (?, ?, ?)")
	if err != nil {
		return fmt.Errorf("prepare user statement: %w", err)
	}
	defer userStmt.Close()

	postStmt, err := tx.Prepare("INSERT INTO posts (user_id, title, content, created_at) VALUES (?, ?, ?, ?)")
	if err != nil {
		return fmt.Errorf("prepare post statement: %w", err)
	}
	defer postStmt.Close()

	commentStmt, err := tx.Prepare("INSERT INTO comments (post_id, user_id, content, created_at) VALUES (?, ?, ?, ?)")
	if err != nil {
		return fmt.Errorf("prepare comment statement: %w", err)
	}
	defer commentStmt.Close()

	now := time.Now().Unix()

	for u := 1; u <= userCount; u++ {
		userResult, err := userStmt.Exec(
			fmt.Sprintf("user%d", u),
			fmt.Sprintf("user%d@test.com", u),
			now,
		)
		if err != nil {
			return fmt.Errorf("insert user: %w", err)
		}

		userID, err := userResult.LastInsertId()
		if err != nil {
			return fmt.Errorf("get user id: %w", err)
		}

		for p := 1; p <= postsPerUser; p++ {
			postResult, err := postStmt.Exec(
				userID,
				fmt.Sprintf("Post %d from user %d", p, u),
				generateRandomContent(100),
				now,
			)
			if err != nil {
				return fmt.Errorf("insert post: %w", err)
			}

			postID, err := postResult.LastInsertId()
			if err != nil {
				return fmt.Errorf("get post id: %w", err)
			}

			for c := 1; c <= commentsPerPost; c++ {
				commentUserID := (u + c) % userCount
				if commentUserID == 0 {
					commentUserID = userCount
				}

				_, err := commentStmt.Exec(
					postID,
					commentUserID,
					generateRandomContent(50),
					now,
				)
				if err != nil {
					return fmt.Errorf("insert comment: %w", err)
				}
			}
		}
	}

	return tx.Commit()
}

func generateRandomContent(length int) string {
	const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 "
	b := make([]byte, length)
	rand.Read(b)

	for i := range b {
		b[i] = charset[int(b[i])%len(charset)]
	}

	return string(b)
}

type TestScenario struct {
	Name        string
	Description string
	Setup       func(*sql.DB) error
	Validate    func(*sql.DB, *sql.DB) error
}

func LargeWALScenario() *TestScenario {
	return &TestScenario{
		Name:        "Large WAL",
		Description: "Generate large WAL file to test handling",
		Setup: func(db *sql.DB) error {
			if _, err := db.Exec(`
				CREATE TABLE test_wal (
					id INTEGER PRIMARY KEY AUTOINCREMENT,
					data BLOB
				)
			`); err != nil {
				return err
			}

			data := make([]byte, 10*1024)
			rand.Read(data)

			for i := 0; i < 10000; i++ {
				if _, err := db.Exec("INSERT INTO test_wal (data) VALUES (?)", data); err != nil {
					return err
				}
			}

			return nil
		},
		Validate: func(source, restored *sql.DB) error {
			var sourceCount, restoredCount int

			if err := source.QueryRow("SELECT COUNT(*) FROM test_wal").Scan(&sourceCount); err != nil {
				return fmt.Errorf("query source: %w", err)
			}

			if err := restored.QueryRow("SELECT COUNT(*) FROM test_wal").Scan(&restoredCount); err != nil {
				return fmt.Errorf("query restored: %w", err)
			}

			if sourceCount != restoredCount {
				return fmt.Errorf("count mismatch: source=%d, restored=%d", sourceCount, restoredCount)
			}

			return nil
		},
	}
}

func RapidCheckpointsScenario() *TestScenario {
	return &TestScenario{
		Name:        "Rapid Checkpoints",
		Description: "Test rapid checkpoint operations",
		Setup: func(db *sql.DB) error {
			if _, err := db.Exec(`
				CREATE TABLE test_checkpoints (
					id INTEGER PRIMARY KEY AUTOINCREMENT,
					data TEXT,
					timestamp INTEGER
				)
			`); err != nil {
				return err
			}

			for i := 0; i < 1000; i++ {
				if _, err := db.Exec(
					"INSERT INTO test_checkpoints (data, timestamp) VALUES (?, ?)",
					fmt.Sprintf("data %d", i),
					time.Now().Unix(),
				); err != nil {
					return err
				}

				if i%100 == 0 {
					if _, err := db.Exec("PRAGMA wal_checkpoint(TRUNCATE)"); err != nil {
						return err
					}
				}
			}

			return nil
		},
		Validate: func(source, restored *sql.DB) error {
			var sourceCount, restoredCount int

			if err := source.QueryRow("SELECT COUNT(*) FROM test_checkpoints").Scan(&sourceCount); err != nil {
				return fmt.Errorf("query source: %w", err)
			}

			if err := restored.QueryRow("SELECT COUNT(*) FROM test_checkpoints").Scan(&restoredCount); err != nil {
				return fmt.Errorf("query restored: %w", err)
			}

			if sourceCount != restoredCount {
				return fmt.Errorf("count mismatch: source=%d, restored=%d", sourceCount, restoredCount)
			}

			return nil
		},
	}
}
554
tests/integration/helpers.go
Normal file
@@ -0,0 +1,554 @@
|
||||
//go:build integration

package integration

import (
	"bytes"
	"context"
	"database/sql"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
	"testing"
	"time"

	_ "github.com/mattn/go-sqlite3"

	"github.com/benbjohnson/litestream"
)

type TestDB struct {
	Path          string
	ReplicaPath   string
	ReplicaURL    string
	ReplicaEnv    []string
	ConfigPath    string
	TempDir       string
	LitestreamCmd *exec.Cmd
	LitestreamPID int
	t             *testing.T
}

// getBinaryPath returns the cross-platform path to a binary.
// On Windows, it adds the .exe extension.
func getBinaryPath(name string) string {
	binPath := filepath.Join("..", "..", "bin", name)
	if runtime.GOOS == "windows" {
		binPath += ".exe"
	}
	return binPath
}

func streamCommandOutput() bool {
	v := strings.ToLower(strings.TrimSpace(os.Getenv("SOAK_DEBUG")))
	switch v {
	case "", "0", "false", "off", "no":
		return false
	default:
		return true
	}
}

func configureCmdIO(cmd *exec.Cmd) (bool, *bytes.Buffer, *bytes.Buffer) {
	stream := streamCommandOutput()
	stdoutBuf := &bytes.Buffer{}
	stderrBuf := &bytes.Buffer{}
	if stream {
		cmd.Stdout = io.MultiWriter(os.Stdout, stdoutBuf)
		cmd.Stderr = io.MultiWriter(os.Stderr, stderrBuf)
	} else {
		cmd.Stdout = stdoutBuf
		cmd.Stderr = stderrBuf
	}
	return stream, stdoutBuf, stderrBuf
}

func combinedOutput(stdoutBuf, stderrBuf *bytes.Buffer) string {
	var sb strings.Builder
	if stdoutBuf != nil && stdoutBuf.Len() > 0 {
		sb.Write(stdoutBuf.Bytes())
	}
	if stderrBuf != nil && stderrBuf.Len() > 0 {
		sb.Write(stderrBuf.Bytes())
	}
	return strings.TrimSpace(sb.String())
}

func SetupTestDB(t *testing.T, name string) *TestDB {
	t.Helper()

	var tempDir string
	if os.Getenv("SOAK_KEEP_TEMP") != "" {
		dir, err := os.MkdirTemp("", fmt.Sprintf("litestream-%s-", name))
		if err != nil {
			t.Fatalf("create temp dir: %v", err)
		}
		tempDir = dir
		t.Cleanup(func() {
			t.Logf("SOAK_KEEP_TEMP set, preserving test artifacts at: %s", tempDir)
		})
	} else {
		tempDir = t.TempDir()
	}
	dbPath := filepath.Join(tempDir, fmt.Sprintf("%s.db", name))
	replicaPath := filepath.Join(tempDir, "replica")

	return &TestDB{
		Path:        dbPath,
		ReplicaPath: replicaPath,
		ReplicaURL:  fmt.Sprintf("file://%s", filepath.ToSlash(replicaPath)),
		TempDir:     tempDir,
		t:           t,
	}
}

func (db *TestDB) Create() error {
	sqlDB, err := sql.Open("sqlite3", db.Path)
	if err != nil {
		return fmt.Errorf("open database: %w", err)
	}
	defer sqlDB.Close()

	if _, err := sqlDB.Exec("PRAGMA journal_mode=WAL"); err != nil {
		return fmt.Errorf("set WAL mode: %w", err)
	}

	return nil
}

func (db *TestDB) CreateWithPageSize(pageSize int) error {
	sqlDB, err := sql.Open("sqlite3", db.Path)
	if err != nil {
		return fmt.Errorf("open database: %w", err)
	}
	defer sqlDB.Close()

	if _, err := sqlDB.Exec(fmt.Sprintf("PRAGMA page_size = %d", pageSize)); err != nil {
		return fmt.Errorf("set page size: %w", err)
	}

	if _, err := sqlDB.Exec("PRAGMA journal_mode=WAL"); err != nil {
		return fmt.Errorf("set WAL mode: %w", err)
	}

	return nil
}

func (db *TestDB) Populate(targetSize string) error {
	cmd := exec.Command(getBinaryPath("litestream-test"), "populate",
		"-db", db.Path,
		"-target-size", targetSize,
	)

	_, stdoutBuf, stderrBuf := configureCmdIO(cmd)

	db.t.Logf("Populating database to %s...", targetSize)

	if err := cmd.Run(); err != nil {
		if output := combinedOutput(stdoutBuf, stderrBuf); output != "" {
			return fmt.Errorf("populate failed: %w\nOutput: %s", err, output)
		}
		return fmt.Errorf("populate failed: %w", err)
	}
	return nil
}

func (db *TestDB) PopulateWithOptions(targetSize string, pageSize int, rowSize int) error {
	cmd := exec.Command(getBinaryPath("litestream-test"), "populate",
		"-db", db.Path,
		"-target-size", targetSize,
		"-page-size", fmt.Sprintf("%d", pageSize),
		"-row-size", fmt.Sprintf("%d", rowSize),
	)

	_, stdoutBuf, stderrBuf := configureCmdIO(cmd)

	db.t.Logf("Populating database to %s (page size: %d, row size: %d)...", targetSize, pageSize, rowSize)

	if err := cmd.Run(); err != nil {
		if output := combinedOutput(stdoutBuf, stderrBuf); output != "" {
			return fmt.Errorf("populate failed: %w\nOutput: %s", err, output)
		}
		return fmt.Errorf("populate failed: %w", err)
	}
	return nil
}

func (db *TestDB) GenerateLoad(ctx context.Context, writeRate int, duration time.Duration, pattern string) error {
	cmd := exec.CommandContext(ctx, getBinaryPath("litestream-test"), "load",
		"-db", db.Path,
		"-write-rate", fmt.Sprintf("%d", writeRate),
		"-duration", duration.String(),
		"-pattern", pattern,
	)

	_, stdoutBuf, stderrBuf := configureCmdIO(cmd)

	db.t.Logf("Starting load generation: %d writes/sec for %v (%s pattern)", writeRate, duration, pattern)

	if err := cmd.Run(); err != nil {
		if output := combinedOutput(stdoutBuf, stderrBuf); output != "" {
			return fmt.Errorf("load generation failed: %w\nOutput: %s", err, output)
		}
		return fmt.Errorf("load generation failed: %w", err)
	}
	return nil
}

func (db *TestDB) StartLitestream() error {
	logPath := filepath.Join(db.TempDir, "litestream.log")
	logFile, err := os.Create(logPath)
	if err != nil {
		return fmt.Errorf("create log file: %w", err)
	}

	replicaURL := fmt.Sprintf("file://%s", filepath.ToSlash(db.ReplicaPath))
	cmd := exec.Command(getBinaryPath("litestream"), "replicate",
		db.Path,
		replicaURL,
	)
	cmd.Stdout = logFile
	cmd.Stderr = logFile

	if err := cmd.Start(); err != nil {
		logFile.Close()
		return fmt.Errorf("start litestream: %w", err)
	}

	db.LitestreamCmd = cmd
	db.LitestreamPID = cmd.Process.Pid

	time.Sleep(2 * time.Second)

	if cmd.ProcessState != nil && cmd.ProcessState.Exited() {
		logFile.Close()
		return fmt.Errorf("litestream exited immediately")
	}

	return nil
}

func (db *TestDB) StartLitestreamWithConfig(configPath string) error {
	logPath := filepath.Join(db.TempDir, "litestream.log")
	logFile, err := os.Create(logPath)
	if err != nil {
		return fmt.Errorf("create log file: %w", err)
	}

	db.ConfigPath = configPath
	cmd := exec.Command(getBinaryPath("litestream"), "replicate",
		"-config", configPath,
	)
	cmd.Stdout = logFile
	cmd.Stderr = logFile

	if err := cmd.Start(); err != nil {
		logFile.Close()
		return fmt.Errorf("start litestream: %w", err)
	}

	db.LitestreamCmd = cmd
	db.LitestreamPID = cmd.Process.Pid

	time.Sleep(2 * time.Second)

	return nil
}

func (db *TestDB) StopLitestream() error {
	if db.LitestreamCmd == nil || db.LitestreamCmd.Process == nil {
		return nil
	}

	if err := db.LitestreamCmd.Process.Kill(); err != nil {
		return fmt.Errorf("kill litestream: %w", err)
	}

	db.LitestreamCmd.Wait()
	time.Sleep(1 * time.Second)

	return nil
}

func (db *TestDB) Restore(outputPath string) error {
	replicaURL := db.ReplicaURL
	if replicaURL == "" {
		replicaURL = fmt.Sprintf("file://%s", filepath.ToSlash(db.ReplicaPath))
	}
	var cmd *exec.Cmd
	if db.ConfigPath != "" && (strings.HasPrefix(replicaURL, "s3://") || strings.HasPrefix(replicaURL, "abs://") || strings.HasPrefix(replicaURL, "nats://")) {
		cmd = exec.Command(getBinaryPath("litestream"), "restore",
			"-config", db.ConfigPath,
			"-o", outputPath,
			db.Path,
		)
	} else {
		cmd = exec.Command(getBinaryPath("litestream"), "restore",
			"-o", outputPath,
			replicaURL,
		)
	}
	cmd.Env = append(os.Environ(), db.ReplicaEnv...)
	output, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("restore failed: %w\nOutput: %s", err, string(output))
	}
	return nil
}

func (db *TestDB) Validate(restoredPath string) error {
	replicaURL := db.ReplicaURL
	if replicaURL == "" {
		replicaURL = fmt.Sprintf("file://%s", filepath.ToSlash(db.ReplicaPath))
	}
	cmd := exec.Command(getBinaryPath("litestream-test"), "validate",
		"-source-db", db.Path,
		"-replica-url", replicaURL,
		"-restored-db", restoredPath,
		"-check-type", "full",
	)
	cmd.Env = append(os.Environ(), db.ReplicaEnv...)
	output, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("validation failed: %w\nOutput: %s", err, string(output))
	}
	return nil
}

func (db *TestDB) QuickValidate(restoredPath string) error {
	replicaURL := db.ReplicaURL
	if replicaURL == "" {
		replicaURL = fmt.Sprintf("file://%s", filepath.ToSlash(db.ReplicaPath))
	}
	cmd := exec.Command(getBinaryPath("litestream-test"), "validate",
		"-source-db", db.Path,
		"-replica-url", replicaURL,
		"-restored-db", restoredPath,
		"-check-type", "quick",
	)
	cmd.Env = append(os.Environ(), db.ReplicaEnv...)
	output, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("validation failed: %w\nOutput: %s", err, string(output))
	}
	return nil
}

func (db *TestDB) GetRowCount(table string) (int, error) {
	sqlDB, err := sql.Open("sqlite3", db.Path)
	if err != nil {
		return 0, fmt.Errorf("open database: %w", err)
	}
	defer sqlDB.Close()

	var count int
	query := fmt.Sprintf("SELECT COUNT(*) FROM %s", table)
	if err := sqlDB.QueryRow(query).Scan(&count); err != nil {
		return 0, fmt.Errorf("query count: %w", err)
	}

	return count, nil
}

func (db *TestDB) GetDatabaseSize() (int64, error) {
	info, err := os.Stat(db.Path)
	if err != nil {
		return 0, err
	}

	size := info.Size()

	walPath := db.Path + "-wal"
	if walInfo, err := os.Stat(walPath); err == nil {
		size += walInfo.Size()
	}

	return size, nil
}

func (db *TestDB) GetReplicaFileCount() (int, error) {
	ltxPath := filepath.Join(db.ReplicaPath, "ltx", "0")
	files, err := filepath.Glob(filepath.Join(ltxPath, "*.ltx"))
	if err != nil {
		return 0, err
	}
	return len(files), nil
}

func (db *TestDB) GetLitestreamLog() (string, error) {
	logPath := filepath.Join(db.TempDir, "litestream.log")
	content, err := os.ReadFile(logPath)
	if err != nil {
		return "", err
	}
	return string(content), nil
}

func (db *TestDB) CheckForErrors() ([]string, error) {
	log, err := db.GetLitestreamLog()
	if err != nil {
		return nil, err
	}

	var errors []string
	lines := strings.Split(log, "\n")
	for _, line := range lines {
		if strings.Contains(strings.ToUpper(line), "ERROR") {
			errors = append(errors, line)
		}
	}

	return errors, nil
}

func (db *TestDB) Cleanup() {
	db.StopLitestream()
}

// WaitForSnapshots waits for snapshots & WAL segments to appear on file replicas.
func (db *TestDB) WaitForSnapshots(timeout time.Duration) error {
	if !strings.HasPrefix(db.ReplicaURL, "file://") {
		return nil
	}

	snapshotDir := filepath.Join(db.ReplicaPath, "ltx", fmt.Sprintf("%d", litestream.SnapshotLevel))
	walDir := filepath.Join(db.ReplicaPath, "ltx", "0")

	deadline := time.Now().Add(timeout)
	for {
		snapshotCount := countLTXFiles(snapshotDir)
		walCount := countLTXFiles(walDir)

		if snapshotCount > 0 && walCount > 0 {
			return nil
		}

		if time.Now().After(deadline) {
			return fmt.Errorf("timeout waiting for replica data: snapshots=%d wal=%d", snapshotCount, walCount)
		}

		time.Sleep(500 * time.Millisecond)
	}
}

func countLTXFiles(dir string) int {
	matches, err := filepath.Glob(filepath.Join(dir, "*.ltx"))
	if err != nil {
		return 0
	}
	return len(matches)
}

func GetTestDuration(t *testing.T, defaultDuration time.Duration) time.Duration {
	t.Helper()

	if testing.Short() {
		return defaultDuration / 10
	}

	return defaultDuration
}

func RequireBinaries(t *testing.T) {
	t.Helper()

	litestreamBin := getBinaryPath("litestream")
	if _, err := os.Stat(litestreamBin); err != nil {
		t.Skip("litestream binary not found, run: go build -o bin/litestream ./cmd/litestream")
	}

	litestreamTestBin := getBinaryPath("litestream-test")
	if _, err := os.Stat(litestreamTestBin); err != nil {
		t.Skip("litestream-test binary not found, run: go build -o bin/litestream-test ./cmd/litestream-test")
	}
}

func CreateTestTable(t *testing.T, dbPath string) error {
	t.Helper()

	sqlDB, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return err
	}
	defer sqlDB.Close()

	_, err = sqlDB.Exec(`
		CREATE TABLE IF NOT EXISTS test_data (
			id INTEGER PRIMARY KEY AUTOINCREMENT,
			data TEXT,
			created_at INTEGER
		)
	`)
	return err
}

func InsertTestData(t *testing.T, dbPath string, count int) error {
	t.Helper()

	sqlDB, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return err
	}
	defer sqlDB.Close()

	tx, err := sqlDB.Begin()
	if err != nil {
		return err
	}
	defer tx.Rollback()

	stmt, err := tx.Prepare("INSERT INTO test_data (data, created_at) VALUES (?, ?)")
	if err != nil {
		return err
	}
	defer stmt.Close()

	for i := 0; i < count; i++ {
		if _, err := stmt.Exec(fmt.Sprintf("test data %d", i), time.Now().Unix()); err != nil {
			return err
		}
	}

	return tx.Commit()
}

// IntegrityCheck runs PRAGMA integrity_check on the database.
func (db *TestDB) IntegrityCheck() error {
	sqlDB, err := sql.Open("sqlite3", db.Path)
	if err != nil {
		return err
	}
	defer sqlDB.Close()

	var result string
	if err := sqlDB.QueryRow("PRAGMA integrity_check").Scan(&result); err != nil {
		return err
	}
	if result != "ok" {
		return fmt.Errorf("integrity check failed: %s", result)
	}
	return nil
}

// PrintTestSummary prints a summary of the test results.
func (db *TestDB) PrintTestSummary(t *testing.T, testName string, startTime time.Time) {
	t.Helper()

	duration := time.Since(startTime)
	dbSize, _ := db.GetDatabaseSize()
	fileCount, _ := db.GetReplicaFileCount()
	errors, _ := db.CheckForErrors()

	t.Log("\n" + strings.Repeat("=", 80))
	t.Logf("TEST SUMMARY: %s", testName)
	t.Log(strings.Repeat("=", 80))
	t.Logf("Duration: %v", duration.Round(time.Second))
	t.Logf("Database Size: %.2f MB", float64(dbSize)/(1024*1024))
	t.Logf("Replica Files: %d LTX files", fileCount)
	t.Logf("Litestream Errors: %d", len(errors))
	t.Log(strings.Repeat("=", 80))
}
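
// roundTripSketch is a minimal sketch (an editorial addition, not part of the
// original commit) showing how the helpers above are meant to compose into a
// replicate -> restore -> validate round trip. The database name, target size,
// write rate, and durations are illustrative assumptions only.
func roundTripSketch(t *testing.T) {
	RequireBinaries(t)

	db := SetupTestDB(t, "round-trip")
	defer db.Cleanup()

	if err := db.Create(); err != nil {
		t.Fatalf("create: %v", err)
	}
	if err := db.Populate("10MB"); err != nil {
		t.Fatalf("populate: %v", err)
	}

	if err := db.StartLitestream(); err != nil {
		t.Fatalf("start litestream: %v", err)
	}

	// Write for 30 seconds at 50 writes/sec using the "wave" pattern.
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()
	if err := db.GenerateLoad(ctx, 50, 30*time.Second, "wave"); err != nil {
		t.Fatalf("load: %v", err)
	}

	if err := db.StopLitestream(); err != nil {
		t.Fatalf("stop litestream: %v", err)
	}

	// Restore to a fresh path and run a quick source-vs-restored comparison.
	restored := filepath.Join(db.TempDir, "restored.db")
	if err := db.Restore(restored); err != nil {
		t.Fatalf("restore: %v", err)
	}
	if err := db.QuickValidate(restored); err != nil {
		t.Fatalf("validate: %v", err)
	}
}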
368
tests/integration/minio_soak_test.go
Normal file
@@ -0,0 +1,368 @@
//go:build integration && soak && docker

package integration

import (
	"context"
	"database/sql"
	"fmt"
	"os/exec"
	"path/filepath"
	"strings"
	"testing"
	"time"

	_ "github.com/mattn/go-sqlite3"
)

// TestMinIOSoak runs a soak test against a local MinIO S3-compatible server using Docker.
//
// Default duration: 2 hours
// Can be shortened with: go test -test.short (runs for 2 minutes)
//
// Requirements:
// - Docker must be running
// - docker command must be in PATH
//
// This test validates:
// - S3-compatible replication to MinIO
// - Docker container lifecycle management
// - Heavy sustained load (500 writes/sec)
// - Restoration from S3-compatible storage
func TestMinIOSoak(t *testing.T) {
	RequireBinaries(t)
	RequireDocker(t)

	// Determine test duration.
	duration := GetTestDuration(t, 2*time.Hour)
	shortMode := testing.Short()
	if shortMode {
		duration = 2 * time.Minute
	}

	targetSize := "50MB"
	writeRate := 500
	if shortMode {
		targetSize = "5MB"
		writeRate = 100
	}

	t.Logf("================================================")
	t.Logf("Litestream MinIO S3 Soak Test")
	t.Logf("================================================")
	t.Logf("Duration: %v", duration)
	t.Logf("Start time: %s", time.Now().Format(time.RFC3339))
	t.Log("")

	startTime := time.Now()

	// Start MinIO container.
	t.Log("Starting MinIO container...")
	containerID, endpoint, dataVolume := StartMinIOContainer(t)
	defer StopMinIOContainer(t, containerID, dataVolume)
	t.Logf("✓ MinIO running at: %s", endpoint)
	t.Log("")

	// Create MinIO bucket.
	bucket := "litestream-test"
	CreateMinIOBucket(t, containerID, bucket)
	t.Log("")

	// Setup test database.
	db := SetupTestDB(t, "minio-soak")
	defer db.Cleanup()

	// Create database.
	if err := db.Create(); err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}

	// Populate with initial data.
	t.Logf("Populating database (%s initial data)...", targetSize)
	if err := db.Populate(targetSize); err != nil {
		t.Fatalf("Failed to populate database: %v", err)
	}
	t.Log("✓ Database populated")
	t.Log("")

	// Create S3 configuration for MinIO.
	s3Path := fmt.Sprintf("litestream-test-%d", time.Now().Unix())
	s3URL := fmt.Sprintf("s3://%s/%s", bucket, s3Path)
	db.ReplicaURL = s3URL
	t.Log("Creating Litestream configuration for MinIO S3...")
	s3Config := &S3Config{
		Endpoint:       endpoint,
		AccessKey:      "minioadmin",
		SecretKey:      "minioadmin",
		Region:         "us-east-1",
		ForcePathStyle: true,
		SkipVerify:     true,
	}
	configPath := CreateSoakConfig(db.Path, s3URL, s3Config, shortMode)
	db.ConfigPath = configPath
	t.Logf("✓ Configuration created: %s", configPath)
	t.Logf("  S3 URL: %s", s3URL)
	t.Log("")

	// Start Litestream.
	t.Log("Starting Litestream with MinIO backend...")
	if err := db.StartLitestreamWithConfig(configPath); err != nil {
		t.Fatalf("Failed to start Litestream: %v", err)
	}
	t.Logf("✓ Litestream running (PID: %d)", db.LitestreamPID)
	t.Log("")

	// Start load generator.
	t.Log("Starting load generator (heavy sustained load)...")
	t.Logf("  Write rate: %d writes/second", writeRate)
	t.Logf("  Pattern: wave (simulates varying load)")
	t.Logf("  Payload size: 4KB")
	t.Logf("  Workers: 8")
	t.Log("")

	ctx, cancel := context.WithTimeout(context.Background(), duration)
	defer cancel()

	// Setup signal handler for graceful interruption.
	testInfo := &TestInfo{
		StartTime: startTime,
		Duration:  duration,
		DB:        db,
		cancel:    cancel,
	}
	setupSignalHandler(t, cancel, testInfo)

	// Run load generation in background.
	loadDone := make(chan error, 1)
	go func() {
		loadDone <- db.GenerateLoad(ctx, writeRate, duration, "wave")
	}()

	// Monitor every 60 seconds with MinIO-specific metrics.
	t.Log("Running MinIO S3 test...")
	t.Log("Monitor will report every 60 seconds")
	t.Log("Press Ctrl+C twice within 5 seconds to stop early")
	t.Log("================================================")
	t.Log("")

	refreshStats := func() {
		testInfo.RowCount, _ = db.GetRowCount("load_test")
		if testInfo.RowCount == 0 {
			testInfo.RowCount, _ = db.GetRowCount("test_table_0")
		}
		if testInfo.RowCount == 0 {
			testInfo.RowCount, _ = db.GetRowCount("test_data")
		}
		testInfo.FileCount = CountMinIOObjects(t, containerID, bucket)
	}

	logMetrics := func() {
		logMinIOMetrics(t, db, containerID, bucket)
		if db.LitestreamCmd != nil && db.LitestreamCmd.ProcessState != nil {
			t.Error("✗ Litestream stopped unexpectedly!")
			if testInfo.cancel != nil {
				testInfo.cancel()
			}
		}
	}

	MonitorSoakTest(t, db, ctx, testInfo, refreshStats, logMetrics)

	// Wait for load generation to complete.
	if err := <-loadDone; err != nil {
		t.Logf("Load generation completed: %v", err)
	}

	if err := db.WaitForSnapshots(30 * time.Second); err != nil {
		t.Fatalf("Failed waiting for snapshot: %v", err)
	}

	t.Log("")
	t.Log("================================================")
	t.Log("Final Test Results")
	t.Log("================================================")
	t.Log("")

	// Stop Litestream.
	t.Log("Stopping Litestream...")
	if err := db.StopLitestream(); err != nil {
		t.Logf("Warning: Failed to stop Litestream cleanly: %v", err)
	}

	// Final statistics.
	t.Log("Database Statistics:")
	if dbSize, err := db.GetDatabaseSize(); err == nil {
		t.Logf("  Final size: %.2f MB", float64(dbSize)/(1024*1024))
	}

	// Count rows.
	var rowCount int
	var err error
	if rowCount, err = db.GetRowCount("load_test"); err != nil {
		if rowCount, err = db.GetRowCount("test_table_0"); err != nil {
			if rowCount, err = db.GetRowCount("test_data"); err != nil {
				t.Logf("  Warning: Could not get row count: %v", err)
			}
		}
	}
	if err == nil {
		t.Logf("  Total rows: %d", rowCount)
	}
	t.Log("")

	// MinIO statistics.
	t.Log("MinIO S3 Statistics:")
	finalObjects := CountMinIOObjects(t, containerID, bucket)
	t.Logf("  Total objects in MinIO: %d", finalObjects)
	t.Log("")

	// Check for errors.
	errors, _ := db.CheckForErrors()
	criticalErrors := 0
	for _, errLine := range errors {
		if !containsAny(errLine, []string{"page size not initialized"}) {
			criticalErrors++
		}
	}
	t.Logf("  Critical errors: %d", criticalErrors)
	t.Log("")

	// Test restoration from MinIO.
	t.Log("Testing restoration from MinIO S3...")
	restoredPath := filepath.Join(db.TempDir, "restored.db")
	if err := db.Restore(restoredPath); err != nil {
		t.Fatalf("Restoration from MinIO failed: %v", err)
	}
	t.Log("✓ Restoration successful!")

	// Compare row counts.
	var restoredCount int
	if restoredCount, err = getRowCountFromPath(restoredPath, "load_test"); err != nil {
		if restoredCount, err = getRowCountFromPath(restoredPath, "test_table_0"); err != nil {
			if restoredCount, err = getRowCountFromPath(restoredPath, "test_data"); err != nil {
				t.Logf("  Warning: Could not get restored row count: %v", err)
			}
		}
	}
	if err == nil && rowCount > 0 {
		if rowCount == restoredCount {
			t.Logf("✓ Row counts match! (%d rows)", restoredCount)
		} else {
			t.Logf("⚠ Row count mismatch! Original: %d, Restored: %d", rowCount, restoredCount)
		}
	}

	// Validate integrity.
	t.Log("")
	t.Log("Validating restored database integrity...")
	restoredDB := &TestDB{Path: restoredPath, t: t}
	if err := restoredDB.IntegrityCheck(); err != nil {
		t.Fatalf("Integrity check failed: %v", err)
	}
	t.Log("✓ Integrity check passed!")

	// Analyze test results.
	analysis := AnalyzeSoakTest(t, db, duration)
	PrintSoakTestAnalysis(t, analysis)

	// Test Summary.
	t.Log("================================================")
	t.Log("Test Summary")
	t.Log("================================================")

	testPassed := true
	issues := []string{}

	if criticalErrors > 0 {
		testPassed = false
		issues = append(issues, fmt.Sprintf("Critical errors detected: %d", criticalErrors))
	}

	if finalObjects == 0 {
		testPassed = false
		issues = append(issues, "No objects stored in MinIO")
	}

	if testPassed {
		t.Log("✓ TEST PASSED!")
		t.Log("")
		t.Logf("Successfully replicated to MinIO (%d objects)", finalObjects)
		t.Log("The configuration is ready for production use.")
	} else {
		t.Log("⚠ TEST COMPLETED WITH ISSUES:")
		for _, issue := range issues {
			t.Logf("  - %s", issue)
		}
		t.Log("")
		t.Log("Review the logs for details:")
		logPath, _ := db.GetLitestreamLog()
		t.Logf("  %s", logPath)
		t.Fail()
	}

	t.Log("")
	t.Logf("Test duration: %v", time.Since(startTime).Round(time.Second))
	t.Logf("Results available in: %s", db.TempDir)
	t.Log("================================================")
}

// logMinIOMetrics logs MinIO-specific metrics.
func logMinIOMetrics(t *testing.T, db *TestDB, containerID, bucket string) {
	t.Helper()

	// Basic database metrics.
	LogSoakMetrics(t, db, "minio")

	// MinIO-specific metrics.
	t.Log("")
	t.Log("  MinIO S3 Statistics:")

	objectCount := CountMinIOObjects(t, containerID, bucket)
	t.Logf("    Total objects: %d", objectCount)

	// Count LTX files specifically.
	ltxCount := countMinIOLTXFiles(t, containerID, bucket)
	t.Logf("    LTX segments: %d", ltxCount)
}

// countMinIOLTXFiles counts LTX files in the MinIO bucket.
func countMinIOLTXFiles(t *testing.T, containerID, bucket string) int {
	t.Helper()

	cmd := exec.Command("docker", "run", "--rm",
		"--link", containerID+":minio",
		"-e", "MC_HOST_minio=http://minioadmin:minioadmin@minio:9000",
		"minio/mc", "ls", "minio/"+bucket+"/", "--recursive")

	output, err := cmd.CombinedOutput()
	if err != nil {
		return 0
	}

	lines := strings.Split(strings.TrimSpace(string(output)), "\n")
	ltxCount := 0
	for _, line := range lines {
		if strings.Contains(line, ".ltx") {
			ltxCount++
		}
	}

	return ltxCount
}

// getRowCountFromPath gets row count from a database file path.
func getRowCountFromPath(dbPath, table string) (int, error) {
	db, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return 0, err
	}
	defer db.Close()

	var count int
	query := fmt.Sprintf("SELECT COUNT(*) FROM %s", table)
	if err := db.QueryRow(query).Scan(&count); err != nil {
		return 0, err
	}

	return count, nil
}
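
// containsAny is provided by the shared soak helpers, which are not part of
// this diff. A minimal implementation consistent with how it is called above
// would be roughly the following sketch (an assumption, not the actual code):
//
//	func containsAny(s string, substrs []string) bool {
//		for _, sub := range substrs {
//			if strings.Contains(s, sub) {
//				return true
//			}
//		}
//		return false
//	}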
353
tests/integration/overnight_s3_soak_test.go
Normal file
@@ -0,0 +1,353 @@
//go:build integration && soak && aws

package integration

import (
	"context"
	"database/sql"
	"fmt"
	"path/filepath"
	"testing"
	"time"

	_ "github.com/mattn/go-sqlite3"
)

// TestOvernightS3Soak runs an 8-hour overnight soak test against real AWS S3.
//
// Default duration: 8 hours
// Can be shortened with: go test -test.short (runs for 10 minutes)
//
// Requirements:
// - AWS_ACCESS_KEY_ID environment variable
// - AWS_SECRET_ACCESS_KEY environment variable
// - S3_BUCKET environment variable
// - AWS_REGION environment variable (optional, defaults to us-east-1)
// - AWS CLI must be installed
//
// This test validates:
// - Long-term S3 replication stability
// - Network resilience over 8 hours
// - Real S3 API performance
// - Restoration from cloud storage
func TestOvernightS3Soak(t *testing.T) {
	RequireBinaries(t)

	// Check AWS credentials and get configuration.
	bucket, region := CheckAWSCredentials(t)

	// Determine test duration.
	var duration time.Duration
	if testing.Short() {
		duration = 10 * time.Minute
	} else {
		duration = 8 * time.Hour
	}

	shortMode := testing.Short()

	t.Logf("================================================")
	t.Logf("Litestream Overnight S3 Soak Test")
	t.Logf("================================================")
	t.Logf("Duration: %v", duration)
	t.Logf("S3 Bucket: %s", bucket)
	t.Logf("AWS Region: %s", region)
	t.Logf("Start time: %s", time.Now().Format(time.RFC3339))
	t.Log("")

	startTime := time.Now()

	// Test S3 connectivity.
	t.Log("Testing S3 connectivity...")
	TestS3Connectivity(t, bucket)
	t.Log("")

	// Setup test database.
	db := SetupTestDB(t, "overnight-s3-soak")
	defer db.Cleanup()

	// Create database.
	if err := db.Create(); err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}

	// Create S3 configuration.
	s3Path := fmt.Sprintf("litestream-overnight-%d", time.Now().Unix())
	s3URL := fmt.Sprintf("s3://%s/%s", bucket, s3Path)
	db.ReplicaURL = s3URL
	t.Log("Creating Litestream configuration for S3...")
	s3Config := &S3Config{
		Region: region,
	}
	configPath := CreateSoakConfig(db.Path, s3URL, s3Config, shortMode)
	db.ConfigPath = configPath
	t.Logf("✓ Configuration created: %s", configPath)
	t.Logf("  S3 URL: %s", s3URL)
	t.Log("")

	// Start Litestream initially (before population).
	t.Log("Starting Litestream...")
	if err := db.StartLitestreamWithConfig(configPath); err != nil {
		t.Fatalf("Failed to start Litestream: %v", err)
	}
	t.Logf("✓ Litestream started (PID: %d)", db.LitestreamPID)
	t.Log("")

	// Stop Litestream to populate database.
	t.Log("Stopping Litestream temporarily for initial population...")
	if err := db.StopLitestream(); err != nil {
		t.Fatalf("Failed to stop Litestream: %v", err)
	}

	// Populate with 100MB of initial data.
	t.Log("Populating database (100MB initial data)...")
	if err := db.Populate("100MB"); err != nil {
		t.Fatalf("Failed to populate database: %v", err)
	}
	t.Log("✓ Database populated")
	t.Log("")

	// Restart Litestream after population.
	t.Log("Restarting Litestream after population...")
	if err := db.StartLitestreamWithConfig(configPath); err != nil {
		t.Fatalf("Failed to restart Litestream: %v", err)
	}
	t.Logf("✓ Litestream restarted (PID: %d)", db.LitestreamPID)
	t.Log("")

	// Start load generator for overnight test.
	t.Log("Starting load generator for overnight S3 test...")
	t.Log("Configuration:")
	t.Logf("  Duration: %v", duration)
	t.Logf("  Write rate: 100 writes/second (higher for S3 testing)")
	t.Logf("  Pattern: wave (simulates varying load)")
	t.Logf("  Workers: 8")
	t.Log("")

	ctx, cancel := context.WithTimeout(context.Background(), duration)
	defer cancel()

	// Setup signal handler for graceful interruption.
	testInfo := &TestInfo{
		StartTime: startTime,
		Duration:  duration,
		DB:        db,
		cancel:    cancel,
	}
	setupSignalHandler(t, cancel, testInfo)

	// Run load generation in background.
	loadDone := make(chan error, 1)
	go func() {
		loadDone <- db.GenerateLoad(ctx, 100, duration, "wave")
	}()

	// Monitor every 60 seconds with S3-specific metrics.
	t.Log("Overnight S3 test is running!")
	t.Log("Monitor will report every 60 seconds")
	t.Log("Press Ctrl+C twice within 5 seconds to stop early")
	t.Log("================================================")
	t.Log("")
	t.Logf("The test will run for %v. Monitor progress below.", duration)
	t.Log("")

	refreshStats := func() {
		testInfo.RowCount, _ = db.GetRowCount("load_test")
		if testInfo.RowCount == 0 {
			testInfo.RowCount, _ = db.GetRowCount("test_table_0")
		}
		if testInfo.RowCount == 0 {
			testInfo.RowCount, _ = db.GetRowCount("test_data")
		}
		testInfo.FileCount = CountS3Objects(t, s3URL)
	}

	logMetrics := func() {
		logS3Metrics(t, db, s3URL)
		if db.LitestreamCmd != nil && db.LitestreamCmd.ProcessState != nil {
			t.Error("✗ Litestream stopped unexpectedly!")
			if testInfo.cancel != nil {
				testInfo.cancel()
			}
		}
	}

	MonitorSoakTest(t, db, ctx, testInfo, refreshStats, logMetrics)

	// Wait for load generation to complete.
	if err := <-loadDone; err != nil {
		t.Logf("Load generation completed: %v", err)
	}

	t.Log("")
	t.Log("Load generation completed.")

	// Final statistics.
	t.Log("")
	t.Log("================================================")
	t.Log("Final Statistics")
	t.Log("================================================")
	t.Log("")

	// Stop Litestream.
	t.Log("Stopping Litestream...")
	if err := db.StopLitestream(); err != nil {
		t.Logf("Warning: Failed to stop Litestream cleanly: %v", err)
	}

	// Database statistics.
	t.Log("Database Statistics:")
	if dbSize, err := db.GetDatabaseSize(); err == nil {
		t.Logf("  Final size: %.2f MB", float64(dbSize)/(1024*1024))
	}

	// Count rows.
	var rowCount int
	var err error
	if rowCount, err = db.GetRowCount("load_test"); err != nil {
		if rowCount, err = db.GetRowCount("test_table_0"); err != nil {
			if rowCount, err = db.GetRowCount("test_data"); err != nil {
				t.Logf("  Warning: Could not get row count: %v", err)
			}
		}
	}
	if err == nil {
		t.Logf("  Total rows: %d", rowCount)
	}
	t.Log("")

	// S3 statistics.
	t.Log("S3 Statistics:")
	finalObjects := CountS3Objects(t, s3URL)
	t.Logf("  Total objects: %d", finalObjects)

	if s3Size := GetS3StorageSize(t, s3URL); s3Size > 0 {
		t.Logf("  Total S3 storage: %.2f MB", float64(s3Size)/(1024*1024))
	}
	t.Log("")

	// Check for errors.
	errors, _ := db.CheckForErrors()
	criticalErrors := 0
	for _, errLine := range errors {
		if !containsAny(errLine, []string{"page size not initialized"}) {
			criticalErrors++
		}
	}
	t.Logf("  Critical errors: %d", criticalErrors)
	t.Log("")

	// Test restoration from S3.
	t.Log("Testing restoration from S3...")
	restoredPath := filepath.Join(db.TempDir, "restored.db")
	if err := db.Restore(restoredPath); err != nil {
		t.Fatalf("Restoration from S3 failed: %v", err)
	}
	t.Log("✓ Restoration successful!")

	// Compare row counts.
	var restoredCount int
	if restoredCount, err = getRowCountFromPath(restoredPath, "load_test"); err != nil {
		if restoredCount, err = getRowCountFromPath(restoredPath, "test_table_0"); err != nil {
			if restoredCount, err = getRowCountFromPath(restoredPath, "test_data"); err != nil {
				t.Logf("  Warning: Could not get restored row count: %v", err)
			}
		}
	}
	if err == nil && rowCount > 0 {
		if rowCount == restoredCount {
			t.Logf("✓ Row counts match! (%d rows)", restoredCount)
		} else {
			t.Logf("⚠ Row count mismatch! Original: %d, Restored: %d", rowCount, restoredCount)
		}
	}

	// Validate.
	t.Log("")
	t.Log("Validating restored database...")
	if err := db.Validate(restoredPath); err != nil {
		t.Fatalf("Validation failed: %v", err)
	}
	t.Log("✓ Validation passed!")

	// Analyze test results.
	analysis := AnalyzeSoakTest(t, db, duration)
	PrintSoakTestAnalysis(t, analysis)

	// Test Summary.
	t.Log("================================================")
	t.Log("Test Summary")
	t.Log("================================================")

	testPassed := true
	issues := []string{}

	if criticalErrors > 0 {
		testPassed = false
		issues = append(issues, fmt.Sprintf("Critical errors detected: %d", criticalErrors))
	}

	if finalObjects == 0 {
		testPassed = false
		issues = append(issues, "No objects stored in S3")
	}

	if testPassed {
		t.Log("✓ TEST PASSED!")
		t.Log("")
		t.Logf("Successfully replicated to AWS S3 (%d objects)", finalObjects)
		t.Log("The configuration is ready for production use.")
	} else {
		t.Log("⚠ TEST COMPLETED WITH ISSUES:")
		for _, issue := range issues {
			t.Logf("  - %s", issue)
		}
		t.Log("")
		t.Log("Review the logs for details:")
		logPath, _ := db.GetLitestreamLog()
		t.Logf("  %s", logPath)
		t.Fail()
	}

	t.Log("")
	t.Logf("Test duration: %v", time.Since(startTime).Round(time.Second))
	t.Logf("Results available in: %s", db.TempDir)
	t.Logf("S3 replica data in: %s", s3URL)
	t.Log("================================================")
}

// logS3Metrics logs S3-specific metrics.
func logS3Metrics(t *testing.T, db *TestDB, s3URL string) {
	t.Helper()

	// Basic database metrics.
	LogSoakMetrics(t, db, "overnight-s3")

	// S3-specific metrics.
	t.Log("")
	t.Log("  S3 Statistics:")

	objectCount := CountS3Objects(t, s3URL)
	t.Logf("    Total objects: %d", objectCount)

	if s3Size := GetS3StorageSize(t, s3URL); s3Size > 0 {
		t.Logf("    Total storage: %.2f MB", float64(s3Size)/(1024*1024))
	}
}
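
// Note: the getRowCountFromPath helper below duplicates the one in
// minio_soak_test.go; the two definitions never build together because this
// file requires the "aws" build tag while that one requires "docker".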
// getRowCountFromPath gets row count from a database file path.
func getRowCountFromPath(dbPath, table string) (int, error) {
	db, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return 0, err
	}
	defer db.Close()

	var count int
	query := fmt.Sprintf("SELECT COUNT(*) FROM %s", table)
	if err := db.QueryRow(query).Scan(&count); err != nil {
		return 0, err
	}

	return count, nil
}
215
tests/integration/overnight_test.go
Normal file
@@ -0,0 +1,215 @@
//go:build integration && long

package integration

import (
	"context"
	"path/filepath"
	"testing"
	"time"

	_ "github.com/mattn/go-sqlite3"
)

func TestOvernightFile(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping long integration test in short mode")
	}

	RequireBinaries(t)

	startTime := time.Now()
	duration := GetTestDuration(t, 8*time.Hour)
	t.Logf("Testing: Overnight file-based replication (duration: %v)", duration)
	t.Log("Default: 8 hours, configurable via test duration")

	db := SetupTestDB(t, "overnight-file")
	defer db.Cleanup()
	defer db.PrintTestSummary(t, "Overnight File Replication", startTime)

	t.Log("[1] Creating and populating database...")
	if err := db.Create(); err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}

	if err := db.Populate("100MB"); err != nil {
		t.Fatalf("Failed to populate database: %v", err)
	}

	t.Log("✓ Database populated to 100MB")

	t.Log("[2] Starting Litestream...")
	if err := db.StartLitestream(); err != nil {
		t.Fatalf("Failed to start Litestream: %v", err)
	}

	time.Sleep(10 * time.Second)

	t.Log("[3] Generating sustained load...")
	ctx, cancel := context.WithTimeout(context.Background(), duration)
	defer cancel()

	config := DefaultLoadConfig()
	config.WriteRate = 50
	config.Duration = duration
	config.Pattern = LoadPatternWave
	config.PayloadSize = 2 * 1024
	config.Workers = 4

	ticker := time.NewTicker(1 * time.Minute)
	defer ticker.Stop()

	go func() {
		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				fileCount, _ := db.GetReplicaFileCount()
				dbSize, _ := db.GetDatabaseSize()
				t.Logf("[Progress] Files: %d, DB Size: %.2f MB, Elapsed: %v",
					fileCount, float64(dbSize)/(1024*1024), time.Since(startTime))
			}
		}
	}()

	if err := db.GenerateLoad(ctx, config.WriteRate, config.Duration, string(config.Pattern)); err != nil && ctx.Err() == nil {
		t.Fatalf("Load generation failed: %v", err)
	}

	t.Log("✓ Load generation complete")

	time.Sleep(1 * time.Minute)

	t.Log("[4] Final statistics...")
	fileCount, err := db.GetReplicaFileCount()
	if err != nil {
		t.Fatalf("Failed to check replica: %v", err)
	}

	dbSize, err := db.GetDatabaseSize()
	if err != nil {
		t.Fatalf("Failed to get database size: %v", err)
	}

	t.Logf("Final LTX files: %d", fileCount)
	t.Logf("Final DB size: %.2f MB", float64(dbSize)/(1024*1024))

	t.Log("[5] Checking for errors...")
	errors, err := db.CheckForErrors()
	if err != nil {
		t.Fatalf("Failed to check errors: %v", err)
	}

	if len(errors) > 20 {
		t.Fatalf("Too many errors (%d), test may be unstable", len(errors))
	} else if len(errors) > 0 {
		t.Logf("Found %d errors (acceptable for long test)", len(errors))
	} else {
		t.Log("✓ No errors detected")
	}

	db.StopLitestream()
	time.Sleep(2 * time.Second)

	t.Log("[6] Testing final restore...")
	restoredPath := filepath.Join(db.TempDir, "overnight-restored.db")
	if err := db.Restore(restoredPath); err != nil {
		t.Fatalf("Restore failed: %v", err)
	}

	t.Log("✓ Restore successful")

	t.Log("[7] Full validation...")
	if err := db.Validate(restoredPath); err != nil {
		t.Fatalf("Validation failed: %v", err)
	}

	t.Log("✓ Validation passed")
	t.Log("TEST PASSED: Overnight file replication successful")
}

func TestOvernightComprehensive(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping long integration test in short mode")
	}

	RequireBinaries(t)

	startTime := time.Now()
	duration := GetTestDuration(t, 8*time.Hour)
	t.Logf("Testing: Comprehensive overnight test (duration: %v)", duration)

	db := SetupTestDB(t, "overnight-comprehensive")
	defer db.Cleanup()
	defer db.PrintTestSummary(t, "Overnight Comprehensive Test", startTime)

	t.Log("[1] Creating large database...")
	if err := db.Create(); err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}

	if err := db.Populate("500MB"); err != nil {
		t.Fatalf("Failed to populate database: %v", err)
	}

	t.Log("✓ Database populated to 500MB")

	t.Log("[2] Starting Litestream...")
	if err := db.StartLitestream(); err != nil {
		t.Fatalf("Failed to start Litestream: %v", err)
	}

	time.Sleep(10 * time.Second)

	t.Log("[3] Generating mixed workload...")
	ctx, cancel := context.WithTimeout(context.Background(), duration)
	defer cancel()

	config := DefaultLoadConfig()
	config.WriteRate = 100
	config.Duration = duration
	config.Pattern = LoadPatternWave
	config.PayloadSize = 4 * 1024
	config.ReadRatio = 0.3
	config.Workers = 8

	ticker := time.NewTicker(5 * time.Minute)
	defer ticker.Stop()

	go func() {
		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				fileCount, _ := db.GetReplicaFileCount()
				dbSize, _ := db.GetDatabaseSize()
				t.Logf("[Progress] Files: %d, DB Size: %.2f MB", fileCount, float64(dbSize)/(1024*1024))
			}
		}
	}()

	if err := db.GenerateLoad(ctx, config.WriteRate, config.Duration, string(config.Pattern)); err != nil && ctx.Err() == nil {
		t.Fatalf("Load generation failed: %v", err)
	}

	t.Log("✓ Load generation complete")

	time.Sleep(2 * time.Minute)

	db.StopLitestream()

	t.Log("[4] Final validation...")
	restoredPath := filepath.Join(db.TempDir, "comprehensive-restored.db")
	if err := db.Restore(restoredPath); err != nil {
		t.Fatalf("Restore failed: %v", err)
	}

	if err := db.Validate(restoredPath); err != nil {
		t.Fatalf("Validation failed: %v", err)
	}

	t.Log("✓ Comprehensive test passed")
	t.Log("TEST PASSED: Overnight comprehensive test successful")
}
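
// DefaultLoadConfig, LoadPatternWave, and the LoadConfig struct come from the
// shared load helpers, which are not part of this diff. A shape consistent
// with the fields set in the tests above (and with the string(config.Pattern)
// conversion, which implies a named string type) would be roughly the
// following sketch — an assumption, not the actual definition:
//
//	type LoadPattern string
//
//	type LoadConfig struct {
//		WriteRate   int           // writes per second
//		Duration    time.Duration // how long to run
//		Pattern     LoadPattern   // e.g. LoadPatternWave = "wave"
//		PayloadSize int           // bytes per row payload
//		ReadRatio   float64       // fraction of operations that are reads
//		Workers     int           // concurrent workers
//	}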
121
tests/integration/quick_test.go
Normal file
@@ -0,0 +1,121 @@
//go:build integration

package integration

import (
	"context"
	"path/filepath"
	"testing"
	"time"

	_ "github.com/mattn/go-sqlite3"
)

func TestQuickValidation(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	RequireBinaries(t)

	startTime := time.Now()
	duration := GetTestDuration(t, 30*time.Minute)
	t.Logf("Testing: Quick validation test (duration: %v)", duration)
	t.Log("Default: 30 minutes, configurable via test duration")

	db := SetupTestDB(t, "quick-validation")
	defer db.Cleanup()
	defer db.PrintTestSummary(t, "Quick Validation Test", startTime)

	t.Log("[1] Creating and populating database...")
	if err := db.Create(); err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}

	if err := db.Populate("10MB"); err != nil {
		t.Fatalf("Failed to populate database: %v", err)
	}

	t.Log("✓ Database populated to 10MB")

	t.Log("[2] Starting Litestream...")
	if err := db.StartLitestream(); err != nil {
		t.Fatalf("Failed to start Litestream: %v", err)
	}

	time.Sleep(5 * time.Second)

	t.Log("[3] Generating wave pattern load...")
	ctx, cancel := context.WithTimeout(context.Background(), duration)
	defer cancel()

	config := DefaultLoadConfig()
	config.WriteRate = 100
	config.Duration = duration
	config.Pattern = LoadPatternWave
	config.PayloadSize = 4 * 1024
	config.Workers = 4

	if err := db.GenerateLoad(ctx, config.WriteRate, config.Duration, string(config.Pattern)); err != nil && ctx.Err() == nil {
		t.Fatalf("Load generation failed: %v", err)
	}

	t.Log("✓ Load generation complete")

	time.Sleep(10 * time.Second)

	t.Log("[4] Checking replica status...")
	fileCount, err := db.GetReplicaFileCount()
	if err != nil {
		t.Fatalf("Failed to check replica: %v", err)
	}

	if fileCount == 0 {
		t.Fatal("No LTX segments created!")
	}

	t.Logf("✓ LTX segments created: %d files", fileCount)

	dbSize, err := db.GetDatabaseSize()
	if err != nil {
		t.Fatalf("Failed to get database size: %v", err)
	}

	t.Logf("Database size: %.2f MB", float64(dbSize)/(1024*1024))

	t.Log("[5] Checking for errors...")
	errors, err := db.CheckForErrors()
	if err != nil {
		t.Fatalf("Failed to check errors: %v", err)
	}

	if len(errors) > 10 {
		t.Fatalf("Too many critical errors (%d), showing first 5:\n%v", len(errors), errors[:5])
	} else if len(errors) > 0 {
		t.Logf("Found %d errors (showing first 3):", len(errors))
		for i := 0; i < min(len(errors), 3); i++ {
			t.Logf("  %s", errors[i])
		}
	} else {
		t.Log("✓ No errors detected")
	}

	db.StopLitestream()
	time.Sleep(2 * time.Second)

	t.Log("[6] Testing restore...")
	restoredPath := filepath.Join(db.TempDir, "quick-restored.db")
	if err := db.Restore(restoredPath); err != nil {
		t.Fatalf("Restore failed: %v", err)
	}

	t.Log("✓ Restore successful")

	t.Log("[7] Validating restoration...")
	if err := db.QuickValidate(restoredPath); err != nil {
		t.Fatalf("Validation failed: %v", err)
	}

	t.Log("✓ Validation passed")
	t.Log("TEST PASSED: Quick validation successful")
}
342
tests/integration/scenario_test.go
Normal file
@@ -0,0 +1,342 @@
|
||||
//go:build integration
|
||||
|
||||
package integration
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
func TestFreshStart(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping integration test in short mode")
|
||||
}
|
||||
|
||||
RequireBinaries(t)
|
||||
|
||||
t.Log("Testing: Starting replication with a fresh (empty) database")
|
||||
t.Log("This tests if Litestream works correctly when it creates the database from scratch")
|
||||
|
||||
db := SetupTestDB(t, "fresh-start")
|
||||
defer db.Cleanup()
|
||||
|
||||
t.Log("[1] Starting Litestream with non-existent database...")
|
||||
if err := db.StartLitestream(); err != nil {
|
||||
t.Fatalf("Failed to start Litestream: %v", err)
|
||||
}
|
||||
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
t.Log("[2] Creating database while Litestream is running...")
|
||||
sqlDB, err := sql.Open("sqlite3", db.Path)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to open database: %v", err)
|
||||
}
|
||||
|
||||
if _, err := sqlDB.Exec("PRAGMA journal_mode=WAL"); err != nil {
|
||||
t.Fatalf("Failed to set WAL mode: %v", err)
|
||||
}
|
||||
|
||||
if _, err := sqlDB.Exec("CREATE TABLE test (id INTEGER PRIMARY KEY, data TEXT)"); err != nil {
|
||||
t.Fatalf("Failed to create table: %v", err)
|
||||
}
|
||||
|
||||
if _, err := sqlDB.Exec("INSERT INTO test (data) VALUES ('initial data')"); err != nil {
|
||||
t.Fatalf("Failed to insert initial data: %v", err)
|
||||
}
|
||||
sqlDB.Close()
|
||||
|
||||
time.Sleep(3 * time.Second)
|
||||
|
||||
t.Log("[3] Checking if Litestream detected the database...")
|
||||
log, err := db.GetLitestreamLog()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to read log: %v", err)
|
||||
}
|
||||
|
||||
t.Logf("Litestream log snippet:\n%s", log[:min(len(log), 500)])
|
||||
|
||||
t.Log("[4] Adding data to test replication...")
|
||||
sqlDB, err = sql.Open("sqlite3", db.Path)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to open database: %v", err)
|
||||
}
|
||||
|
||||
for i := 1; i <= 100; i++ {
|
||||
if _, err := sqlDB.Exec("INSERT INTO test (data) VALUES (?)", fmt.Sprintf("row %d", i)); err != nil {
|
||||
t.Fatalf("Failed to insert row %d: %v", i, err)
|
||||
}
|
||||
}
|
||||
sqlDB.Close()
|
||||
|
||||
time.Sleep(5 * time.Second)
|
||||
|
||||
t.Log("[5] Checking for errors...")
|
||||
errors, err := db.CheckForErrors()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to check errors: %v", err)
|
||||
}
|
||||
|
||||
if len(errors) > 1 {
|
||||
t.Logf("Found %d errors (showing first 3):", len(errors))
|
||||
for i := 0; i < min(len(errors), 3); i++ {
|
||||
t.Logf(" %s", errors[i])
|
||||
}
|
||||
} else {
|
||||
t.Log("✓ No significant errors")
|
||||
}
|
||||
|
||||
t.Log("[6] Checking replica files...")
|
||||
fileCount, err := db.GetReplicaFileCount()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to get replica file count: %v", err)
|
||||
}
|
||||
|
||||
if fileCount == 0 {
|
||||
t.Fatal("✗ No replica files created!")
|
||||
}
|
||||
|
||||
t.Logf("✓ Replica created with %d LTX files", fileCount)
|
||||
|
||||
db.StopLitestream()
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
t.Log("[7] Testing restore...")
|
||||
restoredPath := filepath.Join(db.TempDir, "fresh-restored.db")
|
||||
if err := db.Restore(restoredPath); err != nil {
|
||||
t.Fatalf("✗ Restore failed: %v", err)
|
||||
}
|
||||
|
||||
t.Log("✓ Restore successful")
|
||||
|
||||
origCount, err := db.GetRowCount("test")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to get original row count: %v", err)
|
||||
}
|
||||
|
||||
restoredDB := &TestDB{Path: restoredPath, t: t}
|
||||
restCount, err := restoredDB.GetRowCount("test")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to get restored row count: %v", err)
|
||||
}
|
||||
|
||||
if origCount != restCount {
|
||||
t.Fatalf("✗ Data mismatch: Original=%d, Restored=%d", origCount, restCount)
|
||||
}
|
||||
|
||||
t.Logf("✓ Data integrity verified: %d rows", origCount)
|
||||
t.Log("TEST PASSED: Fresh start works correctly")
|
||||
}

func TestDatabaseIntegrity(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	RequireBinaries(t)

	t.Log("Testing: Complex data patterns and integrity after restore")

	db := SetupTestDB(t, "integrity-test")
	defer db.Cleanup()

	if err := db.Create(); err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}

	t.Log("[1] Creating complex schema...")
	sqlDB, err := sql.Open("sqlite3", db.Path)
	if err != nil {
		t.Fatalf("Failed to open database: %v", err)
	}
	defer sqlDB.Close()

	if err := CreateComplexTestSchema(sqlDB); err != nil {
		t.Fatalf("Failed to create schema: %v", err)
	}

	t.Log("✓ Schema created")

	t.Log("[2] Populating with test data...")
	if err := PopulateComplexTestData(sqlDB, 10, 5, 3); err != nil {
		t.Fatalf("Failed to populate data: %v", err)
	}

	t.Log("✓ Data populated (10 users, 50 posts, 150 comments)")

	t.Log("[3] Starting Litestream...")
	if err := db.StartLitestream(); err != nil {
		t.Fatalf("Failed to start Litestream: %v", err)
	}

	time.Sleep(10 * time.Second)

	db.StopLitestream()
	time.Sleep(2 * time.Second)

	t.Log("[4] Checking integrity of original database...")
	var integrityResult string
	if err := sqlDB.QueryRow("PRAGMA integrity_check").Scan(&integrityResult); err != nil {
		t.Fatalf("Integrity check failed: %v", err)
	}

	if integrityResult != "ok" {
		t.Fatalf("Source database integrity check failed: %s", integrityResult)
	}

	t.Log("✓ Source database integrity OK")

	t.Log("[5] Restoring database...")
	restoredPath := filepath.Join(db.TempDir, "integrity-restored.db")
	if err := db.Restore(restoredPath); err != nil {
		t.Fatalf("Restore failed: %v", err)
	}

	t.Log("✓ Restore successful")

	t.Log("[6] Checking integrity of restored database...")
	restoredDB, err := sql.Open("sqlite3", restoredPath)
	if err != nil {
		t.Fatalf("Failed to open restored database: %v", err)
	}
	defer restoredDB.Close()

	if err := restoredDB.QueryRow("PRAGMA integrity_check").Scan(&integrityResult); err != nil {
		t.Fatalf("Restored integrity check failed: %v", err)
	}

	if integrityResult != "ok" {
		t.Fatalf("Restored database integrity check failed: %s", integrityResult)
	}

	t.Log("✓ Restored database integrity OK")

	t.Log("[7] Validating data consistency...")
	tables := []string{"users", "posts", "comments"}
	for _, table := range tables {
		var sourceCount, restoredCount int

		if err := sqlDB.QueryRow(fmt.Sprintf("SELECT COUNT(*) FROM %s", table)).Scan(&sourceCount); err != nil {
			t.Fatalf("Failed to count source %s: %v", table, err)
		}

		if err := restoredDB.QueryRow(fmt.Sprintf("SELECT COUNT(*) FROM %s", table)).Scan(&restoredCount); err != nil {
			t.Fatalf("Failed to count restored %s: %v", table, err)
		}

		if sourceCount != restoredCount {
			t.Fatalf("Count mismatch for %s: source=%d, restored=%d", table, sourceCount, restoredCount)
		}

		t.Logf("✓ Table %s: %d rows match", table, sourceCount)
	}

	t.Log("TEST PASSED: Database integrity maintained through replication")
}
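
// CreateComplexTestSchema and PopulateComplexTestData are harness helpers.
// As an illustration only (the real schema may differ), the three tables
// validated above could be created roughly like this:
//
//	func createComplexTestSchema(db *sql.DB) error {
//		_, err := db.Exec(`
//			CREATE TABLE users    (id INTEGER PRIMARY KEY, name TEXT NOT NULL);
//			CREATE TABLE posts    (id INTEGER PRIMARY KEY, user_id INTEGER REFERENCES users(id), body TEXT);
//			CREATE TABLE comments (id INTEGER PRIMARY KEY, post_id INTEGER REFERENCES posts(id), body TEXT);
//		`)
//		return err
//	}
//
// With arguments (10, 5, 3), the populate helper is understood to insert 10
// users, 5 posts per user, and 3 comments per post, which yields the
// 10/50/150 counts logged in the test.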

func TestDatabaseDeletion(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	RequireBinaries(t)

	t.Log("Testing: Database deletion during active replication")

	db := SetupTestDB(t, "deletion-test")
	defer db.Cleanup()

	if err := db.Create(); err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}

	t.Log("[1] Creating test table and data...")
	if err := CreateTestTable(t, db.Path); err != nil {
		t.Fatalf("Failed to create table: %v", err)
	}

	if err := InsertTestData(t, db.Path, 100); err != nil {
		t.Fatalf("Failed to insert test data: %v", err)
	}

	t.Log("✓ Created table with 100 rows")

	t.Log("[2] Starting Litestream...")
	if err := db.StartLitestream(); err != nil {
		t.Fatalf("Failed to start Litestream: %v", err)
	}

	time.Sleep(5 * time.Second)

	fileCount, _ := db.GetReplicaFileCount()
	t.Logf("✓ Replication started (%d files)", fileCount)

	t.Log("[3] Deleting database files...")
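	// Remove the main database file plus its -wal and -shm sidecars so
	// Litestream observes the database vanishing mid-replication.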
	os.Remove(db.Path)
	os.Remove(db.Path + "-wal")
	os.Remove(db.Path + "-shm")

	time.Sleep(3 * time.Second)

	t.Log("✓ Database deleted")

	t.Log("[4] Checking Litestream behavior...")
	errors, err := db.CheckForErrors()
	if err != nil {
		t.Fatalf("Failed to check errors: %v", err)
	}

	t.Logf("Litestream reported %d error messages (expected after database deletion)", len(errors))

	db.StopLitestream()

	t.Log("[5] Verifying replica is still intact...")
	finalFileCount, err := db.GetReplicaFileCount()
	if err != nil {
		t.Fatalf("Failed to check replica: %v", err)
	}

	if finalFileCount == 0 {
		t.Fatalf("Replica appears to be empty or missing")
	}

	t.Logf("✓ Replica exists with %d files (was %d - compaction may have reduced count)", finalFileCount, fileCount)

	t.Log("[6] Testing restore from replica...")
	restoredPath := filepath.Join(db.TempDir, "deletion-restored.db")
	if err := db.Restore(restoredPath); err != nil {
		t.Fatalf("Restore failed: %v", err)
	}

	t.Log("✓ Restore successful")

	restoredDB := &TestDB{Path: restoredPath, t: t}
	restCount, err := restoredDB.GetRowCount("test_data")
	if err != nil {
		t.Fatalf("Failed to get restored row count: %v", err)
	}

	if restCount != 100 {
		t.Fatalf("Expected 100 rows, got %d", restCount)
	}

	t.Logf("✓ Restored database has correct data: %d rows", restCount)
	t.Log("TEST PASSED: Replica survives source database deletion")
}
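
// GetReplicaFileCount is a harness helper. A plausible minimal sketch,
// assuming a file-based replica directory holding LTX files (the real helper
// may filter paths differently):
//
//	func countReplicaFiles(replicaDir string) (n int, err error) {
//		err = filepath.WalkDir(replicaDir, func(path string, d fs.DirEntry, walkErr error) error {
//			if walkErr != nil {
//				return walkErr
//			}
//			if !d.IsDir() && strings.HasSuffix(path, ".ltx") {
//				n++
//			}
//			return nil
//		})
//		return n, err
//	}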

// TestReplicaFailover was removed because Litestream no longer supports
// multiple replicas on a single database (see cmd/litestream/main.go).
// The bash script test-replica-failover.sh was also non-functional.

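// min returns the smaller of a and b. (Go 1.21 and later also provide a
// built-in min.)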
func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}
1150
tests/integration/soak_helpers.go
Normal file
File diff suppressed because it is too large