Improve test scripts with comprehensive validation and MinIO S3 testing (#763)

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Cory LaNou
2025-10-14 10:44:46 -05:00
committed by GitHub
parent 6465f6b18b
commit 7e64a4cded
5 changed files with 1099 additions and 105 deletions

392
scripts/test-comprehensive.sh Executable file
View File

@@ -0,0 +1,392 @@
#!/bin/bash
set -euo pipefail
# Comprehensive validation test with aggressive settings.
# Exercises replication, snapshots, compaction and checkpoints.
# TEST_DURATION may be overridden from the environment; it defaults to 2 hours.
TEST_DURATION="${TEST_DURATION:-2h}"
# Every artifact of a run lives under one timestamped directory.
TEST_DIR="/tmp/litestream-comprehensive-$(date +%Y%m%d-%H%M%S)"
DB_PATH="$TEST_DIR/test.db"
REPLICA_PATH="$TEST_DIR/replica"
CONFIG_FILE="$TEST_DIR/litestream.yml"
LOG_DIR="$TEST_DIR/logs"
# Print the run banner in one go.
cat <<EOF
================================================
Litestream Comprehensive Validation Test
================================================
Duration: $TEST_DURATION
Test directory: $TEST_DIR
Start time: $(date)

This test uses aggressive settings to validate:
 - Continuous replication
 - Snapshot generation (every 10m)
 - Compaction (30s/1m/5m intervals)
 - Checkpoint operations
 - Database restoration

EOF
# Handler registered for EXIT, INT and TERM: signals every background job of
# this shell, waits for them, then reports where the results were written.
cleanup() {
  printf '\n%s\n' "Cleaning up..."
  # Kill all spawned processes
  jobs -p | xargs -r kill 2>/dev/null || true
  wait
  printf '%s\n' "Test completed at: $(date)" "Results saved in: $TEST_DIR"
}
trap cleanup EXIT INT TERM
# Create the working directories for this run
mkdir -p "$TEST_DIR" "$LOG_DIR" "$REPLICA_PATH"
# Build each binary only when it is not already present under bin/
echo "Building binaries..."
for tool in litestream litestream-test; do
  [ -f "bin/$tool" ] || go build -o "bin/$tool" "./cmd/$tool"
done
# Create test database and populate BEFORE starting litestream
echo "Creating and populating test database..."
sqlite3 "$DB_PATH" <<EOF
PRAGMA journal_mode=WAL;
PRAGMA page_size=4096;
CREATE TABLE IF NOT EXISTS test_data (
id INTEGER PRIMARY KEY AUTOINCREMENT,
data BLOB,
created_at INTEGER DEFAULT (strftime('%s', 'now'))
);
EOF
# Populate database with initial data (50MB to ensure activity)
echo "Populating database (50MB initial data)..."
# The command must run inside the `if` condition: with `set -e` active, a bare
# failing command aborts the script before a following `$?` test is reached,
# which made the "continuing" branch unreachable.
if ! bin/litestream-test populate -db "$DB_PATH" -target-size 50MB -batch-size 1000 > "$LOG_DIR/populate.log" 2>&1; then
  echo "Warning: Population failed, but continuing..."
  # Showing the log is best-effort; a missing log must not abort the run.
  cat "$LOG_DIR/populate.log" || true
fi
# Create configuration with Ben's recommended aggressive settings
echo "Creating test configuration with aggressive intervals..."
# Writes the config file that is later passed to `litestream replicate -config`.
# The heredoc delimiter is unquoted, so $DB_PATH and $REPLICA_PATH are expanded
# by the shell at the moment the file is written.
cat > "$CONFIG_FILE" <<EOF
# Aggressive snapshot settings per Ben's request
snapshot:
interval: 10m # Snapshots every 10 minutes
retention: 1h # Keep data for 1 hour
# Aggressive compaction levels: 30s/1m/5m/15m/30m intervals
levels:
- interval: 30s
- interval: 1m
- interval: 5m
- interval: 15m
- interval: 30m
dbs:
- path: $DB_PATH
# Checkpoint settings to ensure checkpoints happen
checkpoint-interval: 1m # Check for checkpoint every minute
min-checkpoint-page-count: 100 # Low threshold to trigger checkpoints
max-checkpoint-page-count: 5000 # Force checkpoint at this size
replicas:
- type: file
path: $REPLICA_PATH
retention-check-interval: 5m # Check retention every 5 minutes
EOF
# Launch litestream in the background with debug logging and remember its PID.
echo "Starting litestream..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
# Give the process a moment, then confirm it is still alive.
sleep 3
if kill -0 "$LITESTREAM_PID" 2>/dev/null; then
  echo "Litestream running (PID: $LITESTREAM_PID)"
  echo ""
else
  echo "ERROR: Litestream failed to start!"
  tail -50 "$LOG_DIR/litestream.log"
  exit 1
fi
# Start load generator with heavy sustained load
echo "Starting load generator (heavy sustained load)..."
load_args=(
  -db "$DB_PATH"
  -write-rate 500
  -duration "$TEST_DURATION"
  -pattern wave
  -payload-size 4096
  -read-ratio 0.3
  -workers 8
)
bin/litestream-test load "${load_args[@]}" > "$LOG_DIR/load.log" 2>&1 &
LOAD_PID=$!
echo "Load generator running (PID: $LOAD_PID)"
echo ""
# Prints the size in bytes of file $1, trying BSD `stat -f%z` first and GNU
# `stat -c%s` second; prints 0 when neither form succeeds.
_monitor_file_size() {
  local bytes
  bytes=$(stat -f%z "$1" 2>/dev/null || stat -c%s "$1" 2>/dev/null) || bytes=0
  printf '%s\n' "${bytes:-0}"
}

# Prints byte count $1 through numfmt, or "<n> bytes" when numfmt fails.
_monitor_human_size() {
  numfmt --to=iec-i --suffix=B "$1" 2>/dev/null || echo "$1 bytes"
}

# Prints how many lines of file $2 contain pattern $1. Always prints exactly
# one integer: `grep -c` prints 0 but exits 1 when nothing matches, and that
# status must not reach `set -e`, or the monitor dies silently.
_monitor_count_matches() {
  local total
  total=$(grep -c -- "$1" "$2" 2>/dev/null) || true
  printf '%s\n' "${total:-0}"
}

# Prints the number of lines in file $1 that contain "ERROR" (any case) but
# not the known non-critical "page size not initialized" message.
_monitor_count_critical_errors() {
  local total
  # With pipefail this pipeline is non-zero whenever there are no errors.
  total=$(grep -i "ERROR" "$1" 2>/dev/null | grep -vc "page size not initialized") || true
  printf '%s\n' "${total:-0}"
}

# Monitor function with detailed metrics.
# Loops forever: sleeps 60 seconds, prints a status report, and stops once the
# litestream process or the load generator is gone.
# Globals (read): DB_PATH, REPLICA_PATH, LOG_DIR, LITESTREAM_PID, LOAD_PID
# Outputs: status report on stdout
monitor_comprehensive() {
  local last_checkpoint_count=0
  local last_compaction_count=0
  local last_sync_count=0
  local log_file="$LOG_DIR/litestream.log"
  local db_size wal_size tables row_table row_count
  local snapshots ltx_files replica_size
  local checkpoint_count compaction_count sync_count
  local checkpoint_delta compaction_delta sync_delta
  local error_count load_status
  while true; do
    sleep 60 # Check every minute
    echo "[$(date +%H:%M:%S)] Status Report"
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    # Database metrics
    if [ -f "$DB_PATH" ]; then
      db_size=$(_monitor_file_size "$DB_PATH")
      echo " Database size: $(_monitor_human_size "$db_size")"
      # WAL file size (indicates write activity)
      if [ -f "$DB_PATH-wal" ]; then
        wal_size=$(_monitor_file_size "$DB_PATH-wal")
        echo " WAL size: $(_monitor_human_size "$wal_size")"
      fi
      # Row count; a failing sqlite3 call must not end the monitor.
      tables=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null) || tables=""
      row_table=""
      case "$tables" in
        *load_test*) row_table="load_test" ;;
        *test_table_0*) row_table="test_table_0" ;;
      esac
      if [ -n "$row_table" ]; then
        row_count=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM $row_table" 2>/dev/null || echo "0")
        echo " Rows in database: $row_count"
      fi
    fi
    # Replication metrics
    if [ -d "$REPLICA_PATH" ]; then
      # find/du can exit non-zero (for example when a file disappears while
      # being scanned); keep whatever was counted instead of aborting.
      snapshots=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ') || true
      # Count LTX files (WAL segments)
      ltx_files=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ') || true
      replica_size=$(du -sh "$REPLICA_PATH" 2>/dev/null | cut -f1) || true
      echo " Replica: $snapshots snapshots, $ltx_files segments, size: $replica_size"
    fi
    # Operation metrics (with delta since last check)
    if [ -f "$log_file" ]; then
      checkpoint_count=$(_monitor_count_matches "checkpoint" "$log_file")
      compaction_count=$(_monitor_count_matches "compaction complete" "$log_file")
      sync_count=$(_monitor_count_matches "replica sync" "$log_file")
      checkpoint_delta=$((checkpoint_count - last_checkpoint_count))
      compaction_delta=$((compaction_count - last_compaction_count))
      sync_delta=$((sync_count - last_sync_count))
      echo " Operations: $checkpoint_count checkpoints (+$checkpoint_delta), $compaction_count compactions (+$compaction_delta)"
      echo " Syncs: $sync_count total (+$sync_delta in last minute)"
      last_checkpoint_count=$checkpoint_count
      last_compaction_count=$compaction_count
      last_sync_count=$sync_count
    fi
    # Check for errors (excluding known non-critical)
    error_count=$(_monitor_count_critical_errors "$log_file")
    if [ "$error_count" -gt 0 ]; then
      echo " ⚠ Critical errors: $error_count"
      grep -i "ERROR" "$log_file" | grep -v "page size not initialized" | tail -2 || true
    fi
    # Load generator status
    if [ -f "$LOG_DIR/load.log" ]; then
      load_status=$(tail -1 "$LOG_DIR/load.log" 2>/dev/null | grep -oE "writes_per_sec=[0-9.]+" | cut -d= -f2 || echo "0")
      echo " Write rate: ${load_status:-0} writes/sec"
    fi
    # Check processes
    if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
      echo " ✗ Litestream stopped unexpectedly!"
      break
    fi
    if ! kill -0 "$LOAD_PID" 2>/dev/null; then
      echo " ✓ Load test completed"
      break
    fi
    echo ""
  done
}
printf '%s\n' \
  "Running comprehensive test for $TEST_DURATION..." \
  "Monitor will report every 60 seconds" \
  "================================================" \
  ""
# Run the status monitor as a background job and keep its PID
monitor_comprehensive &
MONITOR_PID=$!
# Block until the load generator exits; its exit status is ignored
wait "$LOAD_PID" 2>/dev/null || true
# The monitor is no longer needed: signal it and reap it
kill "$MONITOR_PID" 2>/dev/null || true
wait "$MONITOR_PID" 2>/dev/null || true
echo ""
echo "================================================"
echo "Final Test Results"
echo "================================================"
# Prints how many lines of file $2 contain pattern $1, always as exactly one
# integer. `grep -c` already prints 0 on no match (and exits 1), so appending
# `|| echo "0"` produced the two-line value "0\n0", which later broke the
# numeric tests in the summary.
final_count_matches() {
  local total
  total=$(grep -c -- "$1" "$2" 2>/dev/null) || true
  printf '%s\n' "${total:-0}"
}
# Final statistics
echo "Database Statistics:"
# Default so later comparisons work under `set -u` even without a database.
ROW_COUNT="0"
if [ -f "$DB_PATH" ]; then
  DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null) || DB_SIZE=0
  # Find the actual table name
  TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null) || TABLES=""
  case "$TABLES" in
    *load_test*) final_table="load_test" ;;
    *test_table_0*) final_table="test_table_0" ;;
    *test_data*) final_table="test_data" ;;
    *) final_table="" ;;
  esac
  if [ -n "$final_table" ]; then
    ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM $final_table" 2>/dev/null || echo "0")
  fi
  echo " Final size: $(numfmt --to=iec-i --suffix=B "$DB_SIZE" 2>/dev/null || echo "$DB_SIZE bytes")"
  echo " Total rows: $ROW_COUNT"
fi
echo ""
echo "Replication Statistics:"
# Defaults so the summary's numeric tests work when the replica dir is absent.
SNAPSHOT_COUNT="0"
LTX_COUNT="0"
if [ -d "$REPLICA_PATH" ]; then
  SNAPSHOT_COUNT=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ') || true
  LTX_COUNT=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ') || true
  REPLICA_SIZE=$(du -sh "$REPLICA_PATH" | cut -f1) || true
  echo " Snapshots created: $SNAPSHOT_COUNT"
  echo " LTX segments: $LTX_COUNT"
  echo " Replica size: $REPLICA_SIZE"
fi
echo ""
echo "Operation Counts:"
if [ -f "$LOG_DIR/litestream.log" ]; then
  COMPACTION_COUNT=$(final_count_matches "compaction complete" "$LOG_DIR/litestream.log")
  CHECKPOINT_COUNT=$(final_count_matches "checkpoint" "$LOG_DIR/litestream.log")
  SYNC_COUNT=$(final_count_matches "replica sync" "$LOG_DIR/litestream.log")
  # `grep -vc` always prints one count; the pipeline status is non-zero when
  # there are no errors (pipefail), so it is deliberately ignored.
  ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -vc "page size not initialized") || true
  ERROR_COUNT=${ERROR_COUNT:-0}
else
  COMPACTION_COUNT="0"
  CHECKPOINT_COUNT="0"
  SYNC_COUNT="0"
  ERROR_COUNT="0"
fi
echo " Compactions: $COMPACTION_COUNT"
echo " Checkpoints: $CHECKPOINT_COUNT"
echo " Syncs: $SYNC_COUNT"
echo " Errors: $ERROR_COUNT"
# Validation test
echo ""
echo "Testing validation..."
# The command is the `if` condition itself: with `set -e`, a failing bare
# command would end the script before a following `$?` test, so the failure
# branch below could never run.
if bin/litestream-test validate \
  -source "$DB_PATH" \
  -replica "$REPLICA_PATH" \
  > "$LOG_DIR/validate.log" 2>&1; then
  echo " ✓ Validation passed!"
else
  echo " ✗ Validation failed!"
  # Best-effort excerpt of the validator output.
  tail -10 "$LOG_DIR/validate.log" || true
fi
# Test restoration
echo ""
echo "Testing restoration..."
RESTORE_DB="$TEST_DIR/restored.db"
# Run the restore as the `if` condition so that, under `set -e`, a failed
# restore reaches the failure branch instead of aborting the script.
if bin/litestream restore -o "$RESTORE_DB" "file://$REPLICA_PATH" > "$LOG_DIR/restore.log" 2>&1; then
  # Get row count from restored database. The table lookup mirrors the one
  # used for the source database (including the test_data fallback) so both
  # counts refer to the same table.
  TABLES=$(sqlite3 "$RESTORE_DB" ".tables" 2>/dev/null) || TABLES=""
  case "$TABLES" in
    *load_test*) restored_table="load_test" ;;
    *test_table_0*) restored_table="test_table_0" ;;
    *test_data*) restored_table="test_data" ;;
    *) restored_table="" ;;
  esac
  if [ -n "$restored_table" ]; then
    RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM $restored_table" 2>/dev/null || echo "0")
  else
    RESTORED_COUNT="0"
  fi
  if [ "$RESTORED_COUNT" = "${ROW_COUNT:-0}" ]; then
    echo " ✓ Restoration successful! ($RESTORED_COUNT rows match)"
  else
    echo " ⚠ Row count mismatch! Original: ${ROW_COUNT:-0}, Restored: $RESTORED_COUNT"
  fi
else
  echo " ✗ Restoration failed!"
  tail -10 "$LOG_DIR/restore.log" || true
fi
# Summary
echo ""
echo "================================================"
echo "Test Summary"
echo "================================================"
# Prints the number of lines in file $1 containing "ERROR" (any case) but not
# the known non-critical "page size not initialized" message. Always prints a
# single integer and returns 0: with `set -e` and pipefail, the plain
# grep | grep -v | wc pipeline is non-zero when the log has no errors, which
# aborted the script right before a passing summary could be printed.
count_critical_errors() {
  local total
  total=$(grep -i "ERROR" "$1" 2>/dev/null | grep -vc "page size not initialized") || true
  printf '%s\n' "${total:-0}"
}
# Count critical errors (exclude known non-critical ones)
CRITICAL_ERROR_COUNT=$(count_critical_errors "$LOG_DIR/litestream.log")
# Counters may be unset (their producing branch did not run) or hold two lines
# ("0<newline>0" from `grep -c ... || echo "0"`); keep the first line only so
# the numeric tests below are well-defined.
LTX_COUNT=${LTX_COUNT:-0}
SNAPSHOT_COUNT=${SNAPSHOT_COUNT:-0}
CHECKPOINT_COUNT=${CHECKPOINT_COUNT:-0}
CHECKPOINT_COUNT=${CHECKPOINT_COUNT%%$'\n'*}
COMPACTION_COUNT=${COMPACTION_COUNT:-0}
COMPACTION_COUNT=${COMPACTION_COUNT%%$'\n'*}
# Determine test result
TEST_PASSED=true
ISSUES=""
if [ "$CRITICAL_ERROR_COUNT" -gt 0 ]; then
  TEST_PASSED=false
  ISSUES="$ISSUES\n - Critical errors detected: $CRITICAL_ERROR_COUNT"
fi
if [ "$LTX_COUNT" -eq 0 ]; then
  TEST_PASSED=false
  ISSUES="$ISSUES\n - No LTX segments created (replication not working)"
fi
# The next two findings are recorded but do not fail the test.
if [ "$CHECKPOINT_COUNT" -eq 0 ]; then
  ISSUES="$ISSUES\n - No checkpoints recorded (may need more aggressive settings)"
fi
if [ "$COMPACTION_COUNT" -eq 0 ]; then
  ISSUES="$ISSUES\n - No compactions occurred (unexpected for this test duration)"
fi
if [ "$TEST_PASSED" = true ]; then
  echo "✓ COMPREHENSIVE TEST PASSED!"
  echo ""
  echo "Successfully validated:"
  echo " - Continuous replication ($LTX_COUNT segments)"
  echo " - Compaction ($COMPACTION_COUNT operations)"
  if [ "$CHECKPOINT_COUNT" -gt 0 ]; then
    echo " - Checkpoints ($CHECKPOINT_COUNT operations)"
  fi
  if [ "$SNAPSHOT_COUNT" -gt 0 ]; then
    echo " - Snapshots ($SNAPSHOT_COUNT created)"
  fi
  echo " - Database restoration"
  echo ""
  echo "The configuration is ready for production use."
else
  echo "⚠ TEST COMPLETED WITH ISSUES:"
  echo -e "$ISSUES"
  echo ""
  echo "Review the logs for details:"
  echo " $LOG_DIR/litestream.log"
fi
# NOTE(review): the exit status is not derived from TEST_PASSED here; callers
# that need a failing status must parse the output - confirm whether intended.
echo ""
echo "Full test results available in: $TEST_DIR"
echo "================================================"

464
scripts/test-minio-s3.sh Executable file
View File

@@ -0,0 +1,464 @@
#!/bin/bash
set -euo pipefail
# MinIO S3-compatible test with Docker.
# Runs Litestream against a local MinIO instance to simulate S3 behavior.
# TEST_DURATION may be overridden from the environment; it defaults to 2 hours.
TEST_DURATION="${TEST_DURATION:-2h}"
TEST_DIR="/tmp/litestream-minio-$(date +%Y%m%d-%H%M%S)"
DB_PATH="$TEST_DIR/test.db"
CONFIG_FILE="$TEST_DIR/litestream.yml"
LOG_DIR="$TEST_DIR/logs"
# MinIO settings - host ports 9100/9101 are used instead of 9000/9001 to avoid conflicts
MINIO_CONTAINER_NAME="litestream-minio-test"
MINIO_PORT=9100
MINIO_CONSOLE_PORT=9101
MINIO_ROOT_USER="minioadmin"
MINIO_ROOT_PASSWORD="minioadmin"
MINIO_BUCKET="litestream-test"
MINIO_ENDPOINT="http://localhost:${MINIO_PORT}"
# Each run replicates under its own timestamped prefix inside the bucket.
S3_PATH="s3://${MINIO_BUCKET}/litestream-test-$(date +%Y%m%d-%H%M%S)"
# Print the run banner in one go.
cat <<EOF
================================================
Litestream MinIO S3 Test
================================================
Duration: $TEST_DURATION
Test directory: $TEST_DIR
MinIO endpoint: $MINIO_ENDPOINT
MinIO bucket: $MINIO_BUCKET
Start time: $(date)

EOF
# Docker is a hard requirement: stop right away when it cannot be found
command -v docker > /dev/null 2>&1 || {
  echo "Error: Docker is not installed or not in PATH"
  echo "Please install Docker to run this test"
  exit 1
}
# Handler registered for EXIT, INT and TERM: signals every background job of
# this shell, stops and removes the MinIO container when its name is set, and
# reports where the results were written.
cleanup() {
  local rule="================================================"
  printf '\n%s\n%s\n%s\n' "$rule" "Cleaning up..." "$rule"
  # Kill all spawned processes
  jobs -p | xargs -r kill 2>/dev/null || true
  wait 2>/dev/null || true
  # Stop and remove MinIO container
  if [ -n "${MINIO_CONTAINER_NAME:-}" ]; then
    echo "Stopping MinIO container..."
    docker stop "$MINIO_CONTAINER_NAME" 2>/dev/null || true
    docker rm "$MINIO_CONTAINER_NAME" 2>/dev/null || true
  fi
  printf '\n%s\n%s\n' "Test completed at: $(date)" "Results saved in: $TEST_DIR"
}
trap cleanup EXIT INT TERM
# Create directories
mkdir -p "$TEST_DIR" "$LOG_DIR"
# Clean up any existing container.
# `docker container inspect` succeeds only for a container with exactly this
# name. The previous `docker ps -a | grep -q` matched substrings of any column
# and, under pipefail, could report failure when grep closed the pipe early.
if docker container inspect "$MINIO_CONTAINER_NAME" > /dev/null 2>&1; then
  echo "Removing existing MinIO container..."
  docker stop "$MINIO_CONTAINER_NAME" 2>/dev/null || true
  docker rm "$MINIO_CONTAINER_NAME" 2>/dev/null || true
fi
# Start MinIO container (host ports map onto the container's 9000/9001)
echo "Starting MinIO container..."
docker run -d \
  --name "$MINIO_CONTAINER_NAME" \
  -p "${MINIO_PORT}:9000" \
  -p "${MINIO_CONSOLE_PORT}:9001" \
  -e "MINIO_ROOT_USER=${MINIO_ROOT_USER}" \
  -e "MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}" \
  minio/minio server /data --console-address ":9001"
echo "Waiting for MinIO to start..."
sleep 5
# Check if MinIO is running by asking Docker for the container state directly
if [ "$(docker inspect -f '{{.State.Running}}' "$MINIO_CONTAINER_NAME" 2>/dev/null)" != "true" ]; then
  echo "Error: MinIO container failed to start"
  # Showing the container log is best-effort; still exit with status 1.
  docker logs "$MINIO_CONTAINER_NAME" 2>&1 || true
  exit 1
fi
echo "MinIO is running!"
echo " API: http://localhost:${MINIO_PORT} (mapped from container port 9000)"
echo " Console: http://localhost:${MINIO_CONSOLE_PORT} (mapped from container port 9001)"
echo " Credentials: ${MINIO_ROOT_USER}/${MINIO_ROOT_PASSWORD}"
echo ""
# Create MinIO bucket using mc (MinIO Client) in Docker.
# The server may still be starting, so the call is retried a few times.
# `--ignore-existing` makes an already-present bucket count as success, which
# lets real failures be reported instead of being hidden behind `|| true`.
echo "Creating MinIO bucket..."
bucket_ready=false
for bucket_attempt in 1 2 3 4 5; do
  if docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
    -e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
    minio/mc mb --ignore-existing "minio/${MINIO_BUCKET}" 2>/dev/null; then
    bucket_ready=true
    break
  fi
  sleep 2
done
if [ "$bucket_ready" = true ]; then
  echo "Bucket '${MINIO_BUCKET}' ready"
else
  # Stay non-fatal as before, but make the problem visible.
  echo "Warning: could not create bucket '${MINIO_BUCKET}' after ${bucket_attempt} attempts" >&2
fi
echo ""
# Build each binary only when it is not already present under bin/
echo "Building binaries..."
for tool in litestream litestream-test; do
  [ -f "bin/$tool" ] || go build -o "bin/$tool" "./cmd/$tool"
done
# Create and populate test database
echo "Creating and populating test database..."
sqlite3 "$DB_PATH" <<EOF
PRAGMA journal_mode=WAL;
PRAGMA page_size=4096;
CREATE TABLE IF NOT EXISTS test_data (
id INTEGER PRIMARY KEY AUTOINCREMENT,
data BLOB,
created_at INTEGER DEFAULT (strftime('%s', 'now'))
);
EOF
# Populate database with initial data
echo "Populating database (50MB initial data)..."
# The command must run inside the `if` condition: with `set -e` active, a bare
# failing command aborts the script before a following `$?` test is reached,
# which made the "continuing" branch unreachable.
if ! bin/litestream-test populate -db "$DB_PATH" -target-size 50MB -batch-size 1000 > "$LOG_DIR/populate.log" 2>&1; then
  echo "Warning: Population failed, but continuing..."
  # Showing the log is best-effort; a missing log must not abort the run.
  cat "$LOG_DIR/populate.log" || true
fi
# Create Litestream configuration for MinIO
echo "Creating Litestream configuration for MinIO S3..."
# Writes the config file that is later passed to `litestream replicate -config`.
# The heredoc delimiter is unquoted, so the MinIO credentials, $DB_PATH,
# ${S3_PATH} and ${MINIO_ENDPOINT} are expanded by the shell when the file is
# written; the credentials therefore end up in plain text in $CONFIG_FILE.
cat > "$CONFIG_FILE" <<EOF
# MinIO S3 endpoint configuration
access-key-id: ${MINIO_ROOT_USER}
secret-access-key: ${MINIO_ROOT_PASSWORD}
# Aggressive snapshot settings for testing
snapshot:
interval: 10m # Snapshots every 10 minutes
retention: 1h # Keep data for 1 hour
# Aggressive compaction levels: 30s/1m/5m/15m/30m intervals
levels:
- interval: 30s
- interval: 1m
- interval: 5m
- interval: 15m
- interval: 30m
dbs:
- path: $DB_PATH
# Checkpoint settings
checkpoint-interval: 1m
min-checkpoint-page-count: 100
max-checkpoint-page-count: 5000
replicas:
- url: ${S3_PATH}
endpoint: ${MINIO_ENDPOINT}
region: us-east-1
force-path-style: true
skip-verify: true
retention-check-interval: 5m
EOF
# Launch litestream in the background with debug logging and remember its PID.
echo "Starting litestream with MinIO backend..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
# Give the process a moment, then confirm it is still alive.
sleep 3
if kill -0 "$LITESTREAM_PID" 2>/dev/null; then
  echo "Litestream running (PID: $LITESTREAM_PID)"
  echo ""
else
  echo "ERROR: Litestream failed to start!"
  echo "Last 50 lines of log:"
  tail -50 "$LOG_DIR/litestream.log"
  exit 1
fi
# Start load generator
echo "Starting load generator (heavy sustained load)..."
load_args=(
  -db "$DB_PATH"
  -write-rate 500
  -duration "$TEST_DURATION"
  -pattern wave
  -payload-size 4096
  -read-ratio 0.3
  -workers 8
)
bin/litestream-test load "${load_args[@]}" > "$LOG_DIR/load.log" 2>&1 &
LOAD_PID=$!
echo "Load generator running (PID: $LOAD_PID)"
echo ""
# Prints the size in bytes of file $1, trying BSD `stat -f%z` first and GNU
# `stat -c%s` second; prints 0 when neither form succeeds.
_minio_file_size() {
  local bytes
  bytes=$(stat -f%z "$1" 2>/dev/null || stat -c%s "$1" 2>/dev/null) || bytes=0
  printf '%s\n' "${bytes:-0}"
}

# Prints byte count $1 through numfmt, or "<n> bytes" when numfmt fails.
_minio_human_size() {
  numfmt --to=iec-i --suffix=B "$1" 2>/dev/null || echo "$1 bytes"
}

# Prints the recursive `mc ls` listing of the test bucket; prints nothing when
# the listing cannot be obtained. Always returns 0.
_minio_list_objects() {
  docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
    -e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
    minio/mc ls "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null || true
}

# Prints how many lines of text $1 match pattern $2, always as exactly one
# integer (`grep -c` prints 0 and exits 1 on no match, so `|| echo "0"` would
# yield two lines).
_minio_count_lines() {
  local total
  total=$(printf '%s\n' "$1" | grep -c -- "$2") || true
  printf '%s\n' "${total:-0}"
}

# Prints how many lines of file $2 contain pattern $1 as one integer; the
# non-zero grep status on "no match" must not reach `set -e`.
_minio_count_matches() {
  local total
  total=$(grep -c -- "$1" "$2" 2>/dev/null) || true
  printf '%s\n' "${total:-0}"
}

# Prints the number of lines in file $1 that contain "ERROR" (any case) but
# not the known non-critical "page size not initialized" message.
_minio_count_critical_errors() {
  local total
  # With pipefail this pipeline is non-zero whenever there are no errors.
  total=$(grep -i "ERROR" "$1" 2>/dev/null | grep -vc "page size not initialized") || true
  printf '%s\n' "${total:-0}"
}

# Monitor function for MinIO.
# Loops forever: sleeps 60 seconds, prints a status report, and stops once the
# litestream process or the load generator is gone.
# Globals (read): DB_PATH, LOG_DIR, LITESTREAM_PID, LOAD_PID,
#                 MINIO_CONTAINER_NAME, MINIO_ROOT_USER, MINIO_ROOT_PASSWORD,
#                 MINIO_BUCKET
# Outputs: status report on stdout
monitor_minio() {
  local last_checkpoint_count=0
  local last_compaction_count=0
  local last_sync_count=0
  local log_file="$LOG_DIR/litestream.log"
  local db_size wal_size tables row_table row_count
  local listing object_count ltx_count snapshot_count
  local checkpoint_count compaction_count sync_count
  local checkpoint_delta compaction_delta sync_delta
  local error_count
  while true; do
    sleep 60
    echo "[$(date +%H:%M:%S)] Status Report"
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    # Database metrics
    if [ -f "$DB_PATH" ]; then
      db_size=$(_minio_file_size "$DB_PATH")
      echo " Database size: $(_minio_human_size "$db_size")"
      if [ -f "$DB_PATH-wal" ]; then
        wal_size=$(_minio_file_size "$DB_PATH-wal")
        echo " WAL size: $(_minio_human_size "$wal_size")"
      fi
      # Row count; a failing sqlite3 call must not end the monitor.
      tables=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null) || tables=""
      row_table=""
      case "$tables" in
        *load_test*) row_table="load_test" ;;
        *test_table_0*) row_table="test_table_0" ;;
      esac
      if [ -n "$row_table" ]; then
        row_count=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM $row_table" 2>/dev/null || echo "0")
        echo " Rows in database: $row_count"
      fi
    fi
    # MinIO/S3 metrics: list the bucket once and derive all three counts from
    # that single listing instead of starting three containers per report.
    echo ""
    echo " MinIO S3 Statistics:"
    listing=$(_minio_list_objects)
    object_count=$(_minio_count_lines "$listing" ".")
    # Count LTX files (modern format) and snapshots
    ltx_count=$(_minio_count_lines "$listing" "\.ltx")
    snapshot_count=$(_minio_count_lines "$listing" "snapshot")
    echo " Total objects: $object_count"
    echo " LTX segments: $ltx_count"
    echo " Snapshots: $snapshot_count"
    # Operation metrics
    if [ -f "$log_file" ]; then
      checkpoint_count=$(_minio_count_matches "checkpoint" "$log_file")
      compaction_count=$(_minio_count_matches "compaction complete" "$log_file")
      sync_count=$(_minio_count_matches "replica sync" "$log_file")
      checkpoint_delta=$((checkpoint_count - last_checkpoint_count))
      compaction_delta=$((compaction_count - last_compaction_count))
      sync_delta=$((sync_count - last_sync_count))
      echo ""
      echo " Operations: $checkpoint_count checkpoints (+$checkpoint_delta), $compaction_count compactions (+$compaction_delta)"
      echo " Syncs: $sync_count total (+$sync_delta in last minute)"
      last_checkpoint_count=$checkpoint_count
      last_compaction_count=$compaction_count
      last_sync_count=$sync_count
    fi
    # Check for errors
    error_count=$(_minio_count_critical_errors "$log_file")
    if [ "$error_count" -gt 0 ]; then
      echo " ⚠ Critical errors: $error_count"
      grep -i "ERROR" "$log_file" | grep -v "page size not initialized" | tail -2 || true
    fi
    # Check processes
    if ! kill -0 "$LITESTREAM_PID" 2>/dev/null; then
      echo " ✗ Litestream stopped unexpectedly!"
      break
    fi
    if ! kill -0 "$LOAD_PID" 2>/dev/null; then
      echo " ✓ Load test completed"
      break
    fi
    echo ""
  done
}
printf '%s\n' \
  "Running MinIO S3 test for $TEST_DURATION..." \
  "Monitor will report every 60 seconds" \
  "================================================" \
  ""
# Run the status monitor as a background job and keep its PID
monitor_minio &
MONITOR_PID=$!
# Block until the load generator exits; its exit status is ignored
wait "$LOAD_PID" 2>/dev/null || true
# The monitor is no longer needed: signal it and reap it
kill "$MONITOR_PID" 2>/dev/null || true
wait "$MONITOR_PID" 2>/dev/null || true
echo ""
echo "================================================"
echo "Final Test Results"
echo "================================================"
# Prints how many lines of file $2 contain pattern $1, always as exactly one
# integer. `grep -c` already prints 0 on no match (and exits 1), so appending
# `|| echo "0"` produced the two-line value "0\n0".
final_count_matches() {
  local total
  total=$(grep -c -- "$1" "$2" 2>/dev/null) || true
  printf '%s\n' "${total:-0}"
}
# Prints how many lines of text $1 match pattern $2 as exactly one integer.
final_count_lines() {
  local total
  total=$(printf '%s\n' "$1" | grep -c -- "$2") || true
  printf '%s\n' "${total:-0}"
}
# Final statistics
echo "Database Statistics:"
# Default so later comparisons work under `set -u` even without a database.
ROW_COUNT="0"
if [ -f "$DB_PATH" ]; then
  DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null) || DB_SIZE=0
  TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null) || TABLES=""
  case "$TABLES" in
    *load_test*) final_table="load_test" ;;
    *test_table_0*) final_table="test_table_0" ;;
    *test_data*) final_table="test_data" ;;
    *) final_table="" ;;
  esac
  if [ -n "$final_table" ]; then
    ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM $final_table" 2>/dev/null || echo "0")
  fi
  echo " Final size: $(numfmt --to=iec-i --suffix=B "$DB_SIZE" 2>/dev/null || echo "$DB_SIZE bytes")"
  echo " Total rows: $ROW_COUNT"
fi
echo ""
echo "MinIO S3 Statistics:"
# List the bucket once; an unavailable listing is treated as empty so every
# counter below is a single integer usable in the summary's numeric tests.
final_listing=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
  -e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
  minio/mc ls "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null) || final_listing=""
FINAL_OBJECTS=$(final_count_lines "$final_listing" ".")
FINAL_LTX=$(final_count_lines "$final_listing" "\.ltx")
FINAL_SNAPSHOTS=$(final_count_lines "$final_listing" "snapshot")
echo " Total objects in MinIO: $FINAL_OBJECTS"
echo " LTX segments: $FINAL_LTX"
echo " Snapshots: $FINAL_SNAPSHOTS"
# Get storage size (last line of `mc du`; "0" when nothing was printed)
STORAGE_INFO=$(docker run --rm --link "${MINIO_CONTAINER_NAME}:minio" \
  -e "MC_HOST_minio=http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@minio:9000" \
  minio/mc du "minio/${MINIO_BUCKET}/" --recursive 2>/dev/null | tail -1 || echo "0")
echo " Total storage used: $STORAGE_INFO"
echo ""
echo "Operation Counts:"
if [ -f "$LOG_DIR/litestream.log" ]; then
  COMPACTION_COUNT=$(final_count_matches "compaction complete" "$LOG_DIR/litestream.log")
  CHECKPOINT_COUNT=$(final_count_matches "checkpoint" "$LOG_DIR/litestream.log")
  SYNC_COUNT=$(final_count_matches "replica sync" "$LOG_DIR/litestream.log")
  # `grep -vc` always prints one count; the pipeline status is non-zero when
  # there are no errors (pipefail), so it is deliberately ignored.
  ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -vc "page size not initialized") || true
  ERROR_COUNT=${ERROR_COUNT:-0}
else
  COMPACTION_COUNT="0"
  CHECKPOINT_COUNT="0"
  SYNC_COUNT="0"
  ERROR_COUNT="0"
fi
echo " Compactions: $COMPACTION_COUNT"
echo " Checkpoints: $CHECKPOINT_COUNT"
echo " Syncs: $SYNC_COUNT"
echo " Errors: $ERROR_COUNT"
# Test restoration from MinIO
echo ""
echo "Testing restoration from MinIO S3..."
RESTORE_DB="$TEST_DIR/restored.db"
# Export credentials for litestream restore
export AWS_ACCESS_KEY_ID="${MINIO_ROOT_USER}"
export AWS_SECRET_ACCESS_KEY="${MINIO_ROOT_PASSWORD}"
# Create a config file for restoration
# NOTE(review): this config carries credentials only - no endpoint or
# force-path-style - while the replica URL is a plain s3:// path. Confirm that
# the restore really reaches the local MinIO instance with these settings.
cat > "$TEST_DIR/restore.yml" <<EOF
access-key-id: ${MINIO_ROOT_USER}
secret-access-key: ${MINIO_ROOT_PASSWORD}
EOF
# Run the restore as the `if` condition so that, under `set -e`, a failed
# restore reaches the failure branch instead of aborting the script.
if bin/litestream restore \
  -config "$TEST_DIR/restore.yml" \
  -o "$RESTORE_DB" \
  "$S3_PATH" > "$LOG_DIR/restore.log" 2>&1; then
  echo "✓ Restoration successful!"
  # Compare row counts, using the same table lookup order as for the source
  TABLES=$(sqlite3 "$RESTORE_DB" ".tables" 2>/dev/null) || TABLES=""
  case "$TABLES" in
    *load_test*) restored_table="load_test" ;;
    *test_table_0*) restored_table="test_table_0" ;;
    *test_data*) restored_table="test_data" ;;
    *) restored_table="" ;;
  esac
  if [ -n "$restored_table" ]; then
    RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM $restored_table" 2>/dev/null || echo "0")
  else
    RESTORED_COUNT="0"
  fi
  if [ "${ROW_COUNT:-0}" = "$RESTORED_COUNT" ]; then
    echo "✓ Row counts match! ($RESTORED_COUNT rows)"
  else
    echo "⚠ Row count mismatch! Original: ${ROW_COUNT:-0}, Restored: $RESTORED_COUNT"
  fi
else
  echo "✗ Restoration failed!"
  tail -20 "$LOG_DIR/restore.log" || true
fi
# Summary
echo ""
echo "================================================"
echo "Test Summary"
echo "================================================"
# Prints the number of lines in file $1 containing "ERROR" (any case) but not
# the known non-critical "page size not initialized" message. Always prints a
# single integer and returns 0: with `set -e` and pipefail, the plain
# grep | grep -v | wc pipeline is non-zero when the log has no errors, which
# aborted the script right before a passing summary could be printed.
count_critical_errors() {
  local total
  total=$(grep -i "ERROR" "$1" 2>/dev/null | grep -vc "page size not initialized") || true
  printf '%s\n' "${total:-0}"
}
CRITICAL_ERROR_COUNT=$(count_critical_errors "$LOG_DIR/litestream.log")
# Counters may be unset or hold two lines ("0<newline>0" from
# `... || echo "0"` after a command that already printed 0); keep the first
# line only so the numeric tests below are well-defined.
FINAL_OBJECTS=${FINAL_OBJECTS:-0}
FINAL_OBJECTS=${FINAL_OBJECTS%%$'\n'*}
FINAL_SNAPSHOTS=${FINAL_SNAPSHOTS:-0}
FINAL_SNAPSHOTS=${FINAL_SNAPSHOTS%%$'\n'*}
CHECKPOINT_COUNT=${CHECKPOINT_COUNT:-0}
CHECKPOINT_COUNT=${CHECKPOINT_COUNT%%$'\n'*}
COMPACTION_COUNT=${COMPACTION_COUNT:-0}
COMPACTION_COUNT=${COMPACTION_COUNT%%$'\n'*}
SYNC_COUNT=${SYNC_COUNT:-0}
SYNC_COUNT=${SYNC_COUNT%%$'\n'*}
if [ "$CRITICAL_ERROR_COUNT" -eq 0 ] && [ "$FINAL_OBJECTS" -gt 0 ]; then
  echo "✓ MINIO S3 TEST PASSED!"
  echo ""
  echo "Successfully validated:"
  echo " - S3-compatible replication to MinIO"
  echo " - Stored $FINAL_OBJECTS objects"
  echo " - Compactions: $COMPACTION_COUNT"
  echo " - Syncs: $SYNC_COUNT"
  if [ "$CHECKPOINT_COUNT" -gt 0 ]; then
    echo " - Checkpoints: $CHECKPOINT_COUNT"
  fi
  if [ "$FINAL_SNAPSHOTS" -gt 0 ]; then
    echo " - Snapshots: $FINAL_SNAPSHOTS"
  fi
  echo " - Database restoration from S3"
else
  echo "⚠ TEST COMPLETED WITH ISSUES:"
  if [ "$CRITICAL_ERROR_COUNT" -gt 0 ]; then
    echo " - Critical errors detected: $CRITICAL_ERROR_COUNT"
  fi
  if [ "$FINAL_OBJECTS" -eq 0 ]; then
    echo " - No objects stored in MinIO"
  fi
  echo ""
  echo "Review the logs for details:"
  echo " $LOG_DIR/litestream.log"
fi
echo ""
echo "MinIO Console: http://localhost:${MINIO_CONSOLE_PORT}"
echo "Credentials: ${MINIO_ROOT_USER}/${MINIO_ROOT_PASSWORD}"
echo ""
echo "Full test results available in: $TEST_DIR"
echo "================================================"

View File

@@ -98,34 +98,32 @@ cat > "$CONFIG_FILE" <<EOF
# access-key-id: ${AWS_ACCESS_KEY_ID}
# secret-access-key: ${AWS_SECRET_ACCESS_KEY}
# Snapshot every 10 minutes
snapshot:
interval: 10m
retention: 720h # Keep data for 30 days
# Compaction settings - very frequent for testing
levels:
- interval: 30s
- interval: 1m
- interval: 5m
- interval: 15m
- interval: 30m
- interval: 1h
dbs:
- path: $DB_PATH
# Checkpoint settings - frequent for testing
checkpoint-interval: 30s
min-checkpoint-page-count: 1000
max-checkpoint-page-count: 10000
replicas:
- url: ${S3_PATH}
region: ${AWS_REGION}
# Snapshot every 10 minutes
snapshot-interval: 10m
# Retention settings - keep data for 30 days
retention: 720h
retention-check-interval: 1h
# Compaction settings - very frequent for testing
compaction:
- duration: 30s
interval: 30s
- duration: 1m
interval: 1m
- duration: 5m
interval: 5m
- duration: 1h
interval: 15m
- duration: 6h
interval: 30m
- duration: 24h
interval: 1h
# S3-specific settings
force-path-style: false
skip-verify: false
@@ -133,11 +131,6 @@ dbs:
# Optional: Server-side encryption
# sse: AES256
# sse-kms-key-id: your-kms-key-id
# Checkpoint settings - frequent for testing
checkpoint-interval: 30s
min-checkpoint-page-count: 1000
max-checkpoint-page-count: 10000
EOF
echo ""
@@ -207,14 +200,26 @@ monitor_s3_test() {
echo " Total S3 storage: $(numfmt --to=iec-i --suffix=B $S3_SIZE 2>/dev/null || echo "$S3_SIZE bytes")" | tee -a "$LOG_DIR/monitor.log"
fi
# Check for errors
# Count operations
echo "" | tee -a "$LOG_DIR/monitor.log"
ERROR_COUNT=$(grep -c "ERROR\|error" "$LOG_DIR/litestream.log" 2>/dev/null || echo "0")
echo "Errors in litestream log: $ERROR_COUNT" | tee -a "$LOG_DIR/monitor.log"
echo "Operations:" | tee -a "$LOG_DIR/monitor.log"
if [ -f "$LOG_DIR/litestream.log" ]; then
COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" 2>/dev/null || echo "0")
CHECKPOINT_COUNT=$(grep -iE "checkpoint|checkpointed" "$LOG_DIR/litestream.log" 2>/dev/null | wc -l | tr -d ' ' || echo "0")
SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" 2>/dev/null || echo "0")
echo " Compactions: $COMPACTION_COUNT" | tee -a "$LOG_DIR/monitor.log"
echo " Checkpoints: $CHECKPOINT_COUNT" | tee -a "$LOG_DIR/monitor.log"
echo " Syncs: $SYNC_COUNT" | tee -a "$LOG_DIR/monitor.log"
fi
# Check for errors (exclude known non-critical)
echo "" | tee -a "$LOG_DIR/monitor.log"
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -v "page size not initialized" | wc -l | tr -d ' ' || echo "0")
echo "Critical errors in litestream log: $ERROR_COUNT" | tee -a "$LOG_DIR/monitor.log"
if [ "$ERROR_COUNT" -gt 0 ]; then
echo "Recent errors:" | tee -a "$LOG_DIR/monitor.log"
grep "ERROR\|error" "$LOG_DIR/litestream.log" | tail -5 | tee -a "$LOG_DIR/monitor.log"
grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | tail -5 | tee -a "$LOG_DIR/monitor.log"
fi
# Check for S3-specific errors
@@ -255,8 +260,22 @@ MONITOR_PID=$!
echo "Monitor started with PID: $MONITOR_PID"
echo ""
echo "Initial database population..."
echo "Initial database population (before starting litestream)..."
# Kill litestream temporarily to populate database
kill "$LITESTREAM_PID" 2>/dev/null || true
wait "$LITESTREAM_PID" 2>/dev/null || true
bin/litestream-test populate -db "$DB_PATH" -target-size 100MB -batch-size 10000 > "$LOG_DIR/populate.log" 2>&1
if [ $? -ne 0 ]; then
echo "Warning: Population failed, but continuing..."
cat "$LOG_DIR/populate.log"
fi
# Restart litestream
echo "Restarting litestream after population..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
sleep 3
echo ""
echo "Starting load generator for overnight S3 test..."
@@ -304,7 +323,61 @@ echo ""
wait "$LOAD_PID"
echo ""
echo "Load generation completed. Testing restoration from S3..."
echo "Load generation completed."
# Final statistics
echo ""
echo "================================================"
echo "Final Statistics"
echo "================================================"
# Report the source database size and row count.
# ROW_COUNT starts at "0" so it is defined for the later comparison against the
# restored database even when the source file is missing.
ROW_COUNT="0"
if [ -f "$DB_PATH" ]; then
    # Try the BSD/macOS form of stat first, then the GNU form.
    DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
    # Find actual table name; a failed listing is treated as "no tables".
    TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null) || TABLES=""
    # Use the first known table present, in the same priority order as before.
    for table in load_test test_table_0 test_data; do
        if echo "$TABLES" | grep -q "$table"; then
            ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM $table" 2>/dev/null || echo "0")
            break
        fi
    done
    echo "Database size: $(numfmt --to=iec-i --suffix=B "$DB_SIZE" 2>/dev/null || echo "$DB_SIZE bytes")"
    echo "Total rows: $ROW_COUNT"
fi
echo ""
echo "S3 Statistics:"
# Count objects in S3
# The bucket is listed once and the counts are derived from the captured text.
# `grep -c` always prints a count and exits 1 on zero matches, so `|| true` is
# used instead of `|| echo "0"`, which would yield a two-line "0\n0" value.
# NOTE(review): confirm the .snapshot.lz4 / .wal.lz4 suffixes still match what
# the S3 replica writes.
S3_LISTING=$(aws s3 ls "${S3_PATH}/" --recursive 2>/dev/null || true)
SNAPSHOT_COUNT=$(printf '%s\n' "$S3_LISTING" | grep -c "\.snapshot\.lz4" || true)
WAL_COUNT=$(printf '%s\n' "$S3_LISTING" | grep -c "\.wal\.lz4" || true)
# Every non-empty listing line is one object.
TOTAL_OBJECTS=$(printf '%s\n' "$S3_LISTING" | grep -c . || true)
# The third field of the "Total Size: N" summary line is the byte count.
S3_SIZE=$(aws s3 ls "${S3_PATH}/" --recursive --summarize 2>/dev/null | awk '/Total Size/ {print $3}' || true)
S3_SIZE=${S3_SIZE:-0}
echo " Snapshots in S3: $SNAPSHOT_COUNT"
echo " WAL segments in S3: $WAL_COUNT"
echo " Total objects: $TOTAL_OBJECTS"
if [ "$S3_SIZE" != "0" ]; then
    echo " Total S3 storage: $(numfmt --to=iec-i --suffix=B "$S3_SIZE" 2>/dev/null || echo "$S3_SIZE bytes")"
fi
echo ""
echo "Operation Counts:"
# Summarize operations and critical errors found in the litestream log.
# `grep -c` / `grep -vc` always print one count and exit 1 when it is zero, so
# `|| true` is used; `|| echo "0"` would add a second "0" to the value.
if [ -f "$LOG_DIR/litestream.log" ]; then
    COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" || true)
    CHECKPOINT_COUNT=$(grep -ciE "checkpoint|checkpointed" "$LOG_DIR/litestream.log" || true)
    SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" || true)
    # ERROR lines, excluding the known non-critical "page size not initialized".
    ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -vc "page size not initialized" || true)
    echo " Compactions: $COMPACTION_COUNT"
    echo " Checkpoints: $CHECKPOINT_COUNT"
    echo " Syncs: $SYNC_COUNT"
    echo " Critical errors: $ERROR_COUNT"
fi
echo ""
echo "Testing restoration from S3..."
# Test restoration
RESTORE_DB="$TEST_DIR/restored.db"
@@ -314,17 +387,22 @@ bin/litestream restore -o "$RESTORE_DB" "$S3_PATH" > "$LOG_DIR/restore.log" 2>&1
if [ $? -eq 0 ]; then
echo "✓ Restoration successful!"
# Compare row counts
ORIGINAL_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
echo "Original database rows: $ORIGINAL_COUNT"
echo "Restored database rows: $RESTORED_COUNT"
if [ "$ORIGINAL_COUNT" = "$RESTORED_COUNT" ]; then
echo "✓ Row counts match!"
# Compare row counts - use same table detection logic
TABLES=$(sqlite3 "$RESTORE_DB" ".tables" 2>/dev/null)
if echo "$TABLES" | grep -q "load_test"; then
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM load_test" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_table_0"; then
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM test_table_0" 2>/dev/null || echo "0")
elif echo "$TABLES" | grep -q "test_data"; then
RESTORED_COUNT=$(sqlite3 "$RESTORE_DB" "SELECT COUNT(*) FROM test_data" 2>/dev/null || echo "0")
else
echo "✗ Row count mismatch!"
RESTORED_COUNT="0"
fi
if [ "$ROW_COUNT" = "$RESTORED_COUNT" ]; then
echo "✓ Row counts match! ($RESTORED_COUNT rows)"
else
echo "⚠ Row count mismatch! Original: $ROW_COUNT, Restored: $RESTORED_COUNT"
fi
else
echo "✗ Restoration failed! Check $LOG_DIR/restore.log"

View File

@@ -72,38 +72,31 @@ cat > "$CONFIG_FILE" <<EOF
# Litestream configuration for overnight testing
# with aggressive compaction and snapshot intervals
# Snapshot every 10 minutes
snapshot:
interval: 10m
retention: 720h # Keep everything for analysis
# Compaction settings - very frequent for testing
levels:
- interval: 30s
- interval: 1m
- interval: 5m
- interval: 15m
- interval: 30m
- interval: 1h
dbs:
- path: $DB_PATH
replicas:
- type: file
path: $REPLICA_PATH
# Snapshot every 10 minutes
snapshot-interval: 10m
# Retention settings - keep everything for analysis
retention: 720h
retention-check-interval: 1h
# Compaction settings - very frequent for testing
compaction:
- duration: 30s
interval: 30s
- duration: 1m
interval: 1m
- duration: 5m
interval: 5m
- duration: 1h
interval: 15m
- duration: 6h
interval: 30m
- duration: 24h
interval: 1h
# Checkpoint after every 1000 frames (frequent for testing)
checkpoint-interval: 30s
min-checkpoint-page-count: 1000
max-checkpoint-page-count: 10000
replicas:
- type: file
path: $REPLICA_PATH
retention-check-interval: 1h
EOF
echo ""
@@ -151,35 +144,47 @@ monitor_test() {
echo "" | tee -a "$LOG_DIR/monitor.log"
echo "Replica Statistics:" | tee -a "$LOG_DIR/monitor.log"
# Count snapshots
SNAPSHOT_COUNT=$(find "$REPLICA_PATH" -name "*.snapshot.lz4" 2>/dev/null | wc -l | tr -d ' ')
# Count snapshots (for file replica, look for snapshot.ltx files)
SNAPSHOT_COUNT=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ')
echo " Snapshots: $SNAPSHOT_COUNT" | tee -a "$LOG_DIR/monitor.log"
# Count WAL segments by age
# Count LTX segments by age (file replicas use .ltx not .wal.lz4)
if [ -d "$REPLICA_PATH" ]; then
WAL_30S=$(find "$REPLICA_PATH" -name "*.wal.lz4" -mmin -0.5 2>/dev/null | wc -l | tr -d ' ')
WAL_1M=$(find "$REPLICA_PATH" -name "*.wal.lz4" -mmin -1 2>/dev/null | wc -l | tr -d ' ')
WAL_5M=$(find "$REPLICA_PATH" -name "*.wal.lz4" -mmin -5 2>/dev/null | wc -l | tr -d ' ')
WAL_TOTAL=$(find "$REPLICA_PATH" -name "*.wal.lz4" 2>/dev/null | wc -l | tr -d ' ')
LTX_30S=$(find "$REPLICA_PATH" -name "*.ltx" -mmin -0.5 2>/dev/null | wc -l | tr -d ' ')
LTX_1M=$(find "$REPLICA_PATH" -name "*.ltx" -mmin -1 2>/dev/null | wc -l | tr -d ' ')
LTX_5M=$(find "$REPLICA_PATH" -name "*.ltx" -mmin -5 2>/dev/null | wc -l | tr -d ' ')
LTX_TOTAL=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ')
echo " WAL segments (last 30s): $WAL_30S" | tee -a "$LOG_DIR/monitor.log"
echo " WAL segments (last 1m): $WAL_1M" | tee -a "$LOG_DIR/monitor.log"
echo " WAL segments (last 5m): $WAL_5M" | tee -a "$LOG_DIR/monitor.log"
echo " WAL segments (total): $WAL_TOTAL" | tee -a "$LOG_DIR/monitor.log"
echo " LTX segments (last 30s): $LTX_30S" | tee -a "$LOG_DIR/monitor.log"
echo " LTX segments (last 1m): $LTX_1M" | tee -a "$LOG_DIR/monitor.log"
echo " LTX segments (last 5m): $LTX_5M" | tee -a "$LOG_DIR/monitor.log"
echo " LTX segments (total): $LTX_TOTAL" | tee -a "$LOG_DIR/monitor.log"
# Replica size
REPLICA_SIZE=$(du -sh "$REPLICA_PATH" 2>/dev/null | cut -f1)
echo " Total replica size: $REPLICA_SIZE" | tee -a "$LOG_DIR/monitor.log"
fi
# Check for errors in litestream log
# Count operations
echo "" | tee -a "$LOG_DIR/monitor.log"
ERROR_COUNT=$(grep -c "ERROR\|error" "$LOG_DIR/litestream.log" 2>/dev/null || echo "0")
echo "Errors in litestream log: $ERROR_COUNT" | tee -a "$LOG_DIR/monitor.log"
echo "Operations:" | tee -a "$LOG_DIR/monitor.log"
# Count operations recorded in the litestream log so far and append them to
# the monitor log.
# `grep -c` always prints a count (including "0") and exits 1 when nothing
# matches, so `|| true` only absorbs that exit status. Using `|| echo "0"`
# here would append a second "0" to the captured value.
if [ -f "$LOG_DIR/litestream.log" ]; then
    COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" 2>/dev/null || true)
    CHECKPOINT_COUNT=$(grep -ciE "checkpoint|checkpointed" "$LOG_DIR/litestream.log" 2>/dev/null || true)
    SYNC_COUNT=$(grep -c "replica sync" "$LOG_DIR/litestream.log" 2>/dev/null || true)
    # Fall back to 0 only if grep produced no output at all (e.g. read error).
    COMPACTION_COUNT=${COMPACTION_COUNT:-0}
    CHECKPOINT_COUNT=${CHECKPOINT_COUNT:-0}
    SYNC_COUNT=${SYNC_COUNT:-0}
    echo " Compactions: $COMPACTION_COUNT" | tee -a "$LOG_DIR/monitor.log"
    echo " Checkpoints: $CHECKPOINT_COUNT" | tee -a "$LOG_DIR/monitor.log"
    echo " Syncs: $SYNC_COUNT" | tee -a "$LOG_DIR/monitor.log"
fi
# Check for errors in litestream log (exclude known non-critical)
echo "" | tee -a "$LOG_DIR/monitor.log"
# Count ERROR lines, ignoring the known non-critical "page size not initialized"
# message. `grep -vc` always prints exactly one count; `|| true` absorbs the
# non-zero status when nothing matches or the log is missing, so the value stays
# a single integer for the numeric test that follows, even if `pipefail` is on.
ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" 2>/dev/null | grep -vc "page size not initialized" || true)
echo "Critical errors in litestream log: $ERROR_COUNT" | tee -a "$LOG_DIR/monitor.log"
if [ "$ERROR_COUNT" -gt 0 ]; then
echo "Recent errors:" | tee -a "$LOG_DIR/monitor.log"
grep "ERROR\|error" "$LOG_DIR/litestream.log" | tail -5 | tee -a "$LOG_DIR/monitor.log"
grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -v "page size not initialized" | tail -5 | tee -a "$LOG_DIR/monitor.log"
fi
# Process status
@@ -209,8 +214,22 @@ MONITOR_PID=$!
echo "Monitor started with PID: $MONITOR_PID"
echo ""
echo "Initial database population..."
echo "Initial database population (before starting litestream)..."
# Kill litestream temporarily to populate database
# Both commands are best-effort: the process may already be gone.
kill "$LITESTREAM_PID" 2>/dev/null || true
wait "$LITESTREAM_PID" 2>/dev/null || true

# Test the populate command directly in the `if`. A separate `[ $? -ne 0 ]`
# check is never reached when errexit (`set -e`) is active, because the failing
# command ends the script first and the "continue anyway" branch cannot run.
if ! bin/litestream-test populate -db "$DB_PATH" -target-size 100MB -batch-size 10000 > "$LOG_DIR/populate.log" 2>&1; then
    echo "Warning: Population failed, but continuing..."
    cat "$LOG_DIR/populate.log"
fi

# Restart litestream
# NOTE(review): `>` truncates litestream.log; if the stopped instance wrote to
# the same file, its output is discarded here. Confirm that is intended.
echo "Restarting litestream after population..."
LOG_LEVEL=debug bin/litestream replicate -config "$CONFIG_FILE" > "$LOG_DIR/litestream.log" 2>&1 &
LITESTREAM_PID=$!
# Give the new process a moment to start before load generation begins.
sleep 3
echo ""
echo "Starting load generator for overnight test..."
@@ -255,7 +274,51 @@ echo ""
wait "$LOAD_PID"
echo ""
echo "Load generation completed. Running validation..."
echo "Load generation completed."
# Final statistics
echo ""
echo "================================================"
echo "Final Statistics"
echo "================================================"
# Report the source database size and row count.
# ROW_COUNT starts at "0" so it is always defined after this section, even when
# the database file is missing.
ROW_COUNT="0"
if [ -f "$DB_PATH" ]; then
    # Try the BSD/macOS form of stat first, then the GNU form.
    DB_SIZE=$(stat -f%z "$DB_PATH" 2>/dev/null || stat -c%s "$DB_PATH" 2>/dev/null)
    # Find actual table name; a failed listing is treated as "no tables".
    TABLES=$(sqlite3 "$DB_PATH" ".tables" 2>/dev/null) || TABLES=""
    # Use the first known table present, in the same priority order as before.
    for table in load_test test_table_0 test_data; do
        if echo "$TABLES" | grep -q "$table"; then
            ROW_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM $table" 2>/dev/null || echo "0")
            break
        fi
    done
    echo "Database size: $(numfmt --to=iec-i --suffix=B "$DB_SIZE" 2>/dev/null || echo "$DB_SIZE bytes")"
    echo "Total rows: $ROW_COUNT"
fi
# Summarize the on-disk replica: total size, all .ltx files, and the subset
# whose names contain "snapshot".
if [ -d "$REPLICA_PATH" ]; then
    REPLICA_SIZE=$(du -sh "$REPLICA_PATH" | cut -f1)
    LTX_COUNT=$(find "$REPLICA_PATH" -name "*.ltx" 2>/dev/null | wc -l | tr -d ' ')
    SNAPSHOT_COUNT=$(find "$REPLICA_PATH" -name "*snapshot*.ltx" 2>/dev/null | wc -l | tr -d ' ')
    {
        echo "Snapshots created: $SNAPSHOT_COUNT"
        echo "LTX segments: $LTX_COUNT"
        echo "Replica size: $REPLICA_SIZE"
    }
fi
# Summarize operations and critical errors found in the litestream log.
# `grep -c` / `grep -vc` always print one count and exit 1 when it is zero, so
# `|| true` is used; `|| echo "0"` would add a second "0" to the value.
if [ -f "$LOG_DIR/litestream.log" ]; then
    COMPACTION_COUNT=$(grep -c "compaction complete" "$LOG_DIR/litestream.log" || true)
    CHECKPOINT_COUNT=$(grep -ciE "checkpoint|checkpointed" "$LOG_DIR/litestream.log" || true)
    # ERROR lines, excluding the known non-critical "page size not initialized".
    ERROR_COUNT=$(grep -i "ERROR" "$LOG_DIR/litestream.log" | grep -vc "page size not initialized" || true)
    echo "Compactions: $COMPACTION_COUNT"
    echo "Checkpoints: $CHECKPOINT_COUNT"
    echo "Critical errors: $ERROR_COUNT"
fi
echo ""
echo "Running validation..."
bin/litestream-test validate \
-source "$DB_PATH" \
-replica "$REPLICA_PATH" \

View File

@@ -67,32 +67,29 @@ fi
# Create aggressive test configuration
echo "Creating test configuration..."
cat > "$CONFIG_FILE" <<EOF
# Very aggressive snapshot settings for quick testing
snapshot:
interval: 1m # Snapshots every minute
retention: 30m # Keep data for 30 minutes
# Frequent compaction levels for testing
levels:
- interval: 30s
- interval: 1m
- interval: 5m
- interval: 10m
dbs:
- path: $DB_PATH
replicas:
- type: file
path: $REPLICA_PATH
# Very aggressive settings for quick testing
snapshot-interval: 1m
retention: 30m
retention-check-interval: 2m
# Frequent compaction for testing
compaction:
- duration: 30s
interval: 30s
- duration: 1m
interval: 1m
- duration: 5m
interval: 5m
- duration: 15m
interval: 10m
# Aggressive checkpoint settings
checkpoint-interval: 30s
min-checkpoint-page-count: 10
max-checkpoint-page-count: 10000
replicas:
- type: file
path: $REPLICA_PATH
retention-check-interval: 2m
EOF
echo "Starting litestream..."