feat(replicate): add IPC control commands for dynamic start/stop (#1010)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Cory LaNou
2026-01-17 10:32:41 -06:00
committed by GitHub
parent 87d1f0d781
commit ed28f2ee62
8 changed files with 603 additions and 2 deletions

View File

@@ -159,6 +159,10 @@ func (m *Main) Run(ctx context.Context, args []string) (err error) {
slog.Info("litestream shut down")
return err
case "start":
return (&StartCommand{}).Run(ctx, args)
case "stop":
return (&StopCommand{}).Run(ctx, args)
case "reset":
return (&ResetCommand{}).Run(ctx, args)
case "restore":
@@ -198,7 +202,9 @@ The commands are:
replicate runs a server to replicate databases
reset reset local state for a database
restore recovers database backup from a replica
start start replication for a database
status display replication status for databases
stop stop replication for a database
version prints the binary version
`[1:])
}
@@ -211,6 +217,9 @@ type Config struct {
// Bind address for serving metrics.
Addr string `yaml:"addr"`
// Socket configuration for control commands.
Socket litestream.SocketConfig `yaml:"socket"`
// List of stages in a multi-level compaction.
// Only includes L1 through the last non-snapshot level.
Levels []*CompactionLevelConfig `yaml:"levels"`
@@ -292,6 +301,7 @@ func DefaultConfig() Config {
Interval: &defaultSnapshotInterval,
Retention: &defaultSnapshotRetention,
},
Socket: litestream.DefaultSocketConfig(),
L0Retention: &defaultL0Retention,
L0RetentionCheckInterval: &defaultL0RetentionCheckInterval,
ShutdownSyncTimeout: &defaultShutdownSyncTimeout,

View File

@@ -41,6 +41,9 @@ type ReplicateCommand struct {
// MCP server
MCP *MCPServer
// Server for IPC control commands.
Server *litestream.Server
// Manages the set of databases & compaction levels.
Store *litestream.Store
@@ -272,6 +275,17 @@ func (c *ReplicateCommand) Run(ctx context.Context) (err error) {
return fmt.Errorf("cannot open store: %w", err)
}
// Start control server if socket is enabled
if c.Config.Socket.Enabled {
c.Server = litestream.NewServer(c.Store)
c.Server.SocketPath = c.Config.Socket.Path
c.Server.SocketPerms = c.Config.Socket.Permissions
c.Server.PathExpander = expand
if err := c.Server.Start(); err != nil {
slog.Warn("failed to start control server", "error", err)
}
}
for _, entry := range watchables {
monitor, err := NewDirectoryMonitor(ctx, c.Store, entry.config, entry.dbs)
if err != nil {
@@ -402,6 +416,11 @@ func (c *ReplicateCommand) Close(ctx context.Context) error {
}
c.directoryMonitors = nil
if c.Server != nil {
if err := c.Server.Close(); err != nil {
slog.Error("error closing control server", "error", err)
}
}
if c.Store != nil {
if err := c.Store.Close(ctx); err != nil {
slog.Error("failed to close database", "error", err)

106
cmd/litestream/start.go Normal file
View File

@@ -0,0 +1,106 @@
package main
import (
"bytes"
"context"
"encoding/json"
"flag"
"fmt"
"io"
"net"
"net/http"
"time"
"github.com/benbjohnson/litestream"
)
// StartCommand represents the command to start replication for a database.
//
// It carries no state: everything the command needs is supplied through the
// arguments passed to Run, so the zero value is ready to use.
type StartCommand struct{}
// Run executes the start command.
//
// args must contain exactly one database path, optionally preceded by the
// -timeout and -socket flags. The path and timeout are sent as a JSON encoded
// litestream.StartRequest to the /start endpoint of the control server, which
// is reached over the Unix domain socket named by -socket. On success the
// decoded litestream.StartResponse is printed to standard output as indented
// JSON.
//
// ctx bounds both the socket dial and the HTTP exchange, so cancelling it
// aborts the command. The -timeout value is applied as the client-side
// deadline as well; zero leaves the client without a deadline of its own.
//
// An error is returned when the flags or arguments are invalid, when the
// control socket cannot be reached, when the server replies with a non-200
// status, or when the response body cannot be decoded.
func (c *StartCommand) Run(ctx context.Context, args []string) error {
	fs := flag.NewFlagSet("litestream-start", flag.ContinueOnError)
	timeout := fs.Int("timeout", 30, "timeout in seconds")
	socketPath := fs.String("socket", "/var/run/litestream.sock", "control socket path")
	fs.Usage = c.Usage
	if err := fs.Parse(args); err != nil {
		return err
	}

	if fs.NArg() == 0 {
		return fmt.Errorf("database path required")
	}
	if fs.NArg() > 1 {
		return fmt.Errorf("too many arguments")
	}
	// A negative timeout would produce an already-expired deadline and surface
	// as a confusing connection failure, so reject it up front.
	if *timeout < 0 {
		return fmt.Errorf("timeout must not be negative")
	}
	dbPath := fs.Arg(0)

	// Create HTTP client that connects via Unix socket with timeout. The
	// dialer receives the request context so cancellation interrupts a
	// pending connect instead of waiting for the full timeout.
	clientTimeout := time.Duration(*timeout) * time.Second
	dialer := &net.Dialer{Timeout: clientTimeout}
	client := &http.Client{
		Timeout: clientTimeout,
		Transport: &http.Transport{
			DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
				return dialer.DialContext(ctx, "unix", *socketPath)
			},
		},
	}
	// The transport is private to this call; release its idle connection so
	// repeated invocations in one process do not accumulate open sockets.
	defer client.CloseIdleConnections()

	req := litestream.StartRequest{
		Path:    dbPath,
		Timeout: *timeout,
	}
	reqBody, err := json.Marshal(req)
	if err != nil {
		return fmt.Errorf("failed to marshal request: %w", err)
	}

	// The host portion of the URL is a placeholder: the custom dialer always
	// connects to the Unix socket regardless of the requested address.
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, "http://localhost/start", bytes.NewReader(reqBody))
	if err != nil {
		return fmt.Errorf("failed to create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")

	resp, err := client.Do(httpReq)
	if err != nil {
		return fmt.Errorf("failed to connect to control socket: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("failed to read response: %w", err)
	}

	if resp.StatusCode != http.StatusOK {
		// Prefer the structured error message; fall back to the raw body when
		// the payload is not a litestream.ErrorResponse.
		var errResp litestream.ErrorResponse
		if err := json.Unmarshal(body, &errResp); err == nil && errResp.Error != "" {
			return fmt.Errorf("start failed: %s", errResp.Error)
		}
		return fmt.Errorf("start failed: %s", string(body))
	}

	var result litestream.StartResponse
	if err := json.Unmarshal(body, &result); err != nil {
		return fmt.Errorf("failed to parse response: %w", err)
	}

	output, err := json.MarshalIndent(result, "", " ")
	if err != nil {
		return fmt.Errorf("failed to format response: %w", err)
	}
	fmt.Println(string(output))
	return nil
}
// Usage writes the help text for the start command to standard output.
func (c *StartCommand) Usage() {
	// The literal opens with a newline so the text can start on its own line
	// in the source; slicing from index 1 drops that newline before printing.
	const help = `
usage: litestream start [OPTIONS] DB_PATH
Start replication for a database.
Options:
-timeout SECONDS
Maximum time to wait in seconds (default: 30).
-socket PATH
Path to control socket (default: /var/run/litestream.sock).
`
	fmt.Println(help[1:])
}

107
cmd/litestream/stop.go Normal file
View File

@@ -0,0 +1,107 @@
package main
import (
"bytes"
"context"
"encoding/json"
"flag"
"fmt"
"io"
"net"
"net/http"
"time"
"github.com/benbjohnson/litestream"
)
// StopCommand represents the command to stop replication for a database.
//
// It carries no state: everything the command needs is supplied through the
// arguments passed to Run, so the zero value is ready to use.
type StopCommand struct{}
// Run executes the stop command.
//
// args must contain exactly one database path, optionally preceded by the
// -timeout and -socket flags. The path and timeout are sent as a JSON encoded
// litestream.StopRequest to the /stop endpoint of the control server, which
// is reached over the Unix domain socket named by -socket. On success the
// decoded litestream.StopResponse is printed to standard output as indented
// JSON.
//
// ctx bounds both the socket dial and the HTTP exchange, so cancelling it
// aborts the command. The -timeout value is applied as the client-side
// deadline as well; zero leaves the client without a deadline of its own.
//
// An error is returned when the flags or arguments are invalid, when the
// control socket cannot be reached, when the server replies with a non-200
// status, or when the response body cannot be decoded.
func (c *StopCommand) Run(ctx context.Context, args []string) error {
	fs := flag.NewFlagSet("litestream-stop", flag.ContinueOnError)
	timeout := fs.Int("timeout", 30, "timeout in seconds")
	socketPath := fs.String("socket", "/var/run/litestream.sock", "control socket path")
	fs.Usage = c.Usage
	if err := fs.Parse(args); err != nil {
		return err
	}

	if fs.NArg() == 0 {
		return fmt.Errorf("database path required")
	}
	if fs.NArg() > 1 {
		return fmt.Errorf("too many arguments")
	}
	// A negative timeout would produce an already-expired deadline and surface
	// as a confusing connection failure, so reject it up front.
	if *timeout < 0 {
		return fmt.Errorf("timeout must not be negative")
	}
	dbPath := fs.Arg(0)

	// Create HTTP client that connects via Unix socket with timeout. The
	// dialer receives the request context so cancellation interrupts a
	// pending connect instead of waiting for the full timeout.
	clientTimeout := time.Duration(*timeout) * time.Second
	dialer := &net.Dialer{Timeout: clientTimeout}
	client := &http.Client{
		Timeout: clientTimeout,
		Transport: &http.Transport{
			DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
				return dialer.DialContext(ctx, "unix", *socketPath)
			},
		},
	}
	// The transport is private to this call; release its idle connection so
	// repeated invocations in one process do not accumulate open sockets.
	defer client.CloseIdleConnections()

	req := litestream.StopRequest{
		Path:    dbPath,
		Timeout: *timeout,
	}
	reqBody, err := json.Marshal(req)
	if err != nil {
		return fmt.Errorf("failed to marshal request: %w", err)
	}

	// The host portion of the URL is a placeholder: the custom dialer always
	// connects to the Unix socket regardless of the requested address.
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, "http://localhost/stop", bytes.NewReader(reqBody))
	if err != nil {
		return fmt.Errorf("failed to create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")

	resp, err := client.Do(httpReq)
	if err != nil {
		return fmt.Errorf("failed to connect to control socket: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("failed to read response: %w", err)
	}

	if resp.StatusCode != http.StatusOK {
		// Prefer the structured error message; fall back to the raw body when
		// the payload is not a litestream.ErrorResponse.
		var errResp litestream.ErrorResponse
		if err := json.Unmarshal(body, &errResp); err == nil && errResp.Error != "" {
			return fmt.Errorf("stop failed: %s", errResp.Error)
		}
		return fmt.Errorf("stop failed: %s", string(body))
	}

	var result litestream.StopResponse
	if err := json.Unmarshal(body, &result); err != nil {
		return fmt.Errorf("failed to parse response: %w", err)
	}

	output, err := json.MarshalIndent(result, "", " ")
	if err != nil {
		return fmt.Errorf("failed to format response: %w", err)
	}
	fmt.Println(string(output))
	return nil
}
// Usage writes the help text for the stop command to standard output.
func (c *StopCommand) Usage() {
	// The literal opens with a newline so the text can start on its own line
	// in the source; slicing from index 1 drops that newline before printing.
	const help = `
usage: litestream stop [OPTIONS] DB_PATH
Stop replication for a database.
Stop always waits for shutdown and final sync.
Options:
-timeout SECONDS
Maximum time to wait in seconds (default: 30).
-socket PATH
Path to control socket (default: /var/run/litestream.sock).
`
	fmt.Println(help[1:])
}