1
0
Fork 0

Merge pull request #1410 from rqlite/snapshot-v2

Simplify WAL-based Snapshotting
master
Philip O'Toole 11 months ago committed by GitHub
commit b37ac2d351
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -38,6 +38,7 @@ When officially released 8.0 will support (mostly) seamless upgrades from the 7.
- [PR #1395](https://github.com/rqlite/rqlite/pull/1395): More DB-level and Snapshotting statistics.
- [PR #1399](https://github.com/rqlite/rqlite/pull/1399): Better trailing flags error message.
- [PR #1404](https://github.com/rqlite/rqlite/pull/1404): Add an interface between Store and Snapshot Store.
- [PR #1410](https://github.com/rqlite/rqlite/pull/1410): Implement simpler WAL-based snapshotting.
## 7.21.4 (July 8th 2023)
### Implementation changes and bug fixes

@ -1,223 +0,0 @@
package snapshot
import (
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
)
// Sink is a sink for writing snapshot data to a Snapshot store.
//
// Bytes passed to Write are buffered in a temporary file; they are only
// turned into a snapshot on disk when Close is called.
type Sink struct {
// str is the Store this Sink writes into. Close consults it to decide
// whether to reap after the snapshot has been processed.
str *Store
// workDir is the directory in which Open creates the temporary data file.
workDir string
// curGenDir is the generation directory that receives incremental snapshots.
curGenDir string
// nextGenDir is the generation directory created when a full snapshot is written.
nextGenDir string
// meta is the snapshot metadata; its ID names the snapshot directory.
meta *Meta
// nWritten is the total number of bytes accepted by Write.
nWritten int64
// dataFD is the temporary file holding the raw snapshot stream. It is nil
// until Open succeeds.
dataFD *os.File
logger *log.Logger
// closed is set by both Close and Cancel; Close is a no-op once it is set.
closed bool
}
// NewSink returns a Sink that will write a snapshot described by meta into
// the given Store. workDir is where temporary data is staged, currGenDir is
// the generation used for incremental snapshots and nextGenDir the generation
// created for full snapshots. The Sink must be opened before it is written to.
func NewSink(str *Store, workDir, currGenDir, nextGenDir string, meta *Meta) *Sink {
	sink := new(Sink)
	sink.str = str
	sink.workDir = workDir
	sink.curGenDir = currGenDir
	sink.nextGenDir = nextGenDir
	sink.meta = meta
	sink.logger = log.New(os.Stderr, "[snapshot-sink] ", log.LstdFlags)
	return sink
}
// Open prepares the sink for writing by creating the temporary data file
// inside the work directory.
func (s *Sink) Open() error {
	fd, err := os.Create(filepath.Join(s.workDir, "snapshot-data.tmp"))
	if err != nil {
		return err
	}
	s.dataFD = fd
	return nil
}
// Write appends p to the temporary data file and keeps a running total of
// the bytes accepted. Nothing is installed in the store until Close is called.
func (s *Sink) Write(p []byte) (n int, err error) {
	written, writeErr := s.dataFD.Write(p)
	s.nWritten += int64(written)
	return written, writeErr
}
// ID reports the identifier, taken from the sink's metadata, of the snapshot
// this sink is writing.
func (s *Sink) ID() string {
	id := s.meta.ID
	return id
}
// Cancel abandons the snapshot. It must be called whenever the sink is not
// going to be closed. It always returns nil.
func (s *Sink) Cancel() error {
	s.closed = true
	// Cleanup is best effort, so its error is deliberately discarded.
	_ = s.cleanup()
	return nil
}
// Close finalizes the snapshot: the buffered data is processed into the
// store and, unless auto-reaping is disabled on the Store, old data is reaped.
// Until Close has been called the snapshot is not in place. Calling Close on
// a sink that has already been closed or cancelled does nothing.
func (s *Sink) Close() error {
	if s.closed {
		return nil
	}
	s.closed = true
	// Temporary files are removed whatever the outcome below.
	defer s.cleanup()

	err := s.processSnapshotData()
	if err != nil {
		return err
	}
	if s.str.noAutoreap {
		return nil
	}
	return s.str.Reap()
}
// processSnapshotData rewinds the temporary data file, decodes the stream
// header and hands off to the incremental or full snapshot handler depending
// on what the header carries. If nothing was written it does nothing.
func (s *Sink) processSnapshotData() error {
	if s.nWritten == 0 {
		return nil
	}

	// Rewind so the header is read from the start of the stream.
	_, err := s.dataFD.Seek(0, 0)
	if err != nil {
		return err
	}

	hdr, _, err := NewStreamHeaderFromReader(s.dataFD)
	if err != nil {
		return fmt.Errorf("error reading stream header: %v", err)
	}
	if hdr.GetVersion() != streamVersion {
		return fmt.Errorf("unsupported snapshot version %d", hdr.GetVersion())
	}

	inc := hdr.GetIncrementalSnapshot()
	if inc != nil {
		return s.processIncrementalSnapshot(inc)
	}
	if full := hdr.GetFullSnapshot(); full != nil {
		return s.processFullSnapshot(full)
	}
	return fmt.Errorf("got nil FullSnapshot")
}
// processIncrementalSnapshot stores an incremental snapshot in the current
// generation. The WAL data and metadata are first written to a temporary
// snapshot directory, which is then moved into its final location.
func (s *Sink) processIncrementalSnapshot(incSnap *IncrementalSnapshot) error {
	s.logger.Printf("processing incremental snapshot")

	// Build everything under a temporary name first.
	stagingDir := tmpName(filepath.Join(s.curGenDir, s.meta.ID))
	err := os.Mkdir(stagingDir, 0755)
	if err != nil {
		return fmt.Errorf("error creating incremental snapshot directory: %v", err)
	}

	err = os.WriteFile(filepath.Join(stagingDir, snapWALFile), incSnap.Data, 0644)
	if err != nil {
		return fmt.Errorf("error writing WAL data: %v", err)
	}

	err = s.writeMeta(stagingDir, false)
	if err != nil {
		return err
	}

	// All content is written; move the directory into place.
	finalDir, err := moveFromTmpSync(stagingDir)
	if err != nil {
		s.logger.Printf("failed to move incremental snapshot directory into place: %s", err)
		return err
	}
	s.logger.Printf("incremental snapshot (ID %s) written to %s", s.meta.ID, finalDir)
	return nil
}
// processFullSnapshot stores a full snapshot. A full snapshot starts a new
// generation: the SQLite database is rebuilt from the stream inside a
// temporary generation directory, the first snapshot directory and its
// metadata are created beneath it, and the whole generation is then moved
// into place.
func (s *Sink) processFullSnapshot(fullSnap *FullSnapshot) error {
	s.logger.Printf("processing full snapshot")

	// The new generation is assembled under a temporary name.
	stagingGenDir := tmpName(s.nextGenDir)
	err := os.MkdirAll(stagingGenDir, 0755)
	if err != nil {
		return fmt.Errorf("error creating full snapshot directory: %v", err)
	}

	// Reconstruct the SQLite database from the buffered snapshot data.
	basePath := filepath.Join(stagingGenDir, baseSqliteFile)
	err = ReplayDB(fullSnap, s.dataFD, basePath)
	if err != nil {
		return fmt.Errorf("error replaying DB: %v", err)
	}

	// Create the first snapshot directory of the new generation.
	snapDir := filepath.Join(stagingGenDir, s.meta.ID)
	err = os.MkdirAll(snapDir, 0755)
	if err != nil {
		return fmt.Errorf("error creating full snapshot directory: %v", err)
	}
	err = s.writeMeta(snapDir, true)
	if err != nil {
		return err
	}

	// Everything is written; move the generation directory into place.
	finalDir, err := moveFromTmpSync(stagingGenDir)
	if err != nil {
		s.logger.Printf("failed to move full snapshot directory into place: %s", err)
		return err
	}

	// TODO: snapshot directories older than the one just created still need
	// to be cleared out. This is an edge case and might be better handled at
	// startup. It is also why an otherwise empty snapshot directory carries
	// the "full" flag: any snapshot directory older than a full snapshot
	// directory can be removed.

	s.logger.Printf("full snapshot (ID %s) written to %s", s.meta.ID, finalDir)
	return nil
}
// writeMeta records whether the snapshot is a full one on the sink's
// metadata and then persists that metadata into dir.
func (s *Sink) writeMeta(dir string, full bool) error {
	m := s.meta
	m.Full = full
	return writeMeta(dir, m)
}
// cleanup releases what the sink holds on disk: it closes and deletes the
// temporary data file (if Open created one) and removes the temporary
// directories derived from the next and current generation directories.
//
// Cleanup is best effort. Every step is attempted even when an earlier one
// fails, so that a failure to close the data file does not leave temporary
// files and directories behind. The first error encountered is returned.
func (s *Sink) cleanup() error {
	var firstErr error
	record := func(err error) {
		if err != nil && firstErr == nil {
			firstErr = err
		}
	}

	if s.dataFD != nil {
		record(s.dataFD.Close())
		// Name() remains valid after Close.
		record(os.Remove(s.dataFD.Name()))
	}
	record(os.RemoveAll(tmpName(s.nextGenDir)))
	record(os.RemoveAll(tmpName(s.curGenDir)))
	return firstErr
}
// writeMeta serializes meta as JSON into the metadata file inside dir, syncs
// the file to disk and closes it.
func writeMeta(dir string, meta *Meta) error {
	metaPath := filepath.Join(dir, metaFileName)
	fh, err := os.Create(metaPath)
	if err != nil {
		return fmt.Errorf("error creating meta file: %v", err)
	}
	// Covers the early-return paths; the explicit Close below reports errors.
	defer fh.Close()

	if err := json.NewEncoder(fh).Encode(meta); err != nil {
		return fmt.Errorf("failed to encode meta: %v", err)
	}
	if syncErr := fh.Sync(); syncErr != nil {
		return syncErr
	}
	return fh.Close()
}

@ -1,250 +0,0 @@
package snapshot
import (
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"testing"
"github.com/hashicorp/raft"
"github.com/rqlite/rqlite/command/encoding"
"github.com/rqlite/rqlite/db"
)
// Test_NewSinkOpenCloseOK checks that a sink which is opened and immediately
// closed, with no data written, reports no error.
func Test_NewSinkOpenCloseOK(t *testing.T) {
	root := t.TempDir()
	work := filepath.Join(root, "work")
	mustCreateDir(work)

	sink := NewSink(
		mustNewStoreForSinkTest(t),
		work,
		filepath.Join(root, "curr"),
		filepath.Join(root, "next"),
		&Meta{},
	)

	err := sink.Open()
	if err != nil {
		t.Fatal(err)
	}
	err = sink.Close()
	if err != nil {
		t.Fatal(err)
	}
}
// Test_SinkFullSnapshot writes a full snapshot stream (a SQLite file plus
// four WAL files) through a sink and verifies that the next generation
// directory is created with a snapshot directory, metadata and a base SQLite
// file containing the expected rows.
func Test_SinkFullSnapshot(t *testing.T) {
	tmpDir := t.TempDir()
	workDir := filepath.Join(tmpDir, "work")
	mustCreateDir(workDir)
	currGenDir := filepath.Join(tmpDir, "curr")
	nextGenDir := filepath.Join(tmpDir, "next")
	str := mustNewStoreForSinkTest(t)

	s := NewSink(str, workDir, currGenDir, nextGenDir, makeMeta("snap-1234", 3, 2, 1))
	if err := s.Open(); err != nil {
		t.Fatal(err)
	}

	sqliteFile := "testdata/db-and-wals/backup.db"
	wal0 := "testdata/db-and-wals/wal-00"
	wal1 := "testdata/db-and-wals/wal-01"
	wal2 := "testdata/db-and-wals/wal-02"
	wal3 := "testdata/db-and-wals/wal-03"
	stream, err := NewFullStream(sqliteFile, wal0, wal1, wal2, wal3)
	if err != nil {
		t.Fatal(err)
	}
	defer stream.Close()

	// Check the error returned by the copy itself, not a stale err.
	if _, err := io.Copy(s, stream); err != nil {
		t.Fatal(err)
	}

	if err := s.Close(); err != nil {
		t.Fatal(err)
	}

	// Next generation directory should exist and contain a snapshot.
	if !dirExists(nextGenDir) {
		t.Fatalf("next generation directory %s does not exist", nextGenDir)
	}
	if !dirExists(filepath.Join(nextGenDir, "snap-1234")) {
		t.Fatalf("next generation directory %s does not contain snapshot directory", nextGenDir)
	}
	if !fileExists(filepath.Join(nextGenDir, baseSqliteFile)) {
		t.Fatalf("next generation directory %s does not contain base SQLite file", nextGenDir)
	}
	expMetaPath := filepath.Join(nextGenDir, "snap-1234", metaFileName)
	if !fileExists(expMetaPath) {
		t.Fatalf("meta file does not exist at %s", expMetaPath)
	}

	// Check SQLite database has been created correctly. The handle is not
	// named db so that it does not shadow the db package.
	sqliteDB, err := db.Open(filepath.Join(nextGenDir, baseSqliteFile), false, false)
	if err != nil {
		t.Fatal(err)
	}
	defer sqliteDB.Close()
	rows, err := sqliteDB.QueryStringStmt("SELECT COUNT(*) FROM foo")
	if err != nil {
		t.Fatal(err)
	}
	if exp, got := `[{"columns":["COUNT(*)"],"types":["integer"],"values":[[4]]}]`, asJSON(rows); exp != got {
		t.Fatalf("unexpected results for query, expected %s, got %s", exp, got)
	}
}
// Test_SinkIncrementalSnapshot writes an incremental (WAL-only) snapshot
// through a sink and verifies that it lands in the current generation
// directory, with the WAL bytes intact and metadata present, and that no next
// generation directory is created.
func Test_SinkIncrementalSnapshot(t *testing.T) {
	tmpDir := t.TempDir()
	workDir := filepath.Join(tmpDir, "work")
	mustCreateDir(workDir)
	currGenDir := filepath.Join(tmpDir, "curr")
	mustCreateDir(currGenDir)
	nextGenDir := filepath.Join(tmpDir, "next")
	str := mustNewStoreForSinkTest(t)

	s := NewSink(str, workDir, currGenDir, nextGenDir, makeMeta("snap-1234", 3, 2, 1))
	if err := s.Open(); err != nil {
		t.Fatal(err)
	}

	walData := mustReadFile("testdata/db-and-wals/wal-00")
	stream, err := NewIncrementalStream(walData)
	if err != nil {
		t.Fatal(err)
	}
	defer stream.Close()

	// Check the error returned by the copy itself, not a stale err.
	if _, err := io.Copy(s, stream); err != nil {
		t.Fatal(err)
	}

	if err := s.Close(); err != nil {
		t.Fatal(err)
	}

	if dirExists(nextGenDir) {
		t.Fatalf("next generation directory %s exists", nextGenDir)
	}
	if !dirExists(filepath.Join(currGenDir, "snap-1234")) {
		t.Fatalf("current generation directory %s does not contain snapshot directory", currGenDir)
	}

	expWALPath := filepath.Join(currGenDir, "snap-1234", snapWALFile)
	if !fileExists(expWALPath) {
		t.Fatalf("WAL file does not exist at %s", expWALPath)
	}
	if !bytes.Equal(walData, mustReadFile(expWALPath)) {
		t.Fatalf("WAL file data does not match")
	}
	expMetaPath := filepath.Join(currGenDir, "snap-1234", metaFileName)
	if !fileExists(expMetaPath) {
		t.Fatalf("meta file does not exist at %s", expMetaPath)
	}
}
// Test_SinkIncrementalSnapshot_NoWALData writes an incremental snapshot that
// carries no WAL data and verifies that a snapshot directory with an empty
// WAL file and metadata is still created in the current generation.
func Test_SinkIncrementalSnapshot_NoWALData(t *testing.T) {
	tmpDir := t.TempDir()
	workDir := filepath.Join(tmpDir, "work")
	mustCreateDir(workDir)
	currGenDir := filepath.Join(tmpDir, "curr")
	mustCreateDir(currGenDir)
	nextGenDir := filepath.Join(tmpDir, "next")
	str := mustNewStoreForSinkTest(t)

	s := NewSink(str, workDir, currGenDir, nextGenDir, makeMeta("snap-1234", 3, 2, 1))
	if err := s.Open(); err != nil {
		t.Fatal(err)
	}

	stream, err := NewIncrementalStream(nil)
	if err != nil {
		t.Fatal(err)
	}
	defer stream.Close()

	// Check the error returned by the copy itself, not a stale err.
	if _, err := io.Copy(s, stream); err != nil {
		t.Fatal(err)
	}

	if err := s.Close(); err != nil {
		t.Fatal(err)
	}

	if dirExists(nextGenDir) {
		t.Fatalf("next generation directory %s exists", nextGenDir)
	}
	if !dirExists(filepath.Join(currGenDir, "snap-1234")) {
		t.Fatalf("current generation directory %s does not contain snapshot directory", currGenDir)
	}

	expWALPath := filepath.Join(currGenDir, "snap-1234", snapWALFile)
	if !emptyFileExists(expWALPath) {
		t.Fatalf("expected empty WAL file at %s", expWALPath)
	}
	expMetaPath := filepath.Join(currGenDir, "snap-1234", metaFileName)
	if !fileExists(expMetaPath) {
		t.Fatalf("meta file does not exist at %s", expMetaPath)
	}
}
// mustNewStoreForSinkTest returns a Store rooted in a fresh temporary
// directory, failing the test immediately if the Store cannot be created.
func mustNewStoreForSinkTest(t *testing.T) *Store {
	store, err := NewStore(t.TempDir())
	if err != nil {
		t.Fatal(err)
	}
	return store
}
// mustCreateDir creates path, including any missing parents, and panics if
// that fails.
func mustCreateDir(path string) {
	err := os.MkdirAll(path, 0755)
	if err == nil {
		return
	}
	panic(err)
}
// mustReadFile returns the full contents of the file at path and panics if
// the file cannot be read.
func mustReadFile(path string) []byte {
	data, err := os.ReadFile(path)
	if err == nil {
		return data
	}
	panic(err)
}
// emptyFileExists reports whether path can be stat'ed and has a size of
// zero bytes.
func emptyFileExists(path string) bool {
	if fi, err := os.Stat(path); err == nil {
		return fi.Size() == 0
	}
	return false
}
// makeTestConfiguration builds a Raft configuration containing a single
// server with ID i and address a.
func makeTestConfiguration(i, a string) raft.Configuration {
	server := raft.Server{
		ID:      raft.ServerID(i),
		Address: raft.ServerAddress(a),
	}
	return raft.Configuration{Servers: []raft.Server{server}}
}
// makeMeta builds snapshot metadata for tests with the given ID, index, term
// and configuration index, a single-server configuration and version 1.
func makeMeta(id string, index, term, cfgIndex uint64) *Meta {
	snapMeta := raft.SnapshotMeta{
		ID:                 id,
		Index:              index,
		Term:               term,
		Configuration:      makeTestConfiguration("1", "localhost:1"),
		ConfigurationIndex: cfgIndex,
		Version:            1,
	}
	return &Meta{SnapshotMeta: snapMeta}
}
// asJSON marshals v with the project's JSON encoder and returns the result
// as a string. It panics if marshaling fails.
func asJSON(v interface{}) string {
	var enc encoding.Encoder
	data, err := enc.JSONMarshal(v)
	if err == nil {
		return string(data)
	}
	panic(fmt.Sprintf("failed to JSON marshal value: %s", err.Error()))
}

@ -1,120 +0,0 @@
package snapshot
import (
"expvar"
"fmt"
"io"
"os"
"path/filepath"
"time"
"github.com/hashicorp/raft"
"github.com/rqlite/rqlite/db"
)
// Snapshot represents a snapshot of the database state.
//
// A Snapshot is backed either by in-memory WAL data (see NewWALSnapshot) or
// by a list of files (see NewFullSnapshot). OpenStream uses files when any
// are set and falls back to walData otherwise.
type Snapshot struct {
// walData is the WAL content streamed as an incremental snapshot.
walData []byte
// files are the paths streamed as a full snapshot.
files []string
}
// NewWALSnapshot returns a Snapshot backed by the WAL data in b. The slice is
// stored as given, not copied.
func NewWALSnapshot(b []byte) *Snapshot {
	snap := new(Snapshot)
	snap.walData = b
	return snap
}
// NewFullSnapshot returns a Snapshot backed by the given files, a SQLite
// file and its WALs.
func NewFullSnapshot(files ...string) *Snapshot {
	snap := new(Snapshot)
	snap.files = files
	return snap
}
// Persist streams the snapshot into sink. On success the number of bytes
// copied and the elapsed time in milliseconds are recorded in the package
// stats.
func (s *Snapshot) Persist(sink raft.SnapshotSink) error {
	begin := time.Now()

	stream, err := s.OpenStream()
	if err != nil {
		return err
	}
	defer stream.Close()

	written, err := io.Copy(sink, stream)
	if err != nil {
		return err
	}

	elapsed := time.Since(begin)
	stats.Get(persistSize).(*expvar.Int).Set(written)
	stats.Get(persistDuration).(*expvar.Int).Set(elapsed.Milliseconds())
	return nil
}
// Release is a no-op; the method body is empty.
func (s *Snapshot) Release() {}
// OpenStream returns a stream from which the snapshot can be read: a full
// stream when the snapshot is backed by files, otherwise an incremental
// stream over its WAL data.
func (s *Snapshot) OpenStream() (*Stream, error) {
	if len(s.files) == 0 {
		return NewIncrementalStream(s.walData)
	}
	return NewFullStream(s.files...)
}
// ReplayDB reconstructs the database from the given reader, and writes it to
// the given path.
//
// fullSnap describes the layout of the data in r: first the SQLite database
// (dbInfo.Size bytes), then each WAL in order (wal.Size bytes each). The
// database is written to path, each WAL is written next to it as
// baseSqliteWALFile followed by its index, and the WALs are then replayed
// into the database file.
//
// An error is returned if fullSnap carries no database info, contains a nil
// WAL, or if any file operation or the WAL replay fails.
func ReplayDB(fullSnap *FullSnapshot, r io.Reader, path string) error {
	dbInfo := fullSnap.GetDb()
	if dbInfo == nil {
		return fmt.Errorf("got nil DB info")
	}

	sqliteBaseFD, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("error creating SQLite file: %v", err)
	}
	// Covers the early-return paths; the explicit Close below reports errors.
	defer sqliteBaseFD.Close()
	if _, err := io.CopyN(sqliteBaseFD, r, dbInfo.Size); err != nil {
		return fmt.Errorf("error writing SQLite file data: %v", err)
	}
	// Capture the Sync error itself so that it, not a stale nil err, is reported.
	if err := sqliteBaseFD.Sync(); err != nil {
		return fmt.Errorf("error syncing SQLite file: %v", err)
	}
	if err := sqliteBaseFD.Close(); err != nil {
		return fmt.Errorf("error closing SQLite file: %v", err)
	}

	// Write out any WALs.
	var walFiles []string
	for i, wal := range fullSnap.GetWals() {
		// The closure scopes the deferred Close to a single WAL file.
		if err := func() error {
			if wal == nil {
				return fmt.Errorf("got nil WAL")
			}

			walName := filepath.Join(filepath.Dir(path), baseSqliteWALFile+fmt.Sprintf("%d", i))
			walFD, err := os.Create(walName)
			if err != nil {
				return fmt.Errorf("error creating WAL file: %v", err)
			}
			defer walFD.Close()
			if _, err := io.CopyN(walFD, r, wal.Size); err != nil {
				return fmt.Errorf("error writing WAL file data: %v", err)
			}
			if err := walFD.Sync(); err != nil {
				return fmt.Errorf("error syncing WAL file: %v", err)
			}
			// Close explicitly so a failure is reported, as for the base file.
			if err := walFD.Close(); err != nil {
				return fmt.Errorf("error closing WAL file: %v", err)
			}
			walFiles = append(walFiles, walName)
			return nil
		}(); err != nil {
			return err
		}
	}

	// Checkpoint the WAL files into the base SQLite file
	if err := db.ReplayWAL(path, walFiles, false); err != nil {
		return fmt.Errorf("error checkpointing WAL: %v", err)
	}
	return nil
}

@ -1,882 +0,0 @@
package snapshot
import (
"encoding/json"
"errors"
"expvar"
"fmt"
"io"
"log"
"os"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
sync "sync"
"time"
"github.com/hashicorp/raft"
"github.com/rqlite/rqlite/db"
)
// init publishes the package's expvar map under the name "snapshot" and
// seeds every counter via ResetStats.
func init() {
stats = expvar.NewMap("snapshot")
ResetStats()
}
// On-disk layout and policy constants for the snapshot store.
const (
// minSnapshotRetain is the smallest retain count ReapSnapshots accepts.
minSnapshotRetain = 2
// generationsDir is the subdirectory of the store root that holds generations.
generationsDir = "generations"
// firstGeneration has the same 10-digit zero-padded form that
// GetNextGeneration produces.
firstGeneration = "0000000001"
// baseSqliteFile is the name of the base SQLite database within a generation.
baseSqliteFile = "base.sqlite"
// baseSqliteWALFile is the name of the WAL file placed beside the base
// SQLite file when checkpointing; ReplayDB appends an index to it.
baseSqliteWALFile = "base.sqlite-wal"
// snapWALFile is the name of the WAL file inside a snapshot directory.
snapWALFile = "wal"
// metaFileName is the name of the JSON metadata file inside a snapshot directory.
metaFileName = "meta.json"
// tmpSuffix is presumably the suffix used for temporary names — TODO confirm
// against tmpName/isTmpName, which are not visible here.
tmpSuffix = ".tmp"
)
// Keys of the entries in the package's expvar stats map.
const (
persistSize = "latest_persist_size"
persistDuration = "latest_persist_duration"
reap_snapshots_duration = "reap_snapshots_duration"
numSnapshotsReaped = "num_snapshots_reaped"
numGenerationsReaped = "num_generations_reaped"
)
var (
// ErrRetainCountTooLow is returned when the retain count is too low, i.e.
// below minSnapshotRetain.
ErrRetainCountTooLow = errors.New("retain count must be >= 2")
// ErrSnapshotNotFound is returned when a snapshot is not found.
ErrSnapshotNotFound = errors.New("snapshot not found")
// ErrSnapshotBaseMissing is returned when a snapshot base SQLite file is missing.
ErrSnapshotBaseMissing = errors.New("snapshot base SQLite file missing")
)
// stats captures stats for the Store. It is created in init and reset by
// ResetStats.
var stats *expvar.Map
// ResetStats clears the package's expvar map and re-creates every counter
// with a value of zero. It exists mostly for tests.
func ResetStats() {
	stats.Init()
	counters := []string{
		persistSize,
		persistDuration,
		reap_snapshots_duration,
		numSnapshotsReaped,
		numGenerationsReaped,
	}
	for _, key := range counters {
		stats.Add(key, 0)
	}
}
// Meta represents the metadata for a snapshot. It extends the Raft snapshot
// metadata with a flag recording the kind of snapshot.
type Meta struct {
raft.SnapshotMeta
// Full is true when the snapshot was written as a full snapshot and false
// for an incremental one.
Full bool
}
// LockingSink is a wrapper around a SnapshotSink that ensures that the
// Store has handed out only 1 sink at a time.
//
// Store.Create locks the Store's sink mutex before returning a LockingSink;
// Close and Cancel unlock it.
type LockingSink struct {
raft.SnapshotSink
// str is the Store whose sink mutex this sink holds.
str *Store
}
// Close closes the sink, unlocking the Store for creation of a new sink.
//
// The Store is unlocked only after the wrapped sink has finished closing.
// Closing the wrapped sink is what installs the snapshot and reaps old data,
// so releasing the lock earlier would let a second sink be created while the
// store is still being modified.
func (s *LockingSink) Close() error {
	defer s.str.sinkMu.Unlock()
	return s.SnapshotSink.Close()
}
// Cancel cancels the sink, unlocking the Store for creation of a new sink.
//
// The Store is unlocked only after the wrapped sink has finished cancelling,
// so that its temporary files are cleaned up before another sink can be
// created.
func (s *LockingSink) Cancel() error {
	defer s.str.sinkMu.Unlock()
	return s.SnapshotSink.Cancel()
}
// Store is a store for snapshots.
type Store struct {
// rootDir is the directory passed to NewStore; Dir returns it.
rootDir string
// workDir is the "scratchpad" directory under rootDir, handed to each Sink
// as its working directory.
workDir string
// generationsDir is the directory under rootDir that holds one
// subdirectory per generation.
generationsDir string
// sinkMu is held from Create until the returned sink is closed or
// cancelled, so that only one sink exists at a time.
sinkMu sync.Mutex
// noAutoreap, when true, stops Sink.Close from calling Reap.
noAutoreap bool
logger *log.Logger
}
// NewStore returns a Store rooted at dir. The generations directory is
// created if it does not exist, and the store is checked before it is
// returned; a failed check is reported as an error.
func NewStore(dir string) (*Store, error) {
	genDir := filepath.Join(dir, generationsDir)
	err := os.MkdirAll(genDir, 0755)
	if err != nil {
		return nil, err
	}

	store := new(Store)
	store.rootDir = dir
	store.workDir = filepath.Join(dir, "scratchpad")
	store.generationsDir = genDir
	store.logger = log.New(os.Stderr, "[snapshot-store] ", log.LstdFlags)

	err = store.check()
	if err != nil {
		return nil, fmt.Errorf("check failed: %s", err)
	}
	return store, nil
}
// Create returns a new sink, opened and ready to receive a snapshot.
//
// Sinks make assumptions about the state of the store, and another sink
// writing to the store at the same time could invalidate them. Only one sink
// may therefore exist at a time: Create takes the Store's sink lock and the
// returned sink releases it when closed or cancelled. This should not be a
// limitation, since snapshots are taken infrequently and one at a time. If
// Create fails, the lock is released before returning.
func (s *Store) Create(version raft.SnapshotVersion, index, term uint64, configuration raft.Configuration,
	configurationIndex uint64, trans raft.Transport) (retSink raft.SnapshotSink, retErr error) {
	s.sinkMu.Lock()
	defer func() {
		if retErr == nil {
			// Success: the returned sink now owns the lock.
			return
		}
		s.sinkMu.Unlock()
	}()

	currGenDir, haveGen, err := s.GetCurrentGenerationDir()
	if err != nil {
		return nil, err
	}
	nextGenDir, err := s.GetNextGenerationDir()
	if err != nil {
		return nil, err
	}
	if !haveGen {
		// With an empty store the snapshot goes to the same directory whether
		// it is full or incremental.
		currGenDir = nextGenDir
	}

	snapMeta := raft.SnapshotMeta{
		ID:                 snapshotName(term, index),
		Index:              index,
		Term:               term,
		Configuration:      configuration,
		ConfigurationIndex: configurationIndex,
		Version:            version,
	}
	sink := NewSink(s, s.workDir, currGenDir, nextGenDir, &Meta{SnapshotMeta: snapMeta})
	if openErr := sink.Open(); openErr != nil {
		sink.Cancel()
		return nil, fmt.Errorf("failed to open Sink: %v", openErr)
	}
	return &LockingSink{SnapshotSink: sink, str: s}, nil
}
// List returns the snapshots the Store makes available to Raft. Only the
// first snapshot returned by getSnapshots for the current generation is
// exposed. With no generation at all the result is nil; with a generation
// but no snapshots it is an empty, non-nil slice.
func (s *Store) List() ([]*raft.SnapshotMeta, error) {
	genDir, haveGen, err := s.GetCurrentGenerationDir()
	if err != nil {
		return nil, err
	}
	if !haveGen {
		return nil, nil
	}

	metas, err := s.getSnapshots(genDir)
	if err != nil {
		return nil, err
	}

	// Convert to the type Raft expects, exposing at most one snapshot.
	if len(metas) == 0 {
		return []*raft.SnapshotMeta{}, nil
	}
	return []*raft.SnapshotMeta{&metas[0].SnapshotMeta}, nil
}
// Open opens the snapshot with the given ID.
//
// Generations are searched from the last to the first entry returned by
// GetGenerations. In the first generation that contains the ID, a full
// stream is assembled from the base SQLite file plus the WAL file of every
// non-full snapshot up to and including the requested one. The returned
// metadata has its Size set to the size of that stream.
//
// It returns ErrSnapshotBaseMissing if the generation has no base SQLite
// file, an error if an expected WAL file is missing, and ErrSnapshotNotFound
// if no generation contains the ID.
func (s *Store) Open(id string) (*raft.SnapshotMeta, io.ReadCloser, error) {
generations, err := s.GetGenerations()
if err != nil {
return nil, nil, err
}
var meta *raft.SnapshotMeta
for i := len(generations) - 1; i >= 0; i-- {
genDir := filepath.Join(s.generationsDir, generations[i])
snapshots, err := s.getSnapshots(genDir)
if err != nil {
return nil, nil, err
}
if len(snapshots) == 0 {
continue
}
// getSnapshots returns the reversed order; re-sort so the loop below walks
// the snapshots in metaSlice order.
sort.Sort(metaSlice(snapshots))
if !metaSlice(snapshots).Contains(id) {
// Try the previous generation.
continue
}
// Always include the base SQLite file. There may not be a snapshot directory
// if it's been checkpointed due to snapshot-reaping.
baseSqliteFilePath := filepath.Join(genDir, baseSqliteFile)
if !fileExists(baseSqliteFilePath) {
return nil, nil, ErrSnapshotBaseMissing
}
files := []string{baseSqliteFilePath}
for _, snap := range snapshots {
if !snap.Full {
// Only include WAL files for incremental snapshots, since base SQLite database
// is always included
snapWALFilePath := filepath.Join(genDir, snap.ID, snapWALFile)
if !fileExists(snapWALFilePath) {
return nil, nil, fmt.Errorf("WAL file %s does not exist", snapWALFilePath)
}
files = append(files, snapWALFilePath)
}
if snap.ID == id {
// Stop after we've reached the requested snapshot
meta = &raft.SnapshotMeta{
ID: snap.ID,
Index: snap.Index,
Term: snap.Term,
Configuration: snap.Configuration,
ConfigurationIndex: snap.ConfigurationIndex,
Version: snap.Version,
}
break
}
}
str, err := NewFullStream(files...)
if err != nil {
return nil, nil, err
}
// meta is non-nil here provided Contains(id) above implies some snapshot
// has snap.ID == id — presumably true, but Contains is not visible here.
meta.Size = str.Size()
s.logger.Printf("opened snapshot %s successfully (size=%d)", id, meta.Size)
return meta, str, nil
}
return nil, nil, ErrSnapshotNotFound
}
// Dir returns the directory where the snapshots are stored, i.e. the root
// directory the Store was created with.
func (s *Store) Dir() string {
return s.rootDir
}
// Restore opens the snapshot with the given ID and restores it into dir,
// returning the path of the restored SQLite file.
func (s *Store) Restore(id string, dir string) (string, error) {
	_, stream, openErr := s.Open(id)
	if openErr != nil {
		return "", openErr
	}
	defer stream.Close()

	restoredPath, err := s.RestoreFromReader(stream, dir)
	return restoredPath, err
}
// RestoreFromReader restores the snapshot from the given reader to the given path.
//
// dir is created if needed and the database is rebuilt into a new, uniquely
// named "restored-*.sqlite" file inside it. The path of that file is
// returned. r must carry a full snapshot stream; a stream whose header has no
// full snapshot is rejected.
//
// If restoration fails after the destination file has been created, the file
// is removed (best effort) so that failed restores do not accumulate
// partially written databases in dir.
func (s *Store) RestoreFromReader(r io.Reader, dir string) (retPath string, retErr error) {
	// Create the destination directory and SQLite file path
	if err := os.MkdirAll(dir, 0755); err != nil {
		return "", err
	}
	sqliteFD, err := os.CreateTemp(dir, "restored-*.sqlite")
	if err != nil {
		return "", err
	}
	sqlitePath := sqliteFD.Name()
	defer func() {
		if retErr != nil {
			// Best effort: the original error is the one worth reporting.
			_ = os.Remove(sqlitePath)
		}
	}()
	if err := sqliteFD.Close(); err != nil {
		return "", err
	}

	strHdr, _, err := NewStreamHeaderFromReader(r)
	if err != nil {
		return "", fmt.Errorf("error reading stream header: %v", err)
	}
	fullSnap := strHdr.GetFullSnapshot()
	if fullSnap == nil {
		return "", fmt.Errorf("got nil FullSnapshot")
	}
	if err := ReplayDB(fullSnap, r, sqlitePath); err != nil {
		return "", fmt.Errorf("error replaying DB: %v", err)
	}
	return sqlitePath, nil
}
// Stats reports the state of the Snapshot Store: its root directory and size
// on disk, whether a full snapshot is needed next, the name of the next
// generation and whether auto-reaping is enabled. The IDs of the available
// snapshots and the list of generations are included when non-empty.
func (s *Store) Stats() (map[string]interface{}, error) {
	nextGen, err := s.GetNextGeneration()
	if err != nil {
		return nil, err
	}
	size, err := dirSize(s.rootDir)
	if err != nil {
		return nil, err
	}

	report := map[string]interface{}{
		"root_dir":        s.rootDir,
		"size":            size,
		"full_needed":     s.FullNeeded(),
		"next_generation": nextGen,
		"auto_reap":       !s.noAutoreap,
	}

	snaps, err := s.List()
	if err != nil {
		return nil, err
	}
	if len(snaps) > 0 {
		var ids []string
		for _, snap := range snaps {
			ids = append(ids, snap.ID)
		}
		report["available"] = ids
	}

	gens, err := s.GetGenerations()
	if err != nil {
		return nil, err
	}
	if len(gens) > 0 {
		report["generations"] = gens
	}
	return report, nil
}
// FullNeeded reports whether the next snapshot written to the Store must be
// a full snapshot. That is the case when there is no current generation, or
// when the current generation has no base SQLite file. If the current
// generation cannot be determined it reports false.
func (s *Store) FullNeeded() bool {
	genDir, haveGen, err := s.GetCurrentGenerationDir()
	if err != nil {
		return false
	}
	if !haveGen {
		return true
	}
	return !fileExists(filepath.Join(genDir, baseSqliteFile))
}
// GetNextGeneration returns the name of the next generation: the number of
// the latest existing generation plus one, or 1 if there are none, formatted
// as a zero-padded 10-digit string.
func (s *Store) GetNextGeneration() (string, error) {
	gens, err := s.GetGenerations()
	if err != nil {
		return "", err
	}
	if len(gens) == 0 {
		return fmt.Sprintf("%010d", 1), nil
	}

	latest, err := strconv.Atoi(gens[len(gens)-1])
	if err != nil {
		return "", err
	}
	return fmt.Sprintf("%010d", latest+1), nil
}
// GetNextGenerationDir returns the path the next generation directory would
// have. The directory is not guaranteed to exist.
func (s *Store) GetNextGenerationDir() (string, error) {
	name, err := s.GetNextGeneration()
	if err == nil {
		return filepath.Join(s.generationsDir, name), nil
	}
	return "", err
}
// GetGenerations returns the names of all existing generations, oldest
// first. A generation is a directory under the generations directory whose
// name is not temporary and parses as an integer. The order is the one
// os.ReadDir returns the entries in.
func (s *Store) GetGenerations() ([]string, error) {
	entries, err := os.ReadDir(s.generationsDir)
	if err != nil {
		return nil, err
	}

	var names []string
	for _, e := range entries {
		if !e.IsDir() || isTmpName(e.Name()) {
			continue
		}
		name := e.Name()
		if _, convErr := strconv.Atoi(name); convErr == nil {
			names = append(names, name)
		}
	}
	return names, nil
}
// GetCurrentGenerationDir returns the path of the current (latest)
// generation directory. The boolean is false, with no error, when the Store
// has no generations.
func (s *Store) GetCurrentGenerationDir() (string, bool, error) {
	gens, err := s.GetGenerations()
	switch {
	case err != nil:
		return "", false, err
	case len(gens) == 0:
		return "", false, nil
	}
	latest := gens[len(gens)-1]
	return filepath.Join(s.generationsDir, latest), true, nil
}
// Reap removes all generations but the current one and then, if a current
// generation exists, reaps the snapshots within it, retaining two.
func (s *Store) Reap() error {
	_, err := s.ReapGenerations()
	if err != nil {
		return fmt.Errorf("failed to reap generations during reap: %s", err)
	}

	genDir, haveGen, err := s.GetCurrentGenerationDir()
	if err != nil {
		return fmt.Errorf("failed to get current generation directory during reap: %s", err)
	}
	if !haveGen {
		return nil
	}

	if _, reapErr := s.ReapSnapshots(genDir, 2); reapErr != nil {
		return fmt.Errorf("failed to reap snapshots during reap: %s", reapErr)
	}
	return nil
}
// ReapGenerations deletes every generation except the latest one. It returns
// the number of generations removed; if a removal fails, the count so far is
// returned together with the error. The reaped-generations counter is
// updated only when all removals succeed.
func (s *Store) ReapGenerations() (int, error) {
	gens, err := s.GetGenerations()
	if err != nil {
		return 0, err
	}
	if len(gens) == 0 {
		return 0, nil
	}

	reaped := 0
	for _, gen := range gens[:len(gens)-1] {
		if rmErr := os.RemoveAll(filepath.Join(s.generationsDir, gen)); rmErr != nil {
			return reaped, rmErr
		}
		s.logger.Printf("reaped generation %s successfully", gen)
		reaped++
	}
	stats.Add(numGenerationsReaped, int64(reaped))
	return reaped, nil
}
// ReapSnapshots removes snapshots that are no longer needed. It does this by
// checkpointing WAL-based snapshots into the base SQLite file. The function
// returns the number of snapshots removed, or an error. The retain parameter
// specifies the number of snapshots to retain.
//
// It returns ErrRetainCountTooLow if retain is below minSnapshotRetain, and
// does nothing if dir holds no more than retain snapshots. On return the
// reaped-snapshots counter is increased by n whether or not an error
// occurred; the reap duration is recorded only on success.
func (s *Store) ReapSnapshots(dir string, retain int) (n int, err error) {
startT := time.Now()
defer func() {
stats.Add(numSnapshotsReaped, int64(n))
if err == nil {
dur := time.Since(startT)
stats.Get(reap_snapshots_duration).(*expvar.Int).Set(dur.Milliseconds())
}
}()
if retain < minSnapshotRetain {
return 0, ErrRetainCountTooLow
}
snapshots, err := s.getSnapshots(dir)
if err != nil {
s.logger.Printf("failed to get snapshots in directory %s: %s", dir, err)
return 0, err
}
// Keeping multiple snapshots makes it much easier to reason about fixing
// up the Snapshot store if we crash in the middle of snapshotting or reaping.
if len(snapshots) <= retain {
return 0, nil
}
// We need to checkpoint the WAL files starting with the oldest snapshot. We'll
// do this by opening the base SQLite file and then replaying the WAL files into it.
// We'll then delete each snapshot once we've checkpointed it.
sort.Sort(metaSlice(snapshots))
baseSqliteFilePath := filepath.Join(dir, baseSqliteFile)
n = 0
// Everything except the last `retain` entries of the sorted list is reaped.
for _, snap := range snapshots[0 : len(snapshots)-retain] {
snapDirPath := filepath.Join(dir, snap.ID) // Path to the snapshot directory
walFileInSnapshot := filepath.Join(snapDirPath, snapWALFile) // Path to the WAL file in the snapshot
walToCheckpointFilePath := filepath.Join(dir, baseSqliteWALFile) // Path to the WAL file to checkpoint
// If the snapshot directory doesn't contain a WAL file, then the base SQLite
// file is the snapshot state, and there is no checkpointing to do.
if fileExists(walFileInSnapshot) {
// Copy the WAL file from the snapshot to a temporary location beside the base SQLite file.
// We do this so that we only delete the snapshot directory once we can be sure that
// we've copied it out fully. Renaming is not atomic on every OS, so let's be sure. We
// also use a temporary file name, so we know where the WAL came from if we exit here
// and need to clean up on a restart.
// NOTE(review): here copyWALFromSnapshot receives the WAL file path, whereas
// check() passes the snapshot directory as the first argument — confirm
// which one the helper expects.
if err := copyWALFromSnapshot(walFileInSnapshot, walToCheckpointFilePath); err != nil {
s.logger.Printf("failed to copy WAL file from snapshot %s: %s", walFileInSnapshot, err)
return n, err
}
// Checkpoint the WAL file into the base SQLite file
if err := db.ReplayWAL(baseSqliteFilePath, []string{walToCheckpointFilePath}, false); err != nil {
s.logger.Printf("failed to checkpoint WAL file %s: %s", walToCheckpointFilePath, err)
return n, err
}
// NOTE(review): the snapshot directory is not removed in this branch;
// presumably copyWALFromSnapshot takes care of that — TODO confirm.
} else {
if err := removeDirSync(snapDirPath); err != nil {
s.logger.Printf("failed to remove full snapshot directory %s: %s", snapDirPath, err)
return n, err
}
}
n++
s.logger.Printf("reaped snapshot %s successfully", snap.ID)
}
return n, nil
}
// getSnapshots returns the metadata of every snapshot found in dir, ordered
// from most recently created to oldest. Files and temporary entries are
// skipped. A directory that does not exist yields an empty result, not an
// error.
func (s *Store) getSnapshots(dir string) ([]*Meta, error) {
	entries, err := os.ReadDir(dir)
	if err != nil {
		// A missing directory simply means there are no snapshots.
		if os.IsNotExist(err) {
			return nil, nil
		}
		return nil, err
	}

	var metas []*Meta
	for _, e := range entries {
		// Only non-temporary directories are snapshots.
		if !e.IsDir() || isTmpName(e.Name()) {
			continue
		}
		name := e.Name()
		m, readErr := s.readMeta(filepath.Join(dir, name))
		if readErr != nil {
			return nil, fmt.Errorf("failed to read meta for snapshot %s: %s", name, readErr)
		}
		metas = append(metas, m)
	}

	// Reverse the natural order so the result runs new -> old.
	newestFirst := sort.Reverse(metaSlice(metas))
	sort.Sort(newestFirst)
	return metas, nil
}
// readMeta decodes the JSON metadata file found in the given snapshot
// directory.
func (s *Store) readMeta(dir string) (*Meta, error) {
	fh, err := os.Open(filepath.Join(dir, metaFileName))
	if err != nil {
		return nil, err
	}
	defer fh.Close()

	meta := new(Meta)
	if decErr := json.NewDecoder(fh).Decode(meta); decErr != nil {
		return nil, decErr
	}
	return meta, nil
}
// check inspects the on-disk state of the snapshot store and repairs what it
// can after an interrupted operation. In order, it:
//
//   - resets the work directory,
//   - reaps generations,
//   - removes temporary generation directories,
//   - removes temporary entries from the current generation,
//   - removes the current generation if it contains no snapshots,
//   - redoes an interrupted move of a WAL file out of the oldest snapshot, and
//   - completes an interrupted checkpoint of a WAL file into the base SQLite
//     file.
//
// It returns ErrSnapshotBaseMissing if the current generation contains
// snapshots but no base SQLite file, since that state cannot be repaired.
// Any other failure is returned as an error describing the step that failed.
func (s *Store) check() (retError error) {
	defer s.logger.Printf("check complete")
	s.logger.Printf("checking snapshot store at %s", s.rootDir)
	var n int

	if err := s.resetWorkDir(); err != nil {
		return fmt.Errorf("failed to reset work directory: %s", err)
	}

	// Simplify logic by reaping generations first.
	n, err := s.ReapGenerations()
	if err != nil {
		return fmt.Errorf("failed to reap generations: %s", err)
	}
	s.logger.Printf("reaped %d generations", n)

	// Remove any temporary generational directories. They represent operations
	// that were interrupted.
	entries, err := os.ReadDir(s.generationsDir)
	if err != nil {
		return err
	}
	// Restart the count so the log line below reports only the temporary
	// directories removed here, not the generations reaped above.
	n = 0
	for _, entry := range entries {
		if !isTmpName(entry.Name()) {
			continue
		}
		if err := os.RemoveAll(filepath.Join(s.generationsDir, entry.Name())); err != nil {
			return fmt.Errorf("failed to remove temporary generation directory %s: %s", entry.Name(), err)
		}
		n++
	}
	s.logger.Printf("removed %d temporary generation directories", n)

	// Remove any temporary files in the current generation.
	currGenDir, ok, err := s.GetCurrentGenerationDir()
	if err != nil {
		return err
	}
	if !ok {
		// No current generation, so there is nothing further to check.
		return nil
	}
	entries, err = os.ReadDir(currGenDir)
	if err != nil {
		return err
	}
	n = 0
	for _, entry := range entries {
		if isTmpName(entry.Name()) {
			if err := os.RemoveAll(filepath.Join(currGenDir, entry.Name())); err != nil {
				return fmt.Errorf("failed to remove temporary file %s: %s", entry.Name(), err)
			}
			n++
		}
	}
	s.logger.Printf("removed %d temporary files from current generation", n)

	baseSqliteFilePath := filepath.Join(currGenDir, baseSqliteFile)
	baseSqliteWALFilePath := filepath.Join(currGenDir, baseSqliteWALFile)

	// Any snapshots in the current generation?
	snapshots, err := s.getSnapshots(currGenDir)
	if err != nil {
		return fmt.Errorf("failed to get snapshots: %s", err)
	}
	if len(snapshots) == 0 {
		// An empty current generation is useless. This could happen if the very first
		// snapshot was interrupted after writing the base SQLite file, but before
		// moving its snapshot directory into place.
		if err := os.RemoveAll(currGenDir); err != nil {
			return fmt.Errorf("failed to remove empty current generation directory %s: %s", currGenDir, err)
		}
		s.logger.Printf("removed an empty current generation directory")
		return nil
	}

	// If we have no base file, we shouldn't have any snapshot directories. If we
	// do it's an inconsistent state which we cannot repair, and needs to be flagged.
	if !fileExists(baseSqliteFilePath) {
		return ErrSnapshotBaseMissing
	}
	s.logger.Printf("found base SQLite file at %s", baseSqliteFilePath)

	// If we have a WAL file in the current generation which ends with the same ID as
	// the oldest snapshot, then the copy of the WAL from the snapshot and subsequent
	// checkpointing was interrupted. We need to redo the move-from-snapshot operation.
	// Sorting with metaSlice puts the oldest snapshot at index 0.
	sort.Sort(metaSlice(snapshots))
	walSnapshotCopyPath := walSnapCopyName(currGenDir, snapshots[0].ID)
	snapDirPath := filepath.Join(currGenDir, snapshots[0].ID)
	if fileExists(walSnapshotCopyPath) {
		s.logger.Printf("found uncheckpointed copy of WAL file from snapshot %s", snapshots[0].ID)
		if err := os.Remove(walSnapshotCopyPath); err != nil {
			return fmt.Errorf("failed to remove copy of WAL file %s: %s", walSnapshotCopyPath, err)
		}
		if err := copyWALFromSnapshot(snapDirPath, baseSqliteWALFilePath); err != nil {
			s.logger.Printf("failed to copy WAL file from snapshot %s: %s", snapshots[0].ID, err)
			return err
		}
		// Now we can remove the snapshot directory.
		if err := removeDirSync(snapDirPath); err != nil {
			return fmt.Errorf("failed to remove snapshot directory %s: %s", snapDirPath, err)
		}
		s.logger.Printf("completed copy of WAL file from snapshot %s", snapshots[0].ID)
	}

	// If we have a base SQLite file, and a WAL file sitting beside it, this implies
	// that we were interrupted before completing a checkpoint operation, as part of
	// reaping snapshots. Complete the checkpoint operation now.
	if fileExists(baseSqliteFilePath) && fileExists(baseSqliteWALFilePath) {
		if err := db.ReplayWAL(baseSqliteFilePath, []string{baseSqliteWALFilePath},
			false); err != nil {
			return fmt.Errorf("failed to replay WALs: %s", err)
		}
		if err := os.Remove(baseSqliteWALFilePath); err != nil {
			return fmt.Errorf("failed to remove WAL file %s: %s", baseSqliteWALFilePath, err)
		}
		s.logger.Printf("completed checkpoint of WAL file %s", baseSqliteWALFilePath)
	}
	return nil
}
// resetWorkDir deletes the Store's work directory together with anything
// inside it, and then creates it again (mode 0755) so it ends up empty.
func (s *Store) resetWorkDir() error {
	err := os.RemoveAll(s.workDir)
	if err != nil {
		return fmt.Errorf("failed to remove work directory %s: %s", s.workDir, err)
	}

	err = os.MkdirAll(s.workDir, 0755)
	if err != nil {
		return fmt.Errorf("failed to create work directory %s: %s", s.workDir, err)
	}
	return nil
}
// copyWALFromSnapshot moves the file at srcWALPath to dstWALPath in stages,
// so that the data is safely copied before its source directory is deleted:
//
//  1. srcWALPath is copied (and synced) to an intermediate file beside
//     dstWALPath, named by walSnapCopyName.
//  2. The directory containing srcWALPath is removed.
//  3. The intermediate file is renamed to dstWALPath.
//
// NOTE(review): the intermediate file is named after filepath.Base(srcWALPath),
// and the directory removed is filepath.Dir(srcWALPath). check looks for an
// intermediate file named after a snapshot ID and passes a snapshot directory
// as srcWALPath — confirm what callers are expected to pass here.
func copyWALFromSnapshot(srcWALPath string, dstWALPath string) error {
	stagedCopy := walSnapCopyName(filepath.Dir(dstWALPath), filepath.Base(srcWALPath))
	if err := copyFileSync(srcWALPath, stagedCopy); err != nil {
		return fmt.Errorf("failed to copy WAL file %s from snapshot: %s", srcWALPath, err)
	}

	// The staged copy holds everything we need, so the source directory can go.
	srcDir := filepath.Dir(srcWALPath)
	if err := removeDirSync(srcDir); err != nil {
		return fmt.Errorf("failed to remove incremental snapshot directory %s: %s", srcDir, err)
	}

	// TODO: a crash at this point (source removed, staged copy not yet renamed)
	// is not handled here; the original author marked it as something to fix
	// in check.

	// Give the staged copy its final name, ready for checkpointing.
	if err := os.Rename(stagedCopy, dstWALPath); err != nil {
		return fmt.Errorf("failed to move WAL file %s: %s", stagedCopy, err)
	}
	return nil
}
// walSnapCopyName builds the path of the intermediate WAL copy made for the
// snapshot called snapName. The file lives in dstDir and its name is
// baseSqliteWALFile followed by a dot and snapName.
func walSnapCopyName(dstDir, snapName string) string {
	fileName := baseSqliteWALFile + "." + snapName
	return filepath.Join(dstDir, fileName)
}
// isTmpName reports whether the extension of name equals tmpSuffix.
func isTmpName(name string) bool {
	ext := filepath.Ext(name)
	return ext == tmpSuffix
}
// fileExists reports whether path exists. It returns false only when os.Stat
// fails with a "does not exist" error; every other outcome, including other
// Stat errors, is reported as true.
func fileExists(path string) bool {
	if _, err := os.Stat(path); os.IsNotExist(err) {
		return false
	}
	return true
}
// dirExists reports whether path can be stat'ed and refers to a directory.
// Any Stat error yields false.
func dirExists(path string) bool {
	info, err := os.Stat(path)
	if err != nil {
		return false
	}
	return info.IsDir()
}
// copyFileSync copies the contents of the file src into dst, creating or
// truncating dst, and syncs dst before returning. The first error from
// opening, creating, copying or syncing is returned.
func copyFileSync(src, dst string) error {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer out.Close()

	_, err = io.Copy(out, in)
	if err != nil {
		return err
	}
	// Flush the copied data to stable storage before reporting success.
	return out.Sync()
}
// parentDir returns filepath.Dir(dir), i.e. dir with its last path element
// removed.
func parentDir(dir string) string {
	parent := filepath.Dir(dir)
	return parent
}
// tmpName returns path with tmpSuffix appended.
func tmpName(path string) string {
	withSuffix := path + tmpSuffix
	return withSuffix
}
// nonTmpName returns path with a trailing tmpSuffix removed. A path that does
// not end in tmpSuffix is returned unchanged.
func nonTmpName(path string) string {
	if strings.HasSuffix(path, tmpSuffix) {
		return path[:len(path)-len(tmpSuffix)]
	}
	return path
}
// moveFromTmpSync renames src to its non-temporary name (see nonTmpName) and
// then syncs the parent directory of the new name via syncDirParentMaybe.
// If the rename fails it returns an empty string and the rename error;
// otherwise it returns the new name together with the result of the sync.
func moveFromTmpSync(src string) (string, error) {
	final := nonTmpName(src)
	err := os.Rename(src, final)
	if err != nil {
		return "", err
	}
	err = syncDirParentMaybe(final)
	return final, err
}
// removeDirSync removes dir and everything beneath it, then syncs dir's
// parent directory via syncDirParentMaybe.
func removeDirSync(dir string) error {
	err := os.RemoveAll(dir)
	if err != nil {
		return err
	}
	err = syncDirParentMaybe(dir)
	return err
}
// syncDir opens dir and calls Sync on the resulting handle, returning any
// error from either step.
func syncDir(dir string) error {
	d, err := os.Open(dir)
	if err != nil {
		return err
	}
	defer d.Close()

	syncErr := d.Sync()
	return syncErr
}
// syncDirParentMaybe syncs the parent directory of dir. On Windows it does
// nothing and returns nil.
func syncDirParentMaybe(dir string) error {
	if runtime.GOOS != "windows" {
		return syncDir(parentDir(dir))
	}
	return nil
}
// snapshotName builds a snapshot name of the form "<term>-<index>-<msec>",
// where msec is the current Unix time in milliseconds.
func snapshotName(term, index uint64) string {
	nanos := time.Now().UnixNano()
	millis := nanos / int64(time.Millisecond)
	return fmt.Sprintf("%d-%d-%d", term, index, millis)
}
// metaSlice is a slice of Meta that can be sorted with the sort package.
// Ordering is by Term, then Index, then ID, all ascending, which places
// snapshots from oldest to newest.
type metaSlice []*Meta

// Len returns the number of elements in the slice.
func (s metaSlice) Len() int {
	return len(s)
}

// Less reports whether element i sorts before element j, comparing Term
// first, then Index, and finally ID.
func (s metaSlice) Less(i, j int) bool {
	a, b := s[i], s[j]
	switch {
	case a.Term != b.Term:
		return a.Term < b.Term
	case a.Index != b.Index:
		return a.Index < b.Index
	default:
		return a.ID < b.ID
	}
}

// Swap exchanges elements i and j.
func (s metaSlice) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}

// Contains reports whether any element has the given ID.
func (s metaSlice) Contains(id string) bool {
	for k := range s {
		if s[k].ID == id {
			return true
		}
	}
	return false
}
// dirSize walks the tree rooted at path and returns the combined size of all
// non-directory entries. Entries that no longer exist when visited are
// skipped, since snapshot files might disappear during the walk. Any other
// walk error stops the walk and is returned along with the size counted so far.
func dirSize(path string) (int64, error) {
	var total int64
	visit := func(_ string, info os.FileInfo, err error) error {
		switch {
		case err == nil:
			if !info.IsDir() {
				total += info.Size()
			}
			return nil
		case os.IsNotExist(err):
			// The entry vanished mid-walk; ignore it.
			return nil
		default:
			return err
		}
	}
	err := filepath.Walk(path, visit)
	return total, err
}

@ -1,683 +0,0 @@
package snapshot
import (
"bytes"
"io"
"strings"
"testing"
"github.com/hashicorp/raft"
"github.com/rqlite/rqlite/db"
)
// Test_NewStore checks that a Store created in an empty directory has no
// generations and no current generation directory, and that the next
// generation directory it reports ends with firstGeneration.
func Test_NewStore(t *testing.T) {
	tmpDir := t.TempDir()
	s, err := NewStore(tmpDir)
	if err != nil {
		t.Fatal(err)
	}
	if s == nil {
		t.Fatal("expected non-nil store")
	}

	generations, err := s.GetGenerations()
	if err != nil {
		t.Fatalf("failed to get generations: %s", err.Error())
	}
	if len(generations) != 0 {
		t.Fatalf("expected 0 generation, got %d", len(generations))
	}

	_, ok, err := s.GetCurrentGenerationDir()
	if err != nil {
		t.Fatalf("failed to get current generation dir: %s", err.Error())
	}
	if ok {
		t.Fatalf("expected current generation dir not to exist")
	}

	nextGenDir, err := s.GetNextGenerationDir()
	if err != nil {
		t.Fatalf("failed to get next generation dir: %s", err.Error())
	}
	if !strings.HasSuffix(nextGenDir, firstGeneration) {
		// Report what is actually being checked: the suffix of the path.
		t.Fatalf("expected next generation dir to end with %s, got %s", firstGeneration, nextGenDir)
	}
}
// Test_NewStore_ListOpenEmpty checks that a freshly created Store reports
// that a full snapshot is needed, lists no snapshots, and returns
// ErrSnapshotNotFound when asked to open a snapshot that does not exist.
func Test_NewStore_ListOpenEmpty(t *testing.T) {
	dir := t.TempDir()
	s, err := NewStore(dir)
	if err != nil {
		t.Fatalf("failed to create snapshot store: %s", err)
	}
	if !s.FullNeeded() {
		t.Fatalf("expected full snapshots to be needed")
	}
	if snaps, err := s.List(); err != nil {
		t.Fatalf("failed to list snapshots: %s", err)
	} else if len(snaps) != 0 {
		t.Fatalf("expected 0 snapshots, got %d", len(snaps))
	}
	if _, _, err := s.Open("non-existent"); err != ErrSnapshotNotFound {
		// %v rather than %s so a nil error prints sensibly.
		t.Fatalf("expected ErrSnapshotNotFound, got %v", err)
	}
}
// Test_Store_CreateFullThenIncremental performs detailed testing of the
// snapshot creation process. It is critical that snapshots are created
// correctly, so this test is thorough.
//
// It persists one full snapshot followed by three WAL (incremental)
// snapshots. After each one it checks that exactly one snapshot is listed
// with the expected index and term, opens that snapshot, and verifies the
// stream contents and the reported size. Finally it restores the last
// snapshot via Restore and checks the restored database.
func Test_Store_CreateFullThenIncremental(t *testing.T) {
// checkSnapshotCount fails the test unless the store lists exactly exp
// snapshots, and returns the first listed snapshot (nil if there are none).
checkSnapshotCount := func(s *Store, exp int) *raft.SnapshotMeta {
snaps, err := s.List()
if err != nil {
t.Fatalf("failed to list snapshots: %s", err)
}
if exp, got := exp, len(snaps); exp != got {
t.Fatalf("expected %d snapshots, got %d", exp, got)
}
if len(snaps) == 0 {
return nil
}
return snaps[0]
}
dir := t.TempDir()
str, err := NewStore(dir)
if err != nil {
t.Fatalf("failed to create snapshot store: %s", err)
}
if !str.FullNeeded() {
t.Fatalf("expected full snapshots to be needed")
}
testConfig1 := makeTestConfiguration("1", "2")
sink, err := str.Create(1, 22, 33, testConfig1, 4, nil)
if err != nil {
t.Fatalf("failed to create 1st snapshot sink: %s", err)
}
//////////////////////////////////////////////////////////////////////////
// Create a full snapshot and write it to the sink.
fullSnap := NewFullSnapshot("testdata/db-and-wals/backup.db")
if err := fullSnap.Persist(sink); err != nil {
t.Fatalf("failed to persist full snapshot: %s", err)
}
if err := sink.Close(); err != nil {
t.Fatalf("failed to close sink: %s", err)
}
if str.FullNeeded() {
t.Fatalf("full snapshot still needed")
}
meta := checkSnapshotCount(str, 1)
if meta.Index != 22 || meta.Term != 33 {
t.Fatalf("unexpected snapshot metadata: %+v", meta)
}
// Open the latest snapshot and check that it's correct.
raftMeta, rc, err := str.Open(meta.ID)
if err != nil {
t.Fatalf("failed to open snapshot %s: %s", meta.ID, err)
}
// Wrap the reader so the total number of bytes read can be compared with
// the size reported in the snapshot metadata.
crc := &countingReadCloser{rc: rc}
streamHdr, _, err := NewStreamHeaderFromReader(crc)
if err != nil {
t.Fatalf("error reading stream header: %v", err)
}
streamSnap := streamHdr.GetFullSnapshot()
if streamSnap == nil {
t.Fatal("got nil FullSnapshot")
}
dbInfo := streamSnap.GetDb()
if dbInfo == nil {
t.Fatal("got nil DB info")
}
if !compareReaderToFile(crc, "testdata/db-and-wals/backup.db") {
t.Fatalf("database file does not match what is in snapshot")
}
// should be no more data
if _, err := crc.Read(make([]byte, 1)); err != io.EOF {
t.Fatalf("expected EOF, got %v", err)
}
if err := crc.Close(); err != nil {
t.Fatalf("failed to close snapshot reader: %s", err)
}
if exp, got := raftMeta.Size, int64(crc.n); exp != got {
t.Fatalf("expected snapshot size to be %d, got %d", exp, got)
}
// NOTE(review): crc was already closed (with the error checked) a few lines
// above; this second Close looks redundant.
crc.Close()
//////////////////////////////////////////////////////////////////////////
// Incremental snapshot next
sink, err = str.Create(2, 55, 66, testConfig1, 4, nil)
if err != nil {
t.Fatalf("failed to create 2nd snapshot sink: %s", err)
}
walData := mustReadFile("testdata/db-and-wals/wal-00")
incSnap := NewWALSnapshot(walData)
if err := incSnap.Persist(sink); err != nil {
t.Fatalf("failed to persist incremental snapshot: %s", err)
}
if err := sink.Close(); err != nil {
t.Fatalf("failed to close sink: %s", err)
}
meta = checkSnapshotCount(str, 1)
if meta.Index != 55 || meta.Term != 66 {
t.Fatalf("unexpected snapshot metadata: %+v", meta)
}
// Open the latest snapshot again, and recreate the database so we
// can check its contents.
raftMeta, rc, err = str.Open(meta.ID)
if err != nil {
t.Fatalf("failed to open snapshot %s: %s", meta.ID, err)
}
crc = &countingReadCloser{rc: rc}
streamHdr, _, err = NewStreamHeaderFromReader(crc)
if err != nil {
t.Fatalf("error reading stream header: %v", err)
}
streamSnap = streamHdr.GetFullSnapshot()
if streamSnap == nil {
t.Fatal("got nil FullSnapshot")
}
tmpFile := t.TempDir() + "/db"
if err := ReplayDB(streamSnap, crc, tmpFile); err != nil {
t.Fatalf("failed to replay database: %s", err)
}
checkDB, err := db.Open(tmpFile, false, true)
if err != nil {
t.Fatalf("failed to open database: %s", err)
}
defer checkDB.Close()
// Database should now have 1 row after replaying the WAL.
rows, err := checkDB.QueryStringStmt("SELECT * FROM foo")
if err != nil {
t.Fatalf("failed to query database: %s", err)
}
if exp, got := `[{"columns":["id","value"],"types":["integer","text"],"values":[[1,"Row 0"]]}]`, asJSON(rows); exp != got {
t.Fatalf("unexpected results for query, exp %s, got %s", exp, got)
}
// should be no more data
if _, err := crc.Read(make([]byte, 1)); err != io.EOF {
t.Fatalf("expected EOF, got %v", err)
}
if exp, got := raftMeta.Size, int64(crc.n); exp != got {
t.Fatalf("expected snapshot size to be %d, got %d", exp, got)
}
crc.Close()
//////////////////////////////////////////////////////////////////////////
// Do it again!
// NOTE(review): this is the third sink created by the test, but the failure
// message below still says "2nd".
sink, err = str.Create(2, 77, 88, testConfig1, 4, nil)
if err != nil {
t.Fatalf("failed to create 2nd snapshot sink: %s", err)
}
walData = mustReadFile("testdata/db-and-wals/wal-01")
incSnap = NewWALSnapshot(walData)
if err := incSnap.Persist(sink); err != nil {
t.Fatalf("failed to persist incremental snapshot: %s", err)
}
if err := sink.Close(); err != nil {
t.Fatalf("failed to close sink: %s", err)
}
meta = checkSnapshotCount(str, 1)
if meta.Index != 77 || meta.Term != 88 {
t.Fatalf("unexpected snapshot metadata: %+v", meta)
}
// Open the latest snapshot again, and recreate the database so we
// can check its contents.
raftMeta, rc, err = str.Open(meta.ID)
if err != nil {
t.Fatalf("failed to open snapshot %s: %s", meta.ID, err)
}
crc = &countingReadCloser{rc: rc}
streamHdr, _, err = NewStreamHeaderFromReader(crc)
if err != nil {
t.Fatalf("error reading stream header: %v", err)
}
streamSnap = streamHdr.GetFullSnapshot()
if streamSnap == nil {
t.Fatal("got nil FullSnapshot")
}
tmpFile = t.TempDir() + "/db"
if err := ReplayDB(streamSnap, crc, tmpFile); err != nil {
t.Fatalf("failed to replay database: %s", err)
}
checkDB, err = db.Open(tmpFile, false, true)
if err != nil {
t.Fatalf("failed to open database: %s", err)
}
defer checkDB.Close()
// Two rows are expected now: one from each WAL replayed so far.
rows, err = checkDB.QueryStringStmt("SELECT * FROM foo")
if err != nil {
t.Fatalf("failed to query database: %s", err)
}
if exp, got := `[{"columns":["id","value"],"types":["integer","text"],"values":[[1,"Row 0"],[2,"Row 1"]]}]`, asJSON(rows); exp != got {
t.Fatalf("unexpected results for query, exp %s, got %s", exp, got)
}
// should be no more data
if _, err := crc.Read(make([]byte, 1)); err != io.EOF {
t.Fatalf("expected EOF, got %v", err)
}
if exp, got := raftMeta.Size, int64(crc.n); exp != got {
t.Fatalf("expected snapshot size to be %d, got %d", exp, got)
}
crc.Close()
//////////////////////////////////////////////////////////////////////////
// One last time, after a reaping took place in the middle.
sink, err = str.Create(2, 100, 200, testConfig1, 4, nil)
if err != nil {
t.Fatalf("failed to create 2nd snapshot sink: %s", err)
}
walData = mustReadFile("testdata/db-and-wals/wal-02")
incSnap = NewWALSnapshot(walData)
if err := incSnap.Persist(sink); err != nil {
t.Fatalf("failed to persist incremental snapshot: %s", err)
}
if err := sink.Close(); err != nil {
t.Fatalf("failed to close sink: %s", err)
}
meta = checkSnapshotCount(str, 1)
if meta.Index != 100 || meta.Term != 200 {
t.Fatalf("unexpected snapshot metadata: %+v", meta)
}
// Open the latest snapshot again, and recreate the database so we
// can check its contents.
raftMeta, rc, err = str.Open(meta.ID)
if err != nil {
t.Fatalf("failed to open snapshot %s: %s", meta.ID, err)
}
crc = &countingReadCloser{rc: rc}
streamHdr, _, err = NewStreamHeaderFromReader(crc)
if err != nil {
t.Fatalf("error reading stream header: %v", err)
}
streamSnap = streamHdr.GetFullSnapshot()
if streamSnap == nil {
t.Fatal("got nil FullSnapshot")
}
tmpFile = t.TempDir() + "/db"
if err := ReplayDB(streamSnap, crc, tmpFile); err != nil {
t.Fatalf("failed to replay database: %s", err)
}
checkDB, err = db.Open(tmpFile, false, true)
if err != nil {
t.Fatalf("failed to open database: %s", err)
}
defer checkDB.Close()
// Three rows are expected now.
rows, err = checkDB.QueryStringStmt("SELECT * FROM foo")
if err != nil {
t.Fatalf("failed to query database: %s", err)
}
if exp, got := `[{"columns":["id","value"],"types":["integer","text"],"values":[[1,"Row 0"],[2,"Row 1"],[3,"Row 2"]]}]`, asJSON(rows); exp != got {
t.Fatalf("unexpected results for query, exp %s, got %s", exp, got)
}
// should be no more data
if _, err := crc.Read(make([]byte, 1)); err != io.EOF {
t.Fatalf("expected EOF, got %v", err)
}
if exp, got := raftMeta.Size, int64(crc.n); exp != got {
t.Fatalf("expected snapshot size to be %d, got %d", exp, got)
}
crc.Close()
// Finally, test via Restore() function.
tmpDir := t.TempDir()
rPath, err := str.Restore(meta.ID, tmpDir)
if err != nil {
t.Fatalf("failed to restore snapshot: %s", err)
}
restoredDB, err := db.Open(rPath, false, true)
if err != nil {
t.Fatalf("failed to open database: %s", err)
}
defer restoredDB.Close()
rows, err = restoredDB.QueryStringStmt("SELECT * FROM foo")
if err != nil {
t.Fatalf("failed to query database: %s", err)
}
if exp, got := `[{"columns":["id","value"],"types":["integer","text"],"values":[[1,"Row 0"],[2,"Row 1"],[3,"Row 2"]]}]`, asJSON(rows); exp != got {
t.Fatalf("unexpected results for query, exp %s, got %s", exp, got)
}
}
// Test_Store_CreateFullThenFull ensures two full snapshots can be created
// and persisted back-to-back. After each one the store must list exactly
// one snapshot carrying the index and term of the snapshot just written.
func Test_Store_CreateFullThenFull(t *testing.T) {
	// checkSnapshotCount fails the test unless the store lists exactly exp
	// snapshots, and returns the first listed snapshot (nil if there are none).
	checkSnapshotCount := func(s *Store, exp int) *raft.SnapshotMeta {
		snaps, err := s.List()
		if err != nil {
			t.Fatalf("failed to list snapshots: %s", err)
		}
		if exp, got := exp, len(snaps); exp != got {
			t.Fatalf("expected %d snapshots, got %d", exp, got)
		}
		if len(snaps) == 0 {
			return nil
		}
		return snaps[0]
	}

	dir := t.TempDir()
	str, err := NewStore(dir)
	if err != nil {
		t.Fatalf("failed to create snapshot store: %s", err)
	}
	if !str.FullNeeded() {
		t.Fatalf("expected full snapshots to be needed")
	}

	testConfig1 := makeTestConfiguration("1", "2")

	//////////////////////////////////////////////////////////////////////////
	// Create a full snapshot and write it to the sink.
	sink, err := str.Create(1, 22, 33, testConfig1, 4, nil)
	if err != nil {
		t.Fatalf("failed to create 1st snapshot sink: %s", err)
	}
	fullSnap := NewFullSnapshot("testdata/db-and-wals/backup.db")
	if err := fullSnap.Persist(sink); err != nil {
		t.Fatalf("failed to persist full snapshot: %s", err)
	}
	if err := sink.Close(); err != nil {
		t.Fatalf("failed to close sink: %s", err)
	}
	if str.FullNeeded() {
		t.Fatalf("full snapshot still needed")
	}
	meta := checkSnapshotCount(str, 1)
	if meta.Index != 22 || meta.Term != 33 {
		t.Fatalf("unexpected snapshot metadata: %+v", meta)
	}

	//////////////////////////////////////////////////////////////////////////
	// Create a second full snapshot and write it to the sink.
	sink, err = str.Create(1, 44, 55, testConfig1, 4, nil)
	if err != nil {
		// Name the sink that actually failed, so the two failures are
		// distinguishable in test output.
		t.Fatalf("failed to create 2nd snapshot sink: %s", err)
	}
	fullSnap = NewFullSnapshot("testdata/db-and-wals/backup.db")
	if err := fullSnap.Persist(sink); err != nil {
		t.Fatalf("failed to persist full snapshot: %s", err)
	}
	if err := sink.Close(); err != nil {
		t.Fatalf("failed to close sink: %s", err)
	}
	if str.FullNeeded() {
		t.Fatalf("full snapshot still needed")
	}
	meta = checkSnapshotCount(str, 1)
	if meta.Index != 44 || meta.Term != 55 {
		t.Fatalf("unexpected snapshot metadata: %+v", meta)
	}
}
// Test_Store_ReapGenerations checks ReapGenerations against a store whose
// generation directories are created by hand. It verifies that nothing is
// reaped when only one generation exists, and that with several generations
// all but one are reaped, leaving the most recently created generation as
// the current one.
func Test_Store_ReapGenerations(t *testing.T) {
dir := t.TempDir()
s, err := NewStore(dir)
if err != nil {
t.Fatalf("failed to create snapshot store: %s", err)
}
// testCurrGenDirIs fails the test unless the current generation directory
// exists and equals exp; it returns that directory.
testCurrGenDirIs := func(exp string) string {
curGenDir, ok, err := s.GetCurrentGenerationDir()
if err != nil {
t.Fatalf("failed to get current generation dir: %s", err.Error())
}
if !ok {
t.Fatalf("expected current generation dir to exist")
}
if curGenDir != exp {
t.Fatalf("expected current generation dir to be %s, got %s", exp, curGenDir)
}
return curGenDir
}
// testGenCountIs fails the test unless the store reports exp generations.
testGenCountIs := func(exp int) {
generations, err := s.GetGenerations()
if err != nil {
t.Fatalf("failed to get generations: %s", err.Error())
}
if exp, got := exp, len(generations); exp != got {
t.Fatalf("expected %d generations, got %d", exp, got)
}
}
// testReapsOK reaps generations and fails the test unless exactly expN
// were reaped.
testReapsOK := func(expN int) {
n, err := s.ReapGenerations()
if err != nil {
t.Fatalf("reaping failed: %s", err.Error())
}
if n != expN {
t.Fatalf("expected %d generations to be reaped, got %d", expN, n)
}
}
// Create the first generation; with a single generation nothing is reaped.
var nextGenDir string
nextGenDir, err = s.GetNextGenerationDir()
if err != nil {
t.Fatalf("failed to get next generation dir: %s", err.Error())
}
mustCreateDir(nextGenDir)
testCurrGenDirIs(nextGenDir)
testReapsOK(0)
// Create another generation and then tell the Store to reap.
nextGenDir, err = s.GetNextGenerationDir()
if err != nil {
t.Fatalf("failed to get next generation dir: %s", err.Error())
}
mustCreateDir(nextGenDir)
testGenCountIs(2)
testReapsOK(1)
testCurrGenDirIs(nextGenDir)
// Finally, test reaping lots of generations.
for i := 0; i < 10; i++ {
nextGenDir, err = s.GetNextGenerationDir()
if err != nil {
t.Fatalf("failed to get next generation dir: %s", err.Error())
}
mustCreateDir(nextGenDir)
}
// One generation survived the previous reap, plus the ten just created.
testGenCountIs(11)
testReapsOK(10)
testGenCountIs(1)
testCurrGenDirIs(nextGenDir)
}
// compareReaderToFile reports whether the remaining bytes of r are identical
// to the contents of the file at path. The file is read first, then r is
// drained; both helpers panic on failure.
func compareReaderToFile(r io.Reader, path string) bool {
	want := mustReadFile(path)
	got := mustReadAll(r)
	same := bytes.Equal(want, got)
	return same
}
// mustReadAll reads r until EOF and returns everything read. It panics if
// the read fails.
func mustReadAll(r io.Reader) []byte {
	data, err := io.ReadAll(r)
	if err == nil {
		return data
	}
	panic(err)
}
// countingReadCloser wraps an io.ReadCloser and keeps a running total of the
// bytes returned by Read.
type countingReadCloser struct {
	rc io.ReadCloser // the wrapped reader
	n  int           // total bytes read so far
}

// Read reads from the wrapped reader, adds the number of bytes read to the
// running total, and returns the wrapped reader's result unchanged.
func (c *countingReadCloser) Read(p []byte) (int, error) {
	read, err := c.rc.Read(p)
	c.n = c.n + read
	return read, err
}

// Close closes the wrapped reader.
func (c *countingReadCloser) Close() error {
	err := c.rc.Close()
	return err
}
// Test_StoreReaping exercises ReapSnapshots. With auto-reaping disabled it
// writes one full snapshot followed by four WAL (incremental) snapshots,
// reaps them in two steps, and then checks that the latest remaining
// snapshot still replays to a database containing all four rows.
func Test_StoreReaping(t *testing.T) {
	dir := t.TempDir()
	str, err := NewStore(dir)
	if err != nil {
		t.Fatalf("failed to create snapshot store: %s", err)
	}
	str.noAutoreap = true

	testConfig := makeTestConfiguration("1", "2")

	// Create a full snapshot.
	snapshot := NewFullSnapshot("testdata/db-and-wals/backup.db")
	sink, err := str.Create(1, 1, 1, testConfig, 4, nil)
	if err != nil {
		t.Fatalf("failed to create snapshot sink: %s", err)
	}
	stream, err := snapshot.OpenStream()
	if err != nil {
		t.Fatalf("failed to open snapshot stream: %s", err)
	}
	_, err = io.Copy(sink, stream)
	if err != nil {
		t.Fatalf("failed to write snapshot: %s", err)
	}
	if err := sink.Close(); err != nil {
		t.Fatalf("failed to close snapshot sink: %s", err)
	}

	// createIncSnapshot writes the WAL data in file to the store as an
	// incremental snapshot with the given index and term.
	createIncSnapshot := func(index, term uint64, file string) {
		snapshot := NewWALSnapshot(mustReadFile(file))
		sink, err := str.Create(1, index, term, testConfig, 4, nil)
		if err != nil {
			t.Fatalf("failed to create snapshot sink: %s", err)
		}
		stream, err := snapshot.OpenStream()
		if err != nil {
			t.Fatalf("failed to open snapshot stream: %s", err)
		}
		_, err = io.Copy(sink, stream)
		if err != nil {
			t.Fatalf("failed to write snapshot: %s", err)
		}
		if err := sink.Close(); err != nil {
			t.Fatalf("failed to close snapshot sink: %s", err)
		}
	}
	createIncSnapshot(3, 2, "testdata/db-and-wals/wal-00")
	createIncSnapshot(5, 3, "testdata/db-and-wals/wal-01")
	createIncSnapshot(7, 4, "testdata/db-and-wals/wal-02")
	createIncSnapshot(9, 5, "testdata/db-and-wals/wal-03")

	// There should be 5 snapshot directories in the current generation.
	generationsDir, ok, err := str.GetCurrentGenerationDir()
	if err != nil {
		t.Fatalf("failed to get generations dir: %s", err)
	}
	if !ok {
		t.Fatalf("expected generations dir to exist")
	}
	snaps, err := str.getSnapshots(generationsDir)
	if err != nil {
		t.Fatalf("failed to list snapshots: %s", err)
	}
	if exp, got := 5, len(snaps); exp != got {
		t.Fatalf("expected %d snapshots, got %d", exp, got)
	}
	// The first four listed snapshots must be incremental and the last one
	// full.
	for _, snap := range snaps[0:4] {
		if snap.Full {
			t.Fatalf("snapshot %s is full", snap.ID)
		}
	}
	if !snaps[4].Full {
		t.Fatalf("snapshot %s is incremental", snaps[4].ID)
	}

	// Reap just the first snapshot, which is full.
	n, err := str.ReapSnapshots(generationsDir, 4)
	if err != nil {
		t.Fatalf("failed to reap full snapshot: %s", err)
	}
	if exp, got := 1, n; exp != got {
		t.Fatalf("expected %d snapshots to be reaped, got %d", exp, got)
	}
	snaps, err = str.getSnapshots(generationsDir)
	if err != nil {
		t.Fatalf("failed to list snapshots: %s", err)
	}
	if exp, got := 4, len(snaps); exp != got {
		t.Fatalf("expected %d snapshots, got %d", exp, got)
	}

	// Reap all but the last two snapshots. The remaining snapshots
	// should all be incremental.
	n, err = str.ReapSnapshots(generationsDir, 2)
	if err != nil {
		t.Fatalf("failed to reap snapshots: %s", err)
	}
	if exp, got := 2, n; exp != got {
		t.Fatalf("expected %d snapshots to be reaped, got %d", exp, got)
	}
	snaps, err = str.getSnapshots(generationsDir)
	if err != nil {
		t.Fatalf("failed to list snapshots: %s", err)
	}
	if exp, got := 2, len(snaps); exp != got {
		t.Fatalf("expected %d snapshots, got %d", exp, got)
	}
	for _, snap := range snaps {
		if snap.Full {
			t.Fatalf("snapshot %s is full", snap.ID)
		}
	}
	// The two survivors are the snapshots created with (index 9, term 5) and
	// (index 7, term 4). Fail if either field of either snapshot is wrong.
	if snaps[0].Index != 9 || snaps[0].Term != 5 {
		t.Fatalf("snap 0 is wrong, exp index 9 term 5, got index %d term %d", snaps[0].Index, snaps[0].Term)
	}
	if snaps[1].Index != 7 || snaps[1].Term != 4 {
		t.Fatalf("snap 1 is wrong, exp index 7 term 4, got index %d term %d", snaps[1].Index, snaps[1].Term)
	}

	// Open the latest snapshot, write it to disk, and check its contents.
	_, rc, err := str.Open(snaps[0].ID)
	if err != nil {
		t.Fatalf("failed to open snapshot %s: %s", snaps[0].ID, err)
	}
	defer rc.Close()
	strHdr, _, err := NewStreamHeaderFromReader(rc)
	if err != nil {
		t.Fatalf("error reading stream header: %v", err)
	}
	streamSnap := strHdr.GetFullSnapshot()
	if streamSnap == nil {
		t.Fatal("got nil FullSnapshot")
	}
	tmpFile := t.TempDir() + "/db"
	if err := ReplayDB(streamSnap, rc, tmpFile); err != nil {
		t.Fatalf("failed to replay database: %s", err)
	}

	// Check the database. The local is not named db, so the db package is
	// not shadowed.
	checkDB, err := db.Open(tmpFile, false, true)
	if err != nil {
		t.Fatalf("failed to open database: %s", err)
	}
	defer checkDB.Close()
	rows, err := checkDB.QueryStringStmt("SELECT COUNT(*) FROM foo")
	if err != nil {
		t.Fatalf("failed to query database: %s", err)
	}
	if exp, got := `[{"columns":["COUNT(*)"],"types":["integer"],"values":[[4]]}]`, asJSON(rows); exp != got {
		t.Fatalf("unexpected results for query exp: %s got: %s", exp, got)
	}
}

@ -1,246 +0,0 @@
package snapshot
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"os"
"google.golang.org/protobuf/proto"
)
const (
// strHeaderLenSize is the size, in bytes, of the little-endian length
// prefix that precedes the marshaled StreamHeader in a stream.
strHeaderLenSize = 8
// streamVersion is the version written into new StreamHeaders, and the
// only version accepted by NewStreamHeaderFromReader.
streamVersion = 1
)
// NewStreamHeader returns a new StreamHeader whose Version is set to
// streamVersion and which has no payload.
func NewStreamHeader() *StreamHeader {
	hdr := new(StreamHeader)
	hdr.Version = streamVersion
	return hdr
}
// NewStreamHeaderFromReader reads a StreamHeader from r. It expects a
// strHeaderLenSize-byte little-endian length followed by that many bytes of
// marshaled StreamHeader. On success it returns the header and the total
// number of bytes consumed from r. It returns an error if either read comes
// up short, the header cannot be unmarshaled, or the header's version is not
// streamVersion; in those cases the returned size is 0.
func NewStreamHeaderFromReader(r io.Reader) (*StreamHeader, int64, error) {
	// Length prefix first.
	lenBuf := make([]byte, strHeaderLenSize)
	nLen, err := io.ReadFull(r, lenBuf)
	if err != nil {
		return nil, 0, fmt.Errorf("error reading snapshot header length: %v", err)
	}

	// Then the marshaled header itself.
	hdrBuf := make([]byte, binary.LittleEndian.Uint64(lenBuf))
	nHdr, err := io.ReadFull(r, hdrBuf)
	if err != nil {
		return nil, 0, fmt.Errorf("error reading snapshot header %v", err)
	}

	hdr := new(StreamHeader)
	if err := proto.Unmarshal(hdrBuf, hdr); err != nil {
		return nil, 0, fmt.Errorf("error unmarshaling FSM snapshot: %v", err)
	}
	if v := hdr.GetVersion(); v != streamVersion {
		return nil, 0, fmt.Errorf("unsupported snapshot version %d", v)
	}
	return hdr, int64(nLen) + int64(nHdr), nil
}
// FileSize returns the total size of the files described by the header: the
// size of the database file plus the sizes of all WAL files in its
// FullSnapshot payload. It returns 0 if the header carries no FullSnapshot.
//
// Sizes are read through the generated getters, which tolerate nil
// messages, so a FullSnapshot without DB info or with a nil WAL entry
// contributes 0 instead of causing a nil pointer dereference.
func (s *StreamHeader) FileSize() int64 {
	fs := s.GetFullSnapshot()
	if fs == nil {
		return 0
	}
	size := fs.GetDb().GetSize()
	for _, di := range fs.GetWals() {
		size += di.GetSize()
	}
	return size
}
// Stream is a stream of data that can be read from a snapshot. It presents
// a sequence of underlying readers as one continuous stream, starting with
// the length-prefixed header.
type Stream struct {
// headerLen is the length in bytes of the marshaled StreamHeader,
// excluding the length prefix.
headerLen int64
// readClosers are the underlying sources, read one after another in order.
readClosers []io.ReadCloser
// readClosersIdx is the index into readClosers of the source currently
// being read.
readClosersIdx int
// totalFileSize is the combined size of the files in the stream, as
// reported by the header's FileSize.
totalFileSize int64
}
// NewIncrementalStream creates a new stream from a byte slice, presumably
// representing WAL data. The data is embedded in the StreamHeader's
// IncrementalSnapshot payload, so the resulting stream consists only of the
// length-prefixed header.
func NewIncrementalStream(data []byte) (*Stream, error) {
	hdr := NewStreamHeader()
	hdr.Payload = &StreamHeader_IncrementalSnapshot{
		IncrementalSnapshot: &IncrementalSnapshot{Data: data},
	}
	encoded, err := proto.Marshal(hdr)
	if err != nil {
		return nil, err
	}

	// Frame the header: little-endian length prefix, then the header bytes.
	framed := make([]byte, strHeaderLenSize, strHeaderLenSize+len(encoded))
	binary.LittleEndian.PutUint64(framed, uint64(len(encoded)))
	framed = append(framed, encoded...)

	stream := &Stream{
		headerLen:   int64(len(encoded)),
		readClosers: []io.ReadCloser{newRCBuffer(framed)},
	}
	return stream, nil
}
// NewFullStream creates a new stream from a SQLite file and 0 or more WAL
// files. The first path must be the SQLite database file; any remaining
// paths are WAL files. The stream yields the length-prefixed header, which
// records each file's size, followed by the contents of each file in the
// order given. It returns an error if no files are supplied, or if any file
// cannot be stat'ed or opened.
func NewFullStream(files ...string) (*Stream, error) {
	if len(files) == 0 {
		return nil, errors.New("no files provided")
	}

	// First file must be the SQLite database file.
	dbStat, err := os.Stat(files[0])
	if err != nil {
		return nil, err
	}
	total := dbStat.Size()
	dbInfo := &FullSnapshot_DataInfo{Size: dbStat.Size()}

	// Rest, if any, are WAL files.
	walInfos := make([]*FullSnapshot_DataInfo, 0, len(files)-1)
	for _, walPath := range files[1:] {
		walStat, err := os.Stat(walPath)
		if err != nil {
			return nil, err
		}
		walInfos = append(walInfos, &FullSnapshot_DataInfo{Size: walStat.Size()})
		total += walStat.Size()
	}

	hdr := NewStreamHeader()
	hdr.Payload = &StreamHeader_FullSnapshot{
		FullSnapshot: &FullSnapshot{
			Db:   dbInfo,
			Wals: walInfos,
		},
	}
	encoded, err := proto.Marshal(hdr)
	if err != nil {
		return nil, err
	}

	// Frame the header: little-endian length prefix, then the header bytes.
	framed := make([]byte, strHeaderLenSize, strHeaderLenSize+len(encoded))
	binary.LittleEndian.PutUint64(framed, uint64(len(encoded)))
	framed = append(framed, encoded...)

	sources := []io.ReadCloser{newRCBuffer(framed)}
	for _, path := range files {
		fd, err := os.Open(path)
		if err != nil {
			// Release everything opened so far; errors during this cleanup
			// are deliberately ignored.
			for i := range sources {
				sources[i].Close()
			}
			return nil, err
		}
		sources = append(sources, fd)
	}

	return &Stream{
		headerLen:   int64(len(encoded)),
		readClosers: sources,
		// total is the sum of the sizes recorded in the header above.
		totalFileSize: total,
	}, nil
}
// Size returns the total number of bytes that will be read from the stream,
// if the stream is fully read: the length prefix, the marshaled header, and
// the total size of the files.
func (s *Stream) Size() int64 {
	total := int64(strHeaderLenSize)
	total += s.headerLen
	total += s.totalFileSize
	return total
}
// Read reads from the stream. Data comes from the underlying sources one
// after another; when the current source reports io.EOF the stream advances
// to the next one and the EOF is suppressed unless that was the final
// source. Once every source is exhausted, Read returns 0 and io.EOF.
func (s *Stream) Read(p []byte) (n int, err error) {
	if s.readClosersIdx >= len(s.readClosers) {
		return 0, io.EOF
	}

	n, err = s.readClosers[s.readClosersIdx].Read(p)
	if err != io.EOF {
		// Either a successful read or a genuine error: pass it through.
		return n, err
	}

	// The current source is exhausted; move on to the next one.
	s.readClosersIdx++
	if s.readClosersIdx < len(s.readClosers) {
		return n, nil
	}
	return n, io.EOF
}
// Close closes the stream by closing every underlying source. All sources
// are closed even if one of them fails, so no file handle is left open; the
// first error encountered, if any, is returned.
func (s *Stream) Close() error {
	var firstErr error
	for _, rc := range s.readClosers {
		if err := rc.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
// FilesFromStream reads a stream and returns the files contained within it.
// The first returned path is the SQLite database file, and the slice holds
// the paths of the WAL files, in stream order. Each file is written to a
// temporary file created with os.CreateTemp; on success those files are
// left in place and are not removed by this function.
//
// The function returns an error if the stream header cannot be read, if
// the stream does not contain a FullSnapshot with DB info, or if creating,
// writing or closing any temporary file fails. On error, any temporary
// files already created are closed and removed.
func FilesFromStream(r io.Reader) (string, []string, error) {
	strHdr, _, err := NewStreamHeaderFromReader(r)
	if err != nil {
		return "", nil, fmt.Errorf("error reading stream header: %v", err)
	}
	fullSnap := strHdr.GetFullSnapshot()
	if fullSnap == nil {
		return "", nil, fmt.Errorf("got nil FullSnapshot")
	}
	dbInfo := fullSnap.GetDb()
	if dbInfo == nil {
		return "", nil, fmt.Errorf("got nil DB info")
	}

	// created tracks every temporary file made so far, so that a failure
	// part-way through does not leave orphaned files behind.
	var created []string
	fail := func(err error) (string, []string, error) {
		for _, path := range created {
			os.Remove(path) // Best effort; ignore the error during cleanup.
		}
		return "", nil, err
	}

	sqliteFd, err := os.CreateTemp("", "stream-db.sqlite3")
	if err != nil {
		return fail(fmt.Errorf("error creating SQLite file: %v", err))
	}
	created = append(created, sqliteFd.Name())
	if _, err := io.CopyN(sqliteFd, r, dbInfo.Size); err != nil {
		sqliteFd.Close()
		return fail(fmt.Errorf("error writing SQLite file data: %v", err))
	}
	if err := sqliteFd.Close(); err != nil {
		return fail(fmt.Errorf("error closing SQLite data file %v", err))
	}

	var walFiles []string
	for i, di := range fullSnap.Wals {
		tmpFd, err := os.CreateTemp("", fmt.Sprintf("stream-wal-%d.wal", i))
		if err != nil {
			return fail(fmt.Errorf("error creating WAL file: %v", err))
		}
		created = append(created, tmpFd.Name())
		if _, err := io.CopyN(tmpFd, r, di.Size); err != nil {
			tmpFd.Close()
			return fail(fmt.Errorf("error writing WAL file data: %v", err))
		}
		if err := tmpFd.Close(); err != nil {
			return fail(fmt.Errorf("error closing WAL file: %v", err))
		}
		walFiles = append(walFiles, tmpFd.Name())
	}
	return sqliteFd.Name(), walFiles, nil
}
// newRCBuffer wraps b in an in-memory buffer and returns it as an
// io.ReadCloser whose Close method does nothing.
func newRCBuffer(b []byte) io.ReadCloser {
	buf := bytes.NewBuffer(b)
	return io.NopCloser(buf)
}

@ -1,404 +0,0 @@
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.31.0
// protoc v3.6.1
// source: snapshot/stream_header.pb
package snapshot
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
)
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
type IncrementalSnapshot struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
Data []byte `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"`
}
func (x *IncrementalSnapshot) Reset() {
*x = IncrementalSnapshot{}
if protoimpl.UnsafeEnabled {
mi := &file_snapshot_stream_header_pb_msgTypes[0]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *IncrementalSnapshot) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*IncrementalSnapshot) ProtoMessage() {}
func (x *IncrementalSnapshot) ProtoReflect() protoreflect.Message {
mi := &file_snapshot_stream_header_pb_msgTypes[0]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use IncrementalSnapshot.ProtoReflect.Descriptor instead.
func (*IncrementalSnapshot) Descriptor() ([]byte, []int) {
return file_snapshot_stream_header_pb_rawDescGZIP(), []int{0}
}
func (x *IncrementalSnapshot) GetData() []byte {
if x != nil {
return x.Data
}
return nil
}
type FullSnapshot struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
Db *FullSnapshot_DataInfo `protobuf:"bytes,3,opt,name=db,proto3" json:"db,omitempty"`
Wals []*FullSnapshot_DataInfo `protobuf:"bytes,4,rep,name=wals,proto3" json:"wals,omitempty"`
}
func (x *FullSnapshot) Reset() {
*x = FullSnapshot{}
if protoimpl.UnsafeEnabled {
mi := &file_snapshot_stream_header_pb_msgTypes[1]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *FullSnapshot) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*FullSnapshot) ProtoMessage() {}
func (x *FullSnapshot) ProtoReflect() protoreflect.Message {
mi := &file_snapshot_stream_header_pb_msgTypes[1]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use FullSnapshot.ProtoReflect.Descriptor instead.
func (*FullSnapshot) Descriptor() ([]byte, []int) {
return file_snapshot_stream_header_pb_rawDescGZIP(), []int{1}
}
func (x *FullSnapshot) GetDb() *FullSnapshot_DataInfo {
if x != nil {
return x.Db
}
return nil
}
func (x *FullSnapshot) GetWals() []*FullSnapshot_DataInfo {
if x != nil {
return x.Wals
}
return nil
}
type StreamHeader struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
Version int32 `protobuf:"varint,1,opt,name=version,proto3" json:"version,omitempty"`
// Types that are assignable to Payload:
//
// *StreamHeader_IncrementalSnapshot
// *StreamHeader_FullSnapshot
Payload isStreamHeader_Payload `protobuf_oneof:"payload"`
}
func (x *StreamHeader) Reset() {
*x = StreamHeader{}
if protoimpl.UnsafeEnabled {
mi := &file_snapshot_stream_header_pb_msgTypes[2]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *StreamHeader) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*StreamHeader) ProtoMessage() {}
func (x *StreamHeader) ProtoReflect() protoreflect.Message {
mi := &file_snapshot_stream_header_pb_msgTypes[2]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use StreamHeader.ProtoReflect.Descriptor instead.
func (*StreamHeader) Descriptor() ([]byte, []int) {
return file_snapshot_stream_header_pb_rawDescGZIP(), []int{2}
}
func (x *StreamHeader) GetVersion() int32 {
if x != nil {
return x.Version
}
return 0
}
func (m *StreamHeader) GetPayload() isStreamHeader_Payload {
if m != nil {
return m.Payload
}
return nil
}
func (x *StreamHeader) GetIncrementalSnapshot() *IncrementalSnapshot {
if x, ok := x.GetPayload().(*StreamHeader_IncrementalSnapshot); ok {
return x.IncrementalSnapshot
}
return nil
}
func (x *StreamHeader) GetFullSnapshot() *FullSnapshot {
if x, ok := x.GetPayload().(*StreamHeader_FullSnapshot); ok {
return x.FullSnapshot
}
return nil
}
type isStreamHeader_Payload interface {
isStreamHeader_Payload()
}
type StreamHeader_IncrementalSnapshot struct {
IncrementalSnapshot *IncrementalSnapshot `protobuf:"bytes,2,opt,name=incremental_snapshot,json=incrementalSnapshot,proto3,oneof"`
}
type StreamHeader_FullSnapshot struct {
FullSnapshot *FullSnapshot `protobuf:"bytes,3,opt,name=full_snapshot,json=fullSnapshot,proto3,oneof"`
}
func (*StreamHeader_IncrementalSnapshot) isStreamHeader_Payload() {}
func (*StreamHeader_FullSnapshot) isStreamHeader_Payload() {}
type FullSnapshot_DataInfo struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
Size int64 `protobuf:"varint,1,opt,name=size,proto3" json:"size,omitempty"`
}
func (x *FullSnapshot_DataInfo) Reset() {
*x = FullSnapshot_DataInfo{}
if protoimpl.UnsafeEnabled {
mi := &file_snapshot_stream_header_pb_msgTypes[3]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *FullSnapshot_DataInfo) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*FullSnapshot_DataInfo) ProtoMessage() {}
func (x *FullSnapshot_DataInfo) ProtoReflect() protoreflect.Message {
mi := &file_snapshot_stream_header_pb_msgTypes[3]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use FullSnapshot_DataInfo.ProtoReflect.Descriptor instead.
func (*FullSnapshot_DataInfo) Descriptor() ([]byte, []int) {
return file_snapshot_stream_header_pb_rawDescGZIP(), []int{1, 0}
}
func (x *FullSnapshot_DataInfo) GetSize() int64 {
if x != nil {
return x.Size
}
return 0
}
var File_snapshot_stream_header_pb protoreflect.FileDescriptor
var file_snapshot_stream_header_pb_rawDesc = []byte{
0x0a, 0x19, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x2f, 0x73, 0x74, 0x72, 0x65, 0x61,
0x6d, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x2e, 0x70, 0x62, 0x12, 0x08, 0x73, 0x74, 0x72,
0x65, 0x61, 0x6d, 0x65, 0x72, 0x22, 0x29, 0x0a, 0x13, 0x49, 0x6e, 0x63, 0x72, 0x65, 0x6d, 0x65,
0x6e, 0x74, 0x61, 0x6c, 0x53, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x12, 0x12, 0x0a, 0x04,
0x64, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x64, 0x61, 0x74, 0x61,
0x22, 0x94, 0x01, 0x0a, 0x0c, 0x46, 0x75, 0x6c, 0x6c, 0x53, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f,
0x74, 0x12, 0x2f, 0x0a, 0x02, 0x64, 0x62, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e,
0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x65, 0x72, 0x2e, 0x46, 0x75, 0x6c, 0x6c, 0x53, 0x6e, 0x61,
0x70, 0x73, 0x68, 0x6f, 0x74, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x02,
0x64, 0x62, 0x12, 0x33, 0x0a, 0x04, 0x77, 0x61, 0x6c, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b,
0x32, 0x1f, 0x2e, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x65, 0x72, 0x2e, 0x46, 0x75, 0x6c, 0x6c,
0x53, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x49, 0x6e, 0x66,
0x6f, 0x52, 0x04, 0x77, 0x61, 0x6c, 0x73, 0x1a, 0x1e, 0x0a, 0x08, 0x44, 0x61, 0x74, 0x61, 0x49,
0x6e, 0x66, 0x6f, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28,
0x03, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x22, 0xc6, 0x01, 0x0a, 0x0c, 0x53, 0x74, 0x72, 0x65,
0x61, 0x6d, 0x48, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73,
0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69,
0x6f, 0x6e, 0x12, 0x52, 0x0a, 0x14, 0x69, 0x6e, 0x63, 0x72, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61,
0x6c, 0x5f, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b,
0x32, 0x1d, 0x2e, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x65, 0x72, 0x2e, 0x49, 0x6e, 0x63, 0x72,
0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x53, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x48,
0x00, 0x52, 0x13, 0x69, 0x6e, 0x63, 0x72, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x53, 0x6e,
0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x12, 0x3d, 0x0a, 0x0d, 0x66, 0x75, 0x6c, 0x6c, 0x5f, 0x73,
0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x16, 0x2e,
0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x65, 0x72, 0x2e, 0x46, 0x75, 0x6c, 0x6c, 0x53, 0x6e, 0x61,
0x70, 0x73, 0x68, 0x6f, 0x74, 0x48, 0x00, 0x52, 0x0c, 0x66, 0x75, 0x6c, 0x6c, 0x53, 0x6e, 0x61,
0x70, 0x73, 0x68, 0x6f, 0x74, 0x42, 0x09, 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64,
0x42, 0x23, 0x5a, 0x21, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x72,
0x71, 0x6c, 0x69, 0x74, 0x65, 0x2f, 0x72, 0x71, 0x6c, 0x69, 0x74, 0x65, 0x2f, 0x73, 0x6e, 0x61,
0x70, 0x73, 0x68, 0x6f, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
}
var (
file_snapshot_stream_header_pb_rawDescOnce sync.Once
file_snapshot_stream_header_pb_rawDescData = file_snapshot_stream_header_pb_rawDesc
)
func file_snapshot_stream_header_pb_rawDescGZIP() []byte {
file_snapshot_stream_header_pb_rawDescOnce.Do(func() {
file_snapshot_stream_header_pb_rawDescData = protoimpl.X.CompressGZIP(file_snapshot_stream_header_pb_rawDescData)
})
return file_snapshot_stream_header_pb_rawDescData
}
var file_snapshot_stream_header_pb_msgTypes = make([]protoimpl.MessageInfo, 4)
var file_snapshot_stream_header_pb_goTypes = []interface{}{
(*IncrementalSnapshot)(nil), // 0: streamer.IncrementalSnapshot
(*FullSnapshot)(nil), // 1: streamer.FullSnapshot
(*StreamHeader)(nil), // 2: streamer.StreamHeader
(*FullSnapshot_DataInfo)(nil), // 3: streamer.FullSnapshot.DataInfo
}
var file_snapshot_stream_header_pb_depIdxs = []int32{
3, // 0: streamer.FullSnapshot.db:type_name -> streamer.FullSnapshot.DataInfo
3, // 1: streamer.FullSnapshot.wals:type_name -> streamer.FullSnapshot.DataInfo
0, // 2: streamer.StreamHeader.incremental_snapshot:type_name -> streamer.IncrementalSnapshot
1, // 3: streamer.StreamHeader.full_snapshot:type_name -> streamer.FullSnapshot
4, // [4:4] is the sub-list for method output_type
4, // [4:4] is the sub-list for method input_type
4, // [4:4] is the sub-list for extension type_name
4, // [4:4] is the sub-list for extension extendee
0, // [0:4] is the sub-list for field type_name
}
func init() { file_snapshot_stream_header_pb_init() }
func file_snapshot_stream_header_pb_init() {
if File_snapshot_stream_header_pb != nil {
return
}
if !protoimpl.UnsafeEnabled {
file_snapshot_stream_header_pb_msgTypes[0].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*IncrementalSnapshot); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_snapshot_stream_header_pb_msgTypes[1].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*FullSnapshot); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_snapshot_stream_header_pb_msgTypes[2].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*StreamHeader); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_snapshot_stream_header_pb_msgTypes[3].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*FullSnapshot_DataInfo); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
}
file_snapshot_stream_header_pb_msgTypes[2].OneofWrappers = []interface{}{
(*StreamHeader_IncrementalSnapshot)(nil),
(*StreamHeader_FullSnapshot)(nil),
}
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: file_snapshot_stream_header_pb_rawDesc,
NumEnums: 0,
NumMessages: 4,
NumExtensions: 0,
NumServices: 0,
},
GoTypes: file_snapshot_stream_header_pb_goTypes,
DependencyIndexes: file_snapshot_stream_header_pb_depIdxs,
MessageInfos: file_snapshot_stream_header_pb_msgTypes,
}.Build()
File_snapshot_stream_header_pb = out.File
file_snapshot_stream_header_pb_rawDesc = nil
file_snapshot_stream_header_pb_goTypes = nil
file_snapshot_stream_header_pb_depIdxs = nil
}

@ -1,25 +0,0 @@
syntax = "proto3";
package streamer;
option go_package = "github.com/rqlite/rqlite/snapshot";
// IncrementalSnapshot is the StreamHeader payload for an incremental
// snapshot. The snapshot bytes are carried inside the header itself.
message IncrementalSnapshot {
// data holds the raw bytes of the incremental snapshot.
bytes data = 1;
}
// FullSnapshot is the StreamHeader payload for a full snapshot. It records
// the size of each file whose bytes follow the header in the stream: the
// database first, then each WAL in order.
message FullSnapshot {
// DataInfo describes a single file carried in the stream.
message DataInfo {
// size is the length of the file, in bytes.
int64 size = 1;
}
// db describes the SQLite database file.
DataInfo db = 3;
// wals describes the WAL files, in the order they appear in the stream.
repeated DataInfo wals = 4;
}
// StreamHeader is the header at the start of a snapshot stream. Its payload
// says whether the stream carries an incremental or a full snapshot.
message StreamHeader {
// version is the version of the stream format.
int32 version = 1;
// payload holds exactly one of the snapshot descriptions below.
oneof payload {
IncrementalSnapshot incremental_snapshot = 2;
FullSnapshot full_snapshot = 3;
}
}

@ -1,209 +0,0 @@
package snapshot
import (
"bytes"
"io"
"os"
"testing"
)
// Test_NewStreamHeader checks that a freshly created header carries the
// current stream version, has no payload, and reports a file size of zero.
func Test_NewStreamHeader(t *testing.T) {
	hdr := NewStreamHeader()
	if hdr == nil {
		t.Fatal("StreamHeader is nil")
	}

	if v := hdr.Version; v != streamVersion {
		t.Errorf("StreamHeader version is incorrect, got: %d, want: %d", v, streamVersion)
	}

	if hdr.Payload != nil {
		t.Error("StreamHeader payload should be nil")
	}

	if sz := hdr.FileSize(); sz != 0 {
		t.Errorf("Expected file size to be 0, got: %d", sz)
	}
}
// Test_StreamHeaderFileSize checks that FileSize is zero when the header has
// no full snapshot, and otherwise equals the database size plus the size of
// every WAL.
func Test_StreamHeaderFileSize(t *testing.T) {
	hdr := NewStreamHeader()
	if hdr == nil {
		t.Fatal("StreamHeader is nil")
	}

	// No payload has been set yet.
	if got := hdr.FileSize(); got != 0 {
		t.Errorf("Expected file size to be 0 for no full snapshot, got: %d", got)
	}

	// Attach a full snapshot with one database and two WALs.
	var dbSz int64 = 100
	walSzs := []int64{200, 300}

	want := dbSz
	wals := make([]*FullSnapshot_DataInfo, 0, len(walSzs))
	for _, sz := range walSzs {
		wals = append(wals, &FullSnapshot_DataInfo{Size: sz})
		want += sz
	}
	hdr.Payload = &StreamHeader_FullSnapshot{
		FullSnapshot: &FullSnapshot{
			Db:   &FullSnapshot_DataInfo{Size: dbSz},
			Wals: wals,
		},
	}

	if got := hdr.FileSize(); got != want {
		t.Errorf("Expected file size to be %d, got: %d", want, got)
	}
}
// Test_NewIncrementalStream checks that an incremental stream consists of
// nothing but its header, and that the header carries the supplied data.
func Test_NewIncrementalStream(t *testing.T) {
	data := []byte("test data")
	stream, err := NewIncrementalStream(data)
	if err != nil {
		t.Fatalf("Failed to create new incremental stream: %v", err)
	}
	if stream == nil {
		t.Fatal("Expected non-nil stream, got nil")
	}

	// Get the header
	strHdr, n, err := NewStreamHeaderFromReader(stream)
	if err != nil {
		t.Fatalf("Failed to read from stream: %v", err)
	}
	if n != stream.Size() {
		t.Errorf("Expected to read %d bytes, got: %d", stream.Size(), n)
	}
	if strHdr.FileSize() != 0 {
		t.Errorf("Expected file size to be 0, got: %d", strHdr.FileSize())
	}

	// Check the data. A missing payload must stop the test here: the next
	// check dereferences the snapshot and would otherwise panic.
	incSnap := strHdr.GetIncrementalSnapshot()
	if incSnap == nil {
		t.Fatal("StreamHeader payload should not be nil")
	}
	if !bytes.Equal(incSnap.Data, data) {
		t.Errorf("Expected data to be %s, got: %s", data, incSnap.Data)
	}

	// Should be no more data
	buf := make([]byte, 1)
	if _, err := stream.Read(buf); err != io.EOF {
		t.Fatalf("Expected EOF, got: %v", err)
	}
	if err := stream.Close(); err != nil {
		t.Fatalf("unexpected error closing IncrementalStream: %v", err)
	}
}
// Test_NewFullStream builds a full stream from three temporary files and
// verifies the sizes recorded in the header, the contents of each file as
// read back from the stream, the final EOF, and that the number of bytes
// read equals Size().
func Test_NewFullStream(t *testing.T) {
	contents := [][]byte{
		[]byte("test1.db contents"),
		[]byte("test1.db-wal0 contents"),
		[]byte("test1.db-wal1 contents"),
	}

	var (
		files      []string
		contentsSz int64
	)
	for _, c := range contents {
		files = append(files, mustWriteToTemp(c))
		contentsSz += int64(len(c))
	}
	defer func() {
		for i := range files {
			os.Remove(files[i])
		}
	}()

	str, err := NewFullStream(files...)
	if err != nil {
		t.Fatalf("unexpected error creating FullStream: %v", err)
	}

	var totalSizeRead int64

	// readExact pulls exactly size bytes off the stream and adds them to the
	// running total.
	readExact := func(size int64) []byte {
		chunk := make([]byte, size)
		n, err := io.ReadFull(str, chunk)
		if err != nil {
			t.Fatalf("unexpected error reading from FullEncoder: %v", err)
		}
		totalSizeRead += int64(n)
		return chunk
	}

	// The header comes first.
	strHdr, hdrLen, err := NewStreamHeaderFromReader(str)
	if err != nil {
		t.Fatalf("Failed to read from stream: %v", err)
	}
	if got := strHdr.FileSize(); got != contentsSz {
		t.Errorf("Expected file size to be %d, got: %d", contentsSz, got)
	}
	totalSizeRead += hdrLen

	// The database follows the header and must match the first file.
	fullSnapshot := strHdr.GetFullSnapshot()
	if fullSnapshot == nil {
		t.Fatalf("got nil FullSnapshot")
	}
	dbData := fullSnapshot.GetDb()
	if dbData == nil {
		t.Fatalf("got nil Db")
	}
	if dbData.Size != int64(len(contents[0])) {
		t.Errorf("unexpected Db size, got: %d, want: %d", dbData.Size, len(contents[0]))
	}
	if got := readExact(dbData.Size); !bytes.Equal(got, contents[0]) {
		t.Errorf("unexpected database contents, got: %s, want: %s", got, contents[0])
	}

	// Then the "WALs", in order.
	if count := len(fullSnapshot.GetWals()); count != 2 {
		t.Fatalf("unexpected number of WALs, got: %d, want: %d", count, 2)
	}
	for i, walData := range fullSnapshot.GetWals() {
		want := contents[i+1]
		if walData == nil {
			t.Fatalf("got nil WAL")
		}
		if walData.Size != int64(len(want)) {
			t.Errorf("unexpected WAL size, got: %d, want: %d", walData.Size, len(want))
		}
		if got := readExact(walData.Size); !bytes.Equal(got, want) {
			t.Errorf("unexpected WAL contents, got: %s, want: %s", got, want)
		}
	}

	// Nothing should remain in the stream.
	last := make([]byte, 1)
	n, err := str.Read(last)
	if err != io.EOF {
		t.Fatalf("expected EOF, got: %v", err)
	}
	totalSizeRead += int64(n)

	// The bytes consumed must add up to what Size() advertised.
	if want := str.Size(); totalSizeRead != want {
		t.Errorf("unexpected total number of bytes read from FullEncoder, got: %d, want: %d", totalSizeRead, want)
	}

	if err := str.Close(); err != nil {
		t.Fatalf("unexpected error closing FullStream: %v", err)
	}
}
// mustWriteToTemp writes b to a new temporary file and returns the file's
// path. The caller is responsible for removing the file. It panics if the
// file cannot be created, written, or closed; in the latter two cases the
// partially written file is removed first.
func mustWriteToTemp(b []byte) string {
	f, err := os.CreateTemp("", "snapshot-enc-dec-test-*")
	if err != nil {
		panic(err)
	}
	if _, err := f.Write(b); err != nil {
		f.Close()
		os.Remove(f.Name())
		panic(err)
	}
	// A write error can be deferred until Close, so check it explicitly.
	if err := f.Close(); err != nil {
		os.Remove(f.Name())
		panic(err)
	}
	return f.Name()
}

@ -1,159 +0,0 @@
package snapshot
import (
"bytes"
"compress/gzip"
"encoding/binary"
"fmt"
"io"
"io/ioutil"
"math"
"unsafe"
)
// V1Encoder creates a new V1 snapshot.
//
// The encoded form is: an 8-byte little-endian word holding math.MaxUint64
// (marking the payload as compressed), an 8-byte little-endian word holding
// the length of the compressed payload, and then the gzip-compressed payload.
type V1Encoder struct {
	data []byte
}

// NewV1Encoder returns an initialized V1 encoder. The slice b is retained,
// not copied.
func NewV1Encoder(b []byte) *V1Encoder {
	return &V1Encoder{
		data: b,
	}
}

// WriteTo writes the snapshot to the given writer.
//
// It returns the number of bytes actually written to w, including when an
// error is returned, as required by io.WriterTo.
func (v *V1Encoder) WriteTo(w io.Writer) (int64, error) {
	var totalN int64

	// putUint64 writes val as 8 little-endian bytes and records how many
	// bytes reached w.
	putUint64 := func(val uint64) error {
		var word [8]byte
		binary.LittleEndian.PutUint64(word[:], val)
		n, err := w.Write(word[:])
		totalN += int64(n)
		return err
	}

	// Indicate that the data is compressed by writing max uint64 value first.
	if err := putUint64(math.MaxUint64); err != nil {
		return totalN, fmt.Errorf("failed to write max uint64: %w", err)
	}

	// Get compressed copy of data.
	cdata, err := v.compressedData()
	if err != nil {
		return totalN, fmt.Errorf("failed to get compressed data: %w", err)
	}

	// Write size of compressed data.
	if err := putUint64(uint64(len(cdata))); err != nil {
		return totalN, fmt.Errorf("failed to write compressed data size: %w", err)
	}

	if len(cdata) != 0 {
		// Write compressed data.
		n, err := w.Write(cdata)
		totalN += int64(n)
		if err != nil {
			return totalN, fmt.Errorf("failed to write compressed data: %w", err)
		}
	}
	return totalN, nil
}

// compressedData returns a gzip-compressed copy of the encoder's data, using
// the best-compression level. It returns nil if the encoder holds a nil
// slice.
func (v *V1Encoder) compressedData() ([]byte, error) {
	if v.data == nil {
		return nil, nil
	}
	var buf bytes.Buffer
	gz, err := gzip.NewWriterLevel(&buf, gzip.BestCompression)
	if err != nil {
		return nil, err
	}
	if _, err := gz.Write(v.data); err != nil {
		return nil, err
	}
	// Close flushes the remaining compressed bytes and the gzip footer.
	if err := gz.Close(); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
// V1Decoder reads a V1 snapshot.
//
// Two layouts are accepted. An uncompressed snapshot is an 8-byte
// little-endian length followed by that many bytes of data. A compressed
// snapshot starts with an 8-byte word holding math.MaxUint64, then an 8-byte
// little-endian length, then that many bytes of gzip-compressed data.
type V1Decoder struct {
	r io.Reader
}

// NewV1Decoder returns an initialized V1 decoder
func NewV1Decoder(r io.Reader) *V1Decoder {
	return &V1Decoder{
		r: r,
	}
}

// WriteTo writes the decoded snapshot data to the given writer.
//
// It returns the number of decoded bytes written to w. An error is returned,
// rather than a panic or silently decoded garbage, when the input is too
// short to hold a length word or declares more data than it contains.
func (v *V1Decoder) WriteTo(w io.Writer) (int64, error) {
	var uint64Size uint64
	inc := int64(unsafe.Sizeof(uint64Size))

	// Read all the data into RAM, since we have to decode known-length
	// chunks of various forms.
	b, err := ioutil.ReadAll(v.r)
	if err != nil {
		return 0, fmt.Errorf("readall: %s", err)
	}

	var offset int64

	// nextUint64 decodes the length word at offset and advances past it.
	// The slice is clamped to len(b) so that a truncated word is reported
	// by readUint64 as an error instead of being read from spare capacity.
	nextUint64 := func() (uint64, error) {
		end := offset + inc
		if end > int64(len(b)) {
			end = int64(len(b))
		}
		val, err := readUint64(b[offset:end])
		if err != nil {
			return 0, err
		}
		offset = end
		return val, nil
	}

	// Get size of data, checking for compression.
	compressed := false
	sz, err := nextUint64()
	if err != nil {
		return 0, fmt.Errorf("read compression check: %s", err)
	}
	if sz == math.MaxUint64 {
		compressed = true
		// Data is actually compressed, read actual size next.
		sz, err = nextUint64()
		if err != nil {
			return 0, fmt.Errorf("read compressed size: %s", err)
		}
	}

	if sz == 0 {
		return 0, nil
	}

	// Reject a declared length that exceeds what was actually read. This
	// also covers lengths of 2^63 and above, which would go negative when
	// converted to int64.
	if remaining := uint64(int64(len(b)) - offset); sz > remaining {
		return 0, fmt.Errorf("snapshot data truncated: header declares %d bytes, %d available", sz, remaining)
	}
	payload := b[offset : offset+int64(sz)]

	// Now write out the data, decompressing if necessary.
	if !compressed {
		n, err := w.Write(payload)
		if err != nil {
			return int64(n), fmt.Errorf("uncompressed data write: %s", err)
		}
		return int64(n), nil
	}

	gz, err := gzip.NewReader(bytes.NewReader(payload))
	if err != nil {
		return 0, err
	}
	n, err := io.Copy(w, gz)
	if err != nil {
		gz.Close() // Already failing; the copy error is the one to report.
		return n, fmt.Errorf("data decompress: %s", err)
	}
	if err := gz.Close(); err != nil {
		return n, err
	}
	return n, nil
}

// readUint64 decodes a little-endian uint64 from the start of b. It returns
// an error if b holds fewer than 8 bytes.
func readUint64(b []byte) (uint64, error) {
	var sz uint64
	if err := binary.Read(bytes.NewReader(b), binary.LittleEndian, &sz); err != nil {
		return 0, err
	}
	return sz, nil
}

@ -1,129 +0,0 @@
package snapshot
import (
"bytes"
"compress/gzip"
"encoding/binary"
"io"
"io/ioutil"
"math"
"testing"
)
// Test_V1EncoderCreate encodes a small payload and verifies each part of the
// output in turn: the compression flag, the compressed-size word, and the
// gzip payload, which must decompress back to the original data.
func Test_V1EncoderCreate(t *testing.T) {
	payload := []byte("test data")

	// Encode into a buffer.
	var out bytes.Buffer
	written, err := NewV1Encoder(payload).WriteTo(&out)
	if err != nil {
		t.Fatalf("failed to write snapshot: %v", err)
	}

	rd := bytes.NewReader(out.Bytes())

	// First word: the compression flag.
	var marker uint64
	if err := binary.Read(rd, binary.LittleEndian, &marker); err != nil {
		t.Fatalf("failed to read compression flag: %v", err)
	}
	if marker != math.MaxUint64 {
		t.Fatalf("compression flag is wrong")
	}

	// Second word: the compressed length. The flag and length words take up
	// 16 of the bytes written.
	var clen uint64
	if err := binary.Read(rd, binary.LittleEndian, &clen); err != nil {
		t.Fatalf("failed to read compressed data size: %v", err)
	}
	if want := uint64(written - 16); clen != want {
		t.Fatalf("unexpected compressed data size; got %v, want %v", clen, want)
	}

	// Remainder: the gzip payload.
	compressed := make([]byte, clen)
	if _, err := io.ReadFull(rd, compressed); err != nil {
		t.Fatalf("failed to read compressed data: %v", err)
	}
	zr, err := gzip.NewReader(bytes.NewReader(compressed))
	if err != nil {
		t.Fatalf("failed to create gzip reader: %v", err)
	}
	plain, err := ioutil.ReadAll(zr)
	if err != nil {
		t.Fatalf("failed to decompress data: %v", err)
	}
	if !bytes.Equal(plain, payload) {
		t.Fatalf("unexpected decompressed data; got %q, want %q", plain, payload)
	}
}
// Test_V1EncoderNilSlice checks that encoding a nil slice produces only the
// 16 bytes of flag and size, with the size word set to zero.
func Test_V1EncoderNilSlice(t *testing.T) {
	var out bytes.Buffer
	written, err := NewV1Encoder(nil).WriteTo(&out)
	if err != nil {
		t.Fatalf("failed to write to buffer: %v", err)
	}
	// Flag and size words only.
	if written != 16 {
		t.Errorf("unexpected number of bytes written; got %d, want %d", written, 16)
	}

	rd := bytes.NewReader(out.Bytes())

	// The compression flag is still set.
	var marker uint64
	if err := binary.Read(rd, binary.LittleEndian, &marker); err != nil {
		t.Fatalf("failed to read compression flag: %v", err)
	}
	if marker != math.MaxUint64 {
		t.Errorf("unexpected compression flag")
	}

	// The compressed length must be zero.
	var clen uint64
	if err := binary.Read(rd, binary.LittleEndian, &clen); err != nil {
		t.Fatalf("failed to read compressed data size: %v", err)
	}
	if clen != 0 {
		t.Errorf("unexpected compressed data size; got %d, want %d", clen, 0)
	}

	// Nothing may follow the two words.
	if left := rd.Len(); left != 0 {
		t.Errorf("unexpected remaining data; got %d, want %d", left, 0)
	}
}
// TestV1Decoder round-trips data through V1Encoder and V1Decoder and checks
// that the decoded bytes equal the original.
func TestV1Decoder(t *testing.T) {
	want := []byte("This is a test data.")

	// Encode.
	var encoded bytes.Buffer
	if _, err := NewV1Encoder(want).WriteTo(&encoded); err != nil {
		t.Fatalf("Failed to write to encoder: %v", err)
	}

	// Decode what was just encoded.
	var decoded bytes.Buffer
	if _, err := NewV1Decoder(&encoded).WriteTo(&decoded); err != nil {
		t.Fatalf("Failed to write to decoder: %v", err)
	}

	// The round trip must be lossless.
	if got := decoded.Bytes(); !bytes.Equal(want, got) {
		t.Fatalf("Data mismatch; got %s, want %s", got, want)
	}
}

@ -0,0 +1,157 @@
Snapshot()
If the Snapshot Store (SS) needs a full snapshot, then:
- checkpoint the WAL
- set the path to the SQLite file in the FSMSnapshot object
If the SS needs an incremental snapshot
- If WAL is larger than SQLite file: (OR compact WAL -- will be the minimum changes required to copy)
- copy WAL bytes to snapshot (using memory for storage)
- checkpoint WAL into SQLite file.
Persist()
- create <snap>.tmp directory in SS (already created by Sink Open())
- check type of snapshot
- if full:
- move SQLite file to SS root, named as <snap>.db
- delete <snap-1>.db, if it exists, and any earlier snap directories too.
- else:
- move WAL file to beside existing SQLite file. Name it <snap>.db-wal. Sync everything.
- rename <snap>.tmp to <snap>
- rename <snap-1>.db to <snap>.db
- checkpoint <snap>.db-wal <snap>.db
- open and close <snap>.db to remove any WAL file
- delete any older snapshot directories <snap-1....-n>
Recover()
- if there is a directory ending with .tmp:
- call it <snap-tmp>
- indicates an incomplete snapshotting process
- delete all files and directories that contain the string <snap-tmp>
- get the newest snapshot, if any
- call this <snap>
- there should be a DB file named <snap>.db. If not, rename <snap-1>.db to <snap>.db. If there is no .db file, panic?
- if there is a WAL file called <snap>.db-wal checkpoint it into <snap>.db.
- open and close <snap>.db to remove any WAL file
- delete any older snapshot directories <snap-1....-n>, including any <snap-1......-n>.db
Misc:
- need to lock between Open()/Close() and a Sink existing, until Persist/Release is called.
COMPACTION:
Let's discuss the SQLite WAL file. When I say "compact the WAL file", what do you think I mean?
ChatGPT
When you refer to "compacting the SQLite WAL (Write-Ahead Logging) file," I interpret this as the process of reducing the size of the existing WAL file by eliminating redundant or unnecessary frames. Specifically, if multiple frames in the WAL file pertain to the same database page, these could be reduced to a single frame that contains the most recent state of that page. This operation would result in a new WAL file that is more space-efficient while still ensuring that all changes represented in the original WAL are retained. This is in line with the typical purpose of file compaction in various storage systems, which is to improve space utilization and potentially read/write performance.
User
Give me a step by step process for compacting a WAL file.
ChatGPT
Certainly. Here is a step-by-step procedure to compact a SQLite Write-Ahead Log (WAL) file:
Initialize: Create a new, empty WAL file to store the compacted frames.
Read Header: Read the header of the existing WAL file to identify its format, version, and other essential metadata. Copy this header information to the new WAL file.
Create Frame Map: Initialize an empty map to keep track of the most recent frame for each page number in the existing WAL file. The map's keys will be the page numbers, and the values will be the corresponding most recent frames.
Iterate Through Existing WAL: Read the existing WAL file frame by frame. For each frame:
Extract the page number from the frame header.
Check the frame map to see if this page number already exists.
If it does, replace the existing frame in the map with the current frame.
If it doesn't, add the current frame to the map.
Write to New WAL: Iterate through the frame map and write each frame to the new WAL file. This should be done in the same order the frames appear in the original WAL file to maintain any implied dependencies between the frames.
Update Checksums: Compute and update the checksum for each frame in the new WAL file. This is crucial for maintaining the integrity of the data.
Finalize New WAL: Once all frames have been written and checksums updated, close the new WAL file.
Validation: Optionally, you may wish to validate the new, compacted WAL file by applying it to a test database and ensuring it brings the database to the expected state.
Swap Files: Once you're sure the new WAL file is accurate, you can replace the existing WAL file with the new one. This step should be done with caution, considering the implications on database consistency.
Checkpoint: After the swap, perform a SQLite checkpoint operation to ensure that the changes from the new WAL are applied to the database.
By following these steps, you should have a compacted WAL file that maintains the integrity and semantics of the original one.
User
Let's talk about step 5. Imagine I have a WAL file with these pages. The digit in the page is the page number, the letter is the version. Version, say, b, is newer than version a. Here is the sequence:
1a,1b,2a,4a,5a,1c,6,7,6a,8,9
List the pages, in sequence, as they will appear in the new WAL.
ChatGPT
In the compacted WAL, the goal is to retain only the most recent frame for each page. The sequence in the existing WAL is as follows:
1a, 1b, 2a, 4a, 5a, 1c, 6, 7, 6a, 8, 9
For page 1, the most recent frame is "1c".
For page 2, the most recent frame is "2a".
For page 4, the most recent frame is "4a".
For page 5, the most recent frame is "5a".
For page 6, the most recent frame is "6a".
For page 7, the most recent frame is "7".
For page 8, the most recent frame is "8".
For page 9, the most recent frame is "9".
In the new, compacted WAL, the sequence of frames would be:
1c, 2a, 4a, 5a, 6a, 7, 8, 9
It should be noted that the order in which these frames appear in the new WAL depends on the sequence in the original WAL, which is important for maintaining any implied dependencies between the frames.
User
Page 2a, for example, comes after page 1a and 1b in the original WAL file. However it will come after page 1c in the rewritten WAL file. Will this be an issue?
ChatGPT
The sequence of frames within a WAL is significant because the purpose of the WAL is to record changes to a database in the order they occurred. This ensures that the database can be correctly reconstructed from a snapshot and the subsequent changes logged in the WAL. If the sequence of frames is altered during the compaction of the WAL, it may disrupt this order, potentially causing issues when SQLite attempts to replay the WAL to bring the database to a consistent state.
Therefore, if a compacted WAL is to be used in place of the original, it is critical to maintain the original sequence of the most recent frames for each page. In your example, the sequence "1c, 2a" in the compacted WAL correctly reflects that changes to page 1 should be applied before changes to page 2, just as "1a, 1b, 2a" did in the original WAL. This ensures that dependencies between frames are preserved.
Thus, while compacting a WAL can remove redundant frames, altering the sequence of remaining frames is generally not advisable due to the potential for disrupting the order in which changes should be applied.
User
Right, but before changes are applied to page 2, those changes will also include changes from page 1c. Is this going to be an issue?
ChatGPT
In a typical Write-Ahead Logging (WAL) implementation, each frame represents a change made to a particular page at a specific point in time. The frames are applied in sequence to ensure that the database reaches a consistent state that reflects all changes made.
If frame "1c" is more recent than "2a" and both frames are applied during the same WAL replay, then "1c" should be applied before "2a" to achieve the most recent and consistent database state. This is in line with the original sequence of changes as they were made to the database.
If, however, there are dependencies between pages—such as constraints, triggers, or transactional consistency requirements—that require changes to page 1 to be applied in a certain relation to changes to page 2, then the sequence could matter. In such cases, disrupting the sequence could result in a database state that violates these dependencies.
For the SQLite WAL specifically, SQLite should be able to properly apply the changes as long as the WAL frames are sequentially correct for each page. SQLite performs atomic commits by applying all changes in a WAL file as a single transaction during a checkpoint. Therefore, as long as the integrity of individual transactions is maintained within the compacted WAL, applying frame "1c" before "2a" should not be an issue.
In summary, it will not be an issue if frame "1c" is applied before frame "2a" as long as there are no explicit dependencies between these pages that rely on a different order of operations.
User
So from your step-by-step instructions above, it sounds like I also need, say, a slice to keep track of the order of the pages I read from the WAL. Keys in a map do not guarantee any order.
ChatGPT
Indeed, the order of keys in a Go map is not guaranteed to be consistent. Therefore, using a slice to keep track of the order of pages as you read them from the original WAL is advisable. This ensures that, when you write frames to the new, compacted WAL, you maintain the original sequence of page changes.
In summary, a slice can serve as an ordered log of page numbers you encounter, ensuring that the frames in the compacted WAL file are written in an order consistent with their original sequence. This is particularly important for preserving the integrity of transactions and any other dependencies between the pages.
User
Is there any other way you would suggest? Any other Go type?
ChatGPT
Another Go type to consider for maintaining the order of pages is a slice of structs, where each struct contains the page number and associated metadata, such as the version or offset in the original WAL. This would allow you to keep additional contextual information alongside the sequence, which might be beneficial depending on your specific requirements.
For instance, you might define the struct as follows:
go
type PageMeta struct {
PageNumber int
Version string
// any other metadata fields
}
You could then use a slice of these structs to maintain order and metadata:
go
var orderedPages []PageMeta
This approach allows you to both maintain the order of pages and keep any additional information that might be important for the compaction process or for debugging.
It's a slight expansion upon the basic slice of integers approach but offers more flexibility.

@ -0,0 +1,198 @@
package snapshot2
import (
"fmt"
"os"
"path/filepath"
"github.com/hashicorp/raft"
"github.com/rqlite/rqlite/db"
)
// Sink is a sink for writing snapshot data to a Snapshot store.
//
// Data written to the Sink is staged in a temporary directory and only
// becomes a visible snapshot once Close completes.
type Sink struct {
// str is the Store the snapshot is written into.
str *Store
// meta is the Raft metadata for the snapshot; meta.ID names the snapshot.
meta *raft.SnapshotMeta
// snapDirPath is the final snapshot directory (store dir + meta.ID). Set by Open.
snapDirPath string
// snapTmpDirPath is the temporary staging directory (tmpName of snapDirPath). Set by Open.
snapTmpDirPath string
// dataFD is the staging file that receives the bytes passed to Write.
dataFD *os.File
// opened is set by Open and cleared by Cancel and Close.
opened bool
}
// NewSink returns a Sink that will write the snapshot described by meta into
// the given Store. Nothing is touched on disk until Open is called.
func NewSink(str *Store, meta *raft.SnapshotMeta) *Sink {
	sink := new(Sink)
	sink.str = str
	sink.meta = meta
	return sink
}
// Open opens the sink for writing. It creates the temporary snapshot
// directory inside the Store's directory and, inside it, the data file that
// receives subsequent Write calls. Calling Open on an already-open Sink is a
// no-op.
//
// The Sink is only marked as opened once both the directory and the data
// file exist, so a failed Open can be retried and never leaves the Sink in
// an "opened" state without a usable data file.
func (s *Sink) Open() error {
	if s.opened {
		return nil
	}

	// Make temp snapshot directory
	snapDirPath := filepath.Join(s.str.Dir(), s.meta.ID)
	snapTmpDirPath := tmpName(snapDirPath)
	if err := os.MkdirAll(snapTmpDirPath, 0755); err != nil {
		return err
	}

	dataPath := filepath.Join(snapTmpDirPath, s.meta.ID+".data")
	dataFD, err := os.Create(dataPath)
	if err != nil {
		// Best effort: don't leave an empty staging directory behind. The
		// original error is what the caller needs to see.
		_ = os.RemoveAll(snapTmpDirPath)
		return err
	}

	s.snapDirPath = snapDirPath
	s.snapTmpDirPath = snapTmpDirPath
	s.dataFD = dataFD
	s.opened = true
	return nil
}
// Write appends p to the staged snapshot data file. The data does not become
// a snapshot until Close is called.
func (s *Sink) Write(p []byte) (n int, err error) {
	n, err = s.dataFD.Write(p)
	return n, err
}
// ID reports the identifier of the snapshot this Sink is writing, as recorded
// in the snapshot's metadata.
func (s *Sink) ID() string {
	id := s.meta.ID
	return id
}
// Cancel cancels the snapshot. Cancel must be called if the snapshot is not
// going to be closed. Calling Cancel on a Sink that is not open is a no-op.
//
// Temporary snapshot data is always removed, even if closing the data file
// fails; in that case the close error is returned, since it happened first.
func (s *Sink) Cancel() error {
	if !s.opened {
		return nil
	}
	s.opened = false

	var closeErr error
	if s.dataFD != nil {
		closeErr = s.dataFD.Close()
		s.dataFD = nil
	}

	// Clean up regardless of the close result so a failed close cannot leave
	// staged data behind in the Store directory.
	rmErr := RemoveAllTmpSnapshotData(s.str.Dir())
	if closeErr != nil {
		return closeErr
	}
	return rmErr
}
// Close closes the sink, and finalizes creation of the snapshot. It is critical
// that Close is called, or the snapshot will not be in place.
//
// Close is a no-op on a Sink that is not open. Otherwise it closes the staged
// data file, writes the snapshot metadata into the temporary directory,
// installs the data via processSnapshotData, records the size of the
// resulting SQLite file in the metadata, and finally reaps older snapshots.
// The first error encountered is returned.
func (s *Sink) Close() error {
if !s.opened {
return nil
}
s.opened = false
if err := s.dataFD.Close(); err != nil {
return err
}
// Write meta data
// NOTE(review): a failure here returns without removing the temporary
// directory; only processSnapshotData cleans up after itself on error.
if err := s.writeMeta(s.snapTmpDirPath); err != nil {
return err
}
if err := s.processSnapshotData(); err != nil {
return err
}
// Get size of SQLite file and set in meta.
dbPath, err := s.str.getDBPath()
if err != nil {
return err
}
fi, err := os.Stat(dbPath)
if err != nil {
return err
}
// The temporary directory has been renamed by now, so the meta file is
// updated at its final location.
if err := updateMetaSize(s.snapDirPath, fi.Size()); err != nil {
return fmt.Errorf("failed to update snapshot meta size: %s", err.Error())
}
_, err = s.str.Reap()
return err
}
// processSnapshotData installs the staged data file into the Store. The data
// must be either a SQLite database file or a SQLite WAL file, as judged by
// db.IsValidSQLiteFile and db.IsValidSQLiteWALFile. A database file becomes
// <ID>.db in the Store directory; a WAL file becomes <ID>.db-wal and is
// subsequently applied to the previous snapshot's database file. If any step
// fails, all temporary snapshot data is removed before the error is returned.
func (s *Sink) processSnapshotData() (retErr error) {
defer func() {
if retErr != nil {
RemoveAllTmpSnapshotData(s.str.Dir())
}
}()
// Check the state of the store before processing this new snapshot. This
// allows us to perform some sanity checks on the incoming snapshot data.
snapshots, err := s.str.getSnapshots()
if err != nil {
return err
}
// Name() is still usable here even though Close has already closed dataFD.
if db.IsValidSQLiteFile(s.dataFD.Name()) {
// Full snapshot: move the data into place as this snapshot's database.
if err := os.Rename(s.dataFD.Name(), filepath.Join(s.str.Dir(), s.meta.ID+".db")); err != nil {
return err
}
} else if db.IsValidSQLiteWALFile(s.dataFD.Name()) {
if len(snapshots) == 0 {
// We are trying to create our first snapshot from a WAL file, which is invalid.
return fmt.Errorf("data for first snapshot is a WAL file")
} else {
// We have at least one previous snapshot. That means we should have a valid SQLite file
// for the previous snapshot.
snapPrev := snapshots[len(snapshots)-1]
snapPrevDB := filepath.Join(s.str.Dir(), snapPrev.ID+".db")
if !db.IsValidSQLiteFile(snapPrevDB) {
return fmt.Errorf("previous snapshot data is not a SQLite file: %s", snapPrevDB)
}
}
// Incremental snapshot: move the WAL into place next to where this
// snapshot's database file will be.
if err := os.Rename(s.dataFD.Name(), filepath.Join(s.str.Dir(), s.meta.ID+".db-wal")); err != nil {
return err
}
} else {
return fmt.Errorf("invalid snapshot data file: %s", s.dataFD.Name())
}
// Indicate snapshot data been successfully persisted to disk by renaming
// the temp directory to a non-temporary name.
if err := os.Rename(s.snapTmpDirPath, s.snapDirPath); err != nil {
return err
}
if err := syncDirMaybe(s.str.Dir()); err != nil {
return err
}
// Now check if we need to replay any WAL file into the previous SQLite file. This is
// the final step of any snapshot process.
// The list is re-read because it now includes the snapshot just installed.
snapshots, err = s.str.getSnapshots()
if err != nil {
return err
}
if len(snapshots) >= 2 {
snapPrev := snapshots[len(snapshots)-2]
snapNew := snapshots[len(snapshots)-1]
snapPrevDB := filepath.Join(s.str.Dir(), snapPrev.ID+".db")
snapNewDB := filepath.Join(s.str.Dir(), snapNew.ID+".db")
snapNewWAL := filepath.Join(s.str.Dir(), snapNew.ID+".db-wal")
if db.IsValidSQLiteWALFile(snapNewWAL) {
// The most recent snapshot was created from a WAL file, so we need to replay
// that WAL file into the previous SQLite file.
// The previous database is renamed so it sits alongside the new WAL;
// opening and closing it is what applies the WAL.
if err := os.Rename(snapPrevDB, snapNewDB); err != nil {
return err
}
if err := openCloseDB(snapNewDB); err != nil {
return err
}
}
}
if err := syncDirMaybe(s.str.Dir()); err != nil {
return err
}
// NOTE(review): the result of Reap is discarded here; Close calls Reap
// again afterwards and does report its error.
s.str.Reap()
return nil
}
// writeMeta persists this Sink's snapshot metadata into dir.
func (s *Sink) writeMeta(dir string) error {
	err := writeMeta(dir, s.meta)
	return err
}

@ -0,0 +1,318 @@
package snapshot2
import (
"bytes"
"fmt"
"io"
"os"
"testing"
"github.com/hashicorp/raft"
"github.com/rqlite/rqlite/command/encoding"
"github.com/rqlite/rqlite/db"
)
// Test_NewSinkCancel checks that a freshly created sink reports its ID and
// can be cancelled without ever having been opened.
func Test_NewSinkCancel(t *testing.T) {
	const id = "snap-1234"
	sink := NewSink(mustStore(t), makeRaftMeta(id, 3, 2, 1))
	if sink == nil {
		t.Fatalf("Failed to create new sink")
	}
	if got := sink.ID(); got != id {
		t.Fatalf("Unexpected ID: %s", got)
	}
	err := sink.Cancel()
	if err != nil {
		t.Fatalf("Failed to cancel unopened sink: %v", err)
	}
}
// Test_NewSinkClose checks that a freshly created sink reports its ID and
// that closing a sink which was never opened succeeds.
func Test_NewSinkClose(t *testing.T) {
	sink := NewSink(mustStore(t), makeRaftMeta("snap-1234", 3, 2, 1))
	if sink == nil {
		t.Fatalf("Failed to create new sink")
	}
	if sink.ID() != "snap-1234" {
		t.Fatalf("Unexpected ID: %s", sink.ID())
	}
	if err := sink.Close(); err != nil {
		// This exercises Close, not Cancel, so the message must say so.
		t.Fatalf("Failed to close unopened sink: %v", err)
	}
}
func Test_NewSinkOpenCancel(t *testing.T) {
sink := NewSink(mustStore(t), makeRaftMeta("snap-1234", 3, 2, 1))
if sink == nil {
t.Fatalf("Failed to create new sink")
}
if err := sink.Open(); err != nil {
t.Fatalf("Failed to open sink: %v", err)
}
if err := sink.Cancel(); err != nil {
t.Fatalf("Failed to cancel opened sink: %v", err)
}
}
func Test_NewSinkOpenCloseFail(t *testing.T) {
sink := NewSink(mustStore(t), makeRaftMeta("snap-1234", 3, 2, 1))
if sink == nil {
t.Fatalf("Failed to create new sink")
}
if err := sink.Open(); err != nil {
t.Fatalf("Failed to open sink: %v", err)
}
if err := sink.Close(); err == nil {
t.Fatalf("Expected error closing opened sink without data")
}
}
// Test_SinkFullSnapshot writes a full SQLite database through a sink and
// checks that the Store then lists and serves exactly that snapshot. It then
// writes a second full database and checks that it replaces the first.
func Test_SinkFullSnapshot(t *testing.T) {
	store := mustStore(t)
	sink := NewSink(store, makeRaftMeta("snap-1234", 3, 2, 1))
	if sink == nil {
		t.Fatalf("Failed to create new sink")
	}
	if err := sink.Open(); err != nil {
		t.Fatalf("Failed to open sink: %v", err)
	}

	sqliteFile := mustOpenFile(t, "testdata/db-and-wals/backup.db")
	defer sqliteFile.Close()
	n, err := io.Copy(sink, sqliteFile)
	if err != nil {
		t.Fatalf("Failed to copy SQLite file: %v", err)
	}
	if n != mustGetFileSize(t, "testdata/db-and-wals/backup.db") {
		t.Fatalf("Unexpected number of bytes copied: %d", n)
	}
	if err := sink.Close(); err != nil {
		t.Fatalf("Failed to close sink: %v", err)
	}

	// Check snapshot is available and correct.
	expMeta := makeRaftMeta("snap-1234", 3, 2, 1)
	metas, err := store.List()
	if err != nil {
		t.Fatalf("Failed to list snapshots: %v", err)
	}
	if len(metas) != 1 {
		t.Fatalf("Expected 1 snapshot, got %d", len(metas))
	}
	compareMetas(t, expMeta, metas[0])
	meta, fd, err := store.Open("snap-1234")
	if err != nil {
		t.Fatalf("Failed to open snapshot: %v", err)
	}
	defer fd.Close()
	compareMetas(t, expMeta, meta)
	if !compareReaderToFile(t, fd, "testdata/db-and-wals/backup.db") {
		t.Fatalf("Snapshot data does not match")
	}

	// Write a second full snapshot, it should be installed without issue.
	sink = NewSink(store, makeRaftMeta("snap-5678", 4, 3, 2))
	if sink == nil {
		t.Fatalf("Failed to create new sink")
	}
	if err := sink.Open(); err != nil {
		t.Fatalf("Failed to open sink: %v", err)
	}
	sqliteFile2 := mustOpenFile(t, "testdata/db-and-wals/full2.db")
	defer sqliteFile2.Close()
	n, err = io.Copy(sink, sqliteFile2)
	if err != nil {
		t.Fatalf("Failed to copy second SQLite file: %v", err)
	}
	if n != mustGetFileSize(t, "testdata/db-and-wals/full2.db") {
		t.Fatalf("Unexpected number of bytes copied: %d", n)
	}
	if err := sink.Close(); err != nil {
		t.Fatalf("Failed to close sink: %v", err)
	}

	// Check second snapshot is available and correct.
	expMeta2 := makeRaftMeta("snap-5678", 4, 3, 2)
	metas2, err := store.List()
	if err != nil {
		t.Fatalf("Failed to list snapshots: %v", err)
	}
	if len(metas2) != 1 {
		// Report the length of the list actually being checked.
		t.Fatalf("Expected 1 snapshot, got %d", len(metas2))
	}
	compareMetas(t, expMeta2, metas2[0])
	meta2, fd2, err := store.Open("snap-5678")
	if err != nil {
		t.Fatalf("Failed to open second snapshot: %v", err)
	}
	defer fd2.Close()
	compareMetas(t, expMeta2, meta2)
	if !compareReaderToFile(t, fd2, "testdata/db-and-wals/full2.db") {
		t.Fatalf("second full snapshot data does not match")
	}
}
// Test_SinkWALSnapshotEmptyStoreFail verifies that writing a WAL file into a
// Store that holds no snapshot yet is rejected when the sink is closed, and
// that nothing is left behind in the Store directory afterwards.
func Test_SinkWALSnapshotEmptyStoreFail(t *testing.T) {
	const walPath = "testdata/db-and-wals/wal-00"

	store := mustStore(t)
	sink := NewSink(store, makeRaftMeta("snap-1234", 3, 2, 1))
	if sink == nil {
		t.Fatalf("Failed to create new sink")
	}
	if openErr := sink.Open(); openErr != nil {
		t.Fatalf("Failed to open sink: %v", openErr)
	}

	walFile := mustOpenFile(t, walPath)
	defer walFile.Close()
	copied, err := io.Copy(sink, walFile)
	if err != nil {
		t.Fatalf("Failed to copy SQLite file: %v", err)
	}
	if copied != mustGetFileSize(t, walPath) {
		t.Fatalf("Unexpected number of bytes copied: %d", copied)
	}

	if sink.Close() == nil {
		t.Fatalf("unexpected success closing sink after writing WAL data")
	}

	// Peek inside the Store, there should be zero data inside.
	entries, err := os.ReadDir(store.Dir())
	if err != nil {
		t.Fatalf("Failed to read dir: %v", err)
	}
	if count := len(entries); count != 0 {
		t.Fatalf("Expected 0 files inside Store, got %d", count)
	}
}
// Test_SinkCreateFullThenWALSnapshots installs one full snapshot followed by
// four WAL-based snapshots, then opens the database held by the Store and
// checks the resulting row count. Correct snapshot creation is critical, so
// the whole sequence is exercised end to end.
func Test_SinkCreateFullThenWALSnapshots(t *testing.T) {
	store := mustStore(t)

	// install streams the named file through a new sink and closes it.
	install := func(id string, index, term, cfgIndex uint64, file string) {
		sink := NewSink(store, makeRaftMeta(id, index, term, cfgIndex))
		if sink == nil {
			t.Fatalf("Failed to create new sink")
		}
		if err := sink.Open(); err != nil {
			t.Fatalf("Failed to open sink: %v", err)
		}
		src := mustOpenFile(t, file)
		defer src.Close()
		if _, err := io.Copy(sink, src); err != nil {
			t.Fatalf("Failed to copy WAL file: %v", err)
		}
		if err := sink.Close(); err != nil {
			t.Fatalf("Failed to close sink: %v", err)
		}
	}

	steps := []struct {
		id                    string
		index, term, cfgIndex uint64
		file                  string
	}{
		{"snap-1234", 3, 2, 1, "testdata/db-and-wals/backup.db"},
		{"snap-2345", 4, 3, 2, "testdata/db-and-wals/wal-00"},
		{"snap-3456", 5, 4, 3, "testdata/db-and-wals/wal-01"},
		{"snap-4567", 6, 5, 4, "testdata/db-and-wals/wal-02"},
		{"snap-5678", 7, 6, 5, "testdata/db-and-wals/wal-03"},
	}
	for _, step := range steps {
		install(step.id, step.index, step.term, step.cfgIndex, step.file)
	}

	// Check the database state inside the Store.
	dbPath, err := store.getDBPath()
	if err != nil {
		t.Fatalf("Failed to get DB path: %v", err)
	}
	checkDB, err := db.Open(dbPath, false, true)
	if err != nil {
		t.Fatalf("failed to open database at %s: %s", dbPath, err)
	}
	defer checkDB.Close()

	rows, err := checkDB.QueryStringStmt("SELECT COUNT(*) FROM foo")
	if err != nil {
		t.Fatalf("failed to query database: %s", err)
	}
	exp := `[{"columns":["COUNT(*)"],"types":["integer"],"values":[[4]]}]`
	got := asJSON(rows)
	if exp != got {
		t.Fatalf("unexpected results for query exp: %s got: %s", exp, got)
	}
}
// compareMetas fails the test at the first field (ID, Index, Term,
// ConfigurationIndex, Version) on which m1 and m2 differ. The failure message
// reports the value held by m1.
func compareMetas(t *testing.T, m1, m2 *raft.SnapshotMeta) {
	t.Helper()
	switch {
	case m1.ID != m2.ID:
		t.Fatalf("Unexpected snapshot ID: %s", m1.ID)
	case m1.Index != m2.Index:
		t.Fatalf("Unexpected snapshot index: %d", m1.Index)
	case m1.Term != m2.Term:
		t.Fatalf("Unexpected snapshot term: %d", m1.Term)
	case m1.ConfigurationIndex != m2.ConfigurationIndex:
		t.Fatalf("Unexpected snapshot configuration index: %d", m1.ConfigurationIndex)
	case m1.Version != m2.Version:
		t.Fatalf("Unexpected snapshot version: %d", m1.Version)
	}
}
// compareReaderToFile reports whether the remaining contents of r are
// byte-for-byte identical to the file at path.
func compareReaderToFile(t *testing.T, r io.Reader, path string) bool {
	t.Helper()
	file := mustOpenFile(t, path)
	defer file.Close()
	same := compareReaderToReader(t, r, file)
	return same
}
// compareReaderToReader drains both readers and reports whether they yielded
// identical bytes. A read failure on either reader fails the test.
func compareReaderToReader(t *testing.T, r1, r2 io.Reader) bool {
	t.Helper()

	first, err := io.ReadAll(r1)
	if err != nil {
		t.Fatalf("Failed to read from reader 1: %v", err)
	}

	second, err := io.ReadAll(r2)
	if err != nil {
		t.Fatalf("Failed to read from reader 2: %v", err)
	}

	identical := bytes.Equal(first, second)
	return identical
}
// mustStore returns a new Store rooted in a per-test temporary directory,
// failing the test if the Store cannot be created.
func mustStore(t *testing.T) *Store {
	t.Helper()
	dir := t.TempDir()
	store, err := NewStore(dir)
	if err != nil {
		t.Fatalf("Failed to create store: %v", err)
	}
	return store
}
// makeRaftMeta builds snapshot metadata for tests with the given ID, index,
// term and configuration index, a single-server test configuration, and
// Version 1.
func makeRaftMeta(id string, index, term, cfgIndex uint64) *raft.SnapshotMeta {
	meta := new(raft.SnapshotMeta)
	meta.ID = id
	meta.Index = index
	meta.Term = term
	meta.Configuration = makeTestConfiguration("1", "localhost:1")
	meta.ConfigurationIndex = cfgIndex
	meta.Version = 1
	return meta
}
// mustOpenFile opens the file at path for reading, failing the test if that
// is not possible. The caller is responsible for closing the returned file.
func mustOpenFile(t *testing.T, path string) *os.File {
	t.Helper()
	file, openErr := os.Open(path)
	if openErr != nil {
		t.Fatalf("Failed to open file: %v", openErr)
	}
	return file
}
// mustGetFileSize returns the size in bytes of the file at path, failing the
// test if the file cannot be stat'ed.
func mustGetFileSize(t *testing.T, path string) int64 {
	// Mark as a helper, like the other must* functions, so a failure is
	// attributed to the calling test line rather than to this function.
	t.Helper()
	stat, err := os.Stat(path)
	if err != nil {
		t.Fatalf("Failed to stat file: %v", err)
	}
	return stat.Size()
}
// asJSON renders v as a JSON string using the project's encoder. It panics
// if v cannot be marshalled.
func asJSON(v interface{}) string {
	var enc encoding.Encoder
	out, err := enc.JSONMarshal(v)
	if err != nil {
		panic(fmt.Sprintf("failed to JSON marshal value: %s", err.Error()))
	}
	return string(out)
}

@ -0,0 +1,40 @@
package snapshot2
import (
"expvar"
"io"
"time"
"github.com/hashicorp/raft"
)
// Snapshot represents a snapshot of the database state.
type Snapshot struct {
// rc is the source of the snapshot data; Persist copies it to the sink
// and closes it.
rc io.ReadCloser
}
// NewSnapshot wraps rc, the source of the snapshot data, in a Snapshot.
func NewSnapshot(rc io.ReadCloser) *Snapshot {
	snap := new(Snapshot)
	snap.rc = rc
	return snap
}
// Persist copies the snapshot data to sink and closes the data source before
// returning. On success the number of bytes copied and the elapsed time in
// milliseconds are recorded in the package stats.
func (s *Snapshot) Persist(sink raft.SnapshotSink) error {
	defer s.rc.Close()

	begin := time.Now()
	written, err := io.Copy(sink, s.rc)
	if err != nil {
		return err
	}
	elapsed := time.Since(begin)

	sizeStat := stats.Get(persistSize).(*expvar.Int)
	sizeStat.Set(written)
	durStat := stats.Get(persistDuration).(*expvar.Int)
	durStat.Set(elapsed.Milliseconds())
	return nil
}
// Release releases the snapshot. It currently does nothing.
func (s *Snapshot) Release() {
}

@ -0,0 +1,498 @@
package snapshot2
import (
"encoding/json"
"expvar"
"fmt"
"io"
"log"
"os"
"path/filepath"
"runtime"
"sort"
"strings"
"sync"
"time"
"github.com/hashicorp/raft"
"github.com/rqlite/rqlite/db"
)
// Keys of the entries in the package-level stats map.
const (
persistSize = "latest_persist_size"
persistDuration = "latest_persist_duration"
reap_snapshots_duration = "reap_snapshots_duration"
numSnapshotsReaped = "num_snapshots_reaped"
)
// File-naming conventions used inside the Store directory.
const (
// metaFileName is the name of the metadata file inside each snapshot directory.
metaFileName = "meta.json"
// tmpSuffix marks a snapshot directory as temporary (not yet installed).
tmpSuffix = ".tmp"
)
// stats holds the expvar counters published by this package under the name
// "snapshot".
var stats *expvar.Map

// init publishes the stats map and zeroes all of its counters.
func init() {
	published := expvar.NewMap("snapshot")
	stats = published
	ResetStats()
}
// ResetStats clears the package's expvar stats and re-creates every counter
// with a value of zero. Mostly for test purposes.
func ResetStats() {
	stats.Init()
	counters := []string{
		persistSize,
		persistDuration,
		reap_snapshots_duration,
		numSnapshotsReaped,
	}
	for _, key := range counters {
		stats.Add(key, 0)
	}
}
// LockingSink is a wrapper around a SnapshotSink that ensures that the
// Store has handed out only 1 sink at a time.
//
// The Store's sink mutex is acquired by Store.Create before a LockingSink is
// returned, and is released by Close or Cancel.
type LockingSink struct {
// SnapshotSink is the wrapped sink; its methods are promoted except for
// Close and Cancel, which are overridden to release the Store's mutex.
raft.SnapshotSink
// str is the Store whose sink mutex is held while this sink exists.
str *Store
}
// Close closes the wrapped sink and then releases the Store's sink mutex,
// allowing a new sink to be created. The wrapped sink's error is returned.
func (s *LockingSink) Close() error {
	mu := &s.str.sinkMu
	defer mu.Unlock()
	return s.SnapshotSink.Close()
}
// Cancel cancels the wrapped sink and then releases the Store's sink mutex,
// allowing a new sink to be created. The wrapped sink's error is returned.
func (s *LockingSink) Cancel() error {
	mu := &s.str.sinkMu
	defer mu.Unlock()
	return s.SnapshotSink.Cancel()
}
// Store stores Snapshots.
type Store struct {
// dir is the directory holding all snapshot directories and data files.
dir string
// sinkMu is held from Create until the returned sink is closed or
// cancelled, so that only one sink exists at a time.
sinkMu sync.Mutex
// logger writes Store messages to stderr with a "[snapshot-store] " prefix.
logger *log.Logger
}
// NewStore creates the directory dir if necessary and returns a Store rooted
// there. The directory is checked, and repaired if needed, before the Store
// is returned; a failed check is reported as an error.
func NewStore(dir string) (*Store, error) {
	err := os.MkdirAll(dir, 0755)
	if err != nil {
		return nil, err
	}

	store := new(Store)
	store.dir = dir
	store.logger = log.New(os.Stderr, "[snapshot-store] ", log.LstdFlags)
	store.logger.Printf("store initialized using %s", dir)

	if err = store.check(); err != nil {
		return nil, fmt.Errorf("check failed: %s", err)
	}
	return store, nil
}
// Create returns a new, opened Sink ready to receive snapshot data. A Sink
// makes assumptions about the state of the Store which another Sink writing
// at the same time could invalidate, so only one Sink may exist at a time:
// Create takes the Store's sink mutex and it stays held until the returned
// sink is closed or cancelled. This should not be a problem in practice,
// since snapshots are taken infrequently and one at a time. If the Sink
// cannot be opened the mutex is released and the error returned.
func (s *Store) Create(version raft.SnapshotVersion, index, term uint64, configuration raft.Configuration,
	configurationIndex uint64, trans raft.Transport) (retSink raft.SnapshotSink, retErr error) {
	s.sinkMu.Lock()

	sink := NewSink(s, &raft.SnapshotMeta{
		ID:                 snapshotName(term, index),
		Index:              index,
		Term:               term,
		Configuration:      configuration,
		ConfigurationIndex: configurationIndex,
		Version:            version,
	})
	if err := sink.Open(); err != nil {
		// No sink is handed out, so nobody else would ever unlock.
		s.sinkMu.Unlock()
		return nil, err
	}
	return &LockingSink{SnapshotSink: sink, str: s}, nil
}
// List returns the metadata of the most recent snapshot in the Store, as a
// slice of at most one element. It returns an empty slice if the Store holds
// no snapshots.
//
// getSnapshots returns snapshots ordered oldest to newest, so the newest
// snapshot is the LAST element. Older snapshots are never reported: they may
// exist only transiently, before being reaped.
func (s *Store) List() ([]*raft.SnapshotMeta, error) {
	snapshots, err := s.getSnapshots()
	if err != nil {
		return nil, err
	}
	var snapMeta []*raft.SnapshotMeta
	if len(snapshots) > 0 {
		newest := snapshots[len(snapshots)-1]
		snapshotDir := filepath.Join(s.dir, newest.ID)
		meta, err := readMeta(snapshotDir)
		if err != nil {
			return nil, err
		}
		snapMeta = append(snapMeta, meta)
	}
	return snapMeta, nil
}
// Open returns the metadata for the snapshot with the given ID, together
// with a reader over that snapshot's database file (<id>.db in the Store
// directory). The caller must close the reader.
func (s *Store) Open(id string) (*raft.SnapshotMeta, io.ReadCloser, error) {
	snapDir := filepath.Join(s.dir, id)
	meta, err := readMeta(snapDir)
	if err != nil {
		return nil, nil, err
	}

	dbFile, err := os.Open(filepath.Join(s.dir, id+".db"))
	if err != nil {
		return nil, nil, err
	}
	return meta, dbFile, nil
}
// Stats returns stats about the Snapshot Store. It currently reports nothing:
// both the map and the error are always nil.
func (s *Store) Stats() (map[string]interface{}, error) {
	var none map[string]interface{}
	return none, nil
}
// Reap deletes every snapshot in the Store except the most recent one,
// together with all files whose names start with the snapshot's ID. It
// returns how many snapshots were removed; if a removal fails, the count of
// snapshots removed so far is returned along with the error.
func (s *Store) Reap() (int, error) {
	snapshots, err := s.getSnapshots()
	if err != nil {
		return 0, err
	}

	// Snapshots are ordered oldest to newest, so everything before the final
	// element goes. With zero or one snapshot the loop body never runs.
	reaped := 0
	for i := 0; i+1 < len(snapshots); i++ {
		id := snapshots[i].ID
		if err := removeAllPrefix(s.dir, id); err != nil {
			return reaped, err
		}
		s.logger.Printf("reaped snapshot %s", id)
		reaped++
	}
	return reaped, nil
}
// Dir reports the directory in which this Store keeps its snapshots.
func (s *Store) Dir() string {
	dir := s.dir
	return dir
}
// check checks the Store for any inconsistencies, and repairs
// any inconsistencies it finds. Inconsistencies can happen
// if the system crashes during snapshotting.
//
// It first removes all temporary snapshot data. Then, depending on how many
// snapshots remain:
//   - none: nothing to do;
//   - one: its <ID>.db must be a valid SQLite file, otherwise an error is returned;
//   - two or more: the newest snapshot must end up with a valid SQLite file,
//     either because it already has one, or by moving the previous snapshot's
//     SQLite file into place and opening and closing it.
func (s *Store) check() (retError error) {
defer func() {
// Best effort: the result of the directory sync is not checked.
syncDirMaybe(s.dir)
s.logger.Printf("check complete")
}()
s.logger.Printf("checking snapshot store at %s", s.dir)
if err := RemoveAllTmpSnapshotData(s.dir); err != nil {
return err
}
snapshots, err := s.getSnapshots()
if err != nil {
return err
}
if len(snapshots) == 0 {
// Nothing to do!
return nil
} else if len(snapshots) == 1 {
// We only have one snapshot. Confirm we have a valid SQLite file
// for that snapshot.
snap := snapshots[0]
snapDB := filepath.Join(s.dir, snap.ID+".db")
if !db.IsValidSQLiteFile(snapDB) {
return fmt.Errorf("sole snapshot data is not a valid SQLite file: %s", snap.ID)
}
} else {
// Do we have a valid SQLite file for the most recent snapshot?
// (getSnapshots sorts oldest to newest, so the last entry is the newest.)
snap := snapshots[len(snapshots)-1]
snapDB := filepath.Join(s.dir, snap.ID+".db")
snapDir := filepath.Join(s.dir, snap.ID)
if db.IsValidSQLiteFile(snapDB) {
// Open and close it, which will replay any WAL file into it.
return openCloseDB(snapDB)
}
// We better have a SQLite file for the previous snapshot.
snapPrev := snapshots[len(snapshots)-2]
snapPrevDB := filepath.Join(s.dir, snapPrev.ID+".db")
if !db.IsValidSQLiteFile(snapPrevDB) {
return fmt.Errorf("previous snapshot data is not a SQLite file: %s", snapPrev.ID)
}
// Rename the previous SQLite file to the current snapshot, and then replay any WAL file into it.
if err := os.Rename(snapPrevDB, snapDB); err != nil {
return err
}
if err := openCloseDB(snapDB); err != nil {
return err
}
// Ensure the size is set in the Snapshot's meta.
fi, err := os.Stat(snapDB)
if err != nil {
return err
}
if err := updateMetaSize(snapDir, fi.Size()); err != nil {
return err
}
}
return nil
}
// getSnapshots returns the metadata of every installed snapshot in the
// Store, sorted from oldest to newest. Plain files and temporary snapshot
// directories are skipped. A directory whose metadata cannot be read causes
// an error.
func (s *Store) getSnapshots() ([]*raft.SnapshotMeta, error) {
	entries, err := os.ReadDir(s.dir)
	if err != nil {
		return nil, err
	}

	var metas []*raft.SnapshotMeta
	for _, entry := range entries {
		name := entry.Name()
		// Only non-temporary directories represent installed snapshots.
		if !entry.IsDir() || isTmpName(name) {
			continue
		}

		meta, err := readMeta(filepath.Join(s.dir, name))
		if err != nil {
			return nil, fmt.Errorf("failed to read meta for snapshot %s: %s", name, err)
		}
		metas = append(metas, meta)
	}

	sort.Sort(snapMetaSlice(metas))
	return metas, nil
}
// getDBPath returns the path of the database file belonging to the most
// recent snapshot, or the empty string (and no error) if the Store holds no
// snapshots. It is mostly useful for testing.
func (s *Store) getDBPath() (string, error) {
	snapshots, err := s.getSnapshots()
	switch {
	case err != nil:
		return "", err
	case len(snapshots) == 0:
		return "", nil
	}
	newest := snapshots[len(snapshots)-1]
	return filepath.Join(s.dir, newest.ID+".db"), nil
}
// RemoveAllTmpSnapshotData removes all temporary Snapshot data from dir.
// For every temporary directory found in dir, it first removes every other
// path in dir whose name begins with that directory's name minus the
// temporary suffix, and then removes the temporary directory itself. If dir
// cannot be read, nothing is removed and nil is returned.
func RemoveAllTmpSnapshotData(dir string) error {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return nil
	}

	for _, entry := range entries {
		if !entry.IsDir() || !isTmpName(entry.Name()) {
			continue
		}

		tmpDirPath := filepath.Join(dir, entry.Name())
		pattern := filepath.Join(dir, nonTmpName(entry.Name())) + "*"
		matches, err := filepath.Glob(pattern)
		if err != nil {
			return err
		}
		for _, match := range matches {
			// Delete the directory last as a sign the deletion is complete.
			if match == tmpDirPath {
				continue
			}
			if err := os.Remove(match); err != nil {
				return err
			}
		}

		if err := os.RemoveAll(tmpDirPath); err != nil {
			return err
		}
	}
	return nil
}
// snapshotName builds a snapshot name of the form "<term>-<index>-<msec>",
// where msec is the current Unix time in milliseconds.
func snapshotName(term, index uint64) string {
	sinceEpoch := time.Duration(time.Now().UnixNano())
	msec := int64(sinceEpoch / time.Millisecond)
	return fmt.Sprintf("%d-%d-%d", term, index, msec)
}
// parentDir returns the directory containing dir.
func parentDir(dir string) string {
	parent := filepath.Dir(dir)
	return parent
}
// tmpName returns the temporary form of path: path with the temporary
// suffix appended.
func tmpName(path string) string {
	withSuffix := path + tmpSuffix
	return withSuffix
}
// nonTmpName returns path with a trailing temporary suffix removed. A path
// without the suffix is returned unchanged.
func nonTmpName(path string) string {
	stripped := strings.TrimSuffix(path, tmpSuffix)
	return stripped
}
// isTmpName reports whether name's file extension is the temporary suffix.
func isTmpName(name string) bool {
	ext := filepath.Ext(name)
	return ext == tmpSuffix
}
// fileExists reports whether path exists. Any Stat outcome other than a
// "does not exist" error counts as the path existing.
func fileExists(path string) bool {
	if _, err := os.Stat(path); os.IsNotExist(err) {
		return false
	}
	return true
}
// dirExists reports whether path exists and is a directory.
func dirExists(path string) bool {
	info, err := os.Stat(path)
	if err != nil {
		return false
	}
	return info.IsDir()
}
// dirIsEmpty reports whether dir contains no entries. It returns an error if
// dir cannot be read.
func dirIsEmpty(dir string) (bool, error) {
	entries, readErr := os.ReadDir(dir)
	if readErr != nil {
		return false, readErr
	}
	empty := len(entries) == 0
	return empty, nil
}
// syncDir opens dir and calls Sync on it, returning any error from opening
// or syncing.
func syncDir(dir string) error {
	handle, openErr := os.Open(dir)
	if openErr != nil {
		return openErr
	}
	defer handle.Close()

	syncErr := handle.Sync()
	return syncErr
}
// removeDirSync removes dir and everything beneath it, then syncs dir's
// parent directory (on non-Windows platforms).
func removeDirSync(dir string) error {
	rmErr := os.RemoveAll(dir)
	if rmErr != nil {
		return rmErr
	}
	return syncDirParentMaybe(dir)
}
// syncDirParentMaybe syncs the parent of the given directory. On Windows it
// does nothing and returns nil.
func syncDirParentMaybe(dir string) error {
	if runtime.GOOS != "windows" {
		return syncDir(parentDir(dir))
	}
	return nil
}
// syncDirMaybe syncs the given directory. On Windows it does nothing and
// returns nil.
func syncDirMaybe(dir string) error {
	if runtime.GOOS != "windows" {
		return syncDir(dir)
	}
	return nil
}
// removeAllPrefix removes every entry of the directory path whose name
// starts with prefix. Matching directories are removed together with their
// contents.
func removeAllPrefix(path, prefix string) error {
	pattern := filepath.Join(path, prefix) + "*"
	matches, err := filepath.Glob(pattern)
	if err != nil {
		return err
	}
	for i := range matches {
		if rmErr := os.RemoveAll(matches[i]); rmErr != nil {
			return rmErr
		}
	}
	return nil
}
// readMeta loads and JSON-decodes the metadata file found in the snapshot
// directory dir.
func readMeta(dir string) (*raft.SnapshotMeta, error) {
	fh, err := os.Open(filepath.Join(dir, metaFileName))
	if err != nil {
		return nil, err
	}
	defer fh.Close()

	var meta raft.SnapshotMeta
	if err := json.NewDecoder(fh).Decode(&meta); err != nil {
		return nil, err
	}
	return &meta, nil
}
// writeMeta JSON-encodes meta into the metadata file of the snapshot
// directory dir, creating or truncating the file, and syncs the file before
// closing it.
func writeMeta(dir string, meta *raft.SnapshotMeta) error {
	metaPath := filepath.Join(dir, metaFileName)
	fh, err := os.Create(metaPath)
	if err != nil {
		return fmt.Errorf("error creating meta file: %v", err)
	}
	defer fh.Close()

	// Write out as JSON
	if err = json.NewEncoder(fh).Encode(meta); err != nil {
		return fmt.Errorf("failed to encode meta: %v", err)
	}

	if syncErr := fh.Sync(); syncErr != nil {
		return syncErr
	}
	// Close explicitly so that a close error is reported to the caller.
	return fh.Close()
}
// updateMetaSize rewrites the metadata stored in the snapshot directory dir
// with its Size field set to sz.
func updateMetaSize(dir string, sz int64) error {
	current, readErr := readMeta(dir)
	if readErr != nil {
		return readErr
	}
	current.Size = sz
	writeErr := writeMeta(dir, current)
	return writeErr
}
// openCloseDB opens the database at path and immediately closes it again,
// returning the first error encountered.
func openCloseDB(path string) error {
	conn, openErr := db.Open(path, false, true)
	if openErr != nil {
		return openErr
	}
	closeErr := conn.Close()
	return closeErr
}
// snapMetaSlice implements sort.Interface for a slice of snapshot metadata,
// ordering by Term, then Index, then ID, all ascending.
type snapMetaSlice []*raft.SnapshotMeta

// Len returns the number of elements in the slice.
func (s snapMetaSlice) Len() int {
	n := len(s)
	return n
}

// Less reports whether element i sorts before element j.
func (s snapMetaSlice) Less(i, j int) bool {
	a, b := s[i], s[j]
	switch {
	case a.Term != b.Term:
		return a.Term < b.Term
	case a.Index != b.Index:
		return a.Index < b.Index
	default:
		return a.ID < b.ID
	}
}

// Swap exchanges elements i and j.
func (s snapMetaSlice) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}

@ -0,0 +1,186 @@
package snapshot2
import (
"os"
"testing"
"github.com/hashicorp/raft"
)
// Test_RemoveAllTmpSnapshotData checks that cleaning a directory removes
// temporary snapshot directories together with every path sharing their
// name prefix, while leaving unrelated files and directories, and plain
// files that merely carry the temporary suffix, untouched.
func Test_RemoveAllTmpSnapshotData(t *testing.T) {
	dir := t.TempDir()

	clean := func() {
		t.Helper()
		if err := RemoveAllTmpSnapshotData(dir); err != nil {
			t.Fatalf("Failed to remove all tmp snapshot data: %v", err)
		}
	}
	mustExist := func(path, msg string) {
		t.Helper()
		if !pathExists(path) {
			t.Fatal(msg)
		}
	}
	mustNotExist := func(path, msg string) {
		t.Helper()
		if pathExists(path) {
			t.Fatal(msg)
		}
	}

	// An empty directory stays in place and stays empty.
	clean()
	mustExist(dir, "Expected dir to exist, but it does not")
	directories, err := os.ReadDir(dir)
	if err != nil {
		t.Fatalf("Failed to read dir: %v", err)
	}
	if len(directories) != 0 {
		t.Fatalf("Expected dir to be empty, got %d files", len(directories))
	}

	// Non-temporary entries are left alone.
	mustTouchDir(t, dir+"/dir")
	mustTouchFile(t, dir+"/file")
	clean()
	mustExist(dir+"/dir", "Expected dir to exist, but it does not")
	mustExist(dir+"/file", "Expected file to exist, but it does not")

	// A temporary directory goes, along with everything sharing its prefix.
	mustTouchDir(t, dir+"/snapshot1234.tmp")
	mustTouchFile(t, dir+"/snapshot1234.db")
	mustTouchFile(t, dir+"/snapshot1234.db-wal")
	mustTouchFile(t, dir+"/snapshot1234-5678")
	clean()
	mustExist(dir+"/dir", "Expected dir to exist, but it does not")
	mustExist(dir+"/file", "Expected file to exist, but it does not")
	mustNotExist(dir+"/snapshot1234.tmp", "Expected snapshot1234.tmp to not exist, but it does")
	mustNotExist(dir+"/snapshot1234.db", "Expected snapshot1234.db to not exist, but it does")
	mustNotExist(dir+"/snapshot1234.db-wal", "Expected snapshot1234.db-wal to not exist, but it does")
	mustNotExist(dir+"/snapshot1234-5678", "Expected /snapshot1234-5678 to not exist, but it does")

	// A plain file with the temporary suffix is not a temporary directory.
	mustTouchFile(t, dir+"/snapshotABCD.tmp")
	clean()
	mustExist(dir+"/snapshotABCD.tmp", "Expected /snapshotABCD.tmp to exist, but it does not")
}
// Test_NewStore checks that a Store can be created in an empty directory and
// reports that directory back.
func Test_NewStore(t *testing.T) {
	want := t.TempDir()
	store, err := NewStore(want)
	if err != nil {
		t.Fatalf("Failed to create new store: %v", err)
	}
	if got := store.Dir(); got != want {
		t.Errorf("Expected store directory to be %s, got %s", want, got)
	}
}
// Test_StoreEmpty checks the behaviour of a Store that holds no snapshots:
// listing yields nothing, opening an unknown snapshot fails, reaping removes
// nothing, and stats can be retrieved.
func Test_StoreEmpty(t *testing.T) {
	dir := t.TempDir()
	// Check the error so that a failed NewStore is reported clearly instead
	// of surfacing as a nil-pointer dereference below.
	store, err := NewStore(dir)
	if err != nil {
		t.Fatalf("Failed to create new store: %v", err)
	}
	snaps, err := store.List()
	if err != nil {
		t.Fatalf("Failed to list snapshots: %v", err)
	}
	if len(snaps) != 0 {
		t.Errorf("Expected no snapshots, got %d", len(snaps))
	}
	_, _, err = store.Open("non-existent")
	if err == nil {
		t.Fatalf("Expected error opening non-existent snapshot, got nil")
	}
	n, err := store.Reap()
	if err != nil {
		t.Fatalf("Failed to reap snapshots from empty store: %v", err)
	}
	if n != 0 {
		t.Errorf("Expected no snapshots reaped, got %d", n)
	}
	if _, err := store.Stats(); err != nil {
		t.Fatalf("Failed to get stats from empty store: %v", err)
	}
}
// Test_StoreCreateCancel creates a sink through the Store, checks that its
// temporary directory appears, writes some bytes, cancels the sink, and
// checks that the temporary directory is gone again.
func Test_StoreCreateCancel(t *testing.T) {
	dir := t.TempDir()
	store, err := NewStore(dir)
	if err != nil {
		t.Fatalf("Failed to create new store: %v", err)
	}

	cfg := makeTestConfiguration("1", "localhost:1")
	sink, err := store.Create(1, 2, 3, cfg, 1, nil)
	if err != nil {
		t.Fatalf("Failed to create sink: %v", err)
	}
	if sink.ID() == "" {
		t.Errorf("Expected sink ID to not be empty, got empty string")
	}

	// Should be a tmp directory with the name of the sink ID
	tmpDir := dir + "/" + sink.ID() + tmpSuffix
	if !pathExists(tmpDir) {
		t.Errorf("Expected directory with name %s, but it does not exist", sink.ID())
	}

	// Test writing to the sink
	written, err := sink.Write([]byte("hello"))
	if err != nil {
		t.Fatalf("Failed to write to sink: %v", err)
	}
	if written != 5 {
		t.Errorf("Expected 5 bytes written, got %d", written)
	}

	// Test canceling the sink
	if err := sink.Cancel(); err != nil {
		t.Fatalf("Failed to cancel sink: %v", err)
	}

	// Should not be a tmp directory with the name of the sink ID
	if pathExists(tmpDir) {
		t.Errorf("Expected directory with name %s to not exist, but it does", sink.ID())
	}
}
// mustTouchFile creates (or truncates) an empty file at path, failing the
// test if the file cannot be created or closed.
func mustTouchFile(t *testing.T, path string) {
	t.Helper()
	file, createErr := os.Create(path)
	if createErr != nil {
		t.Fatalf("Failed to create file: %v", createErr)
	}
	closeErr := file.Close()
	if closeErr != nil {
		t.Fatalf("Failed to close file: %v", closeErr)
	}
}
// mustTouchDir creates the directory path with mode 0700, failing the test
// if it cannot be created.
func mustTouchDir(t *testing.T, path string) {
	t.Helper()
	mkErr := os.Mkdir(path, 0700)
	if mkErr != nil {
		t.Fatalf("Failed to create directory: %v", mkErr)
	}
}
// pathExists reports whether path can be stat'ed without error.
func pathExists(path string) bool {
	if _, err := os.Stat(path); err != nil {
		return false
	}
	return true
}
// makeTestConfiguration returns a Raft configuration containing a single
// server with ID i and address a.
func makeTestConfiguration(i, a string) raft.Configuration {
	server := raft.Server{
		ID:      raft.ServerID(i),
		Address: raft.ServerAddress(a),
	}
	return raft.Configuration{
		Servers: []raft.Server{server},
	}
}

Binary file not shown.

@ -1,4 +1,4 @@
package snapshot
package snapshot2
import (
"compress/gzip"
@ -23,7 +23,6 @@ const (
// 'old' directory is removed before the function returns.
func Upgrade(old, new string, logger *log.Logger) error {
newTmpDir := tmpName(new)
newGenerationDir := filepath.Join(newTmpDir, generationsDir, firstGeneration)
// If a temporary version of the new snapshot exists, remove it. This implies a
// previous upgrade attempt was interrupted. We will need to start over.
@ -43,7 +42,7 @@ func Upgrade(old, new string, logger *log.Logger) error {
if oldIsEmpty {
logger.Printf("old snapshot directory %s is empty, nothing to upgrade", old)
if err := os.RemoveAll(old); err != nil {
return fmt.Errorf("failed to remove old snapshot directory %s: %s", old, err)
return fmt.Errorf("failed to remove empty old snapshot directory %s: %s", old, err)
}
return nil
}
@ -76,26 +75,22 @@ func Upgrade(old, new string, logger *log.Logger) error {
return fmt.Errorf("no snapshot to upgrade in old snapshots directory %s", old)
}
// Write out the new meta file.
newSnapshotPath := filepath.Join(newGenerationDir, oldMeta.ID)
// Write out the new meta file in the new snapshot directory.
newSnapshotPath := filepath.Join(newTmpDir, oldMeta.ID)
if err := os.MkdirAll(newSnapshotPath, 0755); err != nil {
return fmt.Errorf("failed to create new snapshot directory %s: %s", newSnapshotPath, err)
}
newMeta := &Meta{
SnapshotMeta: *oldMeta,
Full: true,
}
if err := writeMeta(newSnapshotPath, newMeta); err != nil {
return fmt.Errorf("failed to write new snapshot meta file: %s", err)
if err := writeMeta(newSnapshotPath, oldMeta); err != nil {
return fmt.Errorf("failed to write new snapshot meta file to %s: %s", newSnapshotPath, err)
}
// Ensure all file handles are closed before any directory is renamed or removed.
if err := func() error {
// Write SQLite data into generation directory, as the base SQLite file.
newSqliteBasePath := filepath.Join(newGenerationDir, baseSqliteFile)
newSqliteFd, err := os.Create(newSqliteBasePath)
// Write SQLite database file into new snapshot dir.
newSqlitePath := filepath.Join(newTmpDir, oldMeta.ID+".db")
newSqliteFd, err := os.Create(newSqlitePath)
if err != nil {
return fmt.Errorf("failed to create new SQLite file %s: %s", newSqliteBasePath, err)
return fmt.Errorf("failed to create new SQLite file %s: %s", newSqlitePath, err)
}
defer newSqliteFd.Close()
@ -113,17 +108,17 @@ func Upgrade(old, new string, logger *log.Logger) error {
}
gzipReader, err := gzip.NewReader(stateFd)
if err != nil {
return fmt.Errorf("failed to create gzip reader for new SQLite file %s: %s", newSqliteBasePath, err)
return fmt.Errorf("failed to create gzip reader for new SQLite file %s: %s", newSqlitePath, err)
}
defer gzipReader.Close()
if _, err := io.Copy(newSqliteFd, gzipReader); err != nil {
return fmt.Errorf("failed to copy old SQLite file %s to new SQLite file %s: %s", oldStatePath,
newSqliteBasePath, err)
newSqlitePath, err)
}
// Sanity-check the SQLite data.
if !db.IsValidSQLiteFile(newSqliteBasePath) {
return fmt.Errorf("migrated SQLite file %s is not valid", newSqliteBasePath)
if !db.IsValidSQLiteFile(newSqlitePath) {
return fmt.Errorf("migrated SQLite file %s is not valid", newSqlitePath)
}
return nil
}(); err != nil {
@ -143,6 +138,7 @@ func Upgrade(old, new string, logger *log.Logger) error {
return fmt.Errorf("failed to remove old snapshot directory %s: %s", old, err)
}
logger.Printf("upgraded snapshot directory %s to %s", old, new)
return nil
}
@ -182,14 +178,6 @@ func getNewest7Snapshot(dir string) (*raft.SnapshotMeta, error) {
return raftMetaSlice(snapshots).Newest(), nil
}
// dirIsEmpty reports whether the directory dir contains no entries. If the
// directory cannot be read, it returns false together with the error from
// os.ReadDir.
func dirIsEmpty(dir string) (bool, error) {
	entries, readErr := os.ReadDir(dir)
	switch {
	case readErr != nil:
		return false, readErr
	case len(entries) > 0:
		return false, nil
	}
	return true, nil
}
// raftMetaSlice is a sortable slice of Raft Meta, which are sorted
// by term, index, and then ID. Snapshots are sorted from oldest to newest.
type raftMetaSlice []*raft.SnapshotMeta

@ -1,4 +1,4 @@
package snapshot
package snapshot2
import (
"fmt"
@ -38,23 +38,13 @@ func Test_Upgrade_OK(t *testing.T) {
t.Fatalf("failed to upgrade empty directories: %s", err)
}
// Create new SnapshotStore from the upgraded directory.
// Create new SnapshotStore from the upgraded directory, to verify its
// contents.
store, err := NewStore(newTemp)
if err != nil {
t.Fatalf("failed to create new snapshot store: %s", err)
}
currGen, ok, err := store.GetCurrentGenerationDir()
if err != nil {
t.Fatalf("failed to get current generation directory: %s", err)
}
if !ok {
t.Fatalf("no current generation directory")
}
if exp, got := firstGeneration, filepath.Base(currGen); exp != got {
t.Fatalf("expected current generation directory %s, got %s", exp, got)
}
snapshots, err := store.List()
if err != nil {
t.Fatalf("failed to list snapshots: %s", err)

@ -7,9 +7,6 @@ type DBConfig struct {
// Enforce Foreign Key constraints
FKConstraints bool `json:"fk_constraints"`
// Disable WAL mode if running in on-disk mode
DisableWAL bool `json:"disable_wal"`
}
// NewDBConfig returns a new DB config instance.

@ -4,6 +4,7 @@
package store
import (
"bytes"
"errors"
"expvar"
"fmt"
@ -23,7 +24,7 @@ import (
"github.com/rqlite/rqlite/command/chunking"
sql "github.com/rqlite/rqlite/db"
rlog "github.com/rqlite/rqlite/log"
"github.com/rqlite/rqlite/snapshot"
"github.com/rqlite/rqlite/snapshot2"
)
var (
@ -62,6 +63,7 @@ var (
)
const (
restoreScratchPattern = "rqlite-restore-*"
raftDBPath = "raft.db" // Changing this will break backwards compatibility.
peersPath = "raft/peers.json"
peersInfoPath = "raft/peers.info"
@ -152,9 +154,6 @@ func ResetStats() {
type SnapshotStore interface {
raft.SnapshotStore
// FullNeeded returns whether the Snapshot Store needs a full snapshot.
FullNeeded() bool
// Stats returns stats about the Snapshot Store.
Stats() (map[string]interface{}, error)
}
@ -388,12 +387,12 @@ func (s *Store) Open() (retErr error) {
// Upgrade any pre-existing snapshots.
oldSnapshotDir := filepath.Join(s.raftDir, "snapshots")
snapshotDir := filepath.Join(s.raftDir, "rsnapshots")
if err := snapshot.Upgrade(oldSnapshotDir, snapshotDir, s.logger); err != nil {
if err := snapshot2.Upgrade(oldSnapshotDir, snapshotDir, s.logger); err != nil {
return fmt.Errorf("failed to upgrade snapshots: %s", err)
}
// Create store for the Snapshots.
snapshotStore, err := snapshot.NewStore(filepath.Join(snapshotDir))
snapshotStore, err := snapshot2.NewStore(filepath.Join(snapshotDir))
if err != nil {
return fmt.Errorf("failed to create snapshot store: %s", err)
}
@ -439,12 +438,23 @@ func (s *Store) Open() (retErr error) {
s.logger.Printf("first log index: %d, last log index: %d, last applied index: %d, last command log index: %d:",
s.firstIdxOnOpen, s.lastIdxOnOpen, s.lastAppliedIdxOnOpen, s.lastCommandIdxOnOpen)
s.db, err = createOnDisk(nil, s.dbPath, s.dbConf.FKConstraints, !s.dbConf.DisableWAL)
s.db, err = createOnDisk(nil, s.dbPath, s.dbConf.FKConstraints, true)
if err != nil {
return fmt.Errorf("failed to create on-disk database: %s", err)
}
s.logger.Printf("created on-disk database at open")
// Clean up any files from aborted restores.
files, err := filepath.Glob(filepath.Join(s.db.Path(), restoreScratchPattern))
if err != nil {
return fmt.Errorf("failed to locate temporary restore files: %s", err.Error())
}
for _, f := range files {
if err := os.Remove(f); err != nil {
return fmt.Errorf("failed to remove temporary restore file %s: %s", f, err.Error())
}
}
// Instantiate the Raft system.
ra, err := raft.NewRaft(config, s, s.raftLog, s.raftStable, s.snapshotStore, s.raftTn)
if err != nil {
@ -568,18 +578,16 @@ func (s *Store) Close(wait bool) (retErr error) {
return err
}
// If in WAL mode, open-and-close again to remove the -wal file. This is not
// Open-and-close again to remove the -wal file. This is not
// strictly necessary, since any on-disk database files will be removed when
// rqlite next starts, but it leaves the directory containing the database
// file in a cleaner state.
if !s.dbConf.DisableWAL {
walDB, err := sql.Open(s.dbPath, s.dbConf.FKConstraints, true)
if err != nil {
return err
}
if err := walDB.Close(); err != nil {
return err
}
walDB, err := sql.Open(s.dbPath, s.dbConf.FKConstraints, true)
if err != nil {
return err
}
if err := walDB.Close(); err != nil {
return err
}
return nil
@ -1642,8 +1650,13 @@ func (s *Store) Database(leader bool) ([]byte, error) {
func (s *Store) Snapshot() (raft.FSMSnapshot, error) {
startT := time.Now()
fNeeded := s.snapshotStore.FullNeeded()
fPLog := fullPretty(fNeeded)
currSnaps, err := s.snapshotStore.List()
if err != nil {
return nil, err
}
fullNeeded := len(currSnaps) == 0
fPLog := fullPretty(fullNeeded)
s.logger.Printf("initiating %s snapshot on node ID %s", fPLog, s.raftID)
defer func() {
s.numSnapshotsMu.Lock()
@ -1655,11 +1668,15 @@ func (s *Store) Snapshot() (raft.FSMSnapshot, error) {
defer s.queryTxMu.Unlock()
var fsmSnapshot raft.FSMSnapshot
if fNeeded {
if fullNeeded {
if err := s.db.Checkpoint(); err != nil {
return nil, err
}
fsmSnapshot = snapshot.NewFullSnapshot(s.db.Path())
dbFD, err := os.Open(s.db.Path())
if err != nil {
return nil, err
}
fsmSnapshot = snapshot2.NewSnapshot(dbFD)
stats.Add(numSnapshotsFull, 1)
} else {
var b []byte
@ -1675,7 +1692,7 @@ func (s *Store) Snapshot() (raft.FSMSnapshot, error) {
return nil, err
}
}
fsmSnapshot = snapshot.NewWALSnapshot(b)
fsmSnapshot = snapshot2.NewSnapshot(io.NopCloser(bytes.NewBuffer(b)))
if err != nil {
return nil, err
}
@ -1699,26 +1716,24 @@ func (s *Store) Restore(rc io.ReadCloser) error {
s.logger.Printf("initiating node restore on node ID %s", s.raftID)
startT := time.Now()
strHdr, _, err := snapshot.NewStreamHeaderFromReader(rc)
// Create a scratch file to write the restore data to.
tmpFile, err := os.CreateTemp(filepath.Dir(s.db.Path()), restoreScratchPattern)
if err != nil {
return fmt.Errorf("error reading stream header: %v", err)
return fmt.Errorf("error creating temporary file for restore operation: %v", err)
}
defer os.Remove(tmpFile.Name())
fullSnap := strHdr.GetFullSnapshot()
if fullSnap == nil {
return fmt.Errorf("got nil FullSnapshot")
// Copy it from the reader to the temporary file.
_, err = io.Copy(tmpFile, rc)
if err != nil {
return fmt.Errorf("error copying restore data: %v", err)
}
tmpFile, err := os.CreateTemp(filepath.Dir(s.db.Path()), "rqlite-restore-*")
if tmpFile.Close(); err != nil {
return fmt.Errorf("error creating temporary file for restore operation: %v", err)
}
defer os.Remove(tmpFile.Name())
if err := snapshot.ReplayDB(fullSnap, rc, tmpFile.Name()); err != nil {
return fmt.Errorf("error replaying DB: %v", err)
}
// Must wipe out all pre-existing state if being asked to do a restore.
// Must wipe out all pre-existing state if being asked to do a restore, and put
// the new database in place.
if err := s.db.Close(); err != nil {
return fmt.Errorf("failed to close pre-restore database: %s", err)
}
@ -1730,7 +1745,7 @@ func (s *Store) Restore(rc io.ReadCloser) error {
}
var db *sql.DB
db, err = sql.Open(s.dbPath, s.dbConf.FKConstraints, !s.dbConf.DisableWAL)
db, err = sql.Open(s.dbPath, s.dbConf.FKConstraints, true)
if err != nil {
return fmt.Errorf("open SQLite file during restore: %s", err)
}
@ -1905,22 +1920,17 @@ func RecoverNode(dataDir string, logger *log.Logger, logs raft.LogStore, stable
return err
}
// Now, create a temporary database. If there is a snapshot, we will read data from
// that snapshot into it.
// Get a path to a temporary file to use for a temporary database.
tmpDBPath := filepath.Join(dataDir, "recovery.db")
if err := os.WriteFile(tmpDBPath, nil, 0660); err != nil {
return fmt.Errorf("failed to create temporary recovery database file: %s", err)
}
defer os.Remove(tmpDBPath)
// Attempt to restore any latest snapshot.
var (
snapshotIndex uint64
snapshotTerm uint64
snapshots, err = snaps.List()
snapshotIndex uint64
snapshotTerm uint64
)
snapshots, err = snaps.List()
snapshots, err := snaps.List()
if err != nil {
return fmt.Errorf("failed to list snapshots: %s", err)
}
@ -1933,17 +1943,9 @@ func RecoverNode(dataDir string, logger *log.Logger, logs raft.LogStore, stable
return fmt.Errorf("failed to open snapshot %s: %s", snapID, err)
}
defer rc.Close()
strHdr, _, err := snapshot.NewStreamHeaderFromReader(rc)
_, err = copyFromReaderToFile(tmpDBPath, rc)
if err != nil {
return fmt.Errorf("error reading stream header during recovery: %v", err)
}
fullSnap := strHdr.GetFullSnapshot()
if fullSnap == nil {
return fmt.Errorf("got nil FullSnapshot during recovery")
}
if err := snapshot.ReplayDB(fullSnap, rc, tmpDBPath); err != nil {
return fmt.Errorf("error replaying DB during recovery: %v", err)
return fmt.Errorf("failed to copy snapshot %s to temporary database: %s", snapID, err)
}
snapshotIndex = snapshots[0].Index
snapshotTerm = snapshots[0].Term
@ -1951,7 +1953,6 @@ func RecoverNode(dataDir string, logger *log.Logger, logs raft.LogStore, stable
}(); err != nil {
return err
}
}
// Now, open the database so we can replay any outstanding Raft log entries.
@ -1996,7 +1997,11 @@ func RecoverNode(dataDir string, logger *log.Logger, logs raft.LogStore, stable
if err := db.Checkpoint(); err != nil {
return fmt.Errorf("failed to checkpoint database: %s", err)
}
fsmSnapshot := snapshot.NewFullSnapshot(tmpDBPath) // tmpDBPath contains full state now.
tmpDBFD, err := os.Open(tmpDBPath)
if err != nil {
return fmt.Errorf("failed to open temporary database file: %s", err)
}
fsmSnapshot := snapshot2.NewSnapshot(tmpDBFD) // tmpDBPath contains full state now.
sink, err := snaps.Create(1, lastIndex, lastTerm, conf, 1, tn)
if err != nil {
return fmt.Errorf("failed to create snapshot: %v", err)
@ -2179,6 +2184,15 @@ func createOnDisk(b []byte, path string, fkConstraints, wal bool) (*sql.DB, erro
return sql.Open(path, fkConstraints, wal)
}
// copyFromReaderToFile creates the file at path (truncating it if it already
// exists) and streams the entire contents of r into it.
//
// It returns the number of bytes copied and the first error encountered while
// creating, writing, or closing the file. The Close error is reported because
// a failed close on a freshly written file can mean the data never reached
// the file; it is only surfaced when the copy itself succeeded, so an earlier
// copy error is never masked.
func copyFromReaderToFile(path string, r io.Reader) (n int64, retErr error) {
	fd, err := os.Create(path)
	if err != nil {
		return 0, err
	}
	defer func() {
		if closeErr := fd.Close(); closeErr != nil && retErr == nil {
			retErr = closeErr
		}
	}()
	return io.Copy(fd, r)
}
// prettyVoter converts bool to "voter" or "non-voter"
func prettyVoter(v bool) string {
if v {

Loading…
Cancel
Save