From 4d605ac9ebaf72661de6be539feeb09bb9fd946b Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Thu, 28 Dec 2023 09:50:42 -0500 Subject: [PATCH 1/6] Store WAL path in store, to avoid races https://app.circleci.com/pipelines/github/rqlite/rqlite/3966/workflows/a460ab69-2827-4a72-9a79-1aceda68ee7d/jobs/31288 --- db/db.go | 5 +++++ db/db_test.go | 3 +++ store/store.go | 26 ++++++++++++++------------ 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/db/db.go b/db/db.go index d01b16e3..79b883d5 100644 --- a/db/db.go +++ b/db/db.go @@ -111,6 +111,11 @@ type PoolStats struct { MaxLifetimeClosed int64 `json:"max_lifetime_closed"` } +// WALPath returns the path to the WAL file for the given database path. +func WALPath(dbPath string) string { + return dbPath + "-wal" +} + // IsValidSQLiteFile checks that the supplied path looks like a SQLite file. // A non-existent file is considered invalid. func IsValidSQLiteFile(path string) bool { diff --git a/db/db_test.go b/db/db_test.go index 11d3fee5..a329bdfe 100644 --- a/db/db_test.go +++ b/db/db_test.go @@ -86,6 +86,9 @@ func Test_DBPaths(t *testing.T) { if exp, got := pathWAL+"-wal", dbWAL.WALPath(); exp != got { t.Fatalf("expected WAL path %s, got %s", exp, got) } + if p1, p2 := WALPath(pathWAL), dbWAL.WALPath(); p1 != p2 { + t.Fatalf("WAL paths are not equal (%s != %s)", p1, p2) + } db, path := mustCreateOnDiskDatabase() defer db.Close() diff --git a/store/store.go b/store/store.go index beefc1f3..fddb9c4e 100644 --- a/store/store.go +++ b/store/store.go @@ -215,14 +215,15 @@ type Store struct { restorePath string restoreDoneCh chan struct{} - raft *raft.Raft // The consensus mechanism. - ly Layer - raftTn *NodeTransport - raftID string // Node ID. - dbConf *DBConfig // SQLite database config. - dbPath string // Path to underlying SQLite file. - dbDir string // Path to directory containing SQLite file. - db *sql.DB // The underlying SQLite store. + raft *raft.Raft // The consensus mechanism. + ly Layer + raftTn *NodeTransport + raftID string // Node ID. + dbConf *DBConfig // SQLite database config. + dbPath string // Path to underlying SQLite file. + walPath string // Path to WAL file. + dbDir string // Path to directory containing SQLite file. + db *sql.DB // The underlying SQLite store. queryTxMu sync.RWMutex @@ -337,6 +338,7 @@ func New(ly Layer, c *Config) *Store { raftID: c.ID, dbConf: c.DBConf, dbPath: dbPath, + walPath: sql.WALPath(dbPath), dbDir: filepath.Dir(dbPath), leaderObservers: make([]chan<- struct{}, 0), reqMarshaller: command.NewRequestMarshaler(), @@ -1736,8 +1738,8 @@ func (s *Store) fsmSnapshot() (fSnap raft.FSMSnapshot, retErr error) { } else { compactedBuf := bytes.NewBuffer(nil) var err error - if pathExistsWithData(s.db.WALPath()) { - walFD, err := os.Open(s.db.WALPath()) + if pathExistsWithData(s.walPath) { + walFD, err := os.Open(s.walPath) if err != nil { return nil, err } @@ -1756,7 +1758,7 @@ func (s *Store) fsmSnapshot() (fSnap raft.FSMSnapshot, retErr error) { } walFD.Close() // We need it closed for the next step. - walSz, err := fileSize(s.db.WALPath()) + walSz, err := fileSize(s.walPath) if err != nil { return nil, err } @@ -1973,7 +1975,7 @@ func (s *Store) runWALSnapshotting() (closeCh, doneCh chan struct{}) { for { select { case <-ticker.C: - sz, err := s.db.WALSize() + sz, err := fileSize(s.walPath) if err != nil { s.logger.Printf("failed to get WAL size: %s", err.Error()) continue From b30cb1824752d995a7af0793587f533333605f33 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Thu, 28 Dec 2023 09:52:31 -0500 Subject: [PATCH 2/6] CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index abf6f5f6..4399756c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ This release adds new control over Raft snapshotting, a key part of the Raft con ### Implementation changes and bug fixes - [PR #1528](https://github.com/rqlite/rqlite/pull/1528): Support setting trailing logs for user-requested snapshot. - [PR #1529](https://github.com/rqlite/rqlite/pull/1529): Remove obsolete code related to user-triggered snapshots. +- [PR #1536](https://github.com/rqlite/rqlite/pull/1536): Store WAL path in store, to avoid races. ## 8.13.5 (December 26th 2023) ### Implementation changes and bug fixes From a3540d7a95c56453eb6613704d54fb95e4b74676 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Thu, 28 Dec 2023 10:33:52 -0500 Subject: [PATCH 3/6] Block sinks while getting stats --- snapshot/store.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/snapshot/store.go b/snapshot/store.go index c1c2d7e5..485e640e 100644 --- a/snapshot/store.go +++ b/snapshot/store.go @@ -178,6 +178,11 @@ func (s *Store) SetFullNeeded() error { // Stats returns stats about the Snapshot Store. func (s *Store) Stats() (map[string]interface{}, error) { + // Keep the store locked while we get the stats. Sinks can change the store + // in two ways: by creating a new snapshot, or by reaping old snapshots. + s.sinkMu.Lock() + defer s.sinkMu.Unlock() + snapshots, err := s.getSnapshots() if err != nil { return nil, err From 593d20714de960f03b0b0643a197e8cc1aee4c20 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Thu, 28 Dec 2023 10:36:31 -0500 Subject: [PATCH 4/6] Don't lock --- snapshot/store.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/snapshot/store.go b/snapshot/store.go index 485e640e..c1c2d7e5 100644 --- a/snapshot/store.go +++ b/snapshot/store.go @@ -178,11 +178,6 @@ func (s *Store) SetFullNeeded() error { // Stats returns stats about the Snapshot Store. func (s *Store) Stats() (map[string]interface{}, error) { - // Keep the store locked while we get the stats. Sinks can change the store - // in two ways: by creating a new snapshot, or by reaping old snapshots. - s.sinkMu.Lock() - defer s.sinkMu.Unlock() - snapshots, err := s.getSnapshots() if err != nil { return nil, err From 4095561a3f31f144705120cc9d5d2fb692ca2b0c Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Thu, 28 Dec 2023 10:39:35 -0500 Subject: [PATCH 5/6] Only report Snapshot stats if available --- snapshot/store.go | 4 +++- store/store.go | 15 +++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/snapshot/store.go b/snapshot/store.go index c1c2d7e5..8a798660 100644 --- a/snapshot/store.go +++ b/snapshot/store.go @@ -176,7 +176,9 @@ func (s *Store) SetFullNeeded() error { return f.Close() } -// Stats returns stats about the Snapshot Store. +// Stats returns stats about the Snapshot Store. This function may return +// an error if the Store is in an inconsistent state. In that case the stats +// returned may be incomplete or invalid. func (s *Store) Stats() (map[string]interface{}, error) { snapshots, err := s.getSnapshots() if err != nil { diff --git a/store/store.go b/store/store.go index fddb9c4e..ca2fe1bb 100644 --- a/store/store.go +++ b/store/store.go @@ -952,11 +952,6 @@ func (s *Store) Stats() (map[string]interface{}, error) { return nil, err } - snapsStats, err := s.snapshotStore.Stats() - if err != nil { - return nil, err - } - lAppliedIdx, err := s.boltStore.GetAppliedIndex() if err != nil { return nil, err @@ -981,7 +976,6 @@ func (s *Store) Stats() (map[string]interface{}, error) { "apply_timeout": s.ApplyTimeout.String(), "heartbeat_timeout": s.HeartbeatTimeout.String(), "election_timeout": s.ElectionTimeout.String(), - "snapshot_store": snapsStats, "snapshot_threshold": s.SnapshotThreshold, "snapshot_interval": s.SnapshotInterval.String(), "reap_timeout": s.ReapTimeout.String(), @@ -995,6 +989,15 @@ func (s *Store) Stats() (map[string]interface{}, error) { "sqlite3": dbStatus, "db_conf": s.dbConf, } + + // Snapshot stats may be in flux if a snapshot is in progress. Only + // report them if they are available. + snapsStats, err := s.snapshotStore.Stats() + if err == nil { + status["snapshot_store"] = snapsStats + return nil, err + } + return status, nil } From 4fa94cc4659feb398cc8d85146dd47b5ed4772ef Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Thu, 28 Dec 2023 10:42:25 -0500 Subject: [PATCH 6/6] Fix logic --- store/store.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/store/store.go b/store/store.go index ca2fe1bb..c6233dc4 100644 --- a/store/store.go +++ b/store/store.go @@ -995,9 +995,7 @@ func (s *Store) Stats() (map[string]interface{}, error) { snapsStats, err := s.snapshotStore.Stats() if err == nil { status["snapshot_store"] = snapsStats - return nil, err } - return status, nil }