From 4cd3c438f49853bf53243b9b7bca6813c5eaf1f7 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Fri, 19 Jan 2024 06:56:48 -0500 Subject: [PATCH 1/8] Add more involved WAL checkpoint unit test This test passes, but if VACUUM is performed, the dst database becomes corrupt. This shows that VACUUM introduces issues which warrant a full snapshot after any proposed auto-vacuum. https://github.com/rqlite/rqlite/issues/1609 --- db/state_test.go | 117 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/db/state_test.go b/db/state_test.go index 5a19047e..b25b1a45 100644 --- a/db/state_test.go +++ b/db/state_test.go @@ -302,6 +302,123 @@ func Test_WALReplayOK(t *testing.T) { }) } +func Test_WALReplayOK_Complex(t *testing.T) { + srcPath := mustTempFile() + defer os.Remove(srcPath) + srcWALPath := srcPath + "-wal" + dstPath := srcPath + "-dst" + + srcDB, err := Open(srcPath, false, true) + if err != nil { + t.Fatalf("failed to open database in WAL mode: %s", err.Error()) + } + defer srcDB.Close() + + if _, err := srcDB.ExecuteStringStmt("CREATE TABLE foo (id INTEGER NOT NULL PRIMARY KEY, name TEXT)"); err != nil { + t.Fatalf("failed to create table: %s", err.Error()) + } + if err := srcDB.Checkpoint(); err != nil { + t.Fatalf("failed to checkpoint database in WAL mode: %s", err.Error()) + } + mustCopyFile(dstPath, srcPath) + + var dstWALs []string + defer func() { + for _, p := range dstWALs { + os.Remove(p) + } + }() + for i := 0; i < 20; i++ { + for j := 0; j < 2000; j++ { + if _, err := srcDB.ExecuteStringStmt(fmt.Sprintf(`INSERT INTO foo(name) VALUES("fiona-%d")`, i)); err != nil { + t.Fatalf("error executing insertion into table: %s", err.Error()) + } + } + dstWALPath := fmt.Sprintf("%s-%d", dstPath, i) + mustCopyFile(dstWALPath, srcWALPath) + dstWALs = append(dstWALs, dstWALPath) + + // if i%5 == 0 { + // if err := srcDB.Vacuum(); err != nil { + // t.Fatalf("failed to vacuum database during INSERT: %s", err.Error()) + // } + // } + if err := srcDB.Checkpoint(); err != nil { + t.Fatalf("failed to checkpoint database in WAL mode: %s", err.Error()) + } + } + + // Create some other type of transactions in src - first DELETE, then UPDATE. + if _, err := srcDB.ExecuteStringStmt(`DELETE FROM foo WHERE id >= 100 AND id <= 200`); err != nil { + t.Fatalf("error executing deletion from table: %s", err.Error()) + } + dstWALPath := fmt.Sprintf("%s-postdelete", dstPath) + mustCopyFile(dstWALPath, srcWALPath) + dstWALs = append(dstWALs, dstWALPath) + if err := srcDB.Checkpoint(); err != nil { + t.Fatalf("failed to checkpoint database in WAL mode: %s", err.Error()) + } + + if _, err := srcDB.ExecuteStringStmt(`UPDATE foo SET name="fiona-updated" WHERE id >= 300 AND id <= 600`); err != nil { + t.Fatalf("error executing update of table: %s", err.Error()) + } + dstWALPath = fmt.Sprintf("%s-postupdate", dstPath) + mustCopyFile(dstWALPath, srcWALPath) + dstWALs = append(dstWALs, dstWALPath) + // if err := srcDB.Vacuum(); err != nil { + // t.Fatalf("failed to vacuum database post UPDATE: %s", err.Error()) + // } + if err := srcDB.Checkpoint(); err != nil { + t.Fatalf("failed to checkpoint database in WAL mode: %s", err.Error()) + } + + for i := 0; i < 20; i++ { + createTable := fmt.Sprintf("CREATE TABLE bar%d (id INTEGER NOT NULL PRIMARY KEY, name TEXT)", i) + if _, err := srcDB.ExecuteStringStmt(createTable); err != nil { + t.Fatalf("failed to create table: %s", err.Error()) + } + } + dstWALPath = fmt.Sprintf("%s-create-tables", dstPath) + mustCopyFile(dstWALPath, srcWALPath) + dstWALs = append(dstWALs, dstWALPath) + if err := srcDB.Checkpoint(); err != nil { + t.Fatalf("failed to checkpoint database in WAL mode: %s", err.Error()) + } + + // Replay all the WALs into dst and check the data looks good. Then compare + // the data in src and dst. + if err := ReplayWAL(dstPath, dstWALs, false); err != nil { + t.Fatalf("failed to replay WALs: %s", err.Error()) + } + dstDB, err := Open(dstPath, false, true) + if err != nil { + t.Fatalf("failed to open dst database: %s", err.Error()) + } + defer dstDB.Close() + + // Run various queries to make sure src and dst are the same. + for _, q := range []string{ + "SELECT COUNT(*) FROM foo", + "SELECT COUNT(*) FROM foo WHERE name='fiona-updated'", + "SELECT COUNT(*) FROM foo WHERE name='no-one'", + "SELECT COUNT(*) FROM sqlite_master WHERE type='table'", + } { + r, err := srcDB.QueryStringStmt(q) + if err != nil { + t.Fatalf("failed to query table: %s", err.Error()) + } + srcRes := asJSON(r) + + r, err = dstDB.QueryStringStmt(q) + if err != nil { + t.Fatalf("failed to query table: %s", err.Error()) + } + if exp, got := srcRes, asJSON(r); exp != got { + t.Fatalf("unexpected results for query (%s) of dst, expected %s, got %s", q, exp, got) + } + } +} + func Test_WALReplayFailures(t *testing.T) { dbDir := mustTempDir() defer os.RemoveAll(dbDir) From 8f8612d6da5b44a621efbbf98b4c84112843dede Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Fri, 19 Jan 2024 06:59:37 -0500 Subject: [PATCH 2/8] Comment test --- db/state_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/db/state_test.go b/db/state_test.go index b25b1a45..9a324609 100644 --- a/db/state_test.go +++ b/db/state_test.go @@ -302,6 +302,9 @@ func Test_WALReplayOK(t *testing.T) { }) } +// Test_WALReplayOK_Complex tests that WAL files are replayed as expected in a more +// complex scenario. If VACUUM is performed, the dst database becomes corrupt. This +// shows that VACUUM introduces issues which warrant a full snapshot after auto-vacuum. func Test_WALReplayOK_Complex(t *testing.T) { srcPath := mustTempFile() defer os.Remove(srcPath) From 8f98fd4d1c3d27baf0079248510c64e93c226bd1 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Fri, 19 Jan 2024 07:10:11 -0500 Subject: [PATCH 3/8] Actually, WAL should be copied after VACUUM! --- db/state_test.go | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/db/state_test.go b/db/state_test.go index 9a324609..2efd4962 100644 --- a/db/state_test.go +++ b/db/state_test.go @@ -337,15 +337,16 @@ func Test_WALReplayOK_Complex(t *testing.T) { t.Fatalf("error executing insertion into table: %s", err.Error()) } } + + if i%5 == 0 { + if err := srcDB.Vacuum(); err != nil { + t.Fatalf("failed to vacuum database during INSERT: %s", err.Error()) + } + } + dstWALPath := fmt.Sprintf("%s-%d", dstPath, i) mustCopyFile(dstWALPath, srcWALPath) dstWALs = append(dstWALs, dstWALPath) - - // if i%5 == 0 { - // if err := srcDB.Vacuum(); err != nil { - // t.Fatalf("failed to vacuum database during INSERT: %s", err.Error()) - // } - // } if err := srcDB.Checkpoint(); err != nil { t.Fatalf("failed to checkpoint database in WAL mode: %s", err.Error()) } @@ -355,6 +356,9 @@ func Test_WALReplayOK_Complex(t *testing.T) { if _, err := srcDB.ExecuteStringStmt(`DELETE FROM foo WHERE id >= 100 AND id <= 200`); err != nil { t.Fatalf("error executing deletion from table: %s", err.Error()) } + if err := srcDB.Vacuum(); err != nil { + t.Fatalf("failed to vacuum database post DELETE: %s", err.Error()) + } dstWALPath := fmt.Sprintf("%s-postdelete", dstPath) mustCopyFile(dstWALPath, srcWALPath) dstWALs = append(dstWALs, dstWALPath) @@ -368,9 +372,6 @@ func Test_WALReplayOK_Complex(t *testing.T) { dstWALPath = fmt.Sprintf("%s-postupdate", dstPath) mustCopyFile(dstWALPath, srcWALPath) dstWALs = append(dstWALs, dstWALPath) - // if err := srcDB.Vacuum(); err != nil { - // t.Fatalf("failed to vacuum database post UPDATE: %s", err.Error()) - // } if err := srcDB.Checkpoint(); err != nil { t.Fatalf("failed to checkpoint database in WAL mode: %s", err.Error()) } @@ -408,13 +409,13 @@ func Test_WALReplayOK_Complex(t *testing.T) { } { r, err := srcDB.QueryStringStmt(q) if err != nil { - t.Fatalf("failed to query table: %s", err.Error()) + t.Fatalf("failed to query srcDB table: %s", err.Error()) } srcRes := asJSON(r) r, err = dstDB.QueryStringStmt(q) if err != nil { - t.Fatalf("failed to query table: %s", err.Error()) + t.Fatalf("failed to query dstDB table: %s", err.Error()) } if exp, got := srcRes, asJSON(r); exp != got { t.Fatalf("unexpected results for query (%s) of dst, expected %s, got %s", q, exp, got) From 6398eac6d5f29e670afed9505085468e4367d12d Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Fri, 19 Jan 2024 07:14:24 -0500 Subject: [PATCH 4/8] Fix test --- db/state_test.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/db/state_test.go b/db/state_test.go index 2efd4962..744237ce 100644 --- a/db/state_test.go +++ b/db/state_test.go @@ -344,6 +344,8 @@ func Test_WALReplayOK_Complex(t *testing.T) { } } + // Now copy the WAL! Has to happen after any possible VACUUM since the VACUUM will + // rewrite the WAL. dstWALPath := fmt.Sprintf("%s-%d", dstPath, i) mustCopyFile(dstWALPath, srcWALPath) dstWALs = append(dstWALs, dstWALPath) @@ -389,6 +391,18 @@ func Test_WALReplayOK_Complex(t *testing.T) { t.Fatalf("failed to checkpoint database in WAL mode: %s", err.Error()) } + // Do a VACUUM and copy the WAL again, to test the flow of copying the WAL + // immediately before a VACUUM. + if err := srcDB.Vacuum(); err != nil { + t.Fatalf("failed to vacuum database post CREATE: %s", err.Error()) + } + dstWALPath = fmt.Sprintf("%s-post-create-tables", dstPath) + mustCopyFile(dstWALPath, srcWALPath) + dstWALs = append(dstWALs, dstWALPath) + if err := srcDB.Checkpoint(); err != nil { + t.Fatalf("failed to checkpoint database in WAL mode: %s", err.Error()) + } + // Replay all the WALs into dst and check the data looks good. Then compare // the data in src and dst. if err := ReplayWAL(dstPath, dstWALs, false); err != nil { From 5a27dac7b21a0c92b2bedfa495dfe72b7b901eb3 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Fri, 19 Jan 2024 07:24:45 -0500 Subject: [PATCH 5/8] Run integrity check at end of test --- db/db.go | 11 +++++++++++ db/state_test.go | 19 ++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/db/db.go b/db/db.go index a3ba3310..b3772f96 100644 --- a/db/db.go +++ b/db/db.go @@ -372,6 +372,17 @@ func (db *DB) Vacuum() error { return err } +// IntegrityCheck runs a PRAGMA integrity_check on the database. +// If full is true, a full integrity check is performed, otherwise +// a quick check. It returns after hitting the first integrity +// failure, if any. +func (db *DB) IntegrityCheck(full bool) ([]*command.QueryRows, error) { + if full { + return db.QueryStringStmt("PRAGMA integrity_check(1)") + } + return db.QueryStringStmt("PRAGMA quick_check(1)") +} + // SetSynchronousMode sets the synchronous mode of the database. func (db *DB) SetSynchronousMode(mode string) error { if mode != "OFF" && mode != "NORMAL" && mode != "FULL" && mode != "EXTRA" { diff --git a/db/state_test.go b/db/state_test.go index 744237ce..c3f4c23e 100644 --- a/db/state_test.go +++ b/db/state_test.go @@ -317,6 +317,8 @@ func Test_WALReplayOK_Complex(t *testing.T) { } defer srcDB.Close() + ////////////////////////////////////////////////////////////////////////// + // Create the very first table so first WAL is created. if _, err := srcDB.ExecuteStringStmt("CREATE TABLE foo (id INTEGER NOT NULL PRIMARY KEY, name TEXT)"); err != nil { t.Fatalf("failed to create table: %s", err.Error()) } @@ -325,6 +327,9 @@ func Test_WALReplayOK_Complex(t *testing.T) { } mustCopyFile(dstPath, srcPath) + ////////////////////////////////////////////////////////////////////////// + // INSERT a bunch of records, sometimes doing a VACUUM, + // but always copying the WAL. var dstWALs []string defer func() { for _, p := range dstWALs { @@ -354,6 +359,7 @@ func Test_WALReplayOK_Complex(t *testing.T) { } } + ////////////////////////////////////////////////////////////////////////// // Create some other type of transactions in src - first DELETE, then UPDATE. if _, err := srcDB.ExecuteStringStmt(`DELETE FROM foo WHERE id >= 100 AND id <= 200`); err != nil { t.Fatalf("error executing deletion from table: %s", err.Error()) @@ -378,6 +384,8 @@ func Test_WALReplayOK_Complex(t *testing.T) { t.Fatalf("failed to checkpoint database in WAL mode: %s", err.Error()) } + ////////////////////////////////////////////////////////////////////////// + // Create a bunch of new tables, copy the WAL afterwards. for i := 0; i < 20; i++ { createTable := fmt.Sprintf("CREATE TABLE bar%d (id INTEGER NOT NULL PRIMARY KEY, name TEXT)", i) if _, err := srcDB.ExecuteStringStmt(createTable); err != nil { @@ -391,8 +399,9 @@ func Test_WALReplayOK_Complex(t *testing.T) { t.Fatalf("failed to checkpoint database in WAL mode: %s", err.Error()) } + ////////////////////////////////////////////////////////////////////////// // Do a VACUUM and copy the WAL again, to test the flow of copying the WAL - // immediately before a VACUUM. + // immediately before a VACUUM (up above) if err := srcDB.Vacuum(); err != nil { t.Fatalf("failed to vacuum database post CREATE: %s", err.Error()) } @@ -435,6 +444,14 @@ func Test_WALReplayOK_Complex(t *testing.T) { t.Fatalf("unexpected results for query (%s) of dst, expected %s, got %s", q, exp, got) } } + + r, err := dstDB.IntegrityCheck(true) + if err != nil { + t.Fatalf("failed to run integrity check on dst: %s", err.Error()) + } + if exp, got := "ok", asJSON(r); `[{"columns":["integrity_check"],"types":["text"],"values":[["ok"]]}]` != got { + t.Fatalf("unexpected results for integrity check of dst, expected %s, got %s", exp, got) + } } func Test_WALReplayFailures(t *testing.T) { From 28b93e4b6feaed61bba76c13cac0dd460c70222a Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Fri, 19 Jan 2024 07:26:06 -0500 Subject: [PATCH 6/8] CHANGELOG --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd1bd6cf..42b93055 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,7 @@ -## 8.16.7 (January 18th 2024) +## 8.16.8 (unreleased) +### Implementation changes and bug fixes +- [PR #1615](https://github.com/rqlite/rqlite/pull/1615): Add extensive WAL checkpoint test at the database level. + The releases changes the default logging level for the Raft subsystem from `INFO` to `WARN`. This results is less logging by the Raft subsystem. If you prefer the previous `INFO` level of logging, it can be re-enabled via the command line flag `-raft-log-level=INFO`. ### Implementation changes and bug fixes - [PR #1607](https://github.com/rqlite/rqlite/pull/1607): Remove use of deprecated `ioutil`. From 7f4cd1f8205ac5b064c2702ea6c01425f90dfefa Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Fri, 19 Jan 2024 07:29:57 -0500 Subject: [PATCH 7/8] Fix CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42b93055..d1e8a758 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ### Implementation changes and bug fixes - [PR #1615](https://github.com/rqlite/rqlite/pull/1615): Add extensive WAL checkpoint test at the database level. +## 8.16.7 (January 18th 2024) The releases changes the default logging level for the Raft subsystem from `INFO` to `WARN`. This results is less logging by the Raft subsystem. If you prefer the previous `INFO` level of logging, it can be re-enabled via the command line flag `-raft-log-level=INFO`. ### Implementation changes and bug fixes - [PR #1607](https://github.com/rqlite/rqlite/pull/1607): Remove use of deprecated `ioutil`. From 45f53861532a8407189b2eac7a606eea59b71e98 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Fri, 19 Jan 2024 07:35:13 -0500 Subject: [PATCH 8/8] Fix comments --- db/state_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/db/state_test.go b/db/state_test.go index c3f4c23e..f188b4d5 100644 --- a/db/state_test.go +++ b/db/state_test.go @@ -303,8 +303,7 @@ func Test_WALReplayOK(t *testing.T) { } // Test_WALReplayOK_Complex tests that WAL files are replayed as expected in a more -// complex scenario. If VACUUM is performed, the dst database becomes corrupt. This -// shows that VACUUM introduces issues which warrant a full snapshot after auto-vacuum. +// complex scenario, including showing the interaction with VACUUM. func Test_WALReplayOK_Complex(t *testing.T) { srcPath := mustTempFile() defer os.Remove(srcPath) @@ -445,11 +444,12 @@ func Test_WALReplayOK_Complex(t *testing.T) { } } + // Finally, run an integrity check on dst. r, err := dstDB.IntegrityCheck(true) if err != nil { t.Fatalf("failed to run integrity check on dst: %s", err.Error()) } - if exp, got := "ok", asJSON(r); `[{"columns":["integrity_check"],"types":["text"],"values":[["ok"]]}]` != got { + if exp, got := `[{"columns":["integrity_check"],"types":["text"],"values":[["ok"]]}]`, asJSON(r); exp != got { t.Fatalf("unexpected results for integrity check of dst, expected %s, got %s", exp, got) } }