
Remove metrics

master
Philip O'Toole 8 months ago
parent 70397e9234
commit 52eba8049a

@@ -72,19 +72,13 @@ func Test_WALDatabaseCheckpoint_RestartTruncate(t *testing.T) {
 		}
 	}
 
-	walPreBytes, err := os.ReadFile(db.WALPath())
-	if err != nil {
-		t.Fatalf("failed to read wal file: %s", err.Error())
-	}
+	walPreBytes := mustReadBytes(db.WALPath())
 	if err := db.Checkpoint(CheckpointRestart); err != nil {
 		t.Fatalf("failed to checkpoint database: %s", err.Error())
 	}
-	walPostBytes, err := os.ReadFile(db.WALPath())
-	if err != nil {
-		t.Fatalf("failed to read wal file: %s", err.Error())
-	}
+	walPostBytes := mustReadBytes(db.WALPath())
 	if !bytes.Equal(walPreBytes, walPostBytes) {
-		t.Fatalf("wal files should be identical after checkpoint restart")
+		t.Fatalf("wal file should be unchanged after checkpoint restart")
 	}
 
 	// query the data to make sure all is well.
@@ -117,6 +111,8 @@ func Test_WALDatabaseCheckpoint_RestartTruncate(t *testing.T) {
 	}
 }
 
+// Test_WALDatabaseCheckpoint_RestartTimeout tests that a restart checkpoint
+// times out as expected if there is a long-running read.
 func Test_WALDatabaseCheckpoint_RestartTimeout(t *testing.T) {
 	path := mustTempFile()
 	defer os.Remove(path)
@@ -154,12 +150,78 @@ func Test_WALDatabaseCheckpoint_RestartTimeout(t *testing.T) {
 		t.Fatalf("expected %s, got %s", exp, got)
 	}
 
 	if err := db.CheckpointWithTimeout(CheckpointRestart, 250*time.Millisecond); err == nil {
 		t.Fatal("expected error due to failure to checkpoint")
 	}
 	blockingDB.Close()
 	if err := db.CheckpointWithTimeout(CheckpointRestart, 250*time.Millisecond); err != nil {
 		t.Fatalf("failed to checkpoint database: %s", err.Error())
 	}
 }
 
+// Test_WALDatabaseCheckpoint_TruncateTimeout tests that a truncate checkpoint
+// times out as expected if there is a long-running read. It also confirms
+// that the WAL file is not modified as a result of this failure.
+func Test_WALDatabaseCheckpoint_TruncateTimeout(t *testing.T) {
+	path := mustTempFile()
+	defer os.Remove(path)
+	db, err := Open(path, false, true)
+	if err != nil {
+		t.Fatalf("failed to open database in WAL mode: %s", err.Error())
+	}
+	defer db.Close()
+
+	_, err = db.ExecuteStringStmt(`CREATE TABLE foo (id INTEGER NOT NULL PRIMARY KEY, name TEXT)`)
+	if err != nil {
+		t.Fatalf("failed to execute on single node: %s", err.Error())
+	}
+	for i := 0; i < 50; i++ {
+		_, err := db.ExecuteStringStmt(`INSERT INTO foo(name) VALUES("fiona")`)
+		if err != nil {
+			t.Fatalf("failed to execute INSERT on single node: %s", err.Error())
+		}
+	}
+	preWALBytes := mustReadBytes(db.WALPath())
+
+	blockingDB, err := Open(path, false, true)
+	if err != nil {
+		t.Fatalf("failed to open blocking database in WAL mode: %s", err.Error())
+	}
+	defer blockingDB.Close()
+	_, err = blockingDB.QueryStringStmt(`BEGIN TRANSACTION`)
+	if err != nil {
+		t.Fatalf("failed to execute query on single node: %s", err.Error())
+	}
+	rows, err := blockingDB.QueryStringStmt(`SELECT COUNT(*) FROM foo`)
+	if err != nil {
+		t.Fatalf("failed to execute query on single node: %s", err.Error())
+	}
+	if exp, got := `[{"columns":["COUNT(*)"],"types":["integer"],"values":[[50]]}]`, asJSON(rows); exp != got {
+		t.Fatalf("expected %s, got %s", exp, got)
+	}
+
+	if err := db.CheckpointWithTimeout(CheckpointTruncate, 250*time.Millisecond); err == nil {
+		t.Fatal("expected error due to failure to checkpoint")
+	}
+
+	postWALBytes := mustReadBytes(db.WALPath())
+	if !bytes.Equal(preWALBytes, postWALBytes) {
+		t.Fatalf("wal file should be unchanged after checkpoint failure")
+	}
+
+	blockingDB.Close()
+	if err := db.CheckpointWithTimeout(CheckpointTruncate, 250*time.Millisecond); err != nil {
+		t.Fatalf("failed to checkpoint database: %s", err.Error())
+	}
+	if mustFileSize(db.WALPath()) != 0 {
+		t.Fatalf("wal file should be zero length after checkpoint truncate")
+	}
+}
+
+func mustReadBytes(path string) []byte {
+	b, err := os.ReadFile(path)
+	if err != nil {
+		panic(err)
+	}
+	return b
+}
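Note: the new TruncateTimeout test also calls mustFileSize, which is not shown in this diff. A plausible implementation, mirroring the mustReadBytes helper above and using only the os package the test file already imports (an assumption, not part of this commit):

// mustFileSize returns the size of the file at path, panicking on error.
// (Hypothetical sketch; the real helper presumably lives elsewhere in the file.)
func mustFileSize(path string) int64 {
	fi, err := os.Stat(path)
	if err != nil {
		panic(err)
	}
	return fi.Size()
}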

@@ -117,7 +117,6 @@ const (
 	numSnapshotsFull               = "num_snapshots_full"
 	numSnapshotsIncremental        = "num_snapshots_incremental"
 	numFullCheckpointFailed        = "num_full_checkpoint_failed"
-	numWALCheckpointRestartFailed  = "num_wal_checkpoint_restart_failed"
 	numWALCheckpointTruncateFailed = "num_wal_checkpoint_truncate_failed"
 	numAutoVacuums                 = "num_auto_vacuums"
 	numAutoVacuumsFailed           = "num_auto_vacuums_failed"
@@ -141,7 +140,6 @@ const (
 	numRemovedBeforeJoins             = "num_removed_before_joins"
 	numDBStatsErrors                  = "num_db_stats_errors"
 	snapshotCreateDuration            = "snapshot_create_duration"
-	snapshotCreateChkRestartDuration  = "snapshot_create_chk_restart_duration"
 	snapshotCreateChkTruncateDuration = "snapshot_create_chk_truncate_duration"
 	snapshotCreateWALCompactDuration  = "snapshot_create_wal_compact_duration"
 	snapshotPersistDuration           = "snapshot_persist_duration"
@@ -174,7 +172,6 @@ func ResetStats() {
 	stats.Add(numSnapshotsFull, 0)
 	stats.Add(numSnapshotsIncremental, 0)
 	stats.Add(numFullCheckpointFailed, 0)
-	stats.Add(numWALCheckpointRestartFailed, 0)
 	stats.Add(numWALCheckpointTruncateFailed, 0)
 	stats.Add(numAutoVacuums, 0)
 	stats.Add(numAutoVacuumsFailed, 0)
@@ -198,7 +195,6 @@ func ResetStats() {
 	stats.Add(numRemovedBeforeJoins, 0)
 	stats.Add(numDBStatsErrors, 0)
 	stats.Add(snapshotCreateDuration, 0)
-	stats.Add(snapshotCreateChkRestartDuration, 0)
 	stats.Add(snapshotCreateChkTruncateDuration, 0)
 	stats.Add(snapshotCreateWALCompactDuration, 0)
 	stats.Add(snapshotPersistDuration, 0)
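Note: the counter and duration gauge removed above follow the standard expvar pattern used throughout this file: stats.Add with a zero delta registers a key as an *expvar.Int, and stats.Get plus a type assertion reads it back for Set. A minimal self-contained sketch of that pattern (the map name "store" is an assumption, not taken from this diff):

package main

import (
	"expvar"
	"fmt"
	"time"
)

// stats mirrors the package-level expvar map used in the code above; the
// name "store" is assumed for this sketch.
var stats = expvar.NewMap("store")

const snapshotCreateChkTruncateDuration = "snapshot_create_chk_truncate_duration"

func main() {
	// Add with a zero delta registers the key as an *expvar.Int.
	stats.Add(snapshotCreateChkTruncateDuration, 0)

	start := time.Now()
	time.Sleep(10 * time.Millisecond) // stand-in for the checkpoint work
	stats.Get(snapshotCreateChkTruncateDuration).(*expvar.Int).Set(time.Since(start).Milliseconds())

	fmt.Println(stats.Get(snapshotCreateChkTruncateDuration))
}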
@@ -1941,18 +1937,6 @@ func (s *Store) fsmSnapshot() (fSnap raft.FSMSnapshot, retErr error) {
 	compactedBuf := bytes.NewBuffer(nil)
 	var err error
 	if pathExistsWithData(s.walPath) {
-		// Attempt to checkpoint everything into the main database file. Only
-		// if this works should we bother to compact-scan the WAL. It's possible
-		// it fails if some query is in progress. If it fails, return an error
-		// and Raft will retry later. But if it succeeds it means that all readers
-		// are reading from the main database file.
-		chkRStartTime := time.Now()
-		if err := s.db.Checkpoint(sql.CheckpointRestart); err != nil {
-			stats.Add(numWALCheckpointRestartFailed, 1)
-			return nil, err
-		}
-		stats.Get(snapshotCreateChkRestartDuration).(*expvar.Int).Set(time.Since(chkRStartTime).Milliseconds())
-
 		compactStartTime := time.Now()
 		// Read a compacted version of the WAL into memory, and write it
 		// to the Snapshot store.
@@ -1975,10 +1959,9 @@ func (s *Store) fsmSnapshot() (fSnap raft.FSMSnapshot, retErr error) {
 		walFD.Close() // We need it closed for the next step.
 		stats.Get(snapshotCreateWALCompactDuration).(*expvar.Int).Set(time.Since(compactStartTime).Milliseconds())
 
-		// Clean-up by truncating the WAL. This should be fast because all the pages
-		// have been checkpointed into the main database file, and writes are
-		// blocked during this process by Raft. In otherwords, the WAL file should
-		// be unchanged.
+		// Now that we've written a (compacted) copy of the WAL to the Snapshot,
+		// we can truncate the WAL. We use truncate mode so that the next WAL
+		// contains just the changes since this snapshot.
 		walSz, err := fileSize(s.walPath)
 		if err != nil {
 			return nil, err
@@ -1986,7 +1969,8 @@ func (s *Store) fsmSnapshot() (fSnap raft.FSMSnapshot, retErr error) {
 		chkTStartTime := time.Now()
 		if err := s.db.Checkpoint(sql.CheckpointTruncate); err != nil {
 			stats.Add(numWALCheckpointTruncateFailed, 1)
-			return nil, fmt.Errorf("failed to truncate WAL: %s", err.Error())
+			return nil, fmt.Errorf("snapshot can't complete due to WAL checkpoint failure (will retry): %s",
+				err.Error())
 		}
 		stats.Get(snapshotCreateChkTruncateDuration).(*expvar.Int).Set(time.Since(chkTStartTime).Milliseconds())
 		stats.Get(snapshotWALSize).(*expvar.Int).Set(int64(compactedBuf.Len()))
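Note: for readers unfamiliar with SQLite checkpoint modes, a TRUNCATE checkpoint copies all WAL frames into the main database file and then truncates the WAL to zero bytes, which is why the tests above expect an unchanged WAL when the checkpoint fails and a zero-length WAL when it succeeds. A standalone sketch of the underlying PRAGMA, assuming the mattn/go-sqlite3 driver (a hypothetical example, not rqlite's actual code path):

package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/mattn/go-sqlite3"
)

func main() {
	// _journal_mode=WAL puts the database into WAL mode on open.
	db, err := sql.Open("sqlite3", "file:example.db?_journal_mode=WAL")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	if _, err := db.Exec(`CREATE TABLE IF NOT EXISTS foo (id INTEGER PRIMARY KEY, name TEXT)`); err != nil {
		log.Fatal(err)
	}
	if _, err := db.Exec(`INSERT INTO foo(name) VALUES("fiona")`); err != nil {
		log.Fatal(err)
	}

	// A TRUNCATE checkpoint copies every WAL frame into the main database
	// file, then truncates the WAL to zero bytes. The PRAGMA returns one row:
	// busy is non-zero if a reader blocked the checkpoint, and the other two
	// columns report the WAL frame count and how many frames were checkpointed.
	var busy, logFrames, checkpointed int
	row := db.QueryRow(`PRAGMA wal_checkpoint(TRUNCATE)`)
	if err := row.Scan(&busy, &logFrames, &checkpointed); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("busy=%d log=%d checkpointed=%d\n", busy, logFrames, checkpointed)
}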
