From 81a234abd00aae78fc3e37e918c1d3299f537fdc Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Wed, 25 Aug 2021 09:26:03 -0400 Subject: [PATCH 1/7] Command line option for SQLite on-disk file path --- cmd/rqlited/main.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmd/rqlited/main.go b/cmd/rqlited/main.go index 1396392a..a18db1a1 100644 --- a/cmd/rqlited/main.go +++ b/cmd/rqlited/main.go @@ -62,6 +62,7 @@ var discoID string var expvar bool var pprofEnabled bool var onDisk bool +var onDiskPath string var fkConstraints bool var raftLogLevel string var raftNonVoter bool @@ -110,6 +111,7 @@ func init() { flag.BoolVar(&expvar, "expvar", true, "Serve expvar data on HTTP server") flag.BoolVar(&pprofEnabled, "pprof", true, "Serve pprof data on HTTP server") flag.BoolVar(&onDisk, "on-disk", false, "Use an on-disk SQLite database") + flag.StringVar(&onDiskPath, "on-disk-path", "", "Specify path for SQLite on-disk database file. If not set, use file in data directory") flag.BoolVar(&fkConstraints, "fk", false, "Enable SQLite foreign key constraints") flag.BoolVar(&showVersion, "version", false, "Show version information and exit") flag.BoolVar(&raftNonVoter, "raft-non-voter", false, "Configure as non-voting node") From 105a894fe59fd34ac14a37d55d2fcfa6c5171d3b Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Wed, 25 Aug 2021 12:54:34 -0400 Subject: [PATCH 2/7] Support explicit SQLite path at Store level --- cmd/rqlited/main.go | 1 + store/db_config.go | 3 +++ store/store.go | 9 ++++++-- store/store_test.go | 51 ++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 57 insertions(+), 7 deletions(-) diff --git a/cmd/rqlited/main.go b/cmd/rqlited/main.go index a18db1a1..88b92ed5 100644 --- a/cmd/rqlited/main.go +++ b/cmd/rqlited/main.go @@ -198,6 +198,7 @@ func main() { } dbConf := store.NewDBConfig(!onDisk) dbConf.FKConstraints = fkConstraints + dbConf.OnDiskPath = onDiskPath str := store.New(raftTn, &store.StoreConfig{ DBConf: dbConf, diff --git 
a/store/db_config.go b/store/db_config.go index d0e11402..2690286c 100644 --- a/store/db_config.go +++ b/store/db_config.go @@ -5,6 +5,9 @@ type DBConfig struct { // Whether the database is in-memory only. Memory bool `json:"memory"` + // SQLite on-disk path + OnDiskPath string `json:"on_disk_path,omitempty"` + // Enforce Foreign Key constraints FKConstraints bool `json:"fk_constraints"` } diff --git a/store/store.go b/store/store.go index d3b57ad9..66d2b2dc 100644 --- a/store/store.go +++ b/store/store.go @@ -185,12 +185,17 @@ func New(ln Listener, c *StoreConfig) *Store { logger = log.New(os.Stderr, "[store] ", log.LstdFlags) } + dbPath := filepath.Join(c.Dir, sqliteFile) + if c.DBConf.OnDiskPath != "" { + dbPath = c.DBConf.OnDiskPath + } + return &Store{ ln: ln, raftDir: c.Dir, raftID: c.ID, dbConf: c.DBConf, - dbPath: filepath.Join(c.Dir, sqliteFile), + dbPath: dbPath, reqMarshaller: command.NewRequestMarshaler(), logger: logger, ApplyTimeout: applyTimeout, @@ -209,7 +214,7 @@ func (s *Store) Open(enableBootstrap bool) error { if !s.dbConf.Memory { dbType = "on-disk" } - s.logger.Printf("configured for an %s database", dbType) + s.logger.Printf("configured for an %s database at %s", dbType, s.dbPath) s.logger.Printf("ensuring directory at %s exists", s.raftDir) err := os.MkdirAll(s.raftDir, 0755) diff --git a/store/store_test.go b/store/store_test.go index b4368c63..183962c5 100644 --- a/store/store_test.go +++ b/store/store_test.go @@ -226,6 +226,7 @@ func Test_SingleNodeExecuteQueryTx(t *testing.T) { } } +// Test_SingleNodeInMemFK tests that basic foreign-key related functionality works. func Test_SingleNodeInMemFK(t *testing.T) { s := mustNewStoreFK(true) defer os.RemoveAll(s.Path()) @@ -251,6 +252,41 @@ func Test_SingleNodeInMemFK(t *testing.T) { } } +// Test_SingleNodeSQLitePath ensures that basic functionality works when the SQLite database path +// is explicitly specified. 
+func Test_SingleNodeSQLitePath(t *testing.T) { + s := mustNewStoreSQLitePath() + defer os.RemoveAll(s.Path()) + + if err := s.Open(true); err != nil { + t.Fatalf("failed to open single-node store: %s", err.Error()) + } + defer s.Close(true) + s.WaitForLeader(10 * time.Second) + + er := executeRequestFromStrings([]string{ + `CREATE TABLE foo (id INTEGER NOT NULL PRIMARY KEY, name TEXT)`, + `INSERT INTO foo(id, name) VALUES(1, "fiona")`, + }, false, false) + _, err := s.Execute(er) + if err != nil { + t.Fatalf("failed to execute on single node: %s", err.Error()) + } + + qr := queryRequestFromString("SELECT * FROM foo", false, false) + qr.Level = command.QueryRequest_QUERY_REQUEST_LEVEL_NONE + r, err := s.Query(qr) + if err != nil { + t.Fatalf("failed to query single node: %s", err.Error()) + } + if exp, got := `["id","name"]`, asJSON(r[0].Columns); exp != got { + t.Fatalf("unexpected results for query\nexp: %s\ngot: %s", exp, got) + } + if exp, got := `[[1,"fiona"]]`, asJSON(r[0].Values); exp != got { + t.Fatalf("unexpected results for query\nexp: %s\ngot: %s", exp, got) + } +} + func Test_SingleNodeBackupBinary(t *testing.T) { t.Parallel() @@ -1191,14 +1227,15 @@ func Test_State(t *testing.T) { } } -func mustNewStoreAtPath(path string, inmem, fk bool) *Store { +func mustNewStoreAtPaths(dataPath, sqlitePath string, inmem, fk bool) *Store { cfg := NewDBConfig(inmem) cfg.FKConstraints = fk + cfg.OnDiskPath = sqlitePath s := New(mustMockLister("localhost:0"), &StoreConfig{ DBConf: cfg, - Dir: path, - ID: path, // Could be any unique string. + Dir: dataPath, + ID: dataPath, // Could be any unique string. 
}) if s == nil { panic("failed to create new store") @@ -1207,11 +1244,15 @@ func mustNewStoreAtPath(path string, inmem, fk bool) *Store { } func mustNewStore(inmem bool) *Store { - return mustNewStoreAtPath(mustTempDir(), inmem, false) + return mustNewStoreAtPaths(mustTempDir(), "", inmem, false) } func mustNewStoreFK(inmem bool) *Store { - return mustNewStoreAtPath(mustTempDir(), inmem, true) + return mustNewStoreAtPaths(mustTempDir(), "", inmem, true) +} + +func mustNewStoreSQLitePath() *Store { + return mustNewStoreAtPaths(mustTempDir(), filepath.Join(mustTempDir(), "explicit-path.db"), false, true) } type mockSnapshotSink struct { From 29c35261e88a89d5a6b256e779a07179c84b32a0 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Wed, 25 Aug 2021 12:55:49 -0400 Subject: [PATCH 3/7] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80ad419d..03bb0d61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Systems running earlier 6.x software can be upgraded to this release without doi ### New features - [PR #859](https://github.com/rqlite/rqlite/pull/859): Support transparent Execute and Query request forwarding. Fixes [issue #330](https://github.com/rqlite/rqlite/issues/330). +- [PR #873](https://github.com/rqlite/rqlite/pull/873): Support explicitly specifying SQLite on-disk file path. ### Implementation changes and bug fixes - [PR #863](https://github.com/rqlite/rqlite/pull/863): Add gauge-like metric for Snapshot timings. 
From 9965f520a857b855ddbfe4548d89c21fefd6d690 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Wed, 25 Aug 2021 13:20:50 -0400 Subject: [PATCH 4/7] Add a performance-related guide --- DOC/PERFORMANCE.md | 35 +++++++++++++++++++++++++++++++++++ cmd/rqlited/main.go | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 DOC/PERFORMANCE.md diff --git a/DOC/PERFORMANCE.md b/DOC/PERFORMANCE.md new file mode 100644 index 00000000..2570906d --- /dev/null +++ b/DOC/PERFORMANCE.md @@ -0,0 +1,35 @@ +# Performance + +rqlite performance -- defined as the number of database updates performed in a given period of time -- is primarily determined by two factors: +- Disk performance +- Network latency + +## Disk +Disk performance is the single biggest determinant of rqlite performance. This is because every change to the system must go through the Raft subsystem, and the Raft subsystem calls `fsync()` after every write to its log. Raft does this to ensure that the change is safely persisted in permanent storage before applying those changes to the SQLite database. This is why rqlite runs with an in-memory database by default, as using as on-disk SQLite database would put even more load on the disk, reducing the disk throughput available to Raft. + +## Network +When running a rqlite cluster, network latency is also a factor. This is because Raft must contact every node **twice** before a change is committed to the Raft log. Obviously the faster your network, the shorter time contact each node will take. + +# Improving Performance + +There are a few ways to improve performance, but not all will be suitable for a given application. + +## Batching +The more SQLite statements you can include in a single request to a rqlite node, the better the system will perform. + +## Use more powerful hardware +Obviously running rqlite on better disks, better networks, or both, will improve performance. 
+ +## Use a memory-backed filesystem +It is possible to run rqlite entirely on-top of a memory-backed file system. This means that **both** the Raft log and SQLite database would be stored in memory only. For example, on Linux you can create a memory-based filesystem like so: +```bash +mount -t tmpfs -o size=512m tmpfs /mnt/ramdisk +``` +**This comes with risks, however**. The behavior of rqlite when a node fails, but the Raft log has not been permanently persisted, **is not defined**. But if your policy is to completely deprovision your rqlite node, or rqlite cluster, in the event of any node failure, this option might work for you. Testing shows that using rqlite with a memory-only file system can result in 100x improvement in performance. + +## Improving read-write concurrency +SQLite can offer better concurrent read and write support when using an on-disk database, compared to in-memory databases. But as explained above, using an on-disk SQLite database can significant impact performance. Of course, the database-update performance will be so much better with an in-memory database, that improving read-write concurrency may not be needed in practise. + +However if you enable an on-disk SQLite database, but then place the SQLite database on a memory-backed file system, you can have the best of both worlds. You can dedicate your disk to the Raft log, but still get better read-write concurrency with SQLite. + +An alternative approach would be to place the SQLite on-disk database on a different disk, but this is unlikely to be as performant as an in-memory file system. 
\ No newline at end of file diff --git a/cmd/rqlited/main.go b/cmd/rqlited/main.go index 88b92ed5..2f2cf672 100644 --- a/cmd/rqlited/main.go +++ b/cmd/rqlited/main.go @@ -111,7 +111,7 @@ func init() { flag.BoolVar(&expvar, "expvar", true, "Serve expvar data on HTTP server") flag.BoolVar(&pprofEnabled, "pprof", true, "Serve pprof data on HTTP server") flag.BoolVar(&onDisk, "on-disk", false, "Use an on-disk SQLite database") - flag.StringVar(&onDiskPath, "on-disk-path", "", "Specify path for SQLite on-disk database file. If not set, use file in data directory") + flag.StringVar(&onDiskPath, "on-disk-path", "", "Path for SQLite on-disk database file. If not set, use file in data directory") flag.BoolVar(&fkConstraints, "fk", false, "Enable SQLite foreign key constraints") flag.BoolVar(&showVersion, "version", false, "Show version information and exit") flag.BoolVar(&raftNonVoter, "raft-non-voter", false, "Configure as non-voting node") From 5c26673c6f7c5de91608c5496afbe400c7cb14ad Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Wed, 25 Aug 2021 13:24:47 -0400 Subject: [PATCH 5/7] Link from README to PERFORMANCE doc --- DOC/PERFORMANCE.md | 6 ++++++ README.md | 4 +--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/DOC/PERFORMANCE.md b/DOC/PERFORMANCE.md index 2570906d..6da20600 100644 --- a/DOC/PERFORMANCE.md +++ b/DOC/PERFORMANCE.md @@ -1,9 +1,13 @@ # Performance +rqlite replicates SQLite for fault-tolerance. It does not replicate it for performance. In fact performance is reduced due to a standalone SQLite database due to nature of distributed systems. + rqlite performance -- defined as the number of database updates performed in a given period of time -- is primarily determined by two factors: - Disk performance - Network latency +Depending on your machine (particularly its IO performance) and network, individual INSERT performance could be anything from 10 operations per second to more than 200 operations per second. 
+ ## Disk Disk performance is the single biggest determinant of rqlite performance. This is because every change to the system must go through the Raft subsystem, and the Raft subsystem calls `fsync()` after every write to its log. Raft does this to ensure that the change is safely persisted in permanent storage before applying those changes to the SQLite database. This is why rqlite runs with an in-memory database by default, as using as on-disk SQLite database would put even more load on the disk, reducing the disk throughput available to Raft. @@ -17,6 +21,8 @@ There are a few ways to improve performance, but not all will be suitable for a ## Batching The more SQLite statements you can include in a single request to a rqlite node, the better the system will perform. +By using the [bulk API](https://github.com/rqlite/rqlite/blob/master/DOC/BULK.md), transactions, or both, throughput will increase significantly, often by 2 orders of magnitude. This speed-up is due to the way Raft and SQLite work. So for high throughput, execute as many operations as possible within a single transaction. + ## Use more powerful hardware Obviously running rqlite on better disks, better networks, or both, will improve performance. diff --git a/README.md b/README.md index ad6453cf..48b06840 100644 --- a/README.md +++ b/README.md @@ -81,9 +81,7 @@ $ rqlite rqlite has a rich HTTP API, allowing full control over writing to, and querying from, rqlite. Check out [the documentation](https://github.com/rqlite/rqlite/blob/master/DOC/DATA_API.md) for full details. There are also [client libraries available](https://github.com/rqlite). ## Performance -rqlite replicates SQLite for fault-tolerance. It does not replicate it for performance. In fact performance is reduced somewhat due to the network round-trips. 
- -Depending on your machine (particularly its IO performance) and network, individual INSERT performance could be anything from 10 operations per second to more than 200 operations per second. However, by using the [bulk API](https://github.com/rqlite/rqlite/blob/master/DOC/BULK.md), transactions, or both, throughput will increase significantly, often by 2 orders of magnitude. This speed-up is due to the way Raft and SQLite work. So for high throughput, execute as many operations as possible within a single transaction. +You can learn more about rqlite performance, and how to improve it, [here](https://github.com/rqlite/rqlite/blob/master/DOC/PERFORMANCE.md). ### In-memory databases By default rqlite uses an [in-memory SQLite database](https://www.sqlite.org/inmemorydb.html) to maximise performance. In this mode no actual SQLite file is created and the entire database is stored in memory. If you wish rqlite to use an actual file-based SQLite database, pass `-on-disk` to rqlite on start-up. From bc9e5ded91a230cc672c9b6340e53b511bd3b765 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Wed, 25 Aug 2021 13:34:15 -0400 Subject: [PATCH 6/7] Typos in performance doc --- DOC/PERFORMANCE.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/DOC/PERFORMANCE.md b/DOC/PERFORMANCE.md index 6da20600..f9be7adb 100644 --- a/DOC/PERFORMANCE.md +++ b/DOC/PERFORMANCE.md @@ -1,6 +1,6 @@ # Performance -rqlite replicates SQLite for fault-tolerance. It does not replicate it for performance. In fact performance is reduced due to a standalone SQLite database due to nature of distributed systems. +rqlite replicates SQLite for fault-tolerance. It does not replicate it for performance. In fact performance is reduced due to a standalone SQLite database due to the nature of distributed systems. _There is no such thing as a free lunch_. 
rqlite performance -- defined as the number of database updates performed in a given period of time -- is primarily determined by two factors: - Disk performance @@ -12,7 +12,7 @@ Depending on your machine (particularly its IO performance) and network, individ Disk performance is the single biggest determinant of rqlite performance. This is because every change to the system must go through the Raft subsystem, and the Raft subsystem calls `fsync()` after every write to its log. Raft does this to ensure that the change is safely persisted in permanent storage before applying those changes to the SQLite database. This is why rqlite runs with an in-memory database by default, as using as on-disk SQLite database would put even more load on the disk, reducing the disk throughput available to Raft. ## Network -When running a rqlite cluster, network latency is also a factor. This is because Raft must contact every node **twice** before a change is committed to the Raft log. Obviously the faster your network, the shorter time contact each node will take. +When running a rqlite cluster, network latency is also a factor. This is because Raft must contact every node **twice** before a change is committed to the Raft log. Obviously the faster your network, the shorter the time to contact each node. # Improving Performance @@ -31,11 +31,11 @@ It is possible to run rqlite entirely on-top of a memory-backed file system. Thi ```bash mount -t tmpfs -o size=512m tmpfs /mnt/ramdisk ``` -**This comes with risks, however**. The behavior of rqlite when a node fails, but the Raft log has not been permanently persisted, **is not defined**. But if your policy is to completely deprovision your rqlite node, or rqlite cluster, in the event of any node failure, this option might work for you. Testing shows that using rqlite with a memory-only file system can result in 100x improvement in performance. +**This comes with risks, however**. 
The behavior of rqlite when a node fails, but committed entries in the Raft log have not actually been permanently persisted, **is not defined**. But if your policy is to completely deprovision your rqlite node, or rqlite cluster, in the event of any node failure, this option may be of interest to you. Perhaps you always rebuild your rqlite cluster from a different source of data, so can recover an rqlite cluster regardless of its state. Testing shows that using rqlite with a memory-only file system can result in 100x improvement in performance. ## Improving read-write concurrency -SQLite can offer better concurrent read and write support when using an on-disk database, compared to in-memory databases. But as explained above, using an on-disk SQLite database can significant impact performance. Of course, the database-update performance will be so much better with an in-memory database, that improving read-write concurrency may not be needed in practise. +SQLite can offer better concurrent read and write support when using an on-disk database, compared to in-memory databases. But as explained above, using an on-disk SQLite database can significant impact performance. But since the database-update performance will be so much better with an in-memory database, improving read-write concurrency may not be needed in practise. However if you enable an on-disk SQLite database, but then place the SQLite database on a memory-backed file system, you can have the best of both worlds. You can dedicate your disk to the Raft log, but still get better read-write concurrency with SQLite. -An alternative approach would be to place the SQLite on-disk database on a different disk, but this is unlikely to be as performant as an in-memory file system. 
\ No newline at end of file +An alternative approach would be to place the SQLite on-disk database on a disk different than that storing the Raft log, but this is unlikely to be as performant as an in-memory file system for the SQLite database. \ No newline at end of file From a295fb2370e3ddfc6e7b725369403ac895d85b4c Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Wed, 25 Aug 2021 13:37:08 -0400 Subject: [PATCH 7/7] More Performance doc fixes --- DOC/PERFORMANCE.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DOC/PERFORMANCE.md b/DOC/PERFORMANCE.md index f9be7adb..992c8937 100644 --- a/DOC/PERFORMANCE.md +++ b/DOC/PERFORMANCE.md @@ -1,6 +1,6 @@ # Performance -rqlite replicates SQLite for fault-tolerance. It does not replicate it for performance. In fact performance is reduced due to a standalone SQLite database due to the nature of distributed systems. _There is no such thing as a free lunch_. +rqlite replicates SQLite for fault-tolerance. It does not replicate it for performance. In fact performance is reduced relative to a standalone SQLite database due to the nature of distributed systems. _There is no such thing as a free lunch_. rqlite performance -- defined as the number of database updates performed in a given period of time -- is primarily determined by two factors: - Disk performance @@ -36,6 +36,6 @@ mount -t tmpfs -o size=512m tmpfs /mnt/ramdisk ## Improving read-write concurrency SQLite can offer better concurrent read and write support when using an on-disk database, compared to in-memory databases. But as explained above, using an on-disk SQLite database can significant impact performance. But since the database-update performance will be so much better with an in-memory database, improving read-write concurrency may not be needed in practise. -However if you enable an on-disk SQLite database, but then place the SQLite database on a memory-backed file system, you can have the best of both worlds. 
You can dedicate your disk to the Raft log, but still get better read-write concurrency with SQLite. +However if you enable an on-disk SQLite database, but then place the SQLite database on a memory-backed file system, you can have the best of both worlds. You can dedicate your disk to the Raft log, but still get better read-write concurrency with SQLite. You can specify the SQLite database file path via the `-on-disk-path` flag. An alternative approach would be to place the SQLite on-disk database on a disk different than that storing the Raft log, but this is unlikely to be as performant as an in-memory file system for the SQLite database. \ No newline at end of file