From 6c25a8278e7a463a16eafb66bc2995c412943aa2 Mon Sep 17 00:00:00 2001 From: Ziyang Hu Date: Thu, 8 Dec 2022 22:40:58 +0800 Subject: [PATCH] Options to tune RocksDB engine --- Cargo.lock | 90 +++++++++++++++++----------------- README.md | 2 + TUNING_ROCKSDB.md | 19 +++++++ cozo-core/Cargo.toml | 12 ++--- cozo-core/src/storage/rocks.rs | 32 +++++++++--- cozorocks/Cargo.toml | 2 +- cozorocks/bridge/db.cpp | 31 ++++++++++++ cozorocks/src/bridge/db.rs | 6 +++ cozorocks/src/bridge/mod.rs | 2 + cozoserver/Cargo.toml | 2 +- 10 files changed, 138 insertions(+), 60 deletions(-) create mode 100644 TUNING_ROCKSDB.md diff --git a/Cargo.lock b/Cargo.lock index 72136edc..c493fe0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -637,7 +637,7 @@ dependencies = [ [[package]] name = "cozorocks" -version = "0.1.1" +version = "0.1.2" dependencies = [ "cc", "cxx", @@ -761,9 +761,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.82" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4a41a86530d0fe7f5d9ea779916b7cadd2d4f9add748b99c2c029cbbdfaf453" +checksum = "bdf07d07d6531bfcdbe9b8b739b104610c6508dcc4d63b410585faf338241daf" dependencies = [ "cc", "cxxbridge-flags", @@ -773,9 +773,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.82" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06416d667ff3e3ad2df1cd8cd8afae5da26cf9cec4d0825040f88b5ca659a2f0" +checksum = "d2eb5b96ecdc99f72657332953d4d9c50135af1bac34277801cc3937906ebd39" dependencies = [ "cc", "codespan-reporting", @@ -788,15 +788,15 @@ dependencies = [ [[package]] name = "cxxbridge-flags" -version = "1.0.82" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "820a9a2af1669deeef27cb271f476ffd196a2c4b6731336011e0ba63e2c7cf71" +checksum = "ac040a39517fd1674e0f32177648334b0f4074625b5588a64519804ba0553b12" [[package]] name = "cxxbridge-macro" -version = "1.0.82" +version 
= "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08a6e2fcc370a089ad3b4aaf54db3b1b4cee38ddabce5896b33eb693275f470" +checksum = "1362b0ddcfc4eb0a1f57b68bd77dd99f0e826958a96abd0ae9bd092e114ffed6" dependencies = [ "proc-macro2", "quote", @@ -871,12 +871,12 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.9.3" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" +checksum = "85cdab6a89accf66733ad5a1693a4dcced6aeff64602b634530dd73c1f3ee9f0" dependencies = [ - "atty", "humantime", + "is-terminal", "log", "regex", "termcolor", @@ -931,9 +931,9 @@ dependencies = [ [[package]] name = "filetime" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b9663d381d07ae25dc88dbdf27df458faa83a9b25336bcac83d5e452b5fc9d3" +checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9" dependencies = [ "cfg-if 1.0.0", "libc", @@ -1475,9 +1475,9 @@ dependencies = [ [[package]] name = "ipnet" -version = "2.5.1" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f88c5561171189e69df9d98bcf18fd5f9558300f7ea7b801eb8a0fd748bd8745" +checksum = "ec947b7a4ce12e3b87e353abae7ce124d025b6c7d6c5aea5cc0bcf92e9510ded" [[package]] name = "is-terminal" @@ -1579,9 +1579,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.137" +version = "0.2.138" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89" +checksum = "db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8" [[package]] name = "libloading" @@ -1974,9 +1974,9 @@ dependencies = [ [[package]] name = "num-format" -version = "0.4.3" +version = "0.4.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "54b862ff8df690cf089058c98b183676a7ed0f974cc08b426800093227cbff3b" +checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" dependencies = [ "arrayvec", "itoa 1.0.4", @@ -2060,9 +2060,9 @@ checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" [[package]] name = "openssl" -version = "0.10.43" +version = "0.10.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020433887e44c27ff16365eaa2d380547a94544ad509aff6eb5b6e3e0b27b376" +checksum = "29d971fd5722fec23977260f6e81aa67d2f22cadbdc2aa049f1022d9a3be1566" dependencies = [ "bitflags", "cfg-if 1.0.0", @@ -2101,9 +2101,9 @@ dependencies = [ [[package]] name = "openssl-sys" -version = "0.9.78" +version = "0.9.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07d5c8cb6e57b3a3612064d7b18b117912b4ce70955c2504d4b741c9e244b132" +checksum = "5454462c0eced1e97f2ec09036abc8da362e66802f66fd20f86854d9d8cbcbc4" dependencies = [ "autocfg", "cc", @@ -2221,9 +2221,9 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" [[package]] name = "pest" -version = "2.5.0" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f400b0f7905bf702f9f3dc3df5a121b16c54e9e8012c082905fdf09a931861a" +checksum = "cc8bed3549e0f9b0a2a78bf7c0018237a2cdf085eecbbc048e52612438e4e9d0" dependencies = [ "thiserror", "ucd-trie", @@ -2231,9 +2231,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.5.0" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "423c2ba011d6e27b02b482a3707c773d19aec65cc024637aec44e19652e66f63" +checksum = "cdc078600d06ff90d4ed238f0119d84ab5d43dbaad278b0e33a8820293b32344" dependencies = [ "pest", "pest_generator", @@ -2241,9 +2241,9 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.5.0" +version = "2.5.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e64e6c2c85031c02fdbd9e5c72845445ca0a724d419aa0bc068ac620c9935c1" +checksum = "28a1af60b1c4148bb269006a750cff8e2ea36aff34d2d96cf7be0b14d1bed23c" dependencies = [ "pest", "pest_meta", @@ -2254,9 +2254,9 @@ dependencies = [ [[package]] name = "pest_meta" -version = "2.5.0" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57959b91f0a133f89a68be874a5c88ed689c19cd729ecdb5d762ebf16c64d662" +checksum = "fec8605d59fc2ae0c6c1aefc0c7c7a9769732017c0ce07f7a9cfffa7b4404f20" dependencies = [ "once_cell", "pest", @@ -2895,9 +2895,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.36.4" +version = "0.36.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb93e85278e08bb5788653183213d3a60fc242b10cb9be96586f5a73dcb67c23" +checksum = "a3807b5d10909833d3e9acd1eb5fb988f79376ff10fce42937de71a449c4c588" dependencies = [ "bitflags", "errno", @@ -3027,9 +3027,9 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.148" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e53f64bb4ba0191d6d0676e1b141ca55047d83b74f5607e6d8eb88126c52c2dc" +checksum = "256b9932320c590e707b94576e3cc1f7c9024d0ee6612dfbcf1cb106cbe8e055" dependencies = [ "serde_derive", ] @@ -3045,9 +3045,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.148" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a55492425aa53521babf6137309e7d34c20bbfbbfcfe2c7f3a047fd1f6b92c0c" +checksum = "b4eae9b04cbffdfd550eb462ed33bc6a1b68c935127d008b27444d08380f94e4" dependencies = [ "proc-macro2", "quote", @@ -3183,9 +3183,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "sqlite" -version = 
"0.30.2" +version = "0.30.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd558ce5689e757e328478da05f6416e5ce566482662fcbe7ce5ee75bf9aa717" +checksum = "12e072cb5fb89b3fe5e9c9584676348feb503f9fb3ae829d9868171bc5372d48" dependencies = [ "libc", "sqlite3-sys", @@ -3578,9 +3578,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.22.0" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d76ce4a75fb488c605c54bf610f221cea8b0dafb53333c1a67e8ee199dcd2ae3" +checksum = "eab6d665857cc6ca78d6e80303a02cea7a7851e85dfbd77cbdc09bd129f1ef46" dependencies = [ "autocfg", "bytes", @@ -3590,7 +3590,7 @@ dependencies = [ "num_cpus", "pin-project-lite", "socket2", - "winapi", + "windows-sys 0.42.0", ] [[package]] @@ -3669,9 +3669,9 @@ dependencies = [ [[package]] name = "typenum" -version = "1.15.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "ucd-trie" diff --git a/README.md b/README.md index 19cc1bc8..fde0543f 100644 --- a/README.md +++ b/README.md @@ -214,6 +214,8 @@ which is helpful even if you are not using Rust. Even if a storage/platform is not officially supported, you can still try to compile your version to use, maybe with some tweaks in the code. +You can [tune the RockDB engine](TUNING_ROCKSDB.md) for more performance. + ## Architecture The Cozo database consists of three layers stuck on top of each other, diff --git a/TUNING_ROCKSDB.md b/TUNING_ROCKSDB.md new file mode 100644 index 00000000..b9f3ce4b --- /dev/null +++ b/TUNING_ROCKSDB.md @@ -0,0 +1,19 @@ +# Tuning RocksDB for Cozo + +RocksDB has a lot of options, and by tuning them you can achieve better performance +for your workload. 
This is probably unnecessary for 95% of users, but if you are the +remaining 5%, Cozo gives you the options to tune RocksDB directly if you are using the +RocksDB storage engine. + +When you create the CozoDB instance with the RocksDB backend option, you are asked to +provide a path to a directory to store the data (will be created if it does not exist). +If you put a file named `options` inside this directory, the engine will expect this +to be a [RocksDB options file](https://github.com/facebook/rocksdb/wiki/RocksDB-Options-File) +and use it. If you are using `cozoserver`, you will get a log message if +this feature is activated. + +Note that improperly set options can make your database misbehave! +In general, you should run your database once, copy the options file `data/OPTIONS-XXXXXX` +from within your database directory, and use that as a base for your customization. +If you are not an expert on RocksDB, we suggest you limit your changes to adjusting those numerical +options that you have at least a vague understanding of. \ No newline at end of file diff --git a/cozo-core/Cargo.toml b/cozo-core/Cargo.toml index 45942cfa..12d8c461 100644 --- a/cozo-core/Cargo.toml +++ b/cozo-core/Cargo.toml @@ -28,8 +28,9 @@ minimal = ["storage-sqlite"] ## but does not support much concurrency. storage-sqlite = ["dep:sqlite", "dep:sqlite3-src"] ## Enables the [RocksDB](http://rocksdb.org/) backend. -## RocksDB is hard (even impossible) to compile on some platforms, uses quite a lot of resources -## including background threads, but is very performant and supports a high level of concurrency. +## RocksDB is hard to compile on some platforms, uses more resources than SQLite, +## but is very performant and supports an extremely high level of concurrency. +## You can also [fine-tune](https://github.com/cozodb/cozo/blob/main/TUNING_ROCKSDB.md) RocksDB options. storage-rocksdb = ["dep:cozorocks"] ## Enables the graph algorithms.
graph-algo = ["dep:nalgebra"] @@ -50,8 +51,7 @@ nothread = [] ## Enables the [Sled](https://github.com/spacejam/sled) backend. ## Sled is slower than Sqlite for the usual workload of Cozo, can use quite a lot of disk space, -## may not be stable enough, but supports a higher level of concurrency and is much easier to compile -## than RockDB. +## and may not be stable enough. In general you should use RocksDB instead. storage-sled = ["dep:sled"] ## Enables the [TiKV](https://tikv.org/) client backend. ## The only reason that you may want to use this is that your data does not fit in a single machine. @@ -96,7 +96,7 @@ rand = "0.8.5" miette = { version = "5.5.0", features = ["fancy"] } lazy_static = "1.4.0" log = "0.4.17" -env_logger = "0.9.3" +env_logger = "0.10.0" smallvec = { version = "1.8.1", features = ["serde", "write", "union", "const_generics", "const_new"] } smartstring = { version = "1.0.1", features = ["serde"] } serde_json = "1.0.81" @@ -127,7 +127,7 @@ rayon = { version = "1.5.3", optional = true } nalgebra = { version = "0.31.1", optional = true } minreq = { version = "2.6.0", features = ["https-rustls"], optional = true } tikv-jemallocator-global = { version = "0.5.0", optional = true } -cozorocks = { path = "../cozorocks", version = "0.1.1", optional = true } +cozorocks = { path = "../cozorocks", version = "0.1.2", optional = true } sled = { version = "0.34.7", optional = true } tikv-client = { version = "0.1.0", optional = true } tokio = { version = "1.21.2", optional = true } diff --git a/cozo-core/src/storage/rocks.rs b/cozo-core/src/storage/rocks.rs index aebfca71..f7bdc42f 100644 --- a/cozo-core/src/storage/rocks.rs +++ b/cozo-core/src/storage/rocks.rs @@ -7,8 +7,9 @@ */ use std::fs; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; +use log::info; use miette::{miette, IntoDiagnostic, Result, WrapErr}; use cozorocks::{DbBuilder, DbIter, RocksDb, Tx}; @@ -68,17 +69,34 @@ pub fn new_cozo_rocksdb(path: impl AsRef) -> Result> { } }; - let 
mut store_path = path_buf; + let mut store_path = path_buf.clone(); store_path.push("data"); + + let store_path = store_path + .to_str() + .ok_or_else(|| miette!("bad path name"))?; + + let mut options_path = path_buf.clone(); + options_path.push("options"); + + let options_path = if Path::exists(&options_path) { + info!( + "RockDB storage engine will use options file {}", + options_path.to_string_lossy() + ); + options_path + .to_str() + .ok_or_else(|| miette!("bad path name"))? + } else { + "" + }; + let db_builder = builder .create_if_missing(is_new) .use_capped_prefix_extractor(true, KEY_PREFIX_LEN) .use_bloom_filter(true, 9.9, true) - .path( - store_path - .to_str() - .ok_or_else(|| miette!("bad path name"))?, - ); + .path(store_path) + .options_path(options_path); let db = db_builder.build()?; diff --git a/cozorocks/Cargo.toml b/cozorocks/Cargo.toml index 8a29f22c..8124b335 100644 --- a/cozorocks/Cargo.toml +++ b/cozorocks/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cozorocks" -version = "0.1.1" +version = "0.1.2" edition = "2021" license = "MPL-2.0" authors = ["Ziyang Hu"] diff --git a/cozorocks/bridge/db.cpp b/cozorocks/bridge/db.cpp index fa3fc967..d14b3112 100644 --- a/cozorocks/bridge/db.cpp +++ b/cozorocks/bridge/db.cpp @@ -8,6 +8,7 @@ #include #include "db.h" #include "cozorocks/src/bridge/mod.rs.h" +#include "rocksdb/utilities/options_util.h" Options default_db_options() { Options options = Options(); @@ -55,6 +56,36 @@ ColumnFamilyOptions default_cf_options() { shared_ptr open_db(const DbOpts &opts, RocksDbStatus &status) { auto options = default_db_options(); + shared_ptr cache = nullptr; + + if (opts.block_cache_size > 0) { + cache = NewLRUCache(1 * 1024 * 1024 * 1024); + } + + if (!opts.options_path.empty()) { + DBOptions loaded_db_opt; + std::vector loaded_cf_descs; + ConfigOptions config_options; + string options_path = string(opts.options_path); + Status s = LoadOptionsFromFile(config_options, options_path, &loaded_db_opt, + &loaded_cf_descs); 
+ if (!s.ok()) { + write_status(s, status); + return nullptr; + } + + if (cache != nullptr) { + for (size_t i = 0; i < loaded_cf_descs.size(); ++i) { + auto* loaded_bbt_opt = + loaded_cf_descs[0] + .options.table_factory->GetOptions(); + loaded_bbt_opt->block_cache = cache; + } + } + + options = Options(loaded_db_opt, loaded_cf_descs[0].options); + } + if (opts.prepare_for_bulk_load) { options.PrepareForBulkLoad(); } diff --git a/cozorocks/src/bridge/db.rs b/cozorocks/src/bridge/db.rs index 67742df3..f16015b2 100644 --- a/cozorocks/src/bridge/db.rs +++ b/cozorocks/src/bridge/db.rs @@ -20,6 +20,7 @@ impl<'a> Default for DbOpts<'a> { fn default() -> Self { Self { db_path: "", + options_path: "", prepare_for_bulk_load: false, increase_parallelism: 0, optimize_level_style_compaction: false, @@ -37,6 +38,7 @@ impl<'a> Default for DbOpts<'a> { use_fixed_prefix_extractor: false, fixed_prefix_extractor_len: 0, destroy_on_exit: false, + block_cache_size: 0, } } } @@ -46,6 +48,10 @@ impl<'a> DbBuilder<'a> { self.opts.db_path = path; self } + pub fn options_path(mut self, options_path: &'a str) -> Self { + self.opts.options_path = options_path; + self + } pub fn prepare_for_bulk_load(mut self, val: bool) -> Self { self.opts.prepare_for_bulk_load = val; self diff --git a/cozorocks/src/bridge/mod.rs b/cozorocks/src/bridge/mod.rs index 8ca809d1..37a163b5 100644 --- a/cozorocks/src/bridge/mod.rs +++ b/cozorocks/src/bridge/mod.rs @@ -22,6 +22,7 @@ pub(crate) mod ffi { #[derive(Debug, Clone)] struct DbOpts<'a> { pub db_path: &'a str, + pub options_path: &'a str, pub prepare_for_bulk_load: bool, pub increase_parallelism: usize, pub optimize_level_style_compaction: bool, @@ -39,6 +40,7 @@ pub(crate) mod ffi { pub use_fixed_prefix_extractor: bool, pub fixed_prefix_extractor_len: usize, pub destroy_on_exit: bool, + pub block_cache_size: usize, } #[derive(Clone, Debug, Eq, PartialEq)] diff --git a/cozoserver/Cargo.toml b/cozoserver/Cargo.toml index 99fbb09e..8915f8ed 100644 --- 
a/cozoserver/Cargo.toml +++ b/cozoserver/Cargo.toml @@ -49,7 +49,7 @@ storage-tikv = ["cozo/storage-tikv"] cozo = { version = "0.2.2", path = "../cozo-core", default-features = false } clap = { version = "4.0.26", features = ["derive"] } rouille = "3.5.0" -env_logger = "0.9.3" +env_logger = "0.10.0" log = "0.4.17" rand = "0.8.5" serde_derive = "1.0.137"