Options to tune RocksDB engine

main
Ziyang Hu 2 years ago
parent 59aab748ee
commit 6c25a8278e

90
Cargo.lock generated

@ -637,7 +637,7 @@ dependencies = [
[[package]]
name = "cozorocks"
version = "0.1.1"
version = "0.1.2"
dependencies = [
"cc",
"cxx",
@ -761,9 +761,9 @@ dependencies = [
[[package]]
name = "cxx"
version = "1.0.82"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4a41a86530d0fe7f5d9ea779916b7cadd2d4f9add748b99c2c029cbbdfaf453"
checksum = "bdf07d07d6531bfcdbe9b8b739b104610c6508dcc4d63b410585faf338241daf"
dependencies = [
"cc",
"cxxbridge-flags",
@ -773,9 +773,9 @@ dependencies = [
[[package]]
name = "cxx-build"
version = "1.0.82"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06416d667ff3e3ad2df1cd8cd8afae5da26cf9cec4d0825040f88b5ca659a2f0"
checksum = "d2eb5b96ecdc99f72657332953d4d9c50135af1bac34277801cc3937906ebd39"
dependencies = [
"cc",
"codespan-reporting",
@ -788,15 +788,15 @@ dependencies = [
[[package]]
name = "cxxbridge-flags"
version = "1.0.82"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "820a9a2af1669deeef27cb271f476ffd196a2c4b6731336011e0ba63e2c7cf71"
checksum = "ac040a39517fd1674e0f32177648334b0f4074625b5588a64519804ba0553b12"
[[package]]
name = "cxxbridge-macro"
version = "1.0.82"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a08a6e2fcc370a089ad3b4aaf54db3b1b4cee38ddabce5896b33eb693275f470"
checksum = "1362b0ddcfc4eb0a1f57b68bd77dd99f0e826958a96abd0ae9bd092e114ffed6"
dependencies = [
"proc-macro2",
"quote",
@ -871,12 +871,12 @@ dependencies = [
[[package]]
name = "env_logger"
version = "0.9.3"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7"
checksum = "85cdab6a89accf66733ad5a1693a4dcced6aeff64602b634530dd73c1f3ee9f0"
dependencies = [
"atty",
"humantime",
"is-terminal",
"log",
"regex",
"termcolor",
@ -931,9 +931,9 @@ dependencies = [
[[package]]
name = "filetime"
version = "0.2.18"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b9663d381d07ae25dc88dbdf27df458faa83a9b25336bcac83d5e452b5fc9d3"
checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9"
dependencies = [
"cfg-if 1.0.0",
"libc",
@ -1475,9 +1475,9 @@ dependencies = [
[[package]]
name = "ipnet"
version = "2.5.1"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f88c5561171189e69df9d98bcf18fd5f9558300f7ea7b801eb8a0fd748bd8745"
checksum = "ec947b7a4ce12e3b87e353abae7ce124d025b6c7d6c5aea5cc0bcf92e9510ded"
[[package]]
name = "is-terminal"
@ -1579,9 +1579,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "libc"
version = "0.2.137"
version = "0.2.138"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89"
checksum = "db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8"
[[package]]
name = "libloading"
@ -1974,9 +1974,9 @@ dependencies = [
[[package]]
name = "num-format"
version = "0.4.3"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54b862ff8df690cf089058c98b183676a7ed0f974cc08b426800093227cbff3b"
checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3"
dependencies = [
"arrayvec",
"itoa 1.0.4",
@ -2060,9 +2060,9 @@ checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860"
[[package]]
name = "openssl"
version = "0.10.43"
version = "0.10.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "020433887e44c27ff16365eaa2d380547a94544ad509aff6eb5b6e3e0b27b376"
checksum = "29d971fd5722fec23977260f6e81aa67d2f22cadbdc2aa049f1022d9a3be1566"
dependencies = [
"bitflags",
"cfg-if 1.0.0",
@ -2101,9 +2101,9 @@ dependencies = [
[[package]]
name = "openssl-sys"
version = "0.9.78"
version = "0.9.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07d5c8cb6e57b3a3612064d7b18b117912b4ce70955c2504d4b741c9e244b132"
checksum = "5454462c0eced1e97f2ec09036abc8da362e66802f66fd20f86854d9d8cbcbc4"
dependencies = [
"autocfg",
"cc",
@ -2221,9 +2221,9 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
[[package]]
name = "pest"
version = "2.5.0"
version = "2.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f400b0f7905bf702f9f3dc3df5a121b16c54e9e8012c082905fdf09a931861a"
checksum = "cc8bed3549e0f9b0a2a78bf7c0018237a2cdf085eecbbc048e52612438e4e9d0"
dependencies = [
"thiserror",
"ucd-trie",
@ -2231,9 +2231,9 @@ dependencies = [
[[package]]
name = "pest_derive"
version = "2.5.0"
version = "2.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "423c2ba011d6e27b02b482a3707c773d19aec65cc024637aec44e19652e66f63"
checksum = "cdc078600d06ff90d4ed238f0119d84ab5d43dbaad278b0e33a8820293b32344"
dependencies = [
"pest",
"pest_generator",
@ -2241,9 +2241,9 @@ dependencies = [
[[package]]
name = "pest_generator"
version = "2.5.0"
version = "2.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e64e6c2c85031c02fdbd9e5c72845445ca0a724d419aa0bc068ac620c9935c1"
checksum = "28a1af60b1c4148bb269006a750cff8e2ea36aff34d2d96cf7be0b14d1bed23c"
dependencies = [
"pest",
"pest_meta",
@ -2254,9 +2254,9 @@ dependencies = [
[[package]]
name = "pest_meta"
version = "2.5.0"
version = "2.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57959b91f0a133f89a68be874a5c88ed689c19cd729ecdb5d762ebf16c64d662"
checksum = "fec8605d59fc2ae0c6c1aefc0c7c7a9769732017c0ce07f7a9cfffa7b4404f20"
dependencies = [
"once_cell",
"pest",
@ -2895,9 +2895,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustix"
version = "0.36.4"
version = "0.36.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb93e85278e08bb5788653183213d3a60fc242b10cb9be96586f5a73dcb67c23"
checksum = "a3807b5d10909833d3e9acd1eb5fb988f79376ff10fce42937de71a449c4c588"
dependencies = [
"bitflags",
"errno",
@ -3027,9 +3027,9 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]]
name = "serde"
version = "1.0.148"
version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e53f64bb4ba0191d6d0676e1b141ca55047d83b74f5607e6d8eb88126c52c2dc"
checksum = "256b9932320c590e707b94576e3cc1f7c9024d0ee6612dfbcf1cb106cbe8e055"
dependencies = [
"serde_derive",
]
@ -3045,9 +3045,9 @@ dependencies = [
[[package]]
name = "serde_derive"
version = "1.0.148"
version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a55492425aa53521babf6137309e7d34c20bbfbbfcfe2c7f3a047fd1f6b92c0c"
checksum = "b4eae9b04cbffdfd550eb462ed33bc6a1b68c935127d008b27444d08380f94e4"
dependencies = [
"proc-macro2",
"quote",
@ -3183,9 +3183,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
[[package]]
name = "sqlite"
version = "0.30.2"
version = "0.30.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd558ce5689e757e328478da05f6416e5ce566482662fcbe7ce5ee75bf9aa717"
checksum = "12e072cb5fb89b3fe5e9c9584676348feb503f9fb3ae829d9868171bc5372d48"
dependencies = [
"libc",
"sqlite3-sys",
@ -3578,9 +3578,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "tokio"
version = "1.22.0"
version = "1.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d76ce4a75fb488c605c54bf610f221cea8b0dafb53333c1a67e8ee199dcd2ae3"
checksum = "eab6d665857cc6ca78d6e80303a02cea7a7851e85dfbd77cbdc09bd129f1ef46"
dependencies = [
"autocfg",
"bytes",
@ -3590,7 +3590,7 @@ dependencies = [
"num_cpus",
"pin-project-lite",
"socket2",
"winapi",
"windows-sys 0.42.0",
]
[[package]]
@ -3669,9 +3669,9 @@ dependencies = [
[[package]]
name = "typenum"
version = "1.15.0"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
[[package]]
name = "ucd-trie"

@ -214,6 +214,8 @@ which is helpful even if you are not using Rust.
Even if a storage/platform is not officially supported,
you can still try to compile your version to use, maybe with some tweaks in the code.
You can [tune the RocksDB engine](TUNING_ROCKSDB.md) for better performance.
## Architecture
The Cozo database consists of three layers stacked on top of each other,

@ -0,0 +1,19 @@
# Tuning RocksDB for Cozo
RocksDB has a lot of options, and by tuning them you can achieve better performance
for your workload. This is probably unnecessary for 95% of users, but if you are the
remaining 5%, Cozo gives you the options to tune RocksDB directly if you are using the
RocksDB storage engine.
When you create the CozoDB instance with the RocksDB backend option, you are asked to
provide a path to a directory to store the data (it will be created if it does not exist).
If you put a file named `options` inside this directory, the engine will expect this
to be a [RocksDB options file](https://github.com/facebook/rocksdb/wiki/RocksDB-Options-File)
and use it. If you are using `cozoserver`, you will get a log message if
this feature is activated.
Note that improperly set options can make your database misbehave!
In general, you should run your database once, copy the options file `data/OPTIONS-XXXXXX`
from within your database directory, and use that as a base for your customization.
If you are not an expert on RocksDB, we suggest you limit your changes to adjusting those numerical
options of which you have at least a vague understanding.

@ -28,8 +28,9 @@ minimal = ["storage-sqlite"]
## but does not support much concurrency.
storage-sqlite = ["dep:sqlite", "dep:sqlite3-src"]
## Enables the [RocksDB](http://rocksdb.org/) backend.
## RocksDB is hard (even impossible) to compile on some platforms, uses quite a lot of resources
## including background threads, but is very performant and supports a high level of concurrency.
## RocksDB is hard to compile on some platforms, uses more resources than SQLite,
## but is very performant and supports an extremely high level of concurrency.
## You can also [fine-tune](https://github.com/cozodb/cozo/blob/main/TUNING_ROCKSDB.md) RocksDB options.
storage-rocksdb = ["dep:cozorocks"]
## Enables the graph algorithms.
graph-algo = ["dep:nalgebra"]
@ -50,8 +51,7 @@ nothread = []
## Enables the [Sled](https://github.com/spacejam/sled) backend.
## Sled is slower than Sqlite for the usual workload of Cozo, can use quite a lot of disk space,
## may not be stable enough, but supports a higher level of concurrency and is much easier to compile
## than RockDB.
## and may not be stable enough. In general you should use RocksDB instead.
storage-sled = ["dep:sled"]
## Enables the [TiKV](https://tikv.org/) client backend.
## The only reason that you may want to use this is that your data does not fit in a single machine.
@ -96,7 +96,7 @@ rand = "0.8.5"
miette = { version = "5.5.0", features = ["fancy"] }
lazy_static = "1.4.0"
log = "0.4.17"
env_logger = "0.9.3"
env_logger = "0.10.0"
smallvec = { version = "1.8.1", features = ["serde", "write", "union", "const_generics", "const_new"] }
smartstring = { version = "1.0.1", features = ["serde"] }
serde_json = "1.0.81"
@ -127,7 +127,7 @@ rayon = { version = "1.5.3", optional = true }
nalgebra = { version = "0.31.1", optional = true }
minreq = { version = "2.6.0", features = ["https-rustls"], optional = true }
tikv-jemallocator-global = { version = "0.5.0", optional = true }
cozorocks = { path = "../cozorocks", version = "0.1.1", optional = true }
cozorocks = { path = "../cozorocks", version = "0.1.2", optional = true }
sled = { version = "0.34.7", optional = true }
tikv-client = { version = "0.1.0", optional = true }
tokio = { version = "1.21.2", optional = true }

@ -7,8 +7,9 @@
*/
use std::fs;
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use log::info;
use miette::{miette, IntoDiagnostic, Result, WrapErr};
use cozorocks::{DbBuilder, DbIter, RocksDb, Tx};
@ -68,17 +69,34 @@ pub fn new_cozo_rocksdb(path: impl AsRef<str>) -> Result<Db<RocksDbStorage>> {
}
};
let mut store_path = path_buf;
let mut store_path = path_buf.clone();
store_path.push("data");
let store_path = store_path
.to_str()
.ok_or_else(|| miette!("bad path name"))?;
let mut options_path = path_buf.clone();
options_path.push("options");
// Opt in to a user-supplied RocksDB options file: it is only used when a file
// named `options` exists directly inside the database directory.
let options_path = if Path::exists(&options_path) {
    info!(
        "RocksDB storage engine will use options file {}",
        options_path.to_string_lossy()
    );
    // The builder takes `&str`, so a non-UTF-8 path is reported as an error
    // rather than being silently mangled.
    options_path
        .to_str()
        .ok_or_else(|| miette!("bad path name"))?
} else {
    // An empty path means "no options file"; the engine keeps its defaults.
    ""
};
let db_builder = builder
.create_if_missing(is_new)
.use_capped_prefix_extractor(true, KEY_PREFIX_LEN)
.use_bloom_filter(true, 9.9, true)
.path(
store_path
.to_str()
.ok_or_else(|| miette!("bad path name"))?,
);
.path(store_path)
.options_path(options_path);
let db = db_builder.build()?;

@ -1,6 +1,6 @@
[package]
name = "cozorocks"
version = "0.1.1"
version = "0.1.2"
edition = "2021"
license = "MPL-2.0"
authors = ["Ziyang Hu"]

@ -8,6 +8,7 @@
#include <memory>
#include "db.h"
#include "cozorocks/src/bridge/mod.rs.h"
#include "rocksdb/utilities/options_util.h"
Options default_db_options() {
Options options = Options();
@ -55,6 +56,36 @@ ColumnFamilyOptions default_cf_options() {
shared_ptr <RocksDbBridge> open_db(const DbOpts &opts, RocksDbStatus &status) {
auto options = default_db_options();
shared_ptr<Cache> cache = nullptr;
if (opts.block_cache_size > 0) {
cache = NewLRUCache(1 * 1024 * 1024 * 1024);
}
if (!opts.options_path.empty()) {
    // A user-supplied options file replaces the built-in defaults wholesale.
    DBOptions loaded_db_opt;
    std::vector<ColumnFamilyDescriptor> loaded_cf_descs;
    ConfigOptions config_options;
    string options_path = string(opts.options_path);
    Status s = LoadOptionsFromFile(config_options, options_path, &loaded_db_opt,
                                   &loaded_cf_descs);
    if (!s.ok()) {
        write_status(s, status);
        return nullptr;
    }
    if (loaded_cf_descs.empty()) {
        // The first column family's options are used below; indexing an
        // empty vector would be undefined behaviour, so report it instead.
        s = Status::InvalidArgument("options file does not define any column family");
        write_status(s, status);
        return nullptr;
    }

    if (cache != nullptr) {
        // Attach the block cache to every loaded column family. The previous
        // loop always indexed element 0, so only the first one was touched.
        for (auto &cf_desc : loaded_cf_descs) {
            const auto &factory = cf_desc.options.table_factory;
            if (factory == nullptr) {
                continue;
            }
            // GetOptions yields nullptr when the table factory is not block-based.
            auto *loaded_bbt_opt = factory->GetOptions<BlockBasedTableOptions>();
            if (loaded_bbt_opt != nullptr) {
                loaded_bbt_opt->block_cache = cache;
            }
        }
    }

    options = Options(loaded_db_opt, loaded_cf_descs[0].options);
}
if (opts.prepare_for_bulk_load) {
options.PrepareForBulkLoad();
}

@ -20,6 +20,7 @@ impl<'a> Default for DbOpts<'a> {
fn default() -> Self {
Self {
db_path: "",
options_path: "",
prepare_for_bulk_load: false,
increase_parallelism: 0,
optimize_level_style_compaction: false,
@ -37,6 +38,7 @@ impl<'a> Default for DbOpts<'a> {
use_fixed_prefix_extractor: false,
fixed_prefix_extractor_len: 0,
destroy_on_exit: false,
block_cache_size: 0,
}
}
}
@ -46,6 +48,10 @@ impl<'a> DbBuilder<'a> {
self.opts.db_path = path;
self
}
/// Sets the path of the RocksDB options file to hand to the engine.
///
/// The string is stored verbatim in the builder's options; the default
/// value is the empty string.
pub fn options_path(self, options_path: &'a str) -> Self {
    let mut builder = self;
    builder.opts.options_path = options_path;
    builder
}
pub fn prepare_for_bulk_load(mut self, val: bool) -> Self {
self.opts.prepare_for_bulk_load = val;
self

@ -22,6 +22,7 @@ pub(crate) mod ffi {
#[derive(Debug, Clone)]
struct DbOpts<'a> {
pub db_path: &'a str,
pub options_path: &'a str,
pub prepare_for_bulk_load: bool,
pub increase_parallelism: usize,
pub optimize_level_style_compaction: bool,
@ -39,6 +40,7 @@ pub(crate) mod ffi {
pub use_fixed_prefix_extractor: bool,
pub fixed_prefix_extractor_len: usize,
pub destroy_on_exit: bool,
pub block_cache_size: usize,
}
#[derive(Clone, Debug, Eq, PartialEq)]

@ -49,7 +49,7 @@ storage-tikv = ["cozo/storage-tikv"]
cozo = { version = "0.2.2", path = "../cozo-core", default-features = false }
clap = { version = "4.0.26", features = ["derive"] }
rouille = "3.5.0"
env_logger = "0.9.3"
env_logger = "0.10.0"
log = "0.4.17"
rand = "0.8.5"
serde_derive = "1.0.137"

Loading…
Cancel
Save