From 93f07aa388a6c873654d1c506eb58ff9cd292be6 Mon Sep 17 00:00:00 2001 From: Ziyang Hu Date: Thu, 27 Oct 2022 21:14:11 +0800 Subject: [PATCH 1/2] remove comments --- src/data/tuple.rs | 138 +----------------------------------------- src/query/sort.rs | 2 +- src/query/stored.rs | 13 ---- src/runtime/db.rs | 17 ++---- src/runtime/in_mem.rs | 22 ------- 5 files changed, 8 insertions(+), 184 deletions(-) diff --git a/src/data/tuple.rs b/src/data/tuple.rs index fc9388ec..7d3dcad3 100644 --- a/src/data/tuple.rs +++ b/src/data/tuple.rs @@ -32,18 +32,7 @@ impl Tuple { for val in self.0.iter() { ret.encode_datavalue(val); } - // println!("encoded as key {:?}", ret); ret - // for (idx, val) in self.0.iter().enumerate() { - // if idx > 0 { - // let pos = (ret.len() as u32).to_be_bytes(); - // for (i, u) in pos.iter().enumerate() { - // ret[4 * (1 + idx) + i] = *u; - // } - // } - // val.serialize(&mut Serializer::new(&mut ret)).unwrap(); - // } - // ret } pub(crate) fn decode_from_key(key: &[u8]) -> Self { let mut remaining = &key[ENCODED_KEY_MIN_LEN..]; @@ -56,129 +45,4 @@ impl Tuple { Tuple(ret) } } -pub(crate) const ENCODED_KEY_MIN_LEN: usize = 8; -// -// #[derive(Copy, Clone, Debug)] -// pub(crate) struct EncodedTuple<'a>(pub(crate) &'a [u8]); -// -// impl<'a> From<&'a [u8]> for EncodedTuple<'a> { -// fn from(s: &'a [u8]) -> Self { -// EncodedTuple(s) -// } -// } -// -// impl<'a> EncodedTuple<'a> { -// pub(crate) fn prefix(&self) -> RelationId { -// debug_assert!(self.0.len() >= 6, "bad data: {:x?}", self.0); -// let id = u64::from_be_bytes([ -// 0, 0, self.0[0], self.0[1], self.0[2], self.0[3], self.0[4], self.0[5], -// ]); -// RelationId(id) -// } -// pub(crate) fn arity(&self) -> usize { -// if self.0.len() == 6 { -// return 0; -// } -// debug_assert!(self.0.len() >= 8, "bad data: {:x?}", self.0); -// u16::from_be_bytes([self.0[6], self.0[7]]) as usize -// } -// fn force_get(&self, idx: usize) -> DataValue { -// let pos = if idx == 0 { -// let arity = u16::from_be_bytes([self.0[6], self.0[7]]) as usize; -// 4 * (arity + 1) -// } else { -// let len_pos = (idx + 1) * 4; -// u32::from_be_bytes([ -// self.0[len_pos], -// self.0[len_pos + 1], -// self.0[len_pos + 2], -// self.0[len_pos + 3], -// ]) as usize -// }; -// rmp_serde::from_slice(&self.0[pos..]).unwrap() -// } -// pub(crate) fn get(&self, idx: usize) -> DataValue { -// let pos = if idx == 0 { -// 4 * (self.arity() + 1) -// } else { -// let len_pos = (idx + 1) * 4; -// debug_assert!(self.0.len() >= len_pos + 4, "bad data: {:x?}", self.0); -// u32::from_be_bytes([ -// self.0[len_pos], -// self.0[len_pos + 1], -// self.0[len_pos + 2], -// self.0[len_pos + 3], -// ]) as usize -// }; -// debug_assert!( -// pos < self.0.len(), -// "bad data length for data: {:x?}", -// self.0 -// ); -// rmp_serde::from_slice(&self.0[pos..]).expect("data corruption when getting from tuple") -// } -// -// pub(crate) fn iter(&self) -> EncodedTupleIter<'a> { -// EncodedTupleIter { -// tuple: *self, -// size: 0, -// pos: 0, -// } -// } -// pub(crate) fn decode(&self) -> Tuple { -// Tuple(self.iter().collect()) -// } -// } -// -// pub(crate) struct EncodedTupleIter<'a> { -// tuple: EncodedTuple<'a>, -// size: usize, -// pos: usize, -// } -// -// impl<'a> Iterator for EncodedTupleIter<'a> { -// type Item = DataValue; -// -// fn next(&mut self) -> Option { -// if self.size == 0 { -// let arity = self.tuple.arity(); -// self.size = arity; -// } -// if self.pos == self.size { -// None -// } else { -// let pos = self.pos; -// self.pos += 1; -// Some(self.tuple.get(pos)) -// } -// } -// } -// -// pub(crate) fn rusty_scratch_cmp(a: &[u8], b: &[u8]) -> i8 { -// match compare_tuple_keys(a, b) { -// Ordering::Greater => 1, -// Ordering::Equal => 0, -// Ordering::Less => -1, -// } -// } -// -// -// pub(crate) fn compare_tuple_keys(a: &[u8], b: &[u8]) -> Ordering { -// let a = EncodedTuple(a); -// let b = EncodedTuple(b); -// match a.prefix().cmp(&b.prefix()) { -// Ordering::Equal => {} -// o => return o, -// } -// let a_len = a.arity(); -// let b_len = b.arity(); -// for idx in 0..min(a_len, b_len) { -// let av = a.force_get(idx); -// let bv = b.force_get(idx); -// match av.cmp(&bv) { -// Ordering::Equal => {} -// o => return o, -// } -// } -// a_len.cmp(&b_len) -// } +pub(crate) const ENCODED_KEY_MIN_LEN: usize = 8; \ No newline at end of file diff --git a/src/query/sort.rs b/src/query/sort.rs index f7910fc4..3b4d7c66 100644 --- a/src/query/sort.rs +++ b/src/query/sort.rs @@ -2,7 +2,7 @@ * Copyright 2022, The Cozo Project Authors. Licensed under AGPL-3 or later. */ -use std::cmp::{Ordering}; +use std::cmp::Ordering; use std::collections::BTreeMap; use itertools::Itertools; diff --git a/src/query/stored.rs b/src/query/stored.rs index 01205244..cf49643f 100644 --- a/src/query/stored.rs +++ b/src/query/stored.rs @@ -124,10 +124,6 @@ impl SessionTx { tup.0.push(val); remaining = nxt; } - // let v_tup = EncodedTuple(&existing); - // if v_tup.arity() > 0 { - // tup.0.extend(v_tup.decode().0); - // } } old_tuples.push(DataValue::List(tup.0)); } @@ -305,9 +301,6 @@ impl SessionTx { let key = relation_store.adhoc_encode_key(&extracted, *span)?; let val = relation_store.adhoc_encode_val(&extracted, *span)?; - // println!("adhoc encoded key {:?}, {:?}", key, extracted); - // println!("adhoc encoded val {:?}", val); - if has_triggers { if let Some(existing) = self.tx.get(&key, false)? { let mut tup = extracted.clone(); @@ -317,12 +310,6 @@ impl SessionTx { tup.0.push(val); remaining = nxt; } - // if !existing.is_empty() { - // let v_tup = EncodedTuple(&existing); - // if v_tup.arity() > 0 { - // tup.0.extend(v_tup.decode().0); - // } - // } old_tuples.push(DataValue::List(tup.0)); } diff --git a/src/runtime/db.rs b/src/runtime/db.rs index d4eb04d9..7a811f07 100644 --- a/src/runtime/db.rs +++ b/src/runtime/db.rs @@ -2,17 +2,17 @@ * Copyright 2022, The Cozo Project Authors. Licensed under AGPL-3 or later. */ -use std::{fs, thread}; use std::collections::BTreeMap; use std::fmt::{Debug, Formatter}; use std::path::PathBuf; -use std::sync::{Arc, Mutex}; use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use std::{fs, thread}; use either::{Left, Right}; use itertools::Itertools; -use miette::{bail, Diagnostic, ensure, Result, WrapErr}; +use miette::{bail, ensure, Diagnostic, Result, WrapErr}; use serde_json::json; use smartstring::SmartString; use thiserror::Error; @@ -22,14 +22,13 @@ use cozorocks::{DbBuilder, RocksDb}; use crate::data::json::JsonValue; use crate::data::program::{InputProgram, QueryAssertion, RelationOp}; use crate::data::symb::Symbol; -use crate::data::tuple::{SCRATCH_DB_KEY_PREFIX_LEN, Tuple}; +use crate::data::tuple::{Tuple, SCRATCH_DB_KEY_PREFIX_LEN}; use crate::data::value::{DataValue, LARGEST_UTF_CHAR}; -use crate::parse::{CozoScript, parse_script, SourceSpan}; use crate::parse::sys::SysOp; +use crate::parse::{parse_script, CozoScript, SourceSpan}; use crate::query::compile::{CompiledProgram, CompiledRule, CompiledRuleSet}; use crate::query::relation::{ - FilteredRA, InMemRelationRA, InnerJoin, NegJoin, RelAlgebra, ReorderRA, StoredRA, - UnificationRA, + FilteredRA, InMemRelationRA, InnerJoin, NegJoin, RelAlgebra, ReorderRA, StoredRA, UnificationRA, }; use crate::runtime::relation::{RelationHandle, RelationId}; use crate::runtime::transact::SessionTx; @@ -124,7 +123,6 @@ impl Db { let db_builder = builder .create_if_missing(is_new) .use_capped_prefix_extractor(true, SCRATCH_DB_KEY_PREFIX_LEN) - // .use_custom_comparator("cozo_rusty_cmp", rusty_scratch_cmp, false) .use_bloom_filter(true, 9.9, true) .path(store_path.to_str().unwrap()); @@ -729,9 +727,6 @@ impl Db { if upper.as_slice() <= k_slice { break; } - // if compare_tuple_keys(&upper, k_slice) != Greater { - // break; - // } let meta = RelationHandle::decode(v_slice)?; let n_keys = meta.metadata.keys.len(); let n_dependents = meta.metadata.non_keys.len(); diff --git a/src/runtime/in_mem.rs b/src/runtime/in_mem.rs index 931f92a1..d466d243 100644 --- a/src/runtime/in_mem.rs +++ b/src/runtime/in_mem.rs @@ -246,7 +246,6 @@ impl InMemRelation { let val = &tuple.0[invert_indices[idx]]; if let Some((aggr_op, _aggr_args)) = aggr { let op = aggr_op.normal_op.as_mut().unwrap(); - // (aggr_op.meet_combine)(&mut aggr_res[idx], val, aggr_args)?; op.set(val)?; } } @@ -365,24 +364,3 @@ impl InMemRelation { res.into_iter() } } -// -// struct SortedIter { -// it: DbIter, -// started: bool, -// } -// -// impl Iterator for SortedIter { -// type Item = Result; -// fn next(&mut self) -> Option { -// if !self.started { -// self.started = true; -// } else { -// self.it.next(); -// } -// match self.it.pair() { -// Err(e) => Some(Err(e.into())), -// Ok(None) => None, -// Ok(Some((_, v_slice))) => Some(Ok(EncodedTuple(v_slice).decode())), -// } -// } -// } From 51fafa52cbdc5bff9d3899a4f5595794dffee311 Mon Sep 17 00:00:00 2001 From: Ziyang Hu Date: Thu, 27 Oct 2022 21:33:41 +0800 Subject: [PATCH 2/2] remove unnecessary code --- cozorocks/bridge/db.cpp | 12 +------- cozorocks/bridge/db.h | 35 +--------------------- cozorocks/build.rs | 58 ++++++++++++++++++------------------- cozorocks/src/bridge/db.rs | 19 ------------ cozorocks/src/bridge/mod.rs | 4 --- src/data/memcmp.rs | 2 +- src/parse/query.rs | 4 +-- src/runtime/db.rs | 2 +- 8 files changed, 35 insertions(+), 101 deletions(-) diff --git a/cozorocks/bridge/db.cpp b/cozorocks/bridge/db.cpp index 5b811367..fb328d1d 100644 --- a/cozorocks/bridge/db.cpp +++ b/cozorocks/bridge/db.cpp @@ -46,8 +46,7 @@ ColumnFamilyOptions default_cf_options() { return options; } -shared_ptr open_db(const DbOpts &opts, RocksDbStatus &status, bool use_cmp, - RustComparatorFn cmp_impl) { +shared_ptr open_db(const DbOpts &opts, RocksDbStatus &status) { auto options = default_db_options(); if (opts.prepare_for_bulk_load) { @@ -82,20 +81,11 @@ shared_ptr open_db(const DbOpts &opts, RocksDbStatus &status, boo if (opts.use_fixed_prefix_extractor) { options.prefix_extractor.reset(NewFixedPrefixTransform(opts.fixed_prefix_extractor_len)); } - RustComparator *pri_cmp = nullptr; - if (use_cmp) { - pri_cmp = new RustComparator( - string(opts.comparator_name), - opts.comparator_different_bytes_can_be_equal, - cmp_impl); - options.comparator = pri_cmp; - } options.create_missing_column_families = true; shared_ptr db = make_shared(); db->db_path = string(opts.db_path); - db->pri_comparator.reset(pri_cmp); TransactionDB *txn_db = nullptr; write_status( diff --git a/cozorocks/bridge/db.h b/cozorocks/bridge/db.h index b666012f..be20b996 100644 --- a/cozorocks/bridge/db.h +++ b/cozorocks/bridge/db.h @@ -41,7 +41,6 @@ struct SstFileWriterBridge { }; struct RocksDbBridge { - unique_ptr pri_comparator; unique_ptr db; bool destroy_on_exit; @@ -108,39 +107,7 @@ struct RocksDbBridge { ~RocksDbBridge(); }; -//typedef int8_t (*CmpFn)(RustBytes a, RustBytes b); -typedef rust::Fn, rust::Slice)> RustComparatorFn; - -class RustComparator : public Comparator { -public: - inline RustComparator(string name_, bool can_different_bytes_be_equal_, RustComparatorFn f) : - name(std::move(name_)), - ext_cmp(f), - can_different_bytes_be_equal(can_different_bytes_be_equal_) { - } - - [[nodiscard]] inline int Compare(const Slice &a, const Slice &b) const override { - return ext_cmp(convert_slice_back(a), convert_slice_back(b)); - } - - [[nodiscard]] inline const char *Name() const override { - return name.c_str(); - } - - [[nodiscard]] inline bool CanKeysWithDifferentByteContentsBeEqual() const override { - return can_different_bytes_be_equal; - } - - inline void FindShortestSeparator(string *, const Slice &) const override {} - - inline void FindShortSuccessor(string *) const override {} - - string name; - RustComparatorFn ext_cmp; - bool can_different_bytes_be_equal; -}; - shared_ptr -open_db(const DbOpts &opts, RocksDbStatus &status, bool use_cmp, RustComparatorFn cmp_impl); +open_db(const DbOpts &opts, RocksDbStatus &status); #endif //COZOROCKS_DB_H diff --git a/cozorocks/build.rs b/cozorocks/build.rs index c44a8dd8..e475bc56 100644 --- a/cozorocks/build.rs +++ b/cozorocks/build.rs @@ -8,35 +8,6 @@ use std::path::Path; use std::{env, fs, process::Command}; fn main() { - if !Path::new("rocksdb/AUTHORS").exists() { - update_submodules(); - } - - if !try_to_find_and_link_lib("ROCKSDB") { - println!("cargo:rerun-if-changed=rocksdb/"); - fail_on_empty_directory("rocksdb"); - build_rocksdb(); - } else { - let target = env::var("TARGET").unwrap(); - // according to https://github.com/alexcrichton/cc-rs/blob/master/src/lib.rs#L2189 - if target.contains("apple") || target.contains("freebsd") || target.contains("openbsd") { - println!("cargo:rustc-link-lib=dylib=c++"); - } else if target.contains("linux") { - println!("cargo:rustc-link-lib=dylib=stdc++"); - } - } - - // Allow dependent crates to locate the sources and output directory of - // this crate. Notably, this allows a dependent crate to locate the RocksDB - // sources and built archive artifacts provided by this crate. - println!( - "cargo:cargo_manifest_dir={}", - env::var("CARGO_MANIFEST_DIR").unwrap() - ); - println!("cargo:out_dir={}", env::var("OUT_DIR").unwrap()); - - - let target = env::var("TARGET").unwrap(); let mut builder = cxx_build::bridge("src/bridge/mod.rs"); @@ -107,6 +78,35 @@ fn main() { println!("cargo:rerun-if-changed=bridge/iter.h"); println!("cargo:rerun-if-changed=bridge/tx.h"); println!("cargo:rerun-if-changed=bridge/tx.cpp"); + + + + if !Path::new("rocksdb/AUTHORS").exists() { + update_submodules(); + } + + if !try_to_find_and_link_lib("ROCKSDB") { + println!("cargo:rerun-if-changed=rocksdb/"); + fail_on_empty_directory("rocksdb"); + build_rocksdb(); + } else { + let target = env::var("TARGET").unwrap(); + // according to https://github.com/alexcrichton/cc-rs/blob/master/src/lib.rs#L2189 + if target.contains("apple") || target.contains("freebsd") || target.contains("openbsd") { + println!("cargo:rustc-link-lib=dylib=c++"); + } else if target.contains("linux") { + println!("cargo:rustc-link-lib=dylib=stdc++"); + } + } + + // Allow dependent crates to locate the sources and output directory of + // this crate. Notably, this allows a dependent crate to locate the RocksDB + // sources and built archive artifacts provided by this crate. + println!( + "cargo:cargo_manifest_dir={}", + env::var("CARGO_MANIFEST_DIR").unwrap() + ); + println!("cargo:out_dir={}", env::var("OUT_DIR").unwrap()); } fn link(name: &str, bundled: bool) { diff --git a/cozorocks/src/bridge/db.rs b/cozorocks/src/bridge/db.rs index 0ef2f9c4..c38b20bc 100644 --- a/cozorocks/src/bridge/db.rs +++ b/cozorocks/src/bridge/db.rs @@ -35,8 +35,6 @@ impl<'a> Default for DbOpts<'a> { capped_prefix_extractor_len: 0, use_fixed_prefix_extractor: false, fixed_prefix_extractor_len: 0, - comparator_name: "", - comparator_different_bytes_can_be_equal: false, destroy_on_exit: false, } } @@ -101,29 +99,12 @@ impl<'a> DbBuilder<'a> { self.opts.fixed_prefix_extractor_len = len; self } - pub fn use_custom_comparator( - mut self, - name: &'a str, - cmp: fn(&[u8], &[u8]) -> i8, - different_bytes_can_be_equal: bool, - ) -> Self { - self.cmp_fn = Some(cmp); - self.opts.comparator_name = name; - self.opts.comparator_different_bytes_can_be_equal = different_bytes_can_be_equal; - self - } pub fn build(self) -> Result { let mut status = RocksDbStatus::default(); - fn dummy(_a: &[u8], _b: &[u8]) -> i8 { - 0 - } - let result = open_db( &self.opts, &mut status, - self.cmp_fn.is_some(), - self.cmp_fn.unwrap_or(dummy), ); if status.is_ok() { Ok(RocksDb { inner: result }) diff --git a/cozorocks/src/bridge/mod.rs b/cozorocks/src/bridge/mod.rs index 0fae8bb1..95228ac8 100644 --- a/cozorocks/src/bridge/mod.rs +++ b/cozorocks/src/bridge/mod.rs @@ -34,8 +34,6 @@ pub(crate) mod ffi { pub capped_prefix_extractor_len: usize, pub use_fixed_prefix_extractor: bool, pub fixed_prefix_extractor_len: usize, - pub comparator_name: &'a str, - pub comparator_different_bytes_can_be_equal: bool, pub destroy_on_exit: bool, } @@ -121,8 +119,6 @@ pub(crate) mod ffi { fn open_db( builder: &DbOpts, status: &mut RocksDbStatus, - use_cmp: bool, - cmp_impl: fn(&[u8], &[u8]) -> i8, ) -> SharedPtr; fn transact(self: &RocksDbBridge) -> UniquePtr; fn del_range( diff --git a/src/data/memcmp.rs b/src/data/memcmp.rs index 67cd4d56..2b41c6a9 100644 --- a/src/data/memcmp.rs +++ b/src/data/memcmp.rs @@ -180,7 +180,7 @@ impl Num { n_bytes[6] = subtag[0]; n_bytes[7] = subtag[1]; let n = BigEndian::read_i64(&n_bytes); - return (Num::I(n), remaining); + (Num::I(n), remaining) } } diff --git a/src/parse/query.rs b/src/parse/query.rs index 0dfc2f63..bf5921d3 100644 --- a/src/parse/query.rs +++ b/src/parse/query.rs @@ -199,7 +199,7 @@ pub(crate) fn parse_query( let arity = algo_impl.arity(&options, &head, span)?; ensure!( - arity == 0 || head.len() == 0 || arity == head.len(), + arity == 0 || head.is_empty() || arity == head.len(), FixedRuleHeadArityMismatch(arity, head.len(), span) ); progs.insert( @@ -775,7 +775,7 @@ fn make_empty_const_rule(prog: &mut InputProgram, bindings: &[Symbol]) { }, ); prog.prog.insert( - entry_symbol.clone(), + entry_symbol, InputInlineRulesOrAlgo::Algo { algo: AlgoApply { algo: AlgoHandle { diff --git a/src/runtime/db.rs b/src/runtime/db.rs index 7a811f07..ef29824d 100644 --- a/src/runtime/db.rs +++ b/src/runtime/db.rs @@ -609,7 +609,7 @@ impl Db { } else { Right(sorted_iter) }; - let sorted_iter = sorted_iter.map(|t| Ok(t)); + let sorted_iter = sorted_iter.map(Ok); if let Some((meta, relation_op)) = &input_program.out_opts.store_relation { let to_clear = tx .execute_relation(