From 2393efb590d058ad90c7e66d115637e9c440c309 Mon Sep 17 00:00:00 2001 From: Sayan Nandan Date: Sun, 18 Jul 2021 08:53:24 +0530 Subject: [PATCH] Encode model bytemark into `PARTMAP` --- server/src/coredb/table.rs | 53 +++++++++++++++-------------- server/src/storage/bytemarks.rs | 46 +++++++++++++++++++++++++ server/src/storage/flush.rs | 26 +++++++++------ server/src/storage/interface.rs | 3 +- server/src/storage/mod.rs | 43 ++++++++++-------------- server/src/storage/tests.rs | 59 ++++++++++++++++++++++----------- server/src/storage/unflush.rs | 50 +++++++++++++++++++--------- 7 files changed, 183 insertions(+), 97 deletions(-) create mode 100644 server/src/storage/bytemarks.rs diff --git a/server/src/coredb/table.rs b/server/src/coredb/table.rs index fa51b698..db227862 100644 --- a/server/src/coredb/table.rs +++ b/server/src/coredb/table.rs @@ -29,6 +29,7 @@ use crate::coredb::htable::Coremap; use crate::coredb::Data; use crate::kvengine::KVEngine; +use crate::storage::bytemarks; #[derive(Debug)] pub enum DataModel { @@ -55,35 +56,38 @@ impl Table { None } } + /// Returns the storage type as an 8-bit uint pub const fn storage_type(&self) -> u8 { self.volatile as u8 } - pub fn kve_from_model_code_and_data( - modelcode: u8, - volatile: bool, + /// Returns the volatility of the table + pub const fn is_volatile(&self) -> bool { + self.volatile + } + /// Create a new KVE Table with the provided settings + pub fn new_kve_with_data( data: Coremap, - ) -> Option { - let data = match modelcode { - 0 => KVEngine::init_with_data(false, false, data), - 1 => KVEngine::init_with_data(false, true, data), - 2 => KVEngine::init_with_data(true, true, data), - 3 => KVEngine::init_with_data(true, false, data), - _ => return None, - }; - Some(Self { - model_store: DataModel::KV(data), + volatile: bool, + k_enc: bool, + v_enc: bool, + ) -> Self { + Self { volatile, - }) + model_store: DataModel::KV(KVEngine::init_with_data(k_enc, v_enc, data)), + } } - pub fn kve_from_model_code(modelcode: u8) -> Option { - Self::kve_from_model_code_and_data(modelcode, false, Coremap::new()) + /// Create a new kve with default settings but with provided volatile configuration + pub fn new_kve_with_volatile(volatile: bool) -> Self { + Self::new_kve_with_data(Coremap::new(), volatile, false, false) } + /// Returns the default kve: + /// - `k_enc`: `false` + /// - `v_enc`: `false` + /// - `volatile`: `false` pub fn new_default_kve() -> Self { - match Self::kve_from_model_code(0) { - Some(k) => k, - None => unsafe { core::hint::unreachable_unchecked() }, - } + Self::new_kve_with_data(Coremap::new(), false, false, false) } + /// Returns the model code. See [`bytemarks`] for more info pub fn get_model_code(&self) -> u8 { match &self.model_store { DataModel::KV(kvs) => { @@ -97,21 +101,22 @@ impl Table { if kbin { if vbin { // both k + v are str - 2 + bytemarks::BYTEMARK_MODEL_KV_STR_STR } else { // only k is str - 3 + bytemarks::BYTEMARK_MODEL_KV_STR_BIN } } else if vbin { // k is bin, v is str - 1 + bytemarks::BYTEMARK_MODEL_KV_BIN_STR } else { // both are bin - 0 + bytemarks::BYTEMARK_MODEL_KV_BIN_BIN } } } } + /// Returns the inner data model pub fn get_model_ref(&self) -> &DataModel { &self.model_store } diff --git a/server/src/storage/bytemarks.rs b/server/src/storage/bytemarks.rs new file mode 100644 index 00000000..7804eb18 --- /dev/null +++ b/server/src/storage/bytemarks.rs @@ -0,0 +1,46 @@ +/* + * Created on Sun Jul 18 2021 + * + * This file is a part of Skytable + * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source + * NoSQL database written by Sayan Nandan ("the Author") with the + * vision to provide flexibility in data modelling without compromising + * on performance, queryability or scalability. + * + * Copyright (c) 2021, Sayan Nandan + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * +*/ + +//! # Bytemarks +//! +//! Bytemarks are single bytes that are written to parts of files to provide metadata. This module +//! contains a collection of these + +// model +/// KVE model bytemark with key:bin, val:bin +pub const BYTEMARK_MODEL_KV_BIN_BIN: u8 = 0; +/// KVE model bytemark with key:bin, val:str +pub const BYTEMARK_MODEL_KV_BIN_STR: u8 = 1; +/// KVE model bytemark with key:str, val:str +pub const BYTEMARK_MODEL_KV_STR_STR: u8 = 2; +/// KVE model bytemark with key:str, val:bin +pub const BYTEMARK_MODEL_KV_STR_BIN: u8 = 3; + +// storage bym +/// Persistent storage bytemark +pub const BYTEMARK_STORAGE_PERSISTENT: u8 = 0; +/// Volatile storage bytemark +pub const BYTEMARK_STORAGE_VOLATILE: u8 = 1; diff --git a/server/src/storage/flush.rs b/server/src/storage/flush.rs index 8009de03..dae1d877 100644 --- a/server/src/storage/flush.rs +++ b/server/src/storage/flush.rs @@ -72,18 +72,22 @@ pub mod oneshot { /// No `partmap` handling. Just flushes the table to the expected location pub fn flush_table(tableid: &ObjectID, ksid: &ObjectID, table: &Table) -> IoResult<()> { - let path = tbl_path!(tableid, ksid); - let mut file = File::create(&path)?; - let modelcode = table.get_model_code(); - match table.get_model_ref() { - DataModel::KV(kve) => super::interface::serialize_map_into_slow_buffer( - &mut file, - kve.__get_inner_ref(), - modelcode, - )?, + if table.is_volatile() { + // no flushing needed + Ok(()) + } else { + // fine, this needs to be flushed + let path = tbl_path!(tableid, ksid); + let mut file = File::create(&path)?; + match table.get_model_ref() { + DataModel::KV(kve) => super::interface::serialize_map_into_slow_buffer( + &mut file, + kve.__get_inner_ref(), + )?, + } + file.sync_all()?; + fs::rename(&path, &path[..path.len() - 1]) } - file.sync_all()?; - fs::rename(&path, &path[..path.len() - 1]) } /// Flushes an entire keyspace to the expected location. No `partmap` or `preload` handling diff --git a/server/src/storage/interface.rs b/server/src/storage/interface.rs index cccd4432..28727966 100644 --- a/server/src/storage/interface.rs +++ b/server/src/storage/interface.rs @@ -82,10 +82,9 @@ pub fn create_tree(memroot: Memstore) -> IoResult<()> { pub fn serialize_map_into_slow_buffer( buffer: &mut T, map: &Coremap, - model_code: u8, ) -> IoResult<()> { let mut buffer = BufWriter::new(buffer); - super::se::raw_serialize_map(map, &mut buffer, model_code)?; + super::se::raw_serialize_map(map, &mut buffer)?; buffer.flush()?; Ok(()) } diff --git a/server/src/storage/mod.rs b/server/src/storage/mod.rs index 5189adbc..62a23696 100644 --- a/server/src/storage/mod.rs +++ b/server/src/storage/mod.rs @@ -63,6 +63,7 @@ use std::io::Write; #[macro_use] mod macros; // endof do not mess +pub mod bytemarks; pub mod flush; pub mod interface; pub mod preload; @@ -190,16 +191,13 @@ mod se { use super::*; use crate::coredb::memstore::Keyspace; /// Serialize a map into a _writable_ thing - pub fn serialize_map( - map: &Coremap, - model_code: u8, - ) -> Result, std::io::Error> { + pub fn serialize_map(map: &Coremap) -> Result, std::io::Error> { /* - [1B: Model Mark][LEN:8B][KLEN:8B|VLEN:8B][K][V][KLEN:8B][VLEN:8B]... + [LEN:8B][KLEN:8B|VLEN:8B][K][V][KLEN:8B][VLEN:8B]... */ // write the len header first let mut w = Vec::with_capacity(128); - self::raw_serialize_map(map, &mut w, model_code)?; + self::raw_serialize_map(map, &mut w)?; Ok(w) } @@ -207,10 +205,8 @@ mod se { pub fn raw_serialize_map( map: &Coremap, w: &mut W, - model_code: u8, ) -> std::io::Result<()> { unsafe { - w.write_all(raw_byte_repr(&model_code))?; w.write_all(raw_byte_repr(&to_64bit_little_endian!(map.len())))?; // now the keys and values for kv in map.iter() { @@ -244,7 +240,7 @@ mod se { /// Generate a partition map for the given keyspace /// ```text - /// [8B: EXTENT]([8B: LEN][?B: PARTITION ID][1B: Storage type])* + /// [8B: EXTENT]([8B: LEN][?B: PARTITION ID][1B: Storage type][1B: Model type])* /// ``` pub fn raw_serialize_partmap(w: &mut W, keyspace: &Keyspace) -> std::io::Result<()> { unsafe { @@ -259,6 +255,8 @@ mod se { w.write_all(table.key())?; // now storage type w.write_all(raw_byte_repr(&table.storage_type()))?; + // now model type + w.write_all(raw_byte_repr(&table.get_model_code()))?; } } Ok(()) @@ -336,8 +334,8 @@ mod de { } } - /// Deserializes a map-like set which has an 1B _bytemark_ for every entry - pub fn deserialize_set_ctype_bytemark(data: &[u8]) -> Option> + /// Deserializes a map-like set which has an 2x1B _bytemark_ for every entry + pub fn deserialize_set_ctype_bytemark(data: &[u8]) -> Option> where T: DeserializeFrom + Eq + Hash, { @@ -362,7 +360,7 @@ mod de { } let lenkey = transmute_len(ptr); ptr = ptr.add(8); - if (ptr.add(lenkey + 1)) > end_ptr { + if (ptr.add(lenkey + 2)) > end_ptr { // not enough data left return None; } @@ -373,10 +371,12 @@ mod de { let key = T::from_slice(slice::from_raw_parts(ptr, lenkey)); // move the ptr ahead; done with the key ptr = ptr.add(lenkey); - let bytemark = ptr::read(ptr); + let bytemark_a = ptr::read(ptr); + ptr = ptr.add(1); + let bytemark_b = ptr::read(ptr); ptr = ptr.add(1); // push it in - if set.insert(key, bytemark).is_some() { + if set.insert(key, (bytemark_a, bytemark_b)).is_some() { // repeat?; that's not what we wanted return None; } @@ -391,9 +391,9 @@ mod de { } } /// Deserialize a file that contains a serialized map. This also returns the model code - pub fn deserialize_map(data: Vec) -> Option<(Coremap, u8)> { + pub fn deserialize_map(data: Vec) -> Option> { // First read the length header - if data.len() < 9 { + if data.len() < 8 { // so the file doesn't even have the length/model header? noice, just return None } else { @@ -407,15 +407,6 @@ mod de { and we won't read into others' memory (or corrupt our own) */ let mut ptr = data.as_ptr(); - let modelcode: u8 = ptr::read(ptr); - - // model check - if modelcode > 3 { - // this model isn't supposed to have more than 3. Corrupted data - return None; - } - - ptr = ptr.add(1); // so we have 8B. Just unsafe access and transmute it; nobody cares let len = transmute_len(ptr); // move 8 bytes ahead since we're done with len @@ -447,7 +438,7 @@ mod de { hm.upsert(key, val); } if ptr == end_ptr { - Some((hm, modelcode)) + Some(hm) } else { // nope, someone gave us more data None diff --git a/server/src/storage/tests.rs b/server/src/storage/tests.rs index b2ba84c6..518a8946 100644 --- a/server/src/storage/tests.rs +++ b/server/src/storage/tests.rs @@ -29,10 +29,9 @@ use super::*; #[test] fn test_serialize_deserialize_empty() { let cmap = Coremap::new(); - let ser = se::serialize_map(&cmap, 0).unwrap(); - let (de, model_code) = de::deserialize_map(ser).unwrap(); + let ser = se::serialize_map(&cmap).unwrap(); + let de = de::deserialize_map(ser).unwrap(); assert!(de.len() == 0); - assert_eq!(0, model_code); } #[test] @@ -40,13 +39,12 @@ fn test_ser_de_few_elements() { let cmap = Coremap::new(); cmap.upsert("sayan".into(), "writes code".into()); cmap.upsert("supersayan".into(), "writes super code".into()); - let ser = se::serialize_map(&cmap, 0).unwrap(); - let (de, modelcode) = de::deserialize_map(ser).unwrap(); + let ser = se::serialize_map(&cmap).unwrap(); + let de = de::deserialize_map(ser).unwrap(); assert!(de.len() == cmap.len()); assert!(de .iter() .all(|kv| cmap.get(kv.key()).unwrap().eq(kv.value()))); - assert_eq!(modelcode, 0); } cfg_test!( @@ -66,13 +64,12 @@ cfg_test!( .zip(values.iter()) .map(|(k, v)| (Data::from(k.to_owned()), Data::from(v.to_owned()))) .collect(); - let ser = se::serialize_map(&cmap, 0).unwrap(); - let (de, modelcode) = de::deserialize_map(ser).unwrap(); + let ser = se::serialize_map(&cmap).unwrap(); + let de = de::deserialize_map(ser).unwrap(); assert!(de .iter() .all(|kv| cmap.get(kv.key()).unwrap().eq(kv.value()))); assert!(de.len() == cmap.len()); - assert_eq!(modelcode, 0); } #[test] @@ -89,7 +86,7 @@ cfg_test!( .zip(values.iter()) .map(|(k, v)| (Data::from(k.to_owned()), Data::from(v.to_owned()))) .collect(); - let mut se = se::serialize_map(&cmap, 0).unwrap(); + let mut se = se::serialize_map(&cmap).unwrap(); // random chop se.truncate(124); // corrupted @@ -113,7 +110,7 @@ cfg_test!( .zip(values.iter()) .map(|(k, v)| (Data::from(k.to_owned()), Data::from(v.to_owned()))) .collect(); - let mut se = se::serialize_map(&cmap, 0).unwrap(); + let mut se = se::serialize_map(&cmap).unwrap(); // random patch let patch: Vec = (0u16..500u16).into_iter().map(|v| (v >> 7) as u8).collect(); se.extend(patch); @@ -199,11 +196,23 @@ mod bytemark_set_tests { let ks = Keyspace::empty_default(); let mut v = Vec::new(); se::raw_serialize_partmap(&mut v, &ks).unwrap(); - let ret: HashMap = de::deserialize_set_ctype_bytemark(&v).unwrap(); + let ret: HashMap = de::deserialize_set_ctype_bytemark(&v).unwrap(); let mut expected = HashMap::new(); unsafe { - expected.insert(ObjectID::from_slice("default"), 0); - expected.insert(ObjectID::from_slice("_system"), 0); + expected.insert( + ObjectID::from_slice("default"), + ( + bytemarks::BYTEMARK_STORAGE_PERSISTENT, + bytemarks::BYTEMARK_MODEL_KV_BIN_BIN, + ), + ); + expected.insert( + ObjectID::from_slice("_system"), + ( + bytemarks::BYTEMARK_STORAGE_PERSISTENT, + bytemarks::BYTEMARK_MODEL_KV_BIN_BIN, + ), + ); } assert_hmeq!(expected, ret); } @@ -213,22 +222,34 @@ mod bytemark_set_tests { unsafe { ks.create_table( ObjectID::from_slice("cache"), - Table::kve_from_model_code_and_data(0, true, Coremap::new()).unwrap(), + Table::new_kve_with_volatile(true), ); ks.create_table( ObjectID::from_slice("supersafe"), - Table::kve_from_model_code_and_data(0, false, Coremap::new()).unwrap(), + Table::new_kve_with_volatile(false), ); } let mut v = Vec::new(); se::raw_serialize_partmap(&mut v, &ks).unwrap(); - let ret: HashMap = de::deserialize_set_ctype_bytemark(&v).unwrap(); + let ret: HashMap = de::deserialize_set_ctype_bytemark(&v).unwrap(); let mut expected = HashMap::new(); unsafe { // our cache is volatile - expected.insert(ObjectID::from_slice("cache"), 1); + expected.insert( + ObjectID::from_slice("cache"), + ( + bytemarks::BYTEMARK_STORAGE_VOLATILE, + bytemarks::BYTEMARK_MODEL_KV_BIN_BIN, + ), + ); // our supersafe is non volatile - expected.insert(ObjectID::from_slice("supersafe"), 0); + expected.insert( + ObjectID::from_slice("supersafe"), + ( + bytemarks::BYTEMARK_STORAGE_PERSISTENT, + bytemarks::BYTEMARK_MODEL_KV_BIN_BIN, + ), + ); } assert_hmeq!(expected, ret); } diff --git a/server/src/storage/unflush.rs b/server/src/storage/unflush.rs index 3f32d22e..c2d6174e 100644 --- a/server/src/storage/unflush.rs +++ b/server/src/storage/unflush.rs @@ -28,6 +28,7 @@ //! //! Routines for unflushing data +use super::bytemarks; use crate::coredb::memstore::ObjectID; use crate::coredb::table::Table; use crate::storage::interface::DIR_KSROOT; @@ -40,36 +41,55 @@ use std::io::Result as IoResult; use std::sync::Arc; /// Read a given table into a [`Table`] object -pub fn read_table(ksid: &ObjectID, tblid: &ObjectID, volatile: bool) -> IoResult { +/// +/// This will take care of volatility and the model_code. Just make sure that you pass the proper +/// keyspace ID and a valid table ID +pub fn read_table( + ksid: &ObjectID, + tblid: &ObjectID, + volatile: bool, + model_code: u8, +) -> IoResult
{ let filepath = unsafe { concat_path!(DIR_KSROOT, ksid.as_str(), tblid.as_str()) }; - let read = fs::read(filepath)?; - // TODO(@ohsayan): Mod this to de based on data model - let (data, model) = super::de::deserialize_map(read).ok_or_else(|| bad_data!())?; - match model { - 0 => { - // kve - Table::kve_from_model_code_and_data(model, volatile, data).ok_or_else(|| bad_data!()) + let data = if volatile { + // no need to read anything; table is volatile and has no file + Coremap::new() + } else { + // not volatile, so read this in + let f = fs::read(filepath)?; + super::de::deserialize_map(f).ok_or_else(|| bad_data!())? + }; + let tbl = match model_code { + bytemarks::BYTEMARK_MODEL_KV_BIN_BIN => { + Table::new_kve_with_data(data, volatile, false, false) } - _ => { - // some model that we don't know - Err(IoError::from(ErrorKind::Unsupported)) + bytemarks::BYTEMARK_MODEL_KV_BIN_STR => { + Table::new_kve_with_data(data, volatile, false, true) } - } + bytemarks::BYTEMARK_MODEL_KV_STR_STR => { + Table::new_kve_with_data(data, volatile, true, true) + } + bytemarks::BYTEMARK_MODEL_KV_STR_BIN => { + Table::new_kve_with_data(data, volatile, true, false) + } + _ => return Err(IoError::from(ErrorKind::Unsupported)), + }; + Ok(tbl) } /// Read an entire keyspace into a Coremap. You'll need to initialize the rest pub fn read_keyspace(ksid: ObjectID) -> IoResult>> { let filepath = unsafe { concat_path!(DIR_KSROOT, ksid.as_str(), "PARTMAP") }; - let partmap: HashMap = + let partmap: HashMap = super::de::deserialize_set_ctype_bytemark(&fs::read(filepath)?) .ok_or_else(|| bad_data!())?; let ks: Coremap> = Coremap::with_capacity(partmap.len()); - for (tableid, table_storage_type) in partmap.into_iter() { + for (tableid, (table_storage_type, model_code)) in partmap.into_iter() { if table_storage_type > 1 { return Err(bad_data!()); } let is_volatile = table_storage_type == 1; - let tbl = self::read_table(&ksid, &tableid, is_volatile)?; + let tbl = self::read_table(&ksid, &tableid, is_volatile, model_code)?; ks.true_if_insert(tableid, Arc::new(tbl)); } Ok(ks)