From 026b5ef0d2bc9537cea69689c53193504411d7ea Mon Sep 17 00:00:00 2001 From: Sayan Nandan Date: Fri, 11 Aug 2023 14:17:04 +0000 Subject: [PATCH] Implement dict dec and resolve null disambiguation The previous logic of using a different null was extremely confusing, and we essentially ended up creating "two different null types." That has been removed and we have implemented the dict dec methods and rectified the enc methods. --- server/src/corestore/rc.rs | 2 +- server/src/engine/core/space.rs | 7 +- server/src/engine/data/cell.rs | 6 + server/src/engine/data/dict.rs | 98 +++++++------- server/src/engine/data/tag.rs | 2 +- server/src/engine/data/tests/md_dict_tests.rs | 6 +- server/src/engine/ql/ddl/syn.rs | 7 +- server/src/engine/ql/tests.rs | 2 +- server/src/engine/storage/v1/inf.rs | 121 +++++++++++++++--- server/src/storage/v1/tests.rs | 13 +- 10 files changed, 176 insertions(+), 88 deletions(-) diff --git a/server/src/corestore/rc.rs b/server/src/corestore/rc.rs index 080ececa..08439fbb 100644 --- a/server/src/corestore/rc.rs +++ b/server/src/corestore/rc.rs @@ -163,7 +163,7 @@ impl AsRef<[u8]> for SharedSlice { impl Borrow<[u8]> for SharedSlice { #[inline(always)] fn borrow(&self) -> &[u8] { - self.as_slice().borrow() + self.as_slice() } } diff --git a/server/src/engine/core/space.rs b/server/src/engine/core/space.rs index 4750c4aa..c311780b 100644 --- a/server/src/engine/core/space.rs +++ b/server/src/engine/core/space.rs @@ -124,7 +124,8 @@ impl Space { // check env let env = match props.remove(SpaceMeta::KEY_ENV) { Some(DictEntryGeneric::Map(m)) if props.is_empty() => m, - Some(DictEntryGeneric::Null) | None if props.is_empty() => IndexST::default(), + Some(DictEntryGeneric::Lit(l)) if l.is_null() => IndexST::default(), + None if props.is_empty() => IndexST::default(), _ => { return Err(DatabaseError::DdlSpaceBadProperty); } @@ -169,7 +170,9 @@ impl Space { return Err(DatabaseError::DdlSpaceBadProperty); } } - Some(DictEntryGeneric::Null) if updated_props.is_empty() => space_env.clear(), + Some(DictEntryGeneric::Lit(l)) if updated_props.is_empty() & l.is_null() => { + space_env.clear() + } None => {} _ => return Err(DatabaseError::DdlSpaceBadProperty), } diff --git a/server/src/engine/data/cell.rs b/server/src/engine/data/cell.rs index 1ce99a2d..95017988 100644 --- a/server/src/engine/data/cell.rs +++ b/server/src/engine/data/cell.rs @@ -202,6 +202,12 @@ impl Datacell { pub fn list(&self) -> &RwLock> { self.try_list().unwrap() } + pub unsafe fn new_qw(qw: u64, tag: CUTag) -> Datacell { + Self::new( + tag, + DataRaw::word(SpecialPaddedWord::store(qw).dwordqn_promote()), + ) + } } direct_from! { diff --git a/server/src/engine/data/dict.rs b/server/src/engine/data/dict.rs index 148af9a6..fbab8ef0 100644 --- a/server/src/engine/data/dict.rs +++ b/server/src/engine/data/dict.rs @@ -53,12 +53,6 @@ pub enum DictEntryGeneric { Lit(Datacell), /// A map Map(DictGeneric), - /// A null - /// - /// This distinction is important because we currently only allow [`Datacell`] to store information about the intialization state (as it should be) - /// and it can't distinct between "null-but-concrete-types" (that's the task of our engine: to enforce the schema but it's not DC's task). Hence this - /// specific flag tells us that this is null (and neither a lit nor a map) - Null, } #[derive(Debug, PartialEq)] @@ -91,15 +85,15 @@ pub fn rflatten_metadata(new: DictGeneric) -> MetaDict { fn _rflatten_metadata(new: DictGeneric, empty: &mut MetaDict) { for (key, val) in new { match val { - DictEntryGeneric::Lit(l) => { + DictEntryGeneric::Lit(l) if l.is_init() => { empty.insert(key, MetaDictEntry::Data(l)); } + DictEntryGeneric::Lit(_) => {} DictEntryGeneric::Map(m) => { let mut rnew = MetaDict::new(); _rflatten_metadata(m, &mut rnew); empty.insert(key, MetaDictEntry::Map(rnew)); } - DictEntryGeneric::Null => {} } } } @@ -153,57 +147,55 @@ fn rmerge_metadata_prepare_patch( let mut okay = true; while new.len() != 0 && okay { let (key, new_entry) = new.next().unwrap(); - match (current.get(&key), &new_entry) { + match (current.get(&key), new_entry) { // non-null -> non-null: merge flatten update - (Some(this_current), DictEntryGeneric::Lit(_) | DictEntryGeneric::Map(_)) => { - okay &= { - match (this_current, new_entry) { - (MetaDictEntry::Data(this_data), DictEntryGeneric::Lit(new_data)) - if this_data.kind() == new_data.kind() => - { - patch - .0 - .insert(key, Some(MetaDictPatchEntry::Data(new_data))); - true - } - ( - MetaDictEntry::Map(this_recursive_data), - DictEntryGeneric::Map(new_recursive_data), - ) => { - let mut this_patch = MetaDictPatch::default(); - let okay = rmerge_metadata_prepare_patch( - this_recursive_data, - new_recursive_data, - &mut this_patch, - ); - patch - .0 - .insert(key, Some(MetaDictPatchEntry::Map(this_patch))); - okay - } - _ => false, - } - }; - } - // null -> non-null: flatten insert - (None, DictEntryGeneric::Lit(_) | DictEntryGeneric::Map(_)) => match new_entry { - DictEntryGeneric::Lit(d) => { - let _ = patch.0.insert(key, Some(MetaDictPatchEntry::Data(d))); - } - DictEntryGeneric::Map(m) => { - let mut this_patch = MetaDictPatch::default(); - okay &= rmerge_metadata_prepare_patch(&into_dict!(), m, &mut this_patch); - let _ = patch + (Some(MetaDictEntry::Data(this_data)), DictEntryGeneric::Lit(new_data)) + if new_data.is_init() => + { + if this_data.kind() == new_data.kind() { + patch .0 - .insert(key, Some(MetaDictPatchEntry::Map(this_patch))); + .insert(key, Some(MetaDictPatchEntry::Data(new_data))); + } else { + okay = false; } - _ => unreachable!(), - }, + } + (Some(MetaDictEntry::Data(_)), DictEntryGeneric::Map(_)) => { + okay = false; + } + ( + Some(MetaDictEntry::Map(this_recursive_data)), + DictEntryGeneric::Map(new_recursive_data), + ) => { + let mut this_patch = MetaDictPatch::default(); + let _okay = rmerge_metadata_prepare_patch( + this_recursive_data, + new_recursive_data, + &mut this_patch, + ); + patch + .0 + .insert(key, Some(MetaDictPatchEntry::Map(this_patch))); + okay &= _okay; + } + // null -> non-null: flatten insert + (None, DictEntryGeneric::Lit(l)) if l.is_init() => { + let _ = patch.0.insert(key, Some(MetaDictPatchEntry::Data(l))); + } + (None, DictEntryGeneric::Map(m)) => { + let mut this_patch = MetaDictPatch::default(); + okay &= rmerge_metadata_prepare_patch(&into_dict!(), m, &mut this_patch); + let _ = patch + .0 + .insert(key, Some(MetaDictPatchEntry::Map(this_patch))); + } // non-null -> null: remove - (Some(_), DictEntryGeneric::Null) => { + (Some(_), DictEntryGeneric::Lit(l)) => { + debug_assert!(l.is_null()); patch.0.insert(key, None); } - (None, DictEntryGeneric::Null) => { + (None, DictEntryGeneric::Lit(l)) => { + debug_assert!(l.is_null()); // ignore } } diff --git a/server/src/engine/data/tag.rs b/server/src/engine/data/tag.rs index 03d1c9cc..01d143fa 100644 --- a/server/src/engine/data/tag.rs +++ b/server/src/engine/data/tag.rs @@ -173,7 +173,7 @@ macro_rules! cutag { } impl CUTag { - const fn new(class: TagClass, unique: TagUnique) -> Self { + pub const fn new(class: TagClass, unique: TagUnique) -> Self { Self { class, unique } } } diff --git a/server/src/engine/data/tests/md_dict_tests.rs b/server/src/engine/data/tests/md_dict_tests.rs index 1110b8d2..c0c3adf4 100644 --- a/server/src/engine/data/tests/md_dict_tests.rs +++ b/server/src/engine/data/tests/md_dict_tests.rs @@ -33,7 +33,7 @@ use crate::engine::data::{ fn t_simple_flatten() { let generic_dict: DictGeneric = into_dict! { "a_valid_key" => DictEntryGeneric::Lit(100u64.into()), - "a_null_key" => DictEntryGeneric::Null, + "a_null_key" => Datacell::null(), }; let expected: MetaDict = into_dict!( "a_valid_key" => Datacell::new_uint(100) @@ -52,7 +52,7 @@ fn t_simple_patch() { let new: DictGeneric = into_dict! { "a" => Datacell::new_uint(1), "b" => Datacell::new_uint(2), - "z" => DictEntryGeneric::Null, + "z" => Datacell::null(), }; let expected: MetaDict = into_dict! { "a" => Datacell::new_uint(1), @@ -96,7 +96,7 @@ fn patch_null_out_dict() { let new: DictGeneric = into_dict! { "a" => Datacell::new_uint(2), "b" => Datacell::new_uint(3), - "z" => DictEntryGeneric::Null, + "z" => Datacell::null(), }; assert!(dict::rmerge_metadata(&mut current, new)); assert_eq!(current, expected); diff --git a/server/src/engine/ql/ddl/syn.rs b/server/src/engine/ql/ddl/syn.rs index bdc5edce..89b8ca48 100644 --- a/server/src/engine/ql/ddl/syn.rs +++ b/server/src/engine/ql/ddl/syn.rs @@ -46,7 +46,10 @@ use crate::{ engine::{ - data::dict::{DictEntryGeneric, DictGeneric}, + data::{ + cell::Datacell, + dict::{DictEntryGeneric, DictGeneric}, + }, error::{LangError, LangResult}, ql::{ ast::{QueryData, State}, @@ -176,7 +179,7 @@ where // UNSAFE(@ohsayan): we only switch to this when we've already read in a key key.take().as_str().into() }, - DictEntryGeneric::Null, + DictEntryGeneric::Lit(Datacell::null()), ) .is_none(), ); diff --git a/server/src/engine/ql/tests.rs b/server/src/engine/ql/tests.rs index 28051312..0ec76fe1 100644 --- a/server/src/engine/ql/tests.rs +++ b/server/src/engine/ql/tests.rs @@ -81,7 +81,7 @@ pub trait NullableDictEntry { impl NullableDictEntry for Null { fn data(self) -> crate::engine::data::DictEntryGeneric { - crate::engine::data::DictEntryGeneric::Null + crate::engine::data::DictEntryGeneric::Lit(Datacell::null()) } } diff --git a/server/src/engine/storage/v1/inf.rs b/server/src/engine/storage/v1/inf.rs index c43834c9..84b13a54 100644 --- a/server/src/engine/storage/v1/inf.rs +++ b/server/src/engine/storage/v1/inf.rs @@ -32,7 +32,7 @@ use { data::{ cell::Datacell, dict::{DictEntryGeneric, DictGeneric}, - tag::{DataTag, TagClass}, + tag::{CUTag, DataTag, TagClass, TagUnique}, }, idx::{AsKey, AsValue}, storage::v1::{rw::BufferedScanner, SDSSError, SDSSResult}, @@ -62,11 +62,13 @@ pub enum PersistDictEntryDscr { impl PersistDictEntryDscr { /// translates the tag class definition into the dscr definition pub const fn translate_from_class(class: TagClass) -> Self { - unsafe { core::mem::transmute(class.d() + 1) } + unsafe { Self::from_raw(class.d() + 1) } + } + pub const unsafe fn from_raw(v: u8) -> Self { + core::mem::transmute(v) } pub fn new_from_dict_gen_entry(e: &DictEntryGeneric) -> Self { match e { - DictEntryGeneric::Null => Self::Null, DictEntryGeneric::Map(_) => Self::Dict, DictEntryGeneric::Lit(dc) => Self::translate_from_class(dc.tag().tag_class()), } @@ -87,6 +89,10 @@ impl PersistDictEntryDscr { pub const fn is_recursive(&self) -> bool { self.value_u8() >= Self::List.value_u8() } + fn into_class(&self) -> TagClass { + debug_assert!(*self != Self::Null); + unsafe { mem::transmute(self.value_u8() - 1) } + } } /* @@ -177,7 +183,7 @@ fn _encode_dict(buf: &mut VecU8, dict: &HashMap( scanner: &mut BufferedScanner, ) -> SDSSResult> { - if Pd::metadec_pretest_routine(scanner) & scanner.has_left(sizeof!(u64)) { + if !(Pd::metadec_pretest_routine(scanner) & scanner.has_left(sizeof!(u64))) { return Err(SDSSError::InternalDecodeStructureCorrupted); } let dict_len = unsafe { @@ -185,7 +191,7 @@ pub fn decode_dict( scanner.next_u64_le() as usize }; let mut dict = HashMap::with_capacity(dict_len); - while Pd::metadec_pretest_entry(scanner) { + while Pd::metadec_pretest_entry(scanner) & (dict.len() < dict_len) { let md = unsafe { // UNSAFE(@ohsayan): this is compeletely because of the entry pretest match Pd::dec_entry_metadata(scanner) { @@ -224,7 +230,9 @@ pub fn decode_dict( k = _k; v = _v; } - _ => return Err(SDSSError::InternalDecodeStructureCorruptedPayload), + _ => { + return Err(SDSSError::InternalDecodeStructureCorruptedPayload); + } } } if dict.insert(k, v).is_some() { @@ -265,7 +273,7 @@ impl DGEntryMD { impl PersistDictEntryMetadata for DGEntryMD { fn verify_with_src(&self, scanner: &BufferedScanner) -> bool { static EXPECT_ATLEAST: [u8; 4] = [0, 1, 8, 8]; // PAD to align - let lbound_rem = self.klen + EXPECT_ATLEAST[cmp::min(self.dscr, 4) as usize] as usize; + let lbound_rem = self.klen + EXPECT_ATLEAST[cmp::min(self.dscr, 3) as usize] as usize; scanner.has_left(lbound_rem) & (self.dscr <= PersistDictEntryDscr::Dict.value_u8()) } } @@ -292,10 +300,6 @@ impl PersistDict for DictGeneric { } fn enc_entry_coupled(buf: &mut VecU8, key: &Self::Key, val: &Self::Value) { match val { - DictEntryGeneric::Null => { - buf.push(PersistDictEntryDscr::Null.value_u8()); - buf.extend(key.as_bytes()) - } DictEntryGeneric::Map(map) => { buf.push(PersistDictEntryDscr::Dict.value_u8()); buf.extend(key.as_bytes()); @@ -354,11 +358,98 @@ impl PersistDict for DictGeneric { } unsafe fn dec_val(scanner: &mut BufferedScanner, md: &Self::Metadata) -> Option { unsafe fn decode_element( - _: &mut BufferedScanner, - _: PersistDictEntryDscr, + scanner: &mut BufferedScanner, + dscr: PersistDictEntryDscr, + dg_top_element: bool, ) -> Option { - todo!() + let r = match dscr { + PersistDictEntryDscr::Null => DictEntryGeneric::Lit(Datacell::null()), + PersistDictEntryDscr::Bool => { + DictEntryGeneric::Lit(Datacell::new_bool(scanner.next_byte() == 1)) + } + PersistDictEntryDscr::UnsignedInt + | PersistDictEntryDscr::SignedInt + | PersistDictEntryDscr::Float => DictEntryGeneric::Lit(Datacell::new_qw( + scanner.next_u64_le(), + CUTag::new( + dscr.into_class(), + [ + TagUnique::UnsignedInt, + TagUnique::SignedInt, + TagUnique::Illegal, + TagUnique::Illegal, // pad + ][(dscr.value_u8() - 2) as usize], + ), + )), + PersistDictEntryDscr::Str | PersistDictEntryDscr::Bin => { + let slc_len = scanner.next_u64_le() as usize; + if !scanner.has_left(slc_len) { + return None; + } + let slc = scanner.next_chunk_variable(slc_len); + DictEntryGeneric::Lit(if dscr == PersistDictEntryDscr::Str { + if core::str::from_utf8(slc).is_err() { + return None; + } + Datacell::new_str( + String::from_utf8_unchecked(slc.to_owned()).into_boxed_str(), + ) + } else { + Datacell::new_bin(slc.to_owned().into_boxed_slice()) + }) + } + PersistDictEntryDscr::List => { + let list_len = scanner.next_u64_le() as usize; + let mut v = Vec::with_capacity(list_len); + while (!scanner.eof()) & (v.len() < list_len) { + let dscr = scanner.next_byte(); + if dscr > PersistDictEntryDscr::Dict.value_u8() { + return None; + } + v.push( + match decode_element( + scanner, + PersistDictEntryDscr::from_raw(dscr), + false, + ) { + Some(DictEntryGeneric::Lit(l)) => l, + None => return None, + _ => unreachable!("found top-level dict item in datacell"), + }, + ); + } + if v.len() == list_len { + DictEntryGeneric::Lit(Datacell::new_list(v)) + } else { + return None; + } + } + PersistDictEntryDscr::Dict => { + if dg_top_element { + DictEntryGeneric::Map(decode_dict::(scanner).ok()?) + } else { + unreachable!("found top-level dict item in datacell") + } + } + }; + Some(r) } - decode_element(scanner, mem::transmute(md.dscr)) + decode_element(scanner, PersistDictEntryDscr::from_raw(md.dscr), true) } } + +#[test] +fn t_dict() { + let dict: DictGeneric = into_dict! { + "hello" => Datacell::new_str("world".into()), + "omg a null?" => Datacell::null(), + "a big fat dict" => DictEntryGeneric::Map(into_dict!( + "with a value" => Datacell::new_uint(1002), + "and a null" => Datacell::null(), + )) + }; + let encoded = encode_dict::(&dict); + let mut scanner = BufferedScanner::new(&encoded); + let decoded = decode_dict::(&mut scanner).unwrap(); + assert_eq!(dict, decoded); +} diff --git a/server/src/storage/v1/tests.rs b/server/src/storage/v1/tests.rs index ca306d12..de891ed9 100644 --- a/server/src/storage/v1/tests.rs +++ b/server/src/storage/v1/tests.rs @@ -566,7 +566,6 @@ mod list_tests { use super::{de, se}; use crate::corestore::{htable::Coremap, SharedSlice}; use crate::kvengine::LockedVec; - use core::ops::Deref; use parking_lot::RwLock; #[test] fn test_list_se_de() { @@ -613,13 +612,7 @@ mod list_tests { se::raw_serialize_list_map(&mymap, &mut v).unwrap(); let de = de::deserialize_list_map(&v).unwrap(); assert_eq!(de.len(), 1); - let mykey_value = de - .get("mykey".as_bytes()) - .unwrap() - .value() - .deref() - .read() - .clone(); + let mykey_value = de.get("mykey".as_bytes()).unwrap().value().read().clone(); assert_eq!( mykey_value, vals.into_inner() @@ -642,14 +635,14 @@ mod list_tests { let de = de::deserialize_list_map(&v).unwrap(); assert_eq!(de.len(), 2); assert_eq!( - de.get(&key1).unwrap().value().deref().read().clone(), + de.get(&key1).unwrap().value().read().clone(), val1.into_inner() .into_iter() .map(SharedSlice::from) .collect::>() ); assert_eq!( - de.get(&key2).unwrap().value().deref().read().clone(), + de.get(&key2).unwrap().value().read().clone(), val2.into_inner() .into_iter() .map(SharedSlice::from)