Implement dict dec and resolve null disambiguation

The previous logic of using a different null was extremely confusing,
and we essentially ended up creating "two different null types."

That has been removed and we have implemented the dict dec methods and
rectified the enc methods.
next
Sayan Nandan 1 year ago
parent bd3225693f
commit 026b5ef0d2
No known key found for this signature in database
GPG Key ID: 42EEDF4AE9D96B54

@ -163,7 +163,7 @@ impl AsRef<[u8]> for SharedSlice {
impl Borrow<[u8]> for SharedSlice {
#[inline(always)]
fn borrow(&self) -> &[u8] {
self.as_slice().borrow()
self.as_slice()
}
}

@ -124,7 +124,8 @@ impl Space {
// check env
let env = match props.remove(SpaceMeta::KEY_ENV) {
Some(DictEntryGeneric::Map(m)) if props.is_empty() => m,
Some(DictEntryGeneric::Null) | None if props.is_empty() => IndexST::default(),
Some(DictEntryGeneric::Lit(l)) if l.is_null() => IndexST::default(),
None if props.is_empty() => IndexST::default(),
_ => {
return Err(DatabaseError::DdlSpaceBadProperty);
}
@ -169,7 +170,9 @@ impl Space {
return Err(DatabaseError::DdlSpaceBadProperty);
}
}
Some(DictEntryGeneric::Null) if updated_props.is_empty() => space_env.clear(),
Some(DictEntryGeneric::Lit(l)) if updated_props.is_empty() & l.is_null() => {
space_env.clear()
}
None => {}
_ => return Err(DatabaseError::DdlSpaceBadProperty),
}

@ -202,6 +202,12 @@ impl Datacell {
pub fn list(&self) -> &RwLock<Vec<Self>> {
self.try_list().unwrap()
}
pub unsafe fn new_qw(qw: u64, tag: CUTag) -> Datacell {
Self::new(
tag,
DataRaw::word(SpecialPaddedWord::store(qw).dwordqn_promote()),
)
}
}
direct_from! {

@ -53,12 +53,6 @@ pub enum DictEntryGeneric {
Lit(Datacell),
/// A map
Map(DictGeneric),
/// A null
///
/// This distinction is important because we currently only allow [`Datacell`] to store information about the intialization state (as it should be)
/// and it can't distinct between "null-but-concrete-types" (that's the task of our engine: to enforce the schema but it's not DC's task). Hence this
/// specific flag tells us that this is null (and neither a lit nor a map)
Null,
}
#[derive(Debug, PartialEq)]
@ -91,15 +85,15 @@ pub fn rflatten_metadata(new: DictGeneric) -> MetaDict {
fn _rflatten_metadata(new: DictGeneric, empty: &mut MetaDict) {
for (key, val) in new {
match val {
DictEntryGeneric::Lit(l) => {
DictEntryGeneric::Lit(l) if l.is_init() => {
empty.insert(key, MetaDictEntry::Data(l));
}
DictEntryGeneric::Lit(_) => {}
DictEntryGeneric::Map(m) => {
let mut rnew = MetaDict::new();
_rflatten_metadata(m, &mut rnew);
empty.insert(key, MetaDictEntry::Map(rnew));
}
DictEntryGeneric::Null => {}
}
}
}
@ -153,57 +147,55 @@ fn rmerge_metadata_prepare_patch(
let mut okay = true;
while new.len() != 0 && okay {
let (key, new_entry) = new.next().unwrap();
match (current.get(&key), &new_entry) {
match (current.get(&key), new_entry) {
// non-null -> non-null: merge flatten update
(Some(this_current), DictEntryGeneric::Lit(_) | DictEntryGeneric::Map(_)) => {
okay &= {
match (this_current, new_entry) {
(MetaDictEntry::Data(this_data), DictEntryGeneric::Lit(new_data))
if this_data.kind() == new_data.kind() =>
{
patch
.0
.insert(key, Some(MetaDictPatchEntry::Data(new_data)));
true
}
(
MetaDictEntry::Map(this_recursive_data),
DictEntryGeneric::Map(new_recursive_data),
) => {
let mut this_patch = MetaDictPatch::default();
let okay = rmerge_metadata_prepare_patch(
this_recursive_data,
new_recursive_data,
&mut this_patch,
);
patch
.0
.insert(key, Some(MetaDictPatchEntry::Map(this_patch)));
okay
}
_ => false,
}
};
}
// null -> non-null: flatten insert
(None, DictEntryGeneric::Lit(_) | DictEntryGeneric::Map(_)) => match new_entry {
DictEntryGeneric::Lit(d) => {
let _ = patch.0.insert(key, Some(MetaDictPatchEntry::Data(d)));
}
DictEntryGeneric::Map(m) => {
let mut this_patch = MetaDictPatch::default();
okay &= rmerge_metadata_prepare_patch(&into_dict!(), m, &mut this_patch);
let _ = patch
(Some(MetaDictEntry::Data(this_data)), DictEntryGeneric::Lit(new_data))
if new_data.is_init() =>
{
if this_data.kind() == new_data.kind() {
patch
.0
.insert(key, Some(MetaDictPatchEntry::Map(this_patch)));
.insert(key, Some(MetaDictPatchEntry::Data(new_data)));
} else {
okay = false;
}
_ => unreachable!(),
},
}
(Some(MetaDictEntry::Data(_)), DictEntryGeneric::Map(_)) => {
okay = false;
}
(
Some(MetaDictEntry::Map(this_recursive_data)),
DictEntryGeneric::Map(new_recursive_data),
) => {
let mut this_patch = MetaDictPatch::default();
let _okay = rmerge_metadata_prepare_patch(
this_recursive_data,
new_recursive_data,
&mut this_patch,
);
patch
.0
.insert(key, Some(MetaDictPatchEntry::Map(this_patch)));
okay &= _okay;
}
// null -> non-null: flatten insert
(None, DictEntryGeneric::Lit(l)) if l.is_init() => {
let _ = patch.0.insert(key, Some(MetaDictPatchEntry::Data(l)));
}
(None, DictEntryGeneric::Map(m)) => {
let mut this_patch = MetaDictPatch::default();
okay &= rmerge_metadata_prepare_patch(&into_dict!(), m, &mut this_patch);
let _ = patch
.0
.insert(key, Some(MetaDictPatchEntry::Map(this_patch)));
}
// non-null -> null: remove
(Some(_), DictEntryGeneric::Null) => {
(Some(_), DictEntryGeneric::Lit(l)) => {
debug_assert!(l.is_null());
patch.0.insert(key, None);
}
(None, DictEntryGeneric::Null) => {
(None, DictEntryGeneric::Lit(l)) => {
debug_assert!(l.is_null());
// ignore
}
}

@ -173,7 +173,7 @@ macro_rules! cutag {
}
impl CUTag {
const fn new(class: TagClass, unique: TagUnique) -> Self {
pub const fn new(class: TagClass, unique: TagUnique) -> Self {
Self { class, unique }
}
}

@ -33,7 +33,7 @@ use crate::engine::data::{
fn t_simple_flatten() {
let generic_dict: DictGeneric = into_dict! {
"a_valid_key" => DictEntryGeneric::Lit(100u64.into()),
"a_null_key" => DictEntryGeneric::Null,
"a_null_key" => Datacell::null(),
};
let expected: MetaDict = into_dict!(
"a_valid_key" => Datacell::new_uint(100)
@ -52,7 +52,7 @@ fn t_simple_patch() {
let new: DictGeneric = into_dict! {
"a" => Datacell::new_uint(1),
"b" => Datacell::new_uint(2),
"z" => DictEntryGeneric::Null,
"z" => Datacell::null(),
};
let expected: MetaDict = into_dict! {
"a" => Datacell::new_uint(1),
@ -96,7 +96,7 @@ fn patch_null_out_dict() {
let new: DictGeneric = into_dict! {
"a" => Datacell::new_uint(2),
"b" => Datacell::new_uint(3),
"z" => DictEntryGeneric::Null,
"z" => Datacell::null(),
};
assert!(dict::rmerge_metadata(&mut current, new));
assert_eq!(current, expected);

@ -46,7 +46,10 @@
use crate::{
engine::{
data::dict::{DictEntryGeneric, DictGeneric},
data::{
cell::Datacell,
dict::{DictEntryGeneric, DictGeneric},
},
error::{LangError, LangResult},
ql::{
ast::{QueryData, State},
@ -176,7 +179,7 @@ where
// UNSAFE(@ohsayan): we only switch to this when we've already read in a key
key.take().as_str().into()
},
DictEntryGeneric::Null,
DictEntryGeneric::Lit(Datacell::null()),
)
.is_none(),
);

@ -81,7 +81,7 @@ pub trait NullableDictEntry {
impl NullableDictEntry for Null {
fn data(self) -> crate::engine::data::DictEntryGeneric {
crate::engine::data::DictEntryGeneric::Null
crate::engine::data::DictEntryGeneric::Lit(Datacell::null())
}
}

@ -32,7 +32,7 @@ use {
data::{
cell::Datacell,
dict::{DictEntryGeneric, DictGeneric},
tag::{DataTag, TagClass},
tag::{CUTag, DataTag, TagClass, TagUnique},
},
idx::{AsKey, AsValue},
storage::v1::{rw::BufferedScanner, SDSSError, SDSSResult},
@ -62,11 +62,13 @@ pub enum PersistDictEntryDscr {
impl PersistDictEntryDscr {
/// translates the tag class definition into the dscr definition
pub const fn translate_from_class(class: TagClass) -> Self {
unsafe { core::mem::transmute(class.d() + 1) }
unsafe { Self::from_raw(class.d() + 1) }
}
pub const unsafe fn from_raw(v: u8) -> Self {
core::mem::transmute(v)
}
pub fn new_from_dict_gen_entry(e: &DictEntryGeneric) -> Self {
match e {
DictEntryGeneric::Null => Self::Null,
DictEntryGeneric::Map(_) => Self::Dict,
DictEntryGeneric::Lit(dc) => Self::translate_from_class(dc.tag().tag_class()),
}
@ -87,6 +89,10 @@ impl PersistDictEntryDscr {
pub const fn is_recursive(&self) -> bool {
self.value_u8() >= Self::List.value_u8()
}
fn into_class(&self) -> TagClass {
debug_assert!(*self != Self::Null);
unsafe { mem::transmute(self.value_u8() - 1) }
}
}
/*
@ -177,7 +183,7 @@ fn _encode_dict<Pd: PersistDict>(buf: &mut VecU8, dict: &HashMap<Pd::Key, Pd::Va
pub fn decode_dict<Pd: PersistDict>(
scanner: &mut BufferedScanner,
) -> SDSSResult<HashMap<Pd::Key, Pd::Value>> {
if Pd::metadec_pretest_routine(scanner) & scanner.has_left(sizeof!(u64)) {
if !(Pd::metadec_pretest_routine(scanner) & scanner.has_left(sizeof!(u64))) {
return Err(SDSSError::InternalDecodeStructureCorrupted);
}
let dict_len = unsafe {
@ -185,7 +191,7 @@ pub fn decode_dict<Pd: PersistDict>(
scanner.next_u64_le() as usize
};
let mut dict = HashMap::with_capacity(dict_len);
while Pd::metadec_pretest_entry(scanner) {
while Pd::metadec_pretest_entry(scanner) & (dict.len() < dict_len) {
let md = unsafe {
// UNSAFE(@ohsayan): this is compeletely because of the entry pretest
match Pd::dec_entry_metadata(scanner) {
@ -224,7 +230,9 @@ pub fn decode_dict<Pd: PersistDict>(
k = _k;
v = _v;
}
_ => return Err(SDSSError::InternalDecodeStructureCorruptedPayload),
_ => {
return Err(SDSSError::InternalDecodeStructureCorruptedPayload);
}
}
}
if dict.insert(k, v).is_some() {
@ -265,7 +273,7 @@ impl DGEntryMD {
impl PersistDictEntryMetadata for DGEntryMD {
fn verify_with_src(&self, scanner: &BufferedScanner) -> bool {
static EXPECT_ATLEAST: [u8; 4] = [0, 1, 8, 8]; // PAD to align
let lbound_rem = self.klen + EXPECT_ATLEAST[cmp::min(self.dscr, 4) as usize] as usize;
let lbound_rem = self.klen + EXPECT_ATLEAST[cmp::min(self.dscr, 3) as usize] as usize;
scanner.has_left(lbound_rem) & (self.dscr <= PersistDictEntryDscr::Dict.value_u8())
}
}
@ -292,10 +300,6 @@ impl PersistDict for DictGeneric {
}
fn enc_entry_coupled(buf: &mut VecU8, key: &Self::Key, val: &Self::Value) {
match val {
DictEntryGeneric::Null => {
buf.push(PersistDictEntryDscr::Null.value_u8());
buf.extend(key.as_bytes())
}
DictEntryGeneric::Map(map) => {
buf.push(PersistDictEntryDscr::Dict.value_u8());
buf.extend(key.as_bytes());
@ -354,11 +358,98 @@ impl PersistDict for DictGeneric {
}
unsafe fn dec_val(scanner: &mut BufferedScanner, md: &Self::Metadata) -> Option<Self::Value> {
unsafe fn decode_element(
_: &mut BufferedScanner,
_: PersistDictEntryDscr,
scanner: &mut BufferedScanner,
dscr: PersistDictEntryDscr,
dg_top_element: bool,
) -> Option<DictEntryGeneric> {
todo!()
let r = match dscr {
PersistDictEntryDscr::Null => DictEntryGeneric::Lit(Datacell::null()),
PersistDictEntryDscr::Bool => {
DictEntryGeneric::Lit(Datacell::new_bool(scanner.next_byte() == 1))
}
PersistDictEntryDscr::UnsignedInt
| PersistDictEntryDscr::SignedInt
| PersistDictEntryDscr::Float => DictEntryGeneric::Lit(Datacell::new_qw(
scanner.next_u64_le(),
CUTag::new(
dscr.into_class(),
[
TagUnique::UnsignedInt,
TagUnique::SignedInt,
TagUnique::Illegal,
TagUnique::Illegal, // pad
][(dscr.value_u8() - 2) as usize],
),
)),
PersistDictEntryDscr::Str | PersistDictEntryDscr::Bin => {
let slc_len = scanner.next_u64_le() as usize;
if !scanner.has_left(slc_len) {
return None;
}
let slc = scanner.next_chunk_variable(slc_len);
DictEntryGeneric::Lit(if dscr == PersistDictEntryDscr::Str {
if core::str::from_utf8(slc).is_err() {
return None;
}
Datacell::new_str(
String::from_utf8_unchecked(slc.to_owned()).into_boxed_str(),
)
} else {
Datacell::new_bin(slc.to_owned().into_boxed_slice())
})
}
PersistDictEntryDscr::List => {
let list_len = scanner.next_u64_le() as usize;
let mut v = Vec::with_capacity(list_len);
while (!scanner.eof()) & (v.len() < list_len) {
let dscr = scanner.next_byte();
if dscr > PersistDictEntryDscr::Dict.value_u8() {
return None;
}
v.push(
match decode_element(
scanner,
PersistDictEntryDscr::from_raw(dscr),
false,
) {
Some(DictEntryGeneric::Lit(l)) => l,
None => return None,
_ => unreachable!("found top-level dict item in datacell"),
},
);
}
if v.len() == list_len {
DictEntryGeneric::Lit(Datacell::new_list(v))
} else {
return None;
}
}
PersistDictEntryDscr::Dict => {
if dg_top_element {
DictEntryGeneric::Map(decode_dict::<DictGeneric>(scanner).ok()?)
} else {
unreachable!("found top-level dict item in datacell")
}
}
};
Some(r)
}
decode_element(scanner, mem::transmute(md.dscr))
decode_element(scanner, PersistDictEntryDscr::from_raw(md.dscr), true)
}
}
#[test]
fn t_dict() {
let dict: DictGeneric = into_dict! {
"hello" => Datacell::new_str("world".into()),
"omg a null?" => Datacell::null(),
"a big fat dict" => DictEntryGeneric::Map(into_dict!(
"with a value" => Datacell::new_uint(1002),
"and a null" => Datacell::null(),
))
};
let encoded = encode_dict::<DictGeneric>(&dict);
let mut scanner = BufferedScanner::new(&encoded);
let decoded = decode_dict::<DictGeneric>(&mut scanner).unwrap();
assert_eq!(dict, decoded);
}

@ -566,7 +566,6 @@ mod list_tests {
use super::{de, se};
use crate::corestore::{htable::Coremap, SharedSlice};
use crate::kvengine::LockedVec;
use core::ops::Deref;
use parking_lot::RwLock;
#[test]
fn test_list_se_de() {
@ -613,13 +612,7 @@ mod list_tests {
se::raw_serialize_list_map(&mymap, &mut v).unwrap();
let de = de::deserialize_list_map(&v).unwrap();
assert_eq!(de.len(), 1);
let mykey_value = de
.get("mykey".as_bytes())
.unwrap()
.value()
.deref()
.read()
.clone();
let mykey_value = de.get("mykey".as_bytes()).unwrap().value().read().clone();
assert_eq!(
mykey_value,
vals.into_inner()
@ -642,14 +635,14 @@ mod list_tests {
let de = de::deserialize_list_map(&v).unwrap();
assert_eq!(de.len(), 2);
assert_eq!(
de.get(&key1).unwrap().value().deref().read().clone(),
de.get(&key1).unwrap().value().read().clone(),
val1.into_inner()
.into_iter()
.map(SharedSlice::from)
.collect::<Vec<SharedSlice>>()
);
assert_eq!(
de.get(&key2).unwrap().value().deref().read().clone(),
de.get(&key2).unwrap().value().read().clone(),
val2.into_inner()
.into_iter()
.map(SharedSlice::from)

Loading…
Cancel
Save