diff --git a/src/data/expr.rs b/src/data/expr.rs index 729ecbd6..a3fa6f32 100644 --- a/src/data/expr.rs +++ b/src/data/expr.rs @@ -1,9 +1,24 @@ -use crate::data::op::Op; +use crate::data::op::{AggOp, Op, UnresolvedOp}; use crate::data::tuple_set::{ColId, TableId, TupleSetIdx}; -use crate::data::value::Value; +use crate::data::value::{StaticValue, Value}; use std::collections::BTreeMap; +use std::result; use std::sync::Arc; +#[derive(thiserror::Error, Debug)] +pub(crate) enum ExprError { + #[error("Cannot convert from {0}")] + ConversionFailure(StaticValue), + + #[error("Unknown expr tag {0}")] + UnknownExprTag(String), + + #[error("List extraction failed for {0}")] + ListExtractionFailed(StaticValue), +} + +type Result = result::Result; + pub(crate) enum Expr<'a> { Const(Value<'a>), List(Vec>), @@ -11,11 +26,214 @@ pub(crate) enum Expr<'a> { Variable(String), TableCol(TableId, ColId), TupleSetIdx(TupleSetIdx), - Apply(Arc, Vec>), + Apply(Arc, Vec>), + ApplyAgg(Arc, Vec>, Vec>), FieldAcc(String, Box>), IdxAcc(usize, Box>), } pub(crate) type StaticExpr = Expr<'static>; -// TODO serde expr into value +fn extract_list_from_value(value: Value, n: usize) -> Result> { + if let Value::List(l) = value { + if n > 0 && l.len() != n { + return Err(ExprError::ListExtractionFailed(Value::List(l).to_static())); + } + Ok(l) + } else { + return Err(ExprError::ListExtractionFailed(value.to_static())); + } +} + +impl<'a> TryFrom> for Expr<'a> { + type Error = ExprError; + + fn try_from(value: Value<'a>) -> Result { + if let Value::Dict(d) = value { + if d.len() != 1 { + return Err(ExprError::ConversionFailure(Value::Dict(d).to_static())); + } + let (k, v) = d.into_iter().next().unwrap(); + match k.as_ref() { + "Const" => Ok(Expr::Const(v)), + "List" => { + let l = extract_list_from_value(v, 0)?; + Ok(Expr::List(l.into_iter().map(Expr::try_from).collect::>>()?)) + } + "Dict" => { + match v { + Value::Dict(d) => { + Ok(Expr::Dict(d.into_iter().map(|(k, v)| -> Result<(String, Expr)> { + Ok((k.to_string(), Expr::try_from(v)?)) + }).collect::>>()?)) + } + v => return Err(ExprError::ConversionFailure(Value::Dict(BTreeMap::from([(k, v)])).to_static())) + } + } + "Variable" => { + if let Value::Text(t) = v { + Ok(Expr::Variable(t.to_string())) + } else { + return Err(ExprError::ConversionFailure(Value::Dict(BTreeMap::from([(k, v)])).to_static())); + } + } + "TableCol" => { + let mut l = extract_list_from_value(v, 4)?.into_iter(); + let in_root = match l.next().unwrap() { + Value::Bool(b) => b, + v => return Err(ExprError::ConversionFailure(v.to_static())) + }; + let tid = match l.next().unwrap() { + Value::Int(i) => i, + v => return Err(ExprError::ConversionFailure(v.to_static())) + }; + let is_key = match l.next().unwrap() { + Value::Bool(b) => b, + v => return Err(ExprError::ConversionFailure(v.to_static())) + }; + let cid = match l.next().unwrap() { + Value::Int(i) => i, + v => return Err(ExprError::ConversionFailure(v.to_static())) + }; + Ok(Expr::TableCol((in_root, tid as u32).into(), (is_key, cid as usize).into())) + } + "TupleSetIdx" => { + let mut l = extract_list_from_value(v, 3)?.into_iter(); + let is_key = match l.next().unwrap() { + Value::Bool(b) => b, + v => return Err(ExprError::ConversionFailure(v.to_static())) + }; + let tid = match l.next().unwrap() { + Value::Int(i) => i, + v => return Err(ExprError::ConversionFailure(v.to_static())) + }; + let cid = match l.next().unwrap() { + Value::Int(i) => i, + v => return Err(ExprError::ConversionFailure(v.to_static())) + }; + Ok(Expr::TupleSetIdx(TupleSetIdx { + is_key, + t_set: tid as usize, + col_idx: cid as usize, + })) + } + "Apply" => { + let mut ll = extract_list_from_value(v, 2)?.into_iter(); + let name = match ll.next().unwrap() { + Value::Text(t) => t, + v => return Err(ExprError::ConversionFailure(v.to_static())) + }; + let op = Arc::new(UnresolvedOp(name.to_string())); + let l = extract_list_from_value(ll.next().unwrap(), 0)?; + let args = l.into_iter().map(Expr::try_from).collect::>>()?; + Ok(Expr::Apply(op, args)) + } + "ApplyAgg" => { + let mut ll = extract_list_from_value(v, 3)?.into_iter(); + let name = match ll.next().unwrap() { + Value::Text(t) => t, + v => return Err(ExprError::ConversionFailure(v.to_static())) + }; + let op = Arc::new(UnresolvedOp(name.to_string())); + let l = extract_list_from_value(ll.next().unwrap(), 0)?; + let a_args = l.into_iter().map(Expr::try_from).collect::>>()?; + let l = extract_list_from_value(ll.next().unwrap(), 0)?; + let args = l.into_iter().map(Expr::try_from).collect::>>()?; + Ok(Expr::ApplyAgg(op, a_args, args)) + } + "FieldAcc" => { + let mut ll = extract_list_from_value(v, 2)?.into_iter(); + let field = match ll.next().unwrap() { + Value::Text(t) => t, + v => return Err(ExprError::ConversionFailure(v.to_static())) + }; + let arg = Expr::try_from(ll.next().unwrap())?; + Ok(Expr::FieldAcc(field.to_string(), arg.into())) + } + "IdxAcc" => { + let mut ll = extract_list_from_value(v, 2)?.into_iter(); + let idx = match ll.next().unwrap() { + Value::Int(i) => i as usize, + v => return Err(ExprError::ConversionFailure(v.to_static())) + }; + let arg = Expr::try_from(ll.next().unwrap())?; + Ok(Expr::IdxAcc(idx, arg.into())) + } + k => Err(ExprError::UnknownExprTag(k.to_string())) + } + } else { + Err(ExprError::ConversionFailure(value.to_static())) + } + } +} + +impl<'a> From> for Value<'a> { + fn from(expr: Expr<'a>) -> Self { + match expr { + Expr::Const(c) => build_tagged_value("Const", c), + Expr::List(l) => build_tagged_value( + "List", + l.into_iter().map(Value::from).collect::>().into(), + ), + Expr::Dict(d) => build_tagged_value( + "Dict", + d.into_iter() + .map(|(k, v)| (k.into(), v.into())) + .collect::>() + .into(), + ), + Expr::Variable(v) => build_tagged_value("Variable", v.into()), + Expr::TableCol(tid, cid) => build_tagged_value( + "TableCol", + vec![ + tid.in_root.into(), + Value::from(tid.id as i64), + cid.is_key.into(), + Value::from(cid.id as i64), + ] + .into(), + ), + Expr::TupleSetIdx(sid) => build_tagged_value( + "TupleSetIdx", + vec![ + sid.is_key.into(), + Value::from(sid.t_set as i64), + Value::from(sid.col_idx as i64), + ] + .into(), + ), + Expr::Apply(op, args) => build_tagged_value( + "Apply", + vec![ + Value::from(op.name().to_string()), + args.into_iter().map(Value::from).collect::>().into(), + ] + .into(), + ), + Expr::ApplyAgg(op, a_args, args) => build_tagged_value( + "ApplyAgg", + vec![ + Value::from(op.name().to_string()), + a_args + .into_iter() + .map(Value::from) + .collect::>() + .into(), + args.into_iter().map(Value::from).collect::>().into(), + ] + .into(), + ), + Expr::FieldAcc(f, v) => { + build_tagged_value("FieldAcc", vec![f.into(), Value::from(*v)].into()) + } + Expr::IdxAcc(idx, v) => { + build_tagged_value("IdxAcc", vec![(idx as i64).into(), Value::from(*v)].into()) + } + } + } +} + + +fn build_tagged_value<'a>(tag: &'static str, val: Value<'a>) -> Value<'a> { + Value::Dict(BTreeMap::from([(tag.into(), val)])) +} diff --git a/src/data/op.rs b/src/data/op.rs index 20e83641..db5eff35 100644 --- a/src/data/op.rs +++ b/src/data/op.rs @@ -1 +1,31 @@ -pub(crate) struct Op; +pub(crate) trait Op { + fn is_resolved(&self) -> bool; + fn name(&self) -> &str; +} + +pub(crate) trait AggOp { + fn is_resolved(&self) -> bool; + fn name(&self) -> &str; +} + +pub(crate) struct UnresolvedOp(pub String); + +impl Op for UnresolvedOp { + fn is_resolved(&self) -> bool { + false + } + + fn name(&self) -> &str { + &self.0 + } +} + +impl AggOp for UnresolvedOp { + fn is_resolved(&self) -> bool { + false + } + + fn name(&self) -> &str { + &self.0 + } +} \ No newline at end of file diff --git a/src/data/tuple.rs b/src/data/tuple.rs index afda666c..a6a80f42 100644 --- a/src/data/tuple.rs +++ b/src/data/tuple.rs @@ -1,6 +1,34 @@ +use crate::data::tuple::TupleError::UndefinedDataTag; +use crate::data::value::Value; +use std::borrow::Cow; +use std::cell::RefCell; +use std::cmp::{Ordering, Reverse}; +use std::collections::BTreeMap; +use std::fmt::{Debug, Formatter}; +use std::hash::{Hash, Hasher}; +use std::result; +use uuid::Uuid; + +#[derive(thiserror::Error, Debug)] +pub(crate) enum TupleError { + #[error("Undefined data kind {0}")] + UndefinedDataKind(u32), + + #[error("Undefined data tag {0}")] + UndefinedDataTag(u8), + + #[error("Index {0} out of bound for tuple {1:?}")] + IndexOutOfBound(usize, OwnTuple), + + #[error("Type mismatch: {1:?} is not {0}")] + TypeMismatch(&'static str, OwnTuple), +} + +type Result = result::Result; + #[repr(u8)] #[derive(Ord, PartialOrd, Eq, PartialEq)] -pub(crate) enum Tag { +pub(crate) enum StorageTag { BoolFalse = 1, Null = 2, BoolTrue = 3, @@ -19,11 +47,11 @@ pub(crate) enum Tag { Max = 255, } -impl TryFrom for Tag { +impl TryFrom for StorageTag { type Error = u8; #[inline] - fn try_from(u: u8) -> std::result::Result { - use self::Tag::*; + fn try_from(u: u8) -> std::result::Result { + use self::StorageTag::*; Ok(match u { 1 => BoolFalse, 2 => Null, @@ -45,3 +73,627 @@ impl TryFrom for Tag { }) } } + +#[repr(u32)] +#[derive(Ord, PartialOrd, Eq, PartialEq, Debug, Clone)] +pub enum DataKind { + Data = 0, + Node = 1, + Edge = 2, + Assoc = 3, + Index = 4, + Val = 5, + Type = 6, + Empty = u32::MAX, +} +// In storage, key layout is `[0, name, stack_depth]` where stack_depth is a non-positive number as zigzag +// Also has inverted index `[0, stack_depth, name]` for easy popping of stacks + +pub const EMPTY_DATA: [u8; 4] = u32::MAX.to_be_bytes(); + +impl> Tuple { + pub fn data_kind(&self) -> Result { + use DataKind::*; + Ok(match self.get_prefix() { + 0 => Data, + 1 => Node, + 2 => Edge, + 3 => Assoc, + 4 => Index, + 5 => Val, + 6 => Type, + u32::MAX => Empty, + v => return Err(TupleError::UndefinedDataKind(v)), + }) + } +} + +#[derive(Clone)] +pub(crate) struct Tuple +where + T: AsRef<[u8]>, +{ + pub(crate) data: T, + idx_cache: RefCell>, +} + +impl Tuple +where + T: AsRef<[u8]>, +{ + pub(crate) fn clear_cache(&self) { + self.idx_cache.borrow_mut().clear() + } +} + +impl AsRef<[u8]> for Tuple +where + T: AsRef<[u8]>, +{ + fn as_ref(&self) -> &[u8] { + self.data.as_ref() + } +} +pub(crate) type OwnTuple = Tuple>; + +pub(crate) const PREFIX_LEN: usize = 4; + +impl> Tuple { + #[inline] + pub(crate) fn to_owned(&self) -> OwnTuple { + OwnTuple { + data: self.data.as_ref().to_vec(), + idx_cache: RefCell::new(vec![]), + } + } + + #[inline] + pub(crate) fn starts_with>(&self, other: &Tuple) -> bool { + self.data.as_ref().starts_with(other.data.as_ref()) + } + + #[inline] + pub(crate) fn key_part_eq>(&self, other: &Tuple) -> bool { + self.data.as_ref()[PREFIX_LEN..] == other.data.as_ref()[PREFIX_LEN..] + } + + #[inline] + pub(crate) fn key_part_cmp>(&self, other: &Tuple) -> Ordering { + self.iter() + .filter_map(|v| v.ok()) + .cmp(other.iter().filter_map(|v| v.ok())) + } + + #[inline] + pub(crate) fn new(data: T) -> Self { + Self { + data, + idx_cache: RefCell::new(vec![]), + } + } + + #[inline] + pub(crate) fn get_prefix(&self) -> u32 { + u32::from_be_bytes(self.data.as_ref()[0..4].try_into().unwrap()) + } + + #[inline] + fn all_cached(&self) -> bool { + match self.idx_cache.borrow().last() { + None => self.data.as_ref().len() == PREFIX_LEN, + Some(l) => *l == self.data.as_ref().len(), + } + } + #[inline] + fn get_pos(&self, idx: usize) -> Option { + if idx == 0 { + if self.data.as_ref().len() > PREFIX_LEN { + Some(PREFIX_LEN) + } else { + None + } + } else { + self.cache_until(idx); + self.idx_cache.borrow().get(idx - 1).cloned() + } + } + #[inline] + fn cache_until(&self, idx: usize) { + while self.idx_cache.borrow().len() < idx && !self.all_cached() { + self.skip_and_cache(); + } + } + #[inline] + fn skip_and_cache(&self) { + let data = self.data.as_ref(); + let tag_start = *self.idx_cache.borrow().last().unwrap_or(&PREFIX_LEN); + let mut start = tag_start + 1; + let nxt; + loop { + nxt = match StorageTag::try_from(data[tag_start]).unwrap() { + StorageTag::Null | StorageTag::BoolTrue | StorageTag::BoolFalse => start, + StorageTag::Int => start + self.parse_varint(start).1, + StorageTag::Float => start + 8, + StorageTag::Uuid => start + 16, + StorageTag::Text | StorageTag::Bytes => { + let (slen, offset) = self.parse_varint(start); + let slen = slen as usize; + start + slen + offset + } + StorageTag::List | StorageTag::Dict => { + start + u32::from_be_bytes(data[start..start + 4].try_into().unwrap()) as usize + } + StorageTag::DescVal => { + start += 1; + continue; + } + StorageTag::Max => panic!(), + }; + break; + } + self.idx_cache.borrow_mut().push(nxt); + } + + #[inline] + fn parse_varint(&self, idx: usize) -> (u64, usize) { + let data = self.data.as_ref(); + let mut cur = idx; + let mut u: u64 = 0; + let mut shift = 0; + loop { + let buf = data[cur]; + cur += 1; + u |= ((buf & 0b01111111) as u64) << shift; + if buf & 0b10000000 == 0 { + break; + } + shift += 7; + } + (u, cur - idx) + } + + #[inline] + pub(crate) fn get(&self, idx: usize) -> Result { + match self.get_pos(idx) { + Some(v) => { + if v == self.data.as_ref().len() { + return Err(TupleError::IndexOutOfBound(idx, self.to_owned())); + } + let (val, nxt) = self.parse_value_at(v)?; + if idx == self.idx_cache.borrow().len() { + self.idx_cache.borrow_mut().push(nxt); + } + Ok(val) + } + None => Err(TupleError::IndexOutOfBound(idx, self.to_owned())), + } + } + + #[inline] + pub(crate) fn get_null(&self, idx: usize) -> Result<()> { + match self.get(idx)? { + Value::Null => Ok(()), + _ => Err(TupleError::TypeMismatch("Null", self.to_owned())), + } + } + + #[inline] + pub(crate) fn get_int(&self, idx: usize) -> Result { + match self.get(idx)? { + Value::Int(i) => Ok(i), + _ => Err(TupleError::TypeMismatch("Int", self.to_owned())), + } + } + + #[inline] + pub(crate) fn get_text(&self, idx: usize) -> Result> { + match self.get(idx)? { + Value::Text(d) => Ok(d), + _ => Err(TupleError::TypeMismatch("Text", self.to_owned())), + } + } + + #[inline] + pub(crate) fn get_bool(&self, idx: usize) -> Result { + match self.get(idx)? { + Value::Bool(b) => Ok(b), + _ => Err(TupleError::TypeMismatch("Bool", self.to_owned())), + } + } + + #[inline] + pub(crate) fn get_float(&self, idx: usize) -> Result { + match self.get(idx)? { + Value::Float(f) => Ok(f.into_inner()), + _ => Err(TupleError::TypeMismatch("Float", self.to_owned())), + } + } + + #[inline] + pub(crate) fn get_uuid(&self, idx: usize) -> Result { + match self.get(idx)? { + Value::Uuid(u) => Ok(u), + _ => Err(TupleError::TypeMismatch("Uuid", self.to_owned())), + } + } + + #[inline] + pub(crate) fn get_list(&self, idx: usize) -> Result> { + match self.get(idx)? { + Value::List(u) => Ok(u), + _ => Err(TupleError::TypeMismatch("List", self.to_owned())), + } + } + + #[inline] + pub(crate) fn get_dict(&self, idx: usize) -> Result, Value>> { + match self.get(idx)? { + Value::Dict(u) => Ok(u), + _ => Err(TupleError::TypeMismatch("Dict", self.to_owned())), + } + } + + #[inline] + fn parse_value_at(&self, pos: usize) -> Result<(Value, usize)> { + let data = self.data.as_ref(); + let start = pos + 1; + let tag = match StorageTag::try_from(data[pos]) { + Ok(t) => t, + Err(e) => return Err(TupleError::UndefinedDataTag(e)), + }; + let (nxt, val): (usize, Value) = match tag { + StorageTag::Null => (start, ().into()), + StorageTag::BoolTrue => (start, true.into()), + StorageTag::BoolFalse => (start, false.into()), + StorageTag::Int => { + let (u, offset) = self.parse_varint(start); + let val = Self::varint_to_zigzag(u); + (start + offset, val.into()) + } + StorageTag::Float => ( + start + 8, + f64::from_be_bytes(data[start..start + 8].try_into().unwrap()).into(), + ), + StorageTag::Uuid => ( + start + 16, + Uuid::from_slice(&data[start..start + 16]).unwrap().into(), + ), + StorageTag::Text => { + let (slen, offset) = self.parse_varint(start); + let slen = slen as usize; + let s = unsafe { + std::str::from_utf8_unchecked(&data[start + offset..start + offset + slen]) + }; + + (start + slen + offset, s.into()) + } + StorageTag::Bytes => { + let (slen, offset) = self.parse_varint(start); + let slen = slen as usize; + let s = &data[start + offset..start + offset + slen]; + + (start + slen + offset, s.into()) + } + StorageTag::List => { + let end_pos = + start + u32::from_be_bytes(data[start..start + 4].try_into().unwrap()) as usize; + let mut start_pos = start + 4; + let mut collected = vec![]; + while start_pos < end_pos { + let (val, new_pos) = self.parse_value_at(start_pos)?; + collected.push(val); + start_pos = new_pos; + } + (end_pos, collected.into()) + } + StorageTag::Dict => { + let end_pos = + start + u32::from_be_bytes(data[start..start + 4].try_into().unwrap()) as usize; + let mut start_pos = start + 4; + let mut collected: BTreeMap, Value> = BTreeMap::new(); + while start_pos < end_pos { + let (slen, offset) = self.parse_varint(start_pos); + start_pos += offset; + let key = unsafe { + std::str::from_utf8_unchecked(&data[start_pos..start_pos + slen as usize]) + }; + start_pos += slen as usize; + let (val, new_pos) = self.parse_value_at(start_pos)?; + collected.insert(key.into(), val); + start_pos = new_pos; + } + (end_pos, collected.into()) + } + StorageTag::DescVal => { + let (val, offset) = self.parse_value_at(pos + 1)?; + (offset, Value::DescVal(Reverse(val.into()))) + } + StorageTag::Max => return Err(UndefinedDataTag(StorageTag::Max as u8)), + }; + Ok((val, nxt)) + } + + fn varint_to_zigzag(u: u64) -> i64 { + if u & 1 == 0 { + (u >> 1) as i64 + } else { + -((u >> 1) as i64) - 1 + } + } + pub(crate) fn iter(&self) -> TupleIter { + TupleIter { + tuple: self, + pos: 4, + } + } +} + +impl> Debug for Tuple { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self.data_kind() { + Ok(data_kind) => { + write!(f, "Tuple<{:?}>{{", data_kind)?; + } + Err(_) => { + write!(f, "Tuple<{}>{{", self.get_prefix())?; + } + } + let strings = self + .iter() + .enumerate() + .map(|(i, v)| match v { + Ok(v) => { + format!("{}: {}", i, v) + } + Err(err) => { + format!("{}: {:?}", i, err) + } + }) + .collect::>() + .join(", "); + write!(f, "{}}}", strings) + } +} + +pub(crate) struct TupleIter<'a, T: AsRef<[u8]>> { + tuple: &'a Tuple, + pos: usize, +} + +impl<'a, T: AsRef<[u8]>> Iterator for TupleIter<'a, T> { + type Item = Result>; + + fn next(&mut self) -> Option { + if self.pos == self.tuple.data.as_ref().len() { + return None; + } + let (v, pos) = match self.tuple.parse_value_at(self.pos) { + Ok(vs) => vs, + Err(e) => return Some(Err(e)), + }; + self.pos = pos; + Some(Ok(v)) + } +} + +impl OwnTuple { + #[inline] + pub(crate) fn truncate_all(&mut self) { + self.clear_cache(); + self.data.truncate(PREFIX_LEN); + } + #[inline] + pub(crate) fn empty_tuple() -> OwnTuple { + OwnTuple::with_data_prefix(DataKind::Empty) + } + #[inline] + pub(crate) fn with_null_prefix() -> Self { + Tuple::with_prefix(0) + } + #[inline] + pub(crate) fn with_data_prefix(prefix: DataKind) -> Self { + Tuple::with_prefix(prefix as u32) + } + #[inline] + pub(crate) fn with_prefix(prefix: u32) -> Self { + let data = Vec::from(prefix.to_be_bytes()); + Self { + data, + idx_cache: RefCell::new(vec![]), + } + } + #[inline] + pub(crate) fn overwrite_prefix(&mut self, prefix: u32) { + let bytes = prefix.to_be_bytes(); + self.data[..4].clone_from_slice(&bytes[..4]); + } + #[inline] + pub(crate) fn max_tuple() -> Self { + let mut ret = Tuple::with_prefix(u32::MAX); + ret.seal_with_sentinel(); + ret + } + #[inline] + pub(crate) fn seal_with_sentinel(&mut self) { + self.push_tag(StorageTag::Max); + } + #[inline] + fn push_tag(&mut self, tag: StorageTag) { + self.data.push(tag as u8); + } + #[inline] + pub(crate) fn push_null(&mut self) { + self.push_tag(StorageTag::Null); + self.idx_cache.borrow_mut().push(self.data.len()); + } + #[inline] + pub(crate) fn push_bool(&mut self, b: bool) { + self.push_tag(if b { + StorageTag::BoolTrue + } else { + StorageTag::BoolFalse + }); + self.idx_cache.borrow_mut().push(self.data.len()); + } + #[inline] + pub(crate) fn push_int(&mut self, i: i64) { + self.push_tag(StorageTag::Int); + self.push_zigzag(i); + self.idx_cache.borrow_mut().push(self.data.len()); + } + #[inline] + pub(crate) fn push_float(&mut self, f: f64) { + self.push_tag(StorageTag::Float); + self.data.extend(f.to_be_bytes()); + self.idx_cache.borrow_mut().push(self.data.len()); + } + #[inline] + pub(crate) fn push_uuid(&mut self, u: Uuid) { + self.push_tag(StorageTag::Uuid); + self.data.extend(u.as_bytes()); + self.idx_cache.borrow_mut().push(self.data.len()); + } + #[inline] + pub(crate) fn push_str(&mut self, s: impl AsRef) { + let s = s.as_ref(); + self.push_tag(StorageTag::Text); + self.push_varint(s.len() as u64); + self.data.extend_from_slice(s.as_bytes()); + self.idx_cache.borrow_mut().push(self.data.len()); + } + #[inline] + pub(crate) fn push_bytes(&mut self, b: impl AsRef<[u8]>) { + let b = b.as_ref(); + self.push_tag(StorageTag::Bytes); + self.push_varint(b.len() as u64); + self.data.extend_from_slice(b); + self.idx_cache.borrow_mut().push(self.data.len()); + } + #[inline] + pub(crate) fn push_reverse_value(&mut self, v: &Value) { + self.push_tag(StorageTag::DescVal); + let start_len = self.idx_cache.borrow().len(); + self.push_value(v); + let mut cache = self.idx_cache.borrow_mut(); + cache.truncate(start_len); + cache.push(self.data.len()); + } + #[inline] + pub(crate) fn push_value(&mut self, v: &Value) { + match v { + Value::Null => self.push_null(), + Value::Bool(b) => self.push_bool(*b), + Value::Int(i) => self.push_int(*i), + Value::Float(f) => self.push_float(f.into_inner()), + Value::Uuid(u) => self.push_uuid(*u), + Value::Text(t) => self.push_str(t), + Value::Bytes(b) => self.push_bytes(b), + Value::List(l) => { + self.push_tag(StorageTag::List); + let start_pos = self.data.len(); + let start_len = self.idx_cache.borrow().len(); + self.data.extend(0u32.to_be_bytes()); + for val in l { + self.push_value(val); + } + let length = (self.data.len() - start_pos) as u32; + let length_bytes = length.to_be_bytes(); + self.data[start_pos..(4 + start_pos)].clone_from_slice(&length_bytes[..4]); + let mut cache = self.idx_cache.borrow_mut(); + cache.truncate(start_len); + cache.push(self.data.len()); + } + Value::Dict(d) => { + self.push_tag(StorageTag::Dict); + let start_pos = self.data.len(); + let start_len = self.idx_cache.borrow().len(); + self.data.extend(0u32.to_be_bytes()); + for (k, v) in d { + self.push_varint(k.len() as u64); + self.data.extend_from_slice(k.as_bytes()); + self.push_value(v); + } + let length = (self.data.len() - start_pos) as u32; + let length_bytes = length.to_be_bytes(); + self.data[start_pos..(4 + start_pos)].clone_from_slice(&length_bytes[..4]); + let mut cache = self.idx_cache.borrow_mut(); + cache.truncate(start_len); + cache.push(self.data.len()); + } + Value::EndSentinel => panic!("Cannot push sentinel value"), + Value::DescVal(Reverse(v)) => { + self.push_reverse_value(v); + } + } + } + + #[inline] + fn push_varint(&mut self, u: u64) { + let mut u = u; + while u > 0b01111111 { + self.data.push(0b10000000 | (u as u8 & 0b01111111)); + u >>= 7; + } + self.data.push(u as u8); + } + + #[inline] + fn push_zigzag(&mut self, i: i64) { + let u: u64 = if i >= 0 { + (i as u64) << 1 + } else { + // Convoluted, to prevent overflow when calling .abs() + (((i + 1).abs() as u64) << 1) + 1 + }; + self.push_varint(u); + } + + #[inline] + pub(crate) fn concat_data>(&mut self, other: &Tuple) { + let other_data_part = &other.as_ref()[4..]; + self.data.extend_from_slice(other_data_part); + } + + #[inline] + pub(crate) fn insert_values_at<'a, T: AsRef<[Value<'a>]>>( + &self, + idx: usize, + values: T, + ) -> Result { + let mut new_tuple = Tuple::with_prefix(self.get_prefix()); + for v in self.iter().take(idx) { + new_tuple.push_value(&v?); + } + for v in values.as_ref() { + new_tuple.push_value(v); + } + for v in self.iter().skip(idx) { + new_tuple.push_value(&v?); + } + Ok(new_tuple) + } +} + +impl<'a> Extend> for OwnTuple { + #[inline] + fn extend>>(&mut self, iter: T) { + for v in iter { + self.push_value(&v) + } + } +} + +impl, T2: AsRef<[u8]>> PartialEq> for Tuple { + #[inline] + fn eq(&self, other: &Tuple) -> bool { + self.data.as_ref() == other.data.as_ref() + } +} + +impl> Hash for Tuple { + fn hash(&self, state: &mut H) { + self.data.as_ref().hash(state); + } +} + +impl> Eq for Tuple {} diff --git a/src/data/tuple_set.rs b/src/data/tuple_set.rs index 594f4adc..078ecb1e 100644 --- a/src/data/tuple_set.rs +++ b/src/data/tuple_set.rs @@ -1,3 +1,67 @@ -pub(crate) struct TableId; -pub(crate) struct ColId; -pub(crate) struct TupleSetIdx; +use std::fmt::{Debug, Formatter}; +use std::result; + +#[derive(thiserror::Error, Debug)] +pub(crate) enum TypingError { + #[error("table id not allowed: {0}")] + InvalidTableId(u32), +} + +type Result = result::Result; + +const MIN_TABLE_ID: u32 = 10001; + +#[derive(Eq, PartialEq, Clone, Copy, Ord, PartialOrd, Hash)] +pub(crate) struct TableId { + pub(crate) in_root: bool, + pub(crate) id: u32, +} + +impl Debug for TableId { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "#{}{}", if self.in_root { 'G' } else { 'L' }, self.id) + } +} + +impl TableId { + pub(crate) fn new(in_root: bool, id: u32) -> Result { + if id < MIN_TABLE_ID { + Err(TypingError::InvalidTableId(id)) + } else { + Ok(TableId { in_root, id }) + } + } + pub(crate) fn is_valid(&self) -> bool { + self.id >= MIN_TABLE_ID + } +} + +impl From<(bool, u32)> for TableId { + fn from((in_root, id): (bool, u32)) -> Self { + Self { in_root, id } + } +} + +#[derive(Eq, PartialEq, Clone, Copy, Ord, PartialOrd)] +pub(crate) struct ColId { + pub(crate) is_key: bool, + pub(crate) id: usize, +} + +impl Debug for ColId { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, ".{}{}", if self.is_key { 'K' } else { 'D' }, self.id) + } +} + +impl From<(bool, usize)> for ColId { + fn from((is_key, id): (bool, usize)) -> Self { + Self { is_key, id: id } + } +} + +pub(crate) struct TupleSetIdx { + pub(crate) is_key: bool, + pub(crate) t_set: usize, + pub(crate) col_idx: usize, +} diff --git a/src/data/typing.rs b/src/data/typing.rs index 9c5c1536..e136a674 100644 --- a/src/data/typing.rs +++ b/src/data/typing.rs @@ -1,10 +1,10 @@ use crate::data::value::{StaticValue, Value}; +use crate::parser::text_identifier::build_name_in_def; +use crate::parser::{CozoParser, Rule}; +use pest::iterators::Pair; +use pest::Parser; use std::fmt::{Display, Formatter}; use std::result; -use pest::{Parser}; -use pest::iterators::Pair; -use crate::parser::{CozoParser, Rule}; -use crate::parser::text_identifier::build_name_in_def; #[derive(thiserror::Error, Debug)] pub(crate) enum TypingError { @@ -21,7 +21,7 @@ pub(crate) enum TypingError { Parse(#[from] pest::error::Error), #[error(transparent)] - TextParse(#[from] crate::parser::text_identifier::TextParseError) + TextParse(#[from] crate::parser::text_identifier::TextParseError), } type Result = result::Result; @@ -143,7 +143,6 @@ impl Typing { } } - impl TryFrom<&str> for Typing { type Error = TypingError; @@ -153,7 +152,6 @@ impl TryFrom<&str> for Typing { } } - impl<'a> TryFrom> for Typing { type Error = TypingError; @@ -172,13 +170,13 @@ impl Typing { "Float" => Typing::Float, "Text" => Typing::Text, "Uuid" => Typing::Uuid, - t => return Err(TypingError::UndefinedType(t.to_string())), + t => return Err(TypingError::UndefinedType(t.to_string())), }, Rule::nullable_type => Typing::Nullable(Box::new(Typing::from_pair( - pair.into_inner().next().unwrap() + pair.into_inner().next().unwrap(), )?)), Rule::homogeneous_list_type => Typing::Homogeneous(Box::new(Typing::from_pair( - pair.into_inner().next().unwrap() + pair.into_inner().next().unwrap(), )?)), Rule::unnamed_tuple_type => { let types = pair @@ -204,4 +202,4 @@ impl Typing { _ => unreachable!(), }) } -} \ No newline at end of file +} diff --git a/src/lib.rs b/src/lib.rs index d124f1a3..e0934285 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,6 @@ // pub mod error; // pub mod relation; // pub(crate) mod eval; -pub(crate) mod parser; pub(crate) mod data; pub(crate) mod logger; +pub(crate) mod parser; diff --git a/src/parser.rs b/src/parser.rs index 60e75f5d..b1af2d4c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,10 +1,10 @@ -pub mod number; -pub mod text_identifier; +pub(crate) mod number; +pub(crate) mod text_identifier; use pest_derive::Parser; #[derive(Parser)] #[grammar = "grammar.pest"] -pub struct CozoParser; +pub(crate) struct CozoParser; #[cfg(test)] mod tests { @@ -15,14 +15,23 @@ mod tests { fn identifiers() { assert_eq!(CozoParser::parse(Rule::ident, "x").unwrap().as_str(), "x"); assert_eq!(CozoParser::parse(Rule::ident, "x2").unwrap().as_str(), "x2"); - assert_eq!(CozoParser::parse(Rule::ident, "x_y").unwrap().as_str(), "x_y"); + assert_eq!( + CozoParser::parse(Rule::ident, "x_y").unwrap().as_str(), + "x_y" + ); assert_eq!(CozoParser::parse(Rule::ident, "x_").unwrap().as_str(), "x_"); - assert_eq!(CozoParser::parse(Rule::ident, "你好").unwrap().as_str(), "你好"); + assert_eq!( + CozoParser::parse(Rule::ident, "你好").unwrap().as_str(), + "你好" + ); assert_eq!( CozoParser::parse(Rule::ident, "你好123").unwrap().as_str(), "你好123" ); - assert_ne!(CozoParser::parse(Rule::ident, "x$y").unwrap().as_str(), "x$y"); + assert_ne!( + CozoParser::parse(Rule::ident, "x$y").unwrap().as_str(), + "x$y" + ); assert_eq!(CozoParser::parse(Rule::ident, "_x").unwrap().as_str(), "_x"); assert_eq!(CozoParser::parse(Rule::ident, "_").unwrap().as_str(), "_"); @@ -33,8 +42,14 @@ mod tests { assert!(CozoParser::parse(Rule::ident, "123x").is_err()); assert!(CozoParser::parse(Rule::ident, ".x").is_err()); - assert_ne!(CozoParser::parse(Rule::ident, "x.x").unwrap().as_str(), "x.x"); - assert_ne!(CozoParser::parse(Rule::ident, "x~x").unwrap().as_str(), "x~x"); + assert_ne!( + CozoParser::parse(Rule::ident, "x.x").unwrap().as_str(), + "x.x" + ); + assert_ne!( + CozoParser::parse(Rule::ident, "x~x").unwrap().as_str(), + "x~x" + ); } #[test] @@ -44,11 +59,15 @@ mod tests { r#""""# ); assert_eq!( - CozoParser::parse(Rule::string, r#"" b a c""#).unwrap().as_str(), + CozoParser::parse(Rule::string, r#"" b a c""#) + .unwrap() + .as_str(), r#"" b a c""# ); assert_eq!( - CozoParser::parse(Rule::string, r#""你好👋""#).unwrap().as_str(), + CozoParser::parse(Rule::string, r#""你好👋""#) + .unwrap() + .as_str(), r#""你好👋""# ); assert_eq!( @@ -56,7 +75,9 @@ mod tests { r#""\n""# ); assert_eq!( - CozoParser::parse(Rule::string, r#""\u5678""#).unwrap().as_str(), + CozoParser::parse(Rule::string, r#""\u5678""#) + .unwrap() + .as_str(), r#""\u5678""# ); assert!(CozoParser::parse(Rule::string, r#""\ux""#).is_err()); @@ -70,7 +91,10 @@ mod tests { #[test] fn numbers() { - assert_eq!(CozoParser::parse(Rule::number, "123").unwrap().as_str(), "123"); + assert_eq!( + CozoParser::parse(Rule::number, "123").unwrap().as_str(), + "123" + ); assert_eq!(CozoParser::parse(Rule::number, "0").unwrap().as_str(), "0"); assert_eq!( CozoParser::parse(Rule::number, "0123").unwrap().as_str(), @@ -86,11 +110,15 @@ mod tests { "0xAf03" ); assert_eq!( - CozoParser::parse(Rule::number, "0o0_7067").unwrap().as_str(), + CozoParser::parse(Rule::number, "0o0_7067") + .unwrap() + .as_str(), "0o0_7067" ); assert_ne!( - CozoParser::parse(Rule::number, "0o0_7068").unwrap().as_str(), + CozoParser::parse(Rule::number, "0o0_7068") + .unwrap() + .as_str(), "0o0_7068" ); assert_eq!( @@ -111,7 +139,9 @@ mod tests { "123.45" ); assert_eq!( - CozoParser::parse(Rule::number, "1_23.4_5_").unwrap().as_str(), + CozoParser::parse(Rule::number, "1_23.4_5_") + .unwrap() + .as_str(), "1_23.4_5_" ); assert_ne!( @@ -119,7 +149,9 @@ mod tests { "123." ); assert_eq!( - CozoParser::parse(Rule::number, "123.333e456").unwrap().as_str(), + CozoParser::parse(Rule::number, "123.333e456") + .unwrap() + .as_str(), "123.333e456" ); assert_eq!( diff --git a/src/parser/number.rs b/src/parser/number.rs index 6c2e4ff8..c68a7f50 100644 --- a/src/parser/number.rs +++ b/src/parser/number.rs @@ -1,4 +1,4 @@ #[inline] -pub fn parse_int(s: &str, radix: u32) -> i64 { +pub(crate) fn parse_int(s: &str, radix: u32) -> i64 { i64::from_str_radix(&s[2..].replace('_', ""), radix).unwrap() } diff --git a/src/parser/text_identifier.rs b/src/parser/text_identifier.rs index 7c9fc4c7..be439303 100644 --- a/src/parser/text_identifier.rs +++ b/src/parser/text_identifier.rs @@ -1,7 +1,7 @@ -use std::result; use crate::parser::number::parse_int; use crate::parser::Rule; use pest::iterators::Pair; +use std::result; #[derive(thiserror::Error, Debug)] pub(crate) enum TextParseError { @@ -17,7 +17,6 @@ pub(crate) enum TextParseError { type Result = result::Result; - #[inline] fn parse_raw_string(pair: Pair) -> Result { Ok(pair @@ -46,10 +45,13 @@ fn parse_quoted_string(pair: Pair) -> Result { r"\t" => ret.push('\t'), s if s.starts_with(r"\u") => { let code = parse_int(s, 16) as u32; - let ch = char::from_u32(code).ok_or_else(|| TextParseError::InvalidUtfCode(code))?; + let ch = + char::from_u32(code).ok_or_else(|| TextParseError::InvalidUtfCode(code))?; ret.push(ch); } - s if s.starts_with('\\') => return Err(TextParseError::InvalidEscapeSequence(s.to_string())), + s if s.starts_with('\\') => { + return Err(TextParseError::InvalidEscapeSequence(s.to_string())) + } s => ret.push_str(s), } } @@ -73,10 +75,13 @@ fn parse_s_quoted_string(pair: Pair) -> Result { r"\t" => ret.push('\t'), s if s.starts_with(r"\u") => { let code = parse_int(s, 16) as u32; - let ch = char::from_u32(code).ok_or_else(|| TextParseError::InvalidUtfCode(code))?; + let ch = + char::from_u32(code).ok_or_else(|| TextParseError::InvalidUtfCode(code))?; ret.push(ch); } - s if s.starts_with('\\') => return Err(TextParseError::InvalidEscapeSequence(s.to_string())), + s if s.starts_with('\\') => { + return Err(TextParseError::InvalidEscapeSequence(s.to_string())) + } s => ret.push_str(s), } }