ser/deser of expr to/from values

main
Ziyang Hu 2 years ago
parent 304ea38e29
commit 6b0c3d88c0

@ -1,9 +1,24 @@
use crate::data::op::Op;
use crate::data::op::{AggOp, Op, UnresolvedOp};
use crate::data::tuple_set::{ColId, TableId, TupleSetIdx};
use crate::data::value::Value;
use crate::data::value::{StaticValue, Value};
use std::collections::BTreeMap;
use std::result;
use std::sync::Arc;
/// Errors produced while converting a `Value` into an `Expr`.
#[derive(thiserror::Error, Debug)]
pub(crate) enum ExprError {
/// The value does not have the shape required by the expression being decoded;
/// carries a static copy of the offending value.
#[error("Cannot convert from {0}")]
ConversionFailure(StaticValue),
/// The single dictionary key did not name a known expression kind.
#[error("Unknown expr tag {0}")]
UnknownExprTag(String),
/// A list was required but the value was not a list, or its length was wrong.
#[error("List extraction failed for {0}")]
ListExtractionFailed(StaticValue),
}
type Result<T> = result::Result<T, ExprError>;
pub(crate) enum Expr<'a> {
Const(Value<'a>),
List(Vec<Expr<'a>>),
@ -11,11 +26,214 @@ pub(crate) enum Expr<'a> {
Variable(String),
TableCol(TableId, ColId),
TupleSetIdx(TupleSetIdx),
Apply(Arc<Op>, Vec<Expr<'a>>),
Apply(Arc<dyn Op>, Vec<Expr<'a>>),
ApplyAgg(Arc<dyn AggOp>, Vec<Expr<'a>>, Vec<Expr<'a>>),
FieldAcc(String, Box<Expr<'a>>),
IdxAcc(usize, Box<Expr<'a>>),
}
pub(crate) type StaticExpr = Expr<'static>;
// TODO serde expr into value
/// Unwraps `value` as a list, optionally enforcing an exact element count.
///
/// `n == 0` accepts a list of any length; any other `n` requires exactly `n` elements.
/// A non-list value, or a list of the wrong length, is reported as
/// `ExprError::ListExtractionFailed` carrying a static copy of the rejected value.
fn extract_list_from_value(value: Value, n: usize) -> Result<Vec<Value>> {
    match value {
        Value::List(items) if n == 0 || items.len() == n => Ok(items),
        rejected => Err(ExprError::ListExtractionFailed(rejected.to_static())),
    }
}
/// Decodes an `Expr` from its tagged-dictionary `Value` encoding.
///
/// The input must be a `Value::Dict` with exactly one entry. The key names the expression
/// kind and the value is its payload:
///
/// * `"Const"` – any value, used verbatim.
/// * `"List"` – a list of encoded expressions.
/// * `"Dict"` – a dict whose values are encoded expressions.
/// * `"Variable"` – a text value holding the variable name.
/// * `"TableCol"` – `[in_root: Bool, table_id: Int, is_key: Bool, col_id: Int]`.
/// * `"TupleSetIdx"` – `[is_key: Bool, t_set: Int, col_idx: Int]`.
/// * `"Apply"` – `[op_name: Text, args: List]`; the operator becomes an `UnresolvedOp`.
/// * `"ApplyAgg"` – `[op_name: Text, a_args: List, args: List]`; likewise unresolved.
/// * `"FieldAcc"` – `[field: Text, target]`.
/// * `"IdxAcc"` – `[index: Int, target]`.
///
/// # Errors
///
/// * `ExprError::ConversionFailure` when the outer value is not a one-entry dict, when a
///   payload element has the wrong type, or when an integer does not fit the target type
///   (negative or too large). Integers are range-checked rather than cast with `as`, so a
///   malformed value can no longer wrap around silently.
/// * `ExprError::ListExtractionFailed` when a list payload is missing or has the wrong length.
/// * `ExprError::UnknownExprTag` when the key is not one of the tags above.
impl<'a> TryFrom<Value<'a>> for Expr<'a> {
    type Error = ExprError;

    fn try_from(value: Value<'a>) -> Result<Self> {
        // Used for `expect` on iterators whose length `extract_list_from_value` has verified.
        const LEN_CHECKED: &str = "length was validated by extract_list_from_value";

        /// Unwraps a boolean payload element.
        fn expect_bool(v: Value) -> Result<bool> {
            match v {
                Value::Bool(b) => Ok(b),
                v => Err(ExprError::ConversionFailure(v.to_static())),
            }
        }

        /// Unwraps an integer payload element and range-checks it into `T`.
        fn expect_int<T: TryFrom<i64>>(v: Value) -> Result<T> {
            match v {
                Value::Int(i) => T::try_from(i)
                    .map_err(|_| ExprError::ConversionFailure(Value::Int(i).to_static())),
                v => Err(ExprError::ConversionFailure(v.to_static())),
            }
        }

        /// Unwraps a text payload element into an owned string.
        fn expect_text(v: Value) -> Result<String> {
            match v {
                Value::Text(t) => Ok(t.to_string()),
                v => Err(ExprError::ConversionFailure(v.to_static())),
            }
        }

        /// Decodes a list of any length whose elements are encoded expressions.
        fn expr_list<'v>(v: Value<'v>) -> Result<Vec<Expr<'v>>> {
            extract_list_from_value(v, 0)?
                .into_iter()
                .map(Expr::try_from)
                .collect()
        }

        let d = match value {
            Value::Dict(d) if d.len() == 1 => d,
            other => return Err(ExprError::ConversionFailure(other.to_static())),
        };
        let (k, v) = d
            .into_iter()
            .next()
            .expect("dict was checked to hold exactly one entry");

        match k.as_ref() {
            "Const" => Ok(Expr::Const(v)),
            "List" => Ok(Expr::List(expr_list(v)?)),
            "Dict" => match v {
                Value::Dict(d) => Ok(Expr::Dict(
                    d.into_iter()
                        .map(|(k, v)| -> Result<(String, Expr)> {
                            Ok((k.to_string(), Expr::try_from(v)?))
                        })
                        .collect::<Result<BTreeMap<_, _>>>()?,
                )),
                // Report the whole tagged entry so the error shows which tag was malformed.
                v => Err(ExprError::ConversionFailure(
                    Value::Dict(BTreeMap::from([(k, v)])).to_static(),
                )),
            },
            "Variable" => match v {
                Value::Text(t) => Ok(Expr::Variable(t.to_string())),
                v => Err(ExprError::ConversionFailure(
                    Value::Dict(BTreeMap::from([(k, v)])).to_static(),
                )),
            },
            "TableCol" => {
                let mut parts = extract_list_from_value(v, 4)?.into_iter();
                let in_root = expect_bool(parts.next().expect(LEN_CHECKED))?;
                let tid: u32 = expect_int(parts.next().expect(LEN_CHECKED))?;
                let is_key = expect_bool(parts.next().expect(LEN_CHECKED))?;
                let cid: usize = expect_int(parts.next().expect(LEN_CHECKED))?;
                Ok(Expr::TableCol((in_root, tid).into(), (is_key, cid).into()))
            }
            "TupleSetIdx" => {
                let mut parts = extract_list_from_value(v, 3)?.into_iter();
                let is_key = expect_bool(parts.next().expect(LEN_CHECKED))?;
                let t_set: usize = expect_int(parts.next().expect(LEN_CHECKED))?;
                let col_idx: usize = expect_int(parts.next().expect(LEN_CHECKED))?;
                Ok(Expr::TupleSetIdx(TupleSetIdx {
                    is_key,
                    t_set,
                    col_idx,
                }))
            }
            "Apply" => {
                let mut parts = extract_list_from_value(v, 2)?.into_iter();
                let name = expect_text(parts.next().expect(LEN_CHECKED))?;
                let op = Arc::new(UnresolvedOp(name));
                let args = expr_list(parts.next().expect(LEN_CHECKED))?;
                Ok(Expr::Apply(op, args))
            }
            "ApplyAgg" => {
                let mut parts = extract_list_from_value(v, 3)?.into_iter();
                let name = expect_text(parts.next().expect(LEN_CHECKED))?;
                let op = Arc::new(UnresolvedOp(name));
                let a_args = expr_list(parts.next().expect(LEN_CHECKED))?;
                let args = expr_list(parts.next().expect(LEN_CHECKED))?;
                Ok(Expr::ApplyAgg(op, a_args, args))
            }
            "FieldAcc" => {
                let mut parts = extract_list_from_value(v, 2)?.into_iter();
                let field = expect_text(parts.next().expect(LEN_CHECKED))?;
                let target = Expr::try_from(parts.next().expect(LEN_CHECKED))?;
                Ok(Expr::FieldAcc(field, Box::new(target)))
            }
            "IdxAcc" => {
                let mut parts = extract_list_from_value(v, 2)?.into_iter();
                let idx: usize = expect_int(parts.next().expect(LEN_CHECKED))?;
                let target = Expr::try_from(parts.next().expect(LEN_CHECKED))?;
                Ok(Expr::IdxAcc(idx, Box::new(target)))
            }
            k => Err(ExprError::UnknownExprTag(k.to_string())),
        }
    }
}
/// Encodes an expression as a one-entry dictionary: the key is the variant's tag and the
/// value is the variant's payload (see `build_tagged_value`).
///
/// Operators are stored by name only; numeric ids and indices are stored as `i64`.
impl<'a> From<Expr<'a>> for Value<'a> {
    fn from(expr: Expr<'a>) -> Self {
        // Work out tag and payload first, then wrap them once at the end.
        let (tag, payload): (&'static str, Value<'a>) = match expr {
            Expr::Const(constant) => ("Const", constant),
            Expr::List(items) => (
                "List",
                items.into_iter().map(Value::from).collect::<Vec<_>>().into(),
            ),
            Expr::Dict(entries) => (
                "Dict",
                entries
                    .into_iter()
                    .map(|(name, sub)| (name.into(), sub.into()))
                    .collect::<BTreeMap<_, _>>()
                    .into(),
            ),
            Expr::Variable(name) => ("Variable", name.into()),
            Expr::TableCol(table, col) => (
                "TableCol",
                vec![
                    table.in_root.into(),
                    Value::from(table.id as i64),
                    col.is_key.into(),
                    Value::from(col.id as i64),
                ]
                .into(),
            ),
            Expr::TupleSetIdx(set_idx) => (
                "TupleSetIdx",
                vec![
                    set_idx.is_key.into(),
                    Value::from(set_idx.t_set as i64),
                    Value::from(set_idx.col_idx as i64),
                ]
                .into(),
            ),
            Expr::Apply(op, args) => (
                "Apply",
                vec![
                    Value::from(op.name().to_string()),
                    args.into_iter().map(Value::from).collect::<Vec<_>>().into(),
                ]
                .into(),
            ),
            Expr::ApplyAgg(op, a_args, args) => (
                "ApplyAgg",
                vec![
                    Value::from(op.name().to_string()),
                    a_args
                        .into_iter()
                        .map(Value::from)
                        .collect::<Vec<_>>()
                        .into(),
                    args.into_iter().map(Value::from).collect::<Vec<_>>().into(),
                ]
                .into(),
            ),
            Expr::FieldAcc(field, target) => (
                "FieldAcc",
                vec![field.into(), Value::from(*target)].into(),
            ),
            Expr::IdxAcc(idx, target) => (
                "IdxAcc",
                vec![(idx as i64).into(), Value::from(*target)].into(),
            ),
        };
        build_tagged_value(tag, payload)
    }
}
/// Wraps `val` in a dictionary whose only key is `tag`.
fn build_tagged_value<'a>(tag: &'static str, val: Value<'a>) -> Value<'a> {
    let mut tagged = BTreeMap::new();
    tagged.insert(tag.into(), val);
    Value::Dict(tagged)
}

@ -1 +1,31 @@
pub(crate) struct Op;
/// Interface of an operator that can be applied to expressions.
pub(crate) trait Op {
/// Whether this operator is resolved. `UnresolvedOp` returns `false`.
/// NOTE(review): what resolution involves is not visible here — confirm.
fn is_resolved(&self) -> bool;
/// The operator's name.
fn name(&self) -> &str;
}
/// Interface of an aggregation operator; it has the same two methods as `Op`.
pub(crate) trait AggOp {
/// Whether this operator is resolved. `UnresolvedOp` returns `false`.
/// NOTE(review): what resolution involves is not visible here — confirm.
fn is_resolved(&self) -> bool;
/// The operator's name.
fn name(&self) -> &str;
}
/// An operator known only by its name. It implements both `Op` and `AggOp`, always
/// reports itself as unresolved, and returns the wrapped string as its name.
pub(crate) struct UnresolvedOp(pub String);

impl Op for UnresolvedOp {
    /// Always `false`.
    fn is_resolved(&self) -> bool {
        false
    }

    /// The wrapped name.
    fn name(&self) -> &str {
        let UnresolvedOp(name) = self;
        name.as_str()
    }
}

impl AggOp for UnresolvedOp {
    /// Always `false`.
    fn is_resolved(&self) -> bool {
        false
    }

    /// The wrapped name.
    fn name(&self) -> &str {
        let UnresolvedOp(name) = self;
        name.as_str()
    }
}

@ -1,6 +1,34 @@
use crate::data::tuple::TupleError::UndefinedDataTag;
use crate::data::value::Value;
use std::borrow::Cow;
use std::cell::RefCell;
use std::cmp::{Ordering, Reverse};
use std::collections::BTreeMap;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::result;
use uuid::Uuid;
/// Errors raised while decoding or accessing a `Tuple`.
#[derive(thiserror::Error, Debug)]
pub(crate) enum TupleError {
/// The 4-byte prefix does not correspond to any `DataKind`.
#[error("Undefined data kind {0}")]
UndefinedDataKind(u32),
/// A value's tag byte is not a storage tag that can be decoded.
#[error("Undefined data tag {0}")]
UndefinedDataTag(u8),
/// The requested value index is past the end of the tuple; carries a copy of the tuple.
#[error("Index {0} out of bound for tuple {1:?}")]
IndexOutOfBound(usize, OwnTuple),
/// The value at the requested index is not of the expected type (named by the first field).
#[error("Type mismatch: {1:?} is not {0}")]
TypeMismatch(&'static str, OwnTuple),
}
type Result<T> = result::Result<T, TupleError>;
#[repr(u8)]
#[derive(Ord, PartialOrd, Eq, PartialEq)]
pub(crate) enum Tag {
pub(crate) enum StorageTag {
BoolFalse = 1,
Null = 2,
BoolTrue = 3,
@ -19,11 +47,11 @@ pub(crate) enum Tag {
Max = 255,
}
impl TryFrom<u8> for Tag {
impl TryFrom<u8> for StorageTag {
type Error = u8;
#[inline]
fn try_from(u: u8) -> std::result::Result<Tag, u8> {
use self::Tag::*;
fn try_from(u: u8) -> std::result::Result<StorageTag, u8> {
use self::StorageTag::*;
Ok(match u {
1 => BoolFalse,
2 => Null,
@ -45,3 +73,627 @@ impl TryFrom<u8> for Tag {
})
}
}
/// Kind of a tuple, encoded as the tuple's `u32` prefix (see `Tuple::data_kind`).
/// NOTE(review): the meaning of each kind is not visible in this file — confirm before relying on it.
#[repr(u32)]
#[derive(Ord, PartialOrd, Eq, PartialEq, Debug, Clone)]
pub enum DataKind {
Data = 0,
Node = 1,
Edge = 2,
Assoc = 3,
Index = 4,
Val = 5,
Type = 6,
// Largest possible prefix value.
Empty = u32::MAX,
}
// In storage, key layout is `[0, name, stack_depth]` where stack_depth is a non-positive number as zigzag
// Also has inverted index `[0, stack_depth, name]` for easy popping of stacks
pub const EMPTY_DATA: [u8; 4] = u32::MAX.to_be_bytes();
impl<T: AsRef<[u8]>> Tuple<T> {
pub fn data_kind(&self) -> Result<DataKind> {
use DataKind::*;
Ok(match self.get_prefix() {
0 => Data,
1 => Node,
2 => Edge,
3 => Assoc,
4 => Index,
5 => Val,
6 => Type,
u32::MAX => Empty,
v => return Err(TupleError::UndefinedDataKind(v)),
})
}
}
/// A sequence of values serialised into a byte buffer.
///
/// The first `PREFIX_LEN` bytes hold a big-endian `u32` prefix; the encoded values follow.
#[derive(Clone)]
pub(crate) struct Tuple<T>
where
T: AsRef<[u8]>,
{
/// Raw bytes: the prefix followed by the encoded values.
pub(crate) data: T,
/// Lazily filled offsets: entry `i` is the byte position just past value `i`,
/// which is where value `i + 1` starts.
idx_cache: RefCell<Vec<usize>>,
}
impl<T: AsRef<[u8]>> Tuple<T> {
    /// Discards every cached value offset; they are recomputed on demand.
    pub(crate) fn clear_cache(&self) {
        let mut offsets = self.idx_cache.borrow_mut();
        offsets.clear();
    }
}
/// Exposes the raw bytes (prefix included) of the tuple.
impl<T: AsRef<[u8]>> AsRef<[u8]> for Tuple<T> {
    fn as_ref(&self) -> &[u8] {
        let bytes: &[u8] = self.data.as_ref();
        bytes
    }
}
pub(crate) type OwnTuple = Tuple<Vec<u8>>;
pub(crate) const PREFIX_LEN: usize = 4;
impl<T: AsRef<[u8]>> Tuple<T> {
#[inline]
/// Copies the bytes into a new `OwnTuple` that starts with an empty offset cache.
pub(crate) fn to_owned(&self) -> OwnTuple {
    let bytes: &[u8] = self.data.as_ref();
    OwnTuple {
        data: bytes.to_vec(),
        idx_cache: RefCell::new(Vec::new()),
    }
}
#[inline]
/// Whether this tuple's bytes begin with all of `other`'s bytes (prefix included).
pub(crate) fn starts_with<T2: AsRef<[u8]>>(&self, other: &Tuple<T2>) -> bool {
    let (mine, wanted) = (self.data.as_ref(), other.data.as_ref());
    mine.starts_with(wanted)
}
#[inline]
/// Byte-wise equality of everything after the prefix.
///
/// Panics if either tuple holds fewer than `PREFIX_LEN` bytes.
pub(crate) fn key_part_eq<T2: AsRef<[u8]>>(&self, other: &Tuple<T2>) -> bool {
    let lhs = &self.data.as_ref()[PREFIX_LEN..];
    let rhs = &other.data.as_ref()[PREFIX_LEN..];
    lhs == rhs
}
#[inline]
/// Lexicographically compares the decoded values of both tuples.
///
/// Values that fail to decode are dropped from the comparison rather than reported.
pub(crate) fn key_part_cmp<T2: AsRef<[u8]>>(&self, other: &Tuple<T2>) -> Ordering {
    let mine = self.iter().filter_map(Result::ok);
    let theirs = other.iter().filter_map(Result::ok);
    mine.cmp(theirs)
}
#[inline]
/// Wraps already-encoded bytes; nothing is validated and the offset cache starts empty.
pub(crate) fn new(data: T) -> Self {
    let idx_cache = RefCell::new(Vec::new());
    Self { data, idx_cache }
}
#[inline]
/// Reads the first four bytes as a big-endian `u32`.
///
/// Panics if the buffer is shorter than four bytes.
pub(crate) fn get_prefix(&self) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&self.data.as_ref()[0..4]);
    u32::from_be_bytes(raw)
}
#[inline]
/// Whether the offset cache already reaches the end of the data.
///
/// With an empty cache this is true only when the tuple holds nothing beyond its prefix.
fn all_cached(&self) -> bool {
    let total = self.data.as_ref().len();
    let cached_end = self
        .idx_cache
        .borrow()
        .last()
        .copied()
        .unwrap_or(PREFIX_LEN);
    cached_end == total
}
#[inline]
/// Byte position at which value `idx` starts, if it can be located.
///
/// Value 0 starts right after the prefix (when anything follows it); later values start
/// where the previous value ended, which is looked up in the offset cache after extending
/// the cache as far as needed.
fn get_pos(&self, idx: usize) -> Option<usize> {
    if idx != 0 {
        self.cache_until(idx);
        let cache = self.idx_cache.borrow();
        return cache.get(idx - 1).copied();
    }
    if self.data.as_ref().len() > PREFIX_LEN {
        Some(PREFIX_LEN)
    } else {
        None
    }
}
#[inline]
/// Extends the offset cache until it holds `idx` entries or the data is exhausted.
fn cache_until(&self, idx: usize) {
    loop {
        let cached = self.idx_cache.borrow().len();
        if cached >= idx || self.all_cached() {
            break;
        }
        self.skip_and_cache();
    }
}
#[inline]
/// Skips over the next uncached value without decoding it and records where it ends.
///
/// The value starts at the last cached offset (or right after the prefix when the cache
/// is empty). A `DescVal` tag is a one-byte wrapper around another value, so the scan
/// advances by one byte and inspects the wrapped value's own tag.
///
/// Fix: the tag is now read at `start - 1`. Reading it at the fixed `tag_start` meant that
/// after a `DescVal` the loop saw the same `DescVal` byte again and never terminated.
///
/// # Panics
///
/// Panics on an unknown tag byte, on the `Max` sentinel, and when the data is truncated.
fn skip_and_cache(&self) {
    let data = self.data.as_ref();
    let tag_start = *self.idx_cache.borrow().last().unwrap_or(&PREFIX_LEN);
    // Invariant: `start` is the position right after the tag currently being inspected.
    let mut start = tag_start + 1;
    let nxt = loop {
        break match StorageTag::try_from(data[start - 1]).unwrap() {
            StorageTag::Null | StorageTag::BoolTrue | StorageTag::BoolFalse => start,
            StorageTag::Int => start + self.parse_varint(start).1,
            StorageTag::Float => start + 8,
            StorageTag::Uuid => start + 16,
            StorageTag::Text | StorageTag::Bytes => {
                let (slen, offset) = self.parse_varint(start);
                start + slen as usize + offset
            }
            StorageTag::List | StorageTag::Dict => {
                // The stored length counts from the length field itself.
                start + u32::from_be_bytes(data[start..start + 4].try_into().unwrap()) as usize
            }
            StorageTag::DescVal => {
                start += 1;
                continue;
            }
            StorageTag::Max => panic!(),
        };
    };
    self.idx_cache.borrow_mut().push(nxt);
}
#[inline]
/// Decodes a little-endian base-128 varint starting at byte `idx`.
///
/// Each byte contributes its low seven bits; a set high bit means another byte follows.
/// Returns the decoded number and how many bytes it occupied. Panics if the data ends
/// before the final byte.
fn parse_varint(&self, idx: usize) -> (u64, usize) {
    let bytes = self.data.as_ref();
    let mut value: u64 = 0;
    let mut consumed: usize = 0;
    loop {
        let byte = bytes[idx + consumed];
        value |= u64::from(byte & 0b0111_1111) << (7 * consumed);
        consumed += 1;
        if byte & 0b1000_0000 == 0 {
            return (value, consumed);
        }
    }
}
#[inline]
/// Decodes the value at index `idx`.
///
/// Returns `TupleError::IndexOutOfBound` when no such value exists. When `idx` is exactly
/// the next uncached index, the value's end offset is added to the cache.
pub(crate) fn get(&self, idx: usize) -> Result<Value> {
    let pos = match self.get_pos(idx) {
        Some(pos) if pos != self.data.as_ref().len() => pos,
        _ => return Err(TupleError::IndexOutOfBound(idx, self.to_owned())),
    };
    let (val, nxt) = self.parse_value_at(pos)?;
    let mut cache = self.idx_cache.borrow_mut();
    if idx == cache.len() {
        cache.push(nxt);
    }
    Ok(val)
}
#[inline]
/// Succeeds when the value at `idx` is null; otherwise returns `TypeMismatch("Null", ..)`.
pub(crate) fn get_null(&self, idx: usize) -> Result<()> {
    if let Value::Null = self.get(idx)? {
        return Ok(());
    }
    Err(TupleError::TypeMismatch("Null", self.to_owned()))
}
#[inline]
/// Returns the integer at `idx`, or `TypeMismatch("Int", ..)` for any other value.
pub(crate) fn get_int(&self, idx: usize) -> Result<i64> {
    if let Value::Int(number) = self.get(idx)? {
        return Ok(number);
    }
    Err(TupleError::TypeMismatch("Int", self.to_owned()))
}
#[inline]
/// Returns the text at `idx`, or `TypeMismatch("Text", ..)` for any other value.
pub(crate) fn get_text(&self, idx: usize) -> Result<Cow<str>> {
    if let Value::Text(text) = self.get(idx)? {
        return Ok(text);
    }
    Err(TupleError::TypeMismatch("Text", self.to_owned()))
}
#[inline]
/// Returns the boolean at `idx`, or `TypeMismatch("Bool", ..)` for any other value.
pub(crate) fn get_bool(&self, idx: usize) -> Result<bool> {
    if let Value::Bool(flag) = self.get(idx)? {
        return Ok(flag);
    }
    Err(TupleError::TypeMismatch("Bool", self.to_owned()))
}
#[inline]
/// Returns the float at `idx`, or `TypeMismatch("Float", ..)` for any other value.
pub(crate) fn get_float(&self, idx: usize) -> Result<f64> {
    if let Value::Float(wrapped) = self.get(idx)? {
        return Ok(wrapped.into_inner());
    }
    Err(TupleError::TypeMismatch("Float", self.to_owned()))
}
#[inline]
/// Returns the UUID at `idx`, or `TypeMismatch("Uuid", ..)` for any other value.
pub(crate) fn get_uuid(&self, idx: usize) -> Result<Uuid> {
    if let Value::Uuid(id) = self.get(idx)? {
        return Ok(id);
    }
    Err(TupleError::TypeMismatch("Uuid", self.to_owned()))
}
#[inline]
/// Returns the list at `idx`, or `TypeMismatch("List", ..)` for any other value.
pub(crate) fn get_list(&self, idx: usize) -> Result<Vec<Value>> {
    if let Value::List(items) = self.get(idx)? {
        return Ok(items);
    }
    Err(TupleError::TypeMismatch("List", self.to_owned()))
}
#[inline]
/// Returns the dictionary at `idx`, or `TypeMismatch("Dict", ..)` for any other value.
pub(crate) fn get_dict(&self, idx: usize) -> Result<BTreeMap<Cow<str>, Value>> {
    if let Value::Dict(entries) = self.get(idx)? {
        return Ok(entries);
    }
    Err(TupleError::TypeMismatch("Dict", self.to_owned()))
}
/// Decodes the value whose tag byte sits at `pos`.
///
/// Returns the decoded value together with the position of the first byte after it.
/// An unknown tag byte, or the `Max` sentinel, yields `TupleError::UndefinedDataTag`.
/// Truncated data is not checked for and panics on an out-of-range slice index.
#[inline]
fn parse_value_at(&self, pos: usize) -> Result<(Value, usize)> {
let data = self.data.as_ref();
// The payload, if any, begins right after the tag byte.
let start = pos + 1;
let tag = match StorageTag::try_from(data[pos]) {
Ok(t) => t,
Err(e) => return Err(TupleError::UndefinedDataTag(e)),
};
let (nxt, val): (usize, Value) = match tag {
// Tag-only values carry no payload.
StorageTag::Null => (start, ().into()),
StorageTag::BoolTrue => (start, true.into()),
StorageTag::BoolFalse => (start, false.into()),
StorageTag::Int => {
// Integers are stored as a zigzag-encoded varint.
let (u, offset) = self.parse_varint(start);
let val = Self::varint_to_zigzag(u);
(start + offset, val.into())
}
StorageTag::Float => (
start + 8,
f64::from_be_bytes(data[start..start + 8].try_into().unwrap()).into(),
),
StorageTag::Uuid => (
start + 16,
Uuid::from_slice(&data[start..start + 16]).unwrap().into(),
),
StorageTag::Text => {
// Varint byte length followed by the string bytes.
let (slen, offset) = self.parse_varint(start);
let slen = slen as usize;
// NOTE(review): the bytes are not validated as UTF-8; this is sound only if
// every text payload was written by `push_str` — confirm for external data.
let s = unsafe {
std::str::from_utf8_unchecked(&data[start + offset..start + offset + slen])
};
(start + slen + offset, s.into())
}
StorageTag::Bytes => {
// Varint byte length followed by the raw bytes.
let (slen, offset) = self.parse_varint(start);
let slen = slen as usize;
let s = &data[start + offset..start + offset + slen];
(start + slen + offset, s.into())
}
StorageTag::List => {
// A big-endian u32 length, counted from `start` (so it includes its own four
// bytes), followed by the encoded elements.
let end_pos =
start + u32::from_be_bytes(data[start..start + 4].try_into().unwrap()) as usize;
let mut start_pos = start + 4;
let mut collected = vec![];
while start_pos < end_pos {
let (val, new_pos) = self.parse_value_at(start_pos)?;
collected.push(val);
start_pos = new_pos;
}
(end_pos, collected.into())
}
StorageTag::Dict => {
// Same length header as a list; each entry is a varint key length, the key
// bytes, then the encoded value.
let end_pos =
start + u32::from_be_bytes(data[start..start + 4].try_into().unwrap()) as usize;
let mut start_pos = start + 4;
let mut collected: BTreeMap<Cow<str>, Value> = BTreeMap::new();
while start_pos < end_pos {
let (slen, offset) = self.parse_varint(start_pos);
start_pos += offset;
// NOTE(review): keys are not validated as UTF-8 either — same caveat as for text.
let key = unsafe {
std::str::from_utf8_unchecked(&data[start_pos..start_pos + slen as usize])
};
start_pos += slen as usize;
let (val, new_pos) = self.parse_value_at(start_pos)?;
collected.insert(key.into(), val);
start_pos = new_pos;
}
(end_pos, collected.into())
}
StorageTag::DescVal => {
// One-byte wrapper: decode the wrapped value and mark it as reversed.
let (val, offset) = self.parse_value_at(pos + 1)?;
(offset, Value::DescVal(Reverse(val.into())))
}
StorageTag::Max => return Err(UndefinedDataTag(StorageTag::Max as u8)),
};
Ok((val, nxt))
}
/// Undoes zigzag encoding: even inputs map to `u / 2`, odd inputs to `-(u / 2) - 1`.
fn varint_to_zigzag(u: u64) -> i64 {
    let magnitude = (u >> 1) as i64;
    // All-ones for odd inputs, zero for even ones; XOR with all-ones is bitwise NOT,
    // and `!x == -x - 1` in two's complement.
    let sign_mask = -((u & 1) as i64);
    magnitude ^ sign_mask
}
pub(crate) fn iter(&self) -> TupleIter<T> {
TupleIter {
tuple: self,
pos: 4,
}
}
}
/// Renders as `Tuple<kind>{0: v0, 1: v1, ...}`. The raw prefix is shown when it is not a
/// known `DataKind`; a value that fails to decode is shown as its error.
impl<T: AsRef<[u8]>> Debug for Tuple<T> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        if let Ok(kind) = self.data_kind() {
            write!(f, "Tuple<{:?}>{{", kind)?;
        } else {
            write!(f, "Tuple<{}>{{", self.get_prefix())?;
        }
        let mut rendered = Vec::new();
        for (i, item) in self.iter().enumerate() {
            rendered.push(match item {
                Ok(value) => format!("{}: {}", i, value),
                Err(err) => format!("{}: {:?}", i, err),
            });
        }
        write!(f, "{}}}", rendered.join(", "))
    }
}
/// Iterator over the decoded values of a borrowed `Tuple`.
pub(crate) struct TupleIter<'a, T: AsRef<[u8]>> {
/// The tuple being read.
tuple: &'a Tuple<T>,
/// Byte position of the next value's tag.
pos: usize,
}
/// Yields each value of the tuple in order.
///
/// A value that fails to decode is yielded once as `Err`, after which the iterator is
/// exhausted. Previously the position was left unchanged on error, so the same error was
/// produced forever and any consumer that drains the iterator (such as `Debug::fmt`)
/// never finished. The end check also uses `>=`, so a position past the end of the data
/// terminates iteration instead of indexing out of bounds.
impl<'a, T: AsRef<[u8]>> Iterator for TupleIter<'a, T> {
    type Item = Result<Value<'a>>;

    fn next(&mut self) -> Option<Self::Item> {
        let end = self.tuple.data.as_ref().len();
        if self.pos >= end {
            return None;
        }
        match self.tuple.parse_value_at(self.pos) {
            Ok((value, next_pos)) => {
                self.pos = next_pos;
                Some(Ok(value))
            }
            Err(e) => {
                // Where the next value starts is unknown after a failure, so stop here.
                self.pos = end;
                Some(Err(e))
            }
        }
    }
}
impl OwnTuple {
#[inline]
/// Removes every value, keeping only the prefix, and empties the offset cache.
pub(crate) fn truncate_all(&mut self) {
    self.idx_cache.borrow_mut().clear();
    self.data.truncate(PREFIX_LEN);
}
#[inline]
/// A tuple with no values whose prefix is `DataKind::Empty`.
pub(crate) fn empty_tuple() -> OwnTuple {
    let kind = DataKind::Empty;
    OwnTuple::with_data_prefix(kind)
}
#[inline]
/// A tuple with no values whose prefix is zero.
pub(crate) fn with_null_prefix() -> Self {
    let zero_prefix: u32 = 0;
    Tuple::with_prefix(zero_prefix)
}
#[inline]
/// A tuple with no values whose prefix is the numeric value of `prefix`.
pub(crate) fn with_data_prefix(prefix: DataKind) -> Self {
    let raw = prefix as u32;
    Tuple::with_prefix(raw)
}
#[inline]
/// A tuple with no values whose first four bytes are `prefix` in big-endian order.
pub(crate) fn with_prefix(prefix: u32) -> Self {
    let idx_cache = RefCell::new(Vec::new());
    let data = prefix.to_be_bytes().to_vec();
    Self { data, idx_cache }
}
#[inline]
/// Replaces the first four bytes with `prefix` in big-endian order; the values are untouched.
///
/// Panics if the buffer is shorter than four bytes.
pub(crate) fn overwrite_prefix(&mut self, prefix: u32) {
    let encoded = prefix.to_be_bytes();
    self.data[..4].copy_from_slice(&encoded);
}
#[inline]
/// A tuple with prefix `u32::MAX` followed by the `Max` sentinel tag.
pub(crate) fn max_tuple() -> Self {
    let mut sentinel = Tuple::with_prefix(u32::MAX);
    sentinel.seal_with_sentinel();
    sentinel
}
#[inline]
/// Appends the `Max` sentinel tag. The offset cache is not updated for it.
pub(crate) fn seal_with_sentinel(&mut self) {
    let sentinel = StorageTag::Max;
    self.push_tag(sentinel);
}
#[inline]
/// Appends the one-byte encoding of `tag`.
fn push_tag(&mut self, tag: StorageTag) {
    let byte = tag as u8;
    self.data.push(byte);
}
#[inline]
/// Appends a null value and records its end offset.
pub(crate) fn push_null(&mut self) {
    self.push_tag(StorageTag::Null);
    let end = self.data.len();
    self.idx_cache.borrow_mut().push(end);
}
#[inline]
/// Appends a boolean, which is encoded entirely by its tag, and records its end offset.
pub(crate) fn push_bool(&mut self, b: bool) {
    let tag = if b {
        StorageTag::BoolTrue
    } else {
        StorageTag::BoolFalse
    };
    self.push_tag(tag);
    let end = self.data.len();
    self.idx_cache.borrow_mut().push(end);
}
#[inline]
/// Appends an integer as a zigzag-encoded varint and records its end offset.
pub(crate) fn push_int(&mut self, i: i64) {
    self.push_tag(StorageTag::Int);
    self.push_zigzag(i);
    let end = self.data.len();
    self.idx_cache.borrow_mut().push(end);
}
#[inline]
/// Appends a float as eight big-endian bytes and records its end offset.
pub(crate) fn push_float(&mut self, f: f64) {
    self.push_tag(StorageTag::Float);
    self.data.extend_from_slice(&f.to_be_bytes());
    let end = self.data.len();
    self.idx_cache.borrow_mut().push(end);
}
#[inline]
/// Appends a UUID as its raw bytes and records its end offset.
pub(crate) fn push_uuid(&mut self, u: Uuid) {
    self.push_tag(StorageTag::Uuid);
    self.data.extend_from_slice(u.as_bytes());
    let end = self.data.len();
    self.idx_cache.borrow_mut().push(end);
}
#[inline]
/// Appends a text value (varint byte length, then the UTF-8 bytes) and records its end offset.
pub(crate) fn push_str(&mut self, s: impl AsRef<str>) {
    let text: &str = s.as_ref();
    self.push_tag(StorageTag::Text);
    self.push_varint(text.len() as u64);
    self.data.extend_from_slice(text.as_bytes());
    let end = self.data.len();
    self.idx_cache.borrow_mut().push(end);
}
#[inline]
/// Appends a byte string (varint length, then the bytes) and records its end offset.
pub(crate) fn push_bytes(&mut self, b: impl AsRef<[u8]>) {
    let raw: &[u8] = b.as_ref();
    self.push_tag(StorageTag::Bytes);
    self.push_varint(raw.len() as u64);
    self.data.extend_from_slice(raw);
    let end = self.data.len();
    self.idx_cache.borrow_mut().push(end);
}
#[inline]
/// Appends `v` wrapped in a `DescVal` tag.
///
/// Offsets that `push_value` cached for the wrapped value are dropped and replaced by a
/// single entry, so the wrapper counts as one value.
pub(crate) fn push_reverse_value(&mut self, v: &Value) {
    self.push_tag(StorageTag::DescVal);
    let cached_before = self.idx_cache.borrow().len();
    self.push_value(v);
    let end = self.data.len();
    let mut cache = self.idx_cache.borrow_mut();
    cache.truncate(cached_before);
    cache.push(end);
}
#[inline]
/// Appends any value, dispatching on its variant.
///
/// Lists and dictionaries are written as a tag, a big-endian `u32` length (counted from
/// the length field itself, so it includes those four bytes) and then their contents.
/// Offsets cached while writing nested contents are discarded, so each container counts
/// as a single value.
///
/// # Panics
///
/// Panics when given `Value::EndSentinel`.
pub(crate) fn push_value(&mut self, v: &Value) {
    match v {
        Value::Null => self.push_null(),
        Value::Bool(flag) => self.push_bool(*flag),
        Value::Int(number) => self.push_int(*number),
        Value::Float(wrapped) => self.push_float(wrapped.into_inner()),
        Value::Uuid(id) => self.push_uuid(*id),
        Value::Text(text) => self.push_str(text),
        Value::Bytes(raw) => self.push_bytes(raw),
        Value::List(items) => {
            self.push_tag(StorageTag::List);
            let len_pos = self.data.len();
            let cached_before = self.idx_cache.borrow().len();
            // Placeholder for the length, patched once the contents are written.
            self.data.extend_from_slice(&[0u8; 4]);
            for item in items {
                self.push_value(item);
            }
            let span = (self.data.len() - len_pos) as u32;
            self.data[len_pos..len_pos + 4].copy_from_slice(&span.to_be_bytes());
            let end = self.data.len();
            let mut cache = self.idx_cache.borrow_mut();
            cache.truncate(cached_before);
            cache.push(end);
        }
        Value::Dict(entries) => {
            self.push_tag(StorageTag::Dict);
            let len_pos = self.data.len();
            let cached_before = self.idx_cache.borrow().len();
            // Placeholder for the length, patched once the contents are written.
            self.data.extend_from_slice(&[0u8; 4]);
            for (key, val) in entries {
                self.push_varint(key.len() as u64);
                self.data.extend_from_slice(key.as_bytes());
                self.push_value(val);
            }
            let span = (self.data.len() - len_pos) as u32;
            self.data[len_pos..len_pos + 4].copy_from_slice(&span.to_be_bytes());
            let end = self.data.len();
            let mut cache = self.idx_cache.borrow_mut();
            cache.truncate(cached_before);
            cache.push(end);
        }
        Value::EndSentinel => panic!("Cannot push sentinel value"),
        Value::DescVal(Reverse(inner)) => {
            self.push_reverse_value(inner);
        }
    }
}
#[inline]
/// Appends `u` as a little-endian base-128 varint: seven bits per byte, with the high bit
/// set on every byte except the last.
fn push_varint(&mut self, u: u64) {
    let mut rest = u;
    loop {
        let low = (rest & 0b0111_1111) as u8;
        rest >>= 7;
        if rest == 0 {
            self.data.push(low);
            break;
        }
        self.data.push(low | 0b1000_0000);
    }
}
#[inline]
/// Appends `i` as a zigzag-encoded varint: non-negative `i` becomes `2 * i`, negative `i`
/// becomes `2 * |i| - 1`.
fn push_zigzag(&mut self, i: i64) {
    // Shift-and-XOR form of zigzag: `i >> 63` is all ones for negative `i` and zero
    // otherwise. It cannot overflow, including for `i64::MIN`.
    let encoded = ((i << 1) ^ (i >> 63)) as u64;
    self.push_varint(encoded);
}
#[inline]
/// Appends the encoded values of `other` (everything after its four-byte prefix).
///
/// The offset cache is not extended for the appended values. Panics if `other` holds
/// fewer than four bytes.
pub(crate) fn concat_data<T: AsRef<[u8]>>(&mut self, other: &Tuple<T>) {
    let other_bytes: &[u8] = other.as_ref();
    self.data.extend_from_slice(&other_bytes[4..]);
}
#[inline]
/// Builds a new tuple with the same prefix in which `values` are inserted before the
/// value currently at index `idx`.
///
/// If the tuple has fewer than `idx` values, `values` are appended at the end. The first
/// value that fails to decode aborts the operation with its error.
pub(crate) fn insert_values_at<'a, T: AsRef<[Value<'a>]>>(
    &self,
    idx: usize,
    values: T,
) -> Result<Self> {
    let mut rebuilt = Tuple::with_prefix(self.get_prefix());
    let mut existing = self.iter();
    // Leading values come first...
    for item in existing.by_ref().take(idx) {
        rebuilt.push_value(&item?);
    }
    // ...then the inserted ones...
    for inserted in values.as_ref() {
        rebuilt.push_value(inserted);
    }
    // ...then whatever is left of the original.
    for item in existing {
        rebuilt.push_value(&item?);
    }
    Ok(rebuilt)
}
}
/// Appends every value produced by the iterator, in order.
impl<'a> Extend<Value<'a>> for OwnTuple {
    #[inline]
    fn extend<T: IntoIterator<Item = Value<'a>>>(&mut self, iter: T) {
        iter.into_iter().for_each(|value| self.push_value(&value));
    }
}
/// Tuples are equal when their raw bytes (prefix included) are equal, whatever the
/// backing storage type.
impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Tuple<T2>> for Tuple<T> {
    #[inline]
    fn eq(&self, other: &Tuple<T2>) -> bool {
        let lhs: &[u8] = self.data.as_ref();
        let rhs: &[u8] = other.data.as_ref();
        lhs == rhs
    }
}
/// Hashes the raw bytes, consistent with the byte-wise `PartialEq`.
impl<T: AsRef<[u8]>> Hash for Tuple<T> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let bytes: &[u8] = self.data.as_ref();
        bytes.hash(state);
    }
}
impl<T: AsRef<[u8]>> Eq for Tuple<T> {}

@ -1,3 +1,67 @@
pub(crate) struct TableId;
pub(crate) struct ColId;
pub(crate) struct TupleSetIdx;
use std::fmt::{Debug, Formatter};
use std::result;
/// Errors raised when constructing table identifiers.
#[derive(thiserror::Error, Debug)]
pub(crate) enum TypingError {
/// The id is below `MIN_TABLE_ID`; carries the rejected id.
#[error("table id not allowed: {0}")]
InvalidTableId(u32),
}
type Result<T> = result::Result<T, TypingError>;
const MIN_TABLE_ID: u32 = 10001;
/// Identifier of a table.
#[derive(Eq, PartialEq, Clone, Copy, Ord, PartialOrd, Hash)]
pub(crate) struct TableId {
/// Shown as `G` when true and `L` when false by the `Debug` impl.
/// NOTE(review): presumably distinguishes global from local tables — confirm.
pub(crate) in_root: bool,
/// Numeric id; ids below `MIN_TABLE_ID` are rejected by `TableId::new`.
pub(crate) id: u32,
}
/// Formats as `#G<id>` when `in_root` is set and `#L<id>` otherwise.
impl Debug for TableId {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let scope = if self.in_root { 'G' } else { 'L' };
        write!(f, "#{}{}", scope, self.id)
    }
}
impl TableId {
    /// Creates a table id, rejecting ids below `MIN_TABLE_ID` with
    /// `TypingError::InvalidTableId`.
    pub(crate) fn new(in_root: bool, id: u32) -> Result<Self> {
        let candidate = TableId { in_root, id };
        if candidate.is_valid() {
            Ok(candidate)
        } else {
            Err(TypingError::InvalidTableId(id))
        }
    }

    /// Whether the id is at least `MIN_TABLE_ID`.
    pub(crate) fn is_valid(&self) -> bool {
        !(self.id < MIN_TABLE_ID)
    }
}
impl From<(bool, u32)> for TableId {
fn from((in_root, id): (bool, u32)) -> Self {
Self { in_root, id }
}
}
/// Identifier of a column.
#[derive(Eq, PartialEq, Clone, Copy, Ord, PartialOrd)]
pub(crate) struct ColId {
/// Shown as `K` when true and `D` when false by the `Debug` impl.
/// NOTE(review): presumably key column versus data column — confirm.
pub(crate) is_key: bool,
/// Numeric id of the column.
pub(crate) id: usize,
}
/// Formats as `.K<id>` when `is_key` is set and `.D<id>` otherwise.
impl Debug for ColId {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let part = if self.is_key { 'K' } else { 'D' };
        write!(f, ".{}{}", part, self.id)
    }
}
impl From<(bool, usize)> for ColId {
fn from((is_key, id): (bool, usize)) -> Self {
Self { is_key, id: id }
}
}
/// Three-part index made of a key flag, a set index and a column index.
/// NOTE(review): how these fields address data inside a tuple set is not visible here — confirm.
pub(crate) struct TupleSetIdx {
/// NOTE(review): presumably selects the key part over the value part — confirm.
pub(crate) is_key: bool,
/// Index stored under the name `t_set`.
pub(crate) t_set: usize,
/// Index stored under the name `col_idx`.
pub(crate) col_idx: usize,
}

@ -1,10 +1,10 @@
use crate::data::value::{StaticValue, Value};
use crate::parser::text_identifier::build_name_in_def;
use crate::parser::{CozoParser, Rule};
use pest::iterators::Pair;
use pest::Parser;
use std::fmt::{Display, Formatter};
use std::result;
use pest::{Parser};
use pest::iterators::Pair;
use crate::parser::{CozoParser, Rule};
use crate::parser::text_identifier::build_name_in_def;
#[derive(thiserror::Error, Debug)]
pub(crate) enum TypingError {
@ -21,7 +21,7 @@ pub(crate) enum TypingError {
Parse(#[from] pest::error::Error<Rule>),
#[error(transparent)]
TextParse(#[from] crate::parser::text_identifier::TextParseError)
TextParse(#[from] crate::parser::text_identifier::TextParseError),
}
type Result<T> = result::Result<T, TypingError>;
@ -143,7 +143,6 @@ impl Typing {
}
}
impl TryFrom<&str> for Typing {
type Error = TypingError;
@ -153,7 +152,6 @@ impl TryFrom<&str> for Typing {
}
}
impl<'a> TryFrom<Value<'a>> for Typing {
type Error = TypingError;
@ -172,13 +170,13 @@ impl Typing {
"Float" => Typing::Float,
"Text" => Typing::Text,
"Uuid" => Typing::Uuid,
t => return Err(TypingError::UndefinedType(t.to_string())),
t => return Err(TypingError::UndefinedType(t.to_string())),
},
Rule::nullable_type => Typing::Nullable(Box::new(Typing::from_pair(
pair.into_inner().next().unwrap()
pair.into_inner().next().unwrap(),
)?)),
Rule::homogeneous_list_type => Typing::Homogeneous(Box::new(Typing::from_pair(
pair.into_inner().next().unwrap()
pair.into_inner().next().unwrap(),
)?)),
Rule::unnamed_tuple_type => {
let types = pair
@ -204,4 +202,4 @@ impl Typing {
_ => unreachable!(),
})
}
}
}

@ -2,6 +2,6 @@
// pub mod error;
// pub mod relation;
// pub(crate) mod eval;
pub(crate) mod parser;
pub(crate) mod data;
pub(crate) mod logger;
pub(crate) mod parser;

@ -1,10 +1,10 @@
pub mod number;
pub mod text_identifier;
pub(crate) mod number;
pub(crate) mod text_identifier;
use pest_derive::Parser;
#[derive(Parser)]
#[grammar = "grammar.pest"]
pub struct CozoParser;
pub(crate) struct CozoParser;
#[cfg(test)]
mod tests {
@ -15,14 +15,23 @@ mod tests {
fn identifiers() {
assert_eq!(CozoParser::parse(Rule::ident, "x").unwrap().as_str(), "x");
assert_eq!(CozoParser::parse(Rule::ident, "x2").unwrap().as_str(), "x2");
assert_eq!(CozoParser::parse(Rule::ident, "x_y").unwrap().as_str(), "x_y");
assert_eq!(
CozoParser::parse(Rule::ident, "x_y").unwrap().as_str(),
"x_y"
);
assert_eq!(CozoParser::parse(Rule::ident, "x_").unwrap().as_str(), "x_");
assert_eq!(CozoParser::parse(Rule::ident, "你好").unwrap().as_str(), "你好");
assert_eq!(
CozoParser::parse(Rule::ident, "你好").unwrap().as_str(),
"你好"
);
assert_eq!(
CozoParser::parse(Rule::ident, "你好123").unwrap().as_str(),
"你好123"
);
assert_ne!(CozoParser::parse(Rule::ident, "x$y").unwrap().as_str(), "x$y");
assert_ne!(
CozoParser::parse(Rule::ident, "x$y").unwrap().as_str(),
"x$y"
);
assert_eq!(CozoParser::parse(Rule::ident, "_x").unwrap().as_str(), "_x");
assert_eq!(CozoParser::parse(Rule::ident, "_").unwrap().as_str(), "_");
@ -33,8 +42,14 @@ mod tests {
assert!(CozoParser::parse(Rule::ident, "123x").is_err());
assert!(CozoParser::parse(Rule::ident, ".x").is_err());
assert_ne!(CozoParser::parse(Rule::ident, "x.x").unwrap().as_str(), "x.x");
assert_ne!(CozoParser::parse(Rule::ident, "x~x").unwrap().as_str(), "x~x");
assert_ne!(
CozoParser::parse(Rule::ident, "x.x").unwrap().as_str(),
"x.x"
);
assert_ne!(
CozoParser::parse(Rule::ident, "x~x").unwrap().as_str(),
"x~x"
);
}
#[test]
@ -44,11 +59,15 @@ mod tests {
r#""""#
);
assert_eq!(
CozoParser::parse(Rule::string, r#"" b a c""#).unwrap().as_str(),
CozoParser::parse(Rule::string, r#"" b a c""#)
.unwrap()
.as_str(),
r#"" b a c""#
);
assert_eq!(
CozoParser::parse(Rule::string, r#""你好👋""#).unwrap().as_str(),
CozoParser::parse(Rule::string, r#""你好👋""#)
.unwrap()
.as_str(),
r#""你好👋""#
);
assert_eq!(
@ -56,7 +75,9 @@ mod tests {
r#""\n""#
);
assert_eq!(
CozoParser::parse(Rule::string, r#""\u5678""#).unwrap().as_str(),
CozoParser::parse(Rule::string, r#""\u5678""#)
.unwrap()
.as_str(),
r#""\u5678""#
);
assert!(CozoParser::parse(Rule::string, r#""\ux""#).is_err());
@ -70,7 +91,10 @@ mod tests {
#[test]
fn numbers() {
assert_eq!(CozoParser::parse(Rule::number, "123").unwrap().as_str(), "123");
assert_eq!(
CozoParser::parse(Rule::number, "123").unwrap().as_str(),
"123"
);
assert_eq!(CozoParser::parse(Rule::number, "0").unwrap().as_str(), "0");
assert_eq!(
CozoParser::parse(Rule::number, "0123").unwrap().as_str(),
@ -86,11 +110,15 @@ mod tests {
"0xAf03"
);
assert_eq!(
CozoParser::parse(Rule::number, "0o0_7067").unwrap().as_str(),
CozoParser::parse(Rule::number, "0o0_7067")
.unwrap()
.as_str(),
"0o0_7067"
);
assert_ne!(
CozoParser::parse(Rule::number, "0o0_7068").unwrap().as_str(),
CozoParser::parse(Rule::number, "0o0_7068")
.unwrap()
.as_str(),
"0o0_7068"
);
assert_eq!(
@ -111,7 +139,9 @@ mod tests {
"123.45"
);
assert_eq!(
CozoParser::parse(Rule::number, "1_23.4_5_").unwrap().as_str(),
CozoParser::parse(Rule::number, "1_23.4_5_")
.unwrap()
.as_str(),
"1_23.4_5_"
);
assert_ne!(
@ -119,7 +149,9 @@ mod tests {
"123."
);
assert_eq!(
CozoParser::parse(Rule::number, "123.333e456").unwrap().as_str(),
CozoParser::parse(Rule::number, "123.333e456")
.unwrap()
.as_str(),
"123.333e456"
);
assert_eq!(

@ -1,4 +1,4 @@
#[inline]
pub fn parse_int(s: &str, radix: u32) -> i64 {
pub(crate) fn parse_int(s: &str, radix: u32) -> i64 {
i64::from_str_radix(&s[2..].replace('_', ""), radix).unwrap()
}

@ -1,7 +1,7 @@
use std::result;
use crate::parser::number::parse_int;
use crate::parser::Rule;
use pest::iterators::Pair;
use std::result;
#[derive(thiserror::Error, Debug)]
pub(crate) enum TextParseError {
@ -17,7 +17,6 @@ pub(crate) enum TextParseError {
type Result<T> = result::Result<T, TextParseError>;
#[inline]
fn parse_raw_string(pair: Pair<Rule>) -> Result<String> {
Ok(pair
@ -46,10 +45,13 @@ fn parse_quoted_string(pair: Pair<Rule>) -> Result<String> {
r"\t" => ret.push('\t'),
s if s.starts_with(r"\u") => {
let code = parse_int(s, 16) as u32;
let ch = char::from_u32(code).ok_or_else(|| TextParseError::InvalidUtfCode(code))?;
let ch =
char::from_u32(code).ok_or_else(|| TextParseError::InvalidUtfCode(code))?;
ret.push(ch);
}
s if s.starts_with('\\') => return Err(TextParseError::InvalidEscapeSequence(s.to_string())),
s if s.starts_with('\\') => {
return Err(TextParseError::InvalidEscapeSequence(s.to_string()))
}
s => ret.push_str(s),
}
}
@ -73,10 +75,13 @@ fn parse_s_quoted_string(pair: Pair<Rule>) -> Result<String> {
r"\t" => ret.push('\t'),
s if s.starts_with(r"\u") => {
let code = parse_int(s, 16) as u32;
let ch = char::from_u32(code).ok_or_else(|| TextParseError::InvalidUtfCode(code))?;
let ch =
char::from_u32(code).ok_or_else(|| TextParseError::InvalidUtfCode(code))?;
ret.push(ch);
}
s if s.starts_with('\\') => return Err(TextParseError::InvalidEscapeSequence(s.to_string())),
s if s.starts_with('\\') => {
return Err(TextParseError::InvalidEscapeSequence(s.to_string()))
}
s => ret.push_str(s),
}
}

Loading…
Cancel
Save