diff --git a/Cargo.toml b/Cargo.toml index 3e424c01..c9d43d33 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ authors = ["Ziyang Hu"] [dependencies] uuid = { version = "0.8", features = ["v1", "v4", "serde"] } +nanoid = { version = "0.4.0", features = [] } rand = "0.8.5" anyhow = "1.0" lazy_static = "1.4.0" diff --git a/src/data/keyword.rs b/src/data/keyword.rs index c23d9462..d4624a37 100644 --- a/src/data/keyword.rs +++ b/src/data/keyword.rs @@ -1,6 +1,7 @@ use std::fmt::{Debug, Display, Formatter}; use std::str::Utf8Error; +use nanoid::nanoid; use serde_derive::{Deserialize, Serialize}; use smartstring::{LazyCompact, SmartString}; @@ -51,15 +52,25 @@ impl TryFrom<&[u8]> for Keyword { } impl Keyword { + pub(crate) fn rand() -> Self { + let id = nanoid!(); + Keyword::from(&id as &str) + } pub(crate) fn is_reserved(&self) -> bool { self.0.is_empty() || self.0.starts_with(['_', ':', '<', '.', '*', '?', '!']) } - pub(crate) fn is_user_binding(&self) -> bool { + pub(crate) fn is_query_binding(&self) -> bool { self.0.starts_with('?') } - pub(crate) fn is_anon_binding(&self) -> bool { + pub(crate) fn is_ignored_binding(&self) -> bool { self.0.starts_with('_') } + pub(crate) fn is_ignored_wildcard(&self) -> bool { + self.0 == "_" + } + pub(crate) fn is_binding(&self) -> bool { + self.is_query_binding() || self.is_ignored_binding() + } pub(crate) fn to_string_no_prefix(&self) -> String { format!("{}", self.0) } diff --git a/src/data/tuple.rs b/src/data/tuple.rs index f2002e6f..39c60603 100644 --- a/src/data/tuple.rs +++ b/src/data/tuple.rs @@ -1,10 +1,12 @@ use std::cmp::{min, Ordering}; +use std::fmt::{Debug, Formatter}; use anyhow::Result; use itertools::Itertools; use rmp_serde::Serializer; use serde::Serialize; +use crate::data::json::JsonValue; use crate::data::value::DataValue; pub(crate) const SCRATCH_DB_KEY_PREFIX_LEN: usize = 4; @@ -15,7 +17,21 @@ pub enum TupleError { BadData(String, Vec), } -pub(crate) struct Tuple(pub(crate) Vec); +pub struct Tuple(pub(crate) Vec); + +impl Debug for Tuple { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "[")?; + for (i, v) in self.0.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + let j = JsonValue::from(v.clone()); + write!(f, "{}", j)?; + } + write!(f, "]") + } +} pub(crate) type TupleIter<'a> = Box> + 'a>; diff --git a/src/preprocess/query.rs b/src/preprocess/query.rs index 1b26833d..744b7542 100644 --- a/src/preprocess/query.rs +++ b/src/preprocess/query.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeSet; + use anyhow::Result; use itertools::Itertools; @@ -7,6 +9,9 @@ use crate::data::keyword::Keyword; use crate::data::value::DataValue; use crate::preprocess::triple::TxError; use crate::runtime::transact::SessionTx; +use crate::transact::query::{ + InlineFixedRelation, InnerJoin, Joiner, ProjectedRelation, Relation, TripleRelation, +}; use crate::{EntityId, Validity}; #[derive(Debug, thiserror::Error)] @@ -17,11 +22,25 @@ pub enum QueryClauseError { #[derive(Clone, Debug)] pub(crate) enum MaybeVariable { - Ignore, Variable(Keyword), Const(T), } +impl MaybeVariable { + pub(crate) fn get_var(&self) -> Option<&Keyword> { + match self { + Self::Variable(k) => Some(k), + Self::Const(_) => None, + } + } + pub(crate) fn get_const(&self) -> Option<&T> { + match self { + Self::Const(v) => Some(v), + Self::Variable(_) => None, + } + } +} + #[derive(Clone, Debug)] pub struct AttrTripleClause { pub(crate) attr: Attribute, @@ -45,6 +64,224 @@ impl SessionTx { .map(|el| self.parse_clause(el, vld)) .try_collect() } + pub fn compile_clauses(&mut self, clauses: Vec, vld: Validity) -> Result { + let mut ret = Relation::unit(); + let mut seen_variables = BTreeSet::new(); + for clause in clauses { + match clause { + Clause::AttrTriple(a_triple) => match (a_triple.entity, a_triple.value) { + (MaybeVariable::Const(eid), MaybeVariable::Variable(v_kw)) => { + let mut to_eliminate = BTreeSet::new(); + + let temp_join_key_left = Keyword::rand(); + let temp_join_key_right = Keyword::rand(); + to_eliminate.insert(temp_join_key_left.clone()); + to_eliminate.insert(temp_join_key_right.clone()); + let const_rel = Relation::Fixed(InlineFixedRelation { + bindings: vec![temp_join_key_left.clone()], + data: vec![vec![DataValue::EnId(eid)]], + }); + ret = Relation::Join(Box::new(InnerJoin { + left: ret, + right: const_rel, + joiner: Joiner { + left_keys: vec![], + right_keys: vec![], + }, + })); + + let mut join_left_keys = vec![temp_join_key_left]; + let mut join_right_keys = vec![temp_join_key_right.clone()]; + + let v_kw = { + if seen_variables.contains(&v_kw) { + let ret = Keyword::rand(); + to_eliminate.insert(ret.clone()); + join_left_keys.push(v_kw); + join_right_keys.push(ret.clone()); + ret + } else { + seen_variables.insert(v_kw.clone()); + v_kw + } + }; + let right = Relation::Triple(TripleRelation { + attr: a_triple.attr, + vld, + bindings: [temp_join_key_right, v_kw], + }); + ret = Relation::Join(Box::new(InnerJoin { + left: ret, + right, + joiner: Joiner { + left_keys: join_left_keys, + right_keys: join_right_keys, + }, + })); + ret = Relation::Project(Box::new(ProjectedRelation { + relation: ret, + eliminate: to_eliminate, + })) + } + (MaybeVariable::Variable(e_kw), MaybeVariable::Const(val)) => { + let mut to_eliminate = BTreeSet::new(); + + let temp_join_key_left = Keyword::rand(); + let temp_join_key_right = Keyword::rand(); + to_eliminate.insert(temp_join_key_left.clone()); + to_eliminate.insert(temp_join_key_right.clone()); + let const_rel = Relation::Fixed(InlineFixedRelation { + bindings: vec![temp_join_key_left.clone()], + data: vec![vec![val]], + }); + ret = Relation::Join(Box::new(InnerJoin { + left: ret, + right: const_rel, + joiner: Joiner { + left_keys: vec![], + right_keys: vec![], + }, + })); + + let mut join_left_keys = vec![temp_join_key_left]; + let mut join_right_keys = vec![temp_join_key_right.clone()]; + + let e_kw = { + if seen_variables.contains(&e_kw) { + let ret = Keyword::rand(); + to_eliminate.insert(ret.clone()); + join_left_keys.push(e_kw); + join_right_keys.push(ret.clone()); + ret + } else { + seen_variables.insert(e_kw.clone()); + e_kw + } + }; + let right = Relation::Triple(TripleRelation { + attr: a_triple.attr, + vld, + bindings: [e_kw, temp_join_key_right], + }); + ret = Relation::Join(Box::new(InnerJoin { + left: ret, + right, + joiner: Joiner { + left_keys: join_left_keys, + right_keys: join_right_keys, + }, + })); + ret = Relation::Project(Box::new(ProjectedRelation { + relation: ret, + eliminate: to_eliminate, + })) + } + (MaybeVariable::Variable(e_kw), MaybeVariable::Variable(v_kw)) => { + let mut to_eliminate = BTreeSet::new(); + let mut join_left_keys = vec![]; + let mut join_right_keys = vec![]; + if e_kw == v_kw { + unimplemented!(); + } + let e_kw = { + if seen_variables.contains(&e_kw) { + let ret = Keyword::rand(); + to_eliminate.insert(ret.clone()); + join_left_keys.push(e_kw); + join_right_keys.push(ret.clone()); + ret + } else { + seen_variables.insert(e_kw.clone()); + e_kw + } + }; + let v_kw = { + if seen_variables.contains(&v_kw) { + let ret = Keyword::rand(); + to_eliminate.insert(ret.clone()); + join_left_keys.push(v_kw); + join_right_keys.push(ret.clone()); + ret + } else { + seen_variables.insert(v_kw.clone()); + v_kw + } + }; + let right = Relation::Triple(TripleRelation { + attr: a_triple.attr, + vld, + bindings: [e_kw, v_kw], + }); + ret = Relation::Join(Box::new(InnerJoin { + left: ret, + right, + joiner: Joiner { + left_keys: join_left_keys, + right_keys: join_right_keys, + }, + })); + if !to_eliminate.is_empty() { + ret = Relation::Project(Box::new(ProjectedRelation { + relation: ret, + eliminate: to_eliminate, + })) + } + } + (MaybeVariable::Const(eid), MaybeVariable::Const(val)) => { + let (left_var_1, left_var_2) = (Keyword::rand(), Keyword::rand()); + let const_rel = Relation::Fixed(InlineFixedRelation { + bindings: vec![left_var_1.clone(), left_var_2.clone()], + data: vec![vec![DataValue::EnId(eid), val]], + }); + ret = Relation::Join(Box::new(InnerJoin { + left: ret, + right: const_rel, + joiner: Joiner { + left_keys: vec![], + right_keys: vec![], + }, + })); + let (right_var_1, right_var_2) = (Keyword::rand(), Keyword::rand()); + + let right = Relation::Triple(TripleRelation { + attr: a_triple.attr, + vld, + bindings: [right_var_1.clone(), right_var_2.clone()], + }); + ret = Relation::Join(Box::new(InnerJoin { + left: ret, + right, + joiner: Joiner { + left_keys: vec![left_var_1.clone(), left_var_2.clone()], + right_keys: vec![right_var_1.clone(), right_var_2.clone()], + }, + })); + ret = Relation::Project(Box::new(ProjectedRelation { + relation: ret, + eliminate: BTreeSet::from([ + left_var_1, + left_var_2, + right_var_1, + right_var_2, + ]), + })) + } + }, + } + } + let eliminate: BTreeSet = seen_variables + .into_iter() + .filter(|kw| !kw.is_query_binding()) + .collect(); + if !eliminate.is_empty() { + ret = Relation::Project(Box::new(ProjectedRelation { + relation: ret, + eliminate, + })) + } + + Ok(ret) + } fn parse_clause(&mut self, payload: &JsonValue, vld: Validity) -> Result { match payload { JsonValue::Array(arr) => match arr as &[JsonValue] { @@ -79,10 +316,8 @@ impl SessionTx { vld: Validity, ) -> Result> { if let Some(s) = value_rep.as_str() { - if s.starts_with('?') { + if s.starts_with(['?', '_']) { return Ok(MaybeVariable::Variable(Keyword::from(s))); - } else if s.starts_with('_') { - return Ok(MaybeVariable::Ignore); } } if let Some(o) = value_rep.as_object() { @@ -99,10 +334,8 @@ impl SessionTx { entity_rep: &JsonValue, ) -> Result> { if let Some(s) = entity_rep.as_str() { - if s.starts_with('?') { + if s.starts_with(['?', '_']) { return Ok(MaybeVariable::Variable(Keyword::from(s))); - } else if s.starts_with('_') { - return Ok(MaybeVariable::Ignore); } } if let Some(u) = entity_rep.as_u64() { diff --git a/src/transact/query.rs b/src/transact/query.rs index 0266c8f8..a98253a3 100644 --- a/src/transact/query.rs +++ b/src/transact/query.rs @@ -12,18 +12,34 @@ use crate::transact::pull::PullSpec; use crate::transact::throwaway::ThrowawayArea; use crate::Validity; -pub(crate) struct QuerySpec { - find: Vec<(Keyword, PullSpec)>, - rules: (), - input: (), - order: (), - limit: Option, - offset: Option, +#[derive(Debug)] +pub enum Relation { + Fixed(InlineFixedRelation), + Triple(TripleRelation), + Derived(StoredDerivedRelation), + Join(Box), + Project(Box), } -pub(crate) struct InlineFixedRelation { - bindings: Vec, - data: Vec>, +impl Relation { + pub(crate) fn unit() -> Self { + Self::Fixed(InlineFixedRelation::unit()) + } +} + +#[derive(Debug)] +pub struct InlineFixedRelation { + pub(crate) bindings: Vec, + pub(crate) data: Vec>, +} + +impl InlineFixedRelation { + pub(crate) fn unit() -> Self { + Self { + bindings: vec![], + data: vec![vec![]], + } + } } impl InlineFixedRelation { @@ -84,10 +100,11 @@ impl InlineFixedRelation { } } -pub(crate) struct TripleRelation { - attr: Attribute, - vld: Validity, - bindings: [Keyword; 2], +#[derive(Debug)] +pub struct TripleRelation { + pub(crate) attr: Attribute, + pub(crate) vld: Validity, + pub(crate) bindings: [Keyword; 2], } fn flatten_err, E2: Into>( @@ -309,9 +326,10 @@ impl TripleRelation { } } -pub(crate) struct ProjectedRelation { - relation: Relation, - eliminate: BTreeSet, +#[derive(Debug)] +pub struct ProjectedRelation { + pub(crate) relation: Relation, + pub(crate) eliminate: BTreeSet, } impl ProjectedRelation { @@ -329,9 +347,9 @@ impl ProjectedRelation { .enumerate() .filter_map(|(idx, kw)| { if self.eliminate.contains(kw) { - None - } else { Some(idx) + } else { + None } }) .collect::>(); @@ -354,15 +372,8 @@ impl ProjectedRelation { } } -pub(crate) enum Relation { - Fixed(InlineFixedRelation), - Triple(TripleRelation), - Derived(StoredDerivedRelation), - Join(Box), - Project(Box), -} - -pub(crate) struct StoredDerivedRelation { +#[derive(Debug)] +pub struct StoredDerivedRelation { arity: usize, bindings: Vec, storage: ThrowawayArea, @@ -410,10 +421,11 @@ impl StoredDerivedRelation { } } +#[derive(Debug)] pub(crate) struct Joiner { // invariant: these are of the same lengths - left_keys: Vec, - right_keys: Vec, + pub(crate) left_keys: Vec, + pub(crate) right_keys: Vec, } impl Joiner { @@ -457,14 +469,15 @@ impl Joiner { } } -pub(crate) struct InnerJoin { - left: Relation, - right: Relation, - joiner: Joiner, +#[derive(Debug)] +pub struct InnerJoin { + pub(crate) left: Relation, + pub(crate) right: Relation, + pub(crate) joiner: Joiner, } impl Relation { - pub(crate) fn bindings(&self) -> Vec { + pub fn bindings(&self) -> Vec { match self { Relation::Fixed(f) => f.bindings.clone(), Relation::Triple(t) => t.bindings.to_vec(), @@ -473,7 +486,7 @@ impl Relation { Relation::Project(p) => p.bindings(), } } - pub(crate) fn iter<'a>(&'a self, tx: &'a SessionTx) -> TupleIter<'a> { + pub fn iter<'a>(&'a self, tx: &'a SessionTx) -> TupleIter<'a> { match self { Relation::Fixed(f) => Box::new(f.data.iter().map(|t| Ok(Tuple(t.clone())))), Relation::Triple(r) => Box::new( diff --git a/src/transact/throwaway.rs b/src/transact/throwaway.rs index 40d78413..8d9f048f 100644 --- a/src/transact/throwaway.rs +++ b/src/transact/throwaway.rs @@ -1,3 +1,4 @@ +use std::fmt::{Debug, Formatter}; use cozorocks::{DbIter, PinSlice, RawRocksDb, RocksDbStatus}; use crate::data::tuple::{EncodedTuple, Tuple}; @@ -8,6 +9,12 @@ pub(crate) struct ThrowawayArea { pub(crate) prefix: u32, } +impl Debug for ThrowawayArea { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "Throwaway<{}>", self.prefix) + } +} + impl ThrowawayArea { pub(crate) fn put(&mut self, tuple: &Tuple, value: &[u8]) -> Result<(), RocksDbStatus> { let key_encoded = tuple.encode_as_key(self.prefix); diff --git a/tests/creation.rs b/tests/creation.rs index 64520a50..d4994c08 100644 --- a/tests/creation.rs +++ b/tests/creation.rs @@ -107,6 +107,21 @@ fn creation() { println!("{}", to_string_pretty(&pulled).unwrap()); + let query = json!([ + ["_id", "person/first_name", "Eve"], + ["_id", "person/friend", "?friend"], + ["?friend", "person/first_name", "?friend_name"] + ]); + let mut tx = db.transact().unwrap(); + let vld = Validity::current(); + let query = tx.parse_clauses(&query, vld).unwrap(); + dbg!(&query); + let compiled = tx.compile_clauses(query, vld).unwrap(); + dbg!(&compiled); + for x in compiled.iter(&tx) { + dbg!(x.unwrap()); + } + // iteration // let mut it = db.total_iter(); // while let Some((k_slice, v_slice)) = it.pair().unwrap() {