From 9ac734d5a9b11585609fa62a071d5c04626f5d11 Mon Sep 17 00:00:00 2001 From: Ziyang Hu Date: Tue, 2 Aug 2022 13:27:11 +0800 Subject: [PATCH] reworked compilation --- src/data/keyword.rs | 2 +- src/data/program.rs | 2 - src/parse/query.rs | 17 +++- src/query/compile.rs | 190 +++++++++++++++++++++++++++++++++++++- src/query/logical.rs | 5 +- src/query/magic.rs | 9 +- src/query/relation.rs | 96 ++++++++++++++++++- src/runtime/temp_store.rs | 2 +- src/transact/exec.rs | 1 - 9 files changed, 301 insertions(+), 23 deletions(-) diff --git a/src/data/keyword.rs b/src/data/keyword.rs index 3f5eaa04..cfb68e07 100644 --- a/src/data/keyword.rs +++ b/src/data/keyword.rs @@ -39,7 +39,7 @@ impl Keyword { self.0.is_empty() || self .0 - .starts_with(['_', ':', '<', '.', '*', '?', '!', ']', '[']) + .starts_with(['_', ':', '<', '.', '*', '#', '$', '?', '!', ']', '[']) } pub(crate) fn to_string_no_prefix(&self) -> String { format!("{}", self.0) diff --git a/src/data/program.rs b/src/data/program.rs index a65896ef..eeafb2ae 100644 --- a/src/data/program.rs +++ b/src/data/program.rs @@ -195,8 +195,6 @@ pub(crate) struct MagicAttrTripleAtom { pub(crate) attr: Attribute, pub(crate) entity: Keyword, pub(crate) value: Keyword, - pub(crate) entity_is_bound: bool, - pub(crate) value_is_bound: bool, } #[derive(Clone, Debug)] diff --git a/src/parse/query.rs b/src/parse/query.rs index 4ee2dac4..db8a25cd 100644 --- a/src/parse/query.rs +++ b/src/parse/query.rs @@ -9,15 +9,16 @@ use crate::data::attr::Attribute; use crate::data::expr::{get_op, Expr}; use crate::data::json::JsonValue; use crate::data::keyword::{Keyword, PROG_ENTRY}; -use crate::data::program::{InputAtom, InputAttrTripleAtom, InputProgram, InputRule, InputRuleApplyAtom, InputTerm, NormalFormProgram}; +use crate::data::program::{ + InputAtom, InputAttrTripleAtom, InputProgram, InputRule, InputRuleApplyAtom, InputTerm, +}; use crate::data::value::DataValue; use crate::query::compile::{ Atom, AttrTripleAtom, BindingHeadTerm, DatalogProgram, Rule, RuleApplyAtom, RuleSet, Term, }; -use crate::query::magic::magic_sets_rewrite; use crate::query::pull::PullSpecs; use crate::runtime::transact::SessionTx; -use crate::utils::{swap_option_result, swap_result_option}; +use crate::utils::swap_result_option; use crate::{EntityId, Validity}; pub(crate) type OutSpec = (Vec<(usize, Option)>, Option>); @@ -663,7 +664,11 @@ impl SessionTx { v => bail!("expected atom definition {:?}", v), } } - fn parse_input_logical_atom(&mut self, map: &Map, vld: Validity) -> Result { + fn parse_input_logical_atom( + &mut self, + map: &Map, + vld: Validity, + ) -> Result { let (k, v) = map.iter().next().unwrap(); Ok(match k as &str { "not_exists" => { @@ -804,7 +809,9 @@ impl SessionTx { Ok(InputTerm::Const(self.parse_value_from_map(o, attr)?)) }; } - Ok(InputTerm::Const(attr.val_type.coerce_value(value_rep.into())?)) + Ok(InputTerm::Const( + attr.val_type.coerce_value(value_rep.into())?, + )) } fn parse_triple_clause_value( &mut self, diff --git a/src/query/compile.rs b/src/query/compile.rs index 94aa83f3..3e95b20d 100644 --- a/src/query/compile.rs +++ b/src/query/compile.rs @@ -1,14 +1,13 @@ use std::collections::{BTreeMap, BTreeSet}; use std::fmt::{Debug, Formatter}; -use std::ops::Sub; use anyhow::{anyhow, ensure, Result}; use itertools::Itertools; use crate::data::attr::Attribute; use crate::data::expr::Expr; -use crate::data::json::JsonValue; use crate::data::keyword::Keyword; +use crate::data::program::{MagicAtom, MagicKeyword, MagicRule}; use crate::data::value::DataValue; use crate::query::relation::Relation; use crate::runtime::temp_store::TempStore; @@ -182,6 +181,193 @@ impl Debug for BindingHeadFormatter<'_> { } impl SessionTx { + pub(crate) fn compile_magic_rule_body( + &mut self, + rule: &MagicRule, + rule_name: &MagicKeyword, + rule_idx: usize, + vld: Validity, + stores: &BTreeMap, + ret_vars: &[Keyword], + ) -> Result { + let mut ret = Relation::unit(); + let mut seen_variables = BTreeSet::new(); + let mut serial_id = 0; + let mut gen_kw = || { + let ret = Keyword::from(&format!("**{}", serial_id) as &str); + serial_id += 1; + ret + }; + for atom in &rule.body { + match atom { + MagicAtom::AttrTriple(t) => { + let mut join_left_keys = vec![]; + let mut join_right_keys = vec![]; + let e_kw = if seen_variables.contains(&t.entity) { + let kw = gen_kw(); + join_left_keys.push(t.entity.clone()); + join_right_keys.push(kw.clone()); + kw + } else { + seen_variables.insert(t.entity.clone()); + t.entity.clone() + }; + let v_kw = if seen_variables.contains(&t.value) { + let kw = gen_kw(); + join_left_keys.push(t.value.clone()); + join_right_keys.push(kw.clone()); + kw + } else { + seen_variables.insert(t.value.clone()); + t.value.clone() + }; + let right = Relation::triple(t.attr.clone(), vld, e_kw, v_kw); + if ret.is_unit() { + ret = right + } else { + debug_assert_eq!(join_left_keys.len(), join_right_keys.len()); + ret = ret.join(right, join_left_keys, join_right_keys); + } + } + MagicAtom::Rule(rule_app) => { + let (store, arity) = stores + .get(&rule_app.name) + .ok_or_else(|| anyhow!("undefined rule {:?} encountered", rule_app.name))? + .clone(); + ensure!( + arity == rule_app.args.len(), + "arity mismatch in rule application {:?}, expect {}, found {}", + rule_app.name, + arity, + rule_app.args.len() + ); + let mut prev_joiner_vars = vec![]; + let mut right_joiner_vars = vec![]; + let mut right_vars = vec![]; + + for var in &rule_app.args { + if seen_variables.contains(var) { + prev_joiner_vars.push(var.clone()); + let rk = gen_kw(); + right_vars.push(rk.clone()); + right_joiner_vars.push(rk); + } else { + seen_variables.insert(var.clone()); + right_vars.push(var.clone()); + } + } + + let right = Relation::derived(right_vars, store); + debug_assert_eq!(prev_joiner_vars.len(), right_joiner_vars.len()); + ret = ret.join(right, prev_joiner_vars, right_joiner_vars); + } + MagicAtom::NegatedAttrTriple(a_triple) => { + let mut join_left_keys = vec![]; + let mut join_right_keys = vec![]; + let e_kw = { + if seen_variables.contains(&a_triple.entity) { + let kw = gen_kw(); + join_left_keys.push(a_triple.entity.clone()); + join_right_keys.push(kw.clone()); + kw + } else { + seen_variables.insert(a_triple.entity.clone()); + a_triple.entity.clone() + } + }; + let v_kw = { + if seen_variables.contains(&a_triple.value) { + let kw = gen_kw(); + join_left_keys.push(a_triple.value.clone()); + join_right_keys.push(kw.clone()); + kw + } else { + seen_variables.insert(a_triple.value.clone()); + a_triple.value.clone() + } + }; + ensure!( + !join_right_keys.is_empty(), + "unsafe negation: {} and {} are unbound", + e_kw, + v_kw + ); + let right = Relation::triple(a_triple.attr.clone(), vld, e_kw, v_kw); + if ret.is_unit() { + ret = right; + } else { + debug_assert_eq!(join_left_keys.len(), join_right_keys.len()); + ret = ret.neg_join(right, join_left_keys, join_right_keys); + } + } + MagicAtom::NegatedRule(rule_app) => { + let (store, arity) = stores + .get(&rule_app.name) + .ok_or_else(|| anyhow!("undefined rule encountered: {:?}", rule_app.name))? + .clone(); + ensure!( + arity == rule_app.args.len(), + "arity mismatch for {:?}, expect {}, got {}", + rule_app.name, + arity, + rule_app.args.len() + ); + + let mut prev_joiner_vars = vec![]; + let mut right_joiner_vars = vec![]; + let mut right_vars = vec![]; + + for var in &rule_app.args { + if seen_variables.contains(var) { + prev_joiner_vars.push(var.clone()); + let rk = gen_kw(); + right_vars.push(rk.clone()); + right_joiner_vars.push(rk); + } else { + seen_variables.insert(var.clone()); + right_vars.push(var.clone()); + } + } + + let right = Relation::derived(right_vars, store); + debug_assert_eq!(prev_joiner_vars.len(), right_joiner_vars.len()); + ret = ret.neg_join(right, prev_joiner_vars, right_joiner_vars); + } + MagicAtom::Predicate(p) => { + ret = ret.filter(p.clone()); + } + MagicAtom::Unification(u) => { + seen_variables.insert(u.binding.clone()); + ret = ret.unify(u.binding.clone(), u.expr.clone()); + } + } + } + + let ret_vars_set = ret_vars.iter().cloned().collect(); + ret.eliminate_temp_vars(&ret_vars_set)?; + let cur_ret_set: BTreeSet<_> = ret.bindings_after_eliminate().into_iter().collect(); + if cur_ret_set != ret_vars_set { + ret = ret.cartesian_join(Relation::unit()); + ret.eliminate_temp_vars(&ret_vars_set)?; + } + + let cur_ret_set: BTreeSet<_> = ret.bindings_after_eliminate().into_iter().collect(); + ensure!( + cur_ret_set == ret_vars_set, + "unbound variables in rule head for {:?}.{}: variables required {:?}, of which only {:?} are bound", + rule_name, + rule_idx, + ret_vars_set, + cur_ret_set + ); + let cur_ret_bindings = ret.bindings_after_eliminate(); + if ret_vars != cur_ret_bindings { + ret = ret.reorder(ret_vars.to_vec()); + } + + Ok(ret) + } + pub(crate) fn compile_rule_body( &mut self, clauses: &[Atom], diff --git a/src/query/logical.rs b/src/query/logical.rs index 39a6652b..dc50c6f0 100644 --- a/src/query/logical.rs +++ b/src/query/logical.rs @@ -10,7 +10,6 @@ use crate::data::program::{ }; use crate::data::value::DataValue; use crate::query::compile::Atom; -use crate::EntityId; pub(crate) struct Disjunction(pub(crate) Vec); @@ -122,7 +121,7 @@ impl InputAtom { } impl InputRuleApplyAtom { - fn normalize(mut self, is_negated: bool, gen: &mut TempKwGen) -> Disjunction { + fn normalize(self, is_negated: bool, gen: &mut TempKwGen) -> Disjunction { let mut ret = Vec::with_capacity(self.args.len() + 1); let mut args = Vec::with_capacity(self.args.len()); let mut seen_variables = BTreeSet::new(); @@ -169,7 +168,7 @@ impl InputRuleApplyAtom { } impl InputAttrTripleAtom { - fn normalize(mut self, is_negated: bool, gen: &mut TempKwGen) -> Disjunction { + fn normalize(self, is_negated: bool, gen: &mut TempKwGen) -> Disjunction { let wrap = |atom| { if is_negated { NormalFormAtom::NegatedAttrTriple(atom) diff --git a/src/query/magic.rs b/src/query/magic.rs index 14031ce7..c2f401e1 100644 --- a/src/query/magic.rs +++ b/src/query/magic.rs @@ -1,7 +1,6 @@ use std::collections::{BTreeMap, BTreeSet}; use std::mem; -use anyhow::Result; use itertools::Itertools; use smallvec::SmallVec; @@ -288,13 +287,11 @@ impl NormalFormAtom { attr: a.attr.clone(), entity: a.entity.clone(), value: a.value.clone(), - entity_is_bound: seen_bindings.contains(&a.entity), - value_is_bound: seen_bindings.contains(&a.value), }; - if !t.entity_is_bound { + if !seen_bindings.contains(&a.entity) { seen_bindings.insert(a.entity.clone()); } - if !t.value_is_bound { + if !seen_bindings.contains(&a.value) { seen_bindings.insert(a.value.clone()); } MagicAtom::AttrTriple(t) @@ -336,8 +333,6 @@ impl NormalFormAtom { attr: na.attr.clone(), entity: na.entity.clone(), value: na.value.clone(), - entity_is_bound: true, - value_is_bound: true, }) } NormalFormAtom::NegatedRule(nr) => MagicAtom::NegatedRule(MagicRuleApplyAtom { diff --git a/src/query/relation.rs b/src/query/relation.rs index 0e9a2e15..e37afa24 100644 --- a/src/query/relation.rs +++ b/src/query/relation.rs @@ -22,6 +22,73 @@ pub enum Relation { NegJoin(Box), Reorder(ReorderRelation), Filter(FilteredRelation), + Unification(UnificationRelation), +} + +pub struct UnificationRelation { + parent: Box, + binding: Keyword, + expr: Expr, + pub(crate) to_eliminate: BTreeSet, +} + +impl UnificationRelation { + fn fill_binding_indices(&mut self) { + let parent_bindings: BTreeMap<_, _> = self + .parent + .bindings_after_eliminate() + .into_iter() + .enumerate() + .map(|(a, b)| (b, a)) + .collect(); + self.expr.fill_binding_indices(&parent_bindings); + } + pub(crate) fn do_eliminate_temp_vars(&mut self, used: &BTreeSet) -> Result<()> { + for binding in self.parent.bindings_before_eliminate() { + if !used.contains(&binding) { + self.to_eliminate.insert(binding.clone()); + } + } + let mut nxt = used.clone(); + nxt.extend(self.expr.bindings()); + self.parent.eliminate_temp_vars(&nxt)?; + Ok(()) + } + + fn iter<'a>( + &'a self, + tx: &'a SessionTx, + epoch: Option, + use_delta: &BTreeSet, + ) -> TupleIter<'a> { + let mut bindings = self.parent.bindings_after_eliminate(); + bindings.push(self.binding.clone()); + let eliminate_indices = get_eliminate_indices(&bindings, &self.to_eliminate); + Box::new( + self.parent + .iter(tx, epoch, use_delta) + .map_ok(move |tuple| -> Result { + let result = self.expr.eval(&tuple)?; + let mut ret = tuple.0; + ret.push(result); + if !eliminate_indices.is_empty() { + ret = ret + .into_iter() + .enumerate() + .filter_map(|(i, v)| { + if eliminate_indices.contains(&i) { + None + } else { + Some(v) + } + }) + .collect_vec(); + } + Ok(Tuple(ret)) + }) + .map(flatten_err), + ) + } } pub struct FilteredRelation { @@ -161,6 +228,13 @@ impl Debug for Relation { .field(&r.pred) .field(&r.parent) .finish(), + Relation::Unification(r) => f + .debug_tuple("Filter") + .field(&bindings) + .field(&r.binding) + .field(&r.expr) + .field(&r.parent) + .finish(), } } } @@ -185,6 +259,10 @@ impl Relation { Relation::NegJoin(r) => { r.left.fill_predicate_binding_indices(); } + Relation::Unification(u) => { + u.parent.fill_predicate_binding_indices(); + u.fill_binding_indices() + } } } pub(crate) fn unit() -> Self { @@ -238,6 +316,14 @@ impl Relation { to_eliminate: Default::default(), }) } + pub(crate) fn unify(self, binding: Keyword, expr: Expr) -> Self { + Relation::Unification(UnificationRelation { + parent: Box::new(self), + binding, + expr: expr, + to_eliminate: Default::default(), + }) + } pub(crate) fn join( self, right: Relation, @@ -1195,6 +1281,7 @@ impl Relation { Relation::Reorder(r) => r.relation.eliminate_temp_vars(used), Relation::Filter(r) => r.do_eliminate_temp_vars(used), Relation::NegJoin(r) => r.do_eliminate_temp_vars(used), + Relation::Unification(r) => r.do_eliminate_temp_vars(used), } } @@ -1207,6 +1294,7 @@ impl Relation { Relation::Reorder(_) => None, Relation::Filter(r) => Some(&r.to_eliminate), Relation::NegJoin(r) => Some(&r.to_eliminate), + Relation::Unification(u) => Some(&u.to_eliminate), } } @@ -1230,6 +1318,11 @@ impl Relation { Relation::Reorder(r) => r.bindings(), Relation::Filter(r) => r.parent.bindings_after_eliminate(), Relation::NegJoin(j) => j.left.bindings_after_eliminate(), + Relation::Unification(u) => { + let mut bindings = u.parent.bindings_after_eliminate(); + bindings.push(u.binding.clone()); + bindings + } } } pub fn iter<'a>( @@ -1249,6 +1342,7 @@ impl Relation { Relation::Reorder(r) => r.iter(tx, epoch, use_delta), Relation::Filter(r) => r.iter(tx, epoch, use_delta), Relation::NegJoin(r) => r.iter(tx, epoch, use_delta), + Relation::Unification(r) => r.iter(tx, epoch, use_delta), } } } @@ -1408,7 +1502,7 @@ impl InnerJoin { self.materialized_join(tx, eliminate_indices, epoch, use_delta) } } - Relation::Join(_) | Relation::Filter(_) => { + Relation::Join(_) | Relation::Filter(_) | Relation::Unification(_) => { self.materialized_join(tx, eliminate_indices, epoch, use_delta) } Relation::Reorder(_) => { diff --git a/src/runtime/temp_store.rs b/src/runtime/temp_store.rs index e0ec12f1..9bf14fd0 100644 --- a/src/runtime/temp_store.rs +++ b/src/runtime/temp_store.rs @@ -1,6 +1,6 @@ use std::fmt::{Debug, Formatter}; -use log::{debug, error}; +use log::{error}; use cozorocks::{DbIter, RawRocksDb, RocksDbStatus}; diff --git a/src/transact/exec.rs b/src/transact/exec.rs index aec5d2e8..25d3ed81 100644 --- a/src/transact/exec.rs +++ b/src/transact/exec.rs @@ -13,7 +13,6 @@ use crate::data::encode::{ EncodedVec, LARGE_VEC_SIZE, }; use crate::data::id::{AttrId, EntityId, Validity}; -use crate::data::keyword::Keyword; use crate::data::triple::StoreOp; use crate::data::value::{DataValue, INLINE_VAL_SIZE_LIMIT}; use crate::parse::triple::{Quintuple, TxAction};