magic sets works!

main
Ziyang Hu 2 years ago
parent 4a370a525f
commit 905fb36e59

@ -1,39 +0,0 @@
## Tarjan's algorithm for SCC detection
```
UNVISITED = -1
n = number of nodes in graph
g = adjacency list with directed edges
id = 0 # Used to give each node an id
sccCount = 0 # Used to count number of SCCs found
# Index i in these arrays represents node i
ids = [0, 0, ... 0, 0] # Length n
low = [0, 0, ... 0, 0] # Length n
onStack = [false, false, ..., false] # Length n
stack = an empty stack data structure
function findSccs():
for(i = 0; i < n; i++): ids[i] = UNVISITED
for(i = 0; i < n; i++):
if(ids[i] == UNVISITED):
dfs(i)
return low
function dfs(at):
stack.push(at)
onStack[at] = true
ids[at] = low[at] = id++
# Visit all neighbours & min low-link on callback
for(to : g[at]):
if(ids[to] == UNVISITED): dfs(to)
if(onStack[to]): low[at] = min(low[at],low[to])
# After having visited all the neighbours of 'at',
# if we're at the start of an SCC, empty the seen
# stack until we're back to the start of the SCC.
if(ids[at] == low[at]):
for(node = stack.pop();;node = stack.pop()): onStack[node] = false
low[node] = ids[at]
if(node == at): break
sccCount++
```

@ -4,7 +4,7 @@
* [x] negation
* [x] disjunction
* [x] stratum
* [ ] magic sets
* [x] magic sets
* [ ] aggregation
* [ ] function symbol
* [ ] arithmetic

@ -70,11 +70,10 @@ impl SessionTx {
{
Err(QueryCompilationError::EntryHeadsNotIdentical.into())
} else {
Ok(ret)
// Ok(ret)
// dbg!(&ret);
// let magic_ret = magic_sets_rewrite(&ret);
// dbg!(&magic_ret);
// Ok(magic_ret)
let magic_ret = magic_sets_rewrite(&ret);
Ok(magic_ret)
}
}
}

@ -5,7 +5,6 @@ use std::ops::Sub;
use anyhow::Result;
use itertools::Itertools;
use crate::{EntityId, Validity};
use crate::data::attr::Attribute;
use crate::data::expr::Expr;
use crate::data::json::JsonValue;
@ -14,6 +13,7 @@ use crate::data::value::DataValue;
use crate::query::relation::Relation;
use crate::runtime::temp_store::TempStore;
use crate::runtime::transact::SessionTx;
use crate::{EntityId, Validity};
/// example ruleset in python and javascript
/// ```python
@ -43,8 +43,8 @@ pub enum QueryCompilationError {
ArityMismatch(Keyword),
#[error("encountered undefined rule {0}")]
UndefinedRule(Keyword),
#[error("safety: unbound variables {0:?}")]
UnsafeUnboundVars(BTreeSet<Keyword>),
#[error("safety: unbound variables for rule {0}({1}): needs to return {2:?}, bound {3:?}")]
UnsafeUnboundVars(Keyword, usize, BTreeSet<Keyword>, BTreeSet<Keyword>),
#[error("program logic error: {0}")]
LogicError(String),
#[error("entry not found: expect a rule named '?'")]
@ -87,7 +87,7 @@ impl<T> Term<T> {
/// Returns a reference to the variable keyword when this term is a
/// `Term::Var`, and `None` when it is a `Term::Const`.
pub(crate) fn get_var(&self) -> Option<&Keyword> {
match self {
Term::Var(k) => Some(k),
// NOTE(review): the `Term::Const` arm is listed twice; this looks like the
// before/after pair of a formatting-only change (trailing comma) — confirm.
Term::Const(_) => None
Term::Const(_) => None,
}
}
}
@ -103,7 +103,7 @@ pub struct AttrTripleAtom {
pub struct RuleApplyAtom {
pub(crate) name: Keyword,
pub(crate) args: Vec<Term<DataValue>>,
pub(crate) adornment: Option<Vec<bool>>
pub(crate) adornment: Option<Vec<bool>>,
}
#[derive(Clone, Debug)]
@ -221,6 +221,8 @@ impl SessionTx {
vld: Validity,
stores: &BTreeMap<Keyword, (TempStore, usize)>,
ret_vars: &[Keyword],
rule_name: &Keyword,
rule_idx: usize,
) -> Result<Relation> {
let mut ret = Relation::unit();
let mut seen_variables = BTreeSet::new();
@ -521,7 +523,7 @@ impl SessionTx {
e_kw.clone(),
v_kw.clone(),
])
.into());
.into());
}
let right =
Relation::triple(a_triple.attr.clone(), vld, e_kw, v_kw);
@ -570,7 +572,7 @@ impl SessionTx {
return Err(QueryCompilationError::ArityMismatch(
rule_app.name.clone(),
)
.into());
.into());
}
let mut prev_joiner_vars = vec![];
@ -617,7 +619,7 @@ impl SessionTx {
_ => unreachable!(),
},
Atom::Conjunction(_) => unreachable!(),
Atom::Disjunction(_) => unreachable!()
Atom::Disjunction(_) => unreachable!(),
}
}
@ -632,8 +634,13 @@ impl SessionTx {
let cur_ret_set: BTreeSet<_> = ret.bindings_after_eliminate().into_iter().collect();
if cur_ret_set != ret_vars_set {
let diff = cur_ret_set.sub(&cur_ret_set);
return Err(QueryCompilationError::UnsafeUnboundVars(diff).into());
return Err(QueryCompilationError::UnsafeUnboundVars(
rule_name.clone(),
rule_idx,
ret_vars_set,
cur_ret_set,
)
.into());
}
let cur_ret_bindings = ret.bindings_after_eliminate();
if ret_vars != cur_ret_bindings {

@ -17,6 +17,7 @@ use crate::runtime::transact::SessionTx;
impl SessionTx {
pub(crate) fn stratified_evaluate(&mut self, prog: &DatalogProgram) -> Result<TempStore> {
let stratified_prog = stratify_program(prog)?;
// dbg!(&stratified_prog);
let stores = stratified_prog
.iter()
.flatten()
@ -49,10 +50,10 @@ impl SessionTx {
Vec<(Vec<BindingHeadTerm>, BTreeSet<Keyword>, Relation)>,
)> {
let mut collected = Vec::with_capacity(body.rules.len());
for rule in &body.rules {
for (rule_idx, rule) in body.rules.iter().enumerate() {
let header = rule.head.iter().map(|t| &t.name).cloned().collect_vec();
let mut relation =
self.compile_rule_body(&rule.body, rule.vld, &stores, &header)?;
self.compile_rule_body(&rule.body, rule.vld, &stores, &header, k, rule_idx)?;
relation.fill_predicate_binding_indices();
collected.push((rule.head.clone(), rule.contained_rules(), relation));
}
@ -111,7 +112,7 @@ impl SessionTx {
}
}
if !should_do_calculation {
debug!("skipping rule {}.{} as none of its dependencies changed in the last iteration", k, rule_n);
// debug!("skip {}.{}", k, rule_n);
continue;
}
for (delta_key, (delta_store, _)) in stores.iter() {

@ -69,7 +69,7 @@ pub(crate) type Graph<T> = BTreeMap<T, Vec<T>>;
pub(crate) fn strongly_connected_components<T>(graph: &Graph<T>) -> Vec<Vec<&T>>
where
T: Ord,
T: Ord + Debug,
{
let indices = graph.keys().collect_vec();
let invert_indices: BTreeMap<_, _> = indices
@ -79,7 +79,11 @@ where
.collect();
let idx_graph = graph
.values()
.map(|vs| vs.iter().map(|v| invert_indices[v]).collect_vec())
.map(|vs| {
vs.iter()
.map(|v| invert_indices.get(v).ok_or(v).unwrap().clone())
.collect_vec()
})
.collect_vec();
TarjanScc::new(&idx_graph)
.run()
@ -94,9 +98,11 @@ struct Reachable<'a, T> {
impl<'a, T: Ord + Debug> Reachable<'a, T> {
/// Recursively adds to `collected` every node reachable from `starting`
/// by following the edges stored in `self.graph`.
///
/// Recursion into a node happens only when `collected.insert` reports the
/// node as newly inserted, so nodes that were already collected are not
/// walked again.
// NOTE(review): this listing appears to interleave two versions of the loop:
// one that unwraps the lookup of `starting` in `self.graph`, and one that
// skips nodes with no entry via `if let Some(children)` — confirm which is live.
fn walk(&self, starting: &T, collected: &mut BTreeSet<&'a T>) {
for el in self.graph.get(starting).ok_or(starting).unwrap() {
if collected.insert(el) {
self.walk(el, collected);
if let Some(children) = self.graph.get(starting) {
for el in children {
if collected.insert(el) {
self.walk(el, collected);
}
}
}
}

@ -47,7 +47,8 @@ fn adorn_atom(
let mut adornment = Vec::with_capacity(rule.args.len());
for term in rule.args.iter() {
if let Term::Var(kw) = term {
adornment.push(seen_bindings.insert(kw.clone()));
let var_is_free = seen_bindings.insert(kw.clone());
adornment.push(!var_is_free);
} else {
adornment.push(false);
}
@ -157,13 +158,27 @@ fn make_magic_input_rule_head(name: &Keyword, adornment: &[bool]) -> Keyword {
make_adorned_kw(name, "I", adornment)
}
fn make_magic_sup_rule_head(name: &Keyword, rule_idx: usize, pos: usize) -> Keyword {
let rule_name = format!("!S<{}>{}.{}", name, rule_idx, pos);
/// Builds the keyword naming a magic `sup` rule.
///
/// The name has the form `!S<NAME>RULE_IDX.POS.` followed by one character
/// per entry of `adornment`: `b` for `true` and `f` for `false`. Including
/// the adornment keeps names distinct for differently adorned versions of
/// the same rule.
fn make_magic_sup_rule_head(
    name: &Keyword,
    rule_idx: usize,
    pos: usize,
    adornment: &[bool],
) -> Keyword {
    let mut sup_name = format!("!S<{}>{}.{}.", name.0, rule_idx, pos);
    // Append the adornment signature, one flag character per argument.
    sup_name.extend(
        adornment
            .iter()
            .map(|&is_bound| if is_bound { 'b' } else { 'f' }),
    );
    Keyword::from(sup_name.as_str())
}
fn make_magic_sup_rule_app(name: &Keyword, rule_idx: usize, pos: usize, args: &[Keyword]) -> Atom {
let rule_name = make_magic_sup_rule_head(name, rule_idx, pos);
fn make_magic_sup_rule_app(
name: &Keyword,
rule_idx: usize,
pos: usize,
args: &[Keyword],
adornment: &[bool],
) -> Atom {
let rule_name = make_magic_sup_rule_head(name, rule_idx, pos, adornment);
Atom::Rule(RuleApplyAtom {
name: rule_name,
args: args.iter().map(|kw| Term::Var(kw.clone())).collect_vec(),
@ -197,8 +212,16 @@ fn adorned_to_magic(input: &AdornedDatalogProgram) -> DatalogProgram {
for (rule_head, rule_set) in input {
for (rule_idx, rule) in rule_set.rules.iter().enumerate() {
if !rule_head.name.is_prog_entry() {
let sup_rule_head = make_magic_sup_rule_head(&rule_head.name, rule_idx, 0);
let mut rule_is_bound = false;
for is_bound in &rule_head.adornment {
if *is_bound {
rule_is_bound = true;
break;
}
}
if rule_is_bound {
let sup_rule_head =
make_magic_sup_rule_head(&rule_head.name, rule_idx, 0, &rule_head.adornment);
let args = rule
.head
.iter()
@ -223,6 +246,7 @@ fn adorned_to_magic(input: &AdornedDatalogProgram) -> DatalogProgram {
&rule_head.adornment,
&args,
)];
debug_assert_eq!(entry.arity, args.len());
entry.rules.push(Rule {
head: args,
body,
@ -245,12 +269,13 @@ fn adorned_to_magic(input: &AdornedDatalogProgram) -> DatalogProgram {
let mut collected_atoms = vec![];
if !rule_head.name.is_prog_entry() {
if rule_is_bound {
collected_atoms.push(make_magic_sup_rule_app(
&rule_head.name,
rule_idx,
0,
&sup_0_bindings,
&rule_head.adornment,
));
}
let mut seen_bindings: BTreeSet<_> = sup_0_bindings.iter().cloned().collect();
@ -294,8 +319,12 @@ fn adorned_to_magic(input: &AdornedDatalogProgram) -> DatalogProgram {
adornment: None,
}));
} else {
let sup_head =
make_magic_sup_rule_head(&rule_head.name, rule_idx, atom_idx);
let sup_head = make_magic_sup_rule_head(
&rule_head.name,
rule_idx,
atom_idx,
&rule_head.adornment,
);
// todo: order it such that seen bindings has the applied rule as prefix
// see m7 in notes
let args = seen_bindings.iter().collect_vec();
@ -306,22 +335,25 @@ fn adorned_to_magic(input: &AdornedDatalogProgram) -> DatalogProgram {
});
let mut sup_rule_atoms = vec![];
mem::swap(&mut sup_rule_atoms, &mut collected_atoms);
let head_args = args
.iter()
.map(|kw| BindingHeadTerm {
name: (*kw).clone(),
aggr: Default::default(),
})
.collect_vec();
debug_assert_eq!(entry.arity, head_args.len());
entry.rules.push(Rule {
head: args
.iter()
.map(|kw| BindingHeadTerm {
name: (*kw).clone(),
aggr: Default::default(),
})
.collect_vec(),
head: head_args,
body: sup_rule_atoms,
vld: rule.vld,
});
let sup_app_rule = Atom::Rule(RuleApplyAtom {
let sup_app_rule_atom = RuleApplyAtom {
name: sup_head,
args: args.iter().map(|kw| Term::Var((*kw).clone())).collect_vec(),
adornment: None,
});
};
let sup_app_rule = Atom::Rule(sup_app_rule_atom);
collected_atoms.push(sup_app_rule.clone());
let head = make_magic_rule_head(&r.name, r_adornment);
@ -342,22 +374,24 @@ fn adorned_to_magic(input: &AdornedDatalogProgram) -> DatalogProgram {
rules: vec![],
arity,
});
let ientry_args = r
.args
.iter()
.zip(r_adornment.iter())
.filter_map(|(kw, is_bound)| {
if *is_bound {
Some(BindingHeadTerm {
name: kw.get_var().cloned().unwrap(),
aggr: Default::default(),
})
} else {
None
}
})
.collect_vec();
debug_assert_eq!(ientry.arity, ientry_args.len());
ientry.rules.push(Rule {
head: r
.args
.iter()
.zip(r_adornment.iter())
.filter_map(|(kw, is_bound)| {
if *is_bound {
Some(BindingHeadTerm {
name: kw.get_var().cloned().unwrap(),
aggr: Default::default(),
})
} else {
None
}
})
.collect_vec(),
head: ientry_args,
body: vec![sup_app_rule],
vld: rule.vld,
});
@ -374,6 +408,7 @@ fn adorned_to_magic(input: &AdornedDatalogProgram) -> DatalogProgram {
rules: vec![],
arity: rule_set.arity,
});
debug_assert_eq!(ruleset.arity, rule.head.len());
ruleset.rules.push(Rule {
head: rule.head.clone(),
body: collected_atoms,

@ -142,10 +142,10 @@ fn creation() {
{
"rule": "?",
"args": [["?a"],
// {"not_exists": {"rule": "ff", "args": ["?alice", "?a"]}},
["?alice", "person.first_name", "Alice"],
{"rule": "ff", "args": ["?alice", "?a"]},
// {"not_exists": {"rule": "ff", "args": ["?alice", "?a"]}},
["?a", "person.first_name", "?n"],
["?alice", "person.first_name", "Alice"],
]
}
],

Loading…
Cancel
Save