stratification; generalized Kahn

main
Ziyang Hu 2 years ago
parent 884ed4ba97
commit 61172da6c5

@ -5,7 +5,7 @@ use anyhow::Result;
use itertools::Itertools; use itertools::Itertools;
use log::{debug, log_enabled, trace, Level}; use log::{debug, log_enabled, trace, Level};
use crate::data::keyword::Keyword; use crate::data::keyword::{Keyword, PROG_ENTRY};
use crate::query::compile::{ use crate::query::compile::{
BindingHeadFormatter, BindingHeadTerm, DatalogProgram, QueryCompilationError, BindingHeadFormatter, BindingHeadTerm, DatalogProgram, QueryCompilationError,
}; };
@ -20,7 +20,7 @@ impl SessionTx {
.map(|(k, s)| (k.clone(), (self.new_throwaway(), s.arity))) .map(|(k, s)| (k.clone(), (self.new_throwaway(), s.arity)))
.collect::<BTreeMap<_, _>>(); .collect::<BTreeMap<_, _>>();
let ret_area = stores let ret_area = stores
.get(&Keyword::from("?")) .get(&PROG_ENTRY)
.ok_or(QueryCompilationError::EntryNotFound)? .ok_or(QueryCompilationError::EntryNotFound)?
.0 .0
.clone(); .clone();

@ -64,11 +64,11 @@ impl<'a> TarjanScc<'a> {
} }
} }
type Graph<T> = BTreeMap<T, Vec<T>>; pub(crate) type Graph<T> = BTreeMap<T, Vec<T>>;
pub(crate) fn strongly_connected_components<T>(graph: &Graph<T>) -> Vec<Vec<&T>> pub(crate) fn strongly_connected_components<T>(graph: &Graph<T>) -> Vec<Vec<&T>>
where where
T: Ord, T: Ord,
{ {
let indices = graph.keys().collect_vec(); let indices = graph.keys().collect_vec();
let invert_indices: BTreeMap<_, _> = indices let invert_indices: BTreeMap<_, _> = indices
@ -88,7 +88,7 @@ pub(crate) fn strongly_connected_components<T>(graph: &Graph<T>) -> Vec<Vec<&T>>
} }
struct Reachable<'a, T> { struct Reachable<'a, T> {
graph: &'a Graph<T> graph: &'a Graph<T>,
} }
impl<'a, T: Ord> Reachable<'a, T> { impl<'a, T: Ord> Reachable<'a, T> {
@ -101,18 +101,85 @@ impl<'a, T: Ord> Reachable<'a, T> {
} }
} }
pub(crate) fn reachable_components<'a, T: Ord>(graph: &'a Graph<T>, start: &'a T) -> BTreeSet<&'a T> { pub(crate) fn reachable_components<'a, T: Ord>(
graph: &'a Graph<T>,
start: &'a T,
) -> BTreeSet<&'a T> {
let mut collected = BTreeSet::from([start]); let mut collected = BTreeSet::from([start]);
let worker = Reachable {graph}; let worker = Reachable { graph };
worker.walk(start, &mut collected); worker.walk(start, &mut collected);
collected collected
} }
/// Adjacency map of a graph with labelled edges: `graph[from][to]` is `true` when the edge
/// `from -> to` is "poisoned", meaning its endpoints must not share a stratum.
pub(crate) type StratifiedGraph<T> = BTreeMap<T, BTreeMap<T, bool>>;

/// Generalized Kahn's algorithm.
///
/// Graph edges can be labelled "poisoned", and no returned stratum contains a poisoned edge
/// within it. The returned vector of vectors is simultaneously a topological ordering and a
/// stratification: each stratum is filled greedily before a new one is opened.
///
/// Nodes are the integers `0..num_nodes`. Nodes that never reach in-degree zero (i.e. nodes on
/// or behind a cycle) do not appear in the result.
///
/// # Panics
///
/// Panics if any edge target in `graph` is `>= num_nodes`.
pub(crate) fn generalized_kahn(
    graph: &StratifiedGraph<usize>,
    num_nodes: usize,
) -> Vec<Vec<usize>> {
    // Count incoming edges for every node.
    let mut in_degree = vec![0; num_nodes];
    for to in graph.values().flat_map(|tos| tos.keys()) {
        in_degree[*to] += 1;
    }

    let mut ret = vec![];
    let mut current_stratum = vec![];
    // Nodes whose predecessors are all emitted and that may join the current stratum.
    let mut safe_pending: Vec<usize> = in_degree
        .iter()
        .enumerate()
        .filter(|(_, degree)| **degree == 0)
        .map(|(node, _)| node)
        .collect();
    // Nodes reached through a poisoned edge from the current stratum; they must wait for the
    // next stratum.
    let mut unsafe_nodes: BTreeSet<usize> = BTreeSet::new();

    loop {
        if safe_pending.is_empty() && !unsafe_nodes.is_empty() {
            // The current stratum is exhausted: close it and release the deferred nodes that
            // have no remaining unprocessed predecessors.
            ret.push(std::mem::take(&mut current_stratum));
            safe_pending.extend(
                unsafe_nodes
                    .iter()
                    .copied()
                    .filter(|node| in_degree[*node] == 0),
            );
            // Nodes still waiting on other predecessors are no longer tied to the closed
            // stratum, so the whole set is reset.
            unsafe_nodes.clear();
        }

        let removed = match safe_pending.pop() {
            Some(node) => node,
            None => {
                if !current_stratum.is_empty() {
                    ret.push(current_stratum);
                }
                break;
            }
        };
        current_stratum.push(removed);

        if let Some(edges) = graph.get(&removed) {
            for (nxt, poisoned) in edges {
                in_degree[*nxt] -= 1;
                if *poisoned {
                    unsafe_nodes.insert(*nxt);
                }
                if in_degree[*nxt] == 0 && !unsafe_nodes.contains(nxt) {
                    safe_pending.push(*nxt);
                }
            }
        }
    }
    ret
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::collections::BTreeMap; use std::collections::BTreeMap;
use crate::query::graph::{reachable_components, strongly_connected_components}; use crate::query::graph::{
generalized_kahn, reachable_components, strongly_connected_components, StratifiedGraph,
};
#[test] #[test]
fn test_scc() { fn test_scc() {
@ -122,12 +189,24 @@ mod tests {
("c", vec!["a", "d", "e"]), ("c", vec!["a", "d", "e"]),
("d", vec!["e", "e", "e"]), ("d", vec!["e", "e", "e"]),
("e", vec![]), ("e", vec![]),
("f", vec![]) ("f", vec![]),
]); ]);
let scc = strongly_connected_components(&graph); let scc = strongly_connected_components(&graph);
dbg!(scc); dbg!(scc);
let reachable = reachable_components(&graph, &"a"); let reachable = reachable_components(&graph, &"a");
dbg!(reachable); dbg!(reachable);
let s_graph: StratifiedGraph<usize> = BTreeMap::from([
(
0,
BTreeMap::from([(1, false), (2, false), (3, false), (4, true), (5, true)]),
),
(1, BTreeMap::from([(6, false)])),
(2, BTreeMap::from([(6, false)])),
(3, BTreeMap::from([(6, true)])),
(4, BTreeMap::from([(6, true)])),
(5, BTreeMap::from([(6, false)])),
]);
dbg!(generalized_kahn(&s_graph, 7));
} }
} }

@ -19,6 +19,7 @@ pub(crate) mod pull;
pub(crate) mod relation; pub(crate) mod relation;
pub(crate) mod logical; pub(crate) mod logical;
pub(crate) mod graph; pub(crate) mod graph;
pub(crate) mod stratify;
impl SessionTx { impl SessionTx {
pub fn run_query(&mut self, payload: &JsonValue) -> Result<QueryResult<'_>> { pub fn run_query(&mut self, payload: &JsonValue) -> Result<QueryResult<'_>> {
@ -125,7 +126,7 @@ impl SessionTx {
vld, vld,
spec, spec,
0, 0,
&specs, specs,
CurrentPath::new(idx)?, CurrentPath::new(idx)?,
&mut collected, &mut collected,
&mut recursive_seen, &mut recursive_seen,

@ -0,0 +1,135 @@
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, BTreeSet};
use anyhow::Result;
use itertools::Itertools;
use crate::data::keyword::{Keyword, PROG_ENTRY};
use crate::query::compile::{Atom, DatalogProgram, RuleSet};
use crate::query::graph::{reachable_components, strongly_connected_components, Graph, StratifiedGraph, generalized_kahn};
/// Errors produced while analysing the rule-dependency graph of a program for stratification.
#[derive(thiserror::Error, Debug)]
pub enum GraphError {
/// Returned by `stratify_program` when the dependency graph has no node for `PROG_ENTRY`.
#[error("every program requires an entry named '?'")]
EntryNotFound,
/// Returned by `verify_no_cycle` when a strongly connected component contains a negated edge
/// between two of its own members; the payload is the set of rule names in that component.
#[error("the rules #{0:?} form a cycle with negation/aggregation inside, which is unsafe")]
GraphNotStratified(BTreeSet<Keyword>),
}
impl Atom {
    /// Collects the names of all rules referenced inside this atom.
    ///
    /// The flag attached to each name is `false` for a bare rule application and is inverted
    /// once for every enclosing `Negation`. When the same rule occurs several times inside a
    /// conjunction or disjunction, the flags are OR-ed together.
    fn contained_rules(&self) -> BTreeMap<&Keyword, bool> {
        match self {
            // Triples and predicates never reference other rules.
            Atom::AttrTriple(_) | Atom::Predicate(_) => BTreeMap::new(),
            Atom::Rule(r) => {
                let mut single = BTreeMap::new();
                single.insert(&r.name, false);
                single
            }
            Atom::Negation(inner) => {
                let mut found = inner.contained_rules();
                for flag in found.values_mut() {
                    *flag = !*flag;
                }
                found
            }
            Atom::Conjunction(args) | Atom::Disjunction(args) => {
                let mut merged: BTreeMap<&Keyword, bool> = BTreeMap::new();
                for arg in args {
                    for (name, negated) in arg.contained_rules() {
                        // A missing entry starts at `false`, so `|=` yields `negated` itself
                        // for new names and `old || negated` for existing ones.
                        *merged.entry(name).or_insert(false) |= negated;
                    }
                }
                merged
            }
        }
    }
}
/// Builds the labelled dependency graph of a program.
///
/// Every rule set becomes a node keyed by its name; its outgoing edges are the rules mentioned
/// in the bodies of its rules. An edge is labelled `true` if at least one of those mentions was
/// reported as negated by `Atom::contained_rules`.
fn convert_program_to_graph(prog: &DatalogProgram) -> StratifiedGraph<&'_ Keyword> {
    let mut graph = BTreeMap::new();
    for (head, ruleset) in prog.iter() {
        let mut edges: BTreeMap<&Keyword, bool> = BTreeMap::new();
        for rule in &ruleset.rules {
            for atom in &rule.body {
                for (target, negated) in atom.contained_rules() {
                    match edges.entry(target) {
                        Entry::Occupied(mut slot) => {
                            // Once an edge is negated anywhere it stays negated.
                            let combined = *slot.get() || negated;
                            slot.insert(combined);
                        }
                        Entry::Vacant(slot) => {
                            slot.insert(negated);
                        }
                    }
                }
            }
        }
        graph.insert(head, edges);
    }
    graph
}
fn reduce_to_graph<'a>(g: &StratifiedGraph<&'a Keyword>) -> Graph<&'a Keyword> {
g.iter()
.map(|(k, s)| (*k, s.iter().map(|(sk, _)| *sk).collect_vec()))
.collect()
}
/// Checks that no strongly connected component contains a negated edge between two of its own
/// members.
///
/// # Errors
///
/// Returns `GraphError::GraphNotStratified` carrying the rule names of the first offending
/// component found.
fn verify_no_cycle(g: &StratifiedGraph<&'_ Keyword>, sccs: Vec<BTreeSet<&Keyword>>) -> Result<()> {
    for (node, edges) in g {
        for component in sccs.iter().filter(|c| c.contains(node)) {
            // A negated edge whose target lies in the same component closes an unsafe cycle.
            let has_unsafe_edge = edges
                .iter()
                .any(|(target, negated)| *negated && component.contains(target));
            if has_unsafe_edge {
                let members: BTreeSet<Keyword> = component.iter().copied().cloned().collect();
                return Err(GraphError::GraphNotStratified(members).into());
            }
        }
    }
    Ok(())
}
/// Splits a Datalog program into strata (work in progress).
///
/// Currently this builds the rule-dependency graph, restricts it to the rules reachable from
/// the program entry, and verifies that no strongly connected component contains a negated
/// edge. Steps 5-7 are not implemented yet, so a program that passes verification hits
/// `todo!()`.
///
/// # Errors
///
/// * `GraphError::EntryNotFound` if the program has no rule named by `PROG_ENTRY`.
/// * `GraphError::GraphNotStratified` if a cycle goes through a negated edge.
///
/// # Panics
///
/// Panics via `todo!()` once verification succeeds.
pub(crate) fn stratify_program(prog: DatalogProgram) -> Result<Vec<DatalogProgram>> {
    // prerequisite: the program is already in disjunctive normal form
    // 0. build a graph of the program
    let prog_entry: &Keyword = &PROG_ENTRY;
    let stratified_graph = convert_program_to_graph(&prog);
    let graph = reduce_to_graph(&stratified_graph);
    if !graph.contains_key(prog_entry) {
        return Err(GraphError::EntryNotFound.into());
    }
    // 1. find reachable clauses starting from the query
    // (names are cloned so that the borrow of `graph` ends before it is consumed below)
    let reachable: BTreeSet<_> = reachable_components(&graph, &prog_entry)
        .into_iter()
        .map(|k| (*k).clone())
        .collect();
    // 2. prune the graph of unreachable clauses, i.e. keep only the reachable ones
    let stratified_graph: StratifiedGraph<_> = stratified_graph
        .into_iter()
        .filter(|(k, _)| reachable.contains(*k))
        .collect();
    let graph: Graph<_> = graph
        .into_iter()
        .filter(|(k, _)| reachable.contains(*k))
        .collect();
    // 3. find SCC of the clauses
    let sccs: Vec<BTreeSet<&Keyword>> = strongly_connected_components(&graph)
        .into_iter()
        .map(|scc| scc.into_iter().cloned().collect())
        .collect_vec();
    // 4. for each SCC, verify that no neg/agg edges are present so that it is really stratifiable
    verify_no_cycle(&stratified_graph, sccs)?;
    // 5. build a reduced graph for the SCC's
    // 6. topological sort the reduced graph to get a stratification
    // 7. translate the stratification into datalog program
    todo!()
}
Loading…
Cancel
Save