From 3fb032c9a784d26dee6cbe20b8d6e6ef685b1d2e Mon Sep 17 00:00:00 2001 From: Ziyang Hu Date: Sat, 27 Aug 2022 17:49:55 +0800 Subject: [PATCH] scc impl --- README.md | 4 +- src/algo/connected_components.rs | 1 + src/algo/mod.rs | 4 +- src/algo/strongly_connected_components.rs | 176 ++++++++++++++++++++++ src/query/graph.rs | 63 +------- tests/air_routes.rs | 6 + 6 files changed, 189 insertions(+), 65 deletions(-) create mode 100644 src/algo/strongly_connected_components.rs diff --git a/README.md b/README.md index a329be0e..0e2f55cc 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,8 @@ * [ ] betweenness centrality * [ ] pagerank * [ ] triangle counting - * [ ] strongly connected components - * [ ] connected components + * [x] strongly connected components + * [x] connected components * [ ] label propagation * [ ] louvain modularity * [x] direct loading of data \ No newline at end of file diff --git a/src/algo/connected_components.rs b/src/algo/connected_components.rs index e23b66a2..9cf92768 100644 --- a/src/algo/connected_components.rs +++ b/src/algo/connected_components.rs @@ -125,6 +125,7 @@ impl AlgoImpl for ConnectedComponents { anyhow!("nodes relation for 'connected_components' too short") })?; if !self.union_find.contains_key(&node) { + self.union_find.insert(node.clone(), DataValue::Bottom); let tuple = if reverse_mode { Tuple(vec![DataValue::from(counter), node]) } else { diff --git a/src/algo/mod.rs b/src/algo/mod.rs index a5aaa157..500d9ac4 100644 --- a/src/algo/mod.rs +++ b/src/algo/mod.rs @@ -7,6 +7,7 @@ use crate::algo::bfs::Bfs; use crate::algo::connected_components::ConnectedComponents; use crate::algo::degree_centrality::DegreeCentrality; use crate::algo::dfs::Dfs; +use crate::algo::strongly_connected_components::StronglyConnectedComponent; use crate::algo::top_sort::TopSort; use crate::data::expr::Expr; use crate::data::id::{EntityId, Validity}; @@ -22,6 +23,7 @@ pub(crate) mod connected_components; pub(crate) mod degree_centrality; pub(crate) 
mod dfs; pub(crate) mod page_rank; +pub(crate) mod strongly_connected_components; pub(crate) mod top_sort; pub(crate) trait AlgoImpl { @@ -66,7 +68,7 @@ impl AlgoHandle { "breadth_first_search" | "bfs" => Box::new(Bfs), "top_sort" => Box::new(TopSort), "connected_components" => Box::new(ConnectedComponents::default()), - "strongly_connected_components" | "scc" => todo!(), + "strongly_connected_components" | "scc" => Box::new(StronglyConnectedComponent), "page_rank" => todo!(), name => bail!("algorithm '{}' not found", name), }) diff --git a/src/algo/strongly_connected_components.rs b/src/algo/strongly_connected_components.rs new file mode 100644 index 00000000..c6837b5c --- /dev/null +++ b/src/algo/strongly_connected_components.rs @@ -0,0 +1,176 @@ +use std::cmp::min; +use std::collections::BTreeMap; + +use anyhow::{anyhow, bail, Result}; +use itertools::Itertools; + +use crate::algo::AlgoImpl; +use crate::data::expr::Expr; +use crate::data::program::{MagicAlgoRuleArg, MagicSymbol}; +use crate::data::symb::Symbol; +use crate::data::tuple::Tuple; +use crate::data::value::DataValue; +use crate::runtime::derived::DerivedRelStore; +use crate::runtime::transact::SessionTx; + +pub(crate) struct StronglyConnectedComponent; + +impl AlgoImpl for StronglyConnectedComponent { + fn run( + &mut self, + tx: &mut SessionTx, + rels: &[MagicAlgoRuleArg], + opts: &BTreeMap, + stores: &BTreeMap, + out: &DerivedRelStore, + ) -> Result<()> { + let edges = rels + .get(0) + .ok_or_else(|| anyhow!("'strongly_connected_components' missing edges relation"))?; + + let reverse_mode = match opts.get(&Symbol::from("mode")) { + None => false, + Some(Expr::Const(DataValue::String(s))) => match s as &str { + "group_first" => true, + "key_first" => false, + v => bail!( + "unexpected option 'mode' for 'strongly_connected_components': {}", + v + ), + }, + Some(v) => bail!( + "unexpected option 'mode' for 'strongly_connected_components': {:?}", + v + ), + }; + + let mut graph: Vec> = vec![]; + let mut 
indices: Vec = vec![]; + let mut inv_indices: BTreeMap = Default::default(); + + for tuple in edges.iter(tx, stores)? { + let mut tuple = tuple?.0.into_iter(); + let from = tuple.next().ok_or_else(|| { + anyhow!("edges relation for 'strongly_connected_components' too short") + })?; + let to = tuple.next().ok_or_else(|| { + anyhow!("edges relation for 'strongly_connected_components' too short") + })?; + let from_idx = if let Some(idx) = inv_indices.get(&from) { + *idx + } else { + inv_indices.insert(from.clone(), graph.len()); + indices.push(from.clone()); + graph.push(vec![]); + graph.len() - 1 + }; + let to_idx = if let Some(idx) = inv_indices.get(&to) { + *idx + } else { + inv_indices.insert(to.clone(), graph.len()); + indices.push(to.clone()); + graph.push(vec![]); + graph.len() - 1 + }; + let from_target = graph.get_mut(from_idx).unwrap(); + from_target.push(to_idx); + } + + let tarjan = TarjanScc::new(&graph).run(); + for (grp_id, cc) in tarjan.iter().enumerate() { + for idx in cc { + let val = indices.get(*idx).unwrap(); + let tuple = if reverse_mode { + Tuple(vec![DataValue::from(grp_id as i64), val.clone()]) + } else { + Tuple(vec![val.clone(), DataValue::from(grp_id as i64)]) + }; + out.put(tuple, 0); + } + } + + let mut counter = tarjan.len() as i64; + + if let Some(nodes) = rels.get(1) { + for tuple in nodes.iter(tx, stores)? 
/// Tarjan-style strongly-connected-components search over an adjacency list.
///
/// `graph[v]` holds the indices of the nodes reachable from `v` by one edge.
/// Every neighbour index must be smaller than `graph.len()`; an out-of-range
/// index makes [`TarjanScc::run`] panic on slice indexing.
pub(crate) struct TarjanScc<'a> {
    /// Borrowed adjacency list; never modified.
    graph: &'a [Vec<usize>],
    /// Last discovery id handed out (ids start at 1).
    id: usize,
    /// Discovery id per node, `None` while the node is unvisited.
    ids: Vec<Option<usize>>,
    /// Low-link value per node; once a component is closed, every member
    /// holds the discovery id of the component's root.
    low: Vec<usize>,
    /// Whether the node is currently on `stack`.
    on_stack: Vec<bool>,
    /// Nodes visited but not yet assigned to a finished component.
    stack: Vec<usize>,
}

impl<'a> TarjanScc<'a> {
    /// Prepares a search over `graph` with every node marked unvisited.
    pub(crate) fn new(graph: &'a [Vec<usize>]) -> Self {
        Self {
            graph,
            id: 0,
            ids: vec![None; graph.len()],
            low: vec![0; graph.len()],
            on_stack: vec![false; graph.len()],
            stack: vec![],
        }
    }

    /// Consumes the search and returns the strongly connected components.
    ///
    /// Each inner vector lists the node indices of one component in ascending
    /// order. Components are ordered by the discovery id of their root, i.e.
    /// by the order in which the depth-first search first reached them.
    /// An empty graph yields an empty vector.
    ///
    /// # Panics
    ///
    /// Panics if an adjacency list contains an index `>= graph.len()`.
    pub(crate) fn run(mut self) -> Vec<Vec<usize>> {
        for i in 0..self.graph.len() {
            if self.ids[i].is_none() {
                self.dfs(i);
            }
        }

        // After the search, `low[v]` is the root id of v's component, so
        // grouping by it yields the components; the BTreeMap keeps them
        // ordered by that id.
        let mut low_map: BTreeMap<usize, Vec<usize>> = BTreeMap::new();
        for (idx, grp) in self.low.into_iter().enumerate() {
            low_map.entry(grp).or_default().push(idx);
        }

        low_map.into_values().collect()
    }

    /// Marks `node` as discovered: assigns the next id and pushes it on the
    /// component stack.
    fn enter(&mut self, node: usize) {
        self.stack.push(node);
        self.on_stack[node] = true;
        self.id += 1;
        self.ids[node] = Some(self.id);
        self.low[node] = self.id;
    }

    /// Depth-first search from the unvisited node `at`.
    ///
    /// Uses an explicit frame stack instead of recursion so that long paths
    /// cannot overflow the call stack. Nodes and edges are visited in the
    /// same order a recursive traversal would use.
    fn dfs(&mut self, at: usize) {
        // Each frame is (node, position of the next outgoing edge to look at).
        let mut frames: Vec<(usize, usize)> = vec![];
        self.enter(at);
        frames.push((at, 0));

        while let Some(top) = frames.last_mut() {
            let (at, edge_pos) = *top;
            top.1 += 1;

            match self.graph[at].get(edge_pos).copied() {
                // Tree edge: descend; the low-link is propagated when the
                // child's frame is popped below.
                Some(to) if self.ids[to].is_none() => {
                    self.enter(to);
                    frames.push((to, 0));
                }
                // Edge to an already discovered node: only nodes still on the
                // component stack can belong to the current component.
                Some(to) => {
                    if self.on_stack[to] {
                        self.low[at] = min(self.low[at], self.low[to]);
                    }
                }
                // All edges of `at` handled: finish the node.
                None => {
                    frames.pop();

                    // `at` is a component root when its low-link is its own id.
                    if self.ids[at] == Some(self.low[at]) {
                        let root_id = self.low[at];
                        while let Some(node) = self.stack.pop() {
                            self.on_stack[node] = false;
                            self.low[node] = root_id;
                            if node == at {
                                break;
                            }
                        }
                    }

                    // Equivalent of the post-recursion update in the parent.
                    if let Some(&(parent, _)) = frames.last() {
                        if self.on_stack[at] {
                            self.low[parent] = min(self.low[parent], self.low[at]);
                        }
                    }
                }
            }
        }
    }
}
[Vec], - id: usize, - ids: Vec>, - low: Vec, - on_stack: Vec, - stack: Vec, -} - -impl<'a> TarjanScc<'a> { - pub(crate) fn new(graph: &'a [Vec]) -> Self { - Self { - graph, - id: 0, - ids: vec![None; graph.len()], - low: vec![0; graph.len()], - on_stack: vec![false; graph.len()], - stack: vec![], - } - } - pub(crate) fn run(mut self) -> Vec> { - for i in 0..self.graph.len() { - if self.ids[i].is_none() { - self.dfs(i); - } - } - - let mut low_map: BTreeMap> = BTreeMap::new(); - for (idx, grp) in self.low.into_iter().enumerate() { - low_map.entry(grp).or_default().push(idx); - } - - low_map.into_iter().map(|(_, vs)| vs).collect_vec() - } - fn dfs(&mut self, at: usize) { - self.stack.push(at); - self.on_stack[at] = true; - self.id += 1; - self.ids[at] = Some(self.id); - self.low[at] = self.id; - for to in &self.graph[at] { - let to = *to; - if self.ids[to].is_none() { - self.dfs(to); - } - if self.on_stack[to] { - self.low[at] = min(self.low[at], self.low[to]); - } - } - if self.ids[at].unwrap() == self.low[at] { - while let Some(node) = self.stack.pop() { - self.on_stack[node] = false; - self.low[node] = self.ids[at].unwrap(); - if node == at { - break; - } - } - } - } -} +use crate::algo::strongly_connected_components::TarjanScc; pub(crate) type Graph = BTreeMap>; diff --git a/tests/air_routes.rs b/tests/air_routes.rs index 1d462e1a..d7596cf7 100644 --- a/tests/air_routes.rs +++ b/tests/air_routes.rs @@ -113,6 +113,12 @@ fn air_routes() -> Result<()> { dbg!(bfs_time.elapsed()); println!("{}", res); + let scc_time = Instant::now(); + let res = db.run_script(r#" + ? <- strongly_connected_components!(:flies_to_code[], [?id