update algorithms to use more efficient graph representation

main
Ziyang Hu 2 years ago
parent 03d23173cb
commit 3b29579c48

81
Cargo.lock generated

@ -277,12 +277,6 @@ version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3ac9f8b63eca6fd385229b3675f6cc0dc5c8a5c8a54a59d4f52ffd670d87b0c" checksum = "c3ac9f8b63eca6fd385229b3675f6cc0dc5c8a5c8a54a59d4f52ffd670d87b0c"
[[package]]
name = "bytemuck"
version = "1.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aaa3a8d9a1ca92e282c96a32d6511b695d7d994d1d102ba85d279f9b2756947f"
[[package]] [[package]]
name = "byteorder" name = "byteorder"
version = "1.4.3" version = "1.4.3"
@ -551,7 +545,6 @@ dependencies = [
"log", "log",
"miette", "miette",
"minreq", "minreq",
"nalgebra",
"num-traits", "num-traits",
"ordered-float", "ordered-float",
"pest", "pest",
@ -1674,15 +1667,6 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "matrixmultiply"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "add85d4dd35074e6fedc608f8c8f513a3548619a9024b751949ef0e8e45a4d84"
dependencies = [
"rawpointer",
]
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.5.0" version = "2.5.0"
@ -1836,33 +1820,6 @@ dependencies = [
"twoway", "twoway",
] ]
[[package]]
name = "nalgebra"
version = "0.31.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20bd243ab3dbb395b39ee730402d2e5405e448c75133ec49cc977762c4cba3d1"
dependencies = [
"approx",
"matrixmultiply",
"nalgebra-macros",
"num-complex",
"num-rational",
"num-traits",
"simba",
"typenum",
]
[[package]]
name = "nalgebra-macros"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01fcc0b8149b4632adc89ac3b7b31a12fb6099a0317a4eb2ebff574ef7de7218"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "nanorand" name = "nanorand"
version = "0.7.0" version = "0.7.0"
@ -2702,12 +2659,6 @@ dependencies = [
"rand_core 0.5.1", "rand_core 0.5.1",
] ]
[[package]]
name = "rawpointer"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
[[package]] [[package]]
name = "rayon" name = "rayon"
version = "1.6.0" version = "1.6.0"
@ -2925,15 +2876,6 @@ version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
[[package]]
name = "safe_arch"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "794821e4ccb0d9f979512f9c1973480123f9bd62a90d74ab0f9426fcf8f4a529"
dependencies = [
"bytemuck",
]
[[package]] [[package]]
name = "safemem" name = "safemem"
version = "0.3.3" version = "0.3.3"
@ -3094,19 +3036,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2" checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2"
[[package]]
name = "simba"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f3fd720c48c53cace224ae62bef1bbff363a70c68c4802a78b5cc6159618176"
dependencies = [
"approx",
"num-complex",
"num-traits",
"paste",
"wide",
]
[[package]] [[package]]
name = "siphasher" name = "siphasher"
version = "0.3.10" version = "0.3.10"
@ -3959,16 +3888,6 @@ dependencies = [
"once_cell", "once_cell",
] ]
[[package]]
name = "wide"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae41ecad2489a1655c8ef8489444b0b113c0a0c795944a3572a0931cf7d2525c"
dependencies = [
"bytemuck",
"safe_arch",
]
[[package]] [[package]]
name = "winapi" name = "winapi"
version = "0.3.9" version = "0.3.9"

@ -33,7 +33,7 @@ storage-sqlite = ["dep:sqlite", "dep:sqlite3-src"]
## You can also [fine-tune](https://github.com/cozodb/cozo/blob/main/TUNING_ROCKSDB.md) RocksDB options. ## You can also [fine-tune](https://github.com/cozodb/cozo/blob/main/TUNING_ROCKSDB.md) RocksDB options.
storage-rocksdb = ["dep:cozorocks"] storage-rocksdb = ["dep:cozorocks"]
## Enables the graph algorithms. ## Enables the graph algorithms.
graph-algo = ["dep:nalgebra"] graph-algo = []
## Allows the utilities to make web requests to fetch data. ## Allows the utilities to make web requests to fetch data.
requests = ["dep:minreq"] requests = ["dep:minreq"]
## Uses jemalloc as the global allocator, can make a difference in performance. ## Uses jemalloc as the global allocator, can make a difference in performance.
@ -124,7 +124,6 @@ uuid = { version = "1.1.2", features = ["v1", "v4", "serde"] }
csv = "1.1.6" csv = "1.1.6"
document-features = "0.2.6" document-features = "0.2.6"
rayon = { version = "1.5.3", optional = true } rayon = { version = "1.5.3", optional = true }
nalgebra = { version = "0.31.1", optional = true }
minreq = { version = "2.6.0", features = ["https-rustls"], optional = true } minreq = { version = "2.6.0", features = ["https-rustls"], optional = true }
tikv-jemallocator-global = { version = "0.5.0", optional = true } tikv-jemallocator-global = { version = "0.5.0", optional = true }
cozorocks = { path = "../cozorocks", version = "0.1.2", optional = true } cozorocks = { path = "../cozorocks", version = "0.1.2", optional = true }

@ -76,3 +76,18 @@ fn wikipedia_pagerank(b: &mut Bencher) {
.unwrap() .unwrap()
}); });
} }
#[bench]
fn wikipedia_louvain(b: &mut Bencher) {
initialize(&TEST_DB);
b.iter(|| {
let start = Instant::now();
TEST_DB
.run_script(
"?[grp, idx] <~ CommunityDetectionLouvain(*article[])",
Default::default(),
)
.unwrap();
dbg!(start.elapsed());
})
}

@ -8,12 +8,12 @@
use std::cmp::Reverse; use std::cmp::Reverse;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use graph::prelude::{DirectedCsrGraph, DirectedNeighborsWithValues, Graph};
use itertools::Itertools; use itertools::Itertools;
use miette::Result; use miette::Result;
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use priority_queue::PriorityQueue; use priority_queue::PriorityQueue;
#[cfg(feature = "rayon")]
use rayon::prelude::*; use rayon::prelude::*;
use smartstring::{LazyCompact, SmartString}; use smartstring::{LazyCompact, SmartString};
@ -38,28 +38,25 @@ impl FixedRule for BetweennessCentrality {
let edges = payload.get_input(0)?; let edges = payload.get_input(0)?;
let undirected = payload.bool_option("undirected", Some(false))?; let undirected = payload.bool_option("undirected", Some(false))?;
let (graph, indices, _inv_indices, _) = let (graph, indices, _inv_indices) =
edges.convert_edge_to_weighted_graph(undirected, false)?; edges.to_directed_weighted_graph(undirected, false)?;
let n = graph.len(); let n = graph.node_count();
if n == 0 { if n == 0 {
return Ok(()); return Ok(());
} }
#[cfg(feature = "rayon")]
let it = (0..n).into_par_iter(); let it = (0..n).into_par_iter();
#[cfg(not(feature = "rayon"))]
let it = (0..n).into_iter();
let centrality_segs: Vec<_> = it let centrality_segs: Vec<_> = it
.map(|start| -> Result<BTreeMap<usize, f64>> { .map(|start| -> Result<BTreeMap<u32, f32>> {
let res_for_start = let res_for_start =
dijkstra_keep_ties(&graph, start, &(), &(), &(), poison.clone())?; dijkstra_keep_ties(&graph, start, &(), &(), &(), poison.clone())?;
let mut ret: BTreeMap<usize, f64> = Default::default(); let mut ret: BTreeMap<u32, f32> = Default::default();
let grouped = res_for_start.into_iter().group_by(|(n, _, _)| *n); let grouped = res_for_start.into_iter().group_by(|(n, _, _)| *n);
for (_, grp) in grouped.into_iter() { for (_, grp) in grouped.into_iter() {
let grp = grp.collect_vec(); let grp = grp.collect_vec();
let l = grp.len() as f64; let l = grp.len() as f32;
for (_, _, path) in grp { for (_, _, path) in grp {
if path.len() < 3 { if path.len() < 3 {
continue; continue;
@ -73,16 +70,16 @@ impl FixedRule for BetweennessCentrality {
Ok(ret) Ok(ret)
}) })
.collect::<Result<_>>()?; .collect::<Result<_>>()?;
let mut centrality: Vec<f64> = vec![0.; graph.len()]; let mut centrality: Vec<f32> = vec![0.; n as usize];
for m in centrality_segs { for m in centrality_segs {
for (k, v) in m { for (k, v) in m {
centrality[k] += v; centrality[k as usize] += v;
} }
} }
for (i, s) in centrality.into_iter().enumerate() { for (i, s) in centrality.into_iter().enumerate() {
let node = indices[i].clone(); let node = indices[i].clone();
out.put(vec![node, s.into()]); out.put(vec![node, (s as f64).into()]);
} }
Ok(()) Ok(())
@ -110,28 +107,25 @@ impl FixedRule for ClosenessCentrality {
let edges = payload.get_input(0)?; let edges = payload.get_input(0)?;
let undirected = payload.bool_option("undirected", Some(false))?; let undirected = payload.bool_option("undirected", Some(false))?;
let (graph, indices, _inv_indices, _) = let (graph, indices, _inv_indices) =
edges.convert_edge_to_weighted_graph(undirected, false)?; edges.to_directed_weighted_graph(undirected, false)?;
let n = graph.len(); let n = graph.node_count();
if n == 0 { if n == 0 {
return Ok(()); return Ok(());
} }
#[cfg(feature = "rayon")]
let it = (0..n).into_par_iter(); let it = (0..n).into_par_iter();
#[cfg(not(feature = "rayon"))]
let it = (0..n).into_iter();
let res: Vec<_> = it let res: Vec<_> = it
.map(|start| -> Result<f64> { .map(|start| -> Result<f32> {
let distances = dijkstra_cost_only(&graph, start, poison.clone())?; let distances = dijkstra_cost_only(&graph, start, poison.clone())?;
let total_dist: f64 = distances.iter().filter(|d| d.is_finite()).cloned().sum(); let total_dist: f32 = distances.iter().filter(|d| d.is_finite()).cloned().sum();
let nc: f64 = distances.iter().filter(|d| d.is_finite()).count() as f64; let nc: f32 = distances.iter().filter(|d| d.is_finite()).count() as f32;
Ok(nc * nc / total_dist / (n - 1) as f64) Ok(nc * nc / total_dist / (n - 1) as f32)
}) })
.collect::<Result<_>>()?; .collect::<Result<_>>()?;
for (idx, centrality) in res.into_iter().enumerate() { for (idx, centrality) in res.into_iter().enumerate() {
out.put(vec![indices[idx].clone(), DataValue::from(centrality)]); out.put(vec![indices[idx].clone(), DataValue::from(centrality as f64)]);
poison.check()?; poison.check()?;
} }
Ok(()) Ok(())
@ -148,28 +142,32 @@ impl FixedRule for ClosenessCentrality {
} }
pub(crate) fn dijkstra_cost_only( pub(crate) fn dijkstra_cost_only(
edges: &[Vec<(usize, f64)>], edges: &DirectedCsrGraph<u32, (), f32>,
start: usize, start: u32,
poison: Poison, poison: Poison,
) -> Result<Vec<f64>> { ) -> Result<Vec<f32>> {
let mut distance = vec![f64::INFINITY; edges.len()]; let mut distance = vec![f32::INFINITY; edges.node_count() as usize];
let mut pq = PriorityQueue::new(); let mut pq = PriorityQueue::new();
let mut back_pointers = vec![usize::MAX; edges.len()]; let mut back_pointers = vec![u32::MAX; edges.node_count() as usize];
distance[start] = 0.; distance[start as usize] = 0.;
pq.push(start, Reverse(OrderedFloat(0.))); pq.push(start, Reverse(OrderedFloat(0.)));
while let Some((node, Reverse(OrderedFloat(cost)))) = pq.pop() { while let Some((node, Reverse(OrderedFloat(cost)))) = pq.pop() {
if cost > distance[node] { if cost > distance[node as usize] {
continue; continue;
} }
for (nxt_node, path_weight) in &edges[node] { for target in edges.out_neighbors_with_values(node) {
let nxt_cost = cost + *path_weight; let nxt_node = target.target;
if nxt_cost < distance[*nxt_node] { let path_weight = target.value;
pq.push_increase(*nxt_node, Reverse(OrderedFloat(nxt_cost)));
distance[*nxt_node] = nxt_cost; let nxt_cost = cost + path_weight;
back_pointers[*nxt_node] = node; if nxt_cost < distance[nxt_node as usize] {
pq.push_increase(nxt_node, Reverse(OrderedFloat(nxt_cost)));
distance[nxt_node as usize] = nxt_cost;
back_pointers[nxt_node as usize] = node;
} }
} }
poison.check()?; poison.check()?;
} }

@ -8,6 +8,7 @@
use std::cmp::Reverse; use std::cmp::Reverse;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use graph::prelude::{DirectedCsrGraph, DirectedNeighborsWithValues, Graph};
use itertools::Itertools; use itertools::Itertools;
use miette::Result; use miette::Result;
@ -33,16 +34,16 @@ impl FixedRule for MinimumSpanningForestKruskal {
poison: Poison, poison: Poison,
) -> Result<()> { ) -> Result<()> {
let edges = payload.get_input(0)?; let edges = payload.get_input(0)?;
let (graph, indices, _, _) = edges.convert_edge_to_weighted_graph(true, true)?; let (graph, indices, _) = edges.to_directed_weighted_graph(true, true)?;
if graph.is_empty() { if graph.node_count() == 0 {
return Ok(()); return Ok(());
} }
let msp = kruskal(&graph, poison)?; let msp = kruskal(&graph, poison)?;
for (src, dst, cost) in msp { for (src, dst, cost) in msp {
out.put(vec![ out.put(vec![
indices[src].clone(), indices[src as usize].clone(),
indices[dst].clone(), indices[dst as usize].clone(),
DataValue::from(cost), DataValue::from(cost as f64),
]); ]);
} }
@ -59,15 +60,16 @@ impl FixedRule for MinimumSpanningForestKruskal {
} }
} }
fn kruskal(edges: &[Vec<(usize, f64)>], poison: Poison) -> Result<Vec<(usize, usize, f64)>> { fn kruskal(edges: &DirectedCsrGraph<u32, (), f32>, poison: Poison) -> Result<Vec<(u32, u32, f32)>> {
let mut pq = PriorityQueue::new(); let mut pq = PriorityQueue::new();
let mut uf = UnionFind::new(edges.len()); let mut uf = UnionFind::new(edges.node_count());
let mut mst = Vec::with_capacity(edges.len() - 1); let mut mst = Vec::with_capacity((edges.node_count() - 1) as usize);
for (from, tos) in edges.iter().enumerate() { for from in 0..edges.node_count() {
for (to, cost) in tos { for target in edges.out_neighbors_with_values(from) {
pq.push((from, *to), Reverse(OrderedFloat(*cost))); let to = target.target;
let cost = target.value;
pq.push((from, to), Reverse(OrderedFloat(cost)));
} }
poison.check()?;
} }
while let Some(((from, to), Reverse(OrderedFloat(cost)))) = pq.pop() { while let Some(((from, to), Reverse(OrderedFloat(cost)))) = pq.pop() {
if uf.connected(from, to) { if uf.connected(from, to) {
@ -76,7 +78,7 @@ fn kruskal(edges: &[Vec<(usize, f64)>], poison: Poison) -> Result<Vec<(usize, us
uf.union(from, to); uf.union(from, to);
mst.push((from, to, cost)); mst.push((from, to, cost));
if uf.szs[0] == edges.len() { if uf.szs[0] == edges.node_count() {
break; break;
} }
poison.check()?; poison.check()?;
@ -85,43 +87,43 @@ fn kruskal(edges: &[Vec<(usize, f64)>], poison: Poison) -> Result<Vec<(usize, us
} }
struct UnionFind { struct UnionFind {
ids: Vec<usize>, ids: Vec<u32>,
szs: Vec<usize>, szs: Vec<u32>,
} }
impl UnionFind { impl UnionFind {
fn new(n: usize) -> Self { fn new(n: u32) -> Self {
Self { Self {
ids: (0..n).collect_vec(), ids: (0..n).collect_vec(),
szs: vec![1; n], szs: vec![1; n as usize],
} }
} }
fn union(&mut self, p: usize, q: usize) { fn union(&mut self, p: u32, q: u32) {
let root1 = self.find(p); let root1 = self.find(p);
let root2 = self.find(q); let root2 = self.find(q);
if root1 != root2 { if root1 != root2 {
if self.szs[root1] < self.szs[root2] { if self.szs[root1 as usize] < self.szs[root2 as usize] {
self.szs[root2] += self.szs[root1]; self.szs[root2 as usize] += self.szs[root1 as usize];
self.ids[root1] = root2; self.ids[root1 as usize] = root2;
} else { } else {
self.szs[root1] += self.szs[root2]; self.szs[root1 as usize] += self.szs[root2 as usize];
self.ids[root2] = root1; self.ids[root2 as usize] = root1;
} }
} }
} }
fn find(&mut self, mut p: usize) -> usize { fn find(&mut self, mut p: u32) -> u32 {
let mut root = p; let mut root = p;
while root != self.ids[root] { while root != self.ids[root as usize] {
root = self.ids[root]; root = self.ids[root as usize];
} }
while p != root { while p != root {
let next = self.ids[p]; let next = self.ids[p as usize];
self.ids[p] = root; self.ids[p as usize] = root;
p = next; p = next;
} }
root root
} }
fn connected(&mut self, p: usize, q: usize) -> bool { fn connected(&mut self, p: u32, q: u32) -> bool {
self.find(p) == self.find(q) self.find(p) == self.find(q)
} }
} }

@ -8,15 +8,16 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use graph::prelude::{DirectedCsrGraph, DirectedNeighborsWithValues, Graph};
use itertools::Itertools; use itertools::Itertools;
use miette::Result; use miette::Result;
use rand::prelude::*; use rand::prelude::*;
use smartstring::{LazyCompact, SmartString}; use smartstring::{LazyCompact, SmartString};
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::data::expr::Expr; use crate::data::expr::Expr;
use crate::data::symb::Symbol; use crate::data::symb::Symbol;
use crate::data::value::DataValue; use crate::data::value::DataValue;
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::parse::SourceSpan; use crate::parse::SourceSpan;
use crate::runtime::db::Poison; use crate::runtime::db::Poison;
use crate::runtime::temp_store::RegularTempStore; use crate::runtime::temp_store::RegularTempStore;
@ -33,8 +34,7 @@ impl FixedRule for LabelPropagation {
let edges = payload.get_input(0)?; let edges = payload.get_input(0)?;
let undirected = payload.bool_option("undirected", Some(false))?; let undirected = payload.bool_option("undirected", Some(false))?;
let max_iter = payload.pos_integer_option("max_iter", Some(10))?; let max_iter = payload.pos_integer_option("max_iter", Some(10))?;
let (graph, indices, _inv_indices, _) = let (graph, indices, _inv_indices) = edges.to_directed_weighted_graph(undirected, true)?;
edges.convert_edge_to_weighted_graph(undirected, true)?;
let labels = label_propagation(&graph, max_iter, poison)?; let labels = label_propagation(&graph, max_iter, poison)?;
for (idx, label) in labels.into_iter().enumerate() { for (idx, label) in labels.into_iter().enumerate() {
let node = indices[idx].clone(); let node = indices[idx].clone();
@ -54,11 +54,11 @@ impl FixedRule for LabelPropagation {
} }
fn label_propagation( fn label_propagation(
graph: &[Vec<(usize, f64)>], graph: &DirectedCsrGraph<u32, (), f32>,
max_iter: usize, max_iter: usize,
poison: Poison, poison: Poison,
) -> Result<Vec<usize>> { ) -> Result<Vec<u32>> {
let n_nodes = graph.len(); let n_nodes = graph.node_count();
let mut labels = (0..n_nodes).collect_vec(); let mut labels = (0..n_nodes).collect_vec();
let mut rng = thread_rng(); let mut rng = thread_rng();
let mut iter_order = (0..n_nodes).collect_vec(); let mut iter_order = (0..n_nodes).collect_vec();
@ -66,14 +66,13 @@ fn label_propagation(
iter_order.shuffle(&mut rng); iter_order.shuffle(&mut rng);
let mut changed = false; let mut changed = false;
for node in &iter_order { for node in &iter_order {
let mut labels_for_node: BTreeMap<usize, f64> = BTreeMap::new(); let mut labels_for_node: BTreeMap<u32, f32> = BTreeMap::new();
let neighbours = &graph[*node]; for edge in graph.out_neighbors_with_values(*node) {
if neighbours.is_empty() { let label = labels[edge.target as usize];
continue; *labels_for_node.entry(label).or_default() += edge.value;
} }
for (to_node, weight) in neighbours { if labels_for_node.is_empty() {
let label = labels[*to_node]; continue;
*labels_for_node.entry(label).or_default() += *weight;
} }
let mut labels_by_score = labels_for_node.into_iter().collect_vec(); let mut labels_by_score = labels_for_node.into_iter().collect_vec();
labels_by_score.sort_by(|a, b| a.1.total_cmp(&b.1).reverse()); labels_by_score.sort_by(|a, b| a.1.total_cmp(&b.1).reverse());
@ -84,9 +83,9 @@ fn label_propagation(
.map(|(l, _)| l) .map(|(l, _)| l)
.collect_vec(); .collect_vec();
let new_label = *candidate_labels.choose(&mut rng).unwrap(); let new_label = *candidate_labels.choose(&mut rng).unwrap();
if new_label != labels[*node] { if new_label != labels[*node as usize] {
changed = true; changed = true;
labels[*node] = new_label; labels[*node as usize] = new_label;
} }
poison.check()?; poison.check()?;
} }

@ -8,15 +8,18 @@
use std::collections::{BTreeMap, BTreeSet}; use std::collections::{BTreeMap, BTreeSet};
use graph::prelude::{
CsrLayout, DirectedCsrGraph, DirectedNeighborsWithValues, Graph, GraphBuilder,
};
use itertools::Itertools; use itertools::Itertools;
use log::debug; use log::debug;
use miette::Result; use miette::Result;
use smartstring::{LazyCompact, SmartString}; use smartstring::{LazyCompact, SmartString};
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::data::expr::Expr; use crate::data::expr::Expr;
use crate::data::symb::Symbol; use crate::data::symb::Symbol;
use crate::data::value::DataValue; use crate::data::value::DataValue;
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::parse::SourceSpan; use crate::parse::SourceSpan;
use crate::runtime::db::Poison; use crate::runtime::db::Poison;
use crate::runtime::temp_store::RegularTempStore; use crate::runtime::temp_store::RegularTempStore;
@ -33,27 +36,16 @@ impl FixedRule for CommunityDetectionLouvain {
let edges = payload.get_input(0)?; let edges = payload.get_input(0)?;
let undirected = payload.bool_option("undirected", Some(false))?; let undirected = payload.bool_option("undirected", Some(false))?;
let max_iter = payload.pos_integer_option("max_iter", Some(10))?; let max_iter = payload.pos_integer_option("max_iter", Some(10))?;
let delta = payload.unit_interval_option("delta", Some(0.0001))?; let delta = payload.unit_interval_option("delta", Some(0.0001))? as f32;
let keep_depth = payload.non_neg_integer_option("keep_depth", None).ok(); let keep_depth = payload.non_neg_integer_option("keep_depth", None).ok();
let (graph, indices, _inv_indices, _) = let (graph, indices, _inv_indices) = edges.to_directed_weighted_graph(undirected, false)?;
edges.convert_edge_to_weighted_graph(undirected, false)?;
let graph = graph
.into_iter()
.map(|edges| -> BTreeMap<usize, f64> {
let mut m = BTreeMap::default();
for (to, weight) in edges {
*m.entry(to).or_default() += weight;
}
m
})
.collect_vec();
let result = louvain(&graph, delta, max_iter, poison)?; let result = louvain(&graph, delta, max_iter, poison)?;
for (idx, node) in indices.into_iter().enumerate() { for (idx, node) in indices.into_iter().enumerate() {
let mut labels = vec![]; let mut labels = vec![];
let mut cur_idx = idx; let mut cur_idx = idx as u32;
for hierarchy in &result { for hierarchy in &result {
let nxt_idx = hierarchy[cur_idx]; let nxt_idx = hierarchy[cur_idx as usize];
labels.push(DataValue::from(nxt_idx as i64)); labels.push(DataValue::from(nxt_idx as i64));
cur_idx = nxt_idx; cur_idx = nxt_idx;
} }
@ -78,21 +70,21 @@ impl FixedRule for CommunityDetectionLouvain {
} }
fn louvain( fn louvain(
graph: &[BTreeMap<usize, f64>], graph: &DirectedCsrGraph<u32, (), f32>,
delta: f64, delta: f32,
max_iter: usize, max_iter: usize,
poison: Poison, poison: Poison,
) -> Result<Vec<Vec<usize>>> { ) -> Result<Vec<Vec<u32>>> {
let mut current = graph; let mut current = graph;
let mut collected = vec![]; let mut collected = vec![];
while current.len() > 2 { while current.node_count() > 2 {
let (node2comm, new_graph) = louvain_step(current, delta, max_iter, poison.clone())?; let (node2comm, new_graph) = louvain_step(current, delta, max_iter, poison.clone())?;
debug!( debug!(
"before size: {}, after size: {}", "before size: {}, after size: {}",
current.len(), current.node_count(),
new_graph.len() new_graph.node_count()
); );
if new_graph.len() == current.len() { if new_graph.node_count() == current.node_count() {
break; break;
} }
collected.push((node2comm, new_graph)); collected.push((node2comm, new_graph));
@ -102,68 +94,82 @@ fn louvain(
} }
fn calculate_delta( fn calculate_delta(
node: usize, node: u32,
target_community: usize, target_community: u32,
graph: &[BTreeMap<usize, f64>], graph: &DirectedCsrGraph<u32, (), f32>,
comm2nodes: &[BTreeSet<usize>], comm2nodes: &[BTreeSet<u32>],
out_weights: &[f64], out_weights: &[f32],
in_weights: &[f64], in_weights: &[f32],
total_weight: f64, total_weight: f32,
) -> f64 { ) -> f32 {
let mut sigma_out_total = 0.; let mut sigma_out_total = 0.;
let mut sigma_in_total = 0.; let mut sigma_in_total = 0.;
let mut d2comm = 0.; let mut d2comm = 0.;
let target_community_members = &comm2nodes[target_community]; let target_community_members = &comm2nodes[target_community as usize];
for member in target_community_members.iter() { for member in target_community_members.iter() {
if *member == node { if *member == node {
continue; continue;
} }
sigma_out_total += out_weights[*member]; sigma_out_total += out_weights[*member as usize];
sigma_in_total += in_weights[*member]; sigma_in_total += in_weights[*member as usize];
if let Some(weight) = graph[node].get(member) { for target in graph.out_neighbors_with_values(node) {
d2comm += *weight; if target.target == *member {
d2comm += target.value;
break;
}
} }
if let Some(weight) = graph[*member].get(&node) { for target in graph.out_neighbors_with_values(*member) {
d2comm += *weight; if target.target == node {
d2comm += target.value;
break;
}
} }
} }
d2comm d2comm
- (sigma_out_total * in_weights[node] + sigma_in_total * out_weights[node]) / total_weight - (sigma_out_total * in_weights[node as usize]
+ sigma_in_total * out_weights[node as usize])
/ total_weight
} }
fn louvain_step( fn louvain_step(
graph: &[BTreeMap<usize, f64>], graph: &DirectedCsrGraph<u32, (), f32>,
delta: f64, delta: f32,
max_iter: usize, max_iter: usize,
poison: Poison, poison: Poison,
) -> Result<(Vec<usize>, Vec<BTreeMap<usize, f64>>)> { ) -> Result<(Vec<u32>, DirectedCsrGraph<u32, (), f32>)> {
let n_nodes = graph.len(); let n_nodes = graph.node_count();
let mut total_weight = 0.; let mut total_weight = 0.;
let mut out_weights = vec![0.; n_nodes]; let mut out_weights = vec![0.; n_nodes as usize];
let mut in_weights = vec![0.; n_nodes]; let mut in_weights = vec![0.; n_nodes as usize];
for (from, edges) in graph.iter().enumerate() { for from in 0..n_nodes {
for (to, weight) in edges { for target in graph.out_neighbors_with_values(from) {
total_weight += *weight; let to = target.target;
out_weights[from] += *weight; let weight = target.value;
in_weights[*to] += *weight;
total_weight += weight;
out_weights[from as usize] += weight;
in_weights[to as usize] += weight;
} }
} }
let mut node2comm = (0..n_nodes).collect_vec(); let mut node2comm = (0..n_nodes).collect_vec();
let mut comm2nodes = (0..n_nodes).map(|i| BTreeSet::from([i])).collect_vec(); let mut comm2nodes = (0..n_nodes).map(|i| BTreeSet::from([i])).collect_vec();
let mut last_modurality = f64::NEG_INFINITY; let mut last_modurality = f32::NEG_INFINITY;
for _ in 0..max_iter { for _ in 0..max_iter {
let modularity = { let modularity = {
let mut modularity = 0.; let mut modularity = 0.;
for from in 0..n_nodes { for from in 0..n_nodes {
for to in &comm2nodes[node2comm[from]] { for to in &comm2nodes[node2comm[from as usize] as usize] {
if let Some(weight) = graph[from].get(to) { for target in graph.out_neighbors_with_values(from) {
modularity += *weight; if target.target == *to {
modularity += target.value;
}
} }
modularity -= in_weights[from] * out_weights[*to] / total_weight; modularity -=
in_weights[from as usize] * out_weights[*to as usize] / total_weight;
} }
} }
modularity /= total_weight; modularity /= total_weight;
@ -177,8 +183,9 @@ fn louvain_step(
} }
let mut moved = false; let mut moved = false;
for (node, edges) in graph.iter().enumerate() { for node in 0..n_nodes {
let community_for_node = node2comm[node]; let community_for_node = node2comm[node as usize];
let original_delta_q = calculate_delta( let original_delta_q = calculate_delta(
node, node,
community_for_node, community_for_node,
@ -192,8 +199,10 @@ fn louvain_step(
let mut best_improvement = 0.; let mut best_improvement = 0.;
let mut considered_communities = BTreeSet::from([community_for_node]); let mut considered_communities = BTreeSet::from([community_for_node]);
for to_node in edges.keys() { for target in graph.out_neighbors_with_values(node) {
let target_community = node2comm[*to_node]; let to_node = target.target;
let target_community = node2comm[to_node as usize];
if target_community == community_for_node if target_community == community_for_node
|| considered_communities.contains(&target_community) || considered_communities.contains(&target_community)
{ {
@ -217,9 +226,9 @@ fn louvain_step(
} }
if best_improvement > 0. { if best_improvement > 0. {
moved = true; moved = true;
node2comm[node] = candidate_community; node2comm[node as usize] = candidate_community;
comm2nodes[community_for_node].remove(&node); comm2nodes[community_for_node as usize].remove(&node);
comm2nodes[candidate_community].insert(node); comm2nodes[candidate_community as usize].insert(node);
} }
poison.check()?; poison.check()?;
} }
@ -227,8 +236,8 @@ fn louvain_step(
break; break;
} }
} }
let mut new_comm_indices: BTreeMap<usize, usize> = Default::default(); let mut new_comm_indices: BTreeMap<u32, u32> = Default::default();
let mut new_comm_count: usize = 0; let mut new_comm_count: u32 = 0;
for temp_comm_idx in node2comm.iter_mut() { for temp_comm_idx in node2comm.iter_mut() {
if let Some(new_comm_idx) = new_comm_indices.get(temp_comm_idx) { if let Some(new_comm_idx) = new_comm_indices.get(temp_comm_idx) {
@ -240,27 +249,44 @@ fn louvain_step(
} }
} }
let mut new_graph = vec![BTreeMap::new(); new_comm_count]; let mut new_graph_list: Vec<BTreeMap<u32, f32>> =
vec![BTreeMap::new(); new_comm_count as usize];
for (node, comm) in node2comm.iter().enumerate() { for (node, comm) in node2comm.iter().enumerate() {
let target = &mut new_graph[*comm]; let target = &mut new_graph_list[*comm as usize];
for (to_node, weight) in &graph[node] { for t in graph.out_neighbors_with_values(node as u32) {
let to_comm = node2comm[*to_node]; let to_node = t.target;
let weight = t.value;
let to_comm = node2comm[to_node as usize];
*target.entry(to_comm).or_default() += weight; *target.entry(to_comm).or_default() += weight;
} }
} }
let new_graph: DirectedCsrGraph<u32, (), f32> = GraphBuilder::new()
.csr_layout(CsrLayout::Sorted)
.edges_with_values(
new_graph_list
.into_iter()
.enumerate()
.flat_map(move |(fr, nds)| {
nds.into_iter()
.map(move |(to, weight)| (fr as u32, to, weight))
}),
)
.build();
Ok((node2comm, new_graph)) Ok((node2comm, new_graph))
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use itertools::Itertools; use graph::prelude::{CsrLayout, GraphBuilder};
use crate::fixed_rule::algos::louvain::louvain; use crate::fixed_rule::algos::louvain::louvain;
use crate::runtime::db::Poison; use crate::runtime::db::Poison;
#[test] #[test]
fn sample() { fn sample() {
let graph: Vec<Vec<usize>> = vec![ let graph: Vec<Vec<u32>> = vec![
vec![2, 3, 5], // 0 vec![2, 3, 5], // 0
vec![2, 4, 7], // 1 vec![2, 4, 7], // 1
vec![0, 1, 4, 5, 6], // 2 vec![0, 1, 4, 5, 6], // 2
@ -278,10 +304,15 @@ mod tests {
vec![8, 9, 10], // 14 vec![8, 9, 10], // 14
vec![8], // 15 vec![8], // 15
]; ];
let graph = graph let graph = GraphBuilder::new()
.into_iter() .csr_layout(CsrLayout::Sorted)
.map(|edges| edges.into_iter().map(|n| (n, 1.)).collect()) .edges_with_values(
.collect_vec(); graph
.into_iter()
.enumerate()
.flat_map(|(fr, tos)| tos.into_iter().map(move |to| (fr as u32, to, 1.))),
)
.build();
louvain(&graph, 0., 100, Poison::default()).unwrap(); louvain(&graph, 0., 100, Poison::default()).unwrap();
} }
} }

@ -10,17 +10,14 @@ use std::collections::BTreeMap;
#[cfg(not(feature = "rayon"))] #[cfg(not(feature = "rayon"))]
use approx::AbsDiffEq; use approx::AbsDiffEq;
#[cfg(feature = "rayon")] use graph::prelude::{page_rank, PageRankConfig};
use graph::prelude::{page_rank, CsrLayout, DirectedCsrGraph, GraphBuilder, PageRankConfig};
use miette::Result; use miette::Result;
#[cfg(not(feature = "rayon"))]
use nalgebra::{Dynamic, OMatrix, U1};
use smartstring::{LazyCompact, SmartString}; use smartstring::{LazyCompact, SmartString};
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::data::expr::Expr; use crate::data::expr::Expr;
use crate::data::symb::Symbol; use crate::data::symb::Symbol;
use crate::data::value::DataValue; use crate::data::value::DataValue;
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::parse::SourceSpan; use crate::parse::SourceSpan;
use crate::runtime::db::Poison; use crate::runtime::db::Poison;
use crate::runtime::temp_store::RegularTempStore; use crate::runtime::temp_store::RegularTempStore;
@ -41,38 +38,15 @@ impl FixedRule for PageRank {
let epsilon = payload.unit_interval_option("epsilon", Some(0.0001))? as f32; let epsilon = payload.unit_interval_option("epsilon", Some(0.0001))? as f32;
let iterations = payload.pos_integer_option("iterations", Some(10))?; let iterations = payload.pos_integer_option("iterations", Some(10))?;
let (graph, indices, _) = edges.convert_edge_to_graph(undirected)?; let (graph, indices, _) = edges.to_directed_graph(undirected)?;
#[cfg(feature = "rayon")]
{
let graph: DirectedCsrGraph<u32> = GraphBuilder::new()
.csr_layout(CsrLayout::Sorted)
.edges(
graph
.iter()
.enumerate()
.flat_map(|(fr, ls)| ls.iter().map(move |to| (fr as u32, *to as u32))),
)
.build();
let (ranks, _n_run, _) = page_rank( let (ranks, _n_run, _) = page_rank(
&graph, &graph,
PageRankConfig::new(iterations, epsilon as f64, theta), PageRankConfig::new(iterations, epsilon as f64, theta),
); );
for (idx, score) in ranks.iter().enumerate() { for (idx, score) in ranks.iter().enumerate() {
out.put(vec![indices[idx].clone(), DataValue::from(*score as f64)]); out.put(vec![indices[idx].clone(), DataValue::from(*score as f64)]);
}
}
#[cfg(not(feature = "rayon"))]
{
let res = pagerank(&graph, theta, epsilon, iterations, _poison)?;
for (idx, score) in res.iter().enumerate() {
out.put(
Tuple(vec![indices[idx].clone(), DataValue::from(*score as f64)]),
0,
);
}
} }
Ok(()) Ok(())
} }

@ -8,6 +8,7 @@
use std::cmp::Reverse; use std::cmp::Reverse;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use graph::prelude::{DirectedCsrGraph, DirectedNeighborsWithValues, Graph};
use miette::Diagnostic; use miette::Diagnostic;
use miette::Result; use miette::Result;
@ -34,8 +35,8 @@ impl FixedRule for MinimumSpanningTreePrim {
poison: Poison, poison: Poison,
) -> Result<()> { ) -> Result<()> {
let edges = payload.get_input(0)?; let edges = payload.get_input(0)?;
let (graph, indices, inv_indices, _) = edges.convert_edge_to_weighted_graph(true, true)?; let (graph, indices, inv_indices) = edges.to_directed_weighted_graph(true, true)?;
if graph.is_empty() { if graph.node_count() == 0 {
return Ok(()); return Ok(());
} }
let starting = match payload.get_input(1) { let starting = match payload.get_input(1) {
@ -63,9 +64,9 @@ impl FixedRule for MinimumSpanningTreePrim {
let msp = prim(&graph, starting, poison)?; let msp = prim(&graph, starting, poison)?;
for (src, dst, cost) in msp { for (src, dst, cost) in msp {
out.put(vec![ out.put(vec![
indices[src].clone(), indices[src as usize].clone(),
indices[dst].clone(), indices[dst as usize].clone(),
DataValue::from(cost), DataValue::from(cost as f64),
]); ]);
} }
Ok(()) Ok(())
@ -82,29 +83,30 @@ impl FixedRule for MinimumSpanningTreePrim {
} }
fn prim( fn prim(
graph: &[Vec<(usize, f64)>], graph: &DirectedCsrGraph<u32, (), f32>,
starting: usize, starting: u32,
poison: Poison, poison: Poison,
) -> Result<Vec<(usize, usize, f64)>> { ) -> Result<Vec<(u32, u32, f32)>> {
let mut visited = vec![false; graph.len()]; let mut visited = vec![false; graph.node_count() as usize];
let mut mst_edges = Vec::with_capacity(graph.len() - 1); let mut mst_edges = Vec::with_capacity((graph.node_count() - 1) as usize);
let mut pq = PriorityQueue::new(); let mut pq = PriorityQueue::new();
let mut relax_edges_at_node = |node: usize, pq: &mut PriorityQueue<_, _>| { let mut relax_edges_at_node = |node: u32, pq: &mut PriorityQueue<_, _>| {
visited[node] = true; visited[node as usize] = true;
let edges = &graph[node]; for target in graph.out_neighbors_with_values(node) {
for (to_node, cost) in edges { let to_node = target.target;
if visited[*to_node] { let cost = target.value;
if visited[to_node as usize] {
continue; continue;
} }
pq.push_increase(*to_node, (Reverse(OrderedFloat(*cost)), node)); pq.push_increase(to_node, (Reverse(OrderedFloat(cost)), node));
} }
}; };
relax_edges_at_node(starting, &mut pq); relax_edges_at_node(starting, &mut pq);
while let Some((to_node, (Reverse(OrderedFloat(cost)), from_node))) = pq.pop() { while let Some((to_node, (Reverse(OrderedFloat(cost)), from_node))) = pq.pop() {
if mst_edges.len() == graph.len() - 1 { if mst_edges.len() == (graph.node_count() - 1) as usize {
break; break;
} }
mst_edges.push((from_node, to_node, cost)); mst_edges.push((from_node, to_node, cost));

@ -9,12 +9,12 @@
use std::cmp::{Ordering, Reverse}; use std::cmp::{Ordering, Reverse};
use std::collections::{BTreeMap, BTreeSet}; use std::collections::{BTreeMap, BTreeSet};
use std::iter; use std::iter;
use graph::prelude::{DirectedCsrGraph, DirectedNeighborsWithValues, Graph};
use itertools::Itertools; use itertools::Itertools;
use miette::Result; use miette::Result;
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use priority_queue::PriorityQueue; use priority_queue::PriorityQueue;
#[cfg(feature = "rayon")]
use rayon::prelude::*; use rayon::prelude::*;
use smallvec::{smallvec, SmallVec}; use smallvec::{smallvec, SmallVec};
use smartstring::{LazyCompact, SmartString}; use smartstring::{LazyCompact, SmartString};
@ -42,8 +42,8 @@ impl FixedRule for ShortestPathDijkstra {
let undirected = payload.bool_option("undirected", Some(false))?; let undirected = payload.bool_option("undirected", Some(false))?;
let keep_ties = payload.bool_option("keep_ties", Some(false))?; let keep_ties = payload.bool_option("keep_ties", Some(false))?;
let (graph, indices, inv_indices, _) = let (graph, indices, inv_indices) =
edges.convert_edge_to_weighted_graph(undirected, false)?; edges.to_directed_weighted_graph(undirected, false)?;
let mut starting_nodes = BTreeSet::new(); let mut starting_nodes = BTreeSet::new();
for tuple in starting.iter()? { for tuple in starting.iter()? {
@ -88,22 +88,19 @@ impl FixedRule for ShortestPathDijkstra {
}; };
for (target, cost, path) in res { for (target, cost, path) in res {
let t = vec![ let t = vec![
indices[start].clone(), indices[start as usize].clone(),
indices[target].clone(), indices[target as usize].clone(),
DataValue::from(cost), DataValue::from(cost as f64),
DataValue::List(path.into_iter().map(|u| indices[u].clone()).collect_vec()), DataValue::List(path.into_iter().map(|u| indices[u as usize].clone()).collect_vec()),
]; ];
out.put(t) out.put(t)
} }
} }
} else { } else {
#[cfg(feature = "rayon")]
let it = starting_nodes.into_par_iter(); let it = starting_nodes.into_par_iter();
#[cfg(not(feature = "rayon"))]
let it = starting_nodes.into_iter();
let all_res: Vec<_> = it let all_res: Vec<_> = it
.map(|start| -> Result<(usize, Vec<(usize, f64, Vec<usize>)>)> { .map(|start| -> Result<(u32, Vec<(u32, f32, Vec<u32>)>)> {
Ok(( Ok((
start, start,
if let Some(tn) = &termination_nodes { if let Some(tn) = &termination_nodes {
@ -135,10 +132,10 @@ impl FixedRule for ShortestPathDijkstra {
for (start, res) in all_res { for (start, res) in all_res {
for (target, cost, path) in res { for (target, cost, path) in res {
let t = vec![ let t = vec![
indices[start].clone(), indices[start as usize].clone(),
indices[target].clone(), indices[target as usize].clone(),
DataValue::from(cost), DataValue::from(cost as f64),
DataValue::List(path.into_iter().map(|u| indices[u].clone()).collect_vec()), DataValue::List(path.into_iter().map(|u| indices[u as usize].clone()).collect_vec()),
]; ];
out.put(t) out.put(t)
} }
@ -182,41 +179,41 @@ impl Ord for HeapState {
impl Eq for HeapState {} impl Eq for HeapState {}
pub(crate) trait ForbiddenEdge { pub(crate) trait ForbiddenEdge {
fn is_forbidden(&self, src: usize, dst: usize) -> bool; fn is_forbidden(&self, src: u32, dst: u32) -> bool;
} }
impl ForbiddenEdge for () { impl ForbiddenEdge for () {
fn is_forbidden(&self, _src: usize, _dst: usize) -> bool { fn is_forbidden(&self, _src: u32, _dst: u32) -> bool {
false false
} }
} }
impl ForbiddenEdge for BTreeSet<(usize, usize)> { impl ForbiddenEdge for BTreeSet<(u32, u32)> {
fn is_forbidden(&self, src: usize, dst: usize) -> bool { fn is_forbidden(&self, src: u32, dst: u32) -> bool {
self.contains(&(src, dst)) self.contains(&(src, dst))
} }
} }
pub(crate) trait ForbiddenNode { pub(crate) trait ForbiddenNode {
fn is_forbidden(&self, node: usize) -> bool; fn is_forbidden(&self, node: u32) -> bool;
} }
impl ForbiddenNode for () { impl ForbiddenNode for () {
fn is_forbidden(&self, _node: usize) -> bool { fn is_forbidden(&self, _node: u32) -> bool {
false false
} }
} }
impl ForbiddenNode for BTreeSet<usize> { impl ForbiddenNode for BTreeSet<u32> {
fn is_forbidden(&self, node: usize) -> bool { fn is_forbidden(&self, node: u32) -> bool {
self.contains(&node) self.contains(&node)
} }
} }
pub(crate) trait Goal { pub(crate) trait Goal {
fn is_exhausted(&self) -> bool; fn is_exhausted(&self) -> bool;
fn visit(&mut self, node: usize); fn visit(&mut self, node: u32);
fn iter(&self, total: usize) -> Box<dyn Iterator<Item = usize> + '_>; fn iter(&self, total: u32) -> Box<dyn Iterator<Item = u32> + '_>;
} }
impl Goal for () { impl Goal for () {
@ -224,19 +221,19 @@ impl Goal for () {
false false
} }
fn visit(&mut self, _node: usize) {} fn visit(&mut self, _node: u32) {}
fn iter(&self, total: usize) -> Box<dyn Iterator<Item = usize> + '_> { fn iter(&self, total: u32) -> Box<dyn Iterator<Item = u32> + '_> {
Box::new(0..total) Box::new(0..total)
} }
} }
impl Goal for Option<usize> { impl Goal for Option<u32> {
fn is_exhausted(&self) -> bool { fn is_exhausted(&self) -> bool {
self.is_none() self.is_none()
} }
fn visit(&mut self, node: usize) { fn visit(&mut self, node: u32) {
if let Some(u) = &self { if let Some(u) = &self {
if *u == node { if *u == node {
self.take(); self.take();
@ -244,7 +241,7 @@ impl Goal for Option<usize> {
} }
} }
fn iter(&self, _total: usize) -> Box<dyn Iterator<Item = usize> + '_> { fn iter(&self, _total: u32) -> Box<dyn Iterator<Item = u32> + '_> {
if let Some(u) = self { if let Some(u) = self {
Box::new(iter::once(*u)) Box::new(iter::once(*u))
} else { } else {
@ -253,51 +250,55 @@ impl Goal for Option<usize> {
} }
} }
impl Goal for BTreeSet<usize> { impl Goal for BTreeSet<u32> {
fn is_exhausted(&self) -> bool { fn is_exhausted(&self) -> bool {
self.is_empty() self.is_empty()
} }
fn visit(&mut self, node: usize) { fn visit(&mut self, node: u32) {
self.remove(&node); self.remove(&node);
} }
fn iter(&self, _total: usize) -> Box<dyn Iterator<Item = usize> + '_> { fn iter(&self, _total: u32) -> Box<dyn Iterator<Item = u32> + '_> {
Box::new(self.iter().cloned()) Box::new(self.iter().cloned())
} }
} }
pub(crate) fn dijkstra<FE: ForbiddenEdge, FN: ForbiddenNode, G: Goal + Clone>( pub(crate) fn dijkstra<FE: ForbiddenEdge, FN: ForbiddenNode, G: Goal + Clone>(
edges: &[Vec<(usize, f64)>], edges: &DirectedCsrGraph<u32, (), f32>,
start: usize, start: u32,
goals: &G, goals: &G,
forbidden_edges: &FE, forbidden_edges: &FE,
forbidden_nodes: &FN, forbidden_nodes: &FN,
) -> Vec<(usize, f64, Vec<usize>)> { ) -> Vec<(u32, f32, Vec<u32>)> {
let mut distance = vec![f64::INFINITY; edges.len()]; let graph_size = edges.node_count();
let mut distance = vec![f32::INFINITY; graph_size as usize];
let mut pq = PriorityQueue::new(); let mut pq = PriorityQueue::new();
let mut back_pointers = vec![usize::MAX; edges.len()]; let mut back_pointers = vec![u32::MAX; graph_size as usize];
distance[start] = 0.; distance[start as usize] = 0.;
pq.push(start, Reverse(OrderedFloat(0.))); pq.push(start, Reverse(OrderedFloat(0.)));
let mut goals_remaining = goals.clone(); let mut goals_remaining = goals.clone();
while let Some((node, Reverse(OrderedFloat(cost)))) = pq.pop() { while let Some((node, Reverse(OrderedFloat(cost)))) = pq.pop() {
if cost > distance[node] { if cost > distance[node as usize] {
continue; continue;
} }
for (nxt_node, path_weight) in &edges[node] { for target in edges.out_neighbors_with_values(node) {
if forbidden_nodes.is_forbidden(*nxt_node) { let nxt_node = target.target;
let path_weight = target.value;
if forbidden_nodes.is_forbidden(nxt_node) {
continue; continue;
} }
if forbidden_edges.is_forbidden(node, *nxt_node) { if forbidden_edges.is_forbidden(node, nxt_node) {
continue; continue;
} }
let nxt_cost = cost + *path_weight; let nxt_cost = cost + path_weight;
if nxt_cost < distance[*nxt_node] { if nxt_cost < distance[nxt_node as usize] {
pq.push_increase(*nxt_node, Reverse(OrderedFloat(nxt_cost))); pq.push_increase(nxt_node, Reverse(OrderedFloat(nxt_cost)));
distance[*nxt_node] = nxt_cost; distance[nxt_node as usize] = nxt_cost;
back_pointers[*nxt_node] = node; back_pointers[nxt_node as usize] = node;
} }
} }
@ -308,9 +309,9 @@ pub(crate) fn dijkstra<FE: ForbiddenEdge, FN: ForbiddenNode, G: Goal + Clone>(
} }
let ret = goals let ret = goals
.iter(edges.len()) .iter(edges.node_count())
.map(|target| { .map(|target| {
let cost = distance[target]; let cost = distance[target as usize];
if !cost.is_finite() { if !cost.is_finite() {
(target, cost, vec![]) (target, cost, vec![])
} else { } else {
@ -318,7 +319,7 @@ pub(crate) fn dijkstra<FE: ForbiddenEdge, FN: ForbiddenNode, G: Goal + Clone>(
let mut current = target; let mut current = target;
while current != start { while current != start {
path.push(current); path.push(current);
current = back_pointers[current]; current = back_pointers[current as usize];
} }
path.push(start); path.push(start);
path.reverse(); path.reverse();
@ -331,41 +332,44 @@ pub(crate) fn dijkstra<FE: ForbiddenEdge, FN: ForbiddenNode, G: Goal + Clone>(
} }
pub(crate) fn dijkstra_keep_ties<FE: ForbiddenEdge, FN: ForbiddenNode, G: Goal + Clone>( pub(crate) fn dijkstra_keep_ties<FE: ForbiddenEdge, FN: ForbiddenNode, G: Goal + Clone>(
edges: &[Vec<(usize, f64)>], edges: &DirectedCsrGraph<u32, (), f32>,
start: usize, start: u32,
goals: &G, goals: &G,
forbidden_edges: &FE, forbidden_edges: &FE,
forbidden_nodes: &FN, forbidden_nodes: &FN,
poison: Poison, poison: Poison,
) -> Result<Vec<(usize, f64, Vec<usize>)>> { ) -> Result<Vec<(u32, f32, Vec<u32>)>> {
let mut distance = vec![f64::INFINITY; edges.len()]; let mut distance = vec![f32::INFINITY; edges.node_count() as usize];
let mut pq = PriorityQueue::new(); let mut pq = PriorityQueue::new();
let mut back_pointers: Vec<SmallVec<[usize; 1]>> = vec![smallvec![]; edges.len()]; let mut back_pointers: Vec<SmallVec<[u32; 1]>> = vec![smallvec![]; edges.node_count() as usize];
distance[start] = 0.; distance[start as usize] = 0.;
pq.push(start, Reverse(OrderedFloat(0.))); pq.push(start, Reverse(OrderedFloat(0.)));
let mut goals_remaining = goals.clone(); let mut goals_remaining = goals.clone();
while let Some((node, Reverse(OrderedFloat(cost)))) = pq.pop() { while let Some((node, Reverse(OrderedFloat(cost)))) = pq.pop() {
if cost > distance[node] { if cost > distance[node as usize] {
continue; continue;
} }
for (nxt_node, path_weight) in &edges[node] { for target in edges.out_neighbors_with_values(node) {
if forbidden_nodes.is_forbidden(*nxt_node) { let nxt_node = target.target;
let path_weight = target.value;
if forbidden_nodes.is_forbidden(nxt_node) {
continue; continue;
} }
if forbidden_edges.is_forbidden(node, *nxt_node) { if forbidden_edges.is_forbidden(node, nxt_node) {
continue; continue;
} }
let nxt_cost = cost + *path_weight; let nxt_cost = cost + path_weight;
if nxt_cost < distance[*nxt_node] { if nxt_cost < distance[nxt_node as usize] {
pq.push_increase(*nxt_node, Reverse(OrderedFloat(nxt_cost))); pq.push_increase(nxt_node, Reverse(OrderedFloat(nxt_cost)));
distance[*nxt_node] = nxt_cost; distance[nxt_node as usize] = nxt_cost;
back_pointers[*nxt_node].clear(); back_pointers[nxt_node as usize].clear();
back_pointers[*nxt_node].push(node); back_pointers[nxt_node as usize].push(node);
} else if nxt_cost == distance[*nxt_node] { } else if nxt_cost == distance[nxt_node as usize] {
pq.push_increase(*nxt_node, Reverse(OrderedFloat(nxt_cost))); pq.push_increase(nxt_node, Reverse(OrderedFloat(nxt_cost)));
back_pointers[*nxt_node].push(node); back_pointers[nxt_node as usize].push(node);
} }
poison.check()?; poison.check()?;
} }
@ -377,27 +381,27 @@ pub(crate) fn dijkstra_keep_ties<FE: ForbiddenEdge, FN: ForbiddenNode, G: Goal +
} }
let ret = goals let ret = goals
.iter(edges.len()) .iter(edges.node_count())
.flat_map(|target| { .flat_map(|target| {
let cost = distance[target]; let cost = distance[target as usize];
if !cost.is_finite() { if !cost.is_finite() {
vec![(target, cost, vec![])] vec![(target, cost, vec![])]
} else { } else {
struct CollectPath { struct CollectPath {
collected: Vec<(usize, f64, Vec<usize>)>, collected: Vec<(u32, f32, Vec<u32>)>,
} }
impl CollectPath { impl CollectPath {
fn collect( fn collect(
&mut self, &mut self,
chain: &[usize], chain: &[u32],
start: usize, start: u32,
target: usize, target: u32,
cost: f64, cost: f32,
back_pointers: &[SmallVec<[usize; 1]>], back_pointers: &[SmallVec<[u32; 1]>],
) { ) {
let last = chain.last().unwrap(); let last = chain.last().unwrap();
let prevs = &back_pointers[*last]; let prevs = &back_pointers[*last as usize];
for nxt in prevs { for nxt in prevs {
let mut ret = chain.to_vec(); let mut ret = chain.to_vec();
ret.push(*nxt); ret.push(*nxt);

@ -9,6 +9,7 @@
use std::cmp::min; use std::cmp::min;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use graph::prelude::{DirectedCsrGraph, DirectedNeighbors, Graph};
use itertools::Itertools; use itertools::Itertools;
use miette::Result; use miette::Result;
@ -47,12 +48,12 @@ impl FixedRule for StronglyConnectedComponent {
let edges = payload.get_input(0)?; let edges = payload.get_input(0)?;
let (graph, indices, mut inv_indices) = let (graph, indices, mut inv_indices) =
edges.convert_edge_to_graph(!self.strong)?; edges.to_directed_graph(!self.strong)?;
let tarjan = TarjanScc::new(&graph).run(poison)?; let tarjan = TarjanSccG::new(graph).run(poison)?;
for (grp_id, cc) in tarjan.iter().enumerate() { for (grp_id, cc) in tarjan.iter().enumerate() {
for idx in cc { for idx in cc {
let val = indices.get(*idx).unwrap(); let val = indices.get(*idx as usize).unwrap();
let tuple = vec![val.clone(), DataValue::from(grp_id as i64)]; let tuple = vec![val.clone(), DataValue::from(grp_id as i64)];
out.put(tuple); out.put(tuple);
} }
@ -65,7 +66,7 @@ impl FixedRule for StronglyConnectedComponent {
let tuple = tuple?; let tuple = tuple?;
let node = tuple.into_iter().next().unwrap(); let node = tuple.into_iter().next().unwrap();
if !inv_indices.contains_key(&node) { if !inv_indices.contains_key(&node) {
inv_indices.insert(node.clone(), usize::MAX); inv_indices.insert(node.clone(), u32::MAX);
let tuple = vec![node, DataValue::from(counter)]; let tuple = vec![node, DataValue::from(counter)];
out.put(tuple); out.put(tuple);
counter += 1; counter += 1;
@ -86,60 +87,62 @@ impl FixedRule for StronglyConnectedComponent {
} }
} }
pub(crate) struct TarjanScc<'a> {
graph: &'a [Vec<usize>], pub(crate) struct TarjanSccG {
id: usize, graph: DirectedCsrGraph<u32>,
ids: Vec<Option<usize>>, id: u32,
low: Vec<usize>, ids: Vec<Option<u32>>,
low: Vec<u32>,
on_stack: Vec<bool>, on_stack: Vec<bool>,
stack: Vec<usize>, stack: Vec<u32>,
} }
impl<'a> TarjanScc<'a> {
pub(crate) fn new(graph: &'a [Vec<usize>]) -> Self { impl TarjanSccG {
pub(crate) fn new(graph: DirectedCsrGraph<u32>) -> Self {
let graph_size = graph.node_count();
Self { Self {
graph, graph,
id: 0, id: 0,
ids: vec![None; graph.len()], ids: vec![None; graph_size as usize],
low: vec![0; graph.len()], low: vec![0; graph_size as usize],
on_stack: vec![false; graph.len()], on_stack: vec![false; graph_size as usize],
stack: vec![], stack: vec![],
} }
} }
pub(crate) fn run(mut self, poison: Poison) -> Result<Vec<Vec<usize>>> { pub(crate) fn run(mut self, poison: Poison) -> Result<Vec<Vec<u32>>> {
for i in 0..self.graph.len() { for i in 0..self.graph.node_count() {
if self.ids[i].is_none() { if self.ids[i as usize].is_none() {
self.dfs(i); self.dfs(i);
poison.check()?; poison.check()?;
} }
} }
let mut low_map: BTreeMap<usize, Vec<usize>> = BTreeMap::new(); let mut low_map: BTreeMap<u32, Vec<u32>> = BTreeMap::new();
for (idx, grp) in self.low.into_iter().enumerate() { for (idx, grp) in self.low.into_iter().enumerate() {
low_map.entry(grp).or_default().push(idx); low_map.entry(grp).or_default().push(idx as u32);
} }
Ok(low_map.into_iter().map(|(_, vs)| vs).collect_vec()) Ok(low_map.into_iter().map(|(_, vs)| vs).collect_vec())
} }
fn dfs(&mut self, at: usize) { fn dfs(&mut self, at: u32) {
self.stack.push(at); self.stack.push(at);
self.on_stack[at] = true; self.on_stack[at as usize] = true;
self.id += 1; self.id += 1;
self.ids[at] = Some(self.id); self.ids[at as usize] = Some(self.id);
self.low[at] = self.id; self.low[at as usize] = self.id;
for to in &self.graph[at] { for to in self.graph.out_neighbors(at).cloned().collect_vec() {
let to = *to; if self.ids[to as usize].is_none() {
if self.ids[to].is_none() {
self.dfs(to); self.dfs(to);
} }
if self.on_stack[to] { if self.on_stack[to as usize] {
self.low[at] = min(self.low[at], self.low[to]); self.low[at as usize] = min(self.low[at as usize], self.low[to as usize]);
} }
} }
if self.ids[at].unwrap() == self.low[at] { if self.ids[at as usize].unwrap() == self.low[at as usize] {
while let Some(node) = self.stack.pop() { while let Some(node) = self.stack.pop() {
self.on_stack[node] = false; self.on_stack[node as usize] = false;
self.low[node] = self.ids[at].unwrap(); self.low[node as usize] = self.ids[at as usize].unwrap();
if node == at { if node == at {
break; break;
} }
@ -147,3 +150,4 @@ impl<'a> TarjanScc<'a> {
} }
} }
} }

@ -7,6 +7,7 @@
*/ */
use std::collections::BTreeMap; use std::collections::BTreeMap;
use graph::prelude::{DirectedCsrGraph, DirectedNeighbors, Graph};
use miette::Result; use miette::Result;
use smartstring::{LazyCompact, SmartString}; use smartstring::{LazyCompact, SmartString};
@ -30,12 +31,12 @@ impl FixedRule for TopSort {
) -> Result<()> { ) -> Result<()> {
let edges = payload.get_input(0)?; let edges = payload.get_input(0)?;
let (graph, indices, _) = edges.convert_edge_to_graph(false)?; let (graph, indices, _) = edges.to_directed_graph(false)?;
let sorted = kahn(&graph, poison)?; let sorted = kahn_g(&graph, poison)?;
for (idx, val_id) in sorted.iter().enumerate() { for (idx, val_id) in sorted.iter().enumerate() {
let val = indices.get(*val_id).unwrap(); let val = indices.get(*val_id as usize).unwrap();
let tuple = vec![DataValue::from(idx as i64), val.clone()]; let tuple = vec![DataValue::from(idx as i64), val.clone()];
out.put(tuple); out.put(tuple);
} }
@ -53,31 +54,31 @@ impl FixedRule for TopSort {
} }
} }
pub(crate) fn kahn(graph: &[Vec<usize>], poison: Poison) -> Result<Vec<usize>> {
let mut in_degree = vec![0; graph.len()]; pub(crate) fn kahn_g(graph: &DirectedCsrGraph<u32>, poison: Poison) -> Result<Vec<u32>> {
for tos in graph { let graph_size = graph.node_count();
for to in tos { let mut in_degree = vec![0; graph_size as usize];
in_degree[*to] += 1; for tos in 0..graph_size {
for to in graph.out_neighbors(tos) {
in_degree[*to as usize] += 1;
} }
} }
let mut sorted = Vec::with_capacity(graph.len()); let mut sorted = Vec::with_capacity(graph_size as usize);
let mut pending = vec![]; let mut pending = vec![];
for (node, degree) in in_degree.iter().enumerate() { for (node, degree) in in_degree.iter().enumerate() {
if *degree == 0 { if *degree == 0 {
pending.push(node); pending.push(node as u32);
} }
} }
while !pending.is_empty() { while !pending.is_empty() {
let removed = pending.pop().unwrap(); let removed = pending.pop().unwrap();
sorted.push(removed); sorted.push(removed);
if let Some(edges) = graph.get(removed) { for nxt in graph.out_neighbors(removed as u32) {
for nxt in edges { in_degree[*nxt as usize] -= 1;
in_degree[*nxt] -= 1; if in_degree[*nxt as usize] == 0 {
if in_degree[*nxt] == 0 { pending.push(*nxt);
pending.push(*nxt);
}
} }
} }
poison.check()?; poison.check()?;

@ -6,17 +6,19 @@
* You can obtain one at https://mozilla.org/MPL/2.0/. * You can obtain one at https://mozilla.org/MPL/2.0/.
*/ */
use std::collections::{BTreeMap, BTreeSet}; use std::collections::BTreeMap;
use graph::prelude::{DirectedCsrGraph, DirectedNeighbors, Graph};
use itertools::Itertools;
use miette::Result; use miette::Result;
#[cfg(feature = "rayon")] #[cfg(feature = "rayon")]
use rayon::prelude::*; use rayon::prelude::*;
use smartstring::{LazyCompact, SmartString}; use smartstring::{LazyCompact, SmartString};
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::data::expr::Expr; use crate::data::expr::Expr;
use crate::data::symb::Symbol; use crate::data::symb::Symbol;
use crate::data::value::DataValue; use crate::data::value::DataValue;
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::parse::SourceSpan; use crate::parse::SourceSpan;
use crate::runtime::db::Poison; use crate::runtime::db::Poison;
use crate::runtime::temp_store::RegularTempStore; use crate::runtime::temp_store::RegularTempStore;
@ -31,9 +33,7 @@ impl FixedRule for ClusteringCoefficients {
poison: Poison, poison: Poison,
) -> Result<()> { ) -> Result<()> {
let edges = payload.get_input(0)?; let edges = payload.get_input(0)?;
let (graph, indices, _) = edges.convert_edge_to_graph(true)?; let (graph, indices, _) = edges.to_directed_graph(true)?;
let graph: Vec<BTreeSet<usize>> =
graph.into_iter().map(|e| e.into_iter().collect()).collect();
let coefficients = clustering_coefficients(&graph, poison)?; let coefficients = clustering_coefficients(&graph, poison)?;
for (idx, (cc, n_triangles, degree)) in coefficients.into_iter().enumerate() { for (idx, (cc, n_triangles, degree)) in coefficients.into_iter().enumerate() {
out.put(vec![ out.put(vec![
@ -58,32 +58,42 @@ impl FixedRule for ClusteringCoefficients {
} }
fn clustering_coefficients( fn clustering_coefficients(
graph: &[BTreeSet<usize>], graph: &DirectedCsrGraph<u32>,
poison: Poison, poison: Poison,
) -> Result<Vec<(f64, usize, usize)>> { ) -> Result<Vec<(f64, usize, usize)>> {
#[cfg(feature = "rayon")] let node_size = graph.node_count();
let it = graph.par_iter();
#[cfg(not(feature = "rayon"))]
let it = graph.iter();
it.map(|edges| -> Result<(f64, usize, usize)> { (0..node_size)
let degree = edges.len(); .into_par_iter()
if degree < 2 { .map(|node_idx| -> Result<(f64, usize, usize)> {
Ok((0., 0, degree)) let edges = graph.out_neighbors(node_idx).collect_vec();
} else { let degree = edges.len();
let n_triangles = edges if degree < 2 {
.iter() Ok((0., 0, degree))
.map(|e_src| { } else {
edges let n_triangles = edges
.iter() .iter()
.filter(|e_dst| e_src > e_dst && graph[*e_src].contains(*e_dst)) .map(|e_src| {
.count() edges
}) .iter()
.sum(); .filter(|e_dst| {
let cc = 2. * n_triangles as f64 / ((degree as f64) * ((degree as f64) - 1.)); if e_src <= e_dst {
poison.check()?; return false;
Ok((cc, n_triangles, degree)) }
} for nb in graph.out_neighbors(**e_src) {
}) if nb == **e_dst {
.collect::<Result<_>>() return true;
}
}
return false;
})
.count()
})
.sum();
let cc = 2. * n_triangles as f64 / ((degree as f64) * ((degree as f64) - 1.));
poison.check()?;
Ok((cc, n_triangles, degree))
}
})
.collect::<Result<_>>()
} }

@ -8,17 +8,18 @@
use std::collections::{BTreeMap, BTreeSet}; use std::collections::{BTreeMap, BTreeSet};
use graph::prelude::{DirectedCsrGraph, DirectedNeighborsWithValues};
use itertools::Itertools; use itertools::Itertools;
use miette::Result; use miette::Result;
#[cfg(feature = "rayon")] #[cfg(feature = "rayon")]
use rayon::prelude::*; use rayon::prelude::*;
use smartstring::{LazyCompact, SmartString}; use smartstring::{LazyCompact, SmartString};
use crate::fixed_rule::algos::shortest_path_dijkstra::dijkstra;
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::data::expr::Expr; use crate::data::expr::Expr;
use crate::data::symb::Symbol; use crate::data::symb::Symbol;
use crate::data::value::DataValue; use crate::data::value::DataValue;
use crate::fixed_rule::algos::shortest_path_dijkstra::dijkstra;
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::parse::SourceSpan; use crate::parse::SourceSpan;
use crate::runtime::db::Poison; use crate::runtime::db::Poison;
use crate::runtime::temp_store::RegularTempStore; use crate::runtime::temp_store::RegularTempStore;
@ -38,8 +39,7 @@ impl FixedRule for KShortestPathYen {
let undirected = payload.bool_option("undirected", Some(false))?; let undirected = payload.bool_option("undirected", Some(false))?;
let k = payload.pos_integer_option("k", None)?; let k = payload.pos_integer_option("k", None)?;
let (graph, indices, inv_indices, _) = let (graph, indices, inv_indices) = edges.to_directed_weighted_graph(undirected, false)?;
edges.convert_edge_to_weighted_graph(undirected, false)?;
let mut starting_nodes = BTreeSet::new(); let mut starting_nodes = BTreeSet::new();
for tuple in starting.iter()? { for tuple in starting.iter()? {
@ -64,11 +64,13 @@ impl FixedRule for KShortestPathYen {
k_shortest_path_yen(k as usize, &graph, start, *goal, poison.clone())? k_shortest_path_yen(k as usize, &graph, start, *goal, poison.clone())?
{ {
let t = vec![ let t = vec![
indices[start].clone(), indices[start as usize].clone(),
indices[*goal].clone(), indices[*goal as usize].clone(),
DataValue::from(cost), DataValue::from(cost as f64),
DataValue::List( DataValue::List(
path.into_iter().map(|u| indices[u].clone()).collect_vec(), path.into_iter()
.map(|u| indices[u as usize].clone())
.collect_vec(),
), ),
]; ];
out.put(t) out.put(t)
@ -84,7 +86,7 @@ impl FixedRule for KShortestPathYen {
let res_all: Vec<_> = first_it let res_all: Vec<_> = first_it
.map( .map(
|(start, goal)| -> Result<(usize, usize, Vec<(f64, Vec<usize>)>)> { |(start, goal)| -> Result<(u32, u32, Vec<(f32, Vec<u32>)>)> {
Ok(( Ok((
start, start,
goal, goal,
@ -97,10 +99,10 @@ impl FixedRule for KShortestPathYen {
for (start, goal, res) in res_all { for (start, goal, res) in res_all {
for (cost, path) in res { for (cost, path) in res {
let t = vec![ let t = vec![
indices[start].clone(), indices[start as usize].clone(),
indices[goal].clone(), indices[goal as usize].clone(),
DataValue::from(cost), DataValue::from(cost as f64),
DataValue::List(path.into_iter().map(|u| indices[u].clone()).collect_vec()), DataValue::List(path.into_iter().map(|u| indices[u as usize].clone()).collect_vec()),
]; ];
out.put(t) out.put(t)
} }
@ -121,13 +123,13 @@ impl FixedRule for KShortestPathYen {
fn k_shortest_path_yen( fn k_shortest_path_yen(
k: usize, k: usize,
edges: &[Vec<(usize, f64)>], edges: &DirectedCsrGraph<u32, (), f32>,
start: usize, start: u32,
goal: usize, goal: u32,
poison: Poison, poison: Poison,
) -> Result<Vec<(f64, Vec<usize>)>> { ) -> Result<Vec<(f32, Vec<u32>)>> {
let mut k_shortest: Vec<(f64, Vec<usize>)> = Vec::with_capacity(k); let mut k_shortest: Vec<(f32, Vec<u32>)> = Vec::with_capacity(k);
let mut candidates: Vec<(f64, Vec<usize>)> = vec![]; let mut candidates: Vec<(f32, Vec<u32>)> = vec![];
match dijkstra(edges, start, &Some(goal), &(), &()) match dijkstra(edges, start, &Some(goal), &(), &())
.into_iter() .into_iter()
@ -170,10 +172,11 @@ fn k_shortest_path_yen(
for i in 0..root_path.len() - 1 { for i in 0..root_path.len() - 1 {
let s = root_path[i]; let s = root_path[i];
let d = root_path[i + 1]; let d = root_path[i + 1];
let eds = &edges[s]; for target in edges.out_neighbors_with_values(s) {
for (e, c) in eds { let e = target.target;
if *e == d { let c = target.value;
total_cost += *c; if e == d {
total_cost += c;
break; break;
} }
} }

@ -9,8 +9,10 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::sync::Arc; use std::sync::Arc;
use either::{Left, Right};
use graph::prelude::{CsrLayout, DirectedCsrGraph, GraphBuilder};
use lazy_static::lazy_static; use lazy_static::lazy_static;
use miette::{bail, ensure, Diagnostic, Result}; use miette::{bail, ensure, Diagnostic, Report, Result};
use smartstring::{LazyCompact, SmartString}; use smartstring::{LazyCompact, SmartString};
use thiserror::Error; use thiserror::Error;
@ -136,128 +138,194 @@ impl<'a, 'b> FixedRuleInputRelation<'a, 'b> {
pub fn span(&self) -> SourceSpan { pub fn span(&self) -> SourceSpan {
self.arg_manifest.span() self.arg_manifest.span()
} }
fn convert_edge_to_graph( pub fn to_directed_graph(
&self, &self,
undirected: bool, undirected: bool,
) -> Result<(Vec<Vec<usize>>, Vec<DataValue>, BTreeMap<DataValue, usize>)> { ) -> Result<(
let mut graph: Vec<Vec<usize>> = vec![]; DirectedCsrGraph<u32>,
Vec<DataValue>,
BTreeMap<DataValue, u32>,
)> {
let mut indices: Vec<DataValue> = vec![]; let mut indices: Vec<DataValue> = vec![];
let mut inv_indices: BTreeMap<DataValue, usize> = Default::default(); let mut inv_indices: BTreeMap<DataValue, u32> = Default::default();
let mut error: Option<Report> = None;
for tuple in self.iter()? { let it = self.iter()?.filter_map(|r_tuple| match r_tuple {
let mut tuple = tuple?.into_iter(); Ok(tuple) => {
let from = tuple.next().ok_or_else(|| NotAnEdgeError(self.span()))?; let mut tuple = tuple.into_iter();
let to = tuple.next().ok_or_else(|| NotAnEdgeError(self.span()))?; let from = match tuple.next() {
let from_idx = if let Some(idx) = inv_indices.get(&from) { None => {
*idx error = Some(NotAnEdgeError(self.span()).into());
} else { return None;
inv_indices.insert(from.clone(), graph.len()); }
indices.push(from.clone()); Some(f) => f,
graph.push(vec![]); };
graph.len() - 1 let to = match tuple.next() {
}; None => {
let to_idx = if let Some(idx) = inv_indices.get(&to) { error = Some(NotAnEdgeError(self.span()).into());
*idx return None;
} else { }
inv_indices.insert(to.clone(), graph.len()); Some(f) => f,
indices.push(to.clone()); };
graph.push(vec![]); let from_idx = if let Some(idx) = inv_indices.get(&from) {
graph.len() - 1 *idx
}; } else {
let from_target = graph.get_mut(from_idx).unwrap(); let idx = indices.len() as u32;
from_target.push(to_idx); inv_indices.insert(from.clone(), idx);
if undirected { indices.push(from.clone());
let to_target = graph.get_mut(to_idx).unwrap(); idx
to_target.push(from_idx); };
let to_idx = if let Some(idx) = inv_indices.get(&to) {
*idx
} else {
let idx = indices.len() as u32;
inv_indices.insert(to.clone(), idx);
indices.push(to.clone());
idx
};
Some((from_idx, to_idx))
} }
Err(err) => {
error = Some(err);
None
}
});
let it = if undirected {
Right(it.flat_map(|(f, t)| [(f, t), (t, f)]))
} else {
Left(it)
};
let graph: DirectedCsrGraph<u32> = GraphBuilder::new()
.csr_layout(CsrLayout::Sorted)
.edges(it)
.build();
if let Some(err) = error {
bail!(err)
} }
Ok((graph, indices, inv_indices)) Ok((graph, indices, inv_indices))
} }
pub fn convert_edge_to_weighted_graph(
pub fn to_directed_weighted_graph(
&self, &self,
undirected: bool, undirected: bool,
allow_negative_edges: bool, allow_negative_weights: bool,
) -> Result<( ) -> Result<(
Vec<Vec<(usize, f64)>>, DirectedCsrGraph<u32, (), f32>,
Vec<DataValue>, Vec<DataValue>,
BTreeMap<DataValue, usize>, BTreeMap<DataValue, u32>,
bool,
)> { )> {
let mut graph: Vec<Vec<(usize, f64)>> = vec![];
let mut indices: Vec<DataValue> = vec![]; let mut indices: Vec<DataValue> = vec![];
let mut inv_indices: BTreeMap<DataValue, usize> = Default::default(); let mut inv_indices: BTreeMap<DataValue, u32> = Default::default();
let mut has_neg_edge = false; let mut error: Option<Report> = None;
let it = self.iter()?.filter_map(|r_tuple| match r_tuple {
Ok(tuple) => {
let mut tuple = tuple.into_iter();
let from = match tuple.next() {
None => {
error = Some(NotAnEdgeError(self.span()).into());
return None;
}
Some(f) => f,
};
let to = match tuple.next() {
None => {
error = Some(NotAnEdgeError(self.span()).into());
return None;
}
Some(f) => f,
};
let from_idx = if let Some(idx) = inv_indices.get(&from) {
*idx
} else {
let idx = indices.len() as u32;
inv_indices.insert(from.clone(), idx);
indices.push(from.clone());
idx
};
let to_idx = if let Some(idx) = inv_indices.get(&to) {
*idx
} else {
let idx = indices.len() as u32;
inv_indices.insert(to.clone(), idx);
indices.push(to.clone());
idx
};
for tuple in self.iter()? { let weight = match tuple.next() {
let mut tuple = tuple?.into_iter(); None => 1.0,
let from = tuple.next().ok_or_else(|| NotAnEdgeError(self.span()))?; Some(d) => match d.get_float() {
let to = tuple.next().ok_or_else(|| NotAnEdgeError(self.span()))?; Some(f) => {
let weight = match tuple.next() { if !f.is_finite() {
None => 1.0, error = Some(
Some(d) => match d.get_float() { BadEdgeWeightError(
Some(f) => { d,
ensure!( self.arg_manifest
f.is_finite(), .bindings()
BadEdgeWeightError( .get(2)
d, .map(|s| s.span)
self.arg_manifest .unwrap_or_else(|| self.span()),
.bindings() )
.get(2) .into(),
.map(|s| s.span) );
.unwrap_or_else(|| self.span()) return None;
) };
);
if f < 0. { if f < 0. {
if !allow_negative_edges { if !allow_negative_weights {
bail!(BadEdgeWeightError( error = Some(
BadEdgeWeightError(
d,
self.arg_manifest
.bindings()
.get(2)
.map(|s| s.span)
.unwrap_or_else(|| self.span()),
)
.into(),
);
return None;
}
}
f
}
None => {
error = Some(
BadEdgeWeightError(
d, d,
self.arg_manifest self.arg_manifest
.bindings() .bindings()
.get(2) .get(2)
.map(|s| s.span) .map(|s| s.span)
.unwrap_or_else(|| self.span()) .unwrap_or_else(|| self.span()),
)); )
} .into(),
has_neg_edge = true; );
return None;
} }
f },
} };
None => {
bail!(BadEdgeWeightError( Some((from_idx, to_idx, weight as f32))
d, }
self.arg_manifest Err(err) => {
.bindings() error = Some(err);
.get(2) None
.map(|s| s.span)
.unwrap_or_else(|| self.span())
))
}
},
};
let from_idx = if let Some(idx) = inv_indices.get(&from) {
*idx
} else {
inv_indices.insert(from.clone(), graph.len());
indices.push(from.clone());
graph.push(vec![]);
graph.len() - 1
};
let to_idx = if let Some(idx) = inv_indices.get(&to) {
*idx
} else {
inv_indices.insert(to.clone(), graph.len());
indices.push(to.clone());
graph.push(vec![]);
graph.len() - 1
};
let from_target = graph.get_mut(from_idx).unwrap();
from_target.push((to_idx, weight));
if undirected {
let to_target = graph.get_mut(to_idx).unwrap();
to_target.push((from_idx, weight));
} }
});
let it = if undirected {
Right(it.flat_map(|(f, t, w)| [(f, t, w), (t, f, w)]))
} else {
Left(it)
};
let graph: DirectedCsrGraph<u32, (), f32> = GraphBuilder::new()
.csr_layout(CsrLayout::Sorted)
.edges_with_values(it)
.build();
if let Some(err) = error {
bail!(err)
} }
Ok((graph, indices, inv_indices, has_neg_edge))
Ok((graph, indices, inv_indices))
} }
} }

@ -6,13 +6,13 @@
* You can obtain one at https://mozilla.org/MPL/2.0/. * You can obtain one at https://mozilla.org/MPL/2.0/.
*/ */
use std::cmp::min;
use std::collections::{BTreeMap, BTreeSet}; use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Debug; use std::fmt::Debug;
use miette::Result;
use itertools::Itertools; use itertools::Itertools;
use miette::Result;
use crate::fixed_rule::algos::strongly_connected_components::TarjanScc;
use crate::runtime::db::Poison; use crate::runtime::db::Poison;
pub(crate) type Graph<T> = BTreeMap<T, Vec<T>>; pub(crate) type Graph<T> = BTreeMap<T, Vec<T>>;
@ -129,3 +129,65 @@ pub(crate) fn generalized_kahn(
debug_assert_eq!(in_degree.iter().sum::<usize>(), 0); debug_assert_eq!(in_degree.iter().sum::<usize>(), 0);
ret ret
} }
struct TarjanScc<'a> {
graph: &'a [Vec<usize>],
id: usize,
ids: Vec<Option<usize>>,
low: Vec<usize>,
on_stack: Vec<bool>,
stack: Vec<usize>,
}
impl<'a> TarjanScc<'a> {
fn new(graph: &'a [Vec<usize>]) -> Self {
Self {
graph,
id: 0,
ids: vec![None; graph.len()],
low: vec![0; graph.len()],
on_stack: vec![false; graph.len()],
stack: vec![],
}
}
fn run(mut self, poison: Poison) -> Result<Vec<Vec<usize>>> {
for i in 0..self.graph.len() {
if self.ids[i].is_none() {
self.dfs(i);
poison.check()?;
}
}
let mut low_map: BTreeMap<usize, Vec<usize>> = BTreeMap::new();
for (idx, grp) in self.low.into_iter().enumerate() {
low_map.entry(grp).or_default().push(idx);
}
Ok(low_map.into_iter().map(|(_, vs)| vs).collect_vec())
}
fn dfs(&mut self, at: usize) {
self.stack.push(at);
self.on_stack[at] = true;
self.id += 1;
self.ids[at] = Some(self.id);
self.low[at] = self.id;
for to in &self.graph[at] {
let to = *to;
if self.ids[to].is_none() {
self.dfs(to);
}
if self.on_stack[to] {
self.low[at] = min(self.low[at], self.low[to]);
}
}
if self.ids[at].unwrap() == self.low[at] {
while let Some(node) = self.stack.pop() {
self.on_stack[node] = false;
self.low[node] = self.ids[at].unwrap();
if node == at {
break;
}
}
}
}
}

@ -17,7 +17,7 @@ default = ["console_error_panic_hook"]
[dependencies] [dependencies]
wasm-bindgen = "0.2.63" wasm-bindgen = "0.2.63"
cozo = { version = "0.2.1", path = "../cozo-core", default-features = false, features = ["wasm", "graph-algo", "nothread"] } cozo = { version = "0.2.1", path = "../cozo-core", default-features = false, features = ["wasm", "nothread"] }
# The `console_error_panic_hook` crate provides better debugging of panics by # The `console_error_panic_hook` crate provides better debugging of panics by
# logging them with `console.error`. This is great for development, but requires # logging them with `console.error`. This is great for development, but requires

@ -8,15 +8,6 @@ export MACOSX_DEPLOYMENT_TARGET=10.14
#rm -fr release #rm -fr release
mkdir -p release mkdir -p release
for TARGET in aarch64-apple-darwin x86_64-apple-darwin; do
CARGO_PROFILE_RELEASE_LTO=fat cargo build --release -p cozoserver \
-F compact -F storage-rocksdb -F storage-tikv -F storage-sled --target $TARGET
cp target/$TARGET/release/cozoserver release/cozoserver_all-$VERSION-$TARGET # standalone
done
# copy python
cp target/wheels/*.whl release/
for TARGET in aarch64-apple-darwin x86_64-apple-darwin; do for TARGET in aarch64-apple-darwin x86_64-apple-darwin; do
# standalone, c, java, nodejs # standalone, c, java, nodejs
CARGO_PROFILE_RELEASE_LTO=fat cargo build --release -p cozoserver -p cozo_c -p cozo_java -p cozo-node -F compact -F storage-rocksdb --target $TARGET CARGO_PROFILE_RELEASE_LTO=fat cargo build --release -p cozoserver -p cozo_c -p cozo_java -p cozo-node -F compact -F storage-rocksdb --target $TARGET
@ -32,11 +23,21 @@ for TARGET in aarch64-apple-darwin x86_64-apple-darwin; do
cd .. cd ..
done done
# copy python
cp target/wheels/*.whl release/
# swift # swift
cd cozo-lib-swift cd cozo-lib-swift
CARGO_PROFILE_RELEASE_LTO=fat ./build-rust.sh CARGO_PROFILE_RELEASE_LTO=fat ./build-rust.sh
cd .. cd ..
# with TiKV
for TARGET in aarch64-apple-darwin x86_64-apple-darwin; do
CARGO_PROFILE_RELEASE_LTO=fat cargo build --release -p cozoserver \
-F compact -F storage-rocksdb -F storage-tikv -F storage-sled --target $TARGET
cp target/$TARGET/release/cozoserver release/cozoserver_all-$VERSION-$TARGET # standalone
done
# WASM # WASM
cd cozo-lib-wasm cd cozo-lib-wasm
CARGO_PROFILE_RELEASE_LTO=fat ./build.sh CARGO_PROFILE_RELEASE_LTO=fat ./build.sh

Loading…
Cancel
Save