update algorithms to use more efficient graph representation

main
Ziyang Hu 2 years ago
parent 03d23173cb
commit 3b29579c48

81
Cargo.lock generated

@ -277,12 +277,6 @@ version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3ac9f8b63eca6fd385229b3675f6cc0dc5c8a5c8a54a59d4f52ffd670d87b0c"
[[package]]
name = "bytemuck"
version = "1.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aaa3a8d9a1ca92e282c96a32d6511b695d7d994d1d102ba85d279f9b2756947f"
[[package]]
name = "byteorder"
version = "1.4.3"
@ -551,7 +545,6 @@ dependencies = [
"log",
"miette",
"minreq",
"nalgebra",
"num-traits",
"ordered-float",
"pest",
@ -1674,15 +1667,6 @@ dependencies = [
"libc",
]
[[package]]
name = "matrixmultiply"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "add85d4dd35074e6fedc608f8c8f513a3548619a9024b751949ef0e8e45a4d84"
dependencies = [
"rawpointer",
]
[[package]]
name = "memchr"
version = "2.5.0"
@ -1836,33 +1820,6 @@ dependencies = [
"twoway",
]
[[package]]
name = "nalgebra"
version = "0.31.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20bd243ab3dbb395b39ee730402d2e5405e448c75133ec49cc977762c4cba3d1"
dependencies = [
"approx",
"matrixmultiply",
"nalgebra-macros",
"num-complex",
"num-rational",
"num-traits",
"simba",
"typenum",
]
[[package]]
name = "nalgebra-macros"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01fcc0b8149b4632adc89ac3b7b31a12fb6099a0317a4eb2ebff574ef7de7218"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "nanorand"
version = "0.7.0"
@ -2702,12 +2659,6 @@ dependencies = [
"rand_core 0.5.1",
]
[[package]]
name = "rawpointer"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
[[package]]
name = "rayon"
version = "1.6.0"
@ -2925,15 +2876,6 @@ version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
[[package]]
name = "safe_arch"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "794821e4ccb0d9f979512f9c1973480123f9bd62a90d74ab0f9426fcf8f4a529"
dependencies = [
"bytemuck",
]
[[package]]
name = "safemem"
version = "0.3.3"
@ -3094,19 +3036,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2"
[[package]]
name = "simba"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f3fd720c48c53cace224ae62bef1bbff363a70c68c4802a78b5cc6159618176"
dependencies = [
"approx",
"num-complex",
"num-traits",
"paste",
"wide",
]
[[package]]
name = "siphasher"
version = "0.3.10"
@ -3959,16 +3888,6 @@ dependencies = [
"once_cell",
]
[[package]]
name = "wide"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae41ecad2489a1655c8ef8489444b0b113c0a0c795944a3572a0931cf7d2525c"
dependencies = [
"bytemuck",
"safe_arch",
]
[[package]]
name = "winapi"
version = "0.3.9"

@ -33,7 +33,7 @@ storage-sqlite = ["dep:sqlite", "dep:sqlite3-src"]
## You can also [fine-tune](https://github.com/cozodb/cozo/blob/main/TUNING_ROCKSDB.md) RocksDB options.
storage-rocksdb = ["dep:cozorocks"]
## Enables the graph algorithms.
graph-algo = ["dep:nalgebra"]
graph-algo = []
## Allows the utilities to make web requests to fetch data.
requests = ["dep:minreq"]
## Uses jemalloc as the global allocator, can make a difference in performance.
@ -124,7 +124,6 @@ uuid = { version = "1.1.2", features = ["v1", "v4", "serde"] }
csv = "1.1.6"
document-features = "0.2.6"
rayon = { version = "1.5.3", optional = true }
nalgebra = { version = "0.31.1", optional = true }
minreq = { version = "2.6.0", features = ["https-rustls"], optional = true }
tikv-jemallocator-global = { version = "0.5.0", optional = true }
cozorocks = { path = "../cozorocks", version = "0.1.2", optional = true }

@ -76,3 +76,18 @@ fn wikipedia_pagerank(b: &mut Bencher) {
.unwrap()
});
}
#[bench]
fn wikipedia_louvain(b: &mut Bencher) {
initialize(&TEST_DB);
b.iter(|| {
let start = Instant::now();
TEST_DB
.run_script(
"?[grp, idx] <~ CommunityDetectionLouvain(*article[])",
Default::default(),
)
.unwrap();
dbg!(start.elapsed());
})
}

@ -8,12 +8,12 @@
use std::cmp::Reverse;
use std::collections::BTreeMap;
use graph::prelude::{DirectedCsrGraph, DirectedNeighborsWithValues, Graph};
use itertools::Itertools;
use miette::Result;
use ordered_float::OrderedFloat;
use priority_queue::PriorityQueue;
#[cfg(feature = "rayon")]
use rayon::prelude::*;
use smartstring::{LazyCompact, SmartString};
@ -38,28 +38,25 @@ impl FixedRule for BetweennessCentrality {
let edges = payload.get_input(0)?;
let undirected = payload.bool_option("undirected", Some(false))?;
let (graph, indices, _inv_indices, _) =
edges.convert_edge_to_weighted_graph(undirected, false)?;
let (graph, indices, _inv_indices) =
edges.to_directed_weighted_graph(undirected, false)?;
let n = graph.len();
let n = graph.node_count();
if n == 0 {
return Ok(());
}
#[cfg(feature = "rayon")]
let it = (0..n).into_par_iter();
#[cfg(not(feature = "rayon"))]
let it = (0..n).into_iter();
let centrality_segs: Vec<_> = it
.map(|start| -> Result<BTreeMap<usize, f64>> {
.map(|start| -> Result<BTreeMap<u32, f32>> {
let res_for_start =
dijkstra_keep_ties(&graph, start, &(), &(), &(), poison.clone())?;
let mut ret: BTreeMap<usize, f64> = Default::default();
let mut ret: BTreeMap<u32, f32> = Default::default();
let grouped = res_for_start.into_iter().group_by(|(n, _, _)| *n);
for (_, grp) in grouped.into_iter() {
let grp = grp.collect_vec();
let l = grp.len() as f64;
let l = grp.len() as f32;
for (_, _, path) in grp {
if path.len() < 3 {
continue;
@ -73,16 +70,16 @@ impl FixedRule for BetweennessCentrality {
Ok(ret)
})
.collect::<Result<_>>()?;
let mut centrality: Vec<f64> = vec![0.; graph.len()];
let mut centrality: Vec<f32> = vec![0.; n as usize];
for m in centrality_segs {
for (k, v) in m {
centrality[k] += v;
centrality[k as usize] += v;
}
}
for (i, s) in centrality.into_iter().enumerate() {
let node = indices[i].clone();
out.put(vec![node, s.into()]);
out.put(vec![node, (s as f64).into()]);
}
Ok(())
@ -110,28 +107,25 @@ impl FixedRule for ClosenessCentrality {
let edges = payload.get_input(0)?;
let undirected = payload.bool_option("undirected", Some(false))?;
let (graph, indices, _inv_indices, _) =
edges.convert_edge_to_weighted_graph(undirected, false)?;
let (graph, indices, _inv_indices) =
edges.to_directed_weighted_graph(undirected, false)?;
let n = graph.len();
let n = graph.node_count();
if n == 0 {
return Ok(());
}
#[cfg(feature = "rayon")]
let it = (0..n).into_par_iter();
#[cfg(not(feature = "rayon"))]
let it = (0..n).into_iter();
let res: Vec<_> = it
.map(|start| -> Result<f64> {
.map(|start| -> Result<f32> {
let distances = dijkstra_cost_only(&graph, start, poison.clone())?;
let total_dist: f64 = distances.iter().filter(|d| d.is_finite()).cloned().sum();
let nc: f64 = distances.iter().filter(|d| d.is_finite()).count() as f64;
Ok(nc * nc / total_dist / (n - 1) as f64)
let total_dist: f32 = distances.iter().filter(|d| d.is_finite()).cloned().sum();
let nc: f32 = distances.iter().filter(|d| d.is_finite()).count() as f32;
Ok(nc * nc / total_dist / (n - 1) as f32)
})
.collect::<Result<_>>()?;
for (idx, centrality) in res.into_iter().enumerate() {
out.put(vec![indices[idx].clone(), DataValue::from(centrality)]);
out.put(vec![indices[idx].clone(), DataValue::from(centrality as f64)]);
poison.check()?;
}
Ok(())
@ -148,28 +142,32 @@ impl FixedRule for ClosenessCentrality {
}
pub(crate) fn dijkstra_cost_only(
edges: &[Vec<(usize, f64)>],
start: usize,
edges: &DirectedCsrGraph<u32, (), f32>,
start: u32,
poison: Poison,
) -> Result<Vec<f64>> {
let mut distance = vec![f64::INFINITY; edges.len()];
) -> Result<Vec<f32>> {
let mut distance = vec![f32::INFINITY; edges.node_count() as usize];
let mut pq = PriorityQueue::new();
let mut back_pointers = vec![usize::MAX; edges.len()];
distance[start] = 0.;
let mut back_pointers = vec![u32::MAX; edges.node_count() as usize];
distance[start as usize] = 0.;
pq.push(start, Reverse(OrderedFloat(0.)));
while let Some((node, Reverse(OrderedFloat(cost)))) = pq.pop() {
if cost > distance[node] {
if cost > distance[node as usize] {
continue;
}
for (nxt_node, path_weight) in &edges[node] {
let nxt_cost = cost + *path_weight;
if nxt_cost < distance[*nxt_node] {
pq.push_increase(*nxt_node, Reverse(OrderedFloat(nxt_cost)));
distance[*nxt_node] = nxt_cost;
back_pointers[*nxt_node] = node;
for target in edges.out_neighbors_with_values(node) {
let nxt_node = target.target;
let path_weight = target.value;
let nxt_cost = cost + path_weight;
if nxt_cost < distance[nxt_node as usize] {
pq.push_increase(nxt_node, Reverse(OrderedFloat(nxt_cost)));
distance[nxt_node as usize] = nxt_cost;
back_pointers[nxt_node as usize] = node;
}
}
poison.check()?;
}

@ -8,6 +8,7 @@
use std::cmp::Reverse;
use std::collections::BTreeMap;
use graph::prelude::{DirectedCsrGraph, DirectedNeighborsWithValues, Graph};
use itertools::Itertools;
use miette::Result;
@ -33,16 +34,16 @@ impl FixedRule for MinimumSpanningForestKruskal {
poison: Poison,
) -> Result<()> {
let edges = payload.get_input(0)?;
let (graph, indices, _, _) = edges.convert_edge_to_weighted_graph(true, true)?;
if graph.is_empty() {
let (graph, indices, _) = edges.to_directed_weighted_graph(true, true)?;
if graph.node_count() == 0 {
return Ok(());
}
let msp = kruskal(&graph, poison)?;
for (src, dst, cost) in msp {
out.put(vec![
indices[src].clone(),
indices[dst].clone(),
DataValue::from(cost),
indices[src as usize].clone(),
indices[dst as usize].clone(),
DataValue::from(cost as f64),
]);
}
@ -59,15 +60,16 @@ impl FixedRule for MinimumSpanningForestKruskal {
}
}
fn kruskal(edges: &[Vec<(usize, f64)>], poison: Poison) -> Result<Vec<(usize, usize, f64)>> {
fn kruskal(edges: &DirectedCsrGraph<u32, (), f32>, poison: Poison) -> Result<Vec<(u32, u32, f32)>> {
let mut pq = PriorityQueue::new();
let mut uf = UnionFind::new(edges.len());
let mut mst = Vec::with_capacity(edges.len() - 1);
for (from, tos) in edges.iter().enumerate() {
for (to, cost) in tos {
pq.push((from, *to), Reverse(OrderedFloat(*cost)));
let mut uf = UnionFind::new(edges.node_count());
let mut mst = Vec::with_capacity((edges.node_count() - 1) as usize);
for from in 0..edges.node_count() {
for target in edges.out_neighbors_with_values(from) {
let to = target.target;
let cost = target.value;
pq.push((from, to), Reverse(OrderedFloat(cost)));
}
poison.check()?;
}
while let Some(((from, to), Reverse(OrderedFloat(cost)))) = pq.pop() {
if uf.connected(from, to) {
@ -76,7 +78,7 @@ fn kruskal(edges: &[Vec<(usize, f64)>], poison: Poison) -> Result<Vec<(usize, us
uf.union(from, to);
mst.push((from, to, cost));
if uf.szs[0] == edges.len() {
if uf.szs[0] == edges.node_count() {
break;
}
poison.check()?;
@ -85,43 +87,43 @@ fn kruskal(edges: &[Vec<(usize, f64)>], poison: Poison) -> Result<Vec<(usize, us
}
struct UnionFind {
ids: Vec<usize>,
szs: Vec<usize>,
ids: Vec<u32>,
szs: Vec<u32>,
}
impl UnionFind {
fn new(n: usize) -> Self {
fn new(n: u32) -> Self {
Self {
ids: (0..n).collect_vec(),
szs: vec![1; n],
szs: vec![1; n as usize],
}
}
fn union(&mut self, p: usize, q: usize) {
fn union(&mut self, p: u32, q: u32) {
let root1 = self.find(p);
let root2 = self.find(q);
if root1 != root2 {
if self.szs[root1] < self.szs[root2] {
self.szs[root2] += self.szs[root1];
self.ids[root1] = root2;
if self.szs[root1 as usize] < self.szs[root2 as usize] {
self.szs[root2 as usize] += self.szs[root1 as usize];
self.ids[root1 as usize] = root2;
} else {
self.szs[root1] += self.szs[root2];
self.ids[root2] = root1;
self.szs[root1 as usize] += self.szs[root2 as usize];
self.ids[root2 as usize] = root1;
}
}
}
fn find(&mut self, mut p: usize) -> usize {
fn find(&mut self, mut p: u32) -> u32 {
let mut root = p;
while root != self.ids[root] {
root = self.ids[root];
while root != self.ids[root as usize] {
root = self.ids[root as usize];
}
while p != root {
let next = self.ids[p];
self.ids[p] = root;
let next = self.ids[p as usize];
self.ids[p as usize] = root;
p = next;
}
root
}
fn connected(&mut self, p: usize, q: usize) -> bool {
fn connected(&mut self, p: u32, q: u32) -> bool {
self.find(p) == self.find(q)
}
}

@ -8,15 +8,16 @@
use std::collections::BTreeMap;
use graph::prelude::{DirectedCsrGraph, DirectedNeighborsWithValues, Graph};
use itertools::Itertools;
use miette::Result;
use rand::prelude::*;
use smartstring::{LazyCompact, SmartString};
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::data::expr::Expr;
use crate::data::symb::Symbol;
use crate::data::value::DataValue;
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::parse::SourceSpan;
use crate::runtime::db::Poison;
use crate::runtime::temp_store::RegularTempStore;
@ -33,8 +34,7 @@ impl FixedRule for LabelPropagation {
let edges = payload.get_input(0)?;
let undirected = payload.bool_option("undirected", Some(false))?;
let max_iter = payload.pos_integer_option("max_iter", Some(10))?;
let (graph, indices, _inv_indices, _) =
edges.convert_edge_to_weighted_graph(undirected, true)?;
let (graph, indices, _inv_indices) = edges.to_directed_weighted_graph(undirected, true)?;
let labels = label_propagation(&graph, max_iter, poison)?;
for (idx, label) in labels.into_iter().enumerate() {
let node = indices[idx].clone();
@ -54,11 +54,11 @@ impl FixedRule for LabelPropagation {
}
fn label_propagation(
graph: &[Vec<(usize, f64)>],
graph: &DirectedCsrGraph<u32, (), f32>,
max_iter: usize,
poison: Poison,
) -> Result<Vec<usize>> {
let n_nodes = graph.len();
) -> Result<Vec<u32>> {
let n_nodes = graph.node_count();
let mut labels = (0..n_nodes).collect_vec();
let mut rng = thread_rng();
let mut iter_order = (0..n_nodes).collect_vec();
@ -66,14 +66,13 @@ fn label_propagation(
iter_order.shuffle(&mut rng);
let mut changed = false;
for node in &iter_order {
let mut labels_for_node: BTreeMap<usize, f64> = BTreeMap::new();
let neighbours = &graph[*node];
if neighbours.is_empty() {
continue;
let mut labels_for_node: BTreeMap<u32, f32> = BTreeMap::new();
for edge in graph.out_neighbors_with_values(*node) {
let label = labels[edge.target as usize];
*labels_for_node.entry(label).or_default() += edge.value;
}
for (to_node, weight) in neighbours {
let label = labels[*to_node];
*labels_for_node.entry(label).or_default() += *weight;
if labels_for_node.is_empty() {
continue;
}
let mut labels_by_score = labels_for_node.into_iter().collect_vec();
labels_by_score.sort_by(|a, b| a.1.total_cmp(&b.1).reverse());
@ -84,9 +83,9 @@ fn label_propagation(
.map(|(l, _)| l)
.collect_vec();
let new_label = *candidate_labels.choose(&mut rng).unwrap();
if new_label != labels[*node] {
if new_label != labels[*node as usize] {
changed = true;
labels[*node] = new_label;
labels[*node as usize] = new_label;
}
poison.check()?;
}

@ -8,15 +8,18 @@
use std::collections::{BTreeMap, BTreeSet};
use graph::prelude::{
CsrLayout, DirectedCsrGraph, DirectedNeighborsWithValues, Graph, GraphBuilder,
};
use itertools::Itertools;
use log::debug;
use miette::Result;
use smartstring::{LazyCompact, SmartString};
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::data::expr::Expr;
use crate::data::symb::Symbol;
use crate::data::value::DataValue;
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::parse::SourceSpan;
use crate::runtime::db::Poison;
use crate::runtime::temp_store::RegularTempStore;
@ -33,27 +36,16 @@ impl FixedRule for CommunityDetectionLouvain {
let edges = payload.get_input(0)?;
let undirected = payload.bool_option("undirected", Some(false))?;
let max_iter = payload.pos_integer_option("max_iter", Some(10))?;
let delta = payload.unit_interval_option("delta", Some(0.0001))?;
let delta = payload.unit_interval_option("delta", Some(0.0001))? as f32;
let keep_depth = payload.non_neg_integer_option("keep_depth", None).ok();
let (graph, indices, _inv_indices, _) =
edges.convert_edge_to_weighted_graph(undirected, false)?;
let graph = graph
.into_iter()
.map(|edges| -> BTreeMap<usize, f64> {
let mut m = BTreeMap::default();
for (to, weight) in edges {
*m.entry(to).or_default() += weight;
}
m
})
.collect_vec();
let (graph, indices, _inv_indices) = edges.to_directed_weighted_graph(undirected, false)?;
let result = louvain(&graph, delta, max_iter, poison)?;
for (idx, node) in indices.into_iter().enumerate() {
let mut labels = vec![];
let mut cur_idx = idx;
let mut cur_idx = idx as u32;
for hierarchy in &result {
let nxt_idx = hierarchy[cur_idx];
let nxt_idx = hierarchy[cur_idx as usize];
labels.push(DataValue::from(nxt_idx as i64));
cur_idx = nxt_idx;
}
@ -78,21 +70,21 @@ impl FixedRule for CommunityDetectionLouvain {
}
fn louvain(
graph: &[BTreeMap<usize, f64>],
delta: f64,
graph: &DirectedCsrGraph<u32, (), f32>,
delta: f32,
max_iter: usize,
poison: Poison,
) -> Result<Vec<Vec<usize>>> {
) -> Result<Vec<Vec<u32>>> {
let mut current = graph;
let mut collected = vec![];
while current.len() > 2 {
while current.node_count() > 2 {
let (node2comm, new_graph) = louvain_step(current, delta, max_iter, poison.clone())?;
debug!(
"before size: {}, after size: {}",
current.len(),
new_graph.len()
current.node_count(),
new_graph.node_count()
);
if new_graph.len() == current.len() {
if new_graph.node_count() == current.node_count() {
break;
}
collected.push((node2comm, new_graph));
@ -102,68 +94,82 @@ fn louvain(
}
fn calculate_delta(
node: usize,
target_community: usize,
graph: &[BTreeMap<usize, f64>],
comm2nodes: &[BTreeSet<usize>],
out_weights: &[f64],
in_weights: &[f64],
total_weight: f64,
) -> f64 {
node: u32,
target_community: u32,
graph: &DirectedCsrGraph<u32, (), f32>,
comm2nodes: &[BTreeSet<u32>],
out_weights: &[f32],
in_weights: &[f32],
total_weight: f32,
) -> f32 {
let mut sigma_out_total = 0.;
let mut sigma_in_total = 0.;
let mut d2comm = 0.;
let target_community_members = &comm2nodes[target_community];
let target_community_members = &comm2nodes[target_community as usize];
for member in target_community_members.iter() {
if *member == node {
continue;
}
sigma_out_total += out_weights[*member];
sigma_in_total += in_weights[*member];
if let Some(weight) = graph[node].get(member) {
d2comm += *weight;
sigma_out_total += out_weights[*member as usize];
sigma_in_total += in_weights[*member as usize];
for target in graph.out_neighbors_with_values(node) {
if target.target == *member {
d2comm += target.value;
break;
}
}
if let Some(weight) = graph[*member].get(&node) {
d2comm += *weight;
for target in graph.out_neighbors_with_values(*member) {
if target.target == node {
d2comm += target.value;
break;
}
}
}
d2comm
- (sigma_out_total * in_weights[node] + sigma_in_total * out_weights[node]) / total_weight
- (sigma_out_total * in_weights[node as usize]
+ sigma_in_total * out_weights[node as usize])
/ total_weight
}
fn louvain_step(
graph: &[BTreeMap<usize, f64>],
delta: f64,
graph: &DirectedCsrGraph<u32, (), f32>,
delta: f32,
max_iter: usize,
poison: Poison,
) -> Result<(Vec<usize>, Vec<BTreeMap<usize, f64>>)> {
let n_nodes = graph.len();
) -> Result<(Vec<u32>, DirectedCsrGraph<u32, (), f32>)> {
let n_nodes = graph.node_count();
let mut total_weight = 0.;
let mut out_weights = vec![0.; n_nodes];
let mut in_weights = vec![0.; n_nodes];
let mut out_weights = vec![0.; n_nodes as usize];
let mut in_weights = vec![0.; n_nodes as usize];
for (from, edges) in graph.iter().enumerate() {
for (to, weight) in edges {
total_weight += *weight;
out_weights[from] += *weight;
in_weights[*to] += *weight;
for from in 0..n_nodes {
for target in graph.out_neighbors_with_values(from) {
let to = target.target;
let weight = target.value;
total_weight += weight;
out_weights[from as usize] += weight;
in_weights[to as usize] += weight;
}
}
let mut node2comm = (0..n_nodes).collect_vec();
let mut comm2nodes = (0..n_nodes).map(|i| BTreeSet::from([i])).collect_vec();
let mut last_modurality = f64::NEG_INFINITY;
let mut last_modurality = f32::NEG_INFINITY;
for _ in 0..max_iter {
let modularity = {
let mut modularity = 0.;
for from in 0..n_nodes {
for to in &comm2nodes[node2comm[from]] {
if let Some(weight) = graph[from].get(to) {
modularity += *weight;
for to in &comm2nodes[node2comm[from as usize] as usize] {
for target in graph.out_neighbors_with_values(from) {
if target.target == *to {
modularity += target.value;
}
}
modularity -= in_weights[from] * out_weights[*to] / total_weight;
modularity -=
in_weights[from as usize] * out_weights[*to as usize] / total_weight;
}
}
modularity /= total_weight;
@ -177,8 +183,9 @@ fn louvain_step(
}
let mut moved = false;
for (node, edges) in graph.iter().enumerate() {
let community_for_node = node2comm[node];
for node in 0..n_nodes {
let community_for_node = node2comm[node as usize];
let original_delta_q = calculate_delta(
node,
community_for_node,
@ -192,8 +199,10 @@ fn louvain_step(
let mut best_improvement = 0.;
let mut considered_communities = BTreeSet::from([community_for_node]);
for to_node in edges.keys() {
let target_community = node2comm[*to_node];
for target in graph.out_neighbors_with_values(node) {
let to_node = target.target;
let target_community = node2comm[to_node as usize];
if target_community == community_for_node
|| considered_communities.contains(&target_community)
{
@ -217,9 +226,9 @@ fn louvain_step(
}
if best_improvement > 0. {
moved = true;
node2comm[node] = candidate_community;
comm2nodes[community_for_node].remove(&node);
comm2nodes[candidate_community].insert(node);
node2comm[node as usize] = candidate_community;
comm2nodes[community_for_node as usize].remove(&node);
comm2nodes[candidate_community as usize].insert(node);
}
poison.check()?;
}
@ -227,8 +236,8 @@ fn louvain_step(
break;
}
}
let mut new_comm_indices: BTreeMap<usize, usize> = Default::default();
let mut new_comm_count: usize = 0;
let mut new_comm_indices: BTreeMap<u32, u32> = Default::default();
let mut new_comm_count: u32 = 0;
for temp_comm_idx in node2comm.iter_mut() {
if let Some(new_comm_idx) = new_comm_indices.get(temp_comm_idx) {
@ -240,27 +249,44 @@ fn louvain_step(
}
}
let mut new_graph = vec![BTreeMap::new(); new_comm_count];
let mut new_graph_list: Vec<BTreeMap<u32, f32>> =
vec![BTreeMap::new(); new_comm_count as usize];
for (node, comm) in node2comm.iter().enumerate() {
let target = &mut new_graph[*comm];
for (to_node, weight) in &graph[node] {
let to_comm = node2comm[*to_node];
let target = &mut new_graph_list[*comm as usize];
for t in graph.out_neighbors_with_values(node as u32) {
let to_node = t.target;
let weight = t.value;
let to_comm = node2comm[to_node as usize];
*target.entry(to_comm).or_default() += weight;
}
}
let new_graph: DirectedCsrGraph<u32, (), f32> = GraphBuilder::new()
.csr_layout(CsrLayout::Sorted)
.edges_with_values(
new_graph_list
.into_iter()
.enumerate()
.flat_map(move |(fr, nds)| {
nds.into_iter()
.map(move |(to, weight)| (fr as u32, to, weight))
}),
)
.build();
Ok((node2comm, new_graph))
}
#[cfg(test)]
mod tests {
use itertools::Itertools;
use graph::prelude::{CsrLayout, GraphBuilder};
use crate::fixed_rule::algos::louvain::louvain;
use crate::runtime::db::Poison;
#[test]
fn sample() {
let graph: Vec<Vec<usize>> = vec![
let graph: Vec<Vec<u32>> = vec![
vec![2, 3, 5], // 0
vec![2, 4, 7], // 1
vec![0, 1, 4, 5, 6], // 2
@ -278,10 +304,15 @@ mod tests {
vec![8, 9, 10], // 14
vec![8], // 15
];
let graph = graph
.into_iter()
.map(|edges| edges.into_iter().map(|n| (n, 1.)).collect())
.collect_vec();
let graph = GraphBuilder::new()
.csr_layout(CsrLayout::Sorted)
.edges_with_values(
graph
.into_iter()
.enumerate()
.flat_map(|(fr, tos)| tos.into_iter().map(move |to| (fr as u32, to, 1.))),
)
.build();
louvain(&graph, 0., 100, Poison::default()).unwrap();
}
}

@ -10,17 +10,14 @@ use std::collections::BTreeMap;
#[cfg(not(feature = "rayon"))]
use approx::AbsDiffEq;
#[cfg(feature = "rayon")]
use graph::prelude::{page_rank, CsrLayout, DirectedCsrGraph, GraphBuilder, PageRankConfig};
use graph::prelude::{page_rank, PageRankConfig};
use miette::Result;
#[cfg(not(feature = "rayon"))]
use nalgebra::{Dynamic, OMatrix, U1};
use smartstring::{LazyCompact, SmartString};
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::data::expr::Expr;
use crate::data::symb::Symbol;
use crate::data::value::DataValue;
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::parse::SourceSpan;
use crate::runtime::db::Poison;
use crate::runtime::temp_store::RegularTempStore;
@ -41,38 +38,15 @@ impl FixedRule for PageRank {
let epsilon = payload.unit_interval_option("epsilon", Some(0.0001))? as f32;
let iterations = payload.pos_integer_option("iterations", Some(10))?;
let (graph, indices, _) = edges.convert_edge_to_graph(undirected)?;
#[cfg(feature = "rayon")]
{
let graph: DirectedCsrGraph<u32> = GraphBuilder::new()
.csr_layout(CsrLayout::Sorted)
.edges(
graph
.iter()
.enumerate()
.flat_map(|(fr, ls)| ls.iter().map(move |to| (fr as u32, *to as u32))),
)
.build();
let (graph, indices, _) = edges.to_directed_graph(undirected)?;
let (ranks, _n_run, _) = page_rank(
&graph,
PageRankConfig::new(iterations, epsilon as f64, theta),
);
let (ranks, _n_run, _) = page_rank(
&graph,
PageRankConfig::new(iterations, epsilon as f64, theta),
);
for (idx, score) in ranks.iter().enumerate() {
out.put(vec![indices[idx].clone(), DataValue::from(*score as f64)]);
}
}
#[cfg(not(feature = "rayon"))]
{
let res = pagerank(&graph, theta, epsilon, iterations, _poison)?;
for (idx, score) in res.iter().enumerate() {
out.put(
Tuple(vec![indices[idx].clone(), DataValue::from(*score as f64)]),
0,
);
}
for (idx, score) in ranks.iter().enumerate() {
out.put(vec![indices[idx].clone(), DataValue::from(*score as f64)]);
}
Ok(())
}

@ -8,6 +8,7 @@
use std::cmp::Reverse;
use std::collections::BTreeMap;
use graph::prelude::{DirectedCsrGraph, DirectedNeighborsWithValues, Graph};
use miette::Diagnostic;
use miette::Result;
@ -34,8 +35,8 @@ impl FixedRule for MinimumSpanningTreePrim {
poison: Poison,
) -> Result<()> {
let edges = payload.get_input(0)?;
let (graph, indices, inv_indices, _) = edges.convert_edge_to_weighted_graph(true, true)?;
if graph.is_empty() {
let (graph, indices, inv_indices) = edges.to_directed_weighted_graph(true, true)?;
if graph.node_count() == 0 {
return Ok(());
}
let starting = match payload.get_input(1) {
@ -63,9 +64,9 @@ impl FixedRule for MinimumSpanningTreePrim {
let msp = prim(&graph, starting, poison)?;
for (src, dst, cost) in msp {
out.put(vec![
indices[src].clone(),
indices[dst].clone(),
DataValue::from(cost),
indices[src as usize].clone(),
indices[dst as usize].clone(),
DataValue::from(cost as f64),
]);
}
Ok(())
@ -82,29 +83,30 @@ impl FixedRule for MinimumSpanningTreePrim {
}
fn prim(
graph: &[Vec<(usize, f64)>],
starting: usize,
graph: &DirectedCsrGraph<u32, (), f32>,
starting: u32,
poison: Poison,
) -> Result<Vec<(usize, usize, f64)>> {
let mut visited = vec![false; graph.len()];
let mut mst_edges = Vec::with_capacity(graph.len() - 1);
) -> Result<Vec<(u32, u32, f32)>> {
let mut visited = vec![false; graph.node_count() as usize];
let mut mst_edges = Vec::with_capacity((graph.node_count() - 1) as usize);
let mut pq = PriorityQueue::new();
let mut relax_edges_at_node = |node: usize, pq: &mut PriorityQueue<_, _>| {
visited[node] = true;
let edges = &graph[node];
for (to_node, cost) in edges {
if visited[*to_node] {
let mut relax_edges_at_node = |node: u32, pq: &mut PriorityQueue<_, _>| {
visited[node as usize] = true;
for target in graph.out_neighbors_with_values(node) {
let to_node = target.target;
let cost = target.value;
if visited[to_node as usize] {
continue;
}
pq.push_increase(*to_node, (Reverse(OrderedFloat(*cost)), node));
pq.push_increase(to_node, (Reverse(OrderedFloat(cost)), node));
}
};
relax_edges_at_node(starting, &mut pq);
while let Some((to_node, (Reverse(OrderedFloat(cost)), from_node))) = pq.pop() {
if mst_edges.len() == graph.len() - 1 {
if mst_edges.len() == (graph.node_count() - 1) as usize {
break;
}
mst_edges.push((from_node, to_node, cost));

@ -9,12 +9,12 @@
use std::cmp::{Ordering, Reverse};
use std::collections::{BTreeMap, BTreeSet};
use std::iter;
use graph::prelude::{DirectedCsrGraph, DirectedNeighborsWithValues, Graph};
use itertools::Itertools;
use miette::Result;
use ordered_float::OrderedFloat;
use priority_queue::PriorityQueue;
#[cfg(feature = "rayon")]
use rayon::prelude::*;
use smallvec::{smallvec, SmallVec};
use smartstring::{LazyCompact, SmartString};
@ -42,8 +42,8 @@ impl FixedRule for ShortestPathDijkstra {
let undirected = payload.bool_option("undirected", Some(false))?;
let keep_ties = payload.bool_option("keep_ties", Some(false))?;
let (graph, indices, inv_indices, _) =
edges.convert_edge_to_weighted_graph(undirected, false)?;
let (graph, indices, inv_indices) =
edges.to_directed_weighted_graph(undirected, false)?;
let mut starting_nodes = BTreeSet::new();
for tuple in starting.iter()? {
@ -88,22 +88,19 @@ impl FixedRule for ShortestPathDijkstra {
};
for (target, cost, path) in res {
let t = vec![
indices[start].clone(),
indices[target].clone(),
DataValue::from(cost),
DataValue::List(path.into_iter().map(|u| indices[u].clone()).collect_vec()),
indices[start as usize].clone(),
indices[target as usize].clone(),
DataValue::from(cost as f64),
DataValue::List(path.into_iter().map(|u| indices[u as usize].clone()).collect_vec()),
];
out.put(t)
}
}
} else {
#[cfg(feature = "rayon")]
let it = starting_nodes.into_par_iter();
#[cfg(not(feature = "rayon"))]
let it = starting_nodes.into_iter();
let all_res: Vec<_> = it
.map(|start| -> Result<(usize, Vec<(usize, f64, Vec<usize>)>)> {
.map(|start| -> Result<(u32, Vec<(u32, f32, Vec<u32>)>)> {
Ok((
start,
if let Some(tn) = &termination_nodes {
@ -135,10 +132,10 @@ impl FixedRule for ShortestPathDijkstra {
for (start, res) in all_res {
for (target, cost, path) in res {
let t = vec![
indices[start].clone(),
indices[target].clone(),
DataValue::from(cost),
DataValue::List(path.into_iter().map(|u| indices[u].clone()).collect_vec()),
indices[start as usize].clone(),
indices[target as usize].clone(),
DataValue::from(cost as f64),
DataValue::List(path.into_iter().map(|u| indices[u as usize].clone()).collect_vec()),
];
out.put(t)
}
@ -182,41 +179,41 @@ impl Ord for HeapState {
impl Eq for HeapState {}
pub(crate) trait ForbiddenEdge {
fn is_forbidden(&self, src: usize, dst: usize) -> bool;
fn is_forbidden(&self, src: u32, dst: u32) -> bool;
}
impl ForbiddenEdge for () {
fn is_forbidden(&self, _src: usize, _dst: usize) -> bool {
fn is_forbidden(&self, _src: u32, _dst: u32) -> bool {
false
}
}
impl ForbiddenEdge for BTreeSet<(usize, usize)> {
fn is_forbidden(&self, src: usize, dst: usize) -> bool {
impl ForbiddenEdge for BTreeSet<(u32, u32)> {
fn is_forbidden(&self, src: u32, dst: u32) -> bool {
self.contains(&(src, dst))
}
}
pub(crate) trait ForbiddenNode {
fn is_forbidden(&self, node: usize) -> bool;
fn is_forbidden(&self, node: u32) -> bool;
}
impl ForbiddenNode for () {
fn is_forbidden(&self, _node: usize) -> bool {
fn is_forbidden(&self, _node: u32) -> bool {
false
}
}
impl ForbiddenNode for BTreeSet<usize> {
fn is_forbidden(&self, node: usize) -> bool {
impl ForbiddenNode for BTreeSet<u32> {
fn is_forbidden(&self, node: u32) -> bool {
self.contains(&node)
}
}
pub(crate) trait Goal {
fn is_exhausted(&self) -> bool;
fn visit(&mut self, node: usize);
fn iter(&self, total: usize) -> Box<dyn Iterator<Item = usize> + '_>;
fn visit(&mut self, node: u32);
fn iter(&self, total: u32) -> Box<dyn Iterator<Item = u32> + '_>;
}
impl Goal for () {
@ -224,19 +221,19 @@ impl Goal for () {
false
}
fn visit(&mut self, _node: usize) {}
fn visit(&mut self, _node: u32) {}
fn iter(&self, total: usize) -> Box<dyn Iterator<Item = usize> + '_> {
fn iter(&self, total: u32) -> Box<dyn Iterator<Item = u32> + '_> {
Box::new(0..total)
}
}
impl Goal for Option<usize> {
impl Goal for Option<u32> {
fn is_exhausted(&self) -> bool {
self.is_none()
}
fn visit(&mut self, node: usize) {
fn visit(&mut self, node: u32) {
if let Some(u) = &self {
if *u == node {
self.take();
@ -244,7 +241,7 @@ impl Goal for Option<usize> {
}
}
fn iter(&self, _total: usize) -> Box<dyn Iterator<Item = usize> + '_> {
fn iter(&self, _total: u32) -> Box<dyn Iterator<Item = u32> + '_> {
if let Some(u) = self {
Box::new(iter::once(*u))
} else {
@ -253,51 +250,55 @@ impl Goal for Option<usize> {
}
}
impl Goal for BTreeSet<usize> {
impl Goal for BTreeSet<u32> {
fn is_exhausted(&self) -> bool {
self.is_empty()
}
fn visit(&mut self, node: usize) {
fn visit(&mut self, node: u32) {
self.remove(&node);
}
fn iter(&self, _total: usize) -> Box<dyn Iterator<Item = usize> + '_> {
fn iter(&self, _total: u32) -> Box<dyn Iterator<Item = u32> + '_> {
Box::new(self.iter().cloned())
}
}
pub(crate) fn dijkstra<FE: ForbiddenEdge, FN: ForbiddenNode, G: Goal + Clone>(
edges: &[Vec<(usize, f64)>],
start: usize,
edges: &DirectedCsrGraph<u32, (), f32>,
start: u32,
goals: &G,
forbidden_edges: &FE,
forbidden_nodes: &FN,
) -> Vec<(usize, f64, Vec<usize>)> {
let mut distance = vec![f64::INFINITY; edges.len()];
) -> Vec<(u32, f32, Vec<u32>)> {
let graph_size = edges.node_count();
let mut distance = vec![f32::INFINITY; graph_size as usize];
let mut pq = PriorityQueue::new();
let mut back_pointers = vec![usize::MAX; edges.len()];
distance[start] = 0.;
let mut back_pointers = vec![u32::MAX; graph_size as usize];
distance[start as usize] = 0.;
pq.push(start, Reverse(OrderedFloat(0.)));
let mut goals_remaining = goals.clone();
while let Some((node, Reverse(OrderedFloat(cost)))) = pq.pop() {
if cost > distance[node] {
if cost > distance[node as usize] {
continue;
}
for (nxt_node, path_weight) in &edges[node] {
if forbidden_nodes.is_forbidden(*nxt_node) {
for target in edges.out_neighbors_with_values(node) {
let nxt_node = target.target;
let path_weight = target.value;
if forbidden_nodes.is_forbidden(nxt_node) {
continue;
}
if forbidden_edges.is_forbidden(node, *nxt_node) {
if forbidden_edges.is_forbidden(node, nxt_node) {
continue;
}
let nxt_cost = cost + *path_weight;
if nxt_cost < distance[*nxt_node] {
pq.push_increase(*nxt_node, Reverse(OrderedFloat(nxt_cost)));
distance[*nxt_node] = nxt_cost;
back_pointers[*nxt_node] = node;
let nxt_cost = cost + path_weight;
if nxt_cost < distance[nxt_node as usize] {
pq.push_increase(nxt_node, Reverse(OrderedFloat(nxt_cost)));
distance[nxt_node as usize] = nxt_cost;
back_pointers[nxt_node as usize] = node;
}
}
@ -308,9 +309,9 @@ pub(crate) fn dijkstra<FE: ForbiddenEdge, FN: ForbiddenNode, G: Goal + Clone>(
}
let ret = goals
.iter(edges.len())
.iter(edges.node_count())
.map(|target| {
let cost = distance[target];
let cost = distance[target as usize];
if !cost.is_finite() {
(target, cost, vec![])
} else {
@ -318,7 +319,7 @@ pub(crate) fn dijkstra<FE: ForbiddenEdge, FN: ForbiddenNode, G: Goal + Clone>(
let mut current = target;
while current != start {
path.push(current);
current = back_pointers[current];
current = back_pointers[current as usize];
}
path.push(start);
path.reverse();
@ -331,41 +332,44 @@ pub(crate) fn dijkstra<FE: ForbiddenEdge, FN: ForbiddenNode, G: Goal + Clone>(
}
pub(crate) fn dijkstra_keep_ties<FE: ForbiddenEdge, FN: ForbiddenNode, G: Goal + Clone>(
edges: &[Vec<(usize, f64)>],
start: usize,
edges: &DirectedCsrGraph<u32, (), f32>,
start: u32,
goals: &G,
forbidden_edges: &FE,
forbidden_nodes: &FN,
poison: Poison,
) -> Result<Vec<(usize, f64, Vec<usize>)>> {
let mut distance = vec![f64::INFINITY; edges.len()];
) -> Result<Vec<(u32, f32, Vec<u32>)>> {
let mut distance = vec![f32::INFINITY; edges.node_count() as usize];
let mut pq = PriorityQueue::new();
let mut back_pointers: Vec<SmallVec<[usize; 1]>> = vec![smallvec![]; edges.len()];
distance[start] = 0.;
let mut back_pointers: Vec<SmallVec<[u32; 1]>> = vec![smallvec![]; edges.node_count() as usize];
distance[start as usize] = 0.;
pq.push(start, Reverse(OrderedFloat(0.)));
let mut goals_remaining = goals.clone();
while let Some((node, Reverse(OrderedFloat(cost)))) = pq.pop() {
if cost > distance[node] {
if cost > distance[node as usize] {
continue;
}
for (nxt_node, path_weight) in &edges[node] {
if forbidden_nodes.is_forbidden(*nxt_node) {
for target in edges.out_neighbors_with_values(node) {
let nxt_node = target.target;
let path_weight = target.value;
if forbidden_nodes.is_forbidden(nxt_node) {
continue;
}
if forbidden_edges.is_forbidden(node, *nxt_node) {
if forbidden_edges.is_forbidden(node, nxt_node) {
continue;
}
let nxt_cost = cost + *path_weight;
if nxt_cost < distance[*nxt_node] {
pq.push_increase(*nxt_node, Reverse(OrderedFloat(nxt_cost)));
distance[*nxt_node] = nxt_cost;
back_pointers[*nxt_node].clear();
back_pointers[*nxt_node].push(node);
} else if nxt_cost == distance[*nxt_node] {
pq.push_increase(*nxt_node, Reverse(OrderedFloat(nxt_cost)));
back_pointers[*nxt_node].push(node);
let nxt_cost = cost + path_weight;
if nxt_cost < distance[nxt_node as usize] {
pq.push_increase(nxt_node, Reverse(OrderedFloat(nxt_cost)));
distance[nxt_node as usize] = nxt_cost;
back_pointers[nxt_node as usize].clear();
back_pointers[nxt_node as usize].push(node);
} else if nxt_cost == distance[nxt_node as usize] {
pq.push_increase(nxt_node, Reverse(OrderedFloat(nxt_cost)));
back_pointers[nxt_node as usize].push(node);
}
poison.check()?;
}
@ -377,27 +381,27 @@ pub(crate) fn dijkstra_keep_ties<FE: ForbiddenEdge, FN: ForbiddenNode, G: Goal +
}
let ret = goals
.iter(edges.len())
.iter(edges.node_count())
.flat_map(|target| {
let cost = distance[target];
let cost = distance[target as usize];
if !cost.is_finite() {
vec![(target, cost, vec![])]
} else {
struct CollectPath {
collected: Vec<(usize, f64, Vec<usize>)>,
collected: Vec<(u32, f32, Vec<u32>)>,
}
impl CollectPath {
fn collect(
&mut self,
chain: &[usize],
start: usize,
target: usize,
cost: f64,
back_pointers: &[SmallVec<[usize; 1]>],
chain: &[u32],
start: u32,
target: u32,
cost: f32,
back_pointers: &[SmallVec<[u32; 1]>],
) {
let last = chain.last().unwrap();
let prevs = &back_pointers[*last];
let prevs = &back_pointers[*last as usize];
for nxt in prevs {
let mut ret = chain.to_vec();
ret.push(*nxt);

@ -9,6 +9,7 @@
use std::cmp::min;
use std::collections::BTreeMap;
use graph::prelude::{DirectedCsrGraph, DirectedNeighbors, Graph};
use itertools::Itertools;
use miette::Result;
@ -47,12 +48,12 @@ impl FixedRule for StronglyConnectedComponent {
let edges = payload.get_input(0)?;
let (graph, indices, mut inv_indices) =
edges.convert_edge_to_graph(!self.strong)?;
edges.to_directed_graph(!self.strong)?;
let tarjan = TarjanScc::new(&graph).run(poison)?;
let tarjan = TarjanSccG::new(graph).run(poison)?;
for (grp_id, cc) in tarjan.iter().enumerate() {
for idx in cc {
let val = indices.get(*idx).unwrap();
let val = indices.get(*idx as usize).unwrap();
let tuple = vec![val.clone(), DataValue::from(grp_id as i64)];
out.put(tuple);
}
@ -65,7 +66,7 @@ impl FixedRule for StronglyConnectedComponent {
let tuple = tuple?;
let node = tuple.into_iter().next().unwrap();
if !inv_indices.contains_key(&node) {
inv_indices.insert(node.clone(), usize::MAX);
inv_indices.insert(node.clone(), u32::MAX);
let tuple = vec![node, DataValue::from(counter)];
out.put(tuple);
counter += 1;
@ -86,60 +87,62 @@ impl FixedRule for StronglyConnectedComponent {
}
}
pub(crate) struct TarjanScc<'a> {
graph: &'a [Vec<usize>],
id: usize,
ids: Vec<Option<usize>>,
low: Vec<usize>,
pub(crate) struct TarjanSccG {
graph: DirectedCsrGraph<u32>,
id: u32,
ids: Vec<Option<u32>>,
low: Vec<u32>,
on_stack: Vec<bool>,
stack: Vec<usize>,
stack: Vec<u32>,
}
impl<'a> TarjanScc<'a> {
pub(crate) fn new(graph: &'a [Vec<usize>]) -> Self {
impl TarjanSccG {
pub(crate) fn new(graph: DirectedCsrGraph<u32>) -> Self {
let graph_size = graph.node_count();
Self {
graph,
id: 0,
ids: vec![None; graph.len()],
low: vec![0; graph.len()],
on_stack: vec![false; graph.len()],
ids: vec![None; graph_size as usize],
low: vec![0; graph_size as usize],
on_stack: vec![false; graph_size as usize],
stack: vec![],
}
}
pub(crate) fn run(mut self, poison: Poison) -> Result<Vec<Vec<usize>>> {
for i in 0..self.graph.len() {
if self.ids[i].is_none() {
pub(crate) fn run(mut self, poison: Poison) -> Result<Vec<Vec<u32>>> {
for i in 0..self.graph.node_count() {
if self.ids[i as usize].is_none() {
self.dfs(i);
poison.check()?;
}
}
let mut low_map: BTreeMap<usize, Vec<usize>> = BTreeMap::new();
let mut low_map: BTreeMap<u32, Vec<u32>> = BTreeMap::new();
for (idx, grp) in self.low.into_iter().enumerate() {
low_map.entry(grp).or_default().push(idx);
low_map.entry(grp).or_default().push(idx as u32);
}
Ok(low_map.into_iter().map(|(_, vs)| vs).collect_vec())
}
fn dfs(&mut self, at: usize) {
fn dfs(&mut self, at: u32) {
self.stack.push(at);
self.on_stack[at] = true;
self.on_stack[at as usize] = true;
self.id += 1;
self.ids[at] = Some(self.id);
self.low[at] = self.id;
for to in &self.graph[at] {
let to = *to;
if self.ids[to].is_none() {
self.ids[at as usize] = Some(self.id);
self.low[at as usize] = self.id;
for to in self.graph.out_neighbors(at).cloned().collect_vec() {
if self.ids[to as usize].is_none() {
self.dfs(to);
}
if self.on_stack[to] {
self.low[at] = min(self.low[at], self.low[to]);
if self.on_stack[to as usize] {
self.low[at as usize] = min(self.low[at as usize], self.low[to as usize]);
}
}
if self.ids[at].unwrap() == self.low[at] {
if self.ids[at as usize].unwrap() == self.low[at as usize] {
while let Some(node) = self.stack.pop() {
self.on_stack[node] = false;
self.low[node] = self.ids[at].unwrap();
self.on_stack[node as usize] = false;
self.low[node as usize] = self.ids[at as usize].unwrap();
if node == at {
break;
}
@ -147,3 +150,4 @@ impl<'a> TarjanScc<'a> {
}
}
}

@ -7,6 +7,7 @@
*/
use std::collections::BTreeMap;
use graph::prelude::{DirectedCsrGraph, DirectedNeighbors, Graph};
use miette::Result;
use smartstring::{LazyCompact, SmartString};
@ -30,12 +31,12 @@ impl FixedRule for TopSort {
) -> Result<()> {
let edges = payload.get_input(0)?;
let (graph, indices, _) = edges.convert_edge_to_graph(false)?;
let (graph, indices, _) = edges.to_directed_graph(false)?;
let sorted = kahn(&graph, poison)?;
let sorted = kahn_g(&graph, poison)?;
for (idx, val_id) in sorted.iter().enumerate() {
let val = indices.get(*val_id).unwrap();
let val = indices.get(*val_id as usize).unwrap();
let tuple = vec![DataValue::from(idx as i64), val.clone()];
out.put(tuple);
}
@ -53,31 +54,31 @@ impl FixedRule for TopSort {
}
}
pub(crate) fn kahn(graph: &[Vec<usize>], poison: Poison) -> Result<Vec<usize>> {
let mut in_degree = vec![0; graph.len()];
for tos in graph {
for to in tos {
in_degree[*to] += 1;
pub(crate) fn kahn_g(graph: &DirectedCsrGraph<u32>, poison: Poison) -> Result<Vec<u32>> {
let graph_size = graph.node_count();
let mut in_degree = vec![0; graph_size as usize];
for tos in 0..graph_size {
for to in graph.out_neighbors(tos) {
in_degree[*to as usize] += 1;
}
}
let mut sorted = Vec::with_capacity(graph.len());
let mut sorted = Vec::with_capacity(graph_size as usize);
let mut pending = vec![];
for (node, degree) in in_degree.iter().enumerate() {
if *degree == 0 {
pending.push(node);
pending.push(node as u32);
}
}
while !pending.is_empty() {
let removed = pending.pop().unwrap();
sorted.push(removed);
if let Some(edges) = graph.get(removed) {
for nxt in edges {
in_degree[*nxt] -= 1;
if in_degree[*nxt] == 0 {
pending.push(*nxt);
}
for nxt in graph.out_neighbors(removed as u32) {
in_degree[*nxt as usize] -= 1;
if in_degree[*nxt as usize] == 0 {
pending.push(*nxt);
}
}
poison.check()?;

@ -6,17 +6,19 @@
* You can obtain one at https://mozilla.org/MPL/2.0/.
*/
use std::collections::{BTreeMap, BTreeSet};
use std::collections::BTreeMap;
use graph::prelude::{DirectedCsrGraph, DirectedNeighbors, Graph};
use itertools::Itertools;
use miette::Result;
#[cfg(feature = "rayon")]
use rayon::prelude::*;
use smartstring::{LazyCompact, SmartString};
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::data::expr::Expr;
use crate::data::symb::Symbol;
use crate::data::value::DataValue;
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::parse::SourceSpan;
use crate::runtime::db::Poison;
use crate::runtime::temp_store::RegularTempStore;
@ -31,9 +33,7 @@ impl FixedRule for ClusteringCoefficients {
poison: Poison,
) -> Result<()> {
let edges = payload.get_input(0)?;
let (graph, indices, _) = edges.convert_edge_to_graph(true)?;
let graph: Vec<BTreeSet<usize>> =
graph.into_iter().map(|e| e.into_iter().collect()).collect();
let (graph, indices, _) = edges.to_directed_graph(true)?;
let coefficients = clustering_coefficients(&graph, poison)?;
for (idx, (cc, n_triangles, degree)) in coefficients.into_iter().enumerate() {
out.put(vec![
@ -58,32 +58,42 @@ impl FixedRule for ClusteringCoefficients {
}
fn clustering_coefficients(
graph: &[BTreeSet<usize>],
graph: &DirectedCsrGraph<u32>,
poison: Poison,
) -> Result<Vec<(f64, usize, usize)>> {
#[cfg(feature = "rayon")]
let it = graph.par_iter();
#[cfg(not(feature = "rayon"))]
let it = graph.iter();
let node_size = graph.node_count();
it.map(|edges| -> Result<(f64, usize, usize)> {
let degree = edges.len();
if degree < 2 {
Ok((0., 0, degree))
} else {
let n_triangles = edges
.iter()
.map(|e_src| {
edges
.iter()
.filter(|e_dst| e_src > e_dst && graph[*e_src].contains(*e_dst))
.count()
})
.sum();
let cc = 2. * n_triangles as f64 / ((degree as f64) * ((degree as f64) - 1.));
poison.check()?;
Ok((cc, n_triangles, degree))
}
})
.collect::<Result<_>>()
(0..node_size)
.into_par_iter()
.map(|node_idx| -> Result<(f64, usize, usize)> {
let edges = graph.out_neighbors(node_idx).collect_vec();
let degree = edges.len();
if degree < 2 {
Ok((0., 0, degree))
} else {
let n_triangles = edges
.iter()
.map(|e_src| {
edges
.iter()
.filter(|e_dst| {
if e_src <= e_dst {
return false;
}
for nb in graph.out_neighbors(**e_src) {
if nb == **e_dst {
return true;
}
}
return false;
})
.count()
})
.sum();
let cc = 2. * n_triangles as f64 / ((degree as f64) * ((degree as f64) - 1.));
poison.check()?;
Ok((cc, n_triangles, degree))
}
})
.collect::<Result<_>>()
}

@ -8,17 +8,18 @@
use std::collections::{BTreeMap, BTreeSet};
use graph::prelude::{DirectedCsrGraph, DirectedNeighborsWithValues};
use itertools::Itertools;
use miette::Result;
#[cfg(feature = "rayon")]
use rayon::prelude::*;
use smartstring::{LazyCompact, SmartString};
use crate::fixed_rule::algos::shortest_path_dijkstra::dijkstra;
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::data::expr::Expr;
use crate::data::symb::Symbol;
use crate::data::value::DataValue;
use crate::fixed_rule::algos::shortest_path_dijkstra::dijkstra;
use crate::fixed_rule::{FixedRule, FixedRulePayload};
use crate::parse::SourceSpan;
use crate::runtime::db::Poison;
use crate::runtime::temp_store::RegularTempStore;
@ -38,8 +39,7 @@ impl FixedRule for KShortestPathYen {
let undirected = payload.bool_option("undirected", Some(false))?;
let k = payload.pos_integer_option("k", None)?;
let (graph, indices, inv_indices, _) =
edges.convert_edge_to_weighted_graph(undirected, false)?;
let (graph, indices, inv_indices) = edges.to_directed_weighted_graph(undirected, false)?;
let mut starting_nodes = BTreeSet::new();
for tuple in starting.iter()? {
@ -64,11 +64,13 @@ impl FixedRule for KShortestPathYen {
k_shortest_path_yen(k as usize, &graph, start, *goal, poison.clone())?
{
let t = vec![
indices[start].clone(),
indices[*goal].clone(),
DataValue::from(cost),
indices[start as usize].clone(),
indices[*goal as usize].clone(),
DataValue::from(cost as f64),
DataValue::List(
path.into_iter().map(|u| indices[u].clone()).collect_vec(),
path.into_iter()
.map(|u| indices[u as usize].clone())
.collect_vec(),
),
];
out.put(t)
@ -84,7 +86,7 @@ impl FixedRule for KShortestPathYen {
let res_all: Vec<_> = first_it
.map(
|(start, goal)| -> Result<(usize, usize, Vec<(f64, Vec<usize>)>)> {
|(start, goal)| -> Result<(u32, u32, Vec<(f32, Vec<u32>)>)> {
Ok((
start,
goal,
@ -97,10 +99,10 @@ impl FixedRule for KShortestPathYen {
for (start, goal, res) in res_all {
for (cost, path) in res {
let t = vec![
indices[start].clone(),
indices[goal].clone(),
DataValue::from(cost),
DataValue::List(path.into_iter().map(|u| indices[u].clone()).collect_vec()),
indices[start as usize].clone(),
indices[goal as usize].clone(),
DataValue::from(cost as f64),
DataValue::List(path.into_iter().map(|u| indices[u as usize].clone()).collect_vec()),
];
out.put(t)
}
@ -121,13 +123,13 @@ impl FixedRule for KShortestPathYen {
fn k_shortest_path_yen(
k: usize,
edges: &[Vec<(usize, f64)>],
start: usize,
goal: usize,
edges: &DirectedCsrGraph<u32, (), f32>,
start: u32,
goal: u32,
poison: Poison,
) -> Result<Vec<(f64, Vec<usize>)>> {
let mut k_shortest: Vec<(f64, Vec<usize>)> = Vec::with_capacity(k);
let mut candidates: Vec<(f64, Vec<usize>)> = vec![];
) -> Result<Vec<(f32, Vec<u32>)>> {
let mut k_shortest: Vec<(f32, Vec<u32>)> = Vec::with_capacity(k);
let mut candidates: Vec<(f32, Vec<u32>)> = vec![];
match dijkstra(edges, start, &Some(goal), &(), &())
.into_iter()
@ -170,10 +172,11 @@ fn k_shortest_path_yen(
for i in 0..root_path.len() - 1 {
let s = root_path[i];
let d = root_path[i + 1];
let eds = &edges[s];
for (e, c) in eds {
if *e == d {
total_cost += *c;
for target in edges.out_neighbors_with_values(s) {
let e = target.target;
let c = target.value;
if e == d {
total_cost += c;
break;
}
}

@ -9,8 +9,10 @@
use std::collections::BTreeMap;
use std::sync::Arc;
use either::{Left, Right};
use graph::prelude::{CsrLayout, DirectedCsrGraph, GraphBuilder};
use lazy_static::lazy_static;
use miette::{bail, ensure, Diagnostic, Result};
use miette::{bail, ensure, Diagnostic, Report, Result};
use smartstring::{LazyCompact, SmartString};
use thiserror::Error;
@ -136,128 +138,194 @@ impl<'a, 'b> FixedRuleInputRelation<'a, 'b> {
pub fn span(&self) -> SourceSpan {
self.arg_manifest.span()
}
fn convert_edge_to_graph(
pub fn to_directed_graph(
&self,
undirected: bool,
) -> Result<(Vec<Vec<usize>>, Vec<DataValue>, BTreeMap<DataValue, usize>)> {
let mut graph: Vec<Vec<usize>> = vec![];
) -> Result<(
DirectedCsrGraph<u32>,
Vec<DataValue>,
BTreeMap<DataValue, u32>,
)> {
let mut indices: Vec<DataValue> = vec![];
let mut inv_indices: BTreeMap<DataValue, usize> = Default::default();
for tuple in self.iter()? {
let mut tuple = tuple?.into_iter();
let from = tuple.next().ok_or_else(|| NotAnEdgeError(self.span()))?;
let to = tuple.next().ok_or_else(|| NotAnEdgeError(self.span()))?;
let from_idx = if let Some(idx) = inv_indices.get(&from) {
*idx
} else {
inv_indices.insert(from.clone(), graph.len());
indices.push(from.clone());
graph.push(vec![]);
graph.len() - 1
};
let to_idx = if let Some(idx) = inv_indices.get(&to) {
*idx
} else {
inv_indices.insert(to.clone(), graph.len());
indices.push(to.clone());
graph.push(vec![]);
graph.len() - 1
};
let from_target = graph.get_mut(from_idx).unwrap();
from_target.push(to_idx);
if undirected {
let to_target = graph.get_mut(to_idx).unwrap();
to_target.push(from_idx);
let mut inv_indices: BTreeMap<DataValue, u32> = Default::default();
let mut error: Option<Report> = None;
let it = self.iter()?.filter_map(|r_tuple| match r_tuple {
Ok(tuple) => {
let mut tuple = tuple.into_iter();
let from = match tuple.next() {
None => {
error = Some(NotAnEdgeError(self.span()).into());
return None;
}
Some(f) => f,
};
let to = match tuple.next() {
None => {
error = Some(NotAnEdgeError(self.span()).into());
return None;
}
Some(f) => f,
};
let from_idx = if let Some(idx) = inv_indices.get(&from) {
*idx
} else {
let idx = indices.len() as u32;
inv_indices.insert(from.clone(), idx);
indices.push(from.clone());
idx
};
let to_idx = if let Some(idx) = inv_indices.get(&to) {
*idx
} else {
let idx = indices.len() as u32;
inv_indices.insert(to.clone(), idx);
indices.push(to.clone());
idx
};
Some((from_idx, to_idx))
}
Err(err) => {
error = Some(err);
None
}
});
let it = if undirected {
Right(it.flat_map(|(f, t)| [(f, t), (t, f)]))
} else {
Left(it)
};
let graph: DirectedCsrGraph<u32> = GraphBuilder::new()
.csr_layout(CsrLayout::Sorted)
.edges(it)
.build();
if let Some(err) = error {
bail!(err)
}
Ok((graph, indices, inv_indices))
}
pub fn convert_edge_to_weighted_graph(
pub fn to_directed_weighted_graph(
&self,
undirected: bool,
allow_negative_edges: bool,
allow_negative_weights: bool,
) -> Result<(
Vec<Vec<(usize, f64)>>,
DirectedCsrGraph<u32, (), f32>,
Vec<DataValue>,
BTreeMap<DataValue, usize>,
bool,
BTreeMap<DataValue, u32>,
)> {
let mut graph: Vec<Vec<(usize, f64)>> = vec![];
let mut indices: Vec<DataValue> = vec![];
let mut inv_indices: BTreeMap<DataValue, usize> = Default::default();
let mut has_neg_edge = false;
let mut inv_indices: BTreeMap<DataValue, u32> = Default::default();
let mut error: Option<Report> = None;
let it = self.iter()?.filter_map(|r_tuple| match r_tuple {
Ok(tuple) => {
let mut tuple = tuple.into_iter();
let from = match tuple.next() {
None => {
error = Some(NotAnEdgeError(self.span()).into());
return None;
}
Some(f) => f,
};
let to = match tuple.next() {
None => {
error = Some(NotAnEdgeError(self.span()).into());
return None;
}
Some(f) => f,
};
let from_idx = if let Some(idx) = inv_indices.get(&from) {
*idx
} else {
let idx = indices.len() as u32;
inv_indices.insert(from.clone(), idx);
indices.push(from.clone());
idx
};
let to_idx = if let Some(idx) = inv_indices.get(&to) {
*idx
} else {
let idx = indices.len() as u32;
inv_indices.insert(to.clone(), idx);
indices.push(to.clone());
idx
};
for tuple in self.iter()? {
let mut tuple = tuple?.into_iter();
let from = tuple.next().ok_or_else(|| NotAnEdgeError(self.span()))?;
let to = tuple.next().ok_or_else(|| NotAnEdgeError(self.span()))?;
let weight = match tuple.next() {
None => 1.0,
Some(d) => match d.get_float() {
Some(f) => {
ensure!(
f.is_finite(),
BadEdgeWeightError(
d,
self.arg_manifest
.bindings()
.get(2)
.map(|s| s.span)
.unwrap_or_else(|| self.span())
)
);
if f < 0. {
if !allow_negative_edges {
bail!(BadEdgeWeightError(
let weight = match tuple.next() {
None => 1.0,
Some(d) => match d.get_float() {
Some(f) => {
if !f.is_finite() {
error = Some(
BadEdgeWeightError(
d,
self.arg_manifest
.bindings()
.get(2)
.map(|s| s.span)
.unwrap_or_else(|| self.span()),
)
.into(),
);
return None;
};
if f < 0. {
if !allow_negative_weights {
error = Some(
BadEdgeWeightError(
d,
self.arg_manifest
.bindings()
.get(2)
.map(|s| s.span)
.unwrap_or_else(|| self.span()),
)
.into(),
);
return None;
}
}
f
}
None => {
error = Some(
BadEdgeWeightError(
d,
self.arg_manifest
.bindings()
.get(2)
.map(|s| s.span)
.unwrap_or_else(|| self.span())
));
}
has_neg_edge = true;
.unwrap_or_else(|| self.span()),
)
.into(),
);
return None;
}
f
}
None => {
bail!(BadEdgeWeightError(
d,
self.arg_manifest
.bindings()
.get(2)
.map(|s| s.span)
.unwrap_or_else(|| self.span())
))
}
},
};
let from_idx = if let Some(idx) = inv_indices.get(&from) {
*idx
} else {
inv_indices.insert(from.clone(), graph.len());
indices.push(from.clone());
graph.push(vec![]);
graph.len() - 1
};
let to_idx = if let Some(idx) = inv_indices.get(&to) {
*idx
} else {
inv_indices.insert(to.clone(), graph.len());
indices.push(to.clone());
graph.push(vec![]);
graph.len() - 1
};
let from_target = graph.get_mut(from_idx).unwrap();
from_target.push((to_idx, weight));
if undirected {
let to_target = graph.get_mut(to_idx).unwrap();
to_target.push((from_idx, weight));
},
};
Some((from_idx, to_idx, weight as f32))
}
Err(err) => {
error = Some(err);
None
}
});
let it = if undirected {
Right(it.flat_map(|(f, t, w)| [(f, t, w), (t, f, w)]))
} else {
Left(it)
};
let graph: DirectedCsrGraph<u32, (), f32> = GraphBuilder::new()
.csr_layout(CsrLayout::Sorted)
.edges_with_values(it)
.build();
if let Some(err) = error {
bail!(err)
}
Ok((graph, indices, inv_indices, has_neg_edge))
Ok((graph, indices, inv_indices))
}
}

@ -6,13 +6,13 @@
* You can obtain one at https://mozilla.org/MPL/2.0/.
*/
use std::cmp::min;
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Debug;
use miette::Result;
use itertools::Itertools;
use miette::Result;
use crate::fixed_rule::algos::strongly_connected_components::TarjanScc;
use crate::runtime::db::Poison;
pub(crate) type Graph<T> = BTreeMap<T, Vec<T>>;
@ -129,3 +129,65 @@ pub(crate) fn generalized_kahn(
debug_assert_eq!(in_degree.iter().sum::<usize>(), 0);
ret
}
struct TarjanScc<'a> {
graph: &'a [Vec<usize>],
id: usize,
ids: Vec<Option<usize>>,
low: Vec<usize>,
on_stack: Vec<bool>,
stack: Vec<usize>,
}
impl<'a> TarjanScc<'a> {
fn new(graph: &'a [Vec<usize>]) -> Self {
Self {
graph,
id: 0,
ids: vec![None; graph.len()],
low: vec![0; graph.len()],
on_stack: vec![false; graph.len()],
stack: vec![],
}
}
fn run(mut self, poison: Poison) -> Result<Vec<Vec<usize>>> {
for i in 0..self.graph.len() {
if self.ids[i].is_none() {
self.dfs(i);
poison.check()?;
}
}
let mut low_map: BTreeMap<usize, Vec<usize>> = BTreeMap::new();
for (idx, grp) in self.low.into_iter().enumerate() {
low_map.entry(grp).or_default().push(idx);
}
Ok(low_map.into_iter().map(|(_, vs)| vs).collect_vec())
}
fn dfs(&mut self, at: usize) {
self.stack.push(at);
self.on_stack[at] = true;
self.id += 1;
self.ids[at] = Some(self.id);
self.low[at] = self.id;
for to in &self.graph[at] {
let to = *to;
if self.ids[to].is_none() {
self.dfs(to);
}
if self.on_stack[to] {
self.low[at] = min(self.low[at], self.low[to]);
}
}
if self.ids[at].unwrap() == self.low[at] {
while let Some(node) = self.stack.pop() {
self.on_stack[node] = false;
self.low[node] = self.ids[at].unwrap();
if node == at {
break;
}
}
}
}
}

@ -17,7 +17,7 @@ default = ["console_error_panic_hook"]
[dependencies]
wasm-bindgen = "0.2.63"
cozo = { version = "0.2.1", path = "../cozo-core", default-features = false, features = ["wasm", "graph-algo", "nothread"] }
cozo = { version = "0.2.1", path = "../cozo-core", default-features = false, features = ["wasm", "nothread"] }
# The `console_error_panic_hook` crate provides better debugging of panics by
# logging them with `console.error`. This is great for development, but requires

@ -8,15 +8,6 @@ export MACOSX_DEPLOYMENT_TARGET=10.14
#rm -fr release
mkdir -p release
for TARGET in aarch64-apple-darwin x86_64-apple-darwin; do
CARGO_PROFILE_RELEASE_LTO=fat cargo build --release -p cozoserver \
-F compact -F storage-rocksdb -F storage-tikv -F storage-sled --target $TARGET
cp target/$TARGET/release/cozoserver release/cozoserver_all-$VERSION-$TARGET # standalone
done
# copy python
cp target/wheels/*.whl release/
for TARGET in aarch64-apple-darwin x86_64-apple-darwin; do
# standalone, c, java, nodejs
CARGO_PROFILE_RELEASE_LTO=fat cargo build --release -p cozoserver -p cozo_c -p cozo_java -p cozo-node -F compact -F storage-rocksdb --target $TARGET
@ -32,11 +23,21 @@ for TARGET in aarch64-apple-darwin x86_64-apple-darwin; do
cd ..
done
# copy python
cp target/wheels/*.whl release/
# swift
cd cozo-lib-swift
CARGO_PROFILE_RELEASE_LTO=fat ./build-rust.sh
cd ..
# with TiKV
for TARGET in aarch64-apple-darwin x86_64-apple-darwin; do
CARGO_PROFILE_RELEASE_LTO=fat cargo build --release -p cozoserver \
-F compact -F storage-rocksdb -F storage-tikv -F storage-sled --target $TARGET
cp target/$TARGET/release/cozoserver release/cozoserver_all-$VERSION-$TARGET # standalone
done
# WASM
cd cozo-lib-wasm
CARGO_PROFILE_RELEASE_LTO=fat ./build.sh

Loading…
Cancel
Save