random walk and the completion of algo for the moment

main
Ziyang Hu 2 years ago
parent 4bb5ab57a2
commit e5c2b70114

@ -1,26 +0,0 @@
## TODO
* [x] more complete functions and aggregations
* [ ] more complete tx tests
* [ ] graph algorithms
* [x] bfs
* [x] dfs
* [x] shortest path
* [x] A*
* [x] Yen's k-shortest
* [x] all-pairs shortest path
* [x] single-source shortest path
* [x] minimum spanning tree
* [ ] random walking
* [x] degree centrality
* [x] closeness centrality
* [x] betweenness centrality
* [x] pagerank
* [x] triangle counting
* [x] strongly connected components
* [x] connected components
* [x] label propagation
* [x] louvain modularity
* [x] direct loading of data
* [ ] serial agg function
* [x] random function

@ -15,6 +15,7 @@ use crate::algo::label_propagation::LabelPropagation;
use crate::algo::louvain::CommunityDetectionLouvain;
use crate::algo::pagerank::PageRank;
use crate::algo::prim::MinimumSpanningTreePrim;
use crate::algo::random_walk::RandomWalk;
use crate::algo::reorder_sort::ReorderSort;
use crate::algo::shortest_path_dijkstra::ShortestPathDijkstra;
use crate::algo::strongly_connected_components::StronglyConnectedComponent;
@ -40,8 +41,9 @@ pub(crate) mod label_propagation;
pub(crate) mod louvain;
pub(crate) mod pagerank;
pub(crate) mod prim;
pub(crate) mod shortest_path_dijkstra;
pub(crate) mod random_walk;
pub(crate) mod reorder_sort;
pub(crate) mod shortest_path_dijkstra;
pub(crate) mod strongly_connected_components;
pub(crate) mod top_sort;
pub(crate) mod triangles;
@ -92,6 +94,7 @@ impl AlgoHandle {
"pagerank" => 2,
"community_detection_louvain" => 2,
"label_propagation" => 2,
"random_walk" => 3,
"reorder_sort" => {
let out_opts = opts
.get("out")
@ -129,6 +132,7 @@ impl AlgoHandle {
"pagerank" => Box::new(PageRank),
"community_detection_louvain" => Box::new(CommunityDetectionLouvain),
"label_propagation" => Box::new(LabelPropagation),
"random_walk" => Box::new(RandomWalk),
"reorder_sort" => Box::new(ReorderSort),
name => bail!("algorithm '{}' not found", name),
})

@ -0,0 +1,156 @@
use std::collections::BTreeMap;
use anyhow::{anyhow, bail, ensure, Result};
use itertools::Itertools;
use rand::distributions::WeightedIndex;
use rand::prelude::*;
use smartstring::{LazyCompact, SmartString};
use crate::algo::AlgoImpl;
use crate::data::expr::Expr;
use crate::data::program::{MagicAlgoRuleArg, MagicSymbol};
use crate::data::tuple::Tuple;
use crate::data::value::DataValue;
use crate::runtime::derived::DerivedRelStore;
use crate::runtime::transact::SessionTx;
/// Random-walk graph algorithm.
///
/// Stateless marker type; all behaviour lives in its `AlgoImpl` implementation,
/// which reads an edges relation, a nodes relation and a starting-nodes
/// relation and emits one output tuple per generated walk.
pub(crate) struct RandomWalk;
impl AlgoImpl for RandomWalk {
fn run(
&mut self,
tx: &SessionTx,
rels: &[MagicAlgoRuleArg],
opts: &BTreeMap<SmartString<LazyCompact>, Expr>,
stores: &BTreeMap<MagicSymbol, DerivedRelStore>,
out: &DerivedRelStore,
) -> Result<()> {
let edges = rels
.get(0)
.ok_or_else(|| anyhow!("'random_walk' requires edges relation as first argument"))?;
let nodes = rels
.get(1)
.ok_or_else(|| anyhow!("'random_walk' requires nodes relation as second argument"))?;
let starting = rels
.get(2)
.ok_or_else(|| anyhow!("'random_walk' requires starting relation as third argument"))?;
let iterations = match opts.get("iterations") {
None => 1usize,
Some(Expr::Const(DataValue::Number(n))) => {
let n = n.get_int().ok_or_else(|| {
anyhow!(
"'iterations' for 'random_walk' requires an integer, got {}",
n
)
})?;
ensure!(
n > 0,
"'iterations' for 'random_walk' must be positive, got {}",
n
);
n as usize
}
Some(v) => bail!(
"'iterations' for 'random_walk' requires an integer, got {:?}",
v
),
};
let steps = match opts
.get("steps")
.ok_or_else(|| anyhow!("'random_walk' requires option 'steps'"))?
{
Expr::Const(DataValue::Number(n)) => {
let n = n.get_int().ok_or_else(|| {
anyhow!("'steps' for 'random_walk' requires an integer, got {}", n)
})?;
ensure!(
n > 0,
"'iterations' for 'random_walk' must be positive, got {}",
n
);
n as usize
}
v => bail!(
"'iterations' for 'random_walk' requires an integer, got {:?}",
v
),
};
let mut maybe_weight = opts.get("weight").cloned();
if let Some(weight) = &mut maybe_weight {
let mut nodes_binding = nodes.get_binding_map(0);
let nodes_arity = nodes.arity(tx, stores)?;
let edges_binding = edges.get_binding_map(nodes_arity);
nodes_binding.extend(edges_binding);
weight.fill_binding_indices(&nodes_binding)?;
}
let mut counter = 0i64;
let mut rng = thread_rng();
for start_node in starting.iter(tx, stores)? {
let start_node = start_node?;
let start_node_key = start_node
.0
.get(0)
.ok_or_else(|| anyhow!("starting node relation too short"))?;
let starting_tuple = nodes
.prefix_iter(start_node_key, tx, stores)?
.next()
.ok_or_else(|| anyhow!("node with key '{:?}' not found", start_node_key))??;
for _ in 0..iterations {
counter += 1;
let mut current_tuple = starting_tuple.clone();
let mut path = vec![start_node_key.clone()];
for _ in 0..steps {
let cur_node_key = current_tuple
.0
.get(0)
.ok_or_else(|| anyhow!("node tuple too short"))?;
let candidate_steps: Vec<_> =
edges.prefix_iter(cur_node_key, tx, stores)?.try_collect()?;
if candidate_steps.is_empty() {
break;
}
let next_step = if let Some(weight_expr) = &maybe_weight {
let weights: Vec<_> = candidate_steps.iter().map(|t| -> Result<f64> {
let mut cand = current_tuple.clone();
cand.0.extend_from_slice(&t.0);
Ok(match weight_expr.eval(&cand)? {
DataValue::Number(n) => {
let f = n.get_float();
ensure!(f >= 0., "'weight' for 'random_walk' needs to be non-negative, got {:?}", f);
f
}
v => bail!("'weight' for 'random_walk' must evaluate to a float, got {:?}", v)
})
}).try_collect()?;
let dist = WeightedIndex::new(&weights).unwrap();
&candidate_steps[dist.sample(&mut rng)]
} else {
candidate_steps.choose(&mut rng).unwrap()
};
let next_node = next_step
.0
.get(1)
.ok_or_else(|| anyhow!("edges relation for 'random_walk' too short"))?;
path.push(next_node.clone());
current_tuple = nodes
.prefix_iter(next_node, tx, stores)?
.next()
.ok_or_else(|| {
anyhow!("node with key '{:?}' not found", start_node_key)
})??;
}
out.put(
Tuple(vec![
DataValue::from(counter),
start_node_key.clone(),
DataValue::List(path),
]),
0,
);
}
}
Ok(())
}
}
Loading…
Cancel
Save