more efficient graph algo

main
Ziyang Hu 2 years ago
parent 85827e6d68
commit b1c6c9f2e5

179
Cargo.lock generated

@ -34,6 +34,18 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "ahash"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf6ccdb167abbf410dcb915cabd428929d7f6a04980b54a11f26a39f1c7f7107"
dependencies = [
"cfg-if 1.0.0",
"getrandom 0.2.8",
"once_cell",
"version_check",
]
[[package]] [[package]]
name = "aho-corasick" name = "aho-corasick"
version = "0.7.20" version = "0.7.20"
@ -82,6 +94,12 @@ dependencies = [
"num-traits", "num-traits",
] ]
[[package]]
name = "arrayvec"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]] [[package]]
name = "ascii" name = "ascii"
version = "1.1.0" version = "1.1.0"
@ -99,6 +117,15 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "atoi"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528"
dependencies = [
"num-traits",
]
[[package]] [[package]]
name = "atomic" name = "atomic"
version = "0.5.1" version = "0.5.1"
@ -108,6 +135,12 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "atomic_float"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62af46d040ba9df09edc6528dae9d8e49f5f3e82f55b7d2ec31a733c38dbc49d"
[[package]] [[package]]
name = "atty" name = "atty"
version = "0.2.14" version = "0.2.14"
@ -238,6 +271,12 @@ version = "3.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba"
[[package]]
name = "byte-slice-cast"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3ac9f8b63eca6fd385229b3675f6cc0dc5c8a5c8a54a59d4f52ffd670d87b0c"
[[package]] [[package]]
name = "bytemuck" name = "bytemuck"
version = "1.12.3" version = "1.12.3"
@ -505,6 +544,7 @@ dependencies = [
"document-features", "document-features",
"either", "either",
"env_logger", "env_logger",
"graph",
"itertools 0.10.5", "itertools 0.10.5",
"js-sys", "js-sys",
"lazy_static", "lazy_static",
@ -773,6 +813,17 @@ dependencies = [
"gzip-header", "gzip-header",
] ]
[[package]]
name = "delegate"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "082a24a9967533dc5d743c602157637116fc1b52806d694a5a45e6f32567fcdd"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "derive-new" name = "derive-new"
version = "0.5.9" version = "0.5.9"
@ -863,6 +914,12 @@ dependencies = [
"rand 0.7.3", "rand 0.7.3",
] ]
[[package]]
name = "fast-float"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
[[package]] [[package]]
name = "fastrand" name = "fastrand"
version = "1.8.0" version = "1.8.0"
@ -1110,6 +1167,45 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
[[package]]
name = "graph"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "624b74cfef1d6e08adeac4b5947e8e79351d0a53491b07b9d342701f8e58b68f"
dependencies = [
"ahash 0.8.2",
"atomic_float",
"graph_builder",
"log",
"nanorand",
"num-format",
"rayon",
]
[[package]]
name = "graph_builder"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ba851e549b0354700eab51c77fc28055936f6565e45c6ecd750f06f6414ac7f"
dependencies = [
"atoi",
"atomic",
"byte-slice-cast",
"delegate",
"fast-float",
"fxhash",
"linereader",
"log",
"memmap2",
"num",
"num-format",
"num_cpus",
"page_size",
"parking_lot 0.12.1",
"rayon",
"thiserror",
]
[[package]] [[package]]
name = "grpcio" name = "grpcio"
version = "0.8.3" version = "0.8.3"
@ -1189,7 +1285,7 @@ version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [ dependencies = [
"ahash", "ahash 0.7.6",
] ]
[[package]] [[package]]
@ -1519,6 +1615,15 @@ dependencies = [
"vcpkg", "vcpkg",
] ]
[[package]]
name = "linereader"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d921fea6860357575519aca014c6e22470585accdd543b370c404a8a72d0dd1d"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "link-cplusplus" name = "link-cplusplus"
version = "1.0.7" version = "1.0.7"
@ -1584,6 +1689,15 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "memmap2"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b182332558b18d807c4ce1ca8ca983b34c3ee32765e47b3f0f69b90355cc1dc"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "memoffset" name = "memoffset"
version = "0.6.5" version = "0.6.5"
@ -1749,6 +1863,12 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "nanorand"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3"
[[package]] [[package]]
name = "native-tls" name = "native-tls"
version = "0.2.11" version = "0.2.11"
@ -1818,6 +1938,31 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "num"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606"
dependencies = [
"num-bigint",
"num-complex",
"num-integer",
"num-iter",
"num-rational",
"num-traits",
]
[[package]]
name = "num-bigint"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]] [[package]]
name = "num-complex" name = "num-complex"
version = "0.4.2" version = "0.4.2"
@ -1827,6 +1972,16 @@ dependencies = [
"num-traits", "num-traits",
] ]
[[package]]
name = "num-format"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54b862ff8df690cf089058c98b183676a7ed0f974cc08b426800093227cbff3b"
dependencies = [
"arrayvec",
"itoa 1.0.4",
]
[[package]] [[package]]
name = "num-integer" name = "num-integer"
version = "0.1.45" version = "0.1.45"
@ -1837,6 +1992,17 @@ dependencies = [
"num-traits", "num-traits",
] ]
[[package]]
name = "num-iter"
version = "0.1.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]] [[package]]
name = "num-rational" name = "num-rational"
version = "0.4.1" version = "0.4.1"
@ -1844,6 +2010,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0"
dependencies = [ dependencies = [
"autocfg", "autocfg",
"num-bigint",
"num-integer", "num-integer",
"num-traits", "num-traits",
] ]
@ -1967,6 +2134,16 @@ version = "3.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f"
[[package]]
name = "page_size"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd"
dependencies = [
"libc",
"winapi",
]
[[package]] [[package]]
name = "parking_lot" name = "parking_lot"
version = "0.11.2" version = "0.11.2"

@ -42,7 +42,7 @@ io-uring = ["cozorocks?/io-uring"]
## Polyfills for the WASM target ## Polyfills for the WASM target
wasm = ["uuid/js", "dep:js-sys"] wasm = ["uuid/js", "dep:js-sys"]
## Allows threading and enables the use of the `rayon` library for parallelizing algorithms. ## Allows threading and enables the use of the `rayon` library for parallelizing algorithms.
rayon = ["dep:rayon"] rayon = ["dep:rayon", "dep:graph"]
## Disallows the use of threads. ## Disallows the use of threads.
nothread = [] nothread = []
@ -134,5 +134,6 @@ tokio = { version = "1.21.2", optional = true }
sqlite = { version = "0.30.1", optional = true } sqlite = { version = "0.30.1", optional = true }
sqlite3-src = { version = "0.4.0", optional = true, features = ["bundled"] } sqlite3-src = { version = "0.4.0", optional = true, features = ["bundled"] }
js-sys = { version = "0.3.60", optional = true } js-sys = { version = "0.3.60", optional = true }
graph = { version = "0.3.0", optional = true }
#redb = "0.9.0" #redb = "0.9.0"
#ouroboros = "0.15.5" #ouroboros = "0.15.5"

@ -7,11 +7,10 @@
*/ */
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::mem;
use approx::AbsDiffEq; #[cfg(feature = "rayon")]
use graph::prelude::{page_rank, CsrLayout, DirectedCsrGraph, GraphBuilder, PageRankConfig};
use miette::Result; use miette::Result;
use nalgebra::{Dynamic, OMatrix, U1};
use smartstring::{LazyCompact, SmartString}; use smartstring::{LazyCompact, SmartString};
use crate::algo::AlgoImpl; use crate::algo::AlgoImpl;
@ -34,20 +33,49 @@ impl AlgoImpl for PageRank {
algo: &'a MagicAlgoApply, algo: &'a MagicAlgoApply,
stores: &'a BTreeMap<MagicSymbol, InMemRelation>, stores: &'a BTreeMap<MagicSymbol, InMemRelation>,
out: &'a InMemRelation, out: &'a InMemRelation,
poison: Poison, _poison: Poison,
) -> Result<()> { ) -> Result<()> {
let edges = algo.relation(0)?; let edges = algo.relation(0)?;
let undirected = algo.bool_option("undirected", Some(false))?; let undirected = algo.bool_option("undirected", Some(false))?;
let theta = algo.unit_interval_option("theta", Some(0.8))? as f32; let theta = algo.unit_interval_option("theta", Some(0.8))? as f32;
let epsilon = algo.unit_interval_option("epsilon", Some(0.05))? as f32; let epsilon = algo.unit_interval_option("epsilon", Some(0.05))? as f32;
let iterations = algo.pos_integer_option("iterations", Some(20))?; let iterations = algo.pos_integer_option("iterations", Some(10))?;
let (graph, indices, _) = edges.convert_edge_to_graph(undirected, tx, stores)?; let (graph, indices, _) = edges.convert_edge_to_graph(undirected, tx, stores)?;
let res = pagerank(&graph, theta, epsilon, iterations, poison)?;
for (idx, score) in res.iter().enumerate() { #[cfg(feature = "rayon")]
out.put( {
Tuple(vec![indices[idx].clone(), DataValue::from(*score as f64)]), let graph: DirectedCsrGraph<u32> = GraphBuilder::new()
0, .csr_layout(CsrLayout::Sorted)
.edges(
graph
.iter()
.enumerate()
.flat_map(|(fr, ls)| ls.iter().map(move |to| (fr as u32, *to as u32))),
)
.build();
let (ranks, _n_run, _) = page_rank(
&graph,
PageRankConfig::new(iterations, epsilon as f64, theta),
); );
for (idx, score) in ranks.iter().enumerate() {
out.put(
Tuple(vec![indices[idx].clone(), DataValue::from(*score as f64)]),
0,
);
}
}
#[cfg(not(feature = "rayon"))]
{
let res = pagerank(&graph, theta, epsilon, iterations, _poison)?;
for (idx, score) in res.iter().enumerate() {
out.put(
Tuple(vec![indices[idx].clone(), DataValue::from(*score as f64)]),
0,
);
}
} }
Ok(()) Ok(())
} }
@ -62,6 +90,7 @@ impl AlgoImpl for PageRank {
} }
} }
#[cfg(not(feature = "rayon"))]
fn pagerank( fn pagerank(
edges: &[Vec<usize>], edges: &[Vec<usize>],
theta: f32, theta: f32,
@ -69,6 +98,9 @@ fn pagerank(
iterations: usize, iterations: usize,
poison: Poison, poison: Poison,
) -> Result<OMatrix<f32, Dynamic, U1>> { ) -> Result<OMatrix<f32, Dynamic, U1>> {
use approx::AbsDiffEq;
use nalgebra::{Dynamic, OMatrix, U1};
let init_val = (1. - theta) / edges.len() as f32; let init_val = (1. - theta) / edges.len() as f32;
let mut g_mat = OMatrix::<f32, Dynamic, Dynamic>::repeat(edges.len(), edges.len(), init_val); let mut g_mat = OMatrix::<f32, Dynamic, Dynamic>::repeat(edges.len(), edges.len(), init_val);
let n = edges.len(); let n = edges.len();
@ -90,7 +122,7 @@ fn pagerank(
let scale_target = (n as f32).sqrt(); let scale_target = (n as f32).sqrt();
let mut last_pi_vec = pi_vec.clone(); let mut last_pi_vec = pi_vec.clone();
for _ in 0..iterations { for _ in 0..iterations {
mem::swap(&mut pi_vec, &mut last_pi_vec); std::mem::swap(&mut pi_vec, &mut last_pi_vec);
pi_vec = g_mat.tr_mul(&last_pi_vec); pi_vec = g_mat.tr_mul(&last_pi_vec);
pi_vec.normalize_mut(); pi_vec.normalize_mut();
let f = pi_vec.norm() / scale_target; let f = pi_vec.norm() / scale_target;

@ -8,7 +8,16 @@ export MACOSX_DEPLOYMENT_TARGET=10.14
#rm -fr release #rm -fr release
mkdir -p release mkdir -p release
for TARGET in x86_64-apple-darwin aarch64-apple-darwin; do for TARGET in aarch64-apple-darwin x86_64-apple-darwin; do
CARGO_PROFILE_RELEASE_LTO=fat cargo build --release -p cozoserver \
-F compact -F storage-rocksdb -F storage-tikv -F storage-sled --target $TARGET
cp target/$TARGET/release/cozoserver release/cozoserver_all-$VERSION-$TARGET # standalone
done
# copy python
cp target/wheels/*.whl release/
for TARGET in aarch64-apple-darwin x86_64-apple-darwin; do
# standalone, c, java, nodejs # standalone, c, java, nodejs
CARGO_PROFILE_RELEASE_LTO=fat cargo build --release -p cozoserver -p cozo_c -p cozo_java -p cozo-node -F compact -F storage-rocksdb --target $TARGET CARGO_PROFILE_RELEASE_LTO=fat cargo build --release -p cozoserver -p cozo_c -p cozo_java -p cozo-node -F compact -F storage-rocksdb --target $TARGET
cp target/$TARGET/release/cozoserver release/cozoserver-$VERSION-$TARGET # standalone cp target/$TARGET/release/cozoserver release/cozoserver-$VERSION-$TARGET # standalone
@ -23,15 +32,6 @@ for TARGET in x86_64-apple-darwin aarch64-apple-darwin; do
cd .. cd ..
done done
for TARGET in x86_64-apple-darwin aarch64-apple-darwin; do
CARGO_PROFILE_RELEASE_LTO=fat cargo build --release -p cozoserver \
-F compact -F storage-rocksdb -F storage-tikv -F storage-sled --target $TARGET
cp target/$TARGET/release/cozoserver release/cozoserver_all-$VERSION-$TARGET # standalone
done
# copy python
cp target/wheels/*.whl release/
# swift # swift
cd cozo-lib-swift cd cozo-lib-swift
CARGO_PROFILE_RELEASE_LTO=fat ./build-rust.sh CARGO_PROFILE_RELEASE_LTO=fat ./build-rust.sh

Loading…
Cancel
Save