diff --git a/Cargo.lock b/Cargo.lock index d492de7f..72136edc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -34,6 +34,18 @@ dependencies = [ "version_check", ] +[[package]] +name = "ahash" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf6ccdb167abbf410dcb915cabd428929d7f6a04980b54a11f26a39f1c7f7107" +dependencies = [ + "cfg-if 1.0.0", + "getrandom 0.2.8", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "0.7.20" @@ -82,6 +94,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arrayvec" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" + [[package]] name = "ascii" version = "1.1.0" @@ -99,6 +117,15 @@ dependencies = [ "syn", ] +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "atomic" version = "0.5.1" @@ -108,6 +135,12 @@ dependencies = [ "autocfg", ] +[[package]] +name = "atomic_float" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62af46d040ba9df09edc6528dae9d8e49f5f3e82f55b7d2ec31a733c38dbc49d" + [[package]] name = "atty" version = "0.2.14" @@ -238,6 +271,12 @@ version = "3.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" +[[package]] +name = "byte-slice-cast" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3ac9f8b63eca6fd385229b3675f6cc0dc5c8a5c8a54a59d4f52ffd670d87b0c" + [[package]] name = "bytemuck" version = "1.12.3" @@ -505,6 +544,7 @@ dependencies = [ "document-features", "either", "env_logger", + "graph", "itertools 0.10.5", "js-sys", "lazy_static", @@ -773,6 +813,17 @@ dependencies = [ "gzip-header", ] +[[package]] +name = "delegate" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "082a24a9967533dc5d743c602157637116fc1b52806d694a5a45e6f32567fcdd" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "derive-new" version = "0.5.9" @@ -863,6 +914,12 @@ dependencies = [ "rand 0.7.3", ] +[[package]] +name = "fast-float" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" + [[package]] name = "fastrand" version = "1.8.0" @@ -1110,6 +1167,45 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +[[package]] +name = "graph" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "624b74cfef1d6e08adeac4b5947e8e79351d0a53491b07b9d342701f8e58b68f" +dependencies = [ + "ahash 0.8.2", + "atomic_float", + "graph_builder", + "log", + "nanorand", + "num-format", + "rayon", +] + +[[package]] +name = "graph_builder" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ba851e549b0354700eab51c77fc28055936f6565e45c6ecd750f06f6414ac7f" +dependencies = [ + "atoi", + "atomic", + "byte-slice-cast", + "delegate", + "fast-float", + "fxhash", + "linereader", + "log", + "memmap2", + "num", + "num-format", + "num_cpus", + "page_size", + "parking_lot 0.12.1", + "rayon", + "thiserror", +] + [[package]] name = "grpcio" version = "0.8.3" @@ -1189,7 +1285,7 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" dependencies = [ - "ahash", + "ahash 0.7.6", ] [[package]] @@ -1519,6 +1615,15 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "linereader" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d921fea6860357575519aca014c6e22470585accdd543b370c404a8a72d0dd1d" +dependencies = [ + "memchr", +] + [[package]] name = "link-cplusplus" version = "1.0.7" @@ -1584,6 +1689,15 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memmap2" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b182332558b18d807c4ce1ca8ca983b34c3ee32765e47b3f0f69b90355cc1dc" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.6.5" @@ -1749,6 +1863,12 @@ dependencies = [ "syn", ] +[[package]] +name = "nanorand" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" + [[package]] name = "native-tls" version = "0.2.11" @@ -1818,6 +1938,31 @@ dependencies = [ "version_check", ] +[[package]] +name = "num" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + [[package]] name = "num-complex" version = "0.4.2" @@ -1827,6 +1972,16 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-format" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54b862ff8df690cf089058c98b183676a7ed0f974cc08b426800093227cbff3b" +dependencies = [ + "arrayvec", + "itoa 1.0.4", +] + [[package]] name = "num-integer" version = "0.1.45" @@ -1837,6 +1992,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + [[package]] name = "num-rational" version = "0.4.1" @@ -1844,6 +2010,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" dependencies = [ "autocfg", + "num-bigint", "num-integer", "num-traits", ] @@ -1967,6 +2134,16 @@ version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" +[[package]] +name = "page_size" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "parking_lot" version = "0.11.2" diff --git a/cozo-core/Cargo.toml b/cozo-core/Cargo.toml index 8734fbf4..45942cfa 100644 --- a/cozo-core/Cargo.toml +++ b/cozo-core/Cargo.toml @@ -42,7 +42,7 @@ io-uring = ["cozorocks?/io-uring"] ## Polyfills for the WASM target wasm = ["uuid/js", "dep:js-sys"] ## Allows threading and enables the use of the `rayon` library for parallelizing algorithms. -rayon = ["dep:rayon"] +rayon = ["dep:rayon", "dep:graph"] ## Disallows the use of threads. nothread = [] @@ -134,5 +134,6 @@ tokio = { version = "1.21.2", optional = true } sqlite = { version = "0.30.1", optional = true } sqlite3-src = { version = "0.4.0", optional = true, features = ["bundled"] } js-sys = { version = "0.3.60", optional = true } +graph = { version = "0.3.0", optional = true } #redb = "0.9.0" #ouroboros = "0.15.5" diff --git a/cozo-core/src/algo/pagerank.rs b/cozo-core/src/algo/pagerank.rs index e54fc751..d6c866b7 100644 --- a/cozo-core/src/algo/pagerank.rs +++ b/cozo-core/src/algo/pagerank.rs @@ -7,11 +7,10 @@ */ use std::collections::BTreeMap; -use std::mem; -use approx::AbsDiffEq; +#[cfg(feature = "rayon")] +use graph::prelude::{page_rank, CsrLayout, DirectedCsrGraph, GraphBuilder, PageRankConfig}; use miette::Result; -use nalgebra::{Dynamic, OMatrix, U1}; use smartstring::{LazyCompact, SmartString}; use crate::algo::AlgoImpl; @@ -34,20 +33,49 @@ impl AlgoImpl for PageRank { algo: &'a MagicAlgoApply, stores: &'a BTreeMap, out: &'a InMemRelation, - poison: Poison, + _poison: Poison, ) -> Result<()> { let edges = algo.relation(0)?; let undirected = algo.bool_option("undirected", Some(false))?; let theta = algo.unit_interval_option("theta", Some(0.8))? as f32; let epsilon = algo.unit_interval_option("epsilon", Some(0.05))? as f32; - let iterations = algo.pos_integer_option("iterations", Some(20))?; + let iterations = algo.pos_integer_option("iterations", Some(10))?; + let (graph, indices, _) = edges.convert_edge_to_graph(undirected, tx, stores)?; - let res = pagerank(&graph, theta, epsilon, iterations, poison)?; - for (idx, score) in res.iter().enumerate() { - out.put( - Tuple(vec![indices[idx].clone(), DataValue::from(*score as f64)]), - 0, + + #[cfg(feature = "rayon")] + { + let graph: DirectedCsrGraph = GraphBuilder::new() + .csr_layout(CsrLayout::Sorted) + .edges( + graph + .iter() + .enumerate() + .flat_map(|(fr, ls)| ls.iter().map(move |to| (fr as u32, *to as u32))), + ) + .build(); + + let (ranks, _n_run, _) = page_rank( + &graph, + PageRankConfig::new(iterations, epsilon as f64, theta), ); + + for (idx, score) in ranks.iter().enumerate() { + out.put( + Tuple(vec![indices[idx].clone(), DataValue::from(*score as f64)]), + 0, + ); + } + } + #[cfg(not(feature = "rayon"))] + { + let res = pagerank(&graph, theta, epsilon, iterations, _poison)?; + for (idx, score) in res.iter().enumerate() { + out.put( + Tuple(vec![indices[idx].clone(), DataValue::from(*score as f64)]), + 0, + ); + } } Ok(()) } @@ -62,6 +90,7 @@ impl AlgoImpl for PageRank { } } +#[cfg(not(feature = "rayon"))] fn pagerank( edges: &[Vec], theta: f32, @@ -69,6 +98,9 @@ fn pagerank( iterations: usize, poison: Poison, ) -> Result> { + use approx::AbsDiffEq; + use nalgebra::{Dynamic, OMatrix, U1}; + let init_val = (1. - theta) / edges.len() as f32; let mut g_mat = OMatrix::::repeat(edges.len(), edges.len(), init_val); let n = edges.len(); @@ -90,7 +122,7 @@ fn pagerank( let scale_target = (n as f32).sqrt(); let mut last_pi_vec = pi_vec.clone(); for _ in 0..iterations { - mem::swap(&mut pi_vec, &mut last_pi_vec); + std::mem::swap(&mut pi_vec, &mut last_pi_vec); pi_vec = g_mat.tr_mul(&last_pi_vec); pi_vec.normalize_mut(); let f = pi_vec.norm() / scale_target; diff --git a/scripts/build-release-mac.sh b/scripts/build-release-mac.sh index 3a7f40d6..c1622f0c 100755 --- a/scripts/build-release-mac.sh +++ b/scripts/build-release-mac.sh @@ -8,7 +8,16 @@ export MACOSX_DEPLOYMENT_TARGET=10.14 #rm -fr release mkdir -p release -for TARGET in x86_64-apple-darwin aarch64-apple-darwin; do +for TARGET in aarch64-apple-darwin x86_64-apple-darwin; do + CARGO_PROFILE_RELEASE_LTO=fat cargo build --release -p cozoserver \ + -F compact -F storage-rocksdb -F storage-tikv -F storage-sled --target $TARGET + cp target/$TARGET/release/cozoserver release/cozoserver_all-$VERSION-$TARGET # standalone +done + +# copy python +cp target/wheels/*.whl release/ + +for TARGET in aarch64-apple-darwin x86_64-apple-darwin; do # standalone, c, java, nodejs CARGO_PROFILE_RELEASE_LTO=fat cargo build --release -p cozoserver -p cozo_c -p cozo_java -p cozo-node -F compact -F storage-rocksdb --target $TARGET cp target/$TARGET/release/cozoserver release/cozoserver-$VERSION-$TARGET # standalone @@ -23,15 +32,6 @@ for TARGET in x86_64-apple-darwin aarch64-apple-darwin; do cd .. done -for TARGET in x86_64-apple-darwin aarch64-apple-darwin; do - CARGO_PROFILE_RELEASE_LTO=fat cargo build --release -p cozoserver \ - -F compact -F storage-rocksdb -F storage-tikv -F storage-sled --target $TARGET - cp target/$TARGET/release/cozoserver release/cozoserver_all-$VERSION-$TARGET # standalone -done - -# copy python -cp target/wheels/*.whl release/ - # swift cd cozo-lib-swift CARGO_PROFILE_RELEASE_LTO=fat ./build-rust.sh