merge from dev

main
Ziyang Hu 2 years ago
commit 59f783409e

474
Cargo.lock generated

@ -2,16 +2,6 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "Inflector"
version = "0.11.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3"
dependencies = [
"lazy_static",
"regex",
]
[[package]]
name = "addr2line"
version = "0.17.0"
@ -39,7 +29,7 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom 0.2.8",
"getrandom",
"once_cell",
"version_check",
]
@ -215,12 +205,6 @@ version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "bytes"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
[[package]]
name = "casey"
version = "0.3.3"
@ -239,12 +223,6 @@ dependencies = [
"jobserver",
]
[[package]]
name = "cesu8"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
[[package]]
name = "cfg-if"
version = "0.1.10"
@ -312,7 +290,7 @@ dependencies = [
"clap_lex",
"indexmap",
"once_cell",
"strsim 0.10.0",
"strsim",
"termcolor",
"textwrap 0.16.0",
]
@ -349,16 +327,6 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "combine"
version = "4.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4"
dependencies = [
"bytes",
"memchr",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.3"
@ -367,7 +335,7 @@ checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
[[package]]
name = "cozo"
version = "0.1.1"
version = "0.1.2"
dependencies = [
"approx",
"base64",
@ -391,7 +359,7 @@ dependencies = [
"pest",
"pest_derive",
"priority-queue",
"rand 0.8.5",
"rand",
"rayon",
"regex",
"rmp",
@ -410,48 +378,6 @@ dependencies = [
"uuid",
]
[[package]]
name = "cozo-nodejs"
version = "0.1.1"
dependencies = [
"cozo",
"lazy_static",
"miette",
"neon",
"serde_json",
]
[[package]]
name = "cozo_c"
version = "0.1.1"
dependencies = [
"cozo",
"lazy_static",
"miette",
"serde_json",
]
[[package]]
name = "cozo_java"
version = "0.1.1"
dependencies = [
"cozo",
"lazy_static",
"miette",
"robusta_jni",
"serde_json",
]
[[package]]
name = "cozo_py_module"
version = "0.1.1"
dependencies = [
"cozo",
"miette",
"pyo3",
"serde_json",
]
[[package]]
name = "cozorocks"
version = "0.1.0"
@ -604,41 +530,6 @@ dependencies = [
"syn",
]
[[package]]
name = "darling"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d706e75d87e35569db781a9b5e2416cff1236a47ed380831f959382ccd5f858"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim 0.9.3",
"syn",
]
[[package]]
name = "darling_macro"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72"
dependencies = [
"darling_core",
"quote",
"syn",
]
[[package]]
name = "deflate"
version = "1.0.0"
@ -678,16 +569,6 @@ dependencies = [
"termcolor",
]
[[package]]
name = "error-chain"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc"
dependencies = [
"backtrace",
"version_check",
]
[[package]]
name = "fastrand"
version = "1.8.0"
@ -709,12 +590,6 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "form_urlencoded"
version = "1.1.0"
@ -740,17 +615,6 @@ dependencies = [
"version_check",
]
[[package]]
name = "getrandom"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce"
dependencies = [
"cfg-if 1.0.0",
"libc",
"wasi 0.9.0+wasi-snapshot-preview1",
]
[[package]]
name = "getrandom"
version = "0.2.8"
@ -843,12 +707,6 @@ dependencies = [
"cxx-build",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "idna"
version = "0.3.0"
@ -869,12 +727,6 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "indoc"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adab1eaa3408fb7f0c777a73e7465fd5656136fc93b670eb6df3c88c2c1344e3"
[[package]]
name = "instant"
version = "0.1.12"
@ -911,26 +763,6 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc"
[[package]]
name = "jni"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36bcc950632e48b86da402c5c077590583da5ac0d480103611d5374e7c967a3c"
dependencies = [
"cesu8",
"combine",
"error-chain",
"jni-sys",
"log",
"walkdir",
]
[[package]]
name = "jni-sys"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
[[package]]
name = "jobserver"
version = "0.1.25"
@ -961,16 +793,6 @@ version = "0.2.137"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89"
[[package]]
name = "libloading"
version = "0.6.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "351a32417a12d5f7e82c368a66781e307834dae04c6ce0cd4456d52989229883"
dependencies = [
"cfg-if 1.0.0",
"winapi",
]
[[package]]
name = "link-cplusplus"
version = "1.0.7"
@ -980,16 +802,6 @@ dependencies = [
"cc",
]
[[package]]
name = "lock_api"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.17"
@ -1114,7 +926,7 @@ dependencies = [
"mime",
"mime_guess",
"quick-error",
"rand 0.8.5",
"rand",
"safemem",
"tempfile",
"twoway",
@ -1147,47 +959,6 @@ dependencies = [
"syn",
]
[[package]]
name = "neon"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28e15415261d880aed48122e917a45e87bb82cf0260bb6db48bbab44b7464373"
dependencies = [
"neon-build",
"neon-macros",
"neon-runtime",
"semver",
"smallvec",
]
[[package]]
name = "neon-build"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bac98a702e71804af3dacfde41edde4a16076a7bbe889ae61e56e18c5b1c811"
[[package]]
name = "neon-macros"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7288eac8b54af7913c60e0eb0e2a7683020dffa342ab3fd15e28f035ba897cf"
dependencies = [
"quote",
"syn",
"syn-mid",
]
[[package]]
name = "neon-runtime"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4676720fa8bb32c64c3d9f49c47a47289239ec46b4bdb66d0913cc512cb0daca"
dependencies = [
"cfg-if 1.0.0",
"libloading",
"smallvec",
]
[[package]]
name = "num-complex"
version = "0.4.2"
@ -1282,29 +1053,6 @@ version = "3.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f"
[[package]]
name = "parking_lot"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dc9e0dc2adc1c69d09143aff38d3d30c5c3f0df0dad82e6d25547af174ebec0"
dependencies = [
"cfg-if 1.0.0",
"libc",
"redox_syscall",
"smallvec",
"windows-sys",
]
[[package]]
name = "parse-zoneinfo"
version = "0.3.0"
@ -1396,7 +1144,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf"
dependencies = [
"phf_shared",
"rand 0.8.5",
"rand",
]
[[package]]
@ -1464,66 +1212,6 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "pyo3"
version = "0.17.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "268be0c73583c183f2b14052337465768c07726936a260f480f0857cb95ba543"
dependencies = [
"cfg-if 1.0.0",
"indoc",
"libc",
"memoffset",
"parking_lot",
"pyo3-build-config",
"pyo3-ffi",
"pyo3-macros",
"unindent",
]
[[package]]
name = "pyo3-build-config"
version = "0.17.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28fcd1e73f06ec85bf3280c48c67e731d8290ad3d730f8be9dc07946923005c8"
dependencies = [
"once_cell",
"target-lexicon",
]
[[package]]
name = "pyo3-ffi"
version = "0.17.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f6cb136e222e49115b3c51c32792886defbfb0adead26a688142b346a0b9ffc"
dependencies = [
"libc",
"pyo3-build-config",
]
[[package]]
name = "pyo3-macros"
version = "0.17.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94144a1266e236b1c932682136dc35a9dee8d3589728f68130c7c3861ef96b28"
dependencies = [
"proc-macro2",
"pyo3-macros-backend",
"quote",
"syn",
]
[[package]]
name = "pyo3-macros-backend"
version = "0.17.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8df9be978a2d2f0cdebabb03206ed73b11314701a5bfe71b0d753b81997777f"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "quick-error"
version = "1.2.3"
@ -1539,19 +1227,6 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
dependencies = [
"getrandom 0.1.16",
"libc",
"rand_chacha 0.2.2",
"rand_core 0.5.1",
"rand_hc",
]
[[package]]
name = "rand"
version = "0.8.5"
@ -1559,18 +1234,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha 0.3.1",
"rand_core 0.6.4",
]
[[package]]
name = "rand_chacha"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
dependencies = [
"ppv-lite86",
"rand_core 0.5.1",
"rand_chacha",
"rand_core",
]
[[package]]
@ -1580,16 +1245,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core 0.6.4",
]
[[package]]
name = "rand_core"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
dependencies = [
"getrandom 0.1.16",
"rand_core",
]
[[package]]
@ -1598,16 +1254,7 @@ version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom 0.2.8",
]
[[package]]
name = "rand_hc"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
dependencies = [
"rand_core 0.5.1",
"getrandom",
]
[[package]]
@ -1728,33 +1375,6 @@ dependencies = [
"rmp",
]
[[package]]
name = "robusta-codegen"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f66a33c846c7b892c2fd20ff58ba34859e6d83e491d51ce94f678cc7157ad774"
dependencies = [
"Inflector",
"darling",
"proc-macro-error",
"proc-macro2",
"quote",
"rand 0.7.3",
"syn",
]
[[package]]
name = "robusta_jni"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a61f3ca304f439f93ae5f59f2fe65f4c2693b253c7656dc242c061996a4b01f8"
dependencies = [
"jni",
"paste",
"robusta-codegen",
"static_assertions",
]
[[package]]
name = "rouille"
version = "3.6.1"
@ -1769,7 +1389,7 @@ dependencies = [
"multipart",
"num_cpus",
"percent-encoding",
"rand 0.8.5",
"rand",
"serde",
"serde_derive",
"serde_json",
@ -1819,15 +1439,6 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
@ -1850,21 +1461,6 @@ dependencies = [
"untrusted",
]
[[package]]
name = "semver"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
dependencies = [
"semver-parser",
]
[[package]]
name = "semver-parser"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]]
name = "serde"
version = "1.0.147"
@ -1971,12 +1567,6 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "strsim"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c"
[[package]]
name = "strsim"
version = "0.10.0"
@ -2022,23 +1612,6 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "syn-mid"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baa8e7560a164edb1621a55d18a0c59abf49d360f47aa7b821061dd7eea7fac9"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "target-lexicon"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c02424087780c9b71cc96799eaeddff35af2bc513278cda5c99fc1f5d026d3c1"
[[package]]
name = "tempfile"
version = "3.3.0"
@ -2281,12 +1854,6 @@ version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "unindent"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58ee9362deb4a96cef4d437d1ad49cffc9b9e92d202b6995674e928ce684f112"
[[package]]
name = "untrusted"
version = "0.7.1"
@ -2311,7 +1878,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "feb41e78f93363bb2df8b0e86a2ca30eed7806ea16ea0c790d757cf93f79be83"
dependencies = [
"atomic",
"getrandom 0.2.8",
"getrandom",
"serde",
]
@ -2321,23 +1888,6 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "walkdir"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
dependencies = [
"same-file",
"winapi",
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.9.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"

@ -1,6 +1,6 @@
[package]
name = "cozo"
version = "0.1.1"
version = "0.1.2"
edition = "2021"
description = "A general-purpose, transactional, relational database that uses Datalog and focuses on graph data and algorithms"
authors = ["Ziyang Hu"]
@ -68,4 +68,4 @@ lto = true
#debug = true
[workspace]
members = ["cozorocks", "python", "nodejs", "java", "c"]
members = ["cozorocks"]

@ -0,0 +1,216 @@
[![GitHub Workflow Status](https://img.shields.io/github/workflow/status/cozodb/cozo/Build)](https://github.com/cozodb/cozo/actions/workflows/build.yml)
[![Crates.io](https://img.shields.io/crates/v/cozo)](https://crates.io/crates/cozo)
[![GitHub](https://img.shields.io/github/license/cozodb/cozo)](https://github.com/cozodb/cozo/blob/main/LICENSE.txt)
# `cozo`
A general-purpose, transactional, relational database
that uses Datalog for queries, is embeddable, and focuses on graph data and algorithms.
## Features
* Relational database with [Datalog](https://en.wikipedia.org/wiki/Datalog) as the query language
* Recursive queries, recursion through (safe) aggregations, capable of expressing complex graph operations and
algorithms
* Fixed rules for efficient whole-graph algorithms which integrate seamlessly with Datalog
* Rich set of built-in functions and aggregations
* Easy to use from any programming language, or as a standalone program
* [Embeddable](https://cozodb.github.io/current/manual/setup.html#embedding-cozo), with ready-to-use bindings for
Python, NodeJS and Java
* Single executable, trivial to deploy and run
* [Jupyter](https://jupyter.org/) notebook integration, plays well with the data science ecosystem
* Modern, clean, flexible syntax, informative error messages
## Teasers
Here `*route` is a relation with two columns `src` and `dst`,
where each row represents a route between two airports.
Find airports reachable by one stop from Frankfurt Airport (code `FRA`):
TODO replace with images
```js
?[dst] := *route{src: 'FRA', dst: stop},
*route{src: stop, dst}
```
Find airports reachable from Frankfurt with any number of stops
with code starting with the letter `A`:
```js
reachable[dst] := *route{src: 'FRA', dst}
reachable[dst] := reachable[src], *route{src, dst}
?[airport] := reachable[airport], starts_with(airport, 'A')
```
Compute the shortest path between Frankfurt and all airports in the world:
```js
shortest_paths[dst, shortest(path)] := *route{src: 'FRA', dst},
path = ['FRA', dst]
shortest_paths[dst, shortest(path)] := shortest_paths[stop, prev_path],
*route{src: stop, dst},
path = append(prev_path, dst)
?[dst, path] := shortest_paths[dst, path]
```
Compute the shortest path again, but with a built-in algorithm:
```js
starting[airport] := airport = 'FRA'
?[src, dst, cost, path] <~ ShortestPathDijkstra(*route[], starting[])
```
Nice error messages when things go wrong:
xxx
## Getting started
First we need to get Cozo installed on the local machine. As Cozo is embeddable,
there are lots of options for how we run Cozo. Follow whichever of the following guides suits you best:
* Embedded in Python with JupyterLab (best interactive experience, recommended if you are comfortable with the Python
ecosystem)
* Embedded in Python
* Embedded in NodeJS (Javascript)
* Embedded in Java (or any JVM language)
* Embedded in Rust
* Client/server with HTTP API
* Embedded in C/C++, or any language that has a C FFI
(Golang, R, Haskell, CommonLisp, Julia, Fortran, C#, Swift, ...)
After you have it installed, you can start learning CozoScript:
* Start with the [Tutorial](https://nbviewer.org/github/cozodb/cozo/blob/main/docs/tutorial/tutorial.ipynb) to learn the
basics;
* Continue with the [Manual](https://cozodb.github.io/current/manual/) for the fine points.
## Bug reports, discussions
If you encounter a bug, first search for [past issues](https://github.com/cozodb/cozo/issues) to see
if it has already been reported. If not, open a new issue.
Please provide sufficient information so that we can diagnose the problem faster.
Other discussions about Cozo should be in [GitHub discussions](https://github.com/cozodb/cozo/discussions).
## Use cases
As Cozo is a general-purpose database,
it can be used in situations
where traditional databases such as PostgreSQL and SQLite
are used.
However, Cozo is designed to overcome several shortcomings
of traditional databases, and hence fares especially well
in specific situations:
* You have a lot of interconnected relations
and the usual queries need to relate many relations together.
In other words, you need to query a complex graph.
* An example is a system granting permissions to users for specific tasks.
In this case, users may have roles,
belong to an organization hierarchy, and tasks similarly have organizations
and special provisions associated with them.
The granting process itself may also be a complicated rule encoded as data
within the database.
* With a traditional database,
the corresponding SQL tends to become
an entangled web of nested queries, with many tables joined together,
and maybe even with some recursive CTE thrown in. This is hard to maintain,
and worse, the performance is unpredictable since query optimizers in general
fail when you have over twenty tables joined together.
* With Cozo, on the other hand, [Horn clauses](https://en.wikipedia.org/wiki/Horn_clause)
make it easy to break
the logic into smaller pieces and write clear, easily testable queries.
Furthermore, the deterministic evaluation order makes identifying and solving
performance problems easier.
* Your data may be simple, even a single table, but it is inherently a graph.
* We have seen an example in
the [Tutorial](https://nbviewer.org/github/cozodb/cozo/blob/main/docs/tutorial/tutorial.ipynb):
the air route dataset, where the key relation contains the routes connecting airports.
* In traditional databases, when you are given a new relation,
you try to understand it by running aggregations on it to collect statistics:
what is the distribution of values, how are the columns correlated, etc.
* In Cozo you can do the same exploratory analysis,
except now you also have graph algorithms that you can
easily apply to understand things such as: what is the most _connected_ entity,
how are the nodes connected, and what is the _community_ structure among the nodes.
* Your data contains hidden structures that only become apparent when you
identify the _scales_ of the relevant structures.
* Examples are most real networks, such as social networks,
which have a very rich hierarchy of structures.
* In a traditional database, you are limited to doing nested aggregations and filtering,
i.e. a form of multifaceted data analysis. For example, you can analyze by gender, geography,
job or combinations of them. For structures hidden in other ways,
or if such categorizing tags are not already present in your data,
you are out of luck.
* With Cozo, you can now deal with emergent and fuzzy structures by using e.g.
community detection algorithms, and collapse the original graph into a coarse-grained
graph consisting of super-nodes and super-edges.
The process can be iterated to gain insights into even higher-order emergent structures.
This is possible in a social network with only edges and _no_ categorizing tags
associated with nodes at all,
and the discovered structures almost always have meanings correlated to real-world events and
organizations, for example, forms of collusion and crime rings.
Also, from a performance perspective,
coarse-graining is a required step in analyzing so-called big data,
since many graph algorithms have high complexity and are only applicable to
the coarse-grained small or medium networks.
* You want to understand your live business data better by augmenting it into a _knowledge graph_.
* For example, your sales database contains product, buyer, inventory, and invoice tables.
The augmentation is external data about the entities in your data in the form of _taxonomies_
and _ontologies_ in layers.
* This is inherently a graph-theoretic undertaking and traditional databases are not suitable.
Usually, a dedicated graph processing engine is used, separate from the main database.
* With Cozo, it is possible to keep your live data and knowledge graph analysis together,
and importing new external data and doing analysis is just a few lines of code away.
This ease of use means that you will do the analysis much more often, with a perhaps much wider scope.
## Status of the project
Cozo is very young and **not** production-ready yet,
but we encourage you to try it out for your use case.
Any feedback is welcome.
Versions before 1.0 do not promise syntax/API stability or storage compatibility.
We promise that when you try to open database files created with an incompatible version,
Cozo will at least refuse to start instead of silently corrupting your data.
## Plans for development
In the near term, before we reach version 1.0:
* Backup/restore functionality
* Many, many more tests to ensure correctness
* Benchmarks
Further down the road:
* More tuning options
* Streaming/reactive data
* Extension system
* The core of Cozo should be kept small at all times. Additional functionalities should be in extensions for the
user to choose from.
* What can be extended: datatypes, functions, aggregations, and fixed algorithms.
* Extensions should be written in a compiled language such as Rust or C++ and compiled into a dynamic library, to be
loaded by Cozo at runtime.
* There will probably be a few "official" extension bundles, such as
* arbitrary precision arithmetic
* full-text "indexing" and searching
* relations that can emulate spatial and other types of non-lexicographic indices
* reading from external databases directly
* more exotic graph algorithms
Ideas and discussions are welcome.
## Storage engine
Cozo is written in Rust, with [RocksDB](http://rocksdb.org/) as the storage engine
(this may change in the future).
We manually wrote the C++/Rust bindings for RocksDB with [cxx](https://cxx.rs/).
## Licensing
The contents of this project are licensed under AGPL-3.0 or later, with the exception
of files under `cozorocks/`, which are licensed under MIT, or Apache-2.0, or BSD-3-Clause.

@ -1,19 +0,0 @@
[package]
name = "cozo_c"
version = "0.1.1"
edition = "2021"
[lib]
crate-type = ["cdylib", "staticlib"]
[features]
jemalloc = ["cozo/jemalloc"]
io-uring = ["cozo/io-uring"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
cozo = { version = "0.1.1", path = ".." }
miette = { version = "=5.3.0", features = ["fancy"] }
serde_json = "1.0.81"
lazy_static = "1.4.0"

@ -1,6 +0,0 @@
language = "C"
include_guard = "cozo_c_h"
autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. */"
cpp_compat = true
documentation = true
header = "/* Copyright 2022, The Cozo Project Authors. Licensed under MIT/Apache-2.0/BSD-3-Clause. */"

@ -1,72 +0,0 @@
/* Copyright 2022, The Cozo Project Authors. Licensed under MIT/Apache-2.0/BSD-3-Clause. */
#ifndef cozo_c_h
#define cozo_c_h
/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. */
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
/**
 * Open a database.
 *
 * `path`: should contain the UTF-8 encoded path name as a null-terminated C-string.
 * `db_id`: will contain the id of the database opened.
 *          It is written only when the call succeeds.
 *
 * When the function is successful, null pointer is returned,
 * otherwise a pointer to a C-string containing the error message will be returned.
 * The returned C-string must be freed with `cozo_free_str`.
 */
int8_t *cozo_open_db(const int8_t *path, int32_t *db_id);
/**
 * Close a database.
 *
 * `id`: the ID representing the database to close.
 *
 * Returns `true` if the database is closed,
 * `false` if it has already been closed, or does not exist.
 */
bool cozo_close_db(int32_t id);
/**
 * Run query against a database.
 *
 * `db_id`: the ID representing the database to run the query.
 *          If no open database has this ID, the call fails with the
 *          error message "database already closed".
 * `script_raw`: a UTF-8 encoded C-string for the CozoScript to execute.
 * `params_raw`: a UTF-8 encoded C-string for the params of the query,
 *               in JSON format. You must always pass in a valid JSON map,
 *               even if you do not use params in your query
 *               (pass "{}" in this case).
 * `errored`: will point to `false` if the query is successful,
 *            `true` if an error occurred.
 *
 * Returns a UTF-8-encoded C-string that must be freed with `cozo_free_str`.
 * If `*errored` is false, then the string contains the JSON return value of the query.
 * If `*errored` is true, then the string contains the error message.
 */
int8_t *cozo_run_query(int32_t db_id,
const int8_t *script_raw,
const int8_t *params_raw,
bool *errored);
/**
 * Free any C-string returned from the Cozo C API.
 * Must be called exactly once for each returned C-string.
 *
 * `s`: the C-string to free.
 */
void cozo_free_str(int8_t *s);
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif /* cozo_c_h */

@ -1,157 +0,0 @@
/*
* Copyright 2022, The Cozo Project Authors. Licensed under MIT/Apache-2.0/BSD-3-Clause.
*/
#![warn(rust_2018_idioms, future_incompatible)]
use std::collections::BTreeMap;
use std::ffi::{CStr, CString};
use std::ptr::null_mut;
use std::sync::atomic::{AtomicI32, Ordering};
use std::sync::Mutex;
use lazy_static::lazy_static;
use cozo::Db;
/// Registry of the databases opened through the C API, keyed by integer ID.
#[derive(Default)]
struct Handles {
/// Next ID to hand out; `cozo_open_db` advances it with `fetch_add(1, ..)`.
/// Starts at 0 because the struct is built with `Default`.
current: AtomicI32,
/// Open databases, keyed by the ID that was returned to the caller.
dbs: Mutex<BTreeMap<i32, Db>>,
}
// The single registry instance used by `cozo_open_db`, `cozo_close_db`
// and `cozo_run_query`.
lazy_static! {
static ref HANDLES: Handles = Handles::default();
}
/// Open a database.
///
/// `path`: should contain the UTF-8 encoded path name as a null-terminated C-string.
/// `db_id`: will contain the id of the database opened. It is written only on success.
///
/// When the function is successful, null pointer is returned,
/// otherwise a pointer to a C-string containing the error message will be returned.
/// The returned C-string must be freed with `cozo_free_str`.
///
/// A null `path` is reported as an error instead of being dereferenced.
///
/// # Safety
///
/// `path` must be null or point to a valid null-terminated C-string, and `db_id`
/// must refer to writable memory for an `i32`.
#[no_mangle]
pub unsafe extern "C" fn cozo_open_db(path: *const i8, db_id: &mut i32) -> *mut i8 {
    // Hands `msg` to the caller as an owned C-string. Interior NUL bytes are
    // removed rather than unwrapped on, so this can never panic across the
    // FFI boundary.
    fn into_c_string(msg: String) -> *mut i8 {
        let c_string = CString::new(msg).unwrap_or_else(|err| {
            let mut bytes = err.into_vec();
            bytes.retain(|&b| b != 0);
            CString::new(bytes).expect("interior NUL bytes were just removed")
        });
        // `cast` bridges `c_char` and `i8` on targets where `c_char` is `u8`.
        c_string.into_raw().cast()
    }

    // `CStr::from_ptr` requires a non-null pointer.
    if path.is_null() {
        return into_c_string(String::from("the given path is a null pointer"));
    }
    let path = match CStr::from_ptr(path.cast()).to_str() {
        Ok(p) => p,
        Err(err) => return into_c_string(err.to_string()),
    };
    match Db::new(path) {
        Ok(db) => {
            // Reserve a fresh ID first, then register the database under it.
            let id = HANDLES.current.fetch_add(1, Ordering::AcqRel);
            let mut dbs = HANDLES.dbs.lock().unwrap();
            dbs.insert(id, db);
            *db_id = id;
            null_mut()
        }
        Err(err) => into_c_string(err.to_string()),
    }
}
/// Close a database.
///
/// `id`: the ID of the database to close, as handed out by `cozo_open_db`.
///
/// Returns `true` when a database registered under `id` was removed by this call,
/// and `false` when there is none (it was already closed, or never existed).
#[no_mangle]
pub unsafe extern "C" fn cozo_close_db(id: i32) -> bool {
    // The lock guard is a temporary of this statement, so the registry is
    // unlocked again before the removed `Db` (if any) is dropped at the end
    // of the function.
    let removed = HANDLES.dbs.lock().unwrap().remove(&id);
    removed.is_some()
}
/// Run query against a database.
///
/// `db_id`: the ID representing the database to run the query.
/// `script_raw`: a UTF-8 encoded C-string for the CozoScript to execute.
/// `params_raw`: a UTF-8 encoded C-string for the params of the query,
///               in JSON format. You must always pass in a valid JSON map,
///               even if you do not use params in your query
///               (pass "{}" in this case).
/// `errored`: will point to `false` if the query is successful,
///            `true` if an error occurred. It is written on every call.
///
/// Returns a UTF-8-encoded C-string that must be freed with `cozo_free_str`.
/// If `*errored` is false, then the string contains the JSON return value of the query.
/// If `*errored` is true, then the string contains the error message.
///
/// Null `script_raw` or `params_raw` pointers are reported as errors instead of
/// being dereferenced.
///
/// # Safety
///
/// `script_raw` and `params_raw` must each be null or point to a valid
/// null-terminated C-string, and `errored` must refer to writable memory for a `bool`.
#[no_mangle]
pub unsafe extern "C" fn cozo_run_query(
    db_id: i32,
    script_raw: *const i8,
    params_raw: *const i8,
    errored: &mut bool,
) -> *mut i8 {
    // Hands `msg` to the caller as an owned C-string. Interior NUL bytes are
    // removed rather than unwrapped on, so this can never panic across the
    // FFI boundary.
    fn into_c_string(msg: String) -> *mut i8 {
        let c_string = CString::new(msg).unwrap_or_else(|err| {
            let mut bytes = err.into_vec();
            bytes.retain(|&b| b != 0);
            CString::new(bytes).expect("interior NUL bytes were just removed")
        });
        // `cast` bridges `c_char` and `i8` on targets where `c_char` is `u8`.
        c_string.into_raw().cast()
    }

    // Borrows a C-string argument as UTF-8 text, rejecting null pointers.
    unsafe fn read_arg<'a>(ptr: *const i8, what: &str) -> Result<&'a str, String> {
        if ptr.is_null() {
            return Err(format!("the given {} argument is a null pointer", what));
        }
        CStr::from_ptr(ptr.cast())
            .to_str()
            .map_err(|err| err.to_string())
    }

    // Performs the whole query; `Ok` carries the JSON result text and `Err`
    // the error message. Checks run in the same order as before: script,
    // database lookup, params decoding, params shape, execution.
    unsafe fn run(
        db_id: i32,
        script_raw: *const i8,
        params_raw: *const i8,
    ) -> Result<String, String> {
        let script = read_arg(script_raw, "script")?;

        // Clone the `Db` out of the registry inside a short scope so the lock
        // is released before the query runs.
        let db_ref = {
            let dbs = HANDLES.dbs.lock().unwrap();
            dbs.get(&db_id).cloned()
        };
        let db = db_ref.ok_or_else(|| String::from("database already closed"))?;

        let params_str = read_arg(params_raw, "params")?;
        let params_map: serde_json::Value = serde_json::from_str(params_str)
            .map_err(|_| String::from("the given params argument is not valid JSON"))?;
        let params_arg: BTreeMap<_, _> = match params_map {
            serde_json::Value::Object(m) => m.into_iter().collect(),
            _ => return Err(String::from("the given params argument is not a JSON map")),
        };

        db.run_script(script, &params_arg)
            .map(|json| json.to_string())
            .map_err(|err| format!("{:?}", err))
    }

    let outcome = run(db_id, script_raw, params_raw);
    // Always write the flag, so a stale `true` left in the caller's variable
    // cannot make a successful query look like a failure.
    *errored = outcome.is_err();
    match outcome {
        Ok(text) | Err(text) => into_c_string(text),
    }
}
/// Free any C-string returned from the Cozo C API.
/// Must be called exactly once for each returned C-string.
///
/// `s`: the C-string to free. Passing a null pointer is a no-op.
///
/// # Safety
///
/// `s`, when non-null, must be a pointer previously returned by one of the
/// Cozo C API functions, and must not have been freed already.
#[no_mangle]
pub unsafe extern "C" fn cozo_free_str(s: *mut i8) {
    // `CString::from_raw` must never be given a null pointer, so ignore it.
    if s.is_null() {
        return;
    }
    // SAFETY: `s` is non-null, and the caller guarantees it came from
    // `CString::into_raw` and has not been freed yet. Taking ownership back
    // and dropping the `CString` releases the allocation.
    // `.cast()` bridges `i8` and `c_char` on targets where they differ.
    drop(CString::from_raw(s.cast()));
}

141
docs/.gitignore vendored

@ -1,141 +0,0 @@
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
tutorial/test.ipynb

@ -1,93 +0,0 @@
## Contributor Agreement
Thank you for your interest in contributing to Ziyang Hu's Cozo ("We" or "Us").
The purpose of this contributor agreement ("Agreement") is to clarify and document the rights granted by contributors to Us.
### How to use this Contributor Agreement
If You are an employee and have created the Contribution as part of your employment, You need to have Your employer approve this Agreement. If You do not own the Copyright in the entire work of authorship, any other author of the Contribution should also sign this — in any event, please contact Us at hu.ziyang@cantab.net
### 1\. Definitions
**"You"** means the individual Copyright owner who Submits a Contribution to Us.
**"Legal Entity"** means an entity that is not a natural person.
**"Affiliate"** means any other Legal Entity that controls, is controlled by, or under common control with that Legal Entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such Legal Entity, whether by contract or otherwise, (ii) ownership of fifty percent (50%) or more of the outstanding shares or securities that vote to elect the management or other persons who direct such Legal Entity or (iii) beneficial ownership of such entity.
**"Contribution"** means any original work of authorship, including any original modifications or additions to an existing work of authorship, Submitted by You to Us, in which You own the Copyright.
**"Copyright"** means all rights protecting works of authorship, including copyright, moral and neighboring rights, as appropriate, for the full term of their existence.
**"Material"** means the software or documentation made available by Us to third parties. When this Agreement covers more than one software project, the Material means the software or documentation to which the Contribution was Submitted. After You Submit the Contribution, it may be included in the Material.
**"Submit"** means any act by which a Contribution is transferred to Us by You by means of tangible or intangible media, including but not limited to electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, Us, but excluding any transfer that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution."
**"Documentation"** means any non-software portion of a Contribution.
### 2\. License grant
#### 2.1 Copyright license to Us
Subject to the terms and conditions of this Agreement, You hereby grant to Us a worldwide, royalty-free, Exclusive, perpetual and irrevocable (except as stated in Section 8.1) license, with the right to transfer an unlimited number of non-exclusive licenses or to grant sublicenses to third parties, under the Copyright covering the Contribution to use the Contribution by all means, including, but not limited to:
* publish the Contribution,
* modify the Contribution,
* prepare derivative works based upon or containing the Contribution and/or to combine the Contribution with other Materials,
* reproduce the Contribution in original or modified form,
* distribute, to make the Contribution available to the public, display and publicly perform the Contribution in original or modified form.
#### 2.2 Moral rights
Moral Rights remain unaffected to the extent they are recognized and not waivable by applicable law. Notwithstanding, You may add your name to the attribution mechanism customary used in the Materials you Contribute to, such as the header of the source code files of Your Contribution, and We will respect this attribution when using Your Contribution.
#### 2.3 Copyright license back to You
Upon such grant of rights to Us, We immediately grant to You a worldwide, royalty-free, non-exclusive, perpetual and irrevocable license, with the right to transfer an unlimited number of non-exclusive licenses or to grant sublicenses to third parties, under the Copyright covering the Contribution to use the Contribution by all means, including, but not limited to:
* publish the Contribution,
* modify the Contribution,
* prepare derivative works based upon or containing the Contribution and/or to combine the Contribution with other Materials,
* reproduce the Contribution in original or modified form,
* distribute, to make the Contribution available to the public, display and publicly perform the Contribution in original or modified form.
This license back is limited to the Contribution and does not provide any rights to the Material.
### 3\. Patents
#### 3.1 Patent license
Subject to the terms and conditions of this Agreement You hereby grant to Us and to recipients of Materials distributed by Us a worldwide, royalty-free, non-exclusive, perpetual and irrevocable (except as stated in Section 3.2) patent license, with the right to transfer an unlimited number of non-exclusive licenses or to grant sublicenses to third parties, to make, have made, use, sell, offer for sale, import and otherwise transfer the Contribution and the Contribution in combination with any Material (and portions of such combination). This license applies to all patents owned or controlled by You, whether already acquired or hereafter acquired, that would be infringed by making, having made, using, selling, offering for sale, importing or otherwise transferring of Your Contribution(s) alone or by combination of Your Contribution(s) with any Material.
#### 3.2 Revocation of patent license
You reserve the right to revoke the patent license stated in section 3.1 if We make any infringement claim that is targeted at your Contribution and not asserted for a Defensive Purpose. An assertion of claims of the Patents shall be considered for a "Defensive Purpose" if the claims are asserted against an entity that has filed, maintained, threatened, or voluntarily participated in a patent infringement lawsuit against Us or any of Our licensees.
### 4. Disclaimer
THE CONTRIBUTION IS PROVIDED "AS IS". MORE PARTICULARLY, ALL EXPRESS OR IMPLIED WARRANTIES INCLUDING, WITHOUT LIMITATION, ANY IMPLIED WARRANTY OF SATISFACTORY QUALITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT ARE EXPRESSLY DISCLAIMED BY YOU TO US AND BY US TO YOU. TO THE EXTENT THAT ANY SUCH WARRANTIES CANNOT BE DISCLAIMED, SUCH WARRANTY IS LIMITED IN DURATION AND EXTENT TO THE MINIMUM PERIOD AND EXTENT PERMITTED BY LAW.
### 5. Consequential damage waiver
TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT WILL YOU OR WE BE LIABLE FOR ANY LOSS OF PROFITS, LOSS OF ANTICIPATED SAVINGS, LOSS OF DATA, INDIRECT, SPECIAL, INCIDENTAL, CONSEQUENTIAL AND EXEMPLARY DAMAGES ARISING OUT OF THIS AGREEMENT REGARDLESS OF THE LEGAL OR EQUITABLE THEORY (CONTRACT, TORT OR OTHERWISE) UPON WHICH THE CLAIM IS BASED.
### 6. Approximation of disclaimer and damage waiver
IF THE DISCLAIMER AND DAMAGE WAIVER MENTIONED IN SECTION 4. AND SECTION 5. CANNOT BE GIVEN LEGAL EFFECT UNDER APPLICABLE LOCAL LAW, REVIEWING COURTS SHALL APPLY LOCAL LAW THAT MOST CLOSELY APPROXIMATES AN ABSOLUTE WAIVER OF ALL CIVIL OR CONTRACTUAL LIABILITY IN CONNECTION WITH THE CONTRIBUTION.
### 7. Term
7.1 This Agreement shall come into effect upon Your acceptance of the terms and conditions.
7.3 In the event of a termination of this Agreement Sections 4, 5, 6, 7 and 8 shall survive such termination and shall remain in full force thereafter. For the avoidance of doubt, Free and Open Source Software (sub)licenses that have already been granted for Contributions at the date of the termination shall remain in full force after the termination of this Agreement.
### 8 Miscellaneous
8.1 This Agreement sets out the entire agreement between You and Us for Your Contributions to Us and overrides all other agreements or understandings.
8.2 In case of Your death, this agreement shall continue with Your heirs. In case of more than one heir, all heirs must exercise their rights through a commonly authorized person.
8.3 If any provision of this Agreement is found void and unenforceable, such provision will be replaced to the extent possible with a provision that comes closest to the meaning of the original provision and that is enforceable. The terms and conditions set forth in this Agreement shall apply notwithstanding any failure of essential purpose of this Agreement or any limited remedy to the maximum extent possible under law.
8.4 You agree to notify Us of any facts or circumstances of which you become aware that would make this Agreement inaccurate in any respect.

@ -1,427 +0,0 @@
Attribution-ShareAlike 4.0 International
=======================================================================
Creative Commons Corporation ("Creative Commons") is not a law firm and
does not provide legal services or legal advice. Distribution of
Creative Commons public licenses does not create a lawyer-client or
other relationship. Creative Commons makes its licenses and related
information available on an "as-is" basis. Creative Commons gives no
warranties regarding its licenses, any material licensed under their
terms and conditions, or any related information. Creative Commons
disclaims all liability for damages resulting from their use to the
fullest extent possible.
Using Creative Commons Public Licenses
Creative Commons public licenses provide a standard set of terms and
conditions that creators and other rights holders may use to share
original works of authorship and other material subject to copyright
and certain other rights specified in the public license below. The
following considerations are for informational purposes only, are not
exhaustive, and do not form part of our licenses.
Considerations for licensors: Our public licenses are
intended for use by those authorized to give the public
permission to use material in ways otherwise restricted by
copyright and certain other rights. Our licenses are
irrevocable. Licensors should read and understand the terms
and conditions of the license they choose before applying it.
Licensors should also secure all rights necessary before
applying our licenses so that the public can reuse the
material as expected. Licensors should clearly mark any
material not subject to the license. This includes other CC-
licensed material, or material used under an exception or
limitation to copyright. More considerations for licensors:
wiki.creativecommons.org/Considerations_for_licensors
Considerations for the public: By using one of our public
licenses, a licensor grants the public permission to use the
licensed material under specified terms and conditions. If
the licensor's permission is not necessary for any reason--for
example, because of any applicable exception or limitation to
copyright--then that use is not regulated by the license. Our
licenses grant only permissions under copyright and certain
other rights that a licensor has authority to grant. Use of
the licensed material may still be restricted for other
reasons, including because others have copyright or other
rights in the material. A licensor may make special requests,
such as asking that all changes be marked or described.
Although not required by our licenses, you are encouraged to
respect those requests where reasonable. More considerations
for the public:
wiki.creativecommons.org/Considerations_for_licensees
=======================================================================
Creative Commons Attribution-ShareAlike 4.0 International Public
License
By exercising the Licensed Rights (defined below), You accept and agree
to be bound by the terms and conditions of this Creative Commons
Attribution-ShareAlike 4.0 International Public License ("Public
License"). To the extent this Public License may be interpreted as a
contract, You are granted the Licensed Rights in consideration of Your
acceptance of these terms and conditions, and the Licensor grants You
such rights in consideration of benefits the Licensor receives from
making the Licensed Material available under these terms and
conditions.
Section 1 -- Definitions.
a. Adapted Material means material subject to Copyright and Similar
Rights that is derived from or based upon the Licensed Material
and in which the Licensed Material is translated, altered,
arranged, transformed, or otherwise modified in a manner requiring
permission under the Copyright and Similar Rights held by the
Licensor. For purposes of this Public License, where the Licensed
Material is a musical work, performance, or sound recording,
Adapted Material is always produced where the Licensed Material is
synched in timed relation with a moving image.
b. Adapter's License means the license You apply to Your Copyright
and Similar Rights in Your contributions to Adapted Material in
accordance with the terms and conditions of this Public License.
c. BY-SA Compatible License means a license listed at
creativecommons.org/compatiblelicenses, approved by Creative
Commons as essentially the equivalent of this Public License.
d. Copyright and Similar Rights means copyright and/or similar rights
closely related to copyright including, without limitation,
performance, broadcast, sound recording, and Sui Generis Database
Rights, without regard to how the rights are labeled or
categorized. For purposes of this Public License, the rights
specified in Section 2(b)(1)-(2) are not Copyright and Similar
Rights.
e. Effective Technological Measures means those measures that, in the
absence of proper authority, may not be circumvented under laws
fulfilling obligations under Article 11 of the WIPO Copyright
Treaty adopted on December 20, 1996, and/or similar international
agreements.
f. Exceptions and Limitations means fair use, fair dealing, and/or
any other exception or limitation to Copyright and Similar Rights
that applies to Your use of the Licensed Material.
g. License Elements means the license attributes listed in the name
of a Creative Commons Public License. The License Elements of this
Public License are Attribution and ShareAlike.
h. Licensed Material means the artistic or literary work, database,
or other material to which the Licensor applied this Public
License.
i. Licensed Rights means the rights granted to You subject to the
terms and conditions of this Public License, which are limited to
all Copyright and Similar Rights that apply to Your use of the
Licensed Material and that the Licensor has authority to license.
j. Licensor means the individual(s) or entity(ies) granting rights
under this Public License.
k. Share means to provide material to the public by any means or
process that requires permission under the Licensed Rights, such
as reproduction, public display, public performance, distribution,
dissemination, communication, or importation, and to make material
available to the public including in ways that members of the
public may access the material from a place and at a time
individually chosen by them.
l. Sui Generis Database Rights means rights other than copyright
resulting from Directive 96/9/EC of the European Parliament and of
the Council of 11 March 1996 on the legal protection of databases,
as amended and/or succeeded, as well as other essentially
equivalent rights anywhere in the world.
m. You means the individual or entity exercising the Licensed Rights
under this Public License. Your has a corresponding meaning.
Section 2 -- Scope.
a. License grant.
1. Subject to the terms and conditions of this Public License,
the Licensor hereby grants You a worldwide, royalty-free,
non-sublicensable, non-exclusive, irrevocable license to
exercise the Licensed Rights in the Licensed Material to:
a. reproduce and Share the Licensed Material, in whole or
in part; and
b. produce, reproduce, and Share Adapted Material.
2. Exceptions and Limitations. For the avoidance of doubt, where
Exceptions and Limitations apply to Your use, this Public
License does not apply, and You do not need to comply with
its terms and conditions.
3. Term. The term of this Public License is specified in Section
6(a).
4. Media and formats; technical modifications allowed. The
Licensor authorizes You to exercise the Licensed Rights in
all media and formats whether now known or hereafter created,
and to make technical modifications necessary to do so. The
Licensor waives and/or agrees not to assert any right or
authority to forbid You from making technical modifications
necessary to exercise the Licensed Rights, including
technical modifications necessary to circumvent Effective
Technological Measures. For purposes of this Public License,
simply making modifications authorized by this Section 2(a)
(4) never produces Adapted Material.
5. Downstream recipients.
a. Offer from the Licensor -- Licensed Material. Every
recipient of the Licensed Material automatically
receives an offer from the Licensor to exercise the
Licensed Rights under the terms and conditions of this
Public License.
b. Additional offer from the Licensor -- Adapted Material.
Every recipient of Adapted Material from You
automatically receives an offer from the Licensor to
exercise the Licensed Rights in the Adapted Material
under the conditions of the Adapter's License You apply.
c. No downstream restrictions. You may not offer or impose
any additional or different terms or conditions on, or
apply any Effective Technological Measures to, the
Licensed Material if doing so restricts exercise of the
Licensed Rights by any recipient of the Licensed
Material.
6. No endorsement. Nothing in this Public License constitutes or
may be construed as permission to assert or imply that You
are, or that Your use of the Licensed Material is, connected
with, or sponsored, endorsed, or granted official status by,
the Licensor or others designated to receive attribution as
provided in Section 3(a)(1)(A)(i).
b. Other rights.
1. Moral rights, such as the right of integrity, are not
licensed under this Public License, nor are publicity,
privacy, and/or other similar personality rights; however, to
the extent possible, the Licensor waives and/or agrees not to
assert any such rights held by the Licensor to the limited
extent necessary to allow You to exercise the Licensed
Rights, but not otherwise.
2. Patent and trademark rights are not licensed under this
Public License.
3. To the extent possible, the Licensor waives any right to
collect royalties from You for the exercise of the Licensed
Rights, whether directly or through a collecting society
under any voluntary or waivable statutory or compulsory
licensing scheme. In all other cases the Licensor expressly
reserves any right to collect such royalties.
Section 3 -- License Conditions.
Your exercise of the Licensed Rights is expressly made subject to the
following conditions.
a. Attribution.
1. If You Share the Licensed Material (including in modified
form), You must:
a. retain the following if it is supplied by the Licensor
with the Licensed Material:
i. identification of the creator(s) of the Licensed
Material and any others designated to receive
attribution, in any reasonable manner requested by
the Licensor (including by pseudonym if
designated);
ii. a copyright notice;
iii. a notice that refers to this Public License;
iv. a notice that refers to the disclaimer of
warranties;
v. a URI or hyperlink to the Licensed Material to the
extent reasonably practicable;
b. indicate if You modified the Licensed Material and
retain an indication of any previous modifications; and
c. indicate the Licensed Material is licensed under this
Public License, and include the text of, or the URI or
hyperlink to, this Public License.
2. You may satisfy the conditions in Section 3(a)(1) in any
reasonable manner based on the medium, means, and context in
which You Share the Licensed Material. For example, it may be
reasonable to satisfy the conditions by providing a URI or
hyperlink to a resource that includes the required
information.
3. If requested by the Licensor, You must remove any of the
information required by Section 3(a)(1)(A) to the extent
reasonably practicable.
b. ShareAlike.
In addition to the conditions in Section 3(a), if You Share
Adapted Material You produce, the following conditions also apply.
1. The Adapter's License You apply must be a Creative Commons
license with the same License Elements, this version or
later, or a BY-SA Compatible License.
2. You must include the text of, or the URI or hyperlink to, the
Adapter's License You apply. You may satisfy this condition
in any reasonable manner based on the medium, means, and
context in which You Share Adapted Material.
3. You may not offer or impose any additional or different terms
or conditions on, or apply any Effective Technological
Measures to, Adapted Material that restrict exercise of the
rights granted under the Adapter's License You apply.
Section 4 -- Sui Generis Database Rights.
Where the Licensed Rights include Sui Generis Database Rights that
apply to Your use of the Licensed Material:
a. for the avoidance of doubt, Section 2(a)(1) grants You the right
to extract, reuse, reproduce, and Share all or a substantial
portion of the contents of the database;
b. if You include all or a substantial portion of the database
contents in a database in which You have Sui Generis Database
Rights, then the database in which You have Sui Generis Database
Rights (but not its individual contents) is Adapted Material,
including for purposes of Section 3(b); and
c. You must comply with the conditions in Section 3(a) if You Share
all or a substantial portion of the contents of the database.
For the avoidance of doubt, this Section 4 supplements and does not
replace Your obligations under this Public License where the Licensed
Rights include other Copyright and Similar Rights.
Section 5 -- Disclaimer of Warranties and Limitation of Liability.
a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
c. The disclaimer of warranties and limitation of liability provided
above shall be interpreted in a manner that, to the extent
possible, most closely approximates an absolute disclaimer and
waiver of all liability.
Section 6 -- Term and Termination.
a. This Public License applies for the term of the Copyright and
Similar Rights licensed here. However, if You fail to comply with
this Public License, then Your rights under this Public License
terminate automatically.
b. Where Your right to use the Licensed Material has terminated under
Section 6(a), it reinstates:
1. automatically as of the date the violation is cured, provided
it is cured within 30 days of Your discovery of the
violation; or
2. upon express reinstatement by the Licensor.
For the avoidance of doubt, this Section 6(b) does not affect any
right the Licensor may have to seek remedies for Your violations
of this Public License.
c. For the avoidance of doubt, the Licensor may also offer the
Licensed Material under separate terms or conditions or stop
distributing the Licensed Material at any time; however, doing so
will not terminate this Public License.
d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
License.
Section 7 -- Other Terms and Conditions.
a. The Licensor shall not be bound by any additional or different
terms or conditions communicated by You unless expressly agreed.
b. Any arrangements, understandings, or agreements regarding the
Licensed Material not stated herein are separate from and
independent of the terms and conditions of this Public License.
Section 8 -- Interpretation.
a. For the avoidance of doubt, this Public License does not, and
shall not be interpreted to, reduce, limit, restrict, or impose
conditions on any use of the Licensed Material that could lawfully
be made without permission under this Public License.
b. To the extent possible, if any provision of this Public License is
deemed unenforceable, it shall be automatically reformed to the
minimum extent necessary to make it enforceable. If the provision
cannot be reformed, it shall be severed from this Public License
without affecting the enforceability of the remaining terms and
conditions.
c. No term or condition of this Public License will be waived and no
failure to comply consented to unless expressly agreed to by the
Licensor.
d. Nothing in this Public License constitutes or may be interpreted
as a limitation upon, or waiver of, any privileges and immunities
that apply to the Licensor or You, including from the legal
processes of any jurisdiction or authority.
=======================================================================
Creative Commons is not a party to its public licenses.
Notwithstanding, Creative Commons may elect to apply one of its public
licenses to material it publishes and in those instances will be
considered the “Licensor.” The text of the Creative Commons public
licenses is dedicated to the public domain under the CC0 Public Domain
Dedication. Except for the limited purpose of indicating that material
is shared under a Creative Commons public license or as otherwise
permitted by the Creative Commons policies published at
creativecommons.org/policies, Creative Commons does not authorize the
use of the trademark "Creative Commons" or any other trademark or logo
of Creative Commons without its prior written consent including,
without limitation, in connection with any unauthorized modifications
to any of its public licenses or any other arrangements,
understandings, or agreements concerning use of licensed material. For
the avoidance of doubt, this paragraph does not form part of the public
licenses.
Creative Commons may be contacted at creativecommons.org.

@ -1,20 +0,0 @@
# Minimal makefile for Sphinx documentation
#
# Every target is forwarded to sphinx-build in "make mode" (-M), so this file
# only has to define the variables below and two rules.

# You can set these variables from the command line, and also
# from the environment for the first two.
# SPHINXOPTS:  extra options appended to every sphinx-build invocation.
# SPHINXBUILD: the sphinx-build executable to run.
# SOURCEDIR:   directory passed to sphinx-build as the source directory.
# BUILDDIR:    directory passed to sphinx-build as the output directory.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
# "help" and "Makefile" are declared phony so make never looks for files
# with those names when deciding whether to run a rule.
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
# $@ expands to the requested target name, which becomes the Sphinx builder.
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

@ -1,17 +0,0 @@
#!/usr/bin/env bash
# Build the HTML manual, the PDF manual and the tutorial page, then move them
# into ../../cozodb_site/<version>/.
#
# All paths are relative to the current working directory, so run this from
# the directory that contains the Sphinx Makefile.
set -e

# Take the version from the first line of ../Cargo.toml that starts with "version".
VERSION=$(grep -m 1 -E "^version" ../Cargo.toml | grep -Eo '[0-9.]+' | head -n 1)

# Refuse to continue without a version: DEST_DIR would otherwise collapse to
# the site root, and the `rm -fr` below would wipe every published version.
if [ -z "$VERSION" ]; then
    echo "error: could not determine the version from ../Cargo.toml" >&2
    exit 1
fi

DEST_DIR="../../cozodb_site/$VERSION"

# HTML manual. The .nojekyll marker is placed at the root of the HTML output.
make html
touch build/html/.nojekyll

# Replace any previously published copy of this version.
rm -fr -- "$DEST_DIR"
mkdir -p -- "$DEST_DIR"
mv build/html "$DEST_DIR/manual"

# PDF manual.
make latexpdf
mv build/latex/thecozodatabasemanual.pdf "$DEST_DIR/manual.pdf"

# Tutorial notebook rendered to a standalone HTML page.
jupyter nbconvert --to html tutorial/tutorial.ipynb
mv tutorial/tutorial.html "$DEST_DIR/"

@ -1,35 +0,0 @@
@ECHO OFF
REM Switch to the directory containing this script (%~dp0); popd at :end restores it.
pushd %~dp0
REM Command file for Sphinx documentation
REM Fall back to "sphinx-build" when SPHINXBUILD is not set in the environment.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
REM Probe run with all output discarded; only the resulting errorlevel is used.
%SPHINXBUILD% >NUL 2>NUL
REM Errorlevel 9009 or above is treated as "sphinx-build not found": print help and stop.
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
REM With no argument, show the Sphinx help; otherwise pass the first argument to -M.
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd

@ -1,153 +0,0 @@
==============
Aggregations
==============
Aggregations in Cozo can be thought of as a function that acts on a stream of values
and produces a single value (the aggregate).
There are two kinds of aggregations in Cozo, *ordinary aggregations* and *semi-lattice aggregations*.
They are implemented differently in Cozo, with semi-lattice aggregations generally faster and more powerful
(only the latter can be used recursively).
The power of semi-lattice aggregations derives from the additional properties they satisfy, namely those of a `semilattice <https://en.wikipedia.org/wiki/Semilattice>`_:
idempotency
the aggregate of a single value ``a`` is ``a`` itself,
commutativity
the aggregate of ``a`` then ``b`` is equal to the aggregate of ``b`` then ``a``,
associativity
it is immaterial where we put the parentheses in an aggregate application.
------------------------------------
Semi-lattice aggregations
------------------------------------
.. module:: Aggr.SemiLattice
:noindex:
.. function:: min(x)
Aggregate the minimum value of all ``x``.
.. function:: max(x)
Aggregate the maximum value of all ``x``.
.. function:: and(var)
Aggregate the logical conjunction of the variable passed in.
.. function:: or(var)
Aggregate the logical disjunction of the variable passed in.
.. function:: union(var)
Aggregate the unions of ``var``, which must be a list.
.. function:: intersection(var)
Aggregate the intersections of ``var``, which must be a list.
.. function:: choice(var)
Non-deterministically chooses one of the values of ``var`` as the aggregate.
It simply chooses the first value it meets (the order that it meets values is non-deterministic).
.. function:: choice_last(var)
Non-deterministically chooses one of the values of ``var`` as the aggregate.
It simply chooses the last value it meets.
.. function:: min_cost([data, cost])
The argument should be a list of two elements and this aggregation chooses the list of the minimum ``cost``.
.. function:: shortest(var)
``var`` must be a list. Returns the shortest list among all values. Ties will be broken non-deterministically.
.. function:: coalesce(var)
Returns the first non-null value it meets. The order is non-deterministic.
.. function:: bit_and(var)
``var`` must be bytes. Returns the bitwise 'and' of the values.
.. function:: bit_or(var)
``var`` must be bytes. Returns the bitwise 'or' of the values.
---------------------
Ordinary aggregations
---------------------
.. module:: Aggr.Ord
:noindex:
.. function:: count(var)
Count how many values are generated for ``var`` (using bag instead of set semantics).
.. function:: count_unique(var)
Count how many unique values there are for ``var``.
.. function:: collect(var)
Collect all values for ``var`` into a list.
.. function:: unique(var)
Collect ``var`` into a list, keeping each unique value only once.
.. function:: group_count(var)
Count the occurrence of unique values of ``var``, putting the result into a list of lists,
e.g. when applied to ``'a'``, ``'b'``, ``'c'``, ``'c'``, ``'a'``, ``'c'``, the result is ``[['a', 2], ['b', 1], ['c', 3]]``.
.. function:: bit_xor(var)
``var`` must be bytes. Returns the bitwise 'xor' of the values.
.. function:: latest_by([data, time])
The argument should be a list of two elements and this aggregation returns the ``data`` of the maximum ``time``.
This is very similar to ``min_cost``, the differences being that maximum instead of minimum is used,
only the data itself is returned, and the aggregation is deliberately not a semi-lattice aggregation. Intended to be used in timestamped audit trails.
.. function:: choice_rand(var)
Non-deterministically chooses one of the values of ``var`` as the aggregate.
Each value the aggregation encounters has the same probability of being chosen.
.. NOTE::
This version of ``choice`` is not a semi-lattice aggregation
since it is impossible to satisfy the uniform sampling requirement while maintaining no state,
which is an implementation restriction unlikely to be lifted.
^^^^^^^^^^^^^^^^^^^^^^^^^
Statistical aggregations
^^^^^^^^^^^^^^^^^^^^^^^^^
.. function:: mean(x)
The mean value of ``x``.
.. function:: sum(x)
The sum of ``x``.
.. function:: product(x)
The product of ``x``.
.. function:: variance(x)
The sample variance of ``x``.
.. function:: std_dev(x)
The sample standard deviation of ``x``.

@ -1,238 +0,0 @@
==============================
Utilities and algorithms
==============================
Fixed rules in CozoScript apply utilities or algorithms.
.. module:: Algo
:noindex:
-------------------
Utilities
-------------------
.. function:: Constant(data: [...])
Returns a relation containing the data passed in. The constant rule ``?[] <- ...`` is
syntax sugar for ``?[] <~ Constant(data: ...)``.
:param data: A list of lists, representing the rows of the returned relation.
.. function:: ReorderSort(rel[...], out: [...], sort_by: [...], descending: false, break_ties: false, skip: 0, take: 0)
Sort and then extract new columns of the passed in relation ``rel``.
:param required out: A list of expressions which will be used to produce the output relation. Any bindings in the expressions will be bound to the named positions in ``rel``.
:param sort_by: A list of expressions which will be used to produce the sort keys. Any bindings in the expressions will be bound to the named positions in ``rel``.
:param descending: Whether the sorting process should be done in descending order. Defaults to ``false``.
:param break_ties: Whether ties should be broken, e.g. whether the first two rows with *identical sort keys* should be given ordering numbers ``1`` and ``2`` instead of ``1`` and ``1``. Defaults to false.
:param skip: How many rows to skip before producing rows. Defaults to zero.
:param take: How many rows at most to produce. Zero means no limit. Defaults to zero.
:return: The returned relation, in addition to the rows specified in the parameter ``out``, will have the ordering prepended. The ordering starts at ``1``.
.. TIP::
This algorithm serves a similar purpose to the global ``:order``, ``:limit`` and ``:offset`` options, but can be applied to intermediate results. Prefer the global options if it is applied to the final output.
.. function:: CsvReader(url: ..., types: [...], delimiter: ',', prepend_index: false, has_headers: true)
Read a CSV file from disk or an HTTP GET request and convert the result to a relation.
:param required url: URL for the CSV file. For local file, use ``file://<PATH_TO_FILE>``.
:param required types: A list of strings interpreted as types for the columns of the output relation. If any type is specified as nullable and conversion to the specified type fails, ``null`` will be the result. This is more lenient than other functions since CSVs tend to contain lots of bad values.
:param delimiter: The delimiter to use when parsing the CSV file.
:param prepend_index: If ``true``, row index will be prepended to the columns.
:param has_headers: Whether the CSV file has headers. The reader will not interpret the header in any way but will instead simply ignore it.
.. function:: JsonReader(url: ..., fields: [...], json_lines: true, null_if_absent: false, prepend_index: false)
Read a JSON file from disk or an HTTP GET request and convert the result to a relation.
:param required url: URL for the JSON file. For local file, use ``file://<PATH_TO_FILE>``.
:param required fields: A list of field names, for extracting fields from JSON arrays into the relation.
:param json_lines: If ``true``, parse the file as lines of JSON objects, each line containing a single object; if false, parse the file as a JSON array containing many objects.
:param null_if_absent: If ``true`` and a requested field is absent, will output ``null`` in its place. If ``false`` and the requested field is absent, will throw an error.
:param prepend_index: If ``true``, row index will be prepended to the columns.
------------------------------------
Connectedness algorithms
------------------------------------
.. function:: ConnectedComponents(edges[from, to])
Computes the `connected components <https://en.wikipedia.org/wiki/Connected_component_(graph_theory)>`_ of a graph with the provided edges.
:return: Pairs containing the node index, and its component index.
.. function:: StronglyConnectedComponent(edges[from, to])
Computes the `strongly connected components <https://en.wikipedia.org/wiki/Strongly_connected_component>`_ of a graph with the provided edges.
:return: Pairs containing the node index, and its component index.
.. function:: SCC(...)
See :func:`Algo.StronglyConnectedComponent`.
.. function:: MinimumSpanningForestKruskal(edges[from, to, weight?])
Runs `Kruskal's algorithm <https://en.wikipedia.org/wiki/Kruskal%27s_algorithm>`_ on the provided edges to compute a `minimum spanning forest <https://en.wikipedia.org/wiki/Minimum_spanning_tree>`_. Negative weights are fine.
:return: Triples containing the from-node, the to-node, and the cost from the tree root to the to-node. Which nodes are chosen to be the roots are non-deterministic. Multiple roots imply the graph is disconnected.
.. function:: MinimumSpanningTreePrim(edges[from, to, weight?], starting?[idx])
Runs `Prim's algorithm <https://en.wikipedia.org/wiki/Prim%27s_algorithm>`_ on the provided edges to compute a `minimum spanning tree <https://en.wikipedia.org/wiki/Minimum_spanning_tree>`_. ``starting`` should be a relation producing exactly one node index as the starting node. Only the connected component of the starting node is returned. If ``starting`` is omitted, which component is returned is arbitrary.
:return: Triples containing the from-node, the to-node, and the cost from the tree root to the to-node.
.. function:: TopSort(edges[from, to])
Performs `topological sorting <https://en.wikipedia.org/wiki/Topological_sorting>`_ on the graph with the provided edges. The graph is required to be connected in the first place.
:return: Pairs containing the sort order and the node index.
------------------------------------
Pathfinding algorithms
------------------------------------
.. function:: ShortestPathDijkstra(edges[from, to, weight?], starting[idx], goals[idx], undirected: false, keep_ties: false)
Runs `Dijkstra's algorithm <https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm>`_ to determine the shortest paths between the ``starting`` nodes and the ``goals``. Weights, if given, must be non-negative.
:param undirected: Whether the graph should be interpreted as undirected. Defaults to ``false``.
:param keep_ties: Whether to return all paths with the same lowest cost. Defaults to ``false``, in which case any one path of the lowest cost could be returned.
:return: 4-tuples containing the starting node, the goal, the lowest cost, and a path with the lowest cost.
.. function:: KShortestPathYen(edges[from, to, weight?], starting[idx], goals[idx], k: expr, undirected: false)
Runs `Yen's algorithm <https://en.wikipedia.org/wiki/Yen%27s_algorithm>`_ (backed by Dijkstra's algorithm) to find the k-shortest paths between nodes in ``starting`` and nodes in ``goals``.
:param required k: How many routes to return for each start-goal pair.
:param undirected: Whether the graph should be interpreted as undirected. Defaults to ``false``.
:return: 4-tuples containing the starting node, the goal, the cost, and a path with the cost.
.. function:: BreadthFirstSearch(edges[from, to], nodes[idx, ...], starting?[idx], condition: expr, limit: 1)
Runs breadth first search on the directed graph with the given edges and nodes, starting at the nodes in ``starting``. If ``starting`` is not given, it will default to all of ``nodes``, which may be quite a lot to calculate.
:param required condition: The stopping condition, will be evaluated with the bindings given to ``nodes``. Should evaluate to a boolean, with ``true`` indicating an acceptable answer was found.
:param limit: How many answers to produce for each starting node. Defaults to 1.
:return: Triples containing the starting node, the answer node, and the found path connecting them.
.. function:: BFS(...)
See :func:`Algo.BreadthFirstSearch`.
.. function:: DepthFirstSearch(edges[from, to], nodes[idx, ...], starting?[idx], condition: expr, limit: 1)
Runs depth first search on the directed graph with the given edges and nodes, starting at the nodes in ``starting``. If ``starting`` is not given, it will default to all of ``nodes``, which may be quite a lot to calculate.
:param required condition: The stopping condition, will be evaluated with the bindings given to ``nodes``. Should evaluate to a boolean, with ``true`` indicating an acceptable answer was found.
:param limit: How many answers to produce for each starting node. Defaults to 1.
:return: Triples containing the starting node, the answer node, and the found path connecting them.
.. function:: DFS(...)
See :func:`Algo.DepthFirstSearch`.
.. function:: ShortestPathAStar(edges[from, to, weight], nodes[idx, ...], starting[idx], goals[idx], heuristic: expr)
Computes the shortest path from every node in ``starting`` to every node in ``goals`` by the `A\* algorithm <https://en.wikipedia.org/wiki/A*_search_algorithm>`_.
``edges`` are interpreted as directed, weighted edges with non-negative weights.
:param required heuristic: The search heuristic expression. It will be evaluated with the bindings from ``goals`` and ``nodes``. It should return a number which is a lower bound of the true shortest distance from a node to the goal node. If the estimate is not a valid lower-bound, i.e. it over-estimates, the results returned may not be correct.
:return: 4-tuples containing the starting node index, the goal node index, the lowest cost, and a path with the lowest cost.
.. TIP::
The performance of the A\* algorithm heavily depends on how good your heuristic function is. Passing in ``0`` as the estimate is always valid, but then you really should be using Dijkstra's algorithm.
Good heuristics usually come about from a metric in the ambient space in which your data live, e.g. spherical distance on the surface of a sphere, or Manhattan distance on a grid. :func:`Func.Math.haversine_deg_input` could be helpful for the spherical case. Note that you must use the correct units for the distance.
Providing a heuristic that is not guaranteed to be a lower-bound *might* be acceptable if you are fine with inaccuracies. The errors in the answers are bounded by the sum of the margins of your over-estimates.
-------------------------------------
Community detection algorithms
-------------------------------------
.. function:: ClusteringCoefficients(edges[from, to, weight?])
Computes the `clustering coefficients <https://en.wikipedia.org/wiki/Clustering_coefficient>`_ of the graph with the provided edges.
:return: 4-tuples containing the node index, the clustering coefficient, the number of triangles attached to the node, and the total degree of the node.
.. function:: CommunityDetectionLouvain(edges[from, to, weight?], undirected: false, max_iter: 10, delta: 0.0001, keep_depth?: depth)
Runs the `Louvain algorithm <https://en.wikipedia.org/wiki/Louvain_method>`_ on the graph with the provided edges, optionally non-negatively weighted.
:param undirected: Whether the graph should be interpreted as undirected. Defaults to ``false``.
:param max_iter: The maximum number of iterations to run within each epoch of the algorithm. Defaults to 10.
:param delta: How much the `modularity <https://en.wikipedia.org/wiki/Modularity_(networks)>`_ has to change before a step in the algorithm is considered to be an improvement.
:param keep_depth: How many levels in the hierarchy of communities to keep in the final result. If omitted, all levels are kept.
:return: Pairs containing the label for a community, and a node index belonging to the community. Each label is a list of integers with maximum length constrained by the parameter ``keep_depth``. This list represents the hierarchy of sub-communities containing the list.
.. function:: LabelPropagation(edges[from, to, weight?], undirected: false, max_iter: 10)
Runs the `label propagation algorithm <https://en.wikipedia.org/wiki/Label_propagation_algorithm>`_ on the graph with the provided edges, optionally weighted.
:param undirected: Whether the graph should be interpreted as undirected. Defaults to ``false``.
:param max_iter: The maximum number of iterations to run. Defaults to 10.
:return: Pairs containing the integer label for a community, and a node index belonging to the community.
-------------------------------------
Centrality measures
-------------------------------------
.. function:: DegreeCentrality(edges[from, to])
Computes the degree centrality of the nodes in the graph with the given edges. The computation is trivial, so this should be your first thing to try when exploring new data.
:return: 4-tuples containing the node index, the total degree (how many edges involve this node), the out-degree (how many edges point away from this node), and the in-degree (how many edges point to this node).
.. function:: PageRank(edges[from, to, weight?], undirected: false, theta: 0.8, epsilon: 0.05, iterations: 20)
Computes the `PageRank <https://en.wikipedia.org/wiki/PageRank>`_ from the given graph with the provided edges, optionally weighted.
:param undirected: Whether the graph should be interpreted as undirected. Defaults to ``false``.
:param theta: A number between 0 and 1 indicating how much weight in the PageRank matrix is due to the explicit edges. A number of 1 indicates no random restarts. Defaults to 0.8.
:param epsilon: Minimum PageRank change in any node for an iteration to be considered an improvement. Defaults to 0.05.
:param iterations: How many iterations to run. Fewer iterations are run if convergence is reached. Defaults to 20.
:return: Pairs containing the node label and its PageRank. For a graph with uniform edges, the PageRank of every node is 1. The `L2-norm <https://en.wikipedia.org/wiki/Norm_(mathematics)>`_ of the results is forced to be invariant, i.e. in the results those nodes with a PageRank greater than 1 are "more central" than the average node in a certain sense.
.. function:: ClosenessCentrality(edges[from, to, weight?], undirected: false)
Computes the `closeness centrality <https://en.wikipedia.org/wiki/Closeness_centrality>`_ of the graph. The input relation represents edges connecting node indices which are optionally weighted.
:param undirected: Whether the edges should be interpreted as undirected. Defaults to ``false``.
:return: Node index together with its centrality.
.. function:: BetweennessCentrality(edges[from, to, weight?], undirected: false)
Computes the `betweenness centrality <https://en.wikipedia.org/wiki/Betweenness_centrality>`_ of the graph. The input relation represents edges connecting node indices which are optionally weighted.
:param undirected: Whether the edges should be interpreted as undirected. Defaults to ``false``.
:return: Node index together with its centrality.
.. WARNING::
``BetweennessCentrality`` is very expensive for medium to large graphs. If possible, collapse large graphs into supergraphs by running a community detection algorithm first.
------------------
Miscellaneous
------------------
.. function:: RandomWalk(edges[from, to, ...], nodes[idx, ...], starting[idx], steps: 10, weight?: expr, iterations: 1)
Performs random walk on the graph with the provided edges and nodes, starting at the nodes in ``starting``.
:param required steps: How many steps to walk for each node in ``starting``. Produced paths may be shorter if dead ends are reached.
:param weight: An expression evaluated against bindings of ``nodes`` and bindings of ``edges``, at a time when the walk is at a node and choosing between multiple edges to follow. It should evaluate to a non-negative number indicating the weight of the given choice of edge to follow. If omitted, which edge to follow is chosen uniformly.
:param iterations: How many times walking is repeated for each starting node.
:return: Triples containing a numerical index for the walk, the starting node, and the path followed.

@ -1,32 +0,0 @@
# Sphinx build configuration for the Cozo manual.
#
# Reference for every setting used here:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# ---------------------------------------------------------------------------
# Project metadata
# ---------------------------------------------------------------------------
author = 'Ziyang Hu'
project = 'The Cozo Database Manual'
copyright = '2022, ' + author
release = '0.1.0'

# ---------------------------------------------------------------------------
# General build settings
# ---------------------------------------------------------------------------
# No Sphinx extensions are enabled and no source files are excluded.
extensions = list()
exclude_patterns = list()
templates_path = ['_templates']

# Presentation of documented functions and modules in the rendered output.
add_function_parentheses = False
add_module_names = False

# LaTeX/PDF output: how external URLs are displayed.
latex_show_urls = 'footnote'

# ---------------------------------------------------------------------------
# HTML output
# ---------------------------------------------------------------------------
html_theme = 'furo'
html_title = 'Cozo Manual'
html_baseurl = '/manual/'
html_static_path = ['_static']

@ -1,104 +0,0 @@
==============
Types
==============
--------------
Runtime types
--------------
Values in Cozo have the following *runtime types*:
* ``Null``
* ``Bool``
* ``Number``
* ``String``
* ``Bytes``
* ``Uuid``
* ``List``
``Number`` can be ``Float`` (double precision) or ``Int`` (signed, 64 bits). Cozo will auto-promote ``Int`` to ``Float`` when necessary.
``List`` can contain any number of mixed-type values, including other lists.
Cozo sorts values according to the above order, e.g. ``null`` is smaller than ``true``, which is in turn smaller than the list ``[]``.
Within each type values are *compared* according to:
* ``false < true``;
* ``-1 == -1.0 < 0 == 0.0 < 0.5 == 0.5 < 1 == 1.0``;
* Lists are ordered lexicographically by their elements;
* Bytes are compared lexicographically;
* Strings are compared lexicographically by their UTF-8 byte representations;
* UUIDs are sorted in a way that UUIDv1 with similar timestamps are near each other.
This is to improve data locality and should be considered an implementation detail.
Depending on the order of UUID in your application is not recommended.
.. WARNING::
``1 == 1.0`` evaluates to ``true``, but ``1`` and ``1.0`` are distinct values,
meaning that a relation can contain both as keys according to set semantics.
This is especially confusing when using JavaScript, which converts all numbers to float,
and Python, which does not show a difference between the two when printing.
Using floating point numbers in keys is not recommended if the rows are accessed by these keys
(instead of accessed by iteration).
----------------
Literals
----------------
The standard notations ``null`` for the type ``Null``, ``false`` and ``true`` for the type ``Bool`` are used.
Besides the usual decimal notation for signed integers,
you can prefix a number with ``0x`` or ``-0x`` for hexadecimal representation,
with ``0o`` or ``-0o`` for octal,
or with ``0b`` or ``-0b`` for binary.
Floating point numbers include the decimal dot (may be trailing),
and may be in scientific notation.
All numbers may include underscores ``_`` in their representation for clarity.
For example, ``299_792_458`` is the speed of light in meters per second.
Strings can be typed in the same way as they are in JSON, using double quotes ``""``,
with the same escape rules.
You can also use single quotes ``''`` in which case the roles of double quotes and single quotes are switched.
There is also a "raw string" notation::
___"I'm a raw string"___
A raw string starts with an arbitrary number of underscores, and then a double quote.
It terminates when followed by a double quote and the same number of underscores.
Everything in between is interpreted exactly as typed, including any newlines.
By varying the number of underscores, you can represent any string without quoting.
There is no literal representation for ``Bytes`` or ``Uuid``.
Use the appropriate functions to create them.
If you are inserting data into a stored relation with a column specified to contain bytes or UUIDs,
auto-coercion will kick in and use ``decode_base64`` and ``to_uuid`` for conversion.
Lists are items enclosed between square brackets ``[]``, separated by commas.
A trailing comma is allowed after the last item.
------------------------------------------------
Column types
------------------------------------------------
The following *atomic types* can be specified for columns in stored relations:
* ``Int``
* ``Float``
* ``Bool``
* ``String``
* ``Bytes``
* ``Uuid``
There is no ``Null`` type. Instead, if you put a question mark after a type, it is treated as *nullable*,
meaning that it either takes value in the type or is null.
Two composite types are available. A *homogeneous list* is specified by square brackets,
with the inner type in between, like this: ``[Int]``.
You may optionally specify how many elements are expected, like this: ``[Int; 10]``.
A *heterogeneous list*, or a *tuple*, is specified by round brackets, with the element types listed by position,
like this: ``(Int, Float, String)``. Tuples always have fixed lengths.
A special type ``Any`` can be specified, allowing all values except null.
If you want to allow null as well, use ``Any?``.
Composite types may contain other composite types or ``Any`` types as their inner types.

@ -1,180 +0,0 @@
====================================
Query execution
====================================
Databases often consider how queries are executed an implementation detail
hidden behind an abstraction barrier that users need not care about,
so that databases can utilize query optimizers to choose the best query execution plan
regardless of how the query was originally written.
This abstraction barrier is leaky, however,
since bad query execution plans invariably occur,
and users need to "reach behind the curtain" to fix performance problems,
which is a difficult and tiring task.
The problem becomes more severe the more joins a query contains,
and graph queries tend to contain a large number of joins.
So in Cozo we take the pragmatic approach and make query execution deterministic
and easy to tell from how the query was written.
The flip side is that we require the user to
know the best way to store their data,
which is in general less demanding than coercing the query optimizer.
Then, armed with knowledge of this chapter, writing efficient queries is easy.
--------------------------------------
Disjunctive normal form
--------------------------------------
Evaluation starts by canonicalizing inline rules into
`disjunctive normal form <https://en.wikipedia.org/wiki/Disjunctive_normal_form>`_,
i.e., a disjunction of conjunctions, with any negation pushed to the innermost level.
Each clause of the outermost disjunction is then treated as a separate rule.
The consequence is that the safety rule may be violated
even though textually every variable in the head occurs in the body.
As an example::
rule[a, b] := rule1[a] or rule2[b]
is a violation of the safety rule since it is rewritten into two rules, each of which is missing a different binding.
--------------------------------------
Stratification
--------------------------------------
The next step in the processing is *stratification*.
It begins by making a graph of the named rules,
with the rules themselves as nodes,
and a link is added between two nodes when one of the rules applies the other.
This application is through atoms for inline rules, and input relations for fixed rules.
Next, some of the links are labelled *stratifying*:
* when an inline rule applies another rule through negation,
* when an inline rule applies another inline rule that contains aggregations,
* when an inline rule applies itself and it has non-semi-lattice aggregations,
* when an inline rule applies another rule which is a fixed rule,
* when a fixed rule has another rule as an input relation.
The strongly connected components of the graph of rules are then determined and tested,
and if it is found that some strongly connected component contains a stratifying link,
the graph is deemed *unstratifiable*, and the execution aborts.
Otherwise, Cozo will topologically sort the strongly connected components to
determine the strata of the rules:
rules within the same stratum are logically executed together,
and no two rules within the same stratum can have a stratifying link between them.
In this process,
Cozo will merge the strongly connected components into as few supernodes as possible
while still maintaining the restriction on stratifying links.
The resulting strata are then passed on to be processed in the next step.
You can see the stratum number assigned to rules by using the ``::explain`` system op.
--------------------------------------
Magic set rewrites
--------------------------------------
Within each stratum, the input rules are rewritten using the technique of *magic sets*.
This rewriting ensures that the query execution does not
waste time calculating results that are later simply discarded.
As an example, consider::
reachable[a, b] := link[a, b]
reachable[a, b] := reachable[a, c], link[c, b]
?[r] := reachable['A', r]
Without magic set rewrites, the whole ``reachable`` relation is generated first,
then most of its rows are thrown away, keeping only those starting from ``'A'``.
Magic set rewriting avoids this problem.
You can see the result of the rewriting using ``::explain``.
The rewritten query is guaranteed to yield the same relation for ``?``,
and will in general yield fewer intermediate rows.
The rewrite currently only applies to inline rules without aggregations.
--------------------------------------
Semi-naïve evaluation
--------------------------------------
Now each stratum contains either a single fixed rule or a set of inline rules.
The single fixed rules are executed by running their specific implementations.
For the inline rules, each of them is assigned an output relation.
Assuming we know how to evaluate each rule given all the relations it depends on,
the semi-naïve algorithm can now be applied to the rules to yield all output rows.
The semi-naïve algorithm is a bottom-up evaluation strategy, meaning that it tries to deduce
all facts from a set of given facts.
.. NOTE::
By contrast, top-down strategies start with stated goals and try to find proof for the goals.
Bottom-up strategies have many advantages over top-down ones when the whole output of each rule
is needed, but may waste time generating unused facts if only some of the output is kept.
Magic set rewrites are introduced to eliminate precisely this weakness.
---------------------------------------
Ordering of atoms
---------------------------------------
The compiler reorders the atoms in the body of the inline rules, and then
the atoms are evaluated.
After conversion to disjunctive normal forms,
each atom can only be one of the following:
* an explicit unification,
* applying a rule or a stored relation,
* an expression, which should evaluate to a boolean,
* a negation of an application.
The first two cases may introduce fresh bindings, whereas the last two cannot.
The reordering makes all atoms that introduce new bindings stay where they are,
whereas all atoms that do not introduce new bindings are moved to the earliest possible place
where all their bindings are bound.
All atoms that introduce bindings correspond to
joining with a pre-existing relation followed by projections
in relational algebra, and all atoms that do not introduce bindings correspond to filters.
By applying filters as early as possible,
we minimize the number of rows before joining them with the next relation.
When writing the body of rules, we should aim to minimize the total number of rows generated.
A strategy that works in almost all cases is to put the most restrictive atoms which generate new bindings first.
---------------------------------------
Evaluating atoms
---------------------------------------
We now explain how a single atom which generates new bindings is processed.
For unifications, the right-hand side, an expression with all variables bound,
is simply evaluated, and the result is joined
to the current relation (as in a ``map-cat`` operation in functional languages).
Rules or stored relations are conceptually trees, with composite keys sorted lexicographically.
The complexity of their applications in atoms
is therefore determined by whether the bound variables and constants in the application bindings form a *key prefix*.
For example, the following application::
a_rule['A', 'B', c]
with ``c`` unbound, is very efficient, since this corresponds to a prefix scan in the tree with the key prefix ``['A', 'B']``,
whereas the following application::
a_rule[a, 'B', 'C']
where ``a`` is unbound, is very expensive, since we must do a full scan.
On the other hand, if ``a`` is bound, then this is only a logarithmic-time existence check.
For stored relations, you need to check its schema for the order of keys to deduce the complexity.
The system op ``::explain`` may also give you some information.
Rows are generated in a streaming fashion,
meaning that relation joins proceed as soon as one row is available,
and do not wait until the whole relation is generated.
---------------------------------------
Early stopping
---------------------------------------
For the entry rule ``?``, if ``:limit`` is specified as a query option,
a counter is used to monitor how many valid rows are already generated.
If enough rows are generated, the query stops.
This only works when the entry rule is inline
and you do not specify ``:order``.

@ -1,659 +0,0 @@
=========
Functions
=========
Functions can be used to build expressions.
In the following, all functions except those having names starting with ``rand_`` are deterministic.
------------------------
Equality and Comparisons
------------------------
.. module:: Func.EqCmp
:noindex:
.. function:: eq(x, y)
Equality comparison. The operator form is ``x == y``. The two arguments of the equality can be of different types, in which case the result is ``false``.
.. function:: neq(x, y)
Inequality comparison. The operator form is ``x != y``. The two arguments of the inequality can be of different types, in which case the result is ``true``.
.. function:: gt(x, y)
Equivalent to ``x > y``
.. function:: ge(x, y)
Equivalent to ``x >= y``
.. function:: lt(x, y)
Equivalent to ``x < y``
.. function:: le(x, y)
Equivalent to ``x <= y``
.. NOTE::
The four comparison operators can only compare values of the same runtime type. Integers and floats are of the same type ``Number``.
.. function:: max(x, ...)
Returns the maximum of the arguments. Can only be applied to numbers.
.. function:: min(x, ...)
Returns the minimum of the arguments. Can only be applied to numbers.
------------------------
Boolean functions
------------------------
.. module:: Func.Bool
:noindex:
.. function:: and(...)
Variadic conjunction. For binary arguments it is equivalent to ``x && y``.
.. function:: or(...)
Variadic disjunction. For binary arguments it is equivalent to ``x || y``.
.. function:: negate(x)
Negation. Equivalent to ``!x``.
.. function:: assert(x, ...)
Returns ``true`` if ``x`` is ``true``, otherwise will raise an error containing all its arguments as the error message.
------------------------
Mathematics
------------------------
.. module:: Func.Math
:noindex:
.. function:: add(...)
Variadic addition. The binary version is the same as ``x + y``.
.. function:: sub(x, y)
Equivalent to ``x - y``.
.. function:: mul(...)
Variadic multiplication. The binary version is the same as ``x * y``.
.. function:: div(x, y)
Equivalent to ``x / y``.
.. function:: minus(x)
Equivalent to ``-x``.
.. function:: pow(x, y)
Raises ``x`` to the power of ``y``. Equivalent to ``x ^ y``. Always returns a float.
.. function:: mod(x, y)
Returns the remainder when ``x`` is divided by ``y``. Arguments can be floats. The returned value has the same sign as ``x``. Equivalent to ``x % y``.
.. function:: abs(x)
Returns the absolute value.
.. function:: signum(x)
Returns ``1``, ``0`` or ``-1``, whichever has the same sign as the argument, e.g. ``signum(to_float('NEG_INF')) == -1``, ``signum(0.0) == 0``, but ``signum(-0.0) == -1``. Returns ``NAN`` when applied to ``NAN``.
.. function:: floor(x)
Returns the floor of ``x``.
.. function:: ceil(x)
Returns the ceiling of ``x``.
.. function:: round(x)
Returns the nearest integer to the argument (represented as Float if the argument itself is a Float). Rounds halfway cases away from zero. E.g. ``round(0.5) == 1.0``, ``round(-0.5) == -1.0``, ``round(1.4) == 1.0``.
.. function:: exp(x)
Returns the exponential of the argument, natural base.
.. function:: exp2(x)
Returns the exponential base 2 of the argument. Always returns a float.
.. function:: ln(x)
Returns the natural logarithm.
.. function:: log2(x)
Returns the logarithm base 2.
.. function:: log10(x)
Returns the logarithm base 10.
.. function:: sin(x)
The sine trigonometric function.
.. function:: cos(x)
The cosine trigonometric function.
.. function:: tan(x)
The tangent trigonometric function.
.. function:: asin(x)
The inverse sine.
.. function:: acos(x)
The inverse cosine.
.. function:: atan(x)
The inverse tangent.
.. function:: atan2(x, y)
The inverse tangent `atan2 <https://en.wikipedia.org/wiki/Atan2>`_ by passing ``x`` and ``y`` separately.
.. function:: sinh(x)
The hyperbolic sine.
.. function:: cosh(x)
The hyperbolic cosine.
.. function:: tanh(x)
The hyperbolic tangent.
.. function:: asinh(x)
The inverse hyperbolic sine.
.. function:: acosh(x)
The inverse hyperbolic cosine.
.. function:: atanh(x)
The inverse hyperbolic tangent.
.. function:: deg_to_rad(x)
Converts degrees to radians.
.. function:: rad_to_deg(x)
Converts radians to degrees.
.. function:: haversine(a_lat, a_lon, b_lat, b_lon)
Computes with the `haversine formula <https://en.wikipedia.org/wiki/Haversine_formula>`_
the angle measured in radians between two points ``a`` and ``b`` on a sphere
specified by their latitudes and longitudes. The inputs are in radians.
You probably want the next function when you are dealing with maps,
since most maps measure angles in degrees instead of radians.
.. function:: haversine_deg_input(a_lat, a_lon, b_lat, b_lon)
Same as the previous function, but the inputs are in degrees instead of radians.
The return value is still in radians.
If you want the approximate distance measured on the surface of the earth instead of the angle between two points,
multiply the result by the radius of the earth,
which is about ``6371`` kilometres, ``3959`` miles, or ``3440`` nautical miles.
.. NOTE::
The haversine formula, when applied to the surface of the earth, which is not a perfect sphere, can result in an error of less than one percent.
------------------------
String functions
------------------------
.. module:: Func.String
:noindex:
.. function:: length(str)
Returns the number of Unicode characters in the string.
Can also be applied to a list or a byte array.
.. WARNING::
``length(str)`` does not return the number of bytes of the string representation.
Also, what is returned depends on the normalization of the string.
So if such details are important, apply ``unicode_normalize`` before ``length``.
.. function:: concat(x, ...)
Concatenates strings. Equivalent to ``x ++ y`` in the binary case.
Can also be applied to lists.
.. function:: str_includes(x, y)
Returns ``true`` if ``x`` contains the substring ``y``, ``false`` otherwise.
.. function:: lowercase(x)
Converts to lowercase. Supports Unicode.
.. function:: uppercase(x)
Converts to uppercase. Supports Unicode.
.. function:: trim(x)
Removes `whitespace <https://en.wikipedia.org/wiki/Whitespace_character>`_ from both ends of the string.
.. function:: trim_start(x)
Removes `whitespace <https://en.wikipedia.org/wiki/Whitespace_character>`_ from the start of the string.
.. function:: trim_end(x)
Removes `whitespace <https://en.wikipedia.org/wiki/Whitespace_character>`_ from the end of the string.
.. function:: starts_with(x, y)
Tests if ``x`` starts with ``y``.
.. TIP::
``starts_with(var, str)`` is preferred over equivalent (e.g. regex) conditions,
since the compiler may more easily compile the clause into a range scan.
.. function:: ends_with(x, y)
Tests if ``x`` ends with ``y``.
.. function:: unicode_normalize(str, norm)
Converts ``str`` to the `normalization <https://en.wikipedia.org/wiki/Unicode_equivalence>`_ specified by ``norm``.
The valid values of ``norm`` are ``'nfc'``, ``'nfd'``, ``'nfkc'`` and ``'nfkd'``.
.. function:: chars(str)
Returns Unicode characters of the string as a list of substrings.
.. function:: from_substrings(list)
Combines the strings in ``list`` into a big string. In a sense, it is the inverse function of ``chars``.
.. WARNING::
If you want substring slices, indexing strings, etc., first convert the string to a list with ``chars``,
do the manipulation on the list, and then recombine with ``from_substrings``.
--------------------------
List functions
--------------------------
.. module:: Func.List
:noindex:
.. function:: list(x, ...)
Constructs a list from its argument, e.g. ``list(1, 2, 3)``. Equivalent to the literal form ``[1, 2, 3]``.
.. function:: is_in(el, list)
Tests the membership of an element in a list.
.. function:: first(l)
Extracts the first element of the list. Returns ``null`` if given an empty list.
.. function:: last(l)
Extracts the last element of the list. Returns ``null`` if given an empty list.
.. function:: get(l, n)
Returns the element at index ``n`` in the list ``l``. Raises an error if the access is out of bounds. Indices start with 0.
.. function:: maybe_get(l, n)
Returns the element at index ``n`` in the list ``l``. Returns ``null`` if the access is out of bounds. Indices start with 0.
.. function:: length(list)
Returns the length of the list.
Can also be applied to a string or a byte array.
.. function:: slice(l, start, end)
Returns the slice of list between the index ``start`` (inclusive) and ``end`` (exclusive).
Negative numbers may be used, which is interpreted as counting from the end of the list.
E.g. ``slice([1, 2, 3, 4], 1, 3) == [2, 3]``, ``slice([1, 2, 3, 4], 1, -1) == [2, 3]``.
.. function:: concat(x, ...)
Concatenates lists. The binary case is equivalent to ``x ++ y``.
Can also be applied to strings.
.. function:: prepend(l, x)
Prepends ``x`` to ``l``.
.. function:: append(l, x)
Appends ``x`` to ``l``.
.. function:: reverse(l)
Reverses the list.
.. function:: sorted(l)
Sorts the list and returns the sorted copy.
.. function:: chunks(l, n)
Splits the list ``l`` into chunks of ``n``, e.g. ``chunks([1, 2, 3, 4, 5], 2) == [[1, 2], [3, 4], [5]]``.
.. function:: chunks_exact(l, n)
Splits the list ``l`` into chunks of ``n``, discarding any trailing elements, e.g. ``chunks_exact([1, 2, 3, 4, 5], 2) == [[1, 2], [3, 4]]``.
.. function:: windows(l, n)
Splits the list ``l`` into overlapping windows of length ``n``. e.g. ``windows([1, 2, 3, 4, 5], 3) == [[1, 2, 3], [2, 3, 4], [3, 4, 5]]``.
.. function:: union(x, y, ...)
Computes the set-theoretic union of all the list arguments.
.. function:: intersection(x, y, ...)
Computes the set-theoretic intersection of all the list arguments.
.. function:: difference(x, y, ...)
Computes the set-theoretic difference of the first argument with respect to the rest.
----------------
Binary functions
----------------
.. module:: Func.Bin
:noindex:
.. function:: length(bytes)
Returns the length of the byte array.
Can also be applied to a list or a string.
.. function:: bit_and(x, y)
Calculate the bitwise and. The two bytes must have the same lengths.
.. function:: bit_or(x, y)
Calculate the bitwise or. The two bytes must have the same lengths.
.. function:: bit_not(x)
Calculate the bitwise not.
.. function:: bit_xor(x, y)
Calculate the bitwise xor. The two bytes must have the same lengths.
.. function:: pack_bits([...])
Packs a list of booleans into a byte array; if the length of the list is not divisible by 8, it is padded with ``false``.
.. function:: unpack_bits(x)
Unpacks a byte array into a list of booleans.
.. function:: encode_base64(b)
Encodes the byte array ``b`` into the `Base64 <https://en.wikipedia.org/wiki/Base64>`_-encoded string.
.. NOTE::
``encode_base64`` is automatically applied when output to JSON since JSON cannot represent bytes natively.
.. function:: decode_base64(str)
Tries to decode the ``str`` as a `Base64 <https://en.wikipedia.org/wiki/Base64>`_-encoded byte array.
--------------------------------
Type checking and conversions
--------------------------------
.. module:: Func.Typing
:noindex:
.. function:: to_string(x)
Convert ``x`` to a string: the argument is unchanged if it is already a string, otherwise its JSON string representation will be returned.
.. function:: to_float(x)
Tries to convert ``x`` to a float. Conversion from numbers always succeeds. Conversion from strings has the following special cases in addition to the usual string representation:
* ``INF`` is converted to infinity;
* ``NEG_INF`` is converted to negative infinity;
* ``NAN`` is converted to NAN (but don't compare NAN by equality, use ``is_nan`` instead);
* ``PI`` is converted to pi (3.14159...);
* ``E`` is converted to the base of natural logarithms, or Euler's number (2.71828...).
.. function:: to_uuid(x)
Tries to convert ``x`` to a UUID. The input must either be a hyphenated UUID string representation or already a UUID for it to succeed.
.. function:: uuid_timestamp(x)
Extracts the timestamp from a UUID version 1, as seconds since the UNIX epoch. If the UUID is not of version 1, ``null`` is returned. If ``x`` is not a UUID, an error is raised.
.. function:: is_null(x)
Checks for ``null``.
.. function:: is_int(x)
Checks for integers.
.. function:: is_float(x)
Checks for floats.
.. function:: is_finite(x)
Returns ``true`` if ``x`` is an integer or a finite float.
.. function:: is_infinite(x)
Returns ``true`` if ``x`` is infinity or negative infinity.
.. function:: is_nan(x)
Returns ``true`` if ``x`` is the special float ``NAN``. Returns ``false`` when the argument is not of number type.
.. function:: is_num(x)
Checks for numbers.
.. function:: is_bytes(x)
Checks for bytes.
.. function:: is_list(x)
Checks for lists.
.. function:: is_string(x)
Checks for strings.
.. function:: is_uuid(x)
Checks for UUIDs.
-----------------
Random functions
-----------------
.. module:: Func.Rand
:noindex:
.. function:: rand_float()
Generates a float in the interval [0, 1], sampled uniformly.
.. function:: rand_bernoulli(p)
Generates a boolean with probability ``p`` of being ``true``.
.. function:: rand_int(lower, upper)
Generates an integer within the given bounds; both bounds are inclusive.
.. function:: rand_choose(list)
Randomly chooses an element from ``list`` and returns it. If the list is empty, it returns ``null``.
.. function:: rand_uuid_v1()
Generates a random UUID, version 1 (random bits plus timestamp).
.. function:: rand_uuid_v4()
Generates a random UUID, version 4 (completely random bits).
------------------
Regex functions
------------------
.. module:: Func.Regex
:noindex:
.. function:: regex_matches(x, reg)
Tests if ``x`` matches the regular expression ``reg``.
.. function:: regex_replace(x, reg, y)
Replaces the first occurrence of the pattern ``reg`` in ``x`` with ``y``.
.. function:: regex_replace_all(x, reg, y)
Replaces all occurrences of the pattern ``reg`` in ``x`` with ``y``.
.. function:: regex_extract(x, reg)
Extracts all occurrences of the pattern ``reg`` in ``x`` and returns them in a list.
.. function:: regex_extract_first(x, reg)
Extracts the first occurrence of the pattern ``reg`` in ``x`` and returns it. If none is found, returns ``null``.
^^^^^^^^^^^^^^^^^
Regex syntax
^^^^^^^^^^^^^^^^^
Matching one character::
. any character except new line
\d digit (\p{Nd})
\D not digit
\pN One-letter name Unicode character class
\p{Greek} Unicode character class (general category or script)
\PN Negated one-letter name Unicode character class
\P{Greek} negated Unicode character class (general category or script)
Character classes::
[xyz] A character class matching either x, y or z (union).
[^xyz] A character class matching any character except x, y and z.
[a-z] A character class matching any character in range a-z.
[[:alpha:]] ASCII character class ([A-Za-z])
[[:^alpha:]] Negated ASCII character class ([^A-Za-z])
[x[^xyz]] Nested/grouping character class (matching any character except y and z)
[a-y&&xyz] Intersection (matching x or y)
[0-9&&[^4]] Subtraction using intersection and negation (matching 0-9 except 4)
[0-9--4] Direct subtraction (matching 0-9 except 4)
[a-g~~b-h] Symmetric difference (matching `a` and `h` only)
[\[\]] Escaping in character classes (matching [ or ])
Composites::
xy concatenation (x followed by y)
x|y alternation (x or y, prefer x)
Repetitions::
x* zero or more of x (greedy)
x+ one or more of x (greedy)
x? zero or one of x (greedy)
x*? zero or more of x (ungreedy/lazy)
x+? one or more of x (ungreedy/lazy)
x?? zero or one of x (ungreedy/lazy)
x{n,m} at least n x and at most m x (greedy)
x{n,} at least n x (greedy)
x{n} exactly n x
x{n,m}? at least n x and at most m x (ungreedy/lazy)
x{n,}? at least n x (ungreedy/lazy)
x{n}? exactly n x
Empty matches::
^ the beginning of the text
$ the end of the text
\A only the beginning of the text
\z only the end of the text
\b a Unicode word boundary (\w on one side and \W, \A, or \z on the other)
\B not a Unicode word boundary
--------------------
Timestamp functions
--------------------
.. function:: now()
Returns the current timestamp as seconds since the UNIX epoch.
.. function:: format_timestamp(ts, tz?)
Interprets ``ts`` as seconds since the epoch and formats it as a string according to `RFC3339 <https://www.rfc-editor.org/rfc/rfc3339>`_.
If a second string argument is provided, it is interpreted as a `timezone <https://en.wikipedia.org/wiki/Tz_database>`_ and used to format the timestamp.
.. function:: parse_timestamp(str)
Parses ``str`` into seconds since the epoch according to RFC3339.

@ -1,30 +0,0 @@
.. Cozo documentation master file, created by
sphinx-quickstart on Mon Sep 12 12:36:08 2022.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
The Cozo Database Manual
=====================================
.. toctree::
:maxdepth: 2
:caption: Contents
setup
queries
stored
sysops
datatypes
execution
functions
aggregations
algorithms
.. only:: html
------------
Indices
------------
* :ref:`genindex`

@ -1,314 +0,0 @@
==============
Queries
==============
CozoScript, a `Datalog <https://en.wikipedia.org/wiki/Datalog>`_ dialect, is the query language of Cozo.
A CozoScript query consists of one or many named rules.
Each named rule represents a *relation*, i.e. a collection of data divided into rows and columns.
The rule named ``?`` is the *entry* to the query,
and the relation it represents is the result of the query.
Each named rule has a rule head, which corresponds to the columns of the relation,
and a rule body, which specifies the content of the relation, or how the content should be computed.
Relations in Cozo (stored or otherwise) abide by the *set semantics*.
Thus even if a rule computes a row multiple times,
the resulting relation only contains a single copy.
There are two types of named rules in CozoScript:
* *Inline rules*, distinguished by using ``:=`` to connect the head and the body.
The logic used to compute the resulting relation is defined *inline*.
* *Fixed rules*, distinguished by using ``<~`` to connect the head and the body.
The logic used to compute the resulting relation is *fixed* according to which algorithm or utility is requested.
The *constant rules* which use ``<-`` to connect the head and the body are syntax sugar. For example::
const_rule[a, b, c] <- [[1, 2, 3], [4, 5, 6]]
is identical to::
const_rule[a, b, c] <~ Constant(data: [[1, 2, 3], [4, 5, 6]])
-----------------
Inline rules
-----------------
An example of an inline rule is::
hc_rule[a, e] := rule_a['constant_string', b], rule_b[b, d, a, e]
The rule body of an inline rule consists of multiple *atoms* joined by commas,
and is interpreted as representing the *conjunction* of these atoms.
^^^^^^^^^^^^^^
Atoms
^^^^^^^^^^^^^^
Atoms come in various flavours.
In the example above::
rule_a['constant_string', b]
is an atom representing a *rule application*: a rule named ``rule_a`` must exist in the same query
and have the correct arity (2 here).
Each row in the named rule is then *unified* with the bindings given as parameters in the square bracket:
here the first column is unified with a constant string, and unification succeeds only when the string
completely matches what is given;
the second column is unified with the *variable* ``b``,
and as the variable is fresh at this point (because this is its first appearance),
the unification will always succeed. For subsequent atoms, the variable becomes *bound*:
it takes on the value of whatever it was
unified with in the named relation.
When a bound variable is unified again, for example ``b`` in ``rule_b[b, d, a, e]``,
this unification will only succeed when the unified value is the same as the current value.
Thus, repeated use of the same variable in named rules corresponds to inner joins in relational algebra.
Atoms representing applications of *stored relations* are written as::
*stored_relation[bind1, bind2]
with the asterisk before the name.
Written in this way using square brackets, as many bindings as the arity of the stored relation must be given.
You can also bind columns by name::
*stored_relation{col1: bind1, col2: bind2}
In this form, any number of columns may be omitted.
If the name you want to give the binding is the same as the name of the column, you can write instead
``*stored_relation{col1}``, which is the same as ``*stored_relation{col1: col1}``.
*Expressions* are also atoms, such as::
a > b + 1
``a`` and ``b`` must be bound somewhere else in the rule. Expression atoms must evaluate to booleans,
and act as *filters*. Only rows where the expression atom evaluates to ``true`` are kept.
*Unification atoms* unify explicitly::
a = b + c + d
Whatever appears on the left-hand side must be a single variable and is unified with the result of the right-hand side.
.. NOTE::
This is different from the equality operator ``==``,
where the left-hand side is a completely bound expression.
When the left-hand side is a single *bound* variable,
the equality and the unification operators are equivalent.
*Unification atoms* can also unify with multiple values in a list::
a in [x, y, z]
If the right-hand side does not evaluate to a list, an error is raised.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Head
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
As explained above, atoms correspond to either relations, projections or filters in relational algebra.
Linked by commas, they therefore represent a joined relation, with columns either constants or variables.
The *head* of the rule, which in the simplest case is just a list of variables,
then defines the columns to keep in the output relation and their order.
Each variable in the head must be bound in the body (the *safety rule*).
Not all variables appearing in the body need to appear in the head.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Multiple definitions and disjunction
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
For inline rules only, multiple rule definitions may share the same name,
with the requirement that the arity of the head in each definition must match.
The returned relation is then formed by the *disjunction* of the multiple definitions (a *union* of rows).
You may also use the explicit disjunction operator ``or`` in a single rule definition::
rule1[a, b] := rule2[a] or rule3[a], rule4[a, b]
There is also an ``and`` operator, semantically identical to the comma ``,``
but with higher operator precedence than ``or`` (the comma has the lowest precedence).
^^^^^^^^^^^^^^^^
Negation
^^^^^^^^^^^^^^^^
Atoms in inline rules may be *negated* by putting ``not`` in front of them::
not rule1[a, b]
When negating rule applications and stored relations,
at least one binding must be bound somewhere else in the rule in a non-negated context (another *safety rule*).
The unbound bindings in negated rules remain unbound: negation cannot introduce new bindings to be used in the head.
Negated expressions act as negative filters,
which is semantically equivalent to putting ``!`` in front of the expression.
Explicit unification cannot be negated unless the left-hand side is bound,
in which case it is treated as an expression atom and then negated.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Recursion and stratification
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The body of an inline rule may contain rule applications of itself,
and multiple inline rules may apply each other recursively.
The only exception is the entry rule ``?``, which cannot be referred to by other rules including itself.
Recursion cannot occur in negated positions (*safety rule*): ``r[a] := not r[a]`` is not allowed.
.. WARNING::
As CozoScript allows explicit unification,
queries that produce infinite relations may be accepted by the compiler.
One of the simplest examples is::
r[a] := a = 0
r[a] := r[b], a = b + 1
?[a] := r[a]
It is not even in principle possible for Cozo to rule out all infinite queries without wrongly rejecting valid ones.
If you accidentally submitted one, refer to the system ops chapter for how to terminate queries.
Alternatively, you can give a timeout for the query when you submit.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Aggregation
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In CozoScript, aggregations are specified for inline rules by applying *aggregation operators* to variables
in the rule head::
?[department, count(employee)] := *personnel{department, employee}
Here we have used the familiar ``count`` operator.
Any variables in the head without aggregation operators are treated as *grouping variables*,
and aggregation is applied using them as keys.
If you do not specify any grouping variables, then the resulting relation contains at most one row.
Aggregation operators are applied to the rows computed by the body of the rule using bag semantics.
The reason for this complication is that if aggregations are applied with set semantics, then the following query::
?[count(employee)] := *personnel{employee}
does not do what you expect: it either returns a row with a single value ``1`` if there are any matching rows,
or it returns nothing at all if the stored relation is empty.
If a rule has several definitions, they must have identical aggregations applied in the same positions.
Cozo allows aggregations for self-recursion for a limited subset of aggregation operators,
the so-called *semi-lattice aggregations*::
shortest_distance[destination, min(distance)] :=
route{source: 'A', destination, distance}
shortest_distance[destination, min(distance)] :=
shortest_distance[existing_node, prev_distance], # recursion
route{source: existing_node, distance: route_distance},
distance = prev_distance + route_distance
?[destination, min_distance] :=
shortest_distance[destination, min_distance]
Here self-recursion of ``shortest_distance`` contains the ``min`` aggregation.
----------------------------------
Fixed rules
----------------------------------
The body of a fixed rule starts with the name of the utility or algorithm being applied,
then takes a specified number of named or stored relations as its *input relations*,
followed by *options* that you provide.
For example::
?[] <~ PageRank(*route[], theta: 0.5)
In the above example, the relation ``*route`` is the single input relation expected.
Input relations may be stored relations or relations resulting from rules.
Each utility/algorithm expects specific shapes for their input relations.
You must consult the documentation for each utility/algorithm to understand its API.
In fixed rules, bindings for input relations are usually omitted, but sometimes if they are provided
they are interpreted and used in algorithm-specific ways, for example in the DFS algorithm bindings.
In the example above, ``theta`` is an option of the algorithm,
which is required by the API to be an expression evaluating to a constant.
Each utility/algorithm expects specific types for the options;
some options have default values and may be omitted.
Each fixed rule has a determinate output arity.
Thus, the bindings in the rule head can be omitted,
but if they are provided, you must abide by the arity.
-----------------------
Query options
-----------------------
Each query can have options associated with it::
?[name] := *personnel{name}
:limit 10
:offset 20
In the example, ``:limit`` and ``:offset`` are query options with familiar meanings.
All query options start with a single colon ``:``.
Query options can appear before or after rules, or even sandwiched between rules.
Several query options deal with transactions for the database.
Those will be discussed in the chapter on stored relations and transactions.
The rest of the query options are explained in the following.
.. module:: QueryOp
:noindex:
.. function:: :limit <N>
Limit output relation to at most ``<N>`` rows.
If possible, execution will stop as soon as this number of output rows is collected.
.. function:: :offset <N>
Skip the first ``<N>`` rows of the returned relation.
.. function:: :timeout <N>
Abort if the query does not complete within ``<N>`` seconds.
Seconds may be specified as an expression so that random timeouts are possible.
.. function:: :sleep <N>
If specified, the query will wait for ``<N>`` seconds after completion,
before committing or proceeding to the next query.
Seconds may be specified as an expression so that random sleep durations are possible.
Useful for deliberately interleaving concurrent queries to test complex logic.
.. function:: :sort <SORT_ARG> (, <SORT_ARG>)*
Sort the output relation. If ``:limit`` or ``:offset`` are specified, they are applied after ``:sort``.
Specify ``<SORT_ARG>`` as they appear in the rule head of the entry, separated by commas.
You can optionally specify the sort direction of each argument by prefixing them with ``+`` or ``-``,
with minus denoting descending order, e.g. ``:sort -count(employee), dept_name``
sorts by employee count in reverse order first,
then breaks ties with department name in ascending alphabetical order.
.. WARNING::
Aggregations must be done in inline rules, not in output sorting. In the above example,
the entry rule head must contain ``count(employee)``; ``employee`` alone is not acceptable.
.. function:: :order <SORT_ARG> (, <SORT_ARG>)*
Alias for ``:sort``.
.. function:: :assert none
The query returns nothing if the output relation is empty, otherwise execution aborts with an error.
Useful for transactions and triggers.
.. function:: :assert some
The query returns nothing if the output relation contains at least one row,
otherwise, execution aborts with an error.
Useful for transactions and triggers.
You should consider adding ``:limit 1`` to the query to ensure early termination
if you do not need to check all return tuples.

@ -1,226 +0,0 @@
======================
Getting started
======================
Welcome to the Cozo Manual. The latest version of this manual can be read at https://cozodb.github.io/current/manual.
Alternatively, you can download a PDF version for offline viewing at https://cozodb.github.io/current/manual.pdf.
This manual touches upon all features currently implemented in Cozo,
though the coverage of some topics may be sketchy at this stage.
This manual assumes that you already know the basics of the Cozo database,
at the level of the `Tutorial <https://nbviewer.org/github/cozodb/cozo/blob/main/docs/tutorial/tutorial.ipynb>`_.
------------------------
Downloading Cozo
------------------------
Cozo is distributed as a single executable.
Precompiled binaries can be downloaded from the `release page <https://github.com/cozodb/cozo/releases>`_,
currently available for Linux (Intel x64), Mac (Intel x64 and Apple ARM) and Windows (Intel x64).
For Windows users,
we recommend running Cozo under `WSL <https://learn.microsoft.com/en-us/windows/wsl/install>`_ if possible,
especially if your workload is heavy, as the Windows version runs more slowly.
---------------
Starting Cozo
---------------
Run the ``cozoserver`` command in a terminal::
./cozoserver <PATH_TO_DATA_DIRECTORY>
If ``<PATH_TO_DATA_DIRECTORY>`` does not exist, it will be created.
Cozo will then start a web server and bind to address ``127.0.0.1`` and port ``9070``.
These two can be customized: run the executable with the ``-h`` option to learn how.
To stop Cozo, press ``CTRL-C`` in the terminal, or send ``SIGTERM`` to the process with e.g. ``kill``.
-----------------------
The query API
-----------------------
Queries are run by sending HTTP POST requests to the server.
By default, the API endpoint is ``http://127.0.0.1:9070/text-query``.
A JSON body is expected::
{
"script": "<COZOSCRIPT QUERY STRING>",
"params": {}
}
``params`` should be an object of named parameters.
For example, if ``params`` is ``{"num": 1}``,
then ``$num`` can be used anywhere in your query string where an expression is expected.
Always use ``params`` instead of concatenating strings when you need parametrized queries.
.. WARNING::
Cozo is designed to run in a trusted environment and be used by trusted clients.
It does not come with elaborate authentication and security features.
If you must access Cozo remotely,
you are responsible for setting up firewalls, encryption and proxies yourself.
As a guard against users accidentally exposing sensitive data,
if you bind Cozo to non-loopback addresses,
Cozo will generate a token string and require all queries
from non-loopback addresses to provide the token string
in the HTTP header field ``x-cozo-auth``.
The warning printed when you start Cozo with a non-default binding will tell you
where to find the token string.
This "security measure" is not considered sufficient for any purpose
and is only intended as a last defence against carelessness.
--------------------------------------------------
Running queries
--------------------------------------------------
^^^^^^^^^^^^^^^^^^^^^^^^^^
Making HTTP requests
^^^^^^^^^^^^^^^^^^^^^^^^^^
As Cozo has an HTTP-based API,
it is accessible by all languages that are capable of making web requests.
The structure of the API is also deliberately kept minimal so that no dedicated clients are necessary.
As an example, the following runs a system op with the ``curl`` command line tool::
curl -X POST localhost:9070/text-query \
-H 'content-type: application/json' \
-d '{"script": "::running", "params": {}}'
The responses are JSON when queries are successful,
or text descriptions when errors occur,
so a language only needs to be able to process JSON to use Cozo.
^^^^^^^^^^^^^^^^^^^^^^^^^
JupyterLab
^^^^^^^^^^^^^^^^^^^^^^^^^
Cozo has special support for running queries in `JupyterLab <https://jupyterlab.readthedocs.io/en/stable/>`_,
a web-based notebook interface
in the python ecosystem heavily used by data scientists.
First, install JupyterLab by following its instructions.
Then install the ``pycozo`` library::
pip install "pycozo[pandas]"
Open the JupyterLab web interface, start a Python 3 kernel,
and in a cell run the following `magic command <https://ipython.readthedocs.io/en/stable/interactive/magics.html>`_::
%load_ext pycozo.ipyext_direct
If you need to connect to Cozo using a non-default address or port,
or you require an authentication string, you need to run the following magic commands as well::
%cozo_host http://<ADDRESS>:<PORT>
%cozo_auth <AUTH_STRING>
Now, when you execute cells in the notebook,
the content will be sent to Cozo and interpreted as CozoScript.
Returned relations will be formatted as `Pandas dataframe <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
The extension ``pycozo.ipyext_direct`` used above sets up the notebook in the Direct Cozo mode,
where cells are by default interpreted as CozoScript.
Python code can be run by starting the first line of a cell with ``%%py``.
The Indirect Cozo mode can be started by::
%load_ext pycozo.ipyext
In this mode, only cells with the first line ``%%cozo`` are interpreted as CozoScript.
Other cells are interpreted in the normal way (python code).
The Indirect mode is useful if you need post-processing and visualizations.
When a query is executed successfully,
the resulting Pandas dataframe will be bound to the python variable ``_``.
A few other magic commands are available:
* ``%cozo_run_file <PATH_TO_FILE>`` runs a local file as CozoScript.
* ``%cozo_run_string <VARIABLE>`` runs a variable containing a string as CozoScript.
* ``%cozo_set <KEY> <VALUE>`` sets a parameter with the name ``<KEY>`` to the expression ``<VALUE>``.
The updated parameters will be used by subsequent queries.
* ``%cozo_set_params <PARAM_MAP>`` replaces all parameters by the given expression,
which must evaluate to a dictionary with string keys.
* ``%cozo_clear`` clears all set parameters.
* ``%cozo_params`` returns the parameters currently set.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Python
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
You can also use the Python client without JupyterLab. First::
pip install pycozo
Next, in your Python code, do something like the following::
from pycozo.client import Client
client = Client(host='http://127.0.0.1:9070', auth=None, dataframe=False)
print(client.run('::relations'))
If ``dataframe=True``, the client will transform the returned relation into Pandas dataframes, which must be separately installed.
The ``client.run`` method also takes an optional second argument ``params``.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Web Browser
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If you are reluctant to install python and Jupyter, you may consider the Makeshift JavaScript Console.
To get started, you need a browser on your local machine.
We recommend `Firefox <https://www.mozilla.org/en-US/firefox/new/>`_, `Chrome <https://www.google.com/chrome/>`_,
or any Chromium-based browser for best display.
If Cozo is running under the default configuration,
navigate to ``http://127.0.0.1:9070``.
You should be greeted with a mostly empty page telling you that Cozo is running.
Now open the Developer Console
(`Firefox console <https://firefox-source-docs.mozilla.org/devtools-user/browser_console/index.html>`_
or `Chrome console <https://developer.chrome.com/docs/devtools/console/javascript/>`_)
and switch to the "Console" tab. Now you can execute CozoScript by running::
await run("<COZOSCRIPT>")
The returned relations will be formatted as tables.
If you need to pass in parameters, provide a second parameter with a JavaScript object::
await run("<COZOSCRIPT>", <PARAMS>)
If you need to set an auth string, modify the global variable ``COZO_AUTH``.
----------------------------
Building Cozo from source
----------------------------
If for some reason the binary distribution does not work for you,
you can build Cozo from source.
You need to install the `Rust toolchain <https://www.rust-lang.org/tools/install>`_ on your system.
You also need a C++17 compiler.
Clone the Cozo git repo::
git clone https://github.com/cozodb/cozo.git --recursive
You need to pass the ``--recursive`` flag so that submodules are also cloned. Next, run in the root of the cloned repo::
cargo build --release
Wait for potentially a long time, and you will find the compiled binary in ``target/release``.
You can run ``cargo build --release -F jemalloc`` instead
to indicate that you want to compile and use jemalloc as the memory allocator for the RocksDB storage backend,
which can make a difference in performance depending on your workload.
--------------------------------
Embedding Cozo
--------------------------------
You can run Cozo in the same process as your main program.
For Rust programs, as ``cozoserver`` is just a very thin wrapper around the Cozo rust library,
you can use the library directly.
For languages other than Rust, you will need to provide custom bindings,
but again for `Python <https://pyo3.rs/>`_ and `NodeJS <https://neon-bindings.com/>`_ this is trivial.

@ -1,259 +0,0 @@
====================================
Stored relations and transactions
====================================
In Cozo, data are stored in *stored relations* on disk.
---------------------------
Stored relations
---------------------------
To query stored relations,
use the ``*relation[...]`` or ``*relation{...}`` atoms in inline or fixed rules,
as explained in the last chapter.
To manipulate stored relations, use one of the following query options:
.. module:: QueryOp
:noindex:
.. function:: :create <NAME> <SPEC>
Create a stored relation with the given name and spec.
No stored relation with the same name can exist beforehand.
If a query is specified, data from the resulting relation is put into the newly created stored relation.
This is the only stored relation-related query option in which a query may be omitted.
.. function:: :replace <NAME> <SPEC>
Similar to ``:create``, except that if the named stored relation exists beforehand,
it is completely replaced. The schema of the replaced relation need not match the new one.
You cannot omit the query for ``:replace``.
If there are any triggers associated, they will be preserved. Note that this may lead to errors if ``:replace``
leads to schema change.
.. function:: :put <NAME> <SPEC>
Put rows from the resulting relation into the named stored relation.
If keys from the data exist beforehand, the corresponding rows are replaced with new ones.
.. function:: :ensure <NAME> <SPEC>
Ensure that rows specified by the output relation and spec exist in the database,
and that no other process has written to these rows when the enclosing transaction commits.
Useful for ensuring read-write consistency.
.. function:: :rm <NAME> <SPEC>
Remove rows from the named stored relation. Only keys should be specified in ``<SPEC>``.
Removing a non-existent key is not an error and does nothing.
.. function:: :ensure_not <NAME> <SPEC>
Ensure that rows specified by the output relation and spec do not exist in the database
and that no other process has written to these rows when the enclosing transaction commits.
Useful for ensuring read-write consistency.
You can rename and remove stored relations with the system ops ``::rename`` and ``::remove``,
described in the system op chapter.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Create and replace
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The format of ``<SPEC>`` is identical for all six ops, but the semantics are a bit different.
We first describe the format and semantics for ``:create`` and ``:replace``.
A spec, or a specification for columns, is enclosed in curly braces ``{}`` and separated by commas::
?[address, company_name, department_name, head_count] <- $input_data
:create dept_info {
company_name: String,
department_name: String,
=>
head_count: Int,
address: String,
}
Columns before the symbol ``=>`` form the *keys* (actually a composite key) for the stored relation,
and those after it form the *values*.
If all columns are keys, the symbol ``=>`` may be omitted.
The order of columns matters.
Rows are stored in lexicographically sorted order in trees according to their keys.
In the above example, we explicitly specified the types for all columns.
In case of type mismatch,
the system will first try to coerce the values given, and if that fails, the query is aborted with an error.
You can omit types for columns, in which case their types default to ``Any?``,
i.e. all values are acceptable.
For example, the above query with all types omitted is::
?[address, company_name, department_name, head_count] <- $input_data
:create dept_info { company_name, department_name => head_count, address }
In the example, the bindings for the output match the columns exactly (though not in the same order).
You can also explicitly specify the correspondence::
?[a, b, count(c)] <- $input_data
:create dept_info {
company_name = a,
department_name = b,
=>
head_count = count(c),
address: String = b
}
You *must* use explicit correspondence if the entry head contains aggregation,
since names such as ``count(c)`` are not valid column names.
The ``address`` field above shows how to specify both a type and a correspondence.
Instead of specifying bindings, you can specify an expression that generates default values by using ``default``::
?[a, b] <- $input_data
:create dept_info {
company_name = a,
department_name = b,
=>
head_count default 0,
address default ''
}
The expression is evaluated anew for each row, so if you specify a UUID-generating function,
you will get a different UUID for each row.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Put, remove, ensure and ensure-not
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
For ``:put``, ``:rm``, ``:ensure`` and ``:ensure_not``,
you do not need to specify all existing columns in the spec if the omitted columns have a default generator,
or if the type of the column is nullable, in which case the value defaults to ``null``.
For these operations, specifying default values does not have any effect and will not replace existing ones.
For ``:put`` and ``:ensure``, the spec needs to contain enough bindings to generate all keys and values.
For ``:rm`` and ``:ensure_not``, it only needs to generate all keys.
------------------------------------------------------
Chaining queries
------------------------------------------------------
Each script you send to Cozo is executed in its own transaction.
To ensure consistency of multiple operations on data,
you can define multiple queries in a single script,
by wrapping each query in curly braces ``{}``.
Each query can have its independent query options.
Execution proceeds for each query serially, and aborts at the first error encountered.
The returned relation is that of the last query.
The ``:assert (some|none)``, ``:ensure`` and ``:ensure_not`` query options allow you to express complicated constraints
that must be satisfied for your transaction to commit.
This example uses three queries to put and remove rows atomically
(either all succeed or all fail), and ensure that at the end of the transaction
an untouched row exists::
{
?[a, b] <- [[1, 'one'], [3, 'three']]
:put rel {a => b}
}
{
?[a] <- [[2]]
:rm rel {a}
}
{
?[a, b] <- [[4, 'four']]
:ensure rel {a => b}
}
When a transaction starts, a snapshot is used,
so that only already committed data,
or data written within the same transaction, are visible to queries.
At the end of the transaction, changes are only committed if there are no conflicts
and no errors are raised.
If any mutation activates triggers, those triggers execute in the same transaction.
------------------------------------------------------
Triggers and indices
------------------------------------------------------
Cozo does not have traditional indices on stored relations.
Instead, you define regular stored relations that are used as indices.
At query time, you explicitly query the index instead of the original stored relation.
You synchronize your indices and the original by ensuring that any mutations you do on the database
write the correct data to the "canonical" relation and its indices in the same transaction.
As doing this by hand for every mutation leads to lots of repetitions
and is error-prone,
Cozo supports *triggers* to do it automatically for you.
You attach triggers to a stored relation by running the system op ``::set_triggers``::
::set_triggers <REL_NAME>
on put { <QUERY> }
on rm { <QUERY> }
on replace { <QUERY> }
on put { <QUERY> } # you can specify as many triggers as you need
``<QUERY>`` can be any valid query.
The ``on put`` triggers will run when new data is inserted or upserted,
which can be activated by ``:put``, ``:create`` and ``:replace`` query options.
The implicitly defined rules ``_new[]`` and ``_old[]`` can be used in the triggers, and
contain the added rows and the replaced rows respectively.
The ``on rm`` triggers will run when data is deleted, which can be activated by a ``:rm`` query option.
The implicitly defined rules ``_new[]`` and ``_old[]`` can be used in the triggers,
and contain, respectively, the keys of the rows requested for deletion (even if no row with the key actually exists) and the rows
actually deleted (with both keys and non-keys).
The ``on replace`` triggers will be activated by a ``:replace`` query option.
They are run before any ``on put`` triggers.
All triggers for a relation must be specified together, in the same ``::set_triggers`` system op.
If used again, all the triggers associated with the stored relation are replaced.
To remove all triggers from a stored relation, use ``::set_triggers <REL_NAME>`` followed by nothing.
As an example of using triggers to maintain an index, suppose we have the following relation::
:create rel {a => b}
We often want to query ``*rel[a, b]`` with ``b`` bound but ``a`` unbound. This will cause a full scan,
which can be expensive. So we need an index::
:create rel.rev {b, a}
In the general case, we cannot assume a functional dependency ``b => a``, so in the index both fields appear as keys.
To manage the index automatically::
::set_triggers rel
on put {
?[a, b] := _new[a, b]
:put rel.rev{ b, a }
}
on rm {
?[a, b] := _old[a, b]
:rm rel.rev{ b, a }
}
With the index set up, you can use ``*rel.rev{..}`` in place of ``*rel{..}`` in your queries.
Indices in Cozo are manual, but extremely flexible, since you need not conform to any predetermined patterns
in your use of ``_old[]`` and ``_new[]``.
For simple queries, the need to explicitly elect to use an index can seem cumbersome,
but for complex ones, the deterministic evaluation entailed can be a huge blessing.
Triggers can be creatively used for other purposes as well.
.. WARNING::
Loops in your triggers can cause non-termination.
A loop occurs when a relation has triggers which affect other relations,
which in turn have other triggers that ultimately affect the starting relation.

@ -1,82 +0,0 @@
==============
System ops
==============
.. module:: SysOp
:noindex:
System ops start with a double-colon ``::`` and must appear alone in a script.
In the following, we explain what each system op does, and the arguments they expect.
--------------
Explain
--------------
.. function:: ::explain { <QUERY> }
A single query is enclosed in curly braces. Query options are allowed but ignored.
The query is not executed, but its query plan is returned instead.
Currently, there is no specification for the return format,
but if you are familiar with the semi-naïve evaluation of stratified Datalog programs
subject to magic-set rewrites, you can decipher the result.
----------------------------------
Ops for stored relations
----------------------------------
.. function:: ::relations
List all stored relations in the database.
.. function:: ::columns <REL_NAME>
List all columns for the stored relation ``<REL_NAME>``.
.. function:: ::remove <REL_NAME> (, <REL_NAME>)*
Remove stored relations. Several can be specified, joined by commas.
.. function:: ::rename <OLD_NAME> -> <NEW_NAME> (, <OLD_NAME> -> <NEW_NAME>)*
Rename stored relation ``<OLD_NAME>`` into ``<NEW_NAME>``. Several may be specified, joined by commas.
.. function:: ::show_triggers <REL_NAME>
Display triggers associated with the stored relation ``<REL_NAME>``.
.. function:: ::set_triggers <REL_NAME> ...
Set triggers for the stored relation ``<REL_NAME>``. This is explained in more detail in the transaction chapter.
.. function:: ::access_level <ACCESS_LEVEL> <REL_NAME> (, <REL_NAME>)*
Sets the access level of ``<REL_NAME>`` to the given level. The levels are:
* ``normal`` allows everything,
* ``protected`` disallows ``::remove`` and ``:replace``,
* ``read_only`` additionally disallows any mutations and setting triggers,
* ``hidden`` additionally disallows any data access (metadata access via ``::relations``, etc., is still allowed).
The access level functionality is to protect data from mistakes of the programmer,
not from attacks by malicious parties.
------------------------------------
Monitor and kill
------------------------------------
.. function:: ::running
Display running queries and their IDs.
.. function:: ::kill <ID>
Kill a running query specified by ``<ID>``. The ID may be obtained by ``::running``.
------------------------------------
Maintenance
------------------------------------
.. function:: ::compact
Instructs Cozo to run a compaction job.
Compaction makes the database smaller on disk and faster for read queries.

File diff suppressed because it is too large Load Diff

25
java/.gitignore vendored

@ -1,25 +0,0 @@
### Java template
# Compiled class file
*.class
# Log file
*.log
# BlueJ files
*.ctxt
# Mobile Tools for Java (J2ME)
.mtj.tmp/
# Package Files #
*.jar
*.war
*.nar
*.ear
*.zip
*.tar.gz
*.rar
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*

@ -1,20 +0,0 @@
[package]
name = "cozo_java"
version = "0.1.1"
edition = "2021"
[lib]
crate-type = ["cdylib"]
[features]
jemalloc = ["cozo/jemalloc"]
io-uring = ["cozo/io-uring"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
robusta_jni = "0.2.0"
cozo = { version = "0.1.1", path = ".." }
miette = { version = "=5.3.0", features = ["fancy"] }
serde_json = "1.0.81"
lazy_static = "1.4.0"

@ -1,40 +0,0 @@
package org.cozodb;
import java.util.*;
/**
 * Thin Java wrapper around the native {@code cozo_java} library.
 *
 * <p>Each instance holds an integer handle returned by the native
 * {@code openDb} call; queries and {@link #close()} are forwarded to the
 * native side using that handle.
 */
class CozoDb {
    /** Prefix stripped from exception messages before they are printed in {@link #main}. */
    private static final String JNI_ERROR_PREFIX = "JNI call error!. Cause: ";

    private static native int openDb(String path);

    private static native boolean closeDb(int id);

    private static native String runQuery(int id, String script, String params);

    /** Handle identifying this database on the native side. */
    private int dbId;

    static {
        System.loadLibrary("cozo_java");
    }

    /**
     * Opens the database stored at {@code path}.
     *
     * @param path location of the database passed to the native library
     */
    CozoDb(String path) {
        this.dbId = CozoDb.openDb(path);
    }

    /**
     * Runs a script against this database.
     *
     * @param script the query text
     * @param params the query parameters, encoded as a JSON object string
     * @return the string returned by the native {@code runQuery} call
     */
    String query(String script, String params) {
        return CozoDb.runQuery(this.dbId, script, params);
    }

    /**
     * Closes this database.
     *
     * @return the boolean returned by the native {@code closeDb} call
     */
    boolean close() {
        return CozoDb.closeDb(this.dbId);
    }

    /** Small smoke test: opens a database, runs one valid and one failing query, then closes it. */
    public static void main(String[] args) {
        CozoDb db = new CozoDb("_test_db");
        System.out.println(db);
        System.out.println(db.query("?[] <- [[1, 2, 3]]", "{}"));
        try {
            System.out.println(db.query("?[z] <- [[1, 2, 3]]", "{}"));
        } catch (Exception e) {
            // Only strip the prefix when it is really there: an unconditional
            // substring() would throw on a null or shorter message and hide the
            // original error.
            String msg = e.getMessage();
            if (msg != null && msg.startsWith(JNI_ERROR_PREFIX)) {
                msg = msg.substring(JNI_ERROR_PREFIX.length());
            }
            System.out.println(msg);
        }
        System.out.println(db.close());
    }
}

@ -1,94 +0,0 @@
/*
* Copyright 2022, The Cozo Project Authors. Licensed under MIT/Apache-2.0/BSD-3-Clause.
*/
use std::collections::BTreeMap;
use std::sync::atomic::AtomicI32;
use std::sync::Mutex;
use lazy_static::lazy_static;
use robusta_jni::bridge;
use cozo::Db;
/// Registry of open databases, addressed by integer handle.
#[derive(Default)]
struct Handles {
/// Counter from which the next handle id is taken.
current: AtomicI32,
/// Open databases keyed by handle id, guarded by a mutex.
dbs: Mutex<BTreeMap<i32, Db>>,
}
// Global handle registry used by the JNI entry points (referenced there as `crate::HANDLES`).
lazy_static! {
static ref HANDLES: Handles = Handles::default();
}
#[bridge]
mod jni {
    use std::collections::BTreeMap;
    use std::sync::atomic::Ordering;

    use robusta_jni::convert::{IntoJavaValue, Signature, TryFromJavaValue, TryIntoJavaValue};
    use robusta_jni::jni::errors::Error as JniError;
    use robusta_jni::jni::errors::Result as JniResult;
    use robusta_jni::jni::objects::AutoLocal;

    use cozo::Db;

    use crate::HANDLES;

    // Rust-side view of the Java class `org.cozodb.CozoDb`.
    #[derive(Signature, TryIntoJavaValue, IntoJavaValue, TryFromJavaValue)]
    #[package(org.cozodb)]
    pub struct CozoDb<'env: 'borrow, 'borrow> {
        #[instance]
        raw: AutoLocal<'env, 'borrow>,
    }

    impl<'env: 'borrow, 'borrow> CozoDb<'env, 'borrow> {
        // Opens the database at `path`, registers it in `HANDLES` and returns
        // its handle id. A failure to open is reported as a JNI error carrying
        // the debug rendering of the underlying error.
        pub extern "jni" fn openDb(path: String) -> JniResult<i32> {
            let opened = Db::new(path).map_err(|err| JniError::from(format!("{:?}", err)))?;
            let handle = HANDLES.current.fetch_add(1, Ordering::AcqRel);
            HANDLES.dbs.lock().unwrap().insert(handle, opened);
            Ok(handle)
        }

        // Unregisters the database with the given handle id. Returns whether
        // an entry was actually removed.
        pub extern "jni" fn closeDb(id: i32) -> JniResult<bool> {
            // The lock guard is a temporary of this statement, so the mutex is
            // released before the removed database (if any) is dropped.
            let removed = HANDLES.dbs.lock().unwrap().remove(&id);
            Ok(removed.is_some())
        }

        // Runs `script` against the database registered under `id`, with
        // `params_str` parsed as a JSON object of parameters. Returns the
        // result serialised to a string.
        pub extern "jni" fn runQuery(
            id: i32,
            script: String,
            params_str: String,
        ) -> JniResult<String> {
            // Clone the handle out of the registry so the lock is not held
            // while the query runs.
            let looked_up = HANDLES.dbs.lock().unwrap().get(&id).cloned();
            let db = looked_up.ok_or_else(|| JniError::from("database already closed"))?;

            let params_arg: BTreeMap<_, _> =
                match serde_json::from_str::<serde_json::Value>(&params_str) {
                    Ok(serde_json::Value::Object(entries)) => entries.into_iter().collect(),
                    Ok(_) => {
                        return Err(JniError::from(
                            "the given params argument is not a JSON map",
                        ))
                    }
                    Err(_) => {
                        return Err(JniError::from(
                            "the given params argument is not valid JSON",
                        ))
                    }
                };

            db.run_script(&script, &params_arg)
                .map(|json| json.to_string())
                .map_err(|err| JniError::from(format!("{:?}", err)))
        }
    }
}

5
nodejs/.gitignore vendored

@ -1,5 +0,0 @@
target
index.node
**/node_modules
**/.DS_Store
npm-debug.log*

@ -1,28 +0,0 @@
[package]
name = "cozo-nodejs"
version = "0.1.1"
description = "Cozo database for NodeJS"
authors = ["Ziyang Hu"]
license = "MIT/Apache-2.0/BSD-3-Clause"
edition = "2021"
exclude = ["index.node"]
[lib]
crate-type = ["cdylib"]
[features]
jemalloc = ["cozo/jemalloc"]
io-uring = ["cozo/io-uring"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
cozo = { version = "0.1.1", path = ".." }
miette = { version = "=5.3.0", features = ["fancy"] }
serde_json = "1.0.81"
lazy_static = "1.4.0"
[dependencies.neon]
version = "0.10"
default-features = false
features = ["napi-6", "channel-api"]

@ -1,121 +0,0 @@
# cozo-nodejs
**cozo-nodejs:** Cozo database for NodeJS
This project was bootstrapped by [create-neon](https://www.npmjs.com/package/create-neon).
## Installing cozo-nodejs
Installing cozo-nodejs requires a [supported version of Node and Rust](https://github.com/neon-bindings/neon#platform-support).
You can install the project with npm. In the project directory, run:
```sh
$ npm install
```
This fully installs the project, including installing any dependencies and running the build.
## Building cozo-nodejs
If you have already installed the project and only want to run the build, run:
```sh
$ npm run build
```
This command uses the [cargo-cp-artifact](https://github.com/neon-bindings/cargo-cp-artifact) utility to run the Rust build and copy the built library into `./index.node`.
## Exploring cozo-nodejs
After building cozo-nodejs, you can explore its exports at the Node REPL:
```sh
$ npm install
$ node
> require('.').hello()
"hello node"
```
## Available Scripts
In the project directory, you can run:
### `npm install`
Installs the project, including running `npm run build`.
### `npm build`
Builds the Node addon (`index.node`) from source.
Additional [`cargo build`](https://doc.rust-lang.org/cargo/commands/cargo-build.html) arguments may be passed to `npm build` and `npm build-*` commands. For example, to enable a [cargo feature](https://doc.rust-lang.org/cargo/reference/features.html):
```
npm run build -- --feature=beetle
```
#### `npm build-debug`
Alias for `npm build`.
#### `npm build-release`
Same as [`npm build`](#npm-build) but, builds the module with the [`release`](https://doc.rust-lang.org/cargo/reference/profiles.html#release) profile. Release builds will compile slower, but run faster.
### `npm test`
Runs the unit tests by calling `cargo test`. You can learn more about [adding tests to your Rust code](https://doc.rust-lang.org/book/ch11-01-writing-tests.html) from the [Rust book](https://doc.rust-lang.org/book/).
## Project Layout
The directory structure of this project is:
```
cozo-nodejs/
├── Cargo.toml
├── README.md
├── index.node
├── package.json
├── src/
| └── lib.rs
└── target/
```
### Cargo.toml
The Cargo [manifest file](https://doc.rust-lang.org/cargo/reference/manifest.html), which informs the `cargo` command.
### README.md
This file.
### index.node
The Node addon—i.e., a binary Node module—generated by building the project. This is the main module for this package, as dictated by the `"main"` key in `package.json`.
Under the hood, a [Node addon](https://nodejs.org/api/addons.html) is a [dynamically-linked shared object](https://en.wikipedia.org/wiki/Library_(computing)#Shared_libraries). The `"build"` script produces this file by copying it from within the `target/` directory, which is where the Rust build produces the shared object.
### package.json
The npm [manifest file](https://docs.npmjs.com/cli/v7/configuring-npm/package-json), which informs the `npm` command.
### src/
The directory tree containing the Rust source code for the project.
### src/lib.rs
The Rust library's main module.
### target/
Binary artifacts generated by the Rust build.
## Learn More
To learn more about Neon, see the [Neon documentation](https://neon-bindings.com).
To learn more about Rust, see the [Rust documentation](https://www.rust-lang.org).
To learn more about Node, see the [Node documentation](https://nodejs.org).

@ -1,34 +0,0 @@
{
"name": "cozo-nodejs",
"version": "0.1.1",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "cozo-nodejs",
"version": "0.1.1",
"hasInstallScript": true,
"license": "MIT",
"devDependencies": {
"cargo-cp-artifact": "^0.1"
}
},
"node_modules/cargo-cp-artifact": {
"version": "0.1.6",
"resolved": "https://registry.npmmirror.com/cargo-cp-artifact/-/cargo-cp-artifact-0.1.6.tgz",
"integrity": "sha512-CQw0doK/aaF7j041666XzuilHxqMxaKkn+I5vmBsd8SAwS0cO5CqVEVp0xJwOKstyqWZ6WK4Ww3O6p26x/Goyg==",
"dev": true,
"bin": {
"cargo-cp-artifact": "bin/cargo-cp-artifact.js"
}
}
},
"dependencies": {
"cargo-cp-artifact": {
"version": "0.1.6",
"resolved": "https://registry.npmmirror.com/cargo-cp-artifact/-/cargo-cp-artifact-0.1.6.tgz",
"integrity": "sha512-CQw0doK/aaF7j041666XzuilHxqMxaKkn+I5vmBsd8SAwS0cO5CqVEVp0xJwOKstyqWZ6WK4Ww3O6p26x/Goyg==",
"dev": true
}
}
}

@ -1,31 +0,0 @@
{
"name": "cozo-nodejs",
"version": "0.1.1",
"description": "Cozo database for NodeJS",
"main": "index.node",
"scripts": {
"build": "cargo-cp-artifact -nc index.node -- cargo build --message-format=json-render-diagnostics",
"build-debug": "npm run build --",
"build-release": "npm run build -- --release",
"install": "npm run build-release",
"test": "cargo test"
},
"author": "Ziyang Hu",
"license": "MIT",
"devDependencies": {
"cargo-cp-artifact": "^0.1"
},
"repository": {
"type": "git",
"url": "git+https://github.com/cozodb/cozo.git"
},
"keywords": [
"database",
"datalog",
"graph"
],
"bugs": {
"url": "https://github.com/cozodb/cozo/issues"
},
"homepage": "https://github.com/cozodb/cozo#readme"
}

@ -1,117 +0,0 @@
/*
* Copyright 2022, The Cozo Project Authors. Licensed under MIT/Apache-2.0/BSD-3-Clause.
*/
use std::collections::BTreeMap;
use std::sync::atomic::{AtomicI32, Ordering};
use std::sync::Mutex;
use lazy_static::lazy_static;
use neon::prelude::*;
use cozo::Db;
/// Registry of open databases, addressed by integer handle.
// NOTE(review): the counter is an `i32` while the map is keyed by `u32`; confirm the intended id type.
#[derive(Default)]
struct Handles {
/// Counter from which the next handle id is taken.
current: AtomicI32,
/// Open databases keyed by handle id, guarded by a mutex.
dbs: Mutex<BTreeMap<u32, Db>>,
}
// Global handle registry used by `open_db`, `close_db` and `query_db`.
lazy_static! {
static ref HANDLES: Handles = Handles::default();
}
/// Opens the database at the path given as the first JavaScript argument and
/// returns its numeric handle.
///
/// On success the database is stored in `HANDLES` under a freshly allocated
/// id, which is returned to JavaScript as a number. If the database cannot be
/// opened, the debug rendering of the error is thrown as a JavaScript string.
fn open_db(mut cx: FunctionContext) -> JsResult<JsNumber> {
    let path = cx.argument::<JsString>(0)?.value(&mut cx);
    match Db::new(path) {
        Ok(db) => {
            // The counter is an `AtomicI32`, but the registry is keyed by `u32`
            // (the type `close_db` and `query_db` cast incoming ids to), so the
            // id has to be converted before it can be used as a key.
            let id = HANDLES.current.fetch_add(1, Ordering::AcqRel) as u32;
            HANDLES.dbs.lock().unwrap().insert(id, db);
            Ok(cx.number(id))
        }
        Err(err) => {
            let s = cx.string(format!("{:?}", err));
            cx.throw(s)
        }
    }
}
/// Unregisters the database whose handle is given as the first JavaScript
/// argument. Returns a JavaScript boolean telling whether an entry was
/// actually removed.
fn close_db(mut cx: FunctionContext) -> JsResult<JsBoolean> {
    let handle = cx.argument::<JsNumber>(0)?.value(&mut cx) as u32;
    // The lock guard is a temporary of this statement, so the mutex is
    // released before the removed database (if any) is dropped.
    let removed = HANDLES.dbs.lock().unwrap().remove(&handle);
    let was_open = removed.is_some();
    Ok(cx.boolean(was_open))
}
/// Runs a query on a background thread and reports the outcome through a
/// JavaScript callback.
///
/// Expected JavaScript arguments, in order: the database handle (number), the
/// query text (string), the parameters as a JSON object encoded in a string,
/// and a callback. On success the callback is called with `(null, <result as
/// JSON string>)`; on failure it is called with the debug rendering of the
/// error as its only argument. Problems detected before the query is started
/// (unknown handle, malformed parameters) are thrown as JavaScript strings.
fn query_db(mut cx: FunctionContext) -> JsResult<JsUndefined> {
    let handle = cx.argument::<JsNumber>(0)?.value(&mut cx) as u32;

    // Clone the database out of the registry so the lock is not held while
    // the query runs.
    let looked_up = HANDLES.dbs.lock().unwrap().get(&handle).cloned();
    let db = match looked_up {
        Some(db) => db,
        None => {
            let msg = cx.string("database already closed");
            return cx.throw(msg);
        }
    };

    let query = cx.argument::<JsString>(1)?.value(&mut cx);
    let params_str = cx.argument::<JsString>(2)?.value(&mut cx);

    let params_arg: BTreeMap<_, _> = match serde_json::from_str::<serde_json::Value>(&params_str)
    {
        Ok(serde_json::Value::Object(entries)) => entries.into_iter().collect(),
        Ok(_) => {
            let msg = cx.string("the given params argument is not a JSON map");
            return cx.throw(msg);
        }
        Err(_) => {
            let msg = cx.string("the given params argument is not valid JSON");
            return cx.throw(msg);
        }
    };

    let callback = cx.argument::<JsFunction>(3)?.root(&mut cx);
    let channel = cx.channel();

    std::thread::spawn(move || {
        let outcome = db.run_script(&query, &params_arg);

        // Hand the outcome back through the channel, where a JavaScript
        // context is available for building values and calling the callback.
        channel.send(move |mut cx| {
            let callback = callback.into_inner(&mut cx);
            let this = cx.undefined();
            let args = match outcome {
                Err(err) => {
                    let rendered = cx.string(format!("{:?}", err));
                    vec![rendered.upcast::<JsValue>()]
                }
                Ok(json) => {
                    let payload = cx.string(json.to_string());
                    vec![cx.null().upcast::<JsValue>(), payload.upcast()]
                }
            };
            callback.call(&mut cx, this, args)?;
            Ok(())
        });
    });

    Ok(cx.undefined())
}
/// Module entry point: exports `open_db`, `close_db` and `query_db` under those names.
#[neon::main]
fn main(mut cx: ModuleContext) -> NeonResult<()> {
cx.export_function("open_db", open_db)?;
cx.export_function("close_db", close_db)?;
cx.export_function("query_db", query_db)?;
Ok(())
}

@ -1,69 +0,0 @@
name: CI
on:
push:
branches:
- main
- master
pull_request:
jobs:
linux:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: messense/maturin-action@v1
with:
manylinux: auto
command: build
args: --release --sdist -o dist --find-interpreter
- name: Upload wheels
uses: actions/upload-artifact@v2
with:
name: wheels
path: dist
windows:
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
- uses: messense/maturin-action@v1
with:
command: build
args: --release -o dist --find-interpreter
- name: Upload wheels
uses: actions/upload-artifact@v2
with:
name: wheels
path: dist
macos:
runs-on: macos-latest
steps:
- uses: actions/checkout@v3
- uses: messense/maturin-action@v1
with:
command: build
args: --release -o dist --universal2 --find-interpreter
- name: Upload wheels
uses: actions/upload-artifact@v2
with:
name: wheels
path: dist
release:
name: Release
runs-on: ubuntu-latest
if: "startsWith(github.ref, 'refs/tags/')"
needs: [ macos, windows, linux ]
steps:
- uses: actions/download-artifact@v2
with:
name: wheels
- name: Publish to PyPI
uses: messense/maturin-action@v1
env:
MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
with:
command: upload
args: --skip-existing *

72
python/.gitignore vendored

@ -1,72 +0,0 @@
/target
# Byte-compiled / optimized / DLL files
__pycache__/
.pytest_cache/
*.py[cod]
# C extensions
*.so
# Distribution / packaging
.Python
.venv/
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
include/
man/
venv/
*.egg-info/
.installed.cfg
*.egg
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
pip-selfcheck.json
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# Rope
.ropeproject
# Django stuff:
*.log
*.pot
.DS_Store
# Sphinx documentation
docs/_build/
# PyCharm
.idea/
# VSCode
.vscode/
# Pyenv
.python-version

@ -1,19 +0,0 @@
# Manifest for the Python extension crate wrapping `cozo`.
[package]
name = "cozo_py_module"
version = "0.1.1"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "cozo_py_module"
# Built as a C-compatible dynamic library.
crate-type = ["cdylib"]
[features]
# Both features simply forward to the same-named feature of the `cozo` crate.
jemalloc = ["cozo/jemalloc"]
io-uring = ["cozo/io-uring"]
[dependencies]
pyo3 = { version = "0.17.1", features = ["extension-module", "abi3", "abi3-py37"] }
# `cozo` is taken from the parent directory.
cozo = { version = "0.1.1", path = ".." }
# Pinned to exactly 5.3.0 (`=` requirement).
miette = { version = "=5.3.0", features = ["fancy"] }
serde_json = "1.0.81"

@ -1,14 +0,0 @@
# Python packaging metadata; the build backend is maturin (0.13.x series).
[build-system]
requires = ["maturin>=0.13,<0.14"]
build-backend = "maturin"
[project]
# NOTE(review): the project name is the generic string "python", while the
# Rust library is named `cozo_py_module` — confirm this is intended.
name = "python"
requires-python = ">=3.7"
classifiers = [
"Programming Language :: Rust",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]

@ -1,52 +0,0 @@
/*
* Copyright 2022, The Cozo Project Authors. Licensed under MIT/Apache-2.0/BSD-3-Clause.
*/
use std::collections::BTreeMap;
use miette::miette;
use pyo3::exceptions::PyException;
use pyo3::prelude::*;
use cozo::Db;
/// Extension trait for turning a result-like value into a [`PyResult`].
trait PyResultExt<T> {
/// Consumes `self` and returns the corresponding `PyResult<T>`.
fn into_py_res(self) -> PyResult<T>;
}
impl<T> PyResultExt<T> for miette::Result<T> {
    /// Passes `Ok` values through untouched and converts an error into a Python
    /// `Exception` whose message is the error's `Debug` (`{:?}`) rendering.
    fn into_py_res(self) -> PyResult<T> {
        self.map_err(|report| PyException::new_err(format!("{:?}", report)))
    }
}
/// Class exported to Python (via `#[pyclass]`) that owns a Cozo [`Db`].
#[pyclass]
struct CozoDbPy {
/// The wrapped database; created in `new` and queried in `run_query`.
db: Db,
}
#[pymethods]
impl CozoDbPy {
    /// Python constructor: opens the database at `path` with `Db::new`.
    ///
    /// A failure from `Db::new` is raised as a Python exception.
    // NOTE(review): the `#[args]` attribute names `create_if_missing` and
    // `destroy_on_exit`, neither of which is a parameter of this function —
    // confirm whether they are still meant to be here.
    #[new]
    #[args(create_if_missing = true, destroy_on_exit = false)]
    fn new(path: &str) -> PyResult<Self> {
        Db::new(path).into_py_res().map(|db| Self { db })
    }

    /// Runs `query` against the database and returns the result as a string.
    ///
    /// `params` must be JSON text whose top-level value is an object; its entries
    /// are passed to `Db::run_script` as the parameter map. Invalid JSON, a
    /// non-object value, or a failing script each raise a Python exception.
    pub fn run_query(&self, py: Python<'_>, query: &str, params: &str) -> PyResult<String> {
        let parsed = serde_json::from_str::<serde_json::Value>(params)
            .map_err(|_| miette!("the given params argument is not valid JSON"))
            .into_py_res()?;

        // Only a JSON object can supply named parameters.
        let params_arg: BTreeMap<_, _> = if let serde_json::Value::Object(entries) = parsed {
            entries.into_iter().collect()
        } else {
            return Err(miette!("the given params argument is not a JSON map")).into_py_res();
        };

        // The script itself runs inside `allow_threads`, as does the error conversion.
        py.allow_threads(|| self.db.run_script(query, &params_arg).into_py_res())
            .map(|outcome| outcome.to_string())
    }
}
/// Python module initialiser for `cozo_py_module`: registers the `CozoDbPy` class
/// on the module and returns the outcome of that registration.
#[pymodule]
fn cozo_py_module(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
    m.add_class::<CozoDbPy>()
}
Loading…
Cancel
Save