From 42ff1918a366a798b286065930f6fcecf31404ba Mon Sep 17 00:00:00 2001 From: Ziyang Hu Date: Fri, 21 Oct 2022 18:41:35 +0800 Subject: [PATCH] bump deps; intro tutorial --- Cargo.lock | 58 ++-- tutorial/tutorial.ipynb | 676 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 712 insertions(+), 22 deletions(-) create mode 100644 tutorial/tutorial.ipynb diff --git a/Cargo.lock b/Cargo.lock index 055d46a8..c4351aa6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -625,9 +625,9 @@ dependencies = [ [[package]] name = "filetime" -version = "0.2.17" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94a7bbaa59354bc20dd75b67f23e2797b4490e9d6928203fb105c79e448c86c" +checksum = "4b9663d381d07ae25dc88dbdf27df458faa83a9b25336bcac83d5e452b5fc9d3" dependencies = [ "cfg-if 1.0.0", "libc", @@ -662,9 +662,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" +checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" dependencies = [ "cfg-if 1.0.0", "libc", @@ -1576,9 +1576,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.145" +version = "1.0.146" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b" +checksum = "6df50b7a60a0ad48e1b42eb38373eac8ff785d619fb14db917b4e63d5439361f" [[package]] name = "serde_bytes" @@ -1591,9 +1591,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.145" +version = "1.0.146" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c" +checksum = "a714fd32ba1d66047ce7d53dabd809e9922d538f9047de13cc4cffca47b36205" dependencies = [ "proc-macro2", "quote", @@ -1722,9 +1722,9 @@ dependencies = [ [[package]] 
name = "syn" -version = "1.0.102" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fcd952facd492f9be3ef0d0b7032a6e442ee9b361d4acc2b1d0c4aaa5f613a1" +checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" dependencies = [ "proc-macro2", "quote", @@ -2137,46 +2137,60 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-sys" -version = "0.36.1" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ + "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_msvc", "windows_x86_64_gnu", + "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" + [[package]] name = "windows_aarch64_msvc" -version = "0.36.1" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" [[package]] name = "windows_i686_gnu" -version = "0.36.1" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" [[package]] name = "windows_i686_msvc" -version = "0.36.1" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +checksum = 
"84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" [[package]] name = "windows_x86_64_gnu" -version = "0.36.1" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" [[package]] name = "windows_x86_64_msvc" -version = "0.36.1" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" [[package]] name = "zstd-sys" diff --git a/tutorial/tutorial.ipynb b/tutorial/tutorial.ipynb new file mode 100644 index 00000000..0f0c3761 --- /dev/null +++ b/tutorial/tutorial.ipynb @@ -0,0 +1,676 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3df9ce6e-3722-4334-abf9-703d62c27eab", + "metadata": {}, + "source": [ + "# The Cozo database tutorial" + ] + }, + { + "cell_type": "markdown", + "id": "63c695dc-6e85-42bb-b39b-1f0fd14ed5eb", + "metadata": {}, + "source": [ + "This tutorial will teach you the basics of using the Cozo database with the query language CozoScript.\n", + "There are no database-specific prerequisites, \n", + "though it would be helpful if you already know some other databases, \n", + "especially SQL databases." 
+ ] + }, + { + "cell_type": "markdown", + "id": "c2bfcfe4-c06c-4b02-a040-6fd840b77820", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "id": "eb1fefcf-81b5-4ccf-8179-631fdb011bae", + "metadata": {}, + "source": [ + "The best way to learn from this tutorial is to run the queries as they are introduced.\n", + "For this, you need to install the Cozo database on your local machine." + ] + }, + { + "cell_type": "markdown", + "id": "7f26f0f5-a3ff-4145-a9d5-84c65e368afa", + "metadata": {}, + "source": [ + "Cozo is distributed as a single gzipped/zipped binary. \n", + "Go to https://github.com/cozodb/cozo/releases and download the latest release binary for your operating system to a local directory.\n", + "\n", + "If your operating system is Linux/Mac, \n", + "open a terminal command prompt, \n", + "`cd` into the directory of your download,\n", + "and run" + ] + }, + { + "cell_type": "raw", + "id": "72a21b8e-d66a-48d3-b684-e9554ea382cc", + "metadata": {}, + "source": [ + "gunzip cozoserver-*.gz\n", + "mv cozoserver-* cozoserver\n", + "chmod +x cozoserver" + ] + }, + { + "cell_type": "markdown", + "id": "536fb265-e25f-469d-843e-935b272c7298", + "metadata": {}, + "source": [ + "If you are on Windows instead, open a PowerShell and run" + ] + }, + { + "cell_type": "raw", + "id": "a11f0388-dd7b-49a2-825f-0dde834ceb6b", + "metadata": {}, + "source": [ + "Expand-Archive -Path .\\cozoserver-*.zip -DestinationPath ." + ] + }, + { + "cell_type": "markdown", + "id": "12da9726-3c18-4011-98ea-e2aae105ee08", + "metadata": {}, + "source": [ + "To run the server, you need to specify a directory to store persistent data on your file system. 
\n", + "In the following we will use a directory called `tutorial-data` in the same directory as the binary executable.\n", + "In the terminal, run" + ] + }, + { + "cell_type": "raw", + "id": "b4f837e4-6f26-4c57-a7a9-fb5765e4bc92", + "metadata": {}, + "source": [ + "./cozoserver ./tutorial-data" + ] + }, + { + "cell_type": "markdown", + "id": "41820cf5-a800-4c81-8a17-227e9692aae2", + "metadata": {}, + "source": [ + "The same command should work in PowerShell as well.\n", + "\n", + "If you see something like `Database web API running at ...` displayed in your terminal, \n", + "then the server is successfully started. \n", + "Keep the server running when you are following the tutorial.\n", + "When you are done, `CTRL-C` in the terminal will stop the server.\n", + "You can restart the server again by running the command again.\n", + "\n", + "More options when starting the server are available. Run" + ] + }, + { + "cell_type": "raw", + "id": "e7bd8545-8d42-43a4-ae3b-e946fc5ff19b", + "metadata": {}, + "source": [ + "./cozoserver -h" + ] + }, + { + "cell_type": "markdown", + "id": "f192cf0b-a30d-46ad-85a1-d81f7590b78b", + "metadata": {}, + "source": [ + "for more details." + ] + }, + { + "cell_type": "markdown", + "id": "2ec06d2e-1187-48c8-8777-f84b4c71c741", + "metadata": {}, + "source": [ + "## A place to run queries\n", + "\n", + "Cozo exposes an HTTP API, so theoretically you can follow along using tools like `curl`. \n", + "If you are interested, consult the manual for the request format the API expects.\n", + "For better user experience, though, we suggest following one of the following two subsections instead." 
+ ] + }, + { + "cell_type": "markdown", + "id": "c4e8b790-7c0f-4538-b20e-2c3fb4065dc8", + "metadata": {}, + "source": [ + "### Option 1: the JupyterLab notebook\n", + "\n", + "This option provides the best user experience but also requires you to install quite a lot of things, \n", + "though you may already have them installed on your computer if you use the python data science stack.\n", + "First you will need python installed. \n", + "Then install JupyterLab by following the instructions at https://jupyter.org/install.\n", + "Next, run the following to install a Jupyter extension to help with querying Cozo:" + ] + }, + { + "cell_type": "raw", + "id": "6ffa6d95-101b-4229-900a-ce9b14685439", + "metadata": {}, + "source": [ + "pip install pycozo pandas" + ] + }, + { + "cell_type": "markdown", + "id": "9c07f306-9abc-4dd2-b78c-5b46a823982e", + "metadata": {}, + "source": [ + "While you are at it, go to the source of this tutorial at https://github.com/cozodb/cozo/blob/main/tutorial/tutorial.ipynb, \n", + "right-click on `Raw` and save the tutorial document to your disk.\n", + "\n", + "Then run jupyter lab, open the saved tutorial document, and follow along." + ] + }, + { + "cell_type": "markdown", + "id": "5154e084-d997-4f22-97fb-ce5808fb1da3", + "metadata": {}, + "source": [ + "We need to enable the extension in the notebook. Run" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "2e0d11d7-64d7-4577-a28d-7541ee8875fa", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext pycozo.ipyext_direct" + ] + }, + { + "cell_type": "markdown", + "id": "c3f0d80d-9854-4a21-9121-3220e7bcc73b", + "metadata": {}, + "source": [ + "Then the \"hello world\" query:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f3dfb8a1-35f0-4dc2-b8d7-e81fa3d45b75", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Completed in 0ms

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 012
0helloworldCozo!
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "?[] <- [['hello', 'world', 'Cozo!']]" + ] + }, + { + "cell_type": "markdown", + "id": "7e6dda2a-393a-4d87-8ba1-41c5d0b229d5", + "metadata": {}, + "source": [ + "If you get the same words back formatted in a table, congratulations! \n", + "You can skip to the next section where we start learning CozoScript proper.\n", + "If you want to know more about what the `pycozo` extension did, read the manual." + ] + }, + { + "cell_type": "markdown", + "id": "ac86f14f-72c7-4206-9e6b-6daf9e77b67a", + "metadata": {}, + "source": [ + "### Option 2: the JavaScript console in your browser" + ] + }, + { + "cell_type": "markdown", + "id": "8ddc55d2-604b-4218-af45-8c62d79db201", + "metadata": {}, + "source": [ + "If you have never used Python before, the first option may be overwhelming. \n", + "Or you just want to try Cozo out first to decide quickly if it has anything interesting for you.\n", + "Whatever your reason for not wanting to install the whole python toolchain, we have you covered." + ] + }, + { + "cell_type": "markdown", + "id": "1147466d-d925-4c32-8b6c-28bdabc656fb", + "metadata": {}, + "source": [ + "Your local machine at least has a modern web browser, like a recent version of Firefox, Chrome, or Edge, right?\n", + "Good. \n", + "\n", + "Use your browser to navigate to http://127.0.0.1:9070 (the address shown in your terminal when you run `cozoserver`).\n", + "You should be greeted by a page saying that the server is running.\n", + "Now open the developer tools of your browser by right-clicking the page and selecting \"Inspect\" from the menu \n", + "(if you cannot find it, you may need to fiddle with your browser settings to enable the developer tools).\n", + "Switch to the \"Console\" tab of the developer tools if it is not already open. 
\n", + "\n", + "If you see some messages where \n", + "the \"Cozo Makeshift Javascript Console\" welcomes you, you are ready. Run the \"hello world\" query by typing the following into the console and pressing Enter:" + ] + }, + { + "cell_type": "raw", + "id": "fb8fee0c-4069-45c8-bb2a-9aa96037a6ed", + "metadata": {}, + "source": [ + "await run(`?[] <- [['hello', 'world', 'Cozo!']]`)" + ] + }, + { + "cell_type": "markdown", + "id": "81fe58e2-3c52-4859-828d-533b25a62468", + "metadata": {}, + "source": [ + "If you see the three words echoed back in a table, you are successful. When following the tutorial, you have to wrap all queries within the backticks `` in the above command to run them in the JavaScript console." + ] + }, + { + "cell_type": "markdown", + "id": "545ed77c-eeb9-4008-972e-0bdc1d6b7478", + "metadata": {}, + "source": [ + "## Rules and relations" + ] + }, + { + "cell_type": "markdown", + "id": "61d8ce38-d2e0-4b5f-a9f7-64e9a65e0fe0", + "metadata": {}, + "source": [ + "Cozo is a relational database. As you might have guessed, the \"hello world\" query" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "30b8148b-4daa-4de0-a844-d8254a07d990", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Completed in 0ms

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 012
0helloworldCozo!
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "?[] <- [['hello', 'world', 'Cozo!']]" + ] + }, + { + "cell_type": "markdown", + "id": "a7de1353-7970-41ee-843e-cbe6287ded8d", + "metadata": {}, + "source": [ + "simply passes an ad hoc relation, here represented by a list of lists, and asks the database to return the relation to you.\n", + "You can pass more rows, or a different number of columns, to further corroborate your guess:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "27995b4b-35b3-427f-af32-c3945d177463", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Completed in 0ms

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 012
0123
1abc
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "?[] <- [[1, 2, 3], ['a', 'b', 'c']]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "54901c40-ff7e-4c26-bfed-0d38aa7a80dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Completed in 0ms

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 01234
0TrueFalseNone-0.014000A string with double quotes
11.5000002.500000345.500000
2aAbBcCdDeE
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "?[] <- [[1.5, 2.5, 3, 4, 5.5], \n", + " ['aA', 'bB', 'cC', 'dD', 'eE'], \n", + " [true, false, null, -1.4e-2, \"A string with double quotes\"]]" + ] + }, + { + "cell_type": "markdown", + "id": "615e35d7-f5c0-4d71-ac62-6934c658a715", + "metadata": {}, + "source": [ + "The last example shows how to enter literals for numbers, strings, booleans and `null`.\n", + "The literal representations are similar to those in JavaScript. \n", + "In particular, strings in double quotes are guaranteed to be interpreted in exactly the same way as in JSON." + ] + }, + { + "cell_type": "markdown", + "id": "1d76ab84-36a3-4d1d-9da2-a891de52b011", + "metadata": {}, + "source": [ + "You may be surprised by the order of the returned rows in the last example: the returned order is not the same as the input order.\n", + "This is because in Cozo relations are stored (either in memory or on disk) as trees, and trees are always sorted.\n", + "Another consequence of trees is that you can have no duplicate rows:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "28ab3f6e-0e32-486d-83fb-4fd5581e20dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Completed in 0ms

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 0
01
12
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "?[] <- [[1], [2], [1], [2], [1]]" + ] + }, + { + "cell_type": "markdown", + "id": "5a5d9022-c9b2-48c9-b447-ae69a2fc3a21", + "metadata": {}, + "source": [ + "We say that relations in Cozo follow _set semantics_ where de-duplication is automatic. \n", + "By contrast, SQL usually follows _bag semantics_ (some databases do this by secretly having a unique internal key for every row; in Cozo you must do this explicitly if you need to simulate duplicate rows).\n", + "Why does Cozo break tradition and go with set semantics?\n", + "Set semantics is much more convenient when you have recursions between relations involved,\n", + "and Cozo is designed to deal with very complicated recursions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a55e4ab1-e0f1-4126-9252-db2b16718106", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}