diff --git a/docs/source/conf.py b/docs/source/conf.py index ce818a72..800adbcd 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -19,6 +19,7 @@ extensions = [] templates_path = ['_templates'] exclude_patterns = [] +add_function_parentheses = False add_module_names = False latex_show_urls = 'footnote' @@ -27,3 +28,4 @@ latex_show_urls = 'footnote' html_theme = 'furo' html_static_path = ['_static'] +html_title = "Cozo Manual" diff --git a/docs/source/index.rst b/docs/source/index.rst index 870d1b4a..522e8186 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -3,7 +3,7 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to the Cozo Database Manual! +The Cozo Database Manual ===================================== .. toctree:: diff --git a/jupyter/cozoscript-kernel/src/kernel.ts b/jupyter/cozoscript-kernel/src/kernel.ts index 004fa22c..17795d3f 100644 --- a/jupyter/cozoscript-kernel/src/kernel.ts +++ b/jupyter/cozoscript-kernel/src/kernel.ts @@ -62,14 +62,24 @@ export class CozoScriptKernel extends BaseKernel { metadata: {} }); } else { - // let res = await response.json(); - this.publishExecuteResult({ - execution_count: this.executionCount, - data: { - 'text/html': displayTable(await response.json()) - }, - metadata: {} - }); + let res = await response.json(); + if ('rows' in res) { + this.publishExecuteResult({ + execution_count: this.executionCount, + data: { + 'text/html': displayTable(res) + }, + metadata: {} + }); + } else { + this.publishExecuteResult({ + execution_count: this.executionCount, + data: { + 'text/html': `
${escapeHtml(JSON.stringify(res, null, 2))}
` + }, + metadata: {} + }); + } } } catch (e) { console.error(e); diff --git a/src/cozoscript.pest b/src/cozoscript.pest index 49f8c71f..a349e56b 100644 --- a/src/cozoscript.pest +++ b/src/cozoscript.pest @@ -148,11 +148,11 @@ bin_pos_int = @{"0b" ~ ASCII_BIN_DIGIT ~ ("_" | ASCII_BIN_DIGIT)*} int = _{(hex_pos_int | octo_pos_int | bin_pos_int | pos_int)} dot_float = @{ ("0" | ASCII_NONZERO_DIGIT ~ ("_" | ASCII_DIGIT)*) - ~ ("." ~ ("_" | ASCII_DIGIT)+) + ~ ("." ~ ("_" | ASCII_DIGIT)*) } sci_float = @{ ("0" | ASCII_NONZERO_DIGIT ~ ("_" | ASCII_DIGIT)*) - ~ ("." ~ ("_" | ASCII_DIGIT)+)? + ~ ("." ~ ("_" | ASCII_DIGIT)*)? ~ (^"e" ~ ("+" | "-")? ~ ("_" | ASCII_DIGIT)+) } float = _{(sci_float | dot_float)} diff --git a/src/data/attr.rs b/src/data/attr.rs index 4310ac82..9dd107b5 100644 --- a/src/data/attr.rs +++ b/src/data/attr.rs @@ -156,12 +156,11 @@ pub(crate) enum AttributeIndex { None = 0, Indexed = 1, Unique = 2, - Identity = 3, } impl AttributeIndex { pub(crate) fn is_unique_index(&self) -> bool { - matches!(self, AttributeIndex::Identity | AttributeIndex::Unique) + matches!(self, AttributeIndex::Unique) } pub(crate) fn should_index(&self) -> bool { *self != AttributeIndex::None @@ -174,7 +173,6 @@ impl Display for AttributeIndex { AttributeIndex::None => write!(f, "none"), AttributeIndex::Indexed => write!(f, "index"), AttributeIndex::Unique => write!(f, "unique"), - AttributeIndex::Identity => write!(f, "identity"), } } } diff --git a/src/parse/schema.rs b/src/parse/schema.rs index 7158eac3..4f515dbe 100644 --- a/src/parse/schema.rs +++ b/src/parse/schema.rs @@ -55,7 +55,6 @@ fn parse_attr_defs(src: Pairs<'_>) -> Result { "many" => attr.cardinality = AttributeCardinality::Many, "history" => attr.with_history = true, "no_history" => attr.with_history = false, - "identity" => attr.indexing = AttributeIndex::Identity, "index" => attr.indexing = AttributeIndex::Indexed, "no_index" => attr.indexing = AttributeIndex::None, "unique" => attr.indexing = 
AttributeIndex::Unique, diff --git a/src/runtime/db.rs b/src/runtime/db.rs index 84f5b510..9c6332a9 100644 --- a/src/runtime/db.rs +++ b/src/runtime/db.rs @@ -467,6 +467,7 @@ impl Db { pub(crate) fn remove_relation(&self, name: &Symbol) -> Result<()> { let mut tx = self.transact_write()?; let (lower, upper) = tx.destroy_relation(name)?; + tx.commit_tx("", false)?; self.db.range_del(&lower, &upper, Snd)?; Ok(()) } diff --git a/tests/air_routes.rs b/tests/air_routes.rs index b4025fea..f1b57c1a 100644 --- a/tests/air_routes.rs +++ b/tests/air_routes.rs @@ -32,15 +32,15 @@ fn air_routes() -> Result<()> { :schema put country { - code: string identity, + code: string unique, desc: string } put continent { - code: string identity, + code: string unique, desc: string } put airport { - iata: string identity, + iata: string unique, icao: string index, city: string index, desc: string, diff --git a/tests/simple.rs b/tests/simple.rs index 3065bc8a..784605af 100644 --- a/tests/simple.rs +++ b/tests/simple.rs @@ -30,7 +30,7 @@ fn simple() { r#" :schema put person { - id: string identity, + id: string unique, first_name: string index, last_name: string index, age: int, diff --git a/tutorial/2-pilgrim.ipynb b/tutorial/2-pilgrim.ipynb index 07916f4d..46816e2d 100644 --- a/tutorial/2-pilgrim.ipynb +++ b/tutorial/2-pilgrim.ipynb @@ -27,12 +27,12 @@ }, { "cell_type": "markdown", - "source": "## Stored", + "source": "## Stored relations", "metadata": {} }, { "cell_type": "markdown", - "source": "An obvious shortcoming of our previous acrobatics is that we have to carry around our love triangles network and enter it anew for every query, which leads to rapid deterioration of the `CTRL`, `C` and `V` on the keyboard. So let's fix that first.", + "source": "An obvious shortcoming of our previous acrobatics is that we have to carry around our love triangles network and enter it anew for every query, which leads to rapid deterioration of the `CTRL`, `C` and `V` keys. 
So let's fix that:", "metadata": {} }, { @@ -41,26 +41,381 @@ "metadata": { "trusted": true }, - "execution_count": 10, + "execution_count": 59, "outputs": [ { - "execution_count": 10, + "execution_count": 59, "output_type": "execute_result", "data": { - "text/html": "
eval::stored_relation_conflict\n\n  × Stored relation triangles conflicts with an existing one\n    ╭─[9:1]\n  9 │         \n 10 │ :relation create triangles\n    ·                  ─────────\n    ╰────\n
" + "text/html": "
{\n  "relation": "OK",\n  "time_taken": 1\n}
" }, "metadata": {} } ] }, + { + "cell_type": "markdown", + "source": "We have the _query directive_ `:relation create` together with a normal query. The results will then be stored on your disk with the name `triangles` instead of returned to you.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "You will receive an error if you try to run this script twice. In which case don't worry and continue.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "Stored relations are safe from restarts and power failures. Let's query against it:", + "metadata": {} + }, + { + "cell_type": "code", + "source": "?[a, b] := :triangles[a, b]", + "metadata": { + "trusted": true + }, + "execution_count": 60, + "outputs": [ + { + "execution_count": 60, + "output_type": "execute_result", + "data": { + "text/html": "
ab
aliceeve
bobalice
charlieeve
davidgeorge
evealice
evebob
evecharlie
georgegeorge
Took 0ms
" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": "The colon `:` in front of the name tells the database that we want a _stored_ relation instead of a relation defined within the query itself.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "Now, Fred finally comes to the party and Fred loves Alice and Eve. We add these facts in the following way:", + "metadata": {} + }, + { + "cell_type": "code", + "source": "?[] <- [['fred', 'alice'],\n ['fred', 'eve']]\n\n:relation put triangles", + "metadata": { + "trusted": true + }, + "execution_count": 61, + "outputs": [ + { + "execution_count": 61, + "output_type": "execute_result", + "data": { + "text/html": "
{\n  "relation": "OK",\n  "time_taken": 0\n}
" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": "?[a, b] := :triangles[a, b]", + "metadata": { + "trusted": true + }, + "execution_count": 62, + "outputs": [ + { + "execution_count": 62, + "output_type": "execute_result", + "data": { + "text/html": "
ab
aliceeve
bobalice
charlieeve
davidgeorge
evealice
evebob
evecharlie
fredalice
fredeve
georgegeorge
Took 0ms
" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": "Notice that we used `:relation put` instead of `:relation create`. In fact, you can use `:relation put` before any call to `:relation create`. The `create` op just ensures that the insertion is into a new stored relation.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "Now Eve no longer loves Alice and Charlie! Let's reflect this fact by using `retract`", + "metadata": {} + }, + { + "cell_type": "code", + "source": "?[] <- [['eve', 'charlie'],\n ['eve', 'alice']]\n\n:relation retract triangles", + "metadata": { + "trusted": true + }, + "execution_count": 63, + "outputs": [ + { + "execution_count": 63, + "output_type": "execute_result", + "data": { + "text/html": "
{\n  "relation": "OK",\n  "time_taken": 0\n}
" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": "?[a, b] := :triangles[a, b]", + "metadata": { + "trusted": true + }, + "execution_count": 64, + "outputs": [ + { + "execution_count": 64, + "output_type": "execute_result", + "data": { + "text/html": "
ab
aliceeve
bobalice
charlieeve
davidgeorge
evebob
fredalice
fredeve
georgegeorge
Took 0ms
" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": "It is OK to retract non-existent facts, in which case the operation does nothing.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "You can also reset the whole relation with `rederive`:", + "metadata": {} + }, + { + "cell_type": "code", + "source": "?[] <- [['eve', 'charlie'],\n ['eve', 'alice']]\n\n:relation rederive triangles", + "metadata": { + "trusted": true + }, + "execution_count": 65, + "outputs": [ + { + "execution_count": 65, + "output_type": "execute_result", + "data": { + "text/html": "
{\n  "relation": "OK",\n  "time_taken": 0\n}
" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": "?[a, b] := :triangles[a, b]", + "metadata": { + "trusted": true + }, + "execution_count": 66, + "outputs": [ + { + "execution_count": 66, + "output_type": "execute_result", + "data": { + "text/html": "
ab
evealice
evecharlie
Took 0ms
" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": "Only the `rederive`ed tuples remain.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "You can see what stored relations you currently have in your database by running the following _system directive_:", + "metadata": {} + }, + { + "cell_type": "code", + "source": ":db relations", + "metadata": { + "trusted": true + }, + "execution_count": 67, + "outputs": [ + { + "execution_count": 67, + "output_type": "execute_result", + "data": { + "text/html": "
namearity
code_lat_lon3
flies_to3
flies_to_code3
triangles2
Took 0ms
" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": "Now this triangles business is becoming tiring. Let's get rid of it:", + "metadata": {} + }, { "cell_type": "code", "source": ":db remove relation triangles", "metadata": { "trusted": true }, - "execution_count": 13, - "outputs": [] + "execution_count": 68, + "outputs": [ + { + "execution_count": 68, + "output_type": "execute_result", + "data": { + "text/html": "
{\n  "status": "OK",\n  "time_taken": 0\n}
" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": "Since we do not have any queries to run when nuking relations, we use a system directive instead of a query directive. Now you can no longer query the triangles:", + "metadata": {} + }, + { + "cell_type": "code", + "source": "?[a, b] := :triangles[a, b]", + "metadata": { + "trusted": true + }, + "execution_count": 69, + "outputs": [ + { + "execution_count": 69, + "output_type": "execute_result", + "data": { + "text/html": "
query::relation_not_found\n\n  × Cannot find requested stored relation 'triangles'\n   ╭────\n 1 │ ?[a, b] := :triangles[a, b]\n   ·            ──────────\n   ╰────\n
" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": "This completes all the operations on stored relations: `create`, `put`, `retract`, `rederive`. The syntax for `remove` is different from the rest for technical reasons.\n\nAll these operations are _atomic_, meaning that for all the tuples they affect, either all are affected at the same time, or the operation completely fails. There is no in-between, corrupted state.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "## A schema for data", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "The stored relation operations introduced above are simple, fast, and very raw. They can be used in exactly the same way as rules defined inline with the query. The way you use them is also not very different than in a traditional SQL database.\n\nStored relations are suitable for data that has a well-defined structure at the outset, and which is loaded and updated in bulk. For example, you may have obtained from domain experts an [ontology](https://www.wikiwand.com/en/Ontology_\\(information_science\\)) in the form of a network of metadata. The ontology comes in nice tables with clear, detailed documentation. You store this ontology as a group of stored relations, and use them to extract insights from your business data. The ontology is updated periodically, and when an update comes you just use the `rederive` operation to replace the old version. Very simple and efficient.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "But your _business_ data is most likely not as simple as that. At this age of BigDataⒸ, you must have one billion active users on your platforms carrying out all sorts of activities, concurrently of course. You don't want these activities to step on each other. You don't want to store the wrong thing into your users' accounts. You _especially_ don't want any money in transit to disappear in midair. 
To make things worse, hundreds of new activities pop up each day. \n\nYou don't want to store any of those in a stored relation. With a traditional RDBMS, [data migrations](https://en.wikipedia.org/wiki/Data_migration) would have already killed you. And with Cozo, stored relations don't even try to support schema change (in fact, the only 'schema' for a stored relation is its arity).\n\nSo let's step back a bit and recap what we want in this case:\n\n* high concurrency;\n* fine-grained transactions;\n* checks for data integrity;\n* ability to rapidly adapt to changing data shapes.\n\nBut at what cost? We have to give up something, right? Here are the prices we are willing to pay in this case:\n\n* we restrict most transactions to _local changes_: i.e. they only touch on a tiny fraction of the data;\n* we can tolerate levels of indirection.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "After we've paid our price we got our solution. It is a very old solution actually: the [triple store](https://en.wikipedia.org/wiki/Triplestore).", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "The idea is very simple. A _triple_ is a sentence consisting of a subject, a verb, and an object. In the Cozo flavour, the subject is always an opaque identity, such as _entity42_. The following are examples of triples in this sense:\n\n* _entity42_ has first name `'Alice'`.\n* _entity42_ has last name `'Liddell'`.\n* _entity42_ loves _entity81_.\n* _entity81_ is aged `20` years old.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "We put schema into triples by schematizing the verbs. We see that in our case, the schema for first name and last name should have type strings, the schema for age should have type integers, and the schema for the \"loves\" relationship should be other entities. 
Here the types refer to the objects in the triple, since the subject is always an entity.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "So let's finally put this into code:", + "metadata": {} + }, + { + "cell_type": "code", + "source": ":schema\n\nput person {\n first_name: string index,\n last_name: string index,\n loves: ref many,\n age: int\n}", + "metadata": { + "trusted": true + }, + "execution_count": 70, + "outputs": [ + { + "execution_count": 70, + "output_type": "execute_result", + "data": { + "text/html": "
{\n  "results": [\n    [\n      10000020,\n      "+"\n    ],\n    [\n      10000021,\n      "+"\n    ],\n    [\n      10000022,\n      "+"\n    ],\n    [\n      10000023,\n      "+"\n    ]\n  ],\n  "time_taken": 0,\n  "tx_id": 10011\n}
" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": "Let's explain. The `:schema` at the top indicates that we want to manage the schema instead of running normal queries. We then `put` a _group_ of related schema. Now even though they are declared together similarly to a table definition in SQL, we need to stress that this actually defines four separate, independent attributes named `person.first_name`, `person.last_name`, `person.loves`, `person.age`. An entity can have whatever attributes associated with it, even those with different prefixes.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "The allowed types for attributes are:\n\n* `ref`\n* `bool`\n* `int`\n* `float`\n* `string`\n* `bytes`\n* `list`\n\nThe list type is heterogeneous in its elements. There is no concept of a nullable type and you can't put `null` into values of triples (other than wrapping them in lists first). To indicate missing values, you simply omit the attribute.\n\nThe `ref` type has the special meaning of referring to other entities.\n\nAfter the type comes one or more _modifiers_. The `many` modifier indicates that `loves` is a to-many relationship. If we omit it, any person can love at most one other person, which is not very realistic.\n\nThe modifier `index` indicates that we want values of this attribute to be _indexed_. Only indexed attributes support efficient value lookups and range scans. `ref` types are always implicitly indexed since the database wants to be able to traverse the graph in both directions.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "Instead of `index`, we can mark attributes with the modifier `unique`, indicating there cannot be two entities with the same value for the attribute. The value then acts as a _unique identifier_ for the entity, which is convenient in certain circumstances since the entity ID is assigned by the database automatically and you cannot choose it. 
So let's add an explicit `person.id` attribute, this time using the non-grouped syntax:", + "metadata": {} + }, + { + "cell_type": "code", + "source": ":schema\n\nput person.id: string unique;", + "metadata": { + "trusted": true + }, + "execution_count": 72, + "outputs": [ + { + "execution_count": 72, + "output_type": "execute_result", + "data": { + "text/html": "
{\n  "results": [\n    [\n      10000024,\n      "+"\n    ]\n  ],\n  "time_taken": 0,\n  "tx_id": 10012\n}
" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": "We can see what schema are there in the database now by running a system directive:", + "metadata": {} + }, + { + "cell_type": "code", + "source": ":db schema", + "metadata": { + "trusted": true + }, + "execution_count": 73, + "outputs": [ + { + "execution_count": 73, + "output_type": "execute_result", + "data": { + "text/html": "
idnametypecardinalityindexhistory
10000001country.codestringoneuniquefalse
10000002country.descstringonenonefalse
10000003continent.codestringoneuniquefalse
10000004continent.descstringonenonefalse
10000005airport.iatastringoneuniquefalse
10000006airport.icaostringoneindexfalse
10000007airport.citystringoneindexfalse
10000008airport.descstringonenonefalse
10000009airport.regionstringoneindexfalse
10000010airport.countryrefonenonefalse
10000011airport.runwaysintonenonefalse
10000012airport.longestintonenonefalse
10000013airport.altitudeintonenonefalse
10000014airport.latfloatonenonefalse
10000015airport.lonfloatonenonefalse
10000016route.srcrefonenonefalse
10000017route.dstrefonenonefalse
10000018route.distanceintonenonefalse
10000019geo.containsrefmanynonefalse
10000020person.first_namestringoneindexfalse
10000021person.last_namestringoneindexfalse
10000022person.lovesrefmanynonefalse
10000023person.ageintonenonefalse
10000024personstringoneuniquefalse
Took 0ms
" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": "## The time machine", + "metadata": {} }, { "cell_type": "code",