the db history API

main
Ziyang Hu 2 years ago
parent ba48c8bb5a
commit 46b37dc6c6

@ -19,7 +19,7 @@ remove_relations_op = {"remove" ~ "relation" ~ ident }
rename_relations_op = {"rename" ~ "relation" ~ ident ~ ident } rename_relations_op = {"rename" ~ "relation" ~ ident ~ ident }
remove_attribute_op = {"remove" ~ "attr" ~ compound_ident } remove_attribute_op = {"remove" ~ "attr" ~ compound_ident }
rename_attribute_op = {"rename" ~ "attr" ~ compound_ident ~ compound_ident} rename_attribute_op = {"rename" ~ "attr" ~ compound_ident ~ compound_ident}
history_op = {"history" ~ from_clause? ~ to_clause? ~ "for" ~ expr ~ ":" ~ (compound_ident ~ ",")* ~ compound_ident} history_op = {"history" ~ from_clause? ~ to_clause? ~ "for" ~ (expr ~ ",")* ~ expr ~ ":" ~ (compound_ident ~ ",")* ~ compound_ident}
from_clause = {"from" ~ expr} from_clause = {"from" ~ expr}
to_clause = {"to" ~ expr} to_clause = {"to" ~ expr}

@ -1,24 +1,26 @@
use std::collections::BTreeSet; use std::collections::BTreeSet;
use miette::{bail, Diagnostic, Result}; use miette::{bail, Diagnostic, ensure, Result};
use smartstring::{LazyCompact, SmartString}; use smartstring::{LazyCompact, SmartString};
use thiserror::Error; use thiserror::Error;
use crate::data::id::Validity;
use crate::data::symb::Symbol; use crate::data::symb::Symbol;
use crate::data::value::DataValue; use crate::data::value::DataValue;
use crate::parse::{ExtractSpan, Pairs, ParseError, Rule, SourceSpan};
use crate::parse::expr::build_expr; use crate::parse::expr::build_expr;
use crate::parse::{ExtractSpan, Pairs, Rule, SourceSpan}; use crate::parse::tx::EntityRep;
#[derive( #[derive(
Debug, Debug,
Eq, Eq,
PartialEq, PartialEq,
Ord, Ord,
PartialOrd, PartialOrd,
Copy, Copy,
Clone, Clone,
serde_derive::Serialize, serde_derive::Serialize,
serde_derive::Deserialize, serde_derive::Deserialize,
)] )]
pub(crate) enum CompactTarget { pub(crate) enum CompactTarget {
Triples, Triples,
@ -37,6 +39,12 @@ pub(crate) enum SysOp {
RemoveAttribute(Symbol), RemoveAttribute(Symbol),
RenameAttribute(Symbol, Symbol), RenameAttribute(Symbol, Symbol),
ExecuteLocalScript(SmartString<LazyCompact>), ExecuteLocalScript(SmartString<LazyCompact>),
/// Request for the recorded history of a set of attributes on a set of entities.
History {
/// Lower bound of the validity window, from the optional `from` clause;
/// `None` when the clause is absent.
from: Option<Validity>,
/// Upper bound of the validity window, from the optional `to` clause;
/// `None` when the clause is absent.
to: Option<Validity>,
/// Entities to inspect, given either by id or by a `[attribute, value]` key pair.
entities: Vec<EntityRep>,
/// Names of the attributes whose history is requested.
attributes: Vec<Symbol>,
},
} }
#[derive(Debug, Diagnostic, Error)] #[derive(Debug, Diagnostic, Error)]
@ -109,6 +117,56 @@ pub(crate) fn parse_sys(mut src: Pairs<'_>) -> Result<SysOp> {
let new_attr_name = Symbol::new(p.as_str(), p.extract_span()); let new_attr_name = Symbol::new(p.as_str(), p.extract_span());
SysOp::RenameAttribute(attr_name, new_attr_name) SysOp::RenameAttribute(attr_name, new_attr_name)
} }
// Grammar: "history" from_clause? to_clause? "for" expr ("," expr)* ":"
//          compound_ident ("," compound_ident)*
Rule::history_op => {
    let mut from = None;
    let mut to = None;
    let mut attributes = vec![];
    let mut entities = vec![];
    for p in inner.into_inner() {
        match p.as_rule() {
            Rule::from_clause => {
                // `from_clause` always wraps exactly one `expr`, so `unwrap` cannot fail.
                let expr = build_expr(p.into_inner().next().unwrap(), &Default::default())?;
                from = Some(Validity::try_from(expr)?)
            }
            Rule::to_clause => {
                // `to_clause` always wraps exactly one `expr`, so `unwrap` cannot fail.
                let expr = build_expr(p.into_inner().next().unwrap(), &Default::default())?;
                to = Some(Validity::try_from(expr)?)
            }
            Rule::expr => {
                // Each entity must be a constant: either a string holding an entity id,
                // or a two-element list `[attribute_name, value]` used as a lookup key.
                let span = p.extract_span();
                match build_expr(p, &Default::default())?.eval_to_const()? {
                    v @ DataValue::Str(_) => {
                        let e = v.get_entity_id().ok_or_else(|| ParseError { span })?;
                        entities.push(EntityRep::Id(e))
                    }
                    DataValue::List(c) => {
                        ensure!(c.len() == 2, ParseError { span });
                        // The length check above makes both `unwrap`s below infallible.
                        let mut c = c.into_iter();
                        let attr = match c.next().unwrap() {
                            DataValue::Str(s) => s,
                            _ => bail!(ParseError { span }),
                        };
                        let val = c.next().unwrap();
                        entities.push(EntityRep::PullByKey(attr, val));
                    }
                    // Any other constant is not a valid entity specification. Report it
                    // instead of silently dropping it, which would yield an empty or
                    // partial history without any diagnostic.
                    _ => bail!(ParseError { span }),
                }
            }
            Rule::compound_ident => {
                attributes.push(Symbol::new(p.as_str(), p.extract_span()))
            }
            // The grammar admits no other children under `history_op`.
            _ => unreachable!(),
        }
    }
    SysOp::History {
        from,
        to,
        entities,
        attributes,
    }
}
_ => unreachable!(), _ => unreachable!(),
}) })
} }

@ -31,7 +31,7 @@ impl Display for TxAction {
} }
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone, serde_derive::Serialize, serde_derive::Deserialize)]
pub(crate) enum EntityRep { pub(crate) enum EntityRep {
Id(EntityId), Id(EntityId),
UserTempId(SmartString<LazyCompact>), UserTempId(SmartString<LazyCompact>),

@ -19,22 +19,22 @@ use cozorocks::{DbBuilder, DbIter, RocksDb};
use cozorocks::CfHandle::{Pri, Snd}; use cozorocks::CfHandle::{Pri, Snd};
use crate::data::compare::{DB_KEY_PREFIX_LEN, rusty_cmp}; use crate::data::compare::{DB_KEY_PREFIX_LEN, rusty_cmp};
use crate::data::encode::{ use crate::data::encode::{decode_ae_key, decode_value_from_key, decode_value_from_val, encode_aev_key, encode_ave_key, encode_ave_ref_key, largest_key, smallest_key};
encode_aev_key, encode_ave_key, encode_ave_ref_key, largest_key, smallest_key,
};
use crate::data::id::{EntityId, TxId, Validity}; use crate::data::id::{EntityId, TxId, Validity};
use crate::data::json::JsonValue; use crate::data::json::JsonValue;
use crate::data::program::{InputProgram, QueryAssertion, RelationOp}; use crate::data::program::{InputProgram, QueryAssertion, RelationOp};
use crate::data::symb::Symbol; use crate::data::symb::Symbol;
use crate::data::triple::StoreOp;
use crate::data::tuple::{EncodedTuple, rusty_scratch_cmp, SCRATCH_DB_KEY_PREFIX_LEN, Tuple}; use crate::data::tuple::{EncodedTuple, rusty_scratch_cmp, SCRATCH_DB_KEY_PREFIX_LEN, Tuple};
use crate::data::value::{DataValue, LARGEST_UTF_CHAR}; use crate::data::value::{DataValue, LARGEST_UTF_CHAR};
use crate::parse::{CozoScript, parse_script, SourceSpan}; use crate::parse::{CozoScript, parse_script, SourceSpan};
use crate::parse::schema::AttrTxItem; use crate::parse::schema::AttrTxItem;
use crate::parse::sys::{CompactTarget, SysOp}; use crate::parse::sys::{CompactTarget, SysOp};
use crate::parse::tx::TripleTx; use crate::parse::tx::{EntityRep, TripleTx};
use crate::runtime::relation::{RelationId, RelationMetadata}; use crate::runtime::relation::{RelationId, RelationMetadata};
use crate::runtime::transact::SessionTx; use crate::runtime::transact::SessionTx;
use crate::transact::meta::AttrNotFoundError; use crate::transact::meta::AttrNotFoundError;
use crate::transact::triple::EntityNotFound;
use crate::utils::swap_option_result; use crate::utils::swap_option_result;
struct RunningQueryHandle { struct RunningQueryHandle {
@ -388,7 +388,80 @@ impl Db {
bail!(NonLaxSecurity) bail!(NonLaxSecurity)
} }
} }
SysOp::History { from, to, entities, attributes, } => {
self.pull_history(from, to, entities, attributes)
}
}
}
fn pull_history(&self, from: Option<Validity>, to: Option<Validity>, entities: Vec<EntityRep>, attributes: Vec<Symbol>) -> Result<JsonValue> {
let tx = self.transact()?;
let to_vld = to.unwrap_or(Validity::MAX);
let from_vld = from.unwrap_or(Validity::MIN);
let mut ret = vec![];
for entity in entities {
let eid = match entity {
EntityRep::Id(id) => id,
EntityRep::PullByKey(k, v) => {
let attr = tx.attr_by_name(&k)?
.ok_or_else(|| AttrNotFoundError(k.to_string()))?;
tx.eid_by_unique_av(&attr, &v, to_vld)?.ok_or_else(|| {
EntityNotFound(format!("{}: {:?}", attr.name, v))
})?
}
_ => unreachable!()
};
for attr_name in &attributes {
let attr = tx.attr_by_name(&attr_name)?
.ok_or_else(|| AttrNotFoundError(attr_name.to_string()))?;
let mut lower_bound = encode_aev_key(attr.id, eid, &DataValue::Null, to_vld);
let upper_bound = encode_aev_key(attr.id, eid, &DataValue::Bot, to_vld);
let mut it = tx.tx.iterator(Pri).upper_bound(&upper_bound).start();
it.seek(&lower_bound);
while let Some((k_slice, v_slice)) = it.pair()? {
let (_aid, eid, vld) = decode_ae_key(k_slice)?;
if vld != Validity::NO_HISTORY {
if vld > to_vld {
lower_bound.copy_from_slice(k_slice);
lower_bound.encoded_entity_amend_validity(to_vld);
it.seek(&lower_bound);
continue;
} else if vld < from_vld {
lower_bound.copy_from_slice(k_slice);
lower_bound.encoded_entity_amend_validity_to_inf_past();
it.seek(&lower_bound);
continue;
}
}
let op = StoreOp::try_from(v_slice[0])?;
let v = match op {
StoreOp::Retract => DataValue::Null,
StoreOp::Assert => {
let mut v = decode_value_from_key(k_slice)?;
if v == DataValue::Guard {
v = decode_value_from_val(v_slice)?;
}
v
}
};
let tss = if vld == Validity::NO_HISTORY {
JsonValue::Null
} else {
json!(vld.0)
};
let v_json = JsonValue::from(v);
ret.push(json!([eid.0, attr.name, tss, format!("{:?}", vld), op.to_string(), v_json]));
it.next();
}
}
} }
Ok(json!({
"headers": ["entity_id", "attr", "timestamp", "timestamp_str", "op", "value"],
"rows": ret
}))
} }
fn run_query( fn run_query(
&self, &self,

@ -832,12 +832,12 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": "In fact, this is doable, but the resulting system is a total pain to use. First, you will need to _reify_ most of your values. Instead of saying that `[bob person.name 'Bob']`, you need something like `[bob person.used_name name]`, where `[name name.is_spelled 'Bob']` and `[name name.is_valid_at '2020-03-04']`, etc. Next, how are you going to find out what everything was at a particular moment? You cannot use equality conditions to filter entities based on `is_valid_at`, since something that was introduced in 1999 is still valid in 2020, _unless_ some other fact supersedes it or it was retracted _after_ 1999. And we are only after the latest valid fact, not all historical facts at a point in time. Fulfilling these requirements _is_ possible in Cozo with aggregations, but they add a huge amount of complexities to the queries for something that was intuitively very simple.", "source": "In fact, this is doable, but the resulting system is a total pain to use. First, you will need to _reify_ most of your values. Instead of saying that `[bob person.name 'Bob']`, you need something like `[bob person.used_name name]`, where `[name name.is_spelled 'Bob']` and `[name name.is_valid_at '2020-03-04']`, etc. Next, how are you going to find out what everything was at a particular moment? You cannot use equality conditions to filter entities based on `is_valid_at`, since something that was introduced in 1999 is still valid in 2020, _unless_ some other fact supersedes it or it was retracted _after_ 1999. And we are only after the latest valid fact, not all historical facts at a point in time. Fulfilling these requirements _is_ possible in Cozo with aggregations, but they necessitate a huge amount of complexities for even the simplest queries.",
"metadata": {} "metadata": {}
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": "To solve this particular problem, which occurs more common than you might think, Cozo has built-in support for historical facts. This functionality carries a non-trivial performance penalty, so you have to request it explicitly for each attribute. And like other properties of attributes, whether it has history support is immutable. If you change your mind, you need to define a new attribute and copy data over, as usual.\n\nIf you are already worried about performance, let's assure you that it is MUCH MORE performant than the hand-rolled solution indicated above. In fact, querying a history-enabled attribute is about $c \\log n$ times slower than the corresponding query for a non-history-enabled attribute, where $c$ is a constant and $n$ is the number of historical facts a given entity-attribute pair has. The logarithmic complexity beats any simple-minded implementation.", "source": "To solve this particular problem, which occurs more commonly than you might think, Cozo has built-in support for historical facts. This functionality carries a non-trivial performance penalty, so you have to request it explicitly for each attribute. And like other properties of attributes, whether it has history support is immutable. If you later change your mind, you need to define a new attribute and copy data over, as usual.\n\nIf you are already worried about performance, let's assure you that Cozo's historical facts implementation is MUCH MORE performant than the hand-rolled solution indicated above. In fact, querying a history-enabled attribute is on average $c \\log n$ times slower than the corresponding query for a non-history-enabled attribute, where $c$ is a small constant and $n$ is the number of historical facts a given entity-attribute pair has. The logarithmic complexity beats any simple-minded implementation, especially when the amount of historical records is enormous.",
"metadata": {} "metadata": {}
}, },
{ {
@ -851,10 +851,10 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 102, "execution_count": 127,
"outputs": [ "outputs": [
{ {
"execution_count": 102, "execution_count": 127,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">attr_id</td><td style=\"font-weight: bold\">op</td></tr></thead><tbody><tr><td><span style=\"color: #307fc1;\">10000001</span></td><td>assert</td></tr><tr><td><span style=\"color: #307fc1;\">10000002</span></td><td>assert</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">attr_id</td><td style=\"font-weight: bold\">op</td></tr></thead><tbody><tr><td><span style=\"color: #307fc1;\">10000001</span></td><td>assert</td></tr><tr><td><span style=\"color: #307fc1;\">10000002</span></td><td>assert</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>"
@ -865,12 +865,12 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": "For simplicity we assumed a country's name does not change, but obviously its head of state will change, indicated by the modifier `history`. That's actually all you need for the schema.", "source": "For simplicity we assume that a country's name does not change, but obviously its head of state changes every few years, indicated by the modifier `history`. That's all you need for the schema.",
"metadata": {} "metadata": {}
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": "Now let's insert some data. You can actually insert data as you did before:", "source": "You insert data as you did before:",
"metadata": {} "metadata": {}
}, },
{ {
@ -879,10 +879,10 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 103, "execution_count": 128,
"outputs": [ "outputs": [
{ {
"execution_count": 103, "execution_count": 128,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">asserts</td><td style=\"font-weight: bold\">retracts</td></tr></thead><tbody><tr><td><span style=\"color: #307fc1;\">4</span></td><td><span style=\"color: #307fc1;\">0</span></td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">asserts</td><td style=\"font-weight: bold\">retracts</td></tr></thead><tbody><tr><td><span style=\"color: #307fc1;\">4</span></td><td><span style=\"color: #307fc1;\">0</span></td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>"
@ -897,10 +897,10 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 104, "execution_count": 129,
"outputs": [ "outputs": [
{ {
"execution_count": 104, "execution_count": 129,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>UK</td><td>Truss</td></tr><tr><td>US</td><td>Biden</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>UK</td><td>Truss</td></tr><tr><td>US</td><td>Biden</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>"
@ -925,10 +925,10 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 105, "execution_count": 130,
"outputs": [ "outputs": [
{ {
"execution_count": 105, "execution_count": 130,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">asserts</td><td style=\"font-weight: bold\">retracts</td></tr></thead><tbody><tr><td><span style=\"color: #307fc1;\">2</span></td><td><span style=\"color: #307fc1;\">0</span></td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">asserts</td><td style=\"font-weight: bold\">retracts</td></tr></thead><tbody><tr><td><span style=\"color: #307fc1;\">2</span></td><td><span style=\"color: #307fc1;\">0</span></td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>"
@ -939,7 +939,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": "The syntax should explain itself. You can specify the date in ISO 8601 format, in which case it is interpreted as a timestamp at the stated date at midnight UTC, or as RFC 3339 format such as `'1996-12-19T16:39:57-08:00'`, or as an integer indicating the number of _microseconds_ since the UNIX epoch (negative numbers for before). Let's see who are the heads of states _now_:", "source": "The syntax should explain itself. You can specify the date in ISO 8601 format, in which case it is interpreted as a timestamp at the stated date at midnight UTC, or as RFC 3339 format such as `'1996-12-19T16:39:57-08:00'`, or as an integer indicating the number of _microseconds_ since the UNIX epoch (negative numbers for before the epoch). The validity marker only affects attributes that were defined with the `history` modifier.\n\nLet's see who are the heads of states _now_:",
"metadata": {} "metadata": {}
}, },
{ {
@ -948,10 +948,10 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 106, "execution_count": 131,
"outputs": [ "outputs": [
{ {
"execution_count": 106, "execution_count": 131,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>UK</td><td>Truss</td></tr><tr><td>US</td><td>Biden</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>UK</td><td>Truss</td></tr><tr><td>US</td><td>Biden</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>"
@ -962,7 +962,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": "As expected, the historical data does not affect facts now.", "source": "As expected.",
"metadata": {} "metadata": {}
}, },
{ {
@ -976,10 +976,10 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 107, "execution_count": 132,
"outputs": [ "outputs": [
{ {
"execution_count": 107, "execution_count": 132,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>UK</td><td>Johnson</td></tr><tr><td>US</td><td>Trump</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>UK</td><td>Johnson</td></tr><tr><td>US</td><td>Trump</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>"
@ -999,10 +999,10 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 108, "execution_count": 133,
"outputs": [ "outputs": [
{ {
"execution_count": 108, "execution_count": 133,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>UK</td><td>Johnson</td></tr><tr><td>US</td><td>Trump</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>UK</td><td>Johnson</td></tr><tr><td>US</td><td>Trump</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>"
@ -1013,7 +1013,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": "Umm ... that doesn't look right. The problem is, when we inserted facts about Biden and Truss, we did not tell the system when that fact starts being valid, so the system assumes the current timestamp. Let's fix that:", "source": "Umm ... that doesn't look right. The problem is, when we inserted facts about Biden and Truss, we did not tell the system when that fact starts being valid, so the system assumes the current timestamp. If you are inserting facts in real time, this is what you want. But if you are inserting historical facts as we are doing here, or are doing catch-ups, this causes problems. In our case the fix is easy:",
"metadata": {} "metadata": {}
}, },
{ {
@ -1022,10 +1022,10 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 109, "execution_count": 134,
"outputs": [ "outputs": [
{ {
"execution_count": 109, "execution_count": 134,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">asserts</td><td style=\"font-weight: bold\">retracts</td></tr></thead><tbody><tr><td><span style=\"color: #307fc1;\">2</span></td><td><span style=\"color: #307fc1;\">0</span></td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">asserts</td><td style=\"font-weight: bold\">retracts</td></tr></thead><tbody><tr><td><span style=\"color: #307fc1;\">2</span></td><td><span style=\"color: #307fc1;\">0</span></td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>"
@ -1040,10 +1040,10 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 110, "execution_count": 135,
"outputs": [ "outputs": [
{ {
"execution_count": 110, "execution_count": 135,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>UK</td><td>Johnson</td></tr><tr><td>US</td><td>Biden</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>UK</td><td>Johnson</td></tr><tr><td>US</td><td>Biden</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>"
@ -1063,10 +1063,10 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 111, "execution_count": 136,
"outputs": [ "outputs": [
{ {
"execution_count": 111, "execution_count": 136,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>UK</td><td>Truss</td></tr><tr><td>US</td><td>Biden</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>UK</td><td>Truss</td></tr><tr><td>US</td><td>Biden</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>"
@ -1077,7 +1077,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": "Wow, that can't happen no matter what the world is coming to. We fix that by _retracting_ facts as before, but with a timestamp attached (we will use a _very_ generous timestamp):", "source": "Wow, that can't happen no matter what the world is coming to. We fix that by _retracting_ facts as before, but with a timestamp attached (we will use a _very_ generous timestamp for them):",
"metadata": {} "metadata": {}
}, },
{ {
@ -1086,10 +1086,10 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 112, "execution_count": 137,
"outputs": [ "outputs": [
{ {
"execution_count": 112, "execution_count": 137,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">asserts</td><td style=\"font-weight: bold\">retracts</td></tr></thead><tbody><tr><td><span style=\"color: #307fc1;\">0</span></td><td><span style=\"color: #307fc1;\">6</span></td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">asserts</td><td style=\"font-weight: bold\">retracts</td></tr></thead><tbody><tr><td><span style=\"color: #307fc1;\">0</span></td><td><span style=\"color: #307fc1;\">6</span></td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>"
@ -1104,10 +1104,10 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 113, "execution_count": 138,
"outputs": [ "outputs": [
{ {
"execution_count": 113, "execution_count": 138,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>"
@ -1127,10 +1127,10 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 114, "execution_count": 139,
"outputs": [ "outputs": [
{ {
"execution_count": 114, "execution_count": 139,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>UK</td><td>Truss</td></tr><tr><td>US</td><td>Biden</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>UK</td><td>Truss</td></tr><tr><td>US</td><td>Biden</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>"
@ -1150,10 +1150,10 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 115, "execution_count": 140,
"outputs": [ "outputs": [
{ {
"execution_count": 115, "execution_count": 140,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>US</td><td>Trump</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td>US</td><td>Trump</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>"
@ -1164,7 +1164,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": "UK is missing since we have yet to enter the head of state for UK at this period into the database. Fix it:", "source": "UK is missing since we have yet to enter the head of state for UK at this period into the database. Fix:",
"metadata": {} "metadata": {}
}, },
{ {
@ -1173,13 +1173,13 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 116, "execution_count": 151,
"outputs": [ "outputs": [
{ {
"execution_count": 116, "execution_count": 151,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">asserts</td><td style=\"font-weight: bold\">retracts</td></tr></thead><tbody><tr><td><span style=\"color: #307fc1;\">1</span></td><td><span style=\"color: #307fc1;\">0</span></td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">asserts</td><td style=\"font-weight: bold\">retracts</td></tr></thead><tbody><tr><td><span style=\"color: #307fc1;\">1</span></td><td><span style=\"color: #307fc1;\">0</span></td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 2ms</span></div>"
}, },
"metadata": {} "metadata": {}
} }
@ -1196,13 +1196,13 @@
"metadata": { "metadata": {
"trusted": true "trusted": true
}, },
"execution_count": 121, "execution_count": 152,
"outputs": [ "outputs": [
{ {
"execution_count": 121, "execution_count": 152,
"output_type": "execute_result", "output_type": "execute_result",
"data": { "data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">year</td><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td><span style=\"color: #307fc1;\">2019</span></td><td>UK</td><td>May</td></tr><tr><td><span style=\"color: #307fc1;\">2019</span></td><td>US</td><td>Trump</td></tr><tr><td><span style=\"color: #307fc1;\">2022</span></td><td>UK</td><td>Johnson</td></tr><tr><td><span style=\"color: #307fc1;\">2022</span></td><td>US</td><td>Biden</td></tr><tr><td>now</td><td>UK</td><td>Truss</td></tr><tr><td>now</td><td>US</td><td>Biden</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 0ms</span></div>" "text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">year</td><td style=\"font-weight: bold\">country</td><td style=\"font-weight: bold\">head</td></tr></thead><tbody><tr><td><span style=\"color: #307fc1;\">2019</span></td><td>UK</td><td>May</td></tr><tr><td><span style=\"color: #307fc1;\">2019</span></td><td>US</td><td>Trump</td></tr><tr><td><span style=\"color: #307fc1;\">2022</span></td><td>UK</td><td>Johnson</td></tr><tr><td><span style=\"color: #307fc1;\">2022</span></td><td>US</td><td>Biden</td></tr><tr><td>now</td><td>UK</td><td>Truss</td></tr><tr><td>now</td><td>US</td><td>Biden</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 9ms</span></div>"
}, },
"metadata": {} "metadata": {}
} }
@ -1215,7 +1215,63 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": "A final API before we are done with this time-travelling thing.", "source": "A final API before we are done with this time-travelling thing. If you want a record of the actual history of attributes for a certain entity instead of its time slices, use this system op:",
"metadata": {}
},
{
"cell_type": "code",
"source": ":db history for ['country.name', 'UK'], ['country.name', 'US'] : country.name, country.head",
"metadata": {
"trusted": true
},
"execution_count": 156,
"outputs": [
{
"execution_count": 156,
"output_type": "execute_result",
"data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">entity_id</td><td style=\"font-weight: bold\">attr</td><td style=\"font-weight: bold\">timestamp</td><td style=\"font-weight: bold\">timestamp_str</td><td style=\"font-weight: bold\">op</td><td style=\"font-weight: bold\">value</td></tr></thead><tbody><tr><td>1e661e6c-35c2-11ed-b6e2-e96183bbfaa3</td><td>country.name</td><td><span style=\"color: #bf5b3d;\">null</span></td><td>NO_HISTORY</td><td>assert</td><td>UK</td></tr><tr><td>1e661e6c-35c2-11ed-b6e2-e96183bbfaa3</td><td>country.head</td><td><span style=\"color: #307fc1;\">4070908800000000</span></td><td>2099-01-01T00:00:00+00:00</td><td>retract</td><td><span style=\"color: #bf5b3d;\">null</span></td></tr><tr><td>1e661e6c-35c2-11ed-b6e2-e96183bbfaa3</td><td>country.head</td><td><span style=\"color: #307fc1;\">1663334336828959</span></td><td>2022-09-16T13:18:56.828959+00:00</td><td>assert</td><td>Truss</td></tr><tr><td>1e661e6c-35c2-11ed-b6e2-e96183bbfaa3</td><td>country.head</td><td><span style=\"color: #307fc1;\">1662336000000000</span></td><td>2022-09-05T00:00:00+00:00</td><td>assert</td><td>Truss</td></tr><tr><td>1e661e6c-35c2-11ed-b6e2-e96183bbfaa3</td><td>country.head</td><td><span style=\"color: #307fc1;\">1563926400000000</span></td><td>2019-07-24T00:00:00+00:00</td><td>assert</td><td>Johnson</td></tr><tr><td>1e661e6c-35c2-11ed-b6e2-e96183bbfaa3</td><td>country.head</td><td><span style=\"color: #307fc1;\">1468195200000000</span></td><td>2016-07-11T00:00:00+00:00</td><td>assert</td><td>May</td></tr><tr><td>1e661d86-35c2-11ed-ad53-211616b65ff3</td><td>country.name</td><td><span style=\"color: #bf5b3d;\">null</span></td><td>NO_HISTORY</td><td>assert</td><td>US</td></tr><tr><td>1e661d86-35c2-11ed-ad53-211616b65ff3</td><td>country.head</td><td><span style=\"color: #307fc1;\">4070908800000000</span></td><td>2099-01-01T00:00:00+00:00</td><td>retract</td><td><span style=\"color: 
#bf5b3d;\">null</span></td></tr><tr><td>1e661d86-35c2-11ed-ad53-211616b65ff3</td><td>country.head</td><td><span style=\"color: #307fc1;\">1663334336828959</span></td><td>2022-09-16T13:18:56.828959+00:00</td><td>assert</td><td>Biden</td></tr><tr><td>1e661d86-35c2-11ed-ad53-211616b65ff3</td><td>country.head</td><td><span style=\"color: #307fc1;\">1611100800000000</span></td><td>2021-01-20T00:00:00+00:00</td><td>assert</td><td>Biden</td></tr><tr><td>1e661d86-35c2-11ed-ad53-211616b65ff3</td><td>country.head</td><td><span style=\"color: #307fc1;\">1484870400000000</span></td><td>2017-01-20T00:00:00+00:00</td><td>assert</td><td>Trump</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 6ms</span></div>"
},
"metadata": {}
}
]
},
{
"cell_type": "markdown",
"source": "We have used a unique key to identify the entity. You can of course use the entity ID itself. The time ordering within each entity-attribute pair is reverse chronological.",
"metadata": {}
},
{
"cell_type": "markdown",
"source": "Restricting the range of time for the query is also possible:",
"metadata": {}
},
{
"cell_type": "code",
"source": ":db history from '2020-01-01' to '2022-01-01' for ['country.name', 'UK'], ['country.name', 'US'] : country.name, country.head",
"metadata": {
"trusted": true
},
"execution_count": 158,
"outputs": [
{
"execution_count": 158,
"output_type": "execute_result",
"data": {
"text/html": "<div style=\"display: flex; align-items: end; flex-direction: row;\"><table><thead><tr><td style=\"font-weight: bold\">entity_id</td><td style=\"font-weight: bold\">attr</td><td style=\"font-weight: bold\">timestamp</td><td style=\"font-weight: bold\">timestamp_str</td><td style=\"font-weight: bold\">op</td><td style=\"font-weight: bold\">value</td></tr></thead><tbody><tr><td>1e661e6c-35c2-11ed-b6e2-e96183bbfaa3</td><td>country.name</td><td><span style=\"color: #bf5b3d;\">null</span></td><td>NO_HISTORY</td><td>assert</td><td>UK</td></tr><tr><td>1e661d86-35c2-11ed-ad53-211616b65ff3</td><td>country.name</td><td><span style=\"color: #bf5b3d;\">null</span></td><td>NO_HISTORY</td><td>assert</td><td>US</td></tr><tr><td>1e661d86-35c2-11ed-ad53-211616b65ff3</td><td>country.head</td><td><span style=\"color: #307fc1;\">1611100800000000</span></td><td>2021-01-20T00:00:00+00:00</td><td>assert</td><td>Biden</td></tr></tbody></table><span style=\"color: darkgrey; font-size: xx-small; margin: 13px;\">Took 3ms</span></div>"
},
"metadata": {}
}
]
},
{
"cell_type": "markdown",
"source": "Note that even though the UK had a head of state in this period, it is not included in the output since its _assertions_ lies outside the time range. This API is only meant for administrative purposes. For general queries, use Datalog queries instead.",
"metadata": {}
},
{
"cell_type": "markdown",
"source": "As we have seen, for attributes with history, retraction does not really remove the data from the database. If you are e.g. legally required to make sure a piece of data is physically gone, retract with exactly the same timestamp as the piece of data originally had. In this case it is recommended to use the integer form of the timestamp. You won't be able to retrieve the data with the public API after the retraction, but some traces of the data may still persist in write ahead logs and other places. Complete eradication may take an unspecified amount of time. That is, if you did not have any backups set up yourself (GASP). Yes, absolute elimination of data is difficult and uncertain.",
"metadata": {} "metadata": {}
}, },
{ {

Loading…
Cancel
Save