UUID as a DataValue

main
Ziyang Hu 2 years ago
parent af8b328d13
commit 6f567f089a

@ -23,16 +23,16 @@
<cargoProject FILE="$PROJECT_DIR$/Cargo.toml" />
</component>
<component name="ChangeListManager">
<list default="true" id="fb7002fa-47b1-45d9-bc6d-711b16e752b3" name="Changes" comment="manifest file">
<list default="true" id="fb7002fa-47b1-45d9-bc6d-711b16e752b3" name="Changes" comment="transactions are hard">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/cozorocks/src/bridge/db.rs" beforeDir="false" afterPath="$PROJECT_DIR$/cozorocks/src/bridge/db.rs" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/cozoscript.pest" beforeDir="false" afterPath="$PROJECT_DIR$/src/cozoscript.pest" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/parse/expr.rs" beforeDir="false" afterPath="$PROJECT_DIR$/src/parse/expr.rs" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/parse/tx.rs" beforeDir="false" afterPath="$PROJECT_DIR$/src/parse/tx.rs" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/runtime/db.rs" beforeDir="false" afterPath="$PROJECT_DIR$/src/runtime/db.rs" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/transact/triple.rs" beforeDir="false" afterPath="$PROJECT_DIR$/src/transact/triple.rs" afterDir="false" />
<change beforePath="$PROJECT_DIR$/tests/air_routes.rs" beforeDir="false" afterPath="$PROJECT_DIR$/tests/air_routes.rs" afterDir="false" />
<change beforePath="$PROJECT_DIR$/tests/simple.rs" beforeDir="false" afterPath="$PROJECT_DIR$/tests/simple.rs" afterDir="false" />
<change beforePath="$PROJECT_DIR$/Cargo.toml" beforeDir="false" afterPath="$PROJECT_DIR$/Cargo.toml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docs/source/datatypes.rst" beforeDir="false" afterPath="$PROJECT_DIR$/docs/source/datatypes.rst" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docs/source/functions.rst" beforeDir="false" afterPath="$PROJECT_DIR$/docs/source/functions.rst" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/data/expr.rs" beforeDir="false" afterPath="$PROJECT_DIR$/src/data/expr.rs" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/data/functions.rs" beforeDir="false" afterPath="$PROJECT_DIR$/src/data/functions.rs" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/data/json.rs" beforeDir="false" afterPath="$PROJECT_DIR$/src/data/json.rs" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/data/tests/functions.rs" beforeDir="false" afterPath="$PROJECT_DIR$/src/data/tests/functions.rs" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/data/value.rs" beforeDir="false" afterPath="$PROJECT_DIR$/src/data/value.rs" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
@ -73,7 +73,7 @@
"org.rust.cargo.project.model.PROJECT_DISCOVERY": "true"
}
}]]></component>
<component name="RunManager" selected="Cargo.Test data::id::p">
<component name="RunManager" selected="Cargo.Test data::tests::functions::test_uuid">
<configuration default="true" type="CLionExternalRunConfiguration" factoryName="Application" REDIRECT_INPUT="false" ELEVATE="false" USE_EXTERNAL_CONSOLE="false" PASS_PARENT_ENVS_2="true">
<method v="2">
<option name="CLION.EXTERNAL.BUILD" enabled="true" />
@ -101,6 +101,40 @@
<option name="CARGO.BUILD_TASK_PROVIDER" enabled="true" />
</method>
</configuration>
<configuration name="Test data::json::x" type="CargoCommandRunConfiguration" factoryName="Cargo Command" temporary="true">
<option name="command" value="test --package cozo --lib data::json::x -- --exact" />
<option name="workingDirectory" value="file://$PROJECT_DIR$" />
<option name="channel" value="DEFAULT" />
<option name="requiredFeatures" value="true" />
<option name="allFeatures" value="false" />
<option name="emulateTerminal" value="false" />
<option name="withSudo" value="false" />
<option name="buildTarget" value="REMOTE" />
<option name="backtrace" value="SHORT" />
<envs />
<option name="isRedirectInput" value="false" />
<option name="redirectInputPath" value="" />
<method v="2">
<option name="CARGO.BUILD_TASK_PROVIDER" enabled="true" />
</method>
</configuration>
<configuration name="Test data::tests::functions::test_uuid" type="CargoCommandRunConfiguration" factoryName="Cargo Command" temporary="true">
<option name="command" value="test --package cozo --lib data::tests::functions::test_uuid -- --exact" />
<option name="workingDirectory" value="file://$PROJECT_DIR$" />
<option name="channel" value="DEFAULT" />
<option name="requiredFeatures" value="true" />
<option name="allFeatures" value="false" />
<option name="emulateTerminal" value="false" />
<option name="withSudo" value="false" />
<option name="buildTarget" value="REMOTE" />
<option name="backtrace" value="SHORT" />
<envs />
<option name="isRedirectInput" value="false" />
<option name="redirectInputPath" value="" />
<method v="2">
<option name="CARGO.BUILD_TASK_PROVIDER" enabled="true" />
</method>
</configuration>
<configuration default="true" type="CargoCommandRunConfiguration" factoryName="Cargo Command">
<option name="command" value="run" />
<option name="channel" value="DEFAULT" />
@ -118,11 +152,15 @@
</method>
</configuration>
<list>
<item itemvalue="Cargo.Test data::json::x" />
<item itemvalue="Cargo.Test data::tests::functions::test_uuid" />
<item itemvalue="Cargo.Test data::id::p" />
<item itemvalue="CMake Application.cozorocks" />
</list>
<recent_temporary>
<list>
<item itemvalue="Cargo.Test data::tests::functions::test_uuid" />
<item itemvalue="Cargo.Test data::json::x" />
<item itemvalue="Cargo.Test data::id::p" />
</list>
</recent_temporary>
@ -138,7 +176,7 @@
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1663161524540</updated>
<workItem from="1663161527741" duration="19713000" />
<workItem from="1663161527741" duration="26111000" />
</task>
<task id="LOCAL-00001" summary="regenerate idea files">
<created>1663161616722</created>
@ -175,7 +213,14 @@
<option name="project" value="LOCAL" />
<updated>1663220906043</updated>
</task>
<option name="localTasksCounter" value="6" />
<task id="LOCAL-00006" summary="transactions are hard">
<created>1663233714452</created>
<option name="number" value="00006" />
<option name="presentableId" value="LOCAL-00006" />
<option name="project" value="LOCAL" />
<updated>1663233714452</updated>
</task>
<option name="localTasksCounter" value="7" />
<servers />
</component>
<component name="TypeScriptGeneratedFilesManager">
@ -198,6 +243,7 @@
<MESSAGE value="enforce local file security" />
<MESSAGE value="reintroduce bloom filter settings" />
<MESSAGE value="manifest file" />
<option name="LAST_COMMIT_MESSAGE" value="manifest file" />
<MESSAGE value="transactions are hard" />
<option name="LAST_COMMIT_MESSAGE" value="transactions are hard" />
</component>
</project>

@ -40,6 +40,7 @@ nalgebra = "0.31.1"
approx = "0.5.1"
unicode-normalization = "0.1.21"
thiserror = "1.0.34"
uuid = { version = "1.1.2", features = ["v1", "v4", "serde"] }
cozorocks = { path = "cozorocks" }
#[target.'cfg(not(target_env = "msvc"))'.dependencies]

@ -13,6 +13,7 @@ A runtime value in Cozo can be of the following *value-types*:
* ``Number``
* ``String``
* ``Bytes``
* ``Uuid``
* ``List``
``Number`` can be ``Float`` (double precision) or ``Int`` (signed, 64 bits). Cozo will auto-promote ``Int`` to ``Float`` when necessary.
@ -28,6 +29,7 @@ Within each type values are *compared* according to logic custom to each type:
* Lists are ordered lexicographically by their elements;
* Bytes are compared lexicographically;
* Strings are ordered lexicographically by their UTF-8 byte representations.
* UUIDs that are not of version one are ordered before all UUIDs of version one. Two UUIDs of version one are compared by their timestamps, with ties broken by comparing their bytes. Two UUIDs that are both not of version one are compared by their bytes.
.. WARNING::

@ -465,6 +465,13 @@ Type checking and conversions
* ``PI`` is converted to pi (3.14159...);
* ``E`` is converted to the base of natural logarithms, or Euler's constant (2.71828...).
.. function:: to_uuid(x)
Tries to convert ``x`` to a UUID. The input must either be a hyphenated UUID string representation or already a UUID for it to succeed.
.. function:: uuid_timestamp(x)
Extracts the timestamp from a UUID version 1, as whole seconds since the UNIX epoch. If the UUID is not of version 1, ``null`` is returned. If ``x`` is not a UUID, an error is raised.
.. function:: is_null(x)
@ -506,6 +513,10 @@ Type checking and conversions
Checks for strings.
.. function:: is_uuid(x)
Checks for UUIDs.
-----------------
Random functions
-----------------
@ -529,6 +540,13 @@ Random functions
Randomly chooses an element from ``list`` and returns it. If the list is empty, it returns ``null``.
.. function:: rand_uuid_v1()
Generate a random UUID, version 1 (random bits plus timestamp).
.. function:: rand_uuid_v4()
Generate a random UUID, version 4 (completely random bits).
------------------
Regex functions

@ -544,6 +544,7 @@ pub(crate) fn get_op(name: &str) -> Option<&'static Op> {
"is_finite" => &OP_IS_FINITE,
"is_infinite" => &OP_IS_INFINITE,
"is_nan" => &OP_IS_NAN,
"is_uuid" => &OP_IS_UUID,
"length" => &OP_LENGTH,
"sorted" => &OP_SORTED,
"reverse" => &OP_REVERSE,
@ -581,6 +582,10 @@ pub(crate) fn get_op(name: &str) -> Option<&'static Op> {
"union" => &OP_UNION,
"intersection" => &OP_INTERSECTION,
"difference" => &OP_DIFFERENCE,
"to_uuid" => &OP_TO_UUID,
"rand_uuid_v1" => &OP_RAND_UUID_V1,
"rand_uuid_v4" => &OP_RAND_UUID_V4,
"uuid_timestamp" => &OP_UUID_TIMESTAMP,
_ => return None,
})
}

@ -1,6 +1,7 @@
use std::collections::BTreeSet;
use std::ops::{Div, Rem};
use std::str::FromStr;
use std::time::{SystemTime, UNIX_EPOCH};
use itertools::Itertools;
use miette::{bail, ensure, miette, Result};
@ -8,10 +9,11 @@ use num_traits::FloatConst;
use rand::prelude::*;
use smartstring::SmartString;
use unicode_normalization::UnicodeNormalization;
use uuid::v1::Timestamp;
use crate::data::expr::Op;
use crate::data::json::JsonValue;
use crate::data::value::{same_value_type, DataValue, Num, RegexWrapper};
use crate::data::value::{DataValue, Num, RegexWrapper, same_value_type, UuidWrapper};
macro_rules! define_op {
($name:ident, $min_arity:expr, $vararg:expr) => {
@ -38,6 +40,11 @@ pub(crate) fn op_eq(args: &[DataValue]) -> Result<DataValue> {
}))
}
define_op!(OP_IS_UUID, 1, false);
/// Type check: yields `Bool(true)` when the first argument is a UUID value
/// and `Bool(false)` for every other kind of value.
pub(crate) fn op_is_uuid(args: &[DataValue]) -> Result<DataValue> {
    let candidate = &args[0];
    let is_uuid = matches!(candidate, DataValue::Uuid(_));
    Ok(DataValue::Bool(is_uuid))
}
define_op!(OP_IS_IN, 2, false);
pub(crate) fn op_is_in(args: &[DataValue]) -> Result<DataValue> {
let left = &args[0];
@ -1365,3 +1372,47 @@ pub(crate) fn op_intersection(args: &[DataValue]) -> Result<DataValue> {
}
Ok(DataValue::List(start.into_iter().collect()))
}
define_op!(OP_TO_UUID, 1, false);
/// Converts the first argument into a UUID value.
///
/// A value that already is a UUID is returned as a clone; a string is parsed
/// as a UUID.
///
/// # Errors
///
/// Fails when the string cannot be parsed as a UUID, or when the argument is
/// neither a UUID nor a string.
pub(crate) fn op_to_uuid(args: &[DataValue]) -> Result<DataValue> {
    // Sort out the argument kind first; only strings go on to be parsed.
    let text = match &args[0] {
        already @ DataValue::Uuid(_) => return Ok(already.clone()),
        DataValue::Str(s) => s,
        _ => bail!("'to_uuid' requires a string"),
    };
    match uuid::Uuid::try_parse(text) {
        Ok(parsed) => Ok(DataValue::uuid(parsed)),
        Err(_) => Err(miette!("invalid UUID")),
    }
}
define_op!(OP_RAND_UUID_V1, 0, false);
pub(crate) fn op_rand_uuid_v1(_args: &[DataValue]) -> Result<DataValue> {
let mut rng = rand::thread_rng();
let uuid_ctx = uuid::v1::Context::new(rng.gen());
let now = SystemTime::now();
let since_epoch = now.duration_since(UNIX_EPOCH).unwrap();
let ts = Timestamp::from_unix(uuid_ctx, since_epoch.as_secs(), since_epoch.subsec_nanos());
let mut rand_vals = [0u8; 6];
rng.fill(&mut rand_vals);
let id = uuid::Uuid::new_v1(ts, &rand_vals);
Ok(DataValue::uuid(id))
}
define_op!(OP_RAND_UUID_V4, 0, false);
/// Produces a freshly generated version-4 UUID wrapped as a `DataValue`.
/// The arguments are ignored.
pub(crate) fn op_rand_uuid_v4(_args: &[DataValue]) -> Result<DataValue> {
    Ok(DataValue::uuid(uuid::Uuid::new_v4()))
}
define_op!(OP_UUID_TIMESTAMP, 1, false);
/// Extracts the timestamp embedded in a UUID as whole seconds since the UNIX
/// epoch.
///
/// Returns `DataValue::Null` when the UUID carries no timestamp (the library
/// reports `None`), and an integer otherwise. A timestamp that predates the
/// UNIX epoch yields a negative number of seconds (rounded towards negative
/// infinity).
///
/// # Errors
///
/// Fails when the first argument is not a UUID.
pub(crate) fn op_uuid_timestamp(args: &[DataValue]) -> Result<DataValue> {
    // Number of 100 ns ticks between the RFC 4122 epoch (1582-10-15) and the
    // UNIX epoch (1970-01-01).
    const TICKS_BETWEEN_EPOCHS: i128 = 0x01B2_1DD2_1381_4000;
    const TICKS_PER_SECOND: i128 = 10_000_000;

    let id = match &args[0] {
        DataValue::Uuid(UuidWrapper(id)) => id,
        _ => bail!("not an UUID"),
    };
    Ok(match id.get_timestamp() {
        None => DataValue::Null,
        Some(t) => {
            // Work from the raw tick count in signed arithmetic: a UUID parsed
            // from user input may carry a timestamp earlier than 1970, which
            // an unsigned "ticks minus epoch offset" computation cannot
            // represent.
            let (ticks, _counter) = t.to_rfc4122();
            let secs = (i128::from(ticks) - TICKS_BETWEEN_EPOCHS).div_euclid(TICKS_PER_SECOND);
            // |ticks - offset| < 2^64, so the quotient always fits in an i64.
            (secs as i64).into()
        }
    })
}

@ -88,9 +88,10 @@ impl From<DataValue> for JsonValue {
}
DataValue::Regex(r) => {
json!(r.0.as_str())
} // DataValue::Map(m) => {
// JsonValue::Array(m.into_iter().map(|(k, v)| json!([k, v])).collect())
// }
}
DataValue::Uuid(u) => {
json!(u.0)
}
}
}
}
@ -98,8 +99,8 @@ impl From<DataValue> for JsonValue {
#[cfg(test)]
mod tests {
use serde_json::json;
use crate::data::json::JsonValue;
use crate::data::json::JsonValue;
use crate::data::value::DataValue;
#[test]

@ -1,6 +1,7 @@
use approx::AbsDiffEq;
use num_traits::FloatConst;
use regex::Regex;
use smartstring::SmartString;
use crate::data::functions::*;
use crate::data::value::{DataValue, RegexWrapper};
@ -617,7 +618,7 @@ fn test_concat() {
DataValue::List(vec![
DataValue::Bool(true),
DataValue::Bool(false),
DataValue::Bool(true)
DataValue::Bool(true),
])
);
}
@ -901,7 +902,7 @@ fn test_prepend_append() {
DataValue::List(vec![
DataValue::Null,
DataValue::from(1),
DataValue::from(2)
DataValue::from(2),
]),
);
assert_eq!(
@ -950,7 +951,7 @@ fn test_sort_reverse() {
DataValue::from(2.0),
DataValue::from(1),
DataValue::from(2),
DataValue::Null
DataValue::Null,
])])
.unwrap(),
DataValue::List(vec![
@ -965,7 +966,7 @@ fn test_sort_reverse() {
DataValue::from(2.0),
DataValue::from(1),
DataValue::from(2),
DataValue::Null
DataValue::Null,
])])
.unwrap(),
DataValue::List(vec![
@ -1038,7 +1039,7 @@ fn test_first_last() {
assert_eq!(
op_first(&[DataValue::List(vec![
DataValue::from(1),
DataValue::from(2)
DataValue::from(2),
])])
.unwrap(),
DataValue::from(1),
@ -1046,7 +1047,7 @@ fn test_first_last() {
assert_eq!(
op_last(&[DataValue::List(vec![
DataValue::from(1),
DataValue::from(2)
DataValue::from(2),
])])
.unwrap(),
DataValue::from(2),
@ -1068,8 +1069,8 @@ fn test_chunks() {
])
.unwrap(),
DataValue::List(vec![
DataValue::List(vec![DataValue::from(1), DataValue::from(2),]),
DataValue::List(vec![DataValue::from(3), DataValue::from(4),]),
DataValue::List(vec![DataValue::from(1), DataValue::from(2)]),
DataValue::List(vec![DataValue::from(3), DataValue::from(4)]),
DataValue::List(vec![DataValue::from(5)]),
])
);
@ -1086,8 +1087,8 @@ fn test_chunks() {
])
.unwrap(),
DataValue::List(vec![
DataValue::List(vec![DataValue::from(1), DataValue::from(2),]),
DataValue::List(vec![DataValue::from(3), DataValue::from(4),]),
DataValue::List(vec![DataValue::from(1), DataValue::from(2)]),
DataValue::List(vec![DataValue::from(3), DataValue::from(4)]),
])
);
assert_eq!(
@ -1106,17 +1107,17 @@ fn test_chunks() {
DataValue::List(vec![
DataValue::from(1),
DataValue::from(2),
DataValue::from(3)
DataValue::from(3),
]),
DataValue::List(vec![
DataValue::from(2),
DataValue::from(3),
DataValue::from(4)
DataValue::from(4),
]),
DataValue::List(vec![
DataValue::from(3),
DataValue::from(4),
DataValue::from(5)
DataValue::from(5),
]),
])
)
@ -1130,7 +1131,7 @@ fn test_get() {
DataValue::List(vec![
DataValue::from(1),
DataValue::from(2),
DataValue::from(3)
DataValue::from(3),
]),
DataValue::from(1)
])
@ -1146,7 +1147,7 @@ fn test_get() {
DataValue::List(vec![
DataValue::from(1),
DataValue::from(2),
DataValue::from(3)
DataValue::from(3),
]),
DataValue::from(1)
])
@ -1161,7 +1162,7 @@ fn test_slice() {
DataValue::List(vec![
DataValue::from(1),
DataValue::from(2),
DataValue::from(3)
DataValue::from(3),
]),
DataValue::from(1),
DataValue::from(4)
@ -1173,7 +1174,7 @@ fn test_slice() {
DataValue::List(vec![
DataValue::from(1),
DataValue::from(2),
DataValue::from(3)
DataValue::from(3),
]),
DataValue::from(1),
DataValue::from(-1)
@ -1276,9 +1277,9 @@ fn test_rand() {
fn test_set_ops() {
assert_eq!(
op_union(&[
DataValue::List([1, 2, 3].into_iter().map(DataValue::from).collect(),),
DataValue::List([2, 3, 4].into_iter().map(DataValue::from).collect(),),
DataValue::List([3, 4, 5].into_iter().map(DataValue::from).collect(),)
DataValue::List([1, 2, 3].into_iter().map(DataValue::from).collect()),
DataValue::List([2, 3, 4].into_iter().map(DataValue::from).collect()),
DataValue::List([3, 4, 5].into_iter().map(DataValue::from).collect())
])
.unwrap(),
DataValue::List([1, 2, 3, 4, 5].into_iter().map(DataValue::from).collect())
@ -1291,8 +1292,8 @@ fn test_set_ops() {
.map(DataValue::from)
.collect(),
),
DataValue::List([2, 3, 4].into_iter().map(DataValue::from).collect(),),
DataValue::List([3, 4, 5].into_iter().map(DataValue::from).collect(),)
DataValue::List([2, 3, 4].into_iter().map(DataValue::from).collect()),
DataValue::List([3, 4, 5].into_iter().map(DataValue::from).collect())
])
.unwrap(),
DataValue::List([3, 4].into_iter().map(DataValue::from).collect())
@ -1305,10 +1306,20 @@ fn test_set_ops() {
.map(DataValue::from)
.collect(),
),
DataValue::List([2, 3, 4].into_iter().map(DataValue::from).collect(),),
DataValue::List([3, 4, 5].into_iter().map(DataValue::from).collect(),)
DataValue::List([2, 3, 4].into_iter().map(DataValue::from).collect()),
DataValue::List([3, 4, 5].into_iter().map(DataValue::from).collect())
])
.unwrap(),
DataValue::List([1, 6].into_iter().map(DataValue::from).collect())
);
}
/// Smoke test for the UUID operators: generation, type check, timestamp
/// extraction and string parsing.
#[test]
fn test_uuid() {
    let time_based = op_rand_uuid_v1(&[]).unwrap();
    let random = op_rand_uuid_v4(&[]).unwrap();

    // A generated v4 value must be recognised as a UUID.
    let type_check = op_is_uuid(&[random]).unwrap();
    assert_eq!(type_check.get_bool(), Some(true));

    // A generated v1 value must expose an integer timestamp.
    let timestamp = op_uuid_timestamp(&[time_based]).unwrap();
    assert!(timestamp.get_int().is_some());

    // Parsing: the empty string is rejected, a hyphenated UUID is accepted.
    let empty = DataValue::Str(SmartString::from(""));
    assert!(op_to_uuid(&[empty]).is_err());
    let hyphenated = DataValue::Str(SmartString::from("f3b4958c-52a1-11e7-802a-010203040506"));
    assert!(op_to_uuid(&[hyphenated]).is_ok());
}

@ -9,11 +9,34 @@ use rmp_serde::Serializer;
use serde::{Deserialize, Deserializer, Serialize};
use smallvec::SmallVec;
use smartstring::{LazyCompact, SmartString};
use uuid::Uuid;
use crate::data::encode::EncodedVec;
use crate::data::id::{EntityId, TxId};
use crate::data::triple::StoreOp;
/// Newtype around [`Uuid`] so that the crate can give UUID values their own
/// ordering and serde representation.
#[derive(Clone, Hash, Eq, PartialEq, serde_derive::Deserialize, serde_derive::Serialize)]
pub(crate) struct UuidWrapper(pub(crate) Uuid);

impl UuidWrapper {
    /// Returns the UUID's embedded timestamp as a count of 100-nanosecond
    /// ticks, or `None` when the UUID carries no timestamp.
    ///
    /// The value is the raw RFC 4122 tick count (measured from 1582-10-15),
    /// taken as stored in the UUID. It is used only as a sort key: it orders
    /// timestamps chronologically without any epoch subtraction or scaling,
    /// so it cannot underflow or overflow for UUIDs parsed from arbitrary
    /// input.
    pub(crate) fn to_100_nanos(&self) -> Option<u64> {
        self.0.get_timestamp().map(|t| t.to_rfc4122().0)
    }
}
/// Partial ordering that simply defers to the total order from [`Ord`], so
/// it never returns `None`.
impl PartialOrd for UuidWrapper {
    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
        let total = Ord::cmp(self, rhs);
        Some(total)
    }
}
/// Total order over UUIDs: the optional timestamps from `to_100_nanos` are
/// compared first (`None` sorts before any `Some`), and the raw bytes break
/// ties.
impl Ord for UuidWrapper {
    fn cmp(&self, other: &Self) -> Ordering {
        let by_timestamp = self.to_100_nanos().cmp(&other.to_100_nanos());
        match by_timestamp {
            Ordering::Equal => self.0.as_bytes().cmp(other.0.as_bytes()),
            decided => decided,
        }
    }
}
#[derive(Clone)]
pub(crate) struct RegexWrapper(pub(crate) Regex);
@ -75,6 +98,8 @@ pub(crate) enum DataValue {
Str(SmartString<LazyCompact>),
#[serde(rename = "X", alias = "Bytes", with = "serde_bytes")]
Bytes(Vec<u8>),
#[serde(rename = "U", alias = "Uuid")]
Uuid(UuidWrapper),
#[serde(rename = "R", alias = "Regex")]
Regex(RegexWrapper),
#[serde(rename = "L", alias = "List")]
@ -240,6 +265,10 @@ impl Debug for DataValue {
DataValue::Guard => {
write!(f, "guard")
}
DataValue::Uuid(u) => {
let encoded = base64::encode_config(u.0.as_bytes(), base64::URL_SAFE_NO_PAD);
write!(f, "{}", encoded)
}
}
}
}
@ -297,6 +326,15 @@ impl DataValue {
_ => None,
}
}
/// Returns the wrapped boolean when this value is a `Bool`, and `None` for
/// any other kind of value.
pub(crate) fn get_bool(&self) -> Option<bool> {
    if let DataValue::Bool(flag) = self {
        Some(*flag)
    } else {
        None
    }
}
pub(crate) fn uuid(uuid: uuid::Uuid) -> Self {
Self::Uuid(UuidWrapper(uuid))
}
}
pub(crate) const LARGEST_UTF_CHAR: char = '\u{10ffff}';

Loading…
Cancel
Save