diff --git a/cozo-core/src/cozoscript.pest b/cozo-core/src/cozoscript.pest index 279f5071..524e1bdf 100644 --- a/cozo-core/src/cozoscript.pest +++ b/cozo-core/src/cozoscript.pest @@ -114,7 +114,9 @@ unary_op = _{ minus | negate } minus = { "-" } negate = { "!" } -term = _{ literal | param | grouping | apply | var | list } +term = _{ literal | param | grouping | apply | var | list | object } +object = { "{" ~ (object_pair ~ ",")* ~ object_pair? ~ "}" } +object_pair = {expr ~ ":" ~ expr} list = { "[" ~ (expr ~ ",")* ~ expr? ~ "]" } grouping = { "(" ~ expr ~ ")" } @@ -197,7 +199,10 @@ literal = _{ null | boolean | number | string} table_schema = {"{" ~ table_cols ~ ("=>" ~ table_cols)? ~ "}"} table_cols = {(table_col ~ ",")* ~ table_col?} table_col = {ident ~ (":" ~ col_type)? ~ (("default" ~ expr) | ("=" ~ out_arg))?} -col_type = {(any_type | bool_type | int_type | float_type | string_type | bytes_type | uuid_type | validity_type | vec_type | list_type | tuple_type) ~ "?"?} +col_type = {( + any_type | bool_type | int_type | float_type | string_type | + bytes_type | uuid_type | validity_type | vec_type | + json_type | list_type | tuple_type) ~ "?"?} col_type_with_term = {SOI ~ col_type ~ EOI} any_type = {"Any"} int_type = {"Int"} @@ -206,6 +211,7 @@ string_type = {"String"} bytes_type = {"Bytes"} uuid_type = {"Uuid"} bool_type = {"Bool"} +json_type = {"Json"} validity_type = {"Validity"} list_type = {"[" ~ col_type ~ (";" ~ expr)? ~ "]"} tuple_type = {"(" ~ (col_type ~ ",")* ~ col_type? ~ ")"} diff --git a/cozo-core/src/data/functions.rs b/cozo-core/src/data/functions.rs index 94313855..b0ded9ac 100644 --- a/cozo-core/src/data/functions.rs +++ b/cozo-core/src/data/functions.rs @@ -21,6 +21,7 @@ use js_sys::Date; use miette::{bail, ensure, miette, Result}; use num_traits::FloatConst; use rand::prelude::*; +use serde_json::Value; use smartstring::SmartString; use unicode_normalization::UnicodeNormalization; use uuid::v1::Timestamp; @@ -1602,6 +1603,14 @@ pub(crate) fn op_to_bool(args: &[DataValue]) -> Result { DataValue::Vec(_) => true, DataValue::Validity(vld) => vld.is_assert.0, DataValue::Bot => false, + DataValue::Json(json) => match &json.0 { + Value::Null => false, + Value::Bool(b) => *b, + Value::Number(n) => n.as_i64() != Some(0), + Value::String(s) => !s.is_empty(), + Value::Array(a) => !a.is_empty(), + Value::Object(o) => !o.is_empty(), + }, })) } @@ -1620,6 +1629,14 @@ pub(crate) fn op_to_unity(args: &[DataValue]) -> Result { DataValue::Vec(_) => 1, DataValue::Validity(vld) => i64::from(vld.is_assert.0), DataValue::Bot => 0, + DataValue::Json(json) => match &json.0 { + Value::Null => 0, + Value::Bool(b) => *b as i64, + Value::Number(n) => (n.as_i64() != Some(0)) as i64, + Value::String(s) => !s.is_empty() as i64, + Value::Array(a) => !a.is_empty() as i64, + Value::Object(o) => !o.is_empty() as i64, + }, })) } diff --git a/cozo-core/src/data/json.rs b/cozo-core/src/data/json.rs index 212c61d1..b8fb53b8 100644 --- a/cozo-core/src/data/json.rs +++ b/cozo-core/src/data/json.rs @@ -11,7 +11,7 @@ use base64::Engine; use serde_json::json; pub(crate) use serde_json::Value as JsonValue; -use crate::data::value::{Vector, DataValue, Num}; +use crate::data::value::{DataValue, Num, Vector}; impl From for DataValue { fn from(v: JsonValue) -> Self { @@ -97,15 +97,14 @@ impl From for JsonValue { DataValue::Uuid(u) => { json!(u.0) } - DataValue::Vec(arr) => { - match arr { - Vector::F32(a) => json!(a.as_slice().unwrap()), - Vector::F64(a) => json!(a.as_slice().unwrap()), - } - } + DataValue::Vec(arr) => match arr { + Vector::F32(a) => json!(a.as_slice().unwrap()), + Vector::F64(a) => json!(a.as_slice().unwrap()), + }, DataValue::Validity(v) => { json!([v.timestamp.0, v.is_assert]) } + DataValue::Json(j) => j.0, } } } diff --git a/cozo-core/src/data/memcmp.rs b/cozo-core/src/data/memcmp.rs index 5cee0a94..d0f052a7 100644 --- a/cozo-core/src/data/memcmp.rs +++ b/cozo-core/src/data/memcmp.rs @@ -14,7 +14,9 @@ use std::str::FromStr; use byteorder::{BigEndian, ByteOrder, WriteBytesExt}; use regex::Regex; -use crate::data::value::{Vector, DataValue, Num, RegexWrapper, UuidWrapper, Validity, ValidityTs}; +use crate::data::value::{ + DataValue, JsonData, Num, RegexWrapper, UuidWrapper, Validity, ValidityTs, Vector, +}; const INIT_TAG: u8 = 0x00; const NULL_TAG: u8 = 0x01; @@ -29,6 +31,7 @@ const REGEX_TAG: u8 = 0x09; const LIST_TAG: u8 = 0x0A; const SET_TAG: u8 = 0x0B; const VLD_TAG: u8 = 0x0C; +const JSON_TAG: u8 = 0x0D; const BOT_TAG: u8 = 0xFF; const VEC_F32: u8 = 0x01; @@ -74,6 +77,11 @@ pub(crate) trait MemCmpEncoder: Write { self.write_u8(STR_TAG).unwrap(); self.encode_bytes(s.as_bytes()); } + DataValue::Json(j) => { + self.write_u8(JSON_TAG).unwrap(); + let s = j.0.to_string(); + self.encode_bytes(s.as_bytes()); + } DataValue::Bytes(b) => { self.write_u8(BYTES_TAG).unwrap(); self.encode_bytes(b) @@ -262,6 +270,13 @@ impl DataValue { let s = unsafe { String::from_utf8_unchecked(bytes) }; (DataValue::Str(s.into()), remaining) } + JSON_TAG => { + let (bytes, remaining) = decode_bytes(remaining); + ( + DataValue::Json(JsonData(serde_json::from_slice(&bytes).unwrap())), + remaining, + ) + } BYTES_TAG => { let (bytes, remaining) = decode_bytes(remaining); (DataValue::Bytes(bytes), remaining) @@ -345,7 +360,7 @@ impl DataValue { } (DataValue::Vec(Vector::F64(res_arr)), rest) } - _ => unreachable!() + _ => unreachable!(), } } _ => unreachable!("{:?}", bs), diff --git a/cozo-core/src/data/relation.rs b/cozo-core/src/data/relation.rs index d04252d4..ea5e3405 100644 --- a/cozo-core/src/data/relation.rs +++ b/cozo-core/src/data/relation.rs @@ -15,11 +15,13 @@ use base64::Engine; use chrono::DateTime; use itertools::Itertools; use miette::{bail, ensure, Diagnostic, Result}; +use serde_json::json; use smartstring::{LazyCompact, SmartString}; use thiserror::Error; use crate::data::expr::Expr; -use crate::data::value::{DataValue, UuidWrapper, Validity, ValidityTs, Vector}; +use crate::data::value::{DataValue, JsonData, UuidWrapper, Validity, ValidityTs, Vector}; +use crate::Num; #[derive(Debug, Clone, Eq, PartialEq, serde_derive::Deserialize, serde_derive::Serialize)] pub struct NullableColType { @@ -66,6 +68,9 @@ impl Display for NullableColType { write!(f, ";{len}")?; f.write_str(">")?; } + ColType::Json => { + f.write_str("Json")?; + } } if self.nullable { f.write_str("?")?; @@ -93,9 +98,12 @@ pub enum ColType { }, Tuple(Vec), Validity, + Json, } -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, serde_derive::Deserialize, serde_derive::Serialize)] +#[derive( + Debug, Copy, Clone, Eq, PartialEq, Hash, serde_derive::Deserialize, serde_derive::Serialize, +)] pub enum VecElementType { F32, F64, @@ -242,41 +250,43 @@ impl NullableColType { bail!(make_err()) } } - ColType::Vec { eltype, len } => { - match &data { - DataValue::List(l) => { - if l.len() != *len { - bail!(BadListLength(self.clone(), l.len())) - } - match eltype { - VecElementType::F32 => { - let mut res_arr = ndarray::Array1::zeros(*len); - for (mut row, el) in res_arr.axis_iter_mut(ndarray::Axis(0)).zip(l.iter()) { - let f = el.get_float().ok_or_else(make_err)? as f32; - row.fill(f); - } - DataValue::Vec(Vector::F32(res_arr)) + ColType::Vec { eltype, len } => match &data { + DataValue::List(l) => { + if l.len() != *len { + bail!(BadListLength(self.clone(), l.len())) + } + match eltype { + VecElementType::F32 => { + let mut res_arr = ndarray::Array1::zeros(*len); + for (mut row, el) in + res_arr.axis_iter_mut(ndarray::Axis(0)).zip(l.iter()) + { + let f = el.get_float().ok_or_else(make_err)? as f32; + row.fill(f); } - VecElementType::F64 => { - let mut res_arr = ndarray::Array1::zeros(*len); - for (mut row, el) in res_arr.axis_iter_mut(ndarray::Axis(0)).zip(l.iter()) { - let f = el.get_float().ok_or_else(make_err)?; - row.fill(f); - } - DataValue::Vec(Vector::F64(res_arr)) + DataValue::Vec(Vector::F32(res_arr)) + } + VecElementType::F64 => { + let mut res_arr = ndarray::Array1::zeros(*len); + for (mut row, el) in + res_arr.axis_iter_mut(ndarray::Axis(0)).zip(l.iter()) + { + let f = el.get_float().ok_or_else(make_err)?; + row.fill(f); } + DataValue::Vec(Vector::F64(res_arr)) } } - DataValue::Vec(arr) => { - if *eltype != arr.el_type() || *len != arr.len() { - bail!(make_err()) - } else { - data - } + } + DataValue::Vec(arr) => { + if *eltype != arr.el_type() || *len != arr.len() { + bail!(make_err()) + } else { + data } - _ => bail!(make_err()), } - } + _ => bail!(make_err()), + }, ColType::Tuple(typ) => { if let DataValue::List(l) = data { ensure!(typ.len() == l.len(), BadListLength(self.clone(), l.len())); @@ -347,6 +357,71 @@ impl NullableColType { v => bail!(InvalidValidity(v)), } } + ColType::Json => DataValue::Json(JsonData(match data { + DataValue::Null => { + json!(null) + } + DataValue::Bool(b) => { + json!(b) + } + DataValue::Num(n) => match n { + Num::Int(i) => { + json!(i) + } + Num::Float(f) => { + json!(f) + } + }, + DataValue::Str(s) => { + json!(s) + } + DataValue::Bytes(b) => { + json!(b) + } + DataValue::Uuid(u) => { + json!(u.0.as_bytes()) + } + DataValue::Regex(r) => { + json!(r.0.as_str()) + } + DataValue::List(l) => { + let mut arr = Vec::with_capacity(l.len()); + for el in l { + arr.push(self.coerce(el, cur_vld)?); + } + arr.into() + } + DataValue::Set(l) => { + let mut arr = Vec::with_capacity(l.len()); + for el in l { + arr.push(self.coerce(el, cur_vld)?); + } + arr.into() + } + DataValue::Vec(v) => { + let mut arr = Vec::with_capacity(v.len()); + match v { + Vector::F32(a) => { + for el in a { + arr.push(json!(el)); + } + } + Vector::F64(a) => { + for el in a { + arr.push(json!(el)); + } + } + } + arr.into() + } + DataValue::Json(j) => j.0, + DataValue::Validity(vld) => { + json!([vld.timestamp.0, vld.is_assert.0]) + } + DataValue::Bot => { + json!(null) + } + })), }) } } diff --git a/cozo-core/src/data/value.rs b/cozo-core/src/data/value.rs index f3472e23..4e7b38f0 100644 --- a/cozo-core/src/data/value.rs +++ b/cozo-core/src/data/value.rs @@ -13,7 +13,9 @@ use std::cmp::{Ordering, Reverse}; use std::collections::BTreeSet; use std::fmt::{Debug, Display, Formatter}; use std::hash::{Hash, Hasher}; +use std::ops::Deref; +use crate::data::json::JsonValue; use crate::data::relation::VecElementType; use ordered_float::OrderedFloat; use regex::Regex; @@ -154,12 +156,43 @@ pub enum DataValue { Set(BTreeSet), /// Array, mainly for proximity search Vec(Vector), + /// Json + Json(JsonData), /// validity, Validity(Validity), /// bottom type, used internally only Bot, } +#[derive(Clone, PartialEq, Eq, serde_derive::Deserialize, serde_derive::Serialize)] +pub struct JsonData(pub JsonValue); + +impl PartialOrd for JsonData { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for JsonData { + fn cmp(&self, other: &Self) -> Ordering { + self.0.to_string().cmp(&other.0.to_string()) + } +} + +impl Deref for JsonData { + type Target = JsonValue; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Hash for JsonData { + fn hash(&self, state: &mut H) { + self.0.to_string().hash(state) + } +} + /// Vector of floating numbers #[derive(Debug, Clone)] pub enum Vector { @@ -573,6 +606,9 @@ impl Display for DataValue { write!(f, "vec({:?}, \"F64\")", a.to_vec()) } }, + DataValue::Json(j) => { + write!(f, "json({})", j.0) + } } } }