json type

main
Ziyang Hu 1 year ago
parent 83451f4929
commit 019657dded

@ -114,7 +114,9 @@ unary_op = _{ minus | negate }
minus = { "-" } minus = { "-" }
negate = { "!" } negate = { "!" }
term = _{ literal | param | grouping | apply | var | list } term = _{ literal | param | grouping | apply | var | list | object }
object = { "{" ~ (object_pair ~ ",")* ~ object_pair? ~ "}" }
object_pair = {expr ~ ":" ~ expr}
list = { "[" ~ (expr ~ ",")* ~ expr? ~ "]" } list = { "[" ~ (expr ~ ",")* ~ expr? ~ "]" }
grouping = { "(" ~ expr ~ ")" } grouping = { "(" ~ expr ~ ")" }
@ -197,7 +199,10 @@ literal = _{ null | boolean | number | string}
table_schema = {"{" ~ table_cols ~ ("=>" ~ table_cols)? ~ "}"} table_schema = {"{" ~ table_cols ~ ("=>" ~ table_cols)? ~ "}"}
table_cols = {(table_col ~ ",")* ~ table_col?} table_cols = {(table_col ~ ",")* ~ table_col?}
table_col = {ident ~ (":" ~ col_type)? ~ (("default" ~ expr) | ("=" ~ out_arg))?} table_col = {ident ~ (":" ~ col_type)? ~ (("default" ~ expr) | ("=" ~ out_arg))?}
col_type = {(any_type | bool_type | int_type | float_type | string_type | bytes_type | uuid_type | validity_type | vec_type | list_type | tuple_type) ~ "?"?} col_type = {(
any_type | bool_type | int_type | float_type | string_type |
bytes_type | uuid_type | validity_type | vec_type |
json_type | list_type | tuple_type) ~ "?"?}
col_type_with_term = {SOI ~ col_type ~ EOI} col_type_with_term = {SOI ~ col_type ~ EOI}
any_type = {"Any"} any_type = {"Any"}
int_type = {"Int"} int_type = {"Int"}
@ -206,6 +211,7 @@ string_type = {"String"}
bytes_type = {"Bytes"} bytes_type = {"Bytes"}
uuid_type = {"Uuid"} uuid_type = {"Uuid"}
bool_type = {"Bool"} bool_type = {"Bool"}
json_type = {"Json"}
validity_type = {"Validity"} validity_type = {"Validity"}
list_type = {"[" ~ col_type ~ (";" ~ expr)? ~ "]"} list_type = {"[" ~ col_type ~ (";" ~ expr)? ~ "]"}
tuple_type = {"(" ~ (col_type ~ ",")* ~ col_type? ~ ")"} tuple_type = {"(" ~ (col_type ~ ",")* ~ col_type? ~ ")"}

@ -21,6 +21,7 @@ use js_sys::Date;
use miette::{bail, ensure, miette, Result}; use miette::{bail, ensure, miette, Result};
use num_traits::FloatConst; use num_traits::FloatConst;
use rand::prelude::*; use rand::prelude::*;
use serde_json::Value;
use smartstring::SmartString; use smartstring::SmartString;
use unicode_normalization::UnicodeNormalization; use unicode_normalization::UnicodeNormalization;
use uuid::v1::Timestamp; use uuid::v1::Timestamp;
@ -1602,6 +1603,14 @@ pub(crate) fn op_to_bool(args: &[DataValue]) -> Result<DataValue> {
DataValue::Vec(_) => true, DataValue::Vec(_) => true,
DataValue::Validity(vld) => vld.is_assert.0, DataValue::Validity(vld) => vld.is_assert.0,
DataValue::Bot => false, DataValue::Bot => false,
DataValue::Json(json) => match &json.0 {
Value::Null => false,
Value::Bool(b) => *b,
Value::Number(n) => n.as_i64() != Some(0),
Value::String(s) => !s.is_empty(),
Value::Array(a) => !a.is_empty(),
Value::Object(o) => !o.is_empty(),
},
})) }))
} }
@ -1620,6 +1629,14 @@ pub(crate) fn op_to_unity(args: &[DataValue]) -> Result<DataValue> {
DataValue::Vec(_) => 1, DataValue::Vec(_) => 1,
DataValue::Validity(vld) => i64::from(vld.is_assert.0), DataValue::Validity(vld) => i64::from(vld.is_assert.0),
DataValue::Bot => 0, DataValue::Bot => 0,
DataValue::Json(json) => match &json.0 {
Value::Null => 0,
Value::Bool(b) => *b as i64,
Value::Number(n) => (n.as_i64() != Some(0)) as i64,
Value::String(s) => !s.is_empty() as i64,
Value::Array(a) => !a.is_empty() as i64,
Value::Object(o) => !o.is_empty() as i64,
},
})) }))
} }

@ -11,7 +11,7 @@ use base64::Engine;
use serde_json::json; use serde_json::json;
pub(crate) use serde_json::Value as JsonValue; pub(crate) use serde_json::Value as JsonValue;
use crate::data::value::{Vector, DataValue, Num}; use crate::data::value::{DataValue, Num, Vector};
impl From<JsonValue> for DataValue { impl From<JsonValue> for DataValue {
fn from(v: JsonValue) -> Self { fn from(v: JsonValue) -> Self {
@ -97,15 +97,14 @@ impl From<DataValue> for JsonValue {
DataValue::Uuid(u) => { DataValue::Uuid(u) => {
json!(u.0) json!(u.0)
} }
DataValue::Vec(arr) => { DataValue::Vec(arr) => match arr {
match arr { Vector::F32(a) => json!(a.as_slice().unwrap()),
Vector::F32(a) => json!(a.as_slice().unwrap()), Vector::F64(a) => json!(a.as_slice().unwrap()),
Vector::F64(a) => json!(a.as_slice().unwrap()), },
}
}
DataValue::Validity(v) => { DataValue::Validity(v) => {
json!([v.timestamp.0, v.is_assert]) json!([v.timestamp.0, v.is_assert])
} }
DataValue::Json(j) => j.0,
} }
} }
} }

@ -14,7 +14,9 @@ use std::str::FromStr;
use byteorder::{BigEndian, ByteOrder, WriteBytesExt}; use byteorder::{BigEndian, ByteOrder, WriteBytesExt};
use regex::Regex; use regex::Regex;
use crate::data::value::{Vector, DataValue, Num, RegexWrapper, UuidWrapper, Validity, ValidityTs}; use crate::data::value::{
DataValue, JsonData, Num, RegexWrapper, UuidWrapper, Validity, ValidityTs, Vector,
};
const INIT_TAG: u8 = 0x00; const INIT_TAG: u8 = 0x00;
const NULL_TAG: u8 = 0x01; const NULL_TAG: u8 = 0x01;
@ -29,6 +31,7 @@ const REGEX_TAG: u8 = 0x09;
const LIST_TAG: u8 = 0x0A; const LIST_TAG: u8 = 0x0A;
const SET_TAG: u8 = 0x0B; const SET_TAG: u8 = 0x0B;
const VLD_TAG: u8 = 0x0C; const VLD_TAG: u8 = 0x0C;
const JSON_TAG: u8 = 0x0D;
const BOT_TAG: u8 = 0xFF; const BOT_TAG: u8 = 0xFF;
const VEC_F32: u8 = 0x01; const VEC_F32: u8 = 0x01;
@ -74,6 +77,11 @@ pub(crate) trait MemCmpEncoder: Write {
self.write_u8(STR_TAG).unwrap(); self.write_u8(STR_TAG).unwrap();
self.encode_bytes(s.as_bytes()); self.encode_bytes(s.as_bytes());
} }
DataValue::Json(j) => {
self.write_u8(JSON_TAG).unwrap();
let s = j.0.to_string();
self.encode_bytes(s.as_bytes());
}
DataValue::Bytes(b) => { DataValue::Bytes(b) => {
self.write_u8(BYTES_TAG).unwrap(); self.write_u8(BYTES_TAG).unwrap();
self.encode_bytes(b) self.encode_bytes(b)
@ -262,6 +270,13 @@ impl DataValue {
let s = unsafe { String::from_utf8_unchecked(bytes) }; let s = unsafe { String::from_utf8_unchecked(bytes) };
(DataValue::Str(s.into()), remaining) (DataValue::Str(s.into()), remaining)
} }
JSON_TAG => {
let (bytes, remaining) = decode_bytes(remaining);
(
DataValue::Json(JsonData(serde_json::from_slice(&bytes).unwrap())),
remaining,
)
}
BYTES_TAG => { BYTES_TAG => {
let (bytes, remaining) = decode_bytes(remaining); let (bytes, remaining) = decode_bytes(remaining);
(DataValue::Bytes(bytes), remaining) (DataValue::Bytes(bytes), remaining)
@ -345,7 +360,7 @@ impl DataValue {
} }
(DataValue::Vec(Vector::F64(res_arr)), rest) (DataValue::Vec(Vector::F64(res_arr)), rest)
} }
_ => unreachable!() _ => unreachable!(),
} }
} }
_ => unreachable!("{:?}", bs), _ => unreachable!("{:?}", bs),

@ -15,11 +15,13 @@ use base64::Engine;
use chrono::DateTime; use chrono::DateTime;
use itertools::Itertools; use itertools::Itertools;
use miette::{bail, ensure, Diagnostic, Result}; use miette::{bail, ensure, Diagnostic, Result};
use serde_json::json;
use smartstring::{LazyCompact, SmartString}; use smartstring::{LazyCompact, SmartString};
use thiserror::Error; use thiserror::Error;
use crate::data::expr::Expr; use crate::data::expr::Expr;
use crate::data::value::{DataValue, UuidWrapper, Validity, ValidityTs, Vector}; use crate::data::value::{DataValue, JsonData, UuidWrapper, Validity, ValidityTs, Vector};
use crate::Num;
#[derive(Debug, Clone, Eq, PartialEq, serde_derive::Deserialize, serde_derive::Serialize)] #[derive(Debug, Clone, Eq, PartialEq, serde_derive::Deserialize, serde_derive::Serialize)]
pub struct NullableColType { pub struct NullableColType {
@ -66,6 +68,9 @@ impl Display for NullableColType {
write!(f, ";{len}")?; write!(f, ";{len}")?;
f.write_str(">")?; f.write_str(">")?;
} }
ColType::Json => {
f.write_str("Json")?;
}
} }
if self.nullable { if self.nullable {
f.write_str("?")?; f.write_str("?")?;
@ -93,9 +98,12 @@ pub enum ColType {
}, },
Tuple(Vec<NullableColType>), Tuple(Vec<NullableColType>),
Validity, Validity,
Json,
} }
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, serde_derive::Deserialize, serde_derive::Serialize)] #[derive(
Debug, Copy, Clone, Eq, PartialEq, Hash, serde_derive::Deserialize, serde_derive::Serialize,
)]
pub enum VecElementType { pub enum VecElementType {
F32, F32,
F64, F64,
@ -242,41 +250,43 @@ impl NullableColType {
bail!(make_err()) bail!(make_err())
} }
} }
ColType::Vec { eltype, len } => { ColType::Vec { eltype, len } => match &data {
match &data { DataValue::List(l) => {
DataValue::List(l) => { if l.len() != *len {
if l.len() != *len { bail!(BadListLength(self.clone(), l.len()))
bail!(BadListLength(self.clone(), l.len())) }
} match eltype {
match eltype { VecElementType::F32 => {
VecElementType::F32 => { let mut res_arr = ndarray::Array1::zeros(*len);
let mut res_arr = ndarray::Array1::zeros(*len); for (mut row, el) in
for (mut row, el) in res_arr.axis_iter_mut(ndarray::Axis(0)).zip(l.iter()) { res_arr.axis_iter_mut(ndarray::Axis(0)).zip(l.iter())
let f = el.get_float().ok_or_else(make_err)? as f32; {
row.fill(f); let f = el.get_float().ok_or_else(make_err)? as f32;
} row.fill(f);
DataValue::Vec(Vector::F32(res_arr))
} }
VecElementType::F64 => { DataValue::Vec(Vector::F32(res_arr))
let mut res_arr = ndarray::Array1::zeros(*len); }
for (mut row, el) in res_arr.axis_iter_mut(ndarray::Axis(0)).zip(l.iter()) { VecElementType::F64 => {
let f = el.get_float().ok_or_else(make_err)?; let mut res_arr = ndarray::Array1::zeros(*len);
row.fill(f); for (mut row, el) in
} res_arr.axis_iter_mut(ndarray::Axis(0)).zip(l.iter())
DataValue::Vec(Vector::F64(res_arr)) {
let f = el.get_float().ok_or_else(make_err)?;
row.fill(f);
} }
DataValue::Vec(Vector::F64(res_arr))
} }
} }
DataValue::Vec(arr) => { }
if *eltype != arr.el_type() || *len != arr.len() { DataValue::Vec(arr) => {
bail!(make_err()) if *eltype != arr.el_type() || *len != arr.len() {
} else { bail!(make_err())
data } else {
} data
} }
_ => bail!(make_err()),
} }
} _ => bail!(make_err()),
},
ColType::Tuple(typ) => { ColType::Tuple(typ) => {
if let DataValue::List(l) = data { if let DataValue::List(l) = data {
ensure!(typ.len() == l.len(), BadListLength(self.clone(), l.len())); ensure!(typ.len() == l.len(), BadListLength(self.clone(), l.len()));
@ -347,6 +357,71 @@ impl NullableColType {
v => bail!(InvalidValidity(v)), v => bail!(InvalidValidity(v)),
} }
} }
ColType::Json => DataValue::Json(JsonData(match data {
DataValue::Null => {
json!(null)
}
DataValue::Bool(b) => {
json!(b)
}
DataValue::Num(n) => match n {
Num::Int(i) => {
json!(i)
}
Num::Float(f) => {
json!(f)
}
},
DataValue::Str(s) => {
json!(s)
}
DataValue::Bytes(b) => {
json!(b)
}
DataValue::Uuid(u) => {
json!(u.0.as_bytes())
}
DataValue::Regex(r) => {
json!(r.0.as_str())
}
DataValue::List(l) => {
let mut arr = Vec::with_capacity(l.len());
for el in l {
arr.push(self.coerce(el, cur_vld)?);
}
arr.into()
}
DataValue::Set(l) => {
let mut arr = Vec::with_capacity(l.len());
for el in l {
arr.push(self.coerce(el, cur_vld)?);
}
arr.into()
}
DataValue::Vec(v) => {
let mut arr = Vec::with_capacity(v.len());
match v {
Vector::F32(a) => {
for el in a {
arr.push(json!(el));
}
}
Vector::F64(a) => {
for el in a {
arr.push(json!(el));
}
}
}
arr.into()
}
DataValue::Json(j) => j.0,
DataValue::Validity(vld) => {
json!([vld.timestamp.0, vld.is_assert.0])
}
DataValue::Bot => {
json!(null)
}
})),
}) })
} }
} }

@ -13,7 +13,9 @@ use std::cmp::{Ordering, Reverse};
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::fmt::{Debug, Display, Formatter}; use std::fmt::{Debug, Display, Formatter};
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
use std::ops::Deref;
use crate::data::json::JsonValue;
use crate::data::relation::VecElementType; use crate::data::relation::VecElementType;
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use regex::Regex; use regex::Regex;
@ -154,12 +156,43 @@ pub enum DataValue {
Set(BTreeSet<DataValue>), Set(BTreeSet<DataValue>),
/// Array, mainly for proximity search /// Array, mainly for proximity search
Vec(Vector), Vec(Vector),
/// Json
Json(JsonData),
/// validity, /// validity,
Validity(Validity), Validity(Validity),
/// bottom type, used internally only /// bottom type, used internally only
Bot, Bot,
} }
/// Newtype wrapper around a [`JsonValue`], used as the payload of `DataValue::Json`.
///
/// Equality is derived from the wrapped value; `Clone` and the serde
/// (de)serialization impls are derived as well.
#[derive(Clone, PartialEq, Eq, serde_derive::Deserialize, serde_derive::Serialize)]
pub struct JsonData(pub JsonValue);
/// Partial ordering for [`JsonData`]; always defined, and identical to the
/// total order given by the type's `Ord` implementation.
impl PartialOrd for JsonData {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        // Delegate to the total order so the two traits can never disagree.
        let total = Ord::cmp(self, other);
        Some(total)
    }
}
/// Total ordering for [`JsonData`]: two values are ordered by comparing the
/// text produced by calling `to_string()` on each wrapped JSON value.
impl Ord for JsonData {
    fn cmp(&self, other: &Self) -> Ordering {
        // Render both sides to text first, then compare the two strings.
        let lhs = self.0.to_string();
        let rhs = other.0.to_string();
        lhs.cmp(&rhs)
    }
}
impl Deref for JsonData {
type Target = JsonValue;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// Hashes a [`JsonData`] by feeding the text produced by `to_string()` on the
/// wrapped JSON value into the hasher.
impl Hash for JsonData {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Hash the rendered string rather than the JSON tree itself.
        let rendered: String = self.0.to_string();
        rendered.hash(state);
    }
}
/// Vector of floating numbers /// Vector of floating numbers
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum Vector { pub enum Vector {
@ -573,6 +606,9 @@ impl Display for DataValue {
write!(f, "vec({:?}, \"F64\")", a.to_vec()) write!(f, "vec({:?}, \"F64\")", a.to_vec())
} }
}, },
DataValue::Json(j) => {
write!(f, "json({})", j.0)
}
} }
} }
} }

Loading…
Cancel
Save