json type

main
Ziyang Hu 1 year ago
parent 83451f4929
commit 019657dded

@ -114,7 +114,9 @@ unary_op = _{ minus | negate }
minus = { "-" }
negate = { "!" }
term = _{ literal | param | grouping | apply | var | list }
term = _{ literal | param | grouping | apply | var | list | object }
// Object literal: zero or more `object_pair`s, each followed by a comma, then an
// optional final pair, all enclosed in braces. Both `{}` and a trailing comma match.
object = { "{" ~ (object_pair ~ ",")* ~ object_pair? ~ "}" }
// One entry of an `object`: two expressions separated by `:`.
// Presumably the first is the key and the second the value.
object_pair = {expr ~ ":" ~ expr}
list = { "[" ~ (expr ~ ",")* ~ expr? ~ "]" }
grouping = { "(" ~ expr ~ ")" }
@ -197,7 +199,10 @@ literal = _{ null | boolean | number | string}
table_schema = {"{" ~ table_cols ~ ("=>" ~ table_cols)? ~ "}"}
table_cols = {(table_col ~ ",")* ~ table_col?}
table_col = {ident ~ (":" ~ col_type)? ~ (("default" ~ expr) | ("=" ~ out_arg))?}
col_type = {(any_type | bool_type | int_type | float_type | string_type | bytes_type | uuid_type | validity_type | vec_type | list_type | tuple_type) ~ "?"?}
col_type = {(
any_type | bool_type | int_type | float_type | string_type |
bytes_type | uuid_type | validity_type | vec_type |
json_type | list_type | tuple_type) ~ "?"?}
col_type_with_term = {SOI ~ col_type ~ EOI}
any_type = {"Any"}
int_type = {"Int"}
@ -206,6 +211,7 @@ string_type = {"String"}
bytes_type = {"Bytes"}
uuid_type = {"Uuid"}
bool_type = {"Bool"}
// The literal type name `Json`; listed as one of the alternatives of `col_type`.
json_type = {"Json"}
validity_type = {"Validity"}
list_type = {"[" ~ col_type ~ (";" ~ expr)? ~ "]"}
tuple_type = {"(" ~ (col_type ~ ",")* ~ col_type? ~ ")"}

@ -21,6 +21,7 @@ use js_sys::Date;
use miette::{bail, ensure, miette, Result};
use num_traits::FloatConst;
use rand::prelude::*;
use serde_json::Value;
use smartstring::SmartString;
use unicode_normalization::UnicodeNormalization;
use uuid::v1::Timestamp;
@ -1602,6 +1603,14 @@ pub(crate) fn op_to_bool(args: &[DataValue]) -> Result<DataValue> {
DataValue::Vec(_) => true,
DataValue::Validity(vld) => vld.is_assert.0,
DataValue::Bot => false,
DataValue::Json(json) => match &json.0 {
Value::Null => false,
Value::Bool(b) => *b,
Value::Number(n) => n.as_i64() != Some(0),
Value::String(s) => !s.is_empty(),
Value::Array(a) => !a.is_empty(),
Value::Object(o) => !o.is_empty(),
},
}))
}
@ -1620,6 +1629,14 @@ pub(crate) fn op_to_unity(args: &[DataValue]) -> Result<DataValue> {
DataValue::Vec(_) => 1,
DataValue::Validity(vld) => i64::from(vld.is_assert.0),
DataValue::Bot => 0,
DataValue::Json(json) => match &json.0 {
Value::Null => 0,
Value::Bool(b) => *b as i64,
Value::Number(n) => (n.as_i64() != Some(0)) as i64,
Value::String(s) => !s.is_empty() as i64,
Value::Array(a) => !a.is_empty() as i64,
Value::Object(o) => !o.is_empty() as i64,
},
}))
}

@ -11,7 +11,7 @@ use base64::Engine;
use serde_json::json;
pub(crate) use serde_json::Value as JsonValue;
use crate::data::value::{Vector, DataValue, Num};
use crate::data::value::{DataValue, Num, Vector};
impl From<JsonValue> for DataValue {
fn from(v: JsonValue) -> Self {
@ -97,15 +97,14 @@ impl From<DataValue> for JsonValue {
DataValue::Uuid(u) => {
json!(u.0)
}
DataValue::Vec(arr) => {
match arr {
DataValue::Vec(arr) => match arr {
Vector::F32(a) => json!(a.as_slice().unwrap()),
Vector::F64(a) => json!(a.as_slice().unwrap()),
}
}
},
DataValue::Validity(v) => {
json!([v.timestamp.0, v.is_assert])
}
DataValue::Json(j) => j.0,
}
}
}

@ -14,7 +14,9 @@ use std::str::FromStr;
use byteorder::{BigEndian, ByteOrder, WriteBytesExt};
use regex::Regex;
use crate::data::value::{Vector, DataValue, Num, RegexWrapper, UuidWrapper, Validity, ValidityTs};
use crate::data::value::{
DataValue, JsonData, Num, RegexWrapper, UuidWrapper, Validity, ValidityTs, Vector,
};
const INIT_TAG: u8 = 0x00;
const NULL_TAG: u8 = 0x01;
@ -29,6 +31,7 @@ const REGEX_TAG: u8 = 0x09;
const LIST_TAG: u8 = 0x0A;
const SET_TAG: u8 = 0x0B;
const VLD_TAG: u8 = 0x0C;
const JSON_TAG: u8 = 0x0D;
const BOT_TAG: u8 = 0xFF;
const VEC_F32: u8 = 0x01;
@ -74,6 +77,11 @@ pub(crate) trait MemCmpEncoder: Write {
self.write_u8(STR_TAG).unwrap();
self.encode_bytes(s.as_bytes());
}
DataValue::Json(j) => {
self.write_u8(JSON_TAG).unwrap();
let s = j.0.to_string();
self.encode_bytes(s.as_bytes());
}
DataValue::Bytes(b) => {
self.write_u8(BYTES_TAG).unwrap();
self.encode_bytes(b)
@ -262,6 +270,13 @@ impl DataValue {
let s = unsafe { String::from_utf8_unchecked(bytes) };
(DataValue::Str(s.into()), remaining)
}
JSON_TAG => {
let (bytes, remaining) = decode_bytes(remaining);
(
DataValue::Json(JsonData(serde_json::from_slice(&bytes).unwrap())),
remaining,
)
}
BYTES_TAG => {
let (bytes, remaining) = decode_bytes(remaining);
(DataValue::Bytes(bytes), remaining)
@ -345,7 +360,7 @@ impl DataValue {
}
(DataValue::Vec(Vector::F64(res_arr)), rest)
}
_ => unreachable!()
_ => unreachable!(),
}
}
_ => unreachable!("{:?}", bs),

@ -15,11 +15,13 @@ use base64::Engine;
use chrono::DateTime;
use itertools::Itertools;
use miette::{bail, ensure, Diagnostic, Result};
use serde_json::json;
use smartstring::{LazyCompact, SmartString};
use thiserror::Error;
use crate::data::expr::Expr;
use crate::data::value::{DataValue, UuidWrapper, Validity, ValidityTs, Vector};
use crate::data::value::{DataValue, JsonData, UuidWrapper, Validity, ValidityTs, Vector};
use crate::Num;
#[derive(Debug, Clone, Eq, PartialEq, serde_derive::Deserialize, serde_derive::Serialize)]
pub struct NullableColType {
@ -66,6 +68,9 @@ impl Display for NullableColType {
write!(f, ";{len}")?;
f.write_str(">")?;
}
ColType::Json => {
f.write_str("Json")?;
}
}
if self.nullable {
f.write_str("?")?;
@ -93,9 +98,12 @@ pub enum ColType {
},
Tuple(Vec<NullableColType>),
Validity,
Json,
}
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, serde_derive::Deserialize, serde_derive::Serialize)]
#[derive(
Debug, Copy, Clone, Eq, PartialEq, Hash, serde_derive::Deserialize, serde_derive::Serialize,
)]
pub enum VecElementType {
F32,
F64,
@ -242,8 +250,7 @@ impl NullableColType {
bail!(make_err())
}
}
ColType::Vec { eltype, len } => {
match &data {
ColType::Vec { eltype, len } => match &data {
DataValue::List(l) => {
if l.len() != *len {
bail!(BadListLength(self.clone(), l.len()))
@ -251,7 +258,9 @@ impl NullableColType {
match eltype {
VecElementType::F32 => {
let mut res_arr = ndarray::Array1::zeros(*len);
for (mut row, el) in res_arr.axis_iter_mut(ndarray::Axis(0)).zip(l.iter()) {
for (mut row, el) in
res_arr.axis_iter_mut(ndarray::Axis(0)).zip(l.iter())
{
let f = el.get_float().ok_or_else(make_err)? as f32;
row.fill(f);
}
@ -259,7 +268,9 @@ impl NullableColType {
}
VecElementType::F64 => {
let mut res_arr = ndarray::Array1::zeros(*len);
for (mut row, el) in res_arr.axis_iter_mut(ndarray::Axis(0)).zip(l.iter()) {
for (mut row, el) in
res_arr.axis_iter_mut(ndarray::Axis(0)).zip(l.iter())
{
let f = el.get_float().ok_or_else(make_err)?;
row.fill(f);
}
@ -275,8 +286,7 @@ impl NullableColType {
}
}
_ => bail!(make_err()),
}
}
},
ColType::Tuple(typ) => {
if let DataValue::List(l) = data {
ensure!(typ.len() == l.len(), BadListLength(self.clone(), l.len()));
@ -347,6 +357,71 @@ impl NullableColType {
v => bail!(InvalidValidity(v)),
}
}
ColType::Json => DataValue::Json(JsonData(match data {
DataValue::Null => {
json!(null)
}
DataValue::Bool(b) => {
json!(b)
}
DataValue::Num(n) => match n {
Num::Int(i) => {
json!(i)
}
Num::Float(f) => {
json!(f)
}
},
DataValue::Str(s) => {
json!(s)
}
DataValue::Bytes(b) => {
json!(b)
}
DataValue::Uuid(u) => {
json!(u.0.as_bytes())
}
DataValue::Regex(r) => {
json!(r.0.as_str())
}
DataValue::List(l) => {
let mut arr = Vec::with_capacity(l.len());
for el in l {
arr.push(self.coerce(el, cur_vld)?);
}
arr.into()
}
DataValue::Set(l) => {
let mut arr = Vec::with_capacity(l.len());
for el in l {
arr.push(self.coerce(el, cur_vld)?);
}
arr.into()
}
DataValue::Vec(v) => {
let mut arr = Vec::with_capacity(v.len());
match v {
Vector::F32(a) => {
for el in a {
arr.push(json!(el));
}
}
Vector::F64(a) => {
for el in a {
arr.push(json!(el));
}
}
}
arr.into()
}
DataValue::Json(j) => j.0,
DataValue::Validity(vld) => {
json!([vld.timestamp.0, vld.is_assert.0])
}
DataValue::Bot => {
json!(null)
}
})),
})
}
}

@ -13,7 +13,9 @@ use std::cmp::{Ordering, Reverse};
use std::collections::BTreeSet;
use std::fmt::{Debug, Display, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use crate::data::json::JsonValue;
use crate::data::relation::VecElementType;
use ordered_float::OrderedFloat;
use regex::Regex;
@ -154,12 +156,43 @@ pub enum DataValue {
Set(BTreeSet<DataValue>),
/// Array, mainly for proximity search
Vec(Vector),
/// Json
Json(JsonData),
/// validity,
Validity(Validity),
/// bottom type, used internally only
Bot,
}
/// Newtype wrapper around a [`JsonValue`], used as the payload of `DataValue::Json`.
///
/// Equality and (de)serialization are derived from the wrapped value; ordering,
/// hashing and dereferencing are implemented by hand on this type.
#[derive(Clone, PartialEq, Eq, serde_derive::Deserialize, serde_derive::Serialize)]
pub struct JsonData(pub JsonValue);
/// Partial ordering for [`JsonData`]; it is total, so the result is always `Some`.
impl PartialOrd for JsonData {
    /// Delegates to the [`Ord`] implementation so both orderings always agree.
    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, rhs))
    }
}
/// Total ordering for [`JsonData`], defined on the serialized text of the value.
impl Ord for JsonData {
    /// Renders both values with `to_string()` and compares the resulting strings.
    ///
    /// Each call allocates two temporary strings.
    // NOTE(review): the memcmp encoder writes the same `to_string()` text for JSON
    // values, so this order presumably mirrors the on-disk key order — confirm
    // before switching to a structural comparison.
    fn cmp(&self, rhs: &Self) -> Ordering {
        let left_text = self.0.to_string();
        let right_text = rhs.0.to_string();
        left_text.cmp(&right_text)
    }
}
impl Deref for JsonData {
type Target = JsonValue;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// Hashing for [`JsonData`], based on the serialized text of the value.
impl Hash for JsonData {
    /// Feeds the `to_string()` rendering of the wrapped value into `state`.
    ///
    /// This is the same text the `Ord` implementation compares.
    fn hash<H>(&self, state: &mut H)
    where
        H: Hasher,
    {
        let rendered = self.0.to_string();
        rendered.hash(state);
    }
}
/// Vector of floating numbers
#[derive(Debug, Clone)]
pub enum Vector {
@ -573,6 +606,9 @@ impl Display for DataValue {
write!(f, "vec({:?}, \"F64\")", a.to_vec())
}
},
DataValue::Json(j) => {
write!(f, "json({})", j.0)
}
}
}
}

Loading…
Cancel
Save