vec values and typings

main
Ziyang Hu 1 year ago
parent 01b5dfca9b
commit 833d155027

@ -16,6 +16,7 @@ sys_script = {SOI ~ "::" ~ (list_relations_op | list_relation_op | remove_relati
access_level_op | index_op | compact_op | list_fixed_rules) ~ EOI}
index_op = {"index" ~ (index_create | index_drop)}
index_create = {"create" ~ compound_ident ~ ":" ~ ident ~ "{" ~ (ident ~ ",")* ~ ident? ~ "}"}
index_create_hnsw = {"create_hnsw" ~ compound_ident ~ ":" ~ ident ~ "{" ~ (index_opt_field ~ ",")* ~ index_opt_field? ~ "}"}
index_drop = {"drop" ~ compound_ident ~ ":" ~ ident }
compact_op = {"compact"}
list_fixed_rules = {"fixed_rules"}
@ -37,6 +38,7 @@ trigger_replace = {"replace"}
rename_pair = {compound_ident ~ "->" ~ compound_ident}
from_clause = {"from" ~ expr}
to_clause = {"to" ~ expr}
index_opt_field = {ident ~ ":" ~ expr}
WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
BLOCK_COMMENT = _{ "/*" ~ (BLOCK_COMMENT | !"*/" ~ ANY)* ~ "*/" }
@ -192,7 +194,7 @@ literal = _{ null | boolean | number | string}
table_schema = {"{" ~ table_cols ~ ("=>" ~ table_cols)? ~ "}"}
table_cols = {(table_col ~ ",")* ~ table_col?}
table_col = {ident ~ (":" ~ col_type)? ~ (("default" ~ expr) | ("=" ~ out_arg))?}
col_type = {(any_type | bool_type | int_type | float_type | string_type | bytes_type | uuid_type | validity_type | list_type | tuple_type) ~ "?"?}
col_type = {(any_type | bool_type | int_type | float_type | string_type | bytes_type | uuid_type | validity_type | vec_type | list_type | tuple_type) ~ "?"?}
col_type_with_term = {SOI ~ col_type ~ EOI}
any_type = {"Any"}
int_type = {"Int"}
@ -204,6 +206,8 @@ bool_type = {"Bool"}
validity_type = {"Validity"}
list_type = {"[" ~ col_type ~ (";" ~ expr)? ~ "]"}
tuple_type = {"(" ~ (col_type ~ ",")* ~ col_type? ~ ")"}
vec_type = {"<" ~ vec_el_type ~ ";" ~ pos_int ~ ">"}
// Element type of a fixed-length vector column, e.g. the `F32` in `<F32; 8>`.
// The schema parser maps `F32`/`Float`, `F64`/`Double`, `I32`/`Int` and
// `I64`/`Long` to the four vector element types, so every one of those
// spellings has to be accepted here (the integer widths were previously
// missing because `F32`/`F64` were listed twice).
vec_el_type = {"F32" | "F64" | "I32" | "I64" | "Float" | "Double" | "Long" | "Int" }
imperative_stmt = _{
break_stmt | continue_stmt | return_stmt | debug_stmt |

@ -1274,7 +1274,7 @@ pub(crate) fn op_to_bool(args: &[DataValue]) -> Result<DataValue> {
DataValue::Regex(r) => !r.0.as_str().is_empty(),
DataValue::List(l) => !l.is_empty(),
DataValue::Set(s) => !s.is_empty(),
DataValue::Arr(_) => true,
DataValue::Vec(_) => true,
DataValue::Validity(vld) => vld.is_assert.0,
DataValue::Bot => false,
}))
@ -1292,7 +1292,7 @@ pub(crate) fn op_to_unity(args: &[DataValue]) -> Result<DataValue> {
DataValue::Regex(r) => i64::from(!r.0.as_str().is_empty()),
DataValue::List(l) => i64::from(!l.is_empty()),
DataValue::Set(s) => i64::from(!s.is_empty()),
DataValue::Arr(_) => 1,
DataValue::Vec(_) => 1,
DataValue::Validity(vld) => i64::from(vld.is_assert.0),
DataValue::Bot => 0,
}))

@ -11,7 +11,7 @@ use base64::Engine;
use serde_json::json;
pub(crate) use serde_json::Value as JsonValue;
use crate::data::value::{Array, DataValue, Num};
use crate::data::value::{Vector, DataValue, Num};
impl From<JsonValue> for DataValue {
fn from(v: JsonValue) -> Self {
@ -97,12 +97,12 @@ impl From<DataValue> for JsonValue {
DataValue::Uuid(u) => {
json!(u.0)
}
DataValue::Arr(arr) => {
DataValue::Vec(arr) => {
match arr {
Array::F32(a) => json!(a),
Array::F64(a) => json!(a),
Array::I32(a) => json!(a),
Array::I64(a) => json!(a),
Vector::F32(a) => json!(a),
Vector::F64(a) => json!(a),
Vector::I32(a) => json!(a),
Vector::I64(a) => json!(a),
}
}
DataValue::Validity(v) => {

@ -14,13 +14,13 @@ use std::str::FromStr;
use byteorder::{BigEndian, ByteOrder, WriteBytesExt};
use regex::Regex;
use crate::data::value::{Array, DataValue, Num, RegexWrapper, UuidWrapper, Validity, ValidityTs};
use crate::data::value::{Vector, DataValue, Num, RegexWrapper, UuidWrapper, Validity, ValidityTs};
const INIT_TAG: u8 = 0x00;
const NULL_TAG: u8 = 0x01;
const FALSE_TAG: u8 = 0x02;
const TRUE_TAG: u8 = 0x03;
const ARR_TAG: u8 = 0x04;
const VEC_TAG: u8 = 0x04;
const NUM_TAG: u8 = 0x05;
const STR_TAG: u8 = 0x06;
const BYTES_TAG: u8 = 0x07;
@ -31,10 +31,10 @@ const SET_TAG: u8 = 0x0B;
const VLD_TAG: u8 = 0x0C;
const BOT_TAG: u8 = 0xFF;
const ARR_F32: u8 = 0x01;
const ARR_F64: u8 = 0x02;
const ARR_I32: u8 = 0x03;
const ARR_I64: u8 = 0x04;
const VEC_F32: u8 = 0x01;
const VEC_F64: u8 = 0x02;
const VEC_I32: u8 = 0x03;
const VEC_I64: u8 = 0x04;
const IS_FLOAT: u8 = 0b00010000;
const IS_APPROX_INT: u8 = 0b00000100;
@ -47,35 +47,35 @@ pub(crate) trait MemCmpEncoder: Write {
DataValue::Null => self.write_u8(NULL_TAG).unwrap(),
DataValue::Bool(false) => self.write_u8(FALSE_TAG).unwrap(),
DataValue::Bool(true) => self.write_u8(TRUE_TAG).unwrap(),
DataValue::Arr(arr) => {
self.write_u8(ARR_TAG).unwrap();
DataValue::Vec(arr) => {
self.write_u8(VEC_TAG).unwrap();
match arr {
Array::F32(a) => {
self.write_u8(ARR_F32).unwrap();
Vector::F32(a) => {
self.write_u8(VEC_F32).unwrap();
let l = a.len();
self.write_u64::<BigEndian>(l as u64).unwrap();
for el in a {
self.write_f32::<BigEndian>(*el).unwrap();
}
}
Array::F64(a) => {
self.write_u8(ARR_F64).unwrap();
Vector::F64(a) => {
self.write_u8(VEC_F64).unwrap();
let l = a.len();
self.write_u64::<BigEndian>(l as u64).unwrap();
for el in a {
self.write_f64::<BigEndian>(*el).unwrap();
}
}
Array::I32(a) => {
self.write_u8(ARR_I32).unwrap();
Vector::I32(a) => {
self.write_u8(VEC_I32).unwrap();
let l = a.len();
self.write_u64::<BigEndian>(l as u64).unwrap();
for el in a {
self.write_i32::<BigEndian>(*el).unwrap();
}
}
Array::I64(a) => {
self.write_u8(ARR_I64).unwrap();
Vector::I64(a) => {
self.write_u8(VEC_I64).unwrap();
let l = a.len();
self.write_u64::<BigEndian>(l as u64).unwrap();
for el in a {
@ -338,12 +338,12 @@ impl DataValue {
)
}
BOT_TAG => (DataValue::Bot, remaining),
ARR_TAG => {
VEC_TAG => {
let (t_tag, remaining) = remaining.split_first().unwrap();
let (len_bytes, mut rest) = remaining.split_at(8);
let len = BigEndian::read_u64(len_bytes) as usize;
match *t_tag {
ARR_F32 => {
VEC_F32 => {
let mut res_arr = Vec::with_capacity(len);
for _ in 0..len {
let (f_bytes, next_chunk) = rest.split_at(4);
@ -351,9 +351,9 @@ impl DataValue {
let f = BigEndian::read_f32(f_bytes);
res_arr.push(f);
}
(DataValue::Arr(Array::F32(res_arr)), rest)
(DataValue::Vec(Vector::F32(res_arr)), rest)
}
ARR_F64 => {
VEC_F64 => {
let mut res_arr = Vec::with_capacity(len);
for _ in 0..len {
let (f_bytes, next_chunk) = rest.split_at(8);
@ -361,9 +361,9 @@ impl DataValue {
let f = BigEndian::read_f64(f_bytes);
res_arr.push(f);
}
(DataValue::Arr(Array::F64(res_arr)), rest)
(DataValue::Vec(Vector::F64(res_arr)), rest)
}
ARR_I32 => {
VEC_I32 => {
let mut res_arr = Vec::with_capacity(len);
for _ in 0..len {
let (i_bytes, next_chunk) = rest.split_at(4);
@ -371,9 +371,9 @@ impl DataValue {
let i = BigEndian::read_i32(i_bytes);
res_arr.push(i);
}
(DataValue::Arr(Array::I32(res_arr)), rest)
(DataValue::Vec(Vector::I32(res_arr)), rest)
}
ARR_I64 => {
VEC_I64 => {
let mut res_arr = Vec::with_capacity(len);
for _ in 0..len {
let (i_bytes, next_chunk) = rest.split_at(8);
@ -381,7 +381,7 @@ impl DataValue {
let i = BigEndian::read_i64(i_bytes);
res_arr.push(i);
}
(DataValue::Arr(Array::I64(res_arr)), rest)
(DataValue::Vec(Vector::I64(res_arr)), rest)
}
_ => unreachable!()
}

@ -19,7 +19,7 @@ use smartstring::{LazyCompact, SmartString};
use thiserror::Error;
use crate::data::expr::Expr;
use crate::data::value::{DataValue, UuidWrapper, Validity, ValidityTs};
use crate::data::value::{DataValue, UuidWrapper, Validity, ValidityTs, Vector};
#[derive(Debug, Clone, Eq, PartialEq, serde_derive::Deserialize, serde_derive::Serialize)]
pub(crate) struct NullableColType {
@ -57,13 +57,13 @@ impl Display for NullableColType {
}
f.write_str(")")?;
}
ColType::Array { eltype, len } => {
ColType::Vec { eltype, len } => {
f.write_str("<")?;
match eltype {
ArrayElementType::F32 => f.write_str("F32")?,
ArrayElementType::F64 => f.write_str("F64")?,
ArrayElementType::I32 => f.write_str("I32")?,
ArrayElementType::I64 => f.write_str("I64")?,
VecElementType::F32 => f.write_str("F32")?,
VecElementType::F64 => f.write_str("F64")?,
VecElementType::I32 => f.write_str("I32")?,
VecElementType::I64 => f.write_str("I64")?,
}
write!(f, ";{len}")?;
f.write_str(">")?;
@ -89,8 +89,8 @@ pub(crate) enum ColType {
eltype: Box<NullableColType>,
len: Option<usize>,
},
Array {
eltype: ArrayElementType,
Vec {
eltype: VecElementType,
len: usize,
},
Tuple(Vec<NullableColType>),
@ -98,7 +98,7 @@ pub(crate) enum ColType {
}
#[derive(Debug, Clone, Eq, PartialEq, serde_derive::Deserialize, serde_derive::Serialize)]
pub(crate) enum ArrayElementType {
pub(crate) enum VecElementType {
F32,
F64,
I32,
@ -246,8 +246,52 @@ impl NullableColType {
bail!(make_err())
}
}
ColType::Array { eltype, len } => {
todo!("array coercion")
ColType::Vec { eltype, len } => {
match &data {
DataValue::List(l) => {
if l.len() != *len {
bail!(BadListLength(self.clone(), l.len()))
}
match eltype {
VecElementType::F32 => {
let mut v = Vec::with_capacity(l.len());
for el in l {
v.push(el.get_float().ok_or_else(make_err)? as f32)
}
DataValue::Vec(Vector::F32(v))
}
VecElementType::F64 => {
let mut v = Vec::with_capacity(l.len());
for el in l {
v.push(el.get_float().ok_or_else(make_err)?)
}
DataValue::Vec(Vector::F64(v))
}
VecElementType::I32 => {
let mut v = Vec::with_capacity(l.len());
for el in l {
v.push(el.get_int().ok_or_else(make_err)? as i32)
}
DataValue::Vec(Vector::I32(v))
}
VecElementType::I64 => {
let mut v = Vec::with_capacity(l.len());
for el in l {
v.push(el.get_int().ok_or_else(make_err)?)
}
DataValue::Vec(Vector::I64(v))
}
}
}
DataValue::Vec(arr) => {
if *eltype != arr.el_type() || *len != arr.len() {
bail!(make_err())
} else {
data
}
}
_ => bail!(make_err()),
}
}
ColType::Tuple(typ) => {
if let DataValue::List(l) = data {

@ -18,6 +18,7 @@ use regex::Regex;
use serde::{Deserialize, Deserializer, Serialize};
use smartstring::{LazyCompact, SmartString};
use uuid::Uuid;
use crate::data::relation::VecElementType;
/// UUID value in the database
#[derive(Clone, Hash, Eq, PartialEq, serde_derive::Deserialize, serde_derive::Serialize)]
@ -147,7 +148,7 @@ pub enum DataValue {
/// set, used internally only
Set(BTreeSet<DataValue>),
/// Vector, mainly for proximity search
Arr(Array),
Vec(Vector),
/// validity,
Validity(Validity),
/// bottom type, used internally only
@ -155,28 +156,45 @@ pub enum DataValue {
}
#[derive(Clone, serde_derive::Serialize, serde_derive::Deserialize)]
pub enum Array {
pub enum Vector {
F32(Vec<f32>),
F64(Vec<f64>),
I32(Vec<i32>),
I64(Vec<i64>),
}
impl Array {
impl Vector {
pub fn len(&self) -> usize {
match self {
Array::F32(v) => v.len(),
Array::F64(v) => v.len(),
Array::I32(v) => v.len(),
Array::I64(v) => v.len(),
Vector::F32(v) => v.len(),
Vector::F64(v) => v.len(),
Vector::I32(v) => v.len(),
Vector::I64(v) => v.len(),
}
}
/// Reports whether `self` and `other` are the same variant, i.e. store the
/// same element type.
///
/// Only the variant is inspected; the contents and lengths of the two
/// vectors are ignored.
pub fn is_compatible(&self, other: &Self) -> bool {
    matches!(
        (self, other),
        (Vector::F32(_), Vector::F32(_))
            | (Vector::F64(_), Vector::F64(_))
            | (Vector::I32(_), Vector::I32(_))
            | (Vector::I64(_), Vector::I64(_))
    )
}
pub(crate) fn el_type(&self) -> VecElementType {
match self {
Vector::F32(_) => VecElementType::F32,
Vector::F64(_) => VecElementType::F64,
Vector::I32(_) => VecElementType::I32,
Vector::I64(_) => VecElementType::I64,
}
}
}
impl PartialEq<Self> for Array {
impl PartialEq<Self> for Vector {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(Array::F32(l), Array::F32(r)) => {
(Vector::F32(l), Vector::F32(r)) => {
for (le, re) in l.iter().zip(r) {
if !OrderedFloat(*le).eq(&OrderedFloat(*re)) {
return false;
@ -184,7 +202,7 @@ impl PartialEq<Self> for Array {
}
true
}
(Array::F64(l), Array::F64(r)) => {
(Vector::F64(l), Vector::F64(r)) => {
for (le, re) in l.iter().zip(r) {
if !OrderedFloat(*le).eq(&OrderedFloat(*re)) {
return false;
@ -192,25 +210,25 @@ impl PartialEq<Self> for Array {
}
true
}
(Array::I32(l), Array::I32(r)) => l == r,
(Array::I64(l), Array::I64(r)) => l == r,
(Vector::I32(l), Vector::I32(r)) => l == r,
(Vector::I64(l), Vector::I64(r)) => l == r,
_ => false,
}
}
}
impl Eq for Array {}
impl Eq for Vector {}
impl PartialOrd for Array {
impl PartialOrd for Vector {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for Array {
impl Ord for Vector {
fn cmp(&self, other: &Self) -> Ordering {
match (self, other) {
(Array::F32(l), Array::F32(r)) => {
(Vector::F32(l), Vector::F32(r)) => {
for (le, re) in l.iter().zip(r) {
match OrderedFloat(*le).cmp(&OrderedFloat(*re)) {
Ordering::Equal => continue,
@ -219,8 +237,8 @@ impl Ord for Array {
}
return Ordering::Equal;
}
(Array::F32(_), _) => Ordering::Less,
(Array::F64(l), Array::F64(r)) => {
(Vector::F32(_), _) => Ordering::Less,
(Vector::F64(l), Vector::F64(r)) => {
for (le, re) in l.iter().zip(r) {
match OrderedFloat(*le).cmp(&OrderedFloat(*re)) {
Ordering::Equal => continue,
@ -229,32 +247,32 @@ impl Ord for Array {
}
return Ordering::Equal;
}
(Array::F64(_), Array::F32(_)) => Ordering::Greater,
(Array::F64(_), _) => Ordering::Less,
(Array::I32(l), Array::I32(r)) => l.cmp(r),
(Array::I32(_), Array::I64(_)) => Ordering::Less,
(Array::I32(_), _) => Ordering::Greater,
(Array::I64(l), Array::I64(r)) => l.cmp(r),
(Array::I64(_), _) => Ordering::Greater,
(Vector::F64(_), Vector::F32(_)) => Ordering::Greater,
(Vector::F64(_), _) => Ordering::Less,
(Vector::I32(l), Vector::I32(r)) => l.cmp(r),
(Vector::I32(_), Vector::I64(_)) => Ordering::Less,
(Vector::I32(_), _) => Ordering::Greater,
(Vector::I64(l), Vector::I64(r)) => l.cmp(r),
(Vector::I64(_), _) => Ordering::Greater,
}
}
}
impl Hash for Array {
impl Hash for Vector {
fn hash<H: Hasher>(&self, state: &mut H) {
match self {
Array::F32(a) => {
Vector::F32(a) => {
for el in a {
OrderedFloat(*el).hash(state)
}
}
Array::F64(a) => {
Vector::F64(a) => {
for el in a {
OrderedFloat(*el).hash(state)
}
}
Array::I32(a) => {a.hash(state)}
Array::I64(a) => {a.hash(state)}
Vector::I32(a) => {a.hash(state)}
Vector::I64(a) => {a.hash(state)}
}
}
}
@ -429,7 +447,7 @@ impl Display for DataValue {
.field("timestamp", &v.timestamp.0)
.field("retracted", &v.is_assert)
.finish(),
DataValue::Arr(a) => {
DataValue::Vec(a) => {
write!(f, "array<{:?} elements>", a.len())
}
}

@ -9,14 +9,14 @@
use std::collections::BTreeSet;
use itertools::Itertools;
use miette::{bail, ensure, Diagnostic, Result};
use miette::{bail, ensure, Diagnostic, Result, IntoDiagnostic};
use smartstring::SmartString;
use thiserror::Error;
use crate::data::relation::{ColType, ColumnDef, NullableColType, StoredRelationMetadata};
use crate::data::relation::{VecElementType, ColType, ColumnDef, NullableColType, StoredRelationMetadata};
use crate::data::symb::Symbol;
use crate::data::value::DataValue;
use crate::parse::expr::build_expr;
use crate::parse::expr::{build_expr};
use crate::parse::{ExtractSpan, Pair, Rule, SourceSpan};
pub(crate) fn parse_schema(
@ -148,6 +148,22 @@ fn parse_type_inner(pair: Pair<'_>) -> Result<ColType> {
len,
}
}
Rule::vec_type => {
let mut inner = pair.into_inner();
let eltype = match inner.next().unwrap().as_str() {
"F32" | "Float" => VecElementType::F32,
"F64" | "Double" => VecElementType::F64,
"I32" | "Int" => VecElementType::I32,
"I64" | "Long" => VecElementType::I64,
_ => unreachable!()
};
let len = inner.next().unwrap();
let len = len.as_str().replace('_', "").parse::<usize>().into_diagnostic()?;
ColType::Vec {
eltype,
len,
}
}
Rule::tuple_type => {
ColType::Tuple(pair.into_inner().map(parse_nullable_type).try_collect()?)
}

@ -738,3 +738,21 @@ fn test_multi_tx() {
tx.abort().unwrap();
assert!(db.run_script("?[a] := *a[a]", Default::default()).is_err());
}
/// Stores a list of eight integers in a `<F32; 8>` column and checks that
/// it is read back as a vector of eight floats.
#[test]
fn test_vec_types() {
    let db = DbInstance::new("mem", "", "").unwrap();

    // Create the relation, then insert one row whose value is a plain
    // integer list that must be coerced to the declared vector type.
    let setup_scripts = [
        ":create a {k: String => v: <F32; 8>}",
        "?[k, v] <- [['k', [1,2,3,4,5,6,7,8]]] :put a {k => v}",
    ];
    for script in setup_scripts {
        db.run_script(script, Default::default()).unwrap();
    }

    let output = db
        .run_script("?[k, v] := *a{k, v}", Default::default())
        .unwrap()
        .into_json();
    assert_eq!(
        json!([1., 2., 3., 4., 5., 6., 7., 8.]),
        output["rows"][0][1]
    );
}
Loading…
Cancel
Save