diff --git a/server/src/engine/core/data.rs b/server/src/engine/core/data.rs new file mode 100644 index 00000000..247eca24 --- /dev/null +++ b/server/src/engine/core/data.rs @@ -0,0 +1,140 @@ +/* + * Created on Sat Feb 04 2023 + * + * This file is a part of Skytable + * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source + * NoSQL database written by Sayan Nandan ("the Author") with the + * vision to provide flexibility in data modelling without compromising + * on performance, queryability or scalability. + * + * Copyright (c) 2023, Sayan Nandan + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * +*/ + +use crate::engine::ql::lex::{Lit, LitIR}; + +/// A [`HSData`] represents the underlying data-type, although this enumeration when used in a collection will always +/// be of one type. +// TODO(@ohsayan): Change the underlying structures, these are just rudimentary ones used during integration with the QL +#[derive(Debug, PartialEq, Clone)] +#[repr(u8)] +pub enum HSData { + /// A UTF-8 string + String(Box) = DataKind::STR_BX.d(), + /// Bytes + Binary(Box<[u8]>) = DataKind::BIN_BX.d(), + /// An unsigned integer + /// + /// **NOTE:** This is the default evaluated type for unsigned integers by the query processor.
It is the + /// responsibility of the executor to ensure integrity checks depending on actual type width in the declared + /// schema (if any) + UnsignedInt(u64) = DataKind::UINT64.d(), + /// A signed integer + /// + /// **NOTE:** This is the default evaluated type for signed integers by the query processor. It is the + /// responsibility of the executor to ensure integrity checks depending on actual type width in the declared + /// schema (if any) + SignedInt(i64) = DataKind::SINT64.d(), + /// A boolean + Boolean(bool) = DataKind::BOOL.d(), + /// A float (64-bit) + Float(f64) = DataKind::FLOAT64.d(), + /// A single-type list. Note, you **need** to keep up the invariant that the [`HSData`] disc. remains the same for all + /// elements to ensure correctness in this specific context + /// FIXME(@ohsayan): Try enforcing this somehow + List(Vec) = DataKind::LIST.d(), +} + +enum_impls! { + HSData => { + String as String, + Vec as Binary, + u64 as UnsignedInt, + bool as Boolean, + Vec as List, + &'static str as String, + } +} + +impl HSData { + #[inline(always)] + pub(super) fn clone_from_lit(lit: Lit) -> Self { + match lit { + Lit::Str(s) => HSData::String(s.clone()), + Lit::Bool(b) => HSData::Boolean(b), + Lit::UnsignedInt(u) => HSData::UnsignedInt(u), + Lit::SignedInt(i) => HSData::SignedInt(i), + Lit::Bin(l) => HSData::Binary(l.to_vec().into_boxed_slice()), + } + } + #[inline(always)] + pub(super) fn clone_from_litir<'a>(lit: LitIR<'a>) -> Self { + match lit { + LitIR::Str(s) => Self::String(s.to_owned().into_boxed_str()), + LitIR::Bin(b) => Self::Binary(b.to_owned().into_boxed_slice()), + LitIR::Float(f) => Self::Float(f), + LitIR::SInt(s) => Self::SignedInt(s), + LitIR::UInt(u) => Self::UnsignedInt(u), + LitIR::Bool(b) => Self::Boolean(b), + } + } +} + +impl<'a> From> for HSData { + fn from(l: Lit<'a>) -> Self { + Self::clone_from_lit(l) + } +} + +impl<'a> From> for HSData { + fn from(l: LitIR<'a>) -> Self { + Self::clone_from_litir(l) + } +} + +impl
From<[HSData; N]> for HSData { + fn from(f: [HSData; N]) -> Self { + Self::List(f.into()) + } +} + +flags! { + #[derive(PartialEq, Eq, Clone, Copy)] + pub struct DataKind: u8 { + // primitive: integer unsigned + UINT8 = 0, + UINT16 = 1, + UINT32 = 2, + UINT64 = 3, + // primitive: integer signed + SINT8 = 4, + SINT16 = 5, + SINT32 = 6, + SINT64 = 7, + // primitive: misc + BOOL = 8, + // primitive: floating point + FLOAT32 = 9, + FLOAT64 = 10, + // compound: flat + STR = 11, + STR_BX = DataKind::_BASE_HB | DataKind::STR.d(), + BIN = 12, + BIN_BX = DataKind::_BASE_HB | DataKind::BIN.d(), + // compound: recursive + LIST = 13, + } +} diff --git a/server/src/engine/core/mod.rs b/server/src/engine/core/mod.rs index fae09acf..bce453b0 100644 --- a/server/src/engine/core/mod.rs +++ b/server/src/engine/core/mod.rs @@ -24,106 +24,10 @@ * */ -// TODO(@ohsayan): Change the underlying structures, there are just rudimentary ones used during integration with the QL +mod data; -use super::ql::lex::{Lit, LitIR}; +use super::mem::AStr; +pub use data::HSData; -/// A [`DataType`] represents the underlying data-type, although this enumeration when used in a collection will always -/// be of one type. -#[derive(Debug, PartialEq, Clone)] -#[repr(u8)] -pub enum DataType { - /// An UTF-8 string - String(Box) = DataKind::STR_BX.d(), - /// Bytes - Binary(Box<[u8]>) = DataKind::BIN_BX.d(), - /// An unsigned integer - /// - /// **NOTE:** This is the default evaluated type for unsigned integers by the query processor.
It is the - /// responsibility of the executor to ensure integrity checks depending on actual type width in the declared - /// schema (if any) - SignedInt(i64) = DataKind::SINT64.d(), - /// A boolean - Boolean(bool) = DataKind::BOOL.d(), - /// A float (64-bit) - Float(f64) = DataKind::FLOAT64.d(), - /// A single-type list. Note, you **need** to keep up the invariant that the [`DataType`] disc. remains the same for all - /// elements to ensure correctness in this specific context - /// FIXME(@ohsayan): Try enforcing this somehow - List(Vec) = DataKind::LIST.d(), -} - -enum_impls! { - DataType => { - String as String, - Vec as Binary, - u64 as UnsignedInt, - bool as Boolean, - Vec as List, - &'static str as String, - } -} - -impl DataType { - #[inline(always)] - pub(super) fn clone_from_lit(lit: &Lit) -> Self { - match lit { - Lit::Str(s) => DataType::String(s.clone()), - Lit::Bool(b) => DataType::Boolean(*b), - Lit::UnsignedInt(u) => DataType::UnsignedInt(*u), - Lit::SignedInt(i) => DataType::SignedInt(*i), - Lit::Bin(l) => DataType::Binary(l.to_vec().into_boxed_slice()), - } - } - #[inline(always)] - pub(super) fn clone_from_litir<'a>(lit: LitIR<'a>) -> Self { - match lit { - LitIR::Str(s) => Self::String(s.to_owned().into_boxed_str()), - LitIR::Bin(b) => Self::Binary(b.to_owned().into_boxed_slice()), - LitIR::Float(f) => Self::Float(f), - LitIR::SInt(s) => Self::SignedInt(s), - LitIR::UInt(u) => Self::UnsignedInt(u), - LitIR::Bool(b) => Self::Boolean(b), - } - } -} - -impl From<[DataType; N]> for DataType { - fn from(f: [DataType; N]) -> Self { - Self::List(f.into()) - } -} - -flags! 
{ - #[derive(PartialEq, Eq, Clone, Copy)] - pub struct DataKind: u8 { - // primitive: integer unsigned - UINT8 = 0, - UINT16 = 1, - UINT32 = 2, - UINT64 = 3, - // primitive: integer unsigned - SINT8 = 4, - SINT16 = 5, - SINT32 = 6, - SINT64 = 7, - // primitive: misc - BOOL = 8, - // primitive: floating point - FLOAT32 = 9, - FLOAT64 = 10, - // compound: flat - STR = 11, - STR_BX = DataKind::_BASE_HB | DataKind::STR.d(), - BIN = 12, - BIN_BX = DataKind::_BASE_HB | DataKind::BIN.d(), - // compound: recursive - LIST = 13, - } -} +const IDENT_MX: usize = 64; +type ItemID = AStr; diff --git a/server/src/engine/ql/ast/mod.rs b/server/src/engine/ql/ast/mod.rs index 994aef2f..c421faf1 100644 --- a/server/src/engine/ql/ast/mod.rs +++ b/server/src/engine/ql/ast/mod.rs @@ -36,7 +36,7 @@ use { LangError, LangResult, }, crate::{ - engine::core::DataType, + engine::core::HSData, util::{compiler, MaybeInit}, }, core::cmp, @@ -236,7 +236,7 @@ impl<'a, Qd: QueryData<'a>> State<'a, Qd> { /// /// Caller should have checked that the token matches a lit signature and that enough data is available /// in the data source. 
(ideally should run `can_read_lit_from` or `can_read_lit_rounded`) - pub unsafe fn read_lit_into_data_type_unchecked_from(&mut self, tok: &'a Token) -> DataType { + pub unsafe fn read_lit_into_data_type_unchecked_from(&mut self, tok: &'a Token) -> HSData { self.d.read_data_type(tok) } #[inline(always)] @@ -278,7 +278,7 @@ pub trait QueryData<'a> { /// /// ## Safety /// The current token must match the signature of a lit - unsafe fn read_data_type(&mut self, tok: &'a Token) -> DataType; + unsafe fn read_data_type(&mut self, tok: &'a Token) -> HSData; /// Returns true if the data source has enough data fn nonzero(&self) -> bool; } @@ -302,8 +302,8 @@ impl<'a> QueryData<'a> for InplaceData { extract!(tok, Token::Lit(l) => l.as_ir()) } #[inline(always)] - unsafe fn read_data_type(&mut self, tok: &'a Token) -> DataType { - DataType::clone_from_lit(extract!(tok, Token::Lit(ref l) => l)) + unsafe fn read_data_type(&mut self, tok: &'a Token) -> HSData { + HSData::from(extract!(tok, Token::Lit(ref l) => l.to_owned())) } #[inline(always)] fn nonzero(&self) -> bool { @@ -335,11 +335,11 @@ impl<'a> QueryData<'a> for SubstitutedData<'a> { ret } #[inline(always)] - unsafe fn read_data_type(&mut self, tok: &'a Token) -> DataType { + unsafe fn read_data_type(&mut self, tok: &'a Token) -> HSData { debug_assert!(Token![?].eq(tok)); let ret = self.data[0]; self.data = &self.data[1..]; - DataType::clone_from_litir(ret) + HSData::from(ret) } #[inline(always)] fn nonzero(&self) -> bool { diff --git a/server/src/engine/ql/ddl/syn.rs b/server/src/engine/ql/ddl/syn.rs index 911c303c..44ebd679 100644 --- a/server/src/engine/ql/ddl/syn.rs +++ b/server/src/engine/ql/ddl/syn.rs @@ -46,10 +46,13 @@ use { crate::{ - engine::ql::{ - ast::{QueryData, State}, - lex::{Ident, LitIR, LitIROwned, Token}, - LangError, LangResult, + engine::{ + core::HSData, + ql::{ + ast::{QueryData, State}, + lex::{Ident, Lit, LitIR, Token}, + LangError, LangResult, + }, }, util::{compiler, MaybeInit}, }, @@ -59,13 
+62,19 @@ use { #[derive(Debug, PartialEq)] /// A dictionary entry type. Either a literal or another dictionary pub enum DictEntry { - Lit(LitIROwned), + Lit(HSData), Map(Dict), } impl<'a> From> for DictEntry { fn from(l: LitIR<'a>) -> Self { - Self::Lit(l.to_litir_owned()) + Self::Lit(HSData::from(l)) + } +} + +impl<'a> From> for DictEntry { + fn from(value: Lit<'a>) -> Self { + Self::Lit(HSData::from(value)) } } diff --git a/server/src/engine/ql/dml/ins.rs b/server/src/engine/ql/dml/ins.rs index f25ea5e4..8e0a2ac1 100644 --- a/server/src/engine/ql/dml/ins.rs +++ b/server/src/engine/ql/dml/ins.rs @@ -28,7 +28,7 @@ use { super::read_ident, crate::{ engine::{ - core::DataType, + core::HSData, ql::{ ast::{Entity, QueryData, State}, lex::{Ident, Token}, @@ -56,7 +56,7 @@ pub const T_UUIDSTR: &str = "4593264b-0231-43e9-b0aa-50784f14e204"; pub const T_UUIDBIN: &[u8] = T_UUIDSTR.as_bytes(); pub const T_TIMESEC: u64 = 1673187839_u64; -type ProducerFn = fn() -> DataType; +type ProducerFn = fn() -> HSData; // base #[inline(always)] @@ -77,16 +77,16 @@ fn pfnbase_uuid() -> Uuid { } // impl #[inline(always)] -fn pfn_timesec() -> DataType { - DataType::UnsignedInt(pfnbase_time().as_secs()) +fn pfn_timesec() -> HSData { + HSData::UnsignedInt(pfnbase_time().as_secs()) } #[inline(always)] -fn pfn_uuidstr() -> DataType { - DataType::String(pfnbase_uuid().to_string().into_boxed_str()) +fn pfn_uuidstr() -> HSData { + HSData::String(pfnbase_uuid().to_string().into_boxed_str()) } #[inline(always)] -fn pfn_uuidbin() -> DataType { - DataType::Binary(pfnbase_uuid().as_bytes().to_vec().into_boxed_slice()) +fn pfn_uuidbin() -> HSData { + HSData::Binary(pfnbase_uuid().as_bytes().to_vec().into_boxed_slice()) } static PRODUCER_G: [u8; 4] = [0, 2, 3, 0]; @@ -141,8 +141,8 @@ unsafe fn ldfunc_unchecked(func: &[u8]) -> ProducerFn { /// - If tt length is less than 1 pub(super) fn parse_list<'a, Qd: QueryData<'a>>( state: &mut State<'a, Qd>, - list: &mut Vec, -) -> Option> { + list: &mut Vec, +) -> 
Option> { let mut stop = state.cursor_eq(Token![close []]); state.cursor_ahead_if(stop); let mut overall_dscr = None; @@ -169,7 +169,7 @@ pub(super) fn parse_list<'a, Qd: QueryData<'a>>( if prev_nlist_dscr.is_none() && nlist_dscr.is_some() { prev_nlist_dscr = nlist_dscr; } - DataType::List(nested_list) + HSData::List(nested_list) } Token![@] if state.cursor_signature_match_fn_arity0_rounded() => match unsafe { // UNSAFE(@ohsayan): Just verified at guard @@ -202,7 +202,7 @@ pub(super) fn parse_list<'a, Qd: QueryData<'a>>( #[inline(always)] /// ## Safety /// - Cursor must match arity(0) function signature -unsafe fn handle_func_sub<'a, Qd: QueryData<'a>>(state: &mut State<'a, Qd>) -> Option { +unsafe fn handle_func_sub<'a, Qd: QueryData<'a>>(state: &mut State<'a, Qd>) -> Option { let func = read_ident(state.fw_read()); state.cursor_ahead_by(2); // skip tt:paren ldfunc(func).map(move |f| f()) @@ -212,7 +212,7 @@ unsafe fn handle_func_sub<'a, Qd: QueryData<'a>>(state: &mut State<'a, Qd>) -> O /// - If tt is empty pub(super) fn parse_data_tuple_syntax<'a, Qd: QueryData<'a>>( state: &mut State<'a, Qd>, -) -> Vec> { +) -> Vec> { let mut stop = state.cursor_eq(Token![() close]); state.cursor_ahead_if(stop); let mut data = Vec::new(); @@ -259,7 +259,7 @@ pub(super) fn parse_data_tuple_syntax<'a, Qd: QueryData<'a>>( /// Panics if tt is empty pub(super) fn parse_data_map_syntax<'a, Qd: QueryData<'a>>( state: &mut State<'a, Qd>, -) -> HashMap, Option> { +) -> HashMap, Option> { let mut stop = state.cursor_eq(Token![close {}]); state.cursor_ahead_if(stop); let mut data = HashMap::with_capacity(2); @@ -313,18 +313,18 @@ pub(super) fn parse_data_map_syntax<'a, Qd: QueryData<'a>>( #[derive(Debug, PartialEq)] pub enum InsertData<'a> { - Ordered(Vec>), - Map(HashMap, Option>), + Ordered(Vec>), + Map(HashMap, Option>), } -impl<'a> From>> for InsertData<'a> { - fn from(v: Vec>) -> Self { +impl<'a> From>> for InsertData<'a> { + fn from(v: Vec>) -> Self { Self::Ordered(v) } } -impl<'a> 
From, Option>> for InsertData<'a> { - fn from(m: HashMap, Option>) -> Self { +impl<'a> From, Option>> for InsertData<'a> { + fn from(m: HashMap, Option>) -> Self { Self::Map(m) } } @@ -409,7 +409,7 @@ mod impls { pub mod test { use { super::super::{ - parse_data_map_syntax, parse_data_tuple_syntax, parse_list, DataType, HashMap, + parse_data_map_syntax, parse_data_tuple_syntax, parse_list, HSData, HashMap, }, crate::engine::ql::{ ast::{traits::ASTNode, QueryData, State}, @@ -417,7 +417,7 @@ mod impls { }, }; #[derive(sky_macros::Wrapper, Debug)] - pub struct List(Vec); + pub struct List(Vec); impl<'a> ASTNode<'a> for List { // important: upstream must verify this const VERIFY: bool = true; @@ -428,7 +428,7 @@ mod impls { } } #[derive(sky_macros::Wrapper, Debug)] - pub struct DataTuple(Vec>); + pub struct DataTuple(Vec>); impl<'a> ASTNode<'a> for DataTuple { // important: upstream must verify this const VERIFY: bool = true; @@ -438,7 +438,7 @@ mod impls { } } #[derive(sky_macros::Wrapper, Debug)] - pub struct DataMap(HashMap, Option>); + pub struct DataMap(HashMap, Option>); impl<'a> ASTNode<'a> for DataMap { // important: upstream must verify this const VERIFY: bool = true; diff --git a/server/src/engine/ql/lex/mod.rs b/server/src/engine/ql/lex/mod.rs index 2786c254..4daa44c3 100644 --- a/server/src/engine/ql/lex/mod.rs +++ b/server/src/engine/ql/lex/mod.rs @@ -33,7 +33,7 @@ use { core::{cmp, fmt, ops::BitOr, slice, str}, }; -pub use self::raw::{Ident, Keyword, Lit, LitIR, LitIROwned, Symbol, Token}; +pub use self::raw::{Ident, Keyword, Lit, LitIR, Symbol, Token}; pub type Slice<'a> = &'a [u8]; /* diff --git a/server/src/engine/ql/lex/raw.rs b/server/src/engine/ql/lex/raw.rs index f151bec6..82a897ae 100644 --- a/server/src/engine/ql/lex/raw.rs +++ b/server/src/engine/ql/lex/raw.rs @@ -419,29 +419,6 @@ pub enum LitIR<'a> { Float(f64), } -impl<'a> LitIR<'a> { - pub fn to_litir_owned(&self) -> LitIROwned { - match self { - Self::Str(s) => 
LitIROwned::Str(s.to_string().into_boxed_str()), - Self::Bin(b) => LitIROwned::Bin(b.to_vec().into_boxed_slice()), - Self::UInt(u) => LitIROwned::UInt(*u), - Self::SInt(s) => LitIROwned::SInt(*s), - Self::Bool(b) => LitIROwned::Bool(*b), - Self::Float(f) => LitIROwned::Float(*f), - } - } -} - -#[derive(Debug, PartialEq)] -pub enum LitIROwned { - Str(Box), - Bin(Box<[u8]>), - UInt(u64), - SInt(i64), - Bool(bool), - Float(f64), -} - #[derive(Debug)] pub struct RawLexer<'a> { c: *const u8, diff --git a/server/src/engine/ql/tests.rs b/server/src/engine/ql/tests.rs index a87c538b..808c7788 100644 --- a/server/src/engine/ql/tests.rs +++ b/server/src/engine/ql/tests.rs @@ -29,7 +29,7 @@ use { lex::{InsecureLexer, SafeLexer, Symbol, Token}, LangResult, }, - crate::{engine::core::DataType, util::test_utils}, + crate::{engine::core::HSData, util::test_utils}, rand::{self, Rng}, }; @@ -54,24 +54,24 @@ pub trait NullableData { fn data(self) -> Option; } -impl NullableData for T +impl NullableData for T where - T: Into, + T: Into, { - fn data(self) -> Option { + fn data(self) -> Option { Some(self.into()) } } struct Null; -impl NullableData for Null { - fn data(self) -> Option { +impl NullableData for Null { + fn data(self) -> Option { None } } -fn nullable_datatype(v: impl NullableData) -> Option { +fn nullable_datatype(v: impl NullableData) -> Option { v.data() } @@ -87,9 +87,7 @@ impl NullableDictEntry for Null { impl<'a> NullableDictEntry for super::lex::Lit<'a> { fn data(self) -> Option { - Some(super::ddl::syn::DictEntry::Lit( - self.as_ir().to_litir_owned(), - )) + Some(super::ddl::syn::DictEntry::from(self.as_ir())) } }