From 68ed434c964fd077233dcd3c3aa61e428a82037e Mon Sep 17 00:00:00 2001 From: Sayan Nandan Date: Fri, 14 Oct 2022 21:16:24 +0530 Subject: [PATCH] Add complete list support --- server/src/engine/macros.rs | 51 ++++++++++++++++++ server/src/engine/memory/mod.rs | 62 ++++++++++++++++++++++ server/src/engine/mod.rs | 3 ++ server/src/engine/ql/ast.rs | 64 ++++++++++------------- server/src/engine/ql/dml.rs | 91 +++++++++++++++++++++++++++++++++ server/src/engine/ql/lexer.rs | 38 +++++++++++++- server/src/engine/ql/macros.rs | 16 +----- server/src/engine/ql/mod.rs | 1 + server/src/engine/ql/schema.rs | 6 +-- server/src/engine/ql/tests.rs | 74 +++++++++++++++++++++++++++ 10 files changed, 351 insertions(+), 55 deletions(-) create mode 100644 server/src/engine/macros.rs create mode 100644 server/src/engine/memory/mod.rs create mode 100644 server/src/engine/ql/dml.rs diff --git a/server/src/engine/macros.rs b/server/src/engine/macros.rs new file mode 100644 index 00000000..3c68cab4 --- /dev/null +++ b/server/src/engine/macros.rs @@ -0,0 +1,51 @@ +/* + * Created on Wed Oct 12 2022 + * + * This file is a part of Skytable + * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source + * NoSQL database written by Sayan Nandan ("the Author") with the + * vision to provide flexibility in data modelling without compromising + * on performance, queryability or scalability. + * + * Copyright (c) 2022, Sayan Nandan + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * +*/ + +macro_rules! extract { + ($src:expr, $what:pat => $ret:expr) => { + if let $what = $src { + $ret + } else { + $crate::impossible!() + } + }; +} + +macro_rules! multi_assert_eq { + ($($lhs:expr),* => $rhs:expr) => { + $(assert_eq!($lhs, $rhs);)* + }; +} + +macro_rules! enum_impls { + ($for:ty => {$($other:ty as $me:ident),*$(,)?}) => { + $(impl ::core::convert::From<$other> for $for {fn from(v: $other) -> Self {Self::$me(v.into())}})* + } +} + +macro_rules! assertions { + ($($assert:expr),*$(,)?) => {$(const _:()=::core::assert!($assert);)*} +} diff --git a/server/src/engine/memory/mod.rs b/server/src/engine/memory/mod.rs new file mode 100644 index 00000000..570e353f --- /dev/null +++ b/server/src/engine/memory/mod.rs @@ -0,0 +1,62 @@ +/* + * Created on Wed Oct 12 2022 + * + * This file is a part of Skytable + * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source + * NoSQL database written by Sayan Nandan ("the Author") with the + * vision to provide flexibility in data modelling without compromising + * on performance, queryability or scalability. + * + * Copyright (c) 2022, Sayan Nandan + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * +*/ + +// TODO(@ohsayan): Change the underlying structures, there are just rudimentary ones used during integration with the QL + +/// A [`DataType`] represents the underlying data-type, although this enumeration when used in a collection will always +/// be of one type. +#[derive(Debug, PartialEq)] +pub enum DataType { + /// An UTF-8 string + String(String), + /// Bytes + Binary(Vec), + /// An integer + Number(u64), + /// A boolean + Boolean(bool), + /// A single-type list. Note, you **need** to keep up the invariant that the [`DataType`] disc. remains the same for all + /// elements to ensure correctness in this specific context + /// FIXME(@ohsayan): Try enforcing this somehow + List(Vec), +} + +enum_impls! { + DataType => { + String as String, + Vec as Binary, + u64 as Number, + bool as Boolean, + Vec as List, + &'static str as String, + } +} + +impl From<[DataType; N]> for DataType { + fn from(f: [DataType; N]) -> Self { + Self::List(f.into()) + } +} diff --git a/server/src/engine/mod.rs b/server/src/engine/mod.rs index 6edec42a..b1e0940f 100644 --- a/server/src/engine/mod.rs +++ b/server/src/engine/mod.rs @@ -27,4 +27,7 @@ #![allow(dead_code)] #![allow(unused_macros)] +#[macro_use] +mod macros; +mod memory; mod ql; diff --git a/server/src/engine/ql/ast.rs b/server/src/engine/ql/ast.rs index cecfaab1..c9b918a5 100644 --- a/server/src/engine/ql/ast.rs +++ b/server/src/engine/ql/ast.rs @@ -46,30 +46,37 @@ pub enum Entity { impl Entity { pub(super) fn parse(cm: &mut Compiler) -> LangResult { - let a = cm.nxtok_opt(); - let b = cm.nxtok_opt(); - let c = cm.nxtok_opt(); - match (a, b, c) { - ( - Some(Token::Ident(ks)), - Some(Token::Symbol(Symbol::SymPeriod)), - Some(Token::Ident(tbl)), - ) => { - let r = Ok(Entity::Full(ks.clone(), tbl.clone())); - r - } - (Some(Token::Ident(ident)), _, _) => unsafe { - let r = Ok(Entity::Current(ident.clone())); - cm.decr_cursor_by(2); - r + let sl = cm.remslice(); + let is_partial = + sl.len() > 1 && sl[0] == Token::Symbol(Symbol::SymColon) && sl[1].is_ident(); + let is_current = !sl.is_empty() && sl[0].is_ident(); + let is_full = sl.len() > 2 + && sl[0].is_ident() + && sl[1] == Token::Symbol(Symbol::SymPeriod) + && sl[2].is_ident(); + let c; + let r = match () { + _ if is_full => unsafe { + c = 3; + Entity::Full( + extract!(&sl[0], Token::Ident(sl) => sl.clone()), + extract!(&sl[2], Token::Ident(sl) => sl.clone()), + ) }, - (Some(Token::Symbol(Symbol::SymColon)), Some(Token::Ident(tbl)), _) => unsafe { - let r = Ok(Entity::Partial(tbl.clone())); - cm.decr_cursor_by(1); - r + _ if is_current => unsafe { + c = 1; + Entity::Current(extract!(&sl[0], Token::Ident(sl) => sl.clone())) }, - _ => Err(LangError::UnexpectedToken), + _ if is_partial => unsafe { + c = 2; + Entity::Partial(extract!(&sl[1], Token::Ident(sl) => sl.clone())) + }, + _ => return Err(LangError::UnexpectedToken), + }; + unsafe { + cm.incr_cursor_by(c); } + Ok(r) } } @@ -309,19 +316,4 @@ impl<'a> Compiler<'a> { debug_assert!(self.remaining() >= by); self.c = self.c.add(by); } - #[inline(always)] - pub(super) unsafe fn decr_cursor_by(&mut self, by: usize) { - self.c = self.c.sub(by); - } - fn try_read_index<'b>(&'a self, index: usize) -> Option<&'b Token> - where - 'a: 'b, - { - let sl = self.remslice(); - if sl.len() > index { - Some(&sl[index]) - } else { - None - } - } } diff --git a/server/src/engine/ql/dml.rs b/server/src/engine/ql/dml.rs new file mode 100644 index 00000000..cf852ff0 --- /dev/null +++ b/server/src/engine/ql/dml.rs @@ -0,0 +1,91 @@ +/* + * Created on Fri Oct 14 2022 + * + * This file is a part of Skytable + * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source + * NoSQL database written by Sayan Nandan ("the Author") with the + * vision to provide flexibility in data modelling without compromising + * on performance, queryability or scalability. + * + * Copyright (c) 2022, Sayan Nandan + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * +*/ + +use std::mem::{discriminant, Discriminant}; + +use super::lexer::{Lit, Symbol}; + +use {super::lexer::Token, crate::engine::memory::DataType}; + +pub(super) fn parse_list( + tok: &[Token], + list: &mut Vec, +) -> (Option>, usize, bool) { + let l = tok.len(); + let mut okay = l != 0; + let mut stop = okay && tok[0] == Symbol::TtCloseSqBracket; + let mut i = stop as usize; + let mut overall_dscr = None; + let mut prev_nlist_dscr = None; + while i < l && okay && !stop { + let d = match &tok[i] { + Token::Lit(Lit::Str(s)) => DataType::String(s.to_string()), + Token::Lit(Lit::Num(n)) => DataType::Number(*n), + Token::Lit(Lit::Bool(b)) => DataType::Boolean(*b), + Token::Symbol(Symbol::TtOpenSqBracket) => { + // a nested list + let mut nested_list = Vec::new(); + let (nlist_dscr, nlist_i, nlist_okay) = parse_list(&tok[i + 1..], &mut nested_list); + okay &= nlist_okay; + i += nlist_i; + // check type return + okay &= { + prev_nlist_dscr.is_none() + || nlist_dscr.is_none() + || prev_nlist_dscr == nlist_dscr + }; + if prev_nlist_dscr.is_none() && nlist_dscr.is_some() { + prev_nlist_dscr = nlist_dscr; + } + DataType::List(nested_list) + } + _ => { + okay = false; + break; + } + }; + i += 1; + okay &= list.is_empty() || discriminant(&d) == discriminant(&list[0]); + overall_dscr = Some(discriminant(&d)); + list.push(d); + let nx_comma = i < l && tok[i] == Symbol::SymComma; + let nx_csqrb = i < l && tok[i] == Symbol::TtCloseSqBracket; + okay &= nx_comma | nx_csqrb; + i += okay as usize; + stop = nx_csqrb; + } + (overall_dscr, i, okay && stop) +} + +#[cfg(test)] +pub(super) fn parse_list_full(tok: &[Token]) -> Option> { + let mut l = Vec::new(); + if let (_, _, true) = parse_list(tok, &mut l) { + Some(l) + } else { + None + } +} diff --git a/server/src/engine/ql/lexer.rs b/server/src/engine/ql/lexer.rs index 3c2ec418..f0b9d871 100644 --- a/server/src/engine/ql/lexer.rs +++ b/server/src/engine/ql/lexer.rs @@ -45,6 +45,15 @@ pub enum Token { Lit(Lit), // literal } +impl PartialEq for Token { + fn eq(&self, other: &Symbol) -> bool { + match self { + Self::Symbol(s) => s == other, + _ => false, + } + } +} + assertions! { size_of::() == 32, // FIXME(@ohsayan): Damn, what? size_of::() == 1, @@ -495,7 +504,7 @@ impl<'a> Lexer<'a> { 1234, // valid 1234a // invalid */ - static TERMINAL_CHAR: [u8; 6] = [b';', b'}', b',', b' ', b'\n', b'\t']; + static TERMINAL_CHAR: [u8; 8] = [b';', b'}', b',', b' ', b'\n', b'\t', b',', b']']; let wseof = self.peek_is(|b| TERMINAL_CHAR.contains(&b)) || self.exhausted(); match str::from_utf8_unchecked(slice::from_raw_parts( s, @@ -549,7 +558,20 @@ impl<'a> Lexer<'a> { match symof(byte) { Some(tok) => self.push_token(tok), #[cfg(test)] - None if byte == b'\r' => self.push_token(Token::IgnorableComma), + None if byte == b'\r' + && self.remaining() > 1 + && !(unsafe { + // UNSAFE(@ohsayan): The previous condition ensures that this doesn't segfault + *self.cursor().add(1) + }) + .is_ascii_digit() => + { + /* + NOTE(@ohsayan): The above guard might look a little messy but is necessary to support raw + literals which will use the carriage return + */ + self.push_token(Token::IgnorableComma) + } _ => { self.last_error = Some(LangError::UnexpectedChar); return; @@ -601,6 +623,18 @@ impl Token { } } } + #[inline(always)] + pub(super) unsafe fn ident_unchecked(&self) -> RawSlice { + if let Self::Ident(id) = self { + id.clone() + } else { + impossible!() + } + } + #[inline(always)] + pub(super) fn is_lit(&self) -> bool { + matches!(self, Self::Lit(_)) + } } impl AsRef for Token { diff --git a/server/src/engine/ql/macros.rs b/server/src/engine/ql/macros.rs index 8b89a2ec..2678d4c7 100644 --- a/server/src/engine/ql/macros.rs +++ b/server/src/engine/ql/macros.rs @@ -46,18 +46,6 @@ macro_rules! set { }}; } -macro_rules! multi_assert_eq { - ($($lhs:expr),* => $rhs:expr) => { - $(assert_eq!($lhs, $rhs);)* - }; -} - -macro_rules! enum_impls { - ($for:ty => {$($other:ty as $me:ident),*$(,)?}) => { - $(impl ::core::convert::From<$other> for $for {fn from(v: $other) -> Self {Self::$me(v)}})* - } -} - -macro_rules! assertions { - ($($assert:expr),*$(,)?) => {$(const _:()=::core::assert!($assert);)*} +macro_rules! into_array { + ($($e:expr),* $(,)?) => { [$($e.into()),*] }; } diff --git a/server/src/engine/ql/mod.rs b/server/src/engine/ql/mod.rs index 180292ee..9c621317 100644 --- a/server/src/engine/ql/mod.rs +++ b/server/src/engine/ql/mod.rs @@ -27,6 +27,7 @@ #[macro_use] mod macros; pub(super) mod ast; +pub(super) mod dml; pub(super) mod lexer; pub(super) mod schema; #[cfg(test)] diff --git a/server/src/engine/ql/schema.rs b/server/src/engine/ql/schema.rs index c33a2f04..007ae945 100644 --- a/server/src/engine/ql/schema.rs +++ b/server/src/engine/ql/schema.rs @@ -44,11 +44,11 @@ Sept. 15, 2022 */ -use super::lexer::DmlKeyword; - use { super::{ - lexer::{DdlKeyword, DdlMiscKeyword, Keyword, Lit, MiscKeyword, Symbol, Token, Type}, + lexer::{ + DdlKeyword, DdlMiscKeyword, DmlKeyword, Keyword, Lit, MiscKeyword, Symbol, Token, Type, + }, LangError, LangResult, RawSlice, }, std::{ diff --git a/server/src/engine/ql/tests.rs b/server/src/engine/ql/tests.rs index 1412365c..581bc230 100644 --- a/server/src/engine/ql/tests.rs +++ b/server/src/engine/ql/tests.rs @@ -1508,3 +1508,77 @@ mod schema_tests { } } } + +mod dml_tests { + use super::*; + mod list_parse { + use super::*; + use crate::engine::ql::dml::parse_list_full; + + #[test] + fn list_mini() { + let tok = lex(b" + [] + ") + .unwrap(); + let r = parse_list_full(&tok[1..]).unwrap(); + assert_eq!(r, vec![]) + } + + #[test] + fn list() { + let tok = lex(b" + [1, 2, 3, 4] + ") + .unwrap(); + let r = parse_list_full(&tok[1..]).unwrap(); + assert_eq!(r.as_slice(), into_array![1, 2, 3, 4]) + } + + #[test] + fn list_pro() { + let tok = lex(b" + [ + [1, 2], + [3, 4], + [5, 6], + [7, 8] + ] + ") + .unwrap(); + let r = parse_list_full(&tok[1..]).unwrap(); + assert_eq!( + r.as_slice(), + into_array![ + into_array![1, 2], + into_array![3, 4], + into_array![5, 6], + into_array![7, 8] + ] + ) + } + + #[test] + fn list_pro_max() { + let tok = lex(b" + [ + [[1, 1], [2, 2]], + [[3, 3], [4, 4]], + [[5, 5], [6, 6]], + [[7, 7], [8, 8]] + ] + ") + .unwrap(); + let r = parse_list_full(&tok[1..]).unwrap(); + assert_eq!( + r.as_slice(), + into_array![ + into_array![into_array![1, 1], into_array![2, 2]], + into_array![into_array![3, 3], into_array![4, 4]], + into_array![into_array![5, 5], into_array![6, 6]], + into_array![into_array![7, 7], into_array![8, 8]], + ] + ) + } + } +}