From f69505182625c84f7646b16b935b6a6ed65e0325 Mon Sep 17 00:00:00 2001 From: Sayan Nandan Date: Wed, 9 Nov 2022 21:35:38 +0530 Subject: [PATCH] Add parsing for insert statements --- server/src/engine/memory/mod.rs | 1 + server/src/engine/ql/ast.rs | 44 +++++-- server/src/engine/ql/dml.rs | 163 ++++++++++++++++++++++--- server/src/engine/ql/lexer.rs | 1 + server/src/engine/ql/macros.rs | 48 +++++++- server/src/engine/ql/tests.rs | 205 ++++++++++++++++++++++++++++++-- server/src/util/macros.rs | 10 +- 7 files changed, 428 insertions(+), 44 deletions(-) diff --git a/server/src/engine/memory/mod.rs b/server/src/engine/memory/mod.rs index 570e353f..ab3a74e2 100644 --- a/server/src/engine/memory/mod.rs +++ b/server/src/engine/memory/mod.rs @@ -29,6 +29,7 @@ /// A [`DataType`] represents the underlying data-type, although this enumeration when used in a collection will always /// be of one type. #[derive(Debug, PartialEq)] +#[cfg_attr(debug_assertions, derive(Clone))] pub enum DataType { /// An UTF-8 string String(String), diff --git a/server/src/engine/ql/ast.rs b/server/src/engine/ql/ast.rs index 24abe98c..8f9f96c0 100644 --- a/server/src/engine/ql/ast.rs +++ b/server/src/engine/ql/ast.rs @@ -39,33 +39,57 @@ use { #[derive(Debug, PartialEq)] pub enum Entity { - Current(RawSlice), Partial(RawSlice), + Single(RawSlice), Full(RawSlice, RawSlice), } impl Entity { + #[inline(always)] + pub(super) unsafe fn full_entity_from_slice(sl: &[Token]) -> Self { + Entity::Full( + extract!(&sl[0], Token::Ident(sl) => sl.clone()), + extract!(&sl[2], Token::Ident(sl) => sl.clone()), + ) + } + #[inline(always)] + pub(super) unsafe fn single_entity_from_slice(sl: &[Token]) -> Self { + Entity::Single(extract!(&sl[0], Token::Ident(sl) => sl.clone())) + } + #[inline(always)] + pub(super) unsafe fn partial_entity_from_slice(sl: &[Token]) -> Self { + Entity::Partial(extract!(&sl[1], Token::Ident(sl) => sl.clone())) + } + #[inline(always)] + pub(super) fn tokens_with_partial(sl: &[Token]) -> bool { + sl.len() > 1 && sl[0] == Token![:] && sl[1].is_ident() + } + #[inline(always)] + pub(super) fn tokens_with_single(sl: &[Token]) -> bool { + !sl.is_empty() && sl[0].is_ident() + } + #[inline(always)] + pub(super) fn tokens_with_full(sl: &[Token]) -> bool { + sl.len() > 2 && sl[0].is_ident() && sl[1] == Token![.] && sl[2].is_ident() + } pub(super) fn parse(cm: &mut Compiler) -> LangResult { let sl = cm.remslice(); - let is_partial = sl.len() > 1 && sl[0] == Token![:] && sl[1].is_ident(); - let is_current = !sl.is_empty() && sl[0].is_ident(); - let is_full = sl.len() > 2 && sl[0].is_ident() && sl[1] == Token![.] && sl[2].is_ident(); + let is_partial = Self::tokens_with_partial(sl); + let is_current = Self::tokens_with_single(sl); + let is_full = Self::tokens_with_full(sl); let c; let r = match () { _ if is_full => unsafe { c = 3; - Entity::Full( - extract!(&sl[0], Token::Ident(sl) => sl.clone()), - extract!(&sl[2], Token::Ident(sl) => sl.clone()), - ) + Self::full_entity_from_slice(sl) }, _ if is_current => unsafe { c = 1; - Entity::Current(extract!(&sl[0], Token::Ident(sl) => sl.clone())) + Self::single_entity_from_slice(sl) }, _ if is_partial => unsafe { c = 2; - Entity::Partial(extract!(&sl[1], Token::Ident(sl) => sl.clone())) + Self::partial_entity_from_slice(sl) }, _ => return Err(LangError::UnexpectedToken), }; diff --git a/server/src/engine/ql/dml.rs b/server/src/engine/ql/dml.rs index 5cda0c8a..feb4f06d 100644 --- a/server/src/engine/ql/dml.rs +++ b/server/src/engine/ql/dml.rs @@ -24,8 +24,14 @@ * */ +use std::mem::MaybeUninit; + use { - super::lexer::{Lit, Symbol, Token}, + super::{ + ast::Entity, + lexer::{Lit, Symbol, Token}, + LangError, LangResult, + }, crate::engine::memory::DataType, std::{ collections::HashMap, @@ -100,11 +106,10 @@ pub(super) fn parse_list_full(tok: &[Token]) -> Option> { } } -#[cfg(test)] /// Parse the tuple data passed in with an insert query. /// /// **Note:** Make sure you pass the `(` token -pub(super) fn parse_data_tuple_syntax(tok: &[Token]) -> (Vec, usize, bool) { +pub(super) fn parse_data_tuple_syntax(tok: &[Token]) -> (Vec>, usize, bool) { let l = tok.len(); let mut okay = l != 0; let mut stop = okay && tok[0] == Token::Symbol(Symbol::TtCloseParen); @@ -113,22 +118,25 @@ pub(super) fn parse_data_tuple_syntax(tok: &[Token]) -> (Vec, usize, b while i < l && okay && !stop { match &tok[i] { Token::Lit(Lit::Str(s)) => { - data.push(s.to_string().into()); + data.push(Some(s.to_string().into())); } Token::Lit(Lit::Num(n)) => { - data.push((*n).into()); + data.push(Some((*n).into())); } Token::Lit(Lit::Bool(b)) => { - data.push((*b).into()); + data.push(Some((*b).into())); } Token::Symbol(Symbol::TtOpenSqBracket) => { // ah, a list let mut l = Vec::new(); let (_, lst_i, lst_okay) = parse_list(&tok[i + 1..], &mut l); - data.push(l.into()); + data.push(Some(l.into())); i += lst_i; okay &= lst_okay; } + Token![null] => { + data.push(None); + } _ => { okay = false; break; @@ -145,7 +153,7 @@ pub(super) fn parse_data_tuple_syntax(tok: &[Token]) -> (Vec, usize, b } #[cfg(test)] -pub(super) fn parse_data_tuple_syntax_full(tok: &[Token]) -> Option> { +pub(super) fn parse_data_tuple_syntax_full(tok: &[Token]) -> Option>> { let (ret, cnt, okay) = parse_data_tuple_syntax(tok); if cnt == tok.len() && okay { Some(ret) @@ -154,7 +162,9 @@ pub(super) fn parse_data_tuple_syntax_full(tok: &[Token]) -> Option (HashMap, DataType>, usize, bool) { +pub(super) fn parse_data_map_syntax<'a>( + tok: &'a [Token], +) -> (HashMap<&'a [u8], Option>, usize, bool) { let l = tok.len(); let mut okay = l != 0; let mut stop = okay && tok[0] == Token::Symbol(Symbol::TtCloseBrace); @@ -166,17 +176,17 @@ pub(super) fn parse_data_map_syntax(tok: &[Token]) -> (HashMap, DataTyp match (field, expression) { (Token::Ident(id), Token::Lit(Lit::Str(s))) => { okay &= data - .insert(unsafe { id.as_str() }.into(), s.to_string().into()) + .insert(unsafe { id.as_slice() }, Some(s.to_string().into())) .is_none(); } (Token::Ident(id), Token::Lit(Lit::Num(n))) => { okay &= data - .insert(unsafe { id.as_str() }.into(), (*n).into()) + .insert(unsafe { id.as_slice() }, Some((*n).into())) .is_none(); } (Token::Ident(id), Token::Lit(Lit::Bool(b))) => { okay &= data - .insert(unsafe { id.as_str() }.into(), (*b).into()) + .insert(unsafe { id.as_slice() }, Some((*b).into())) .is_none(); } (Token::Ident(id), Token::Symbol(Symbol::TtOpenSqBracket)) => { @@ -186,9 +196,12 @@ pub(super) fn parse_data_map_syntax(tok: &[Token]) -> (HashMap, DataTyp okay &= lst_ok; i += lst_i; okay &= data - .insert(unsafe { id.as_str() }.into(), l.into()) + .insert(unsafe { id.as_slice() }, Some(l.into())) .is_none(); } + (Token::Ident(id), Token![null]) => { + okay &= data.insert(unsafe { id.as_slice() }, None).is_none(); + } _ => { okay = false; break; @@ -196,19 +209,133 @@ pub(super) fn parse_data_map_syntax(tok: &[Token]) -> (HashMap, DataTyp } i += 3; let nx_comma = i < l && tok[i] == Symbol::SymComma; - let nx_csprn = i < l && tok[i] == Symbol::TtCloseBrace; - okay &= nx_comma | nx_csprn; + let nx_csbrc = i < l && tok[i] == Symbol::TtCloseBrace; + okay &= nx_comma | nx_csbrc; i += okay as usize; - stop = nx_csprn; + stop = nx_csbrc; } (data, i, okay && stop) } #[cfg(test)] -pub(super) fn parse_data_map_syntax_full(tok: &[Token]) -> Option, DataType>> { +pub(super) fn parse_data_map_syntax_full( + tok: &[Token], +) -> Option, Option>> { let (dat, i, ok) = parse_data_map_syntax(tok); if i == tok.len() && ok { - Some(dat) + Some( + dat.into_iter() + .map(|(ident, val)| { + ( + String::from_utf8_lossy(ident).to_string().into_boxed_str(), + val, + ) + }) + .collect(), + ) + } else { + None + } +} + +#[derive(Debug, PartialEq)] +pub enum InsertData<'a> { + Ordered(Vec>), + Map(HashMap<&'a [u8], Option>), +} + +impl<'a> From>> for InsertData<'a> { + fn from(v: Vec>) -> Self { + Self::Ordered(v) + } +} + +impl<'a> From>> for InsertData<'a> { + fn from(m: HashMap<&'static [u8], Option>) -> Self { + Self::Map(m) + } +} + +#[derive(Debug, PartialEq)] +pub struct InsertStatement<'a> { + pub(super) primary_key: &'a Lit, + pub(super) entity: Entity, + pub(super) data: InsertData<'a>, +} + +pub(super) fn parse_insert<'a>( + src: &'a [Token], + counter: &mut usize, +) -> LangResult> { + /* + smallest: + insert space:primary_key () + ^1 ^2 ^3^4 ^^5,6 + */ + let l = src.len(); + let is_full = Entity::tokens_with_full(src); + let is_half = Entity::tokens_with_single(src); + + let mut okay = is_full | is_half; + let mut i = 0; + let mut entity = MaybeUninit::uninit(); + + if is_full { + i += 3; + entity = MaybeUninit::new(unsafe { Entity::full_entity_from_slice(src) }); + } else if is_half { + i += 1; + entity = MaybeUninit::new(unsafe { Entity::single_entity_from_slice(src) }); + } + + // primary key is a lit; atleast lit + () | () + okay &= l >= (i + 4); + // colon, lit + okay &= src[i] == Token![:] && src[i + 1].is_lit(); + // check data + let is_map = okay && src[i + 2] == Token![open {}]; + let is_tuple = okay && src[i + 2] == Token![() open]; + okay &= is_map | is_tuple; + + if !okay { + return Err(LangError::UnexpectedToken); + } + + let primary_key = unsafe { extract!(&src[i+1], Token::Lit(l) => l) }; + i += 3; // skip col, lit + op/ob + + let data; + if is_tuple { + let (ord, cnt, ok) = parse_data_tuple_syntax(&src[i..]); + okay &= ok; + i += cnt; + data = InsertData::Ordered(ord); + } else { + let (map, cnt, ok) = parse_data_map_syntax(&src[i..]); + okay &= ok; + i += cnt; + data = InsertData::Map(map); + } + + *counter += i; + + if okay { + Ok(InsertStatement { + primary_key, + entity: unsafe { entity.assume_init() }, + data, + }) + } else { + Err(LangError::UnexpectedToken) + } +} + +#[cfg(test)] +pub(super) fn parse_insert_full<'a>(tok: &'a [Token]) -> Option> { + let mut z = 0; + let s = self::parse_insert(tok, &mut z); + if z == tok.len() { + s.ok() } else { None } diff --git a/server/src/engine/ql/lexer.rs b/server/src/engine/ql/lexer.rs index 89868f66..68c2f174 100644 --- a/server/src/engine/ql/lexer.rs +++ b/server/src/engine/ql/lexer.rs @@ -81,6 +81,7 @@ pub enum Lit { enum_impls! { Lit => { Box as Str, + String as Str, bool as Bool, u64 as Num, } diff --git a/server/src/engine/ql/macros.rs b/server/src/engine/ql/macros.rs index e01d23d5..18a3e56c 100644 --- a/server/src/engine/ql/macros.rs +++ b/server/src/engine/ql/macros.rs @@ -70,12 +70,20 @@ macro_rules! __type_token { }; } +macro_rules! __misc_token { + ($ident:ident) => { + $crate::engine::ql::lexer::Token::Keyword($crate::engine::ql::lexer::Keyword::Misc( + $crate::engine::ql::lexer::MiscKeyword::$ident, + )) + }; +} + /* Frankly, this is just for lazy people like me. Do not judge -- Sayan (@ohsayan) */ macro_rules! Token { - // misc + // misc symbol (@) => { __sym_token!(SymAt) }; @@ -262,6 +270,29 @@ macro_rules! Token { (float) => { __type_token!(Float) }; + // tt + (open {}) => { + __sym_token!(TtOpenBrace) + }; + (close {}) => { + __sym_token!(TtCloseBrace) + }; + (() open) => { + __sym_token!(TtOpenParen) + }; + (() close) => { + __sym_token!(TtCloseParen) + }; + (open []) => { + __sym_token!(TtOpenSqBracket) + }; + (close []) => { + __sym_token!(TtCloseSqBracket) + }; + // misc + (null) => { + __misc_token!(Null) + }; } macro_rules! dict { @@ -275,6 +306,17 @@ macro_rules! dict { }}; } +macro_rules! dict_nullable { + () => { + <::std::collections::HashMap<_, _> as ::core::default::Default>::default() + }; + ($($key:expr => $value:expr),* $(,)?) => {{ + let mut hm: ::std::collections::HashMap<_, _> = ::core::default::Default::default(); + $(hm.insert($key.into(), $crate::engine::ql::tests::nullable_datatype($value));)* + hm + }}; +} + macro_rules! set { () => { <::std::collections::HashSet<_> as ::core::default::Default>::default() @@ -289,3 +331,7 @@ macro_rules! set { macro_rules! into_array { ($($e:expr),* $(,)?) => { [$($e.into()),*] }; } + +macro_rules! into_array_nullable { + ($($e:expr),* $(,)?) => { [$($crate::engine::ql::tests::nullable_datatype($e)),*] }; +} diff --git a/server/src/engine/ql/tests.rs b/server/src/engine/ql/tests.rs index ab212fbc..376eed7e 100644 --- a/server/src/engine/ql/tests.rs +++ b/server/src/engine/ql/tests.rs @@ -29,19 +29,46 @@ use { lexer::{Lexer, Token}, LangResult, }, - crate::util::Life, + crate::{engine::memory::DataType, util::Life}, }; fn lex(src: &[u8]) -> LangResult>> { Lexer::lex(src) } +pub trait NullableData { + fn data(self) -> Option; +} + +impl NullableData for T +where + T: Into, +{ + fn data(self) -> Option { + Some(self.into()) + } +} + +struct Null; + +impl NullableData for Null { + fn data(self) -> Option { + None + } +} + +fn nullable_datatype(v: impl NullableData) -> Option { + v.data() +} + mod lexer_tests { - use super::{ - super::lexer::{Lit, Token}, - lex, + use { + super::{ + super::lexer::{Lit, Token}, + lex, + }, + crate::engine::ql::LangError, }; - use crate::engine::ql::LangError; macro_rules! v( ($e:literal) => {{ @@ -173,7 +200,7 @@ mod entity { let t = lex(b"hello").unwrap(); let mut c = Compiler::new(&t); let r = Entity::parse(&mut c).unwrap(); - assert_eq!(r, Entity::Current("hello".into())) + assert_eq!(r, Entity::Single("hello".into())) } #[test] fn entity_partial() { @@ -1620,7 +1647,10 @@ mod dml_tests { "#) .unwrap(); let r = parse_data_tuple_syntax_full(&tok[1..]).unwrap(); - assert_eq!(r.as_slice(), into_array![1234, "email@example.com", true]); + assert_eq!( + r.as_slice(), + into_array_nullable![1234, "email@example.com", true] + ); } #[test] @@ -1637,7 +1667,7 @@ mod dml_tests { let r = parse_data_tuple_syntax_full(&tok[1..]).unwrap(); assert_eq!( r.as_slice(), - into_array![ + into_array_nullable![ 1234, "email@example.com", true, @@ -1667,7 +1697,7 @@ mod dml_tests { let r = parse_data_tuple_syntax_full(&tok[1..]).unwrap(); assert_eq!( r.as_slice(), - into_array![ + into_array_nullable![ 1234, "email@example.com", true, @@ -1708,7 +1738,7 @@ mod dml_tests { let r = parse_data_map_syntax_full(&tok[1..]).unwrap(); assert_eq!( r, - dict! { + dict_nullable! { "name" => "John Appletree", "email" => "john@example.com", "verified" => false, @@ -1732,7 +1762,7 @@ mod dml_tests { let r = parse_data_map_syntax_full(&tok[1..]).unwrap(); assert_eq!( r, - dict! { + dict_nullable! { "name" => "John Appletree", "email" => "john@example.com", "verified" => false, @@ -1761,7 +1791,7 @@ mod dml_tests { let r = parse_data_map_syntax_full(&tok[1..]).unwrap(); assert_eq!( r, - dict! { + dict_nullable! { "name" => "John Appletree", "email" => "john@example.com", "verified" => false, @@ -1779,4 +1809,155 @@ mod dml_tests { ) } } + mod stmt_insert { + use { + super::*, + crate::engine::ql::{ + ast::Entity, + dml::{self, InsertStatement}, + }, + }; + + #[test] + fn insert_tuple_mini() { + let x = lex(br#" + insert twitter.user:"sayan" () + "#) + .unwrap(); + let r = dml::parse_insert_full(&x[1..]).unwrap(); + let e = InsertStatement { + primary_key: &("sayan".to_string().into()), + entity: Entity::Full("twitter".into(), "user".into()), + data: vec![].into(), + }; + assert_eq!(e, r); + } + #[test] + fn insert_tuple() { + let x = lex(br#" + insert twitter.users:"sayan" ( + "Sayan", + "sayan@example.com", + true, + 12345, + 67890 + ) + "#) + .unwrap(); + let r = dml::parse_insert_full(&x[1..]).unwrap(); + let e = InsertStatement { + primary_key: &("sayan".to_string().into()), + entity: Entity::Full("twitter".into(), "users".into()), + data: into_array_nullable!["Sayan", "sayan@example.com", true, 12345, 67890] + .to_vec() + .into(), + }; + assert_eq!(e, r); + } + #[test] + fn insert_tuple_pro() { + let x = lex(br#" + insert twitter.users:"sayan" ( + "Sayan", + "sayan@example.com", + true, + 12345, + 67890, + null, + 12345, + null + ) + "#) + .unwrap(); + let r = dml::parse_insert_full(&x[1..]).unwrap(); + let e = InsertStatement { + primary_key: &("sayan".to_string().into()), + entity: Entity::Full("twitter".into(), "users".into()), + data: into_array_nullable![ + "Sayan", + "sayan@example.com", + true, + 12345, + 67890, + Null, + 12345, + Null + ] + .to_vec() + .into(), + }; + assert_eq!(e, r); + } + #[test] + fn insert_map_mini() { + let tok = lex(br#"insert jotsy.app:"sayan" {}"#).unwrap(); + let r = dml::parse_insert_full(&tok[1..]).unwrap(); + let e = InsertStatement { + primary_key: &("sayan".to_string().into()), + entity: Entity::Full("jotsy".into(), "app".into()), + data: dict! {}.into(), + }; + assert_eq!(e, r); + } + #[test] + fn insert_map() { + let tok = lex(br#" + insert jotsy.app:"sayan" { + name: "Sayan", + email: "sayan@example.com", + verified: true, + following: 12345, + followers: 67890 + } + "#) + .unwrap(); + let r = dml::parse_insert_full(&tok[1..]).unwrap(); + let e = InsertStatement { + primary_key: &("sayan".to_string().into()), + entity: Entity::Full("jotsy".into(), "app".into()), + data: dict_nullable! { + "name".as_bytes() => "Sayan", + "email".as_bytes() => "sayan@example.com", + "verified".as_bytes() => true, + "following".as_bytes() => 12345, + "followers".as_bytes() => 67890 + } + .into(), + }; + assert_eq!(e, r); + } + #[test] + fn insert_map_pro() { + let tok = lex(br#" + insert jotsy.app:"sayan" { + password: "pass123", + email: "sayan@example.com", + verified: true, + following: 12345, + followers: 67890, + linked_smart_devices: null, + bookmarks: 12345, + other_linked_accounts: null + } + "#) + .unwrap(); + let r = dml::parse_insert_full(&tok[1..]).unwrap(); + let e = InsertStatement { + primary_key: &("sayan".to_string()).into(), + entity: Entity::Full("jotsy".into(), "app".into()), + data: dict_nullable! { + "password".as_bytes() => "pass123", + "email".as_bytes() => "sayan@example.com", + "verified".as_bytes() => true, + "following".as_bytes() => 12345, + "followers".as_bytes() => 67890, + "linked_smart_devices".as_bytes() => Null, + "bookmarks".as_bytes() => 12345, + "other_linked_accounts".as_bytes() => Null + } + .into(), + }; + assert_eq!(r, e); + } + } } diff --git a/server/src/util/macros.rs b/server/src/util/macros.rs index 80acde3e..c82f80d9 100644 --- a/server/src/util/macros.rs +++ b/server/src/util/macros.rs @@ -26,9 +26,13 @@ #[macro_export] macro_rules! impossible { - () => { - core::hint::unreachable_unchecked() - }; + () => {{ + if cfg!(debug_assertions) { + panic!("called unreachable code at: {}:{}", ::core::file!(), ::core::line!()); + } else { + core::hint::unreachable_unchecked() + } + }}; } #[macro_export]