Add complete list support

next
Sayan Nandan 2 years ago
parent 7ecec7ffa6
commit 68ed434c96
No known key found for this signature in database
GPG Key ID: 8BC07A0A4D41DD52

@ -0,0 +1,51 @@
/*
* Created on Wed Oct 12 2022
*
* This file is a part of Skytable
* Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
* NoSQL database written by Sayan Nandan ("the Author") with the
* vision to provide flexibility in data modelling without compromising
* on performance, queryability or scalability.
*
* Copyright (c) 2022, Sayan Nandan <ohsayan@outlook.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
/// Destructures `$src` with the pattern `$what` and evaluates to `$ret`, which may use the
/// bindings introduced by the pattern.
///
/// When the pattern does not match, the expansion falls through to `$crate::impossible!()`.
/// NOTE(review): `impossible!` is defined elsewhere; the call sites wrap this macro in `unsafe`,
/// so it presumably marks the branch as unreachable without a check -- confirm before relying on it.
macro_rules! extract {
    ($src:expr, $what:pat => $ret:expr) => {
        match $src {
            $what => $ret,
            _ => $crate::impossible!(),
        }
    };
}
/// Asserts that every expression on the left of `=>` is equal to the expression on the right.
///
/// One `assert_eq!` is emitted per left-hand expression. The right-hand expression is repeated
/// in each of them, so it is evaluated once per comparison.
macro_rules! multi_assert_eq {
    ($($actual:expr),* => $expected:expr) => {
        $(
            assert_eq!($actual, $expected);
        )*
    };
}
/// Generates `From<Source>` implementations for the enum type `$target`.
///
/// Every `Source as Variant` entry produces an impl that wraps the incoming value in
/// `$target::Variant`. The value goes through `.into()` first, so the source type only has to
/// be convertible into the variant's payload (e.g. `&'static str` for a `String` payload).
macro_rules! enum_impls {
    ($target:ty => { $($source:ty as $variant:ident),* $(,)? }) => {
        $(
            impl ::core::convert::From<$source> for $target {
                fn from(value: $source) -> Self {
                    Self::$variant(value.into())
                }
            }
        )*
    };
}
/// Turns each boolean expression into a compile-time assertion.
///
/// Every condition is placed in its own anonymous `const` item, so a condition that evaluates
/// to `false` is reported while the constant is being evaluated rather than at runtime.
macro_rules! assertions {
    ($($condition:expr),* $(,)?) => {
        $(
            const _: () = ::core::assert!($condition);
        )*
    };
}

@ -0,0 +1,62 @@
/*
* Created on Wed Oct 12 2022
*
* This file is a part of Skytable
* Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
* NoSQL database written by Sayan Nandan ("the Author") with the
* vision to provide flexibility in data modelling without compromising
* on performance, queryability or scalability.
*
* Copyright (c) 2022, Sayan Nandan <ohsayan@outlook.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
// TODO(@ohsayan): Change the underlying structures, these are just rudimentary ones used during integration with the QL
/// A [`DataType`] represents the underlying data-type of a single value. When values are placed in a collection
/// (see [`DataType::List`]) every element is expected to be of one and the same variant.
#[derive(Debug, PartialEq)]
pub enum DataType {
/// A UTF-8 string
String(String),
/// Raw bytes
Binary(Vec<u8>),
/// An unsigned 64-bit integer
Number(u64),
/// A boolean
Boolean(bool),
/// A single-type list. Note, you **need** to keep up the invariant that the [`DataType`] discriminant remains the
/// same for all elements to ensure correctness in this specific context; the type itself does not enforce it.
/// FIXME(@ohsayan): Try enforcing this somehow
List(Vec<Self>),
}
// Generate the `From<T> for DataType` conversions. Each entry reads `source type as target variant`.
enum_impls! {
DataType => {
String as String,
Vec<u8> as Binary,
u64 as Number,
bool as Boolean,
Vec<Self> as List,
// string slices with a `'static` lifetime also end up in the `String` variant
&'static str as String,
}
}
/// Builds a [`DataType::List`] out of a fixed-size array of values, keeping the element order.
impl<const N: usize> From<[DataType; N]> for DataType {
    fn from(items: [DataType; N]) -> Self {
        let elements: Vec<DataType> = Vec::from(items);
        Self::List(elements)
    }
}

@ -27,4 +27,7 @@
#![allow(dead_code)]
#![allow(unused_macros)]
#[macro_use]
mod macros;
mod memory;
mod ql;

@ -46,30 +46,37 @@ pub enum Entity {
impl Entity {
pub(super) fn parse(cm: &mut Compiler) -> LangResult<Self> {
let a = cm.nxtok_opt();
let b = cm.nxtok_opt();
let c = cm.nxtok_opt();
match (a, b, c) {
(
Some(Token::Ident(ks)),
Some(Token::Symbol(Symbol::SymPeriod)),
Some(Token::Ident(tbl)),
) => {
let r = Ok(Entity::Full(ks.clone(), tbl.clone()));
r
}
(Some(Token::Ident(ident)), _, _) => unsafe {
let r = Ok(Entity::Current(ident.clone()));
cm.decr_cursor_by(2);
r
let sl = cm.remslice();
let is_partial =
sl.len() > 1 && sl[0] == Token::Symbol(Symbol::SymColon) && sl[1].is_ident();
let is_current = !sl.is_empty() && sl[0].is_ident();
let is_full = sl.len() > 2
&& sl[0].is_ident()
&& sl[1] == Token::Symbol(Symbol::SymPeriod)
&& sl[2].is_ident();
let c;
let r = match () {
_ if is_full => unsafe {
c = 3;
Entity::Full(
extract!(&sl[0], Token::Ident(sl) => sl.clone()),
extract!(&sl[2], Token::Ident(sl) => sl.clone()),
)
},
(Some(Token::Symbol(Symbol::SymColon)), Some(Token::Ident(tbl)), _) => unsafe {
let r = Ok(Entity::Partial(tbl.clone()));
cm.decr_cursor_by(1);
r
_ if is_current => unsafe {
c = 1;
Entity::Current(extract!(&sl[0], Token::Ident(sl) => sl.clone()))
},
_ => Err(LangError::UnexpectedToken),
_ if is_partial => unsafe {
c = 2;
Entity::Partial(extract!(&sl[1], Token::Ident(sl) => sl.clone()))
},
_ => return Err(LangError::UnexpectedToken),
};
unsafe {
cm.incr_cursor_by(c);
}
Ok(r)
}
}
@ -309,19 +316,4 @@ impl<'a> Compiler<'a> {
debug_assert!(self.remaining() >= by);
self.c = self.c.add(by);
}
#[inline(always)]
pub(super) unsafe fn decr_cursor_by(&mut self, by: usize) {
self.c = self.c.sub(by);
}
fn try_read_index<'b>(&'a self, index: usize) -> Option<&'b Token>
where
'a: 'b,
{
let sl = self.remslice();
if sl.len() > index {
Some(&sl[index])
} else {
None
}
}
}

@ -0,0 +1,91 @@
/*
* Created on Fri Oct 14 2022
*
* This file is a part of Skytable
* Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
* NoSQL database written by Sayan Nandan ("the Author") with the
* vision to provide flexibility in data modelling without compromising
* on performance, queryability or scalability.
*
* Copyright (c) 2022, Sayan Nandan <ohsayan@outlook.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
use std::mem::{discriminant, Discriminant};
use super::lexer::{Lit, Symbol};
use {super::lexer::Token, crate::engine::memory::DataType};
/// Parses the elements of a list literal and appends them to `list`.
///
/// `tok` is expected to start with the token right *after* the opening `[`; the recursive call
/// for nested lists is made the same way. String, number and boolean literals as well as nested
/// lists are accepted as elements, separated by commas and terminated by `]`. Every element must
/// have the same [`DataType`] discriminant as the first element already present in `list`.
///
/// Returns a tuple of:
/// - the discriminant of the last element that was parsed (`None` if no element was parsed),
/// - the token offset that was reached (on success this includes the closing `]`),
/// - `true` only if the list was well-formed, homogeneous and properly closed.
///
/// NOTE(review): for nested lists only the element discriminant reported by each directly nested
/// list is compared against its siblings; deeper levels are validated by the recursive calls but
/// not compared across sibling lists -- confirm that this is intended.
pub(super) fn parse_list(
    tok: &[Token],
    list: &mut Vec<DataType>,
) -> (Option<Discriminant<DataType>>, usize, bool) {
    let len = tok.len();
    let mut okay = len != 0;
    // an immediately closed list (`[]`) is fine and only consumes the `]`
    let mut closed = okay && tok[0] == Symbol::TtCloseSqBracket;
    let mut cursor = closed as usize;
    let mut last_dscr: Option<Discriminant<DataType>> = None;
    // element discriminant of the first non-empty nested list seen at this level
    let mut nested_dscr: Option<Discriminant<DataType>> = None;
    while cursor < len && okay && !closed {
        let element = match &tok[cursor] {
            Token::Lit(Lit::Str(s)) => DataType::String(s.to_string()),
            Token::Lit(Lit::Num(n)) => DataType::Number(*n),
            Token::Lit(Lit::Bool(b)) => DataType::Boolean(*b),
            Token::Symbol(Symbol::TtOpenSqBracket) => {
                // a nested list: recurse on everything after the `[`
                let mut inner = Vec::new();
                let (inner_dscr, inner_consumed, inner_okay) =
                    parse_list(&tok[cursor + 1..], &mut inner);
                okay &= inner_okay;
                cursor += inner_consumed;
                // sibling lists must agree on their element type; empty lists match anything
                match (nested_dscr, inner_dscr) {
                    (Some(expected), Some(found)) => okay &= expected == found,
                    (None, Some(_)) => nested_dscr = inner_dscr,
                    _ => {}
                }
                DataType::List(inner)
            }
            _ => {
                okay = false;
                break;
            }
        };
        // step past the element itself (for a nested list: past its opening `[`)
        cursor += 1;
        let dscr = discriminant(&element);
        okay &= list.first().map_or(true, |head| discriminant(head) == dscr);
        last_dscr = Some(dscr);
        list.push(element);
        // an element has to be followed by either `,` or `]`
        let next = tok.get(cursor);
        let next_is_comma = next.map_or(false, |t| *t == Symbol::SymComma);
        let next_is_close = next.map_or(false, |t| *t == Symbol::TtCloseSqBracket);
        okay &= next_is_comma | next_is_close;
        // only step over the separator/terminator if everything is still valid
        cursor += okay as usize;
        closed = next_is_close;
    }
    (last_dscr, cursor, okay && closed)
}
/// Test-only convenience wrapper around [`parse_list`]: parses `tok` into a fresh vector and
/// returns it only when `parse_list` reports success.
#[cfg(test)]
pub(super) fn parse_list_full(tok: &[Token]) -> Option<Vec<DataType>> {
    let mut items = Vec::new();
    match parse_list(tok, &mut items) {
        (_, _, true) => Some(items),
        _ => None,
    }
}

@ -45,6 +45,15 @@ pub enum Token {
Lit(Lit), // literal
}
/// Allows a [`Token`] to be compared directly against a [`Symbol`]: the comparison holds only
/// when the token is the `Symbol` variant and wraps an equal symbol.
impl PartialEq<Symbol> for Token {
    fn eq(&self, other: &Symbol) -> bool {
        matches!(self, Self::Symbol(sym) if sym == other)
    }
}
assertions! {
size_of::<Token>() == 32, // FIXME(@ohsayan): Damn, what?
size_of::<Symbol>() == 1,
@ -495,7 +504,7 @@ impl<'a> Lexer<'a> {
1234, // valid
1234a // invalid
*/
static TERMINAL_CHAR: [u8; 6] = [b';', b'}', b',', b' ', b'\n', b'\t'];
static TERMINAL_CHAR: [u8; 8] = [b';', b'}', b',', b' ', b'\n', b'\t', b',', b']'];
let wseof = self.peek_is(|b| TERMINAL_CHAR.contains(&b)) || self.exhausted();
match str::from_utf8_unchecked(slice::from_raw_parts(
s,
@ -549,7 +558,20 @@ impl<'a> Lexer<'a> {
match symof(byte) {
Some(tok) => self.push_token(tok),
#[cfg(test)]
None if byte == b'\r' => self.push_token(Token::IgnorableComma),
None if byte == b'\r'
&& self.remaining() > 1
&& !(unsafe {
// UNSAFE(@ohsayan): The previous condition ensures that this doesn't segfault
*self.cursor().add(1)
})
.is_ascii_digit() =>
{
/*
NOTE(@ohsayan): The above guard might look a little messy but is necessary to support raw
literals which will use the carriage return
*/
self.push_token(Token::IgnorableComma)
}
_ => {
self.last_error = Some(LangError::UnexpectedChar);
return;
@ -601,6 +623,18 @@ impl Token {
}
}
}
/// Returns a clone of the identifier stored in this token.
///
/// # Safety
///
/// The caller must guarantee that `self` is the `Ident` variant. For any other variant the
/// `impossible!()` branch is reached.
/// NOTE(review): `impossible!` is defined elsewhere; presumably reaching it is undefined
/// behavior -- confirm against its definition.
#[inline(always)]
pub(super) unsafe fn ident_unchecked(&self) -> RawSlice {
    match self {
        Self::Ident(id) => id.clone(),
        _ => impossible!(),
    }
}
/// Returns `true` if this token is the `Lit` variant (i.e. a literal), whatever the literal holds
#[inline(always)]
pub(super) fn is_lit(&self) -> bool {
matches!(self, Self::Lit(_))
}
}
impl AsRef<Token> for Token {

@ -46,18 +46,6 @@ macro_rules! set {
}};
}
macro_rules! multi_assert_eq {
($($lhs:expr),* => $rhs:expr) => {
$(assert_eq!($lhs, $rhs);)*
};
}
macro_rules! enum_impls {
($for:ty => {$($other:ty as $me:ident),*$(,)?}) => {
$(impl ::core::convert::From<$other> for $for {fn from(v: $other) -> Self {Self::$me(v)}})*
}
}
macro_rules! assertions {
($($assert:expr),*$(,)?) => {$(const _:()=::core::assert!($assert);)*}
/// Builds an array literal in which every element has been passed through `.into()`.
///
/// The target element type is picked up from the context the array is used in. A trailing
/// comma after the last element is accepted.
macro_rules! into_array {
    ($($item:expr),* $(,)?) => {
        [
            $($item.into()),*
        ]
    };
}

@ -27,6 +27,7 @@
#[macro_use]
mod macros;
pub(super) mod ast;
pub(super) mod dml;
pub(super) mod lexer;
pub(super) mod schema;
#[cfg(test)]

@ -44,11 +44,11 @@
Sept. 15, 2022
*/
use super::lexer::DmlKeyword;
use {
super::{
lexer::{DdlKeyword, DdlMiscKeyword, Keyword, Lit, MiscKeyword, Symbol, Token, Type},
lexer::{
DdlKeyword, DdlMiscKeyword, DmlKeyword, Keyword, Lit, MiscKeyword, Symbol, Token, Type,
},
LangError, LangResult, RawSlice,
},
std::{

@ -1508,3 +1508,77 @@ mod schema_tests {
}
}
}
// Tests for the DML part of the query language
mod dml_tests {
use super::*;
// Tests for list literal parsing through `parse_list_full`.
//
// Every test lexes a list literal and hands `&tok[1..]` to the parser, i.e. the first token
// (presumably the opening `[`) is skipped before parsing starts.
mod list_parse {
use super::*;
use crate::engine::ql::dml::parse_list_full;
// an empty list parses into an empty vector
#[test]
fn list_mini() {
let tok = lex(b"
[]
")
.unwrap();
let r = parse_list_full(&tok[1..]).unwrap();
assert_eq!(r, vec![])
}
// a flat list of numbers
#[test]
fn list() {
let tok = lex(b"
[1, 2, 3, 4]
")
.unwrap();
let r = parse_list_full(&tok[1..]).unwrap();
assert_eq!(r.as_slice(), into_array![1, 2, 3, 4])
}
// a list of lists (one level of nesting)
#[test]
fn list_pro() {
let tok = lex(b"
[
[1, 2],
[3, 4],
[5, 6],
[7, 8]
]
")
.unwrap();
let r = parse_list_full(&tok[1..]).unwrap();
assert_eq!(
r.as_slice(),
into_array![
into_array![1, 2],
into_array![3, 4],
into_array![5, 6],
into_array![7, 8]
]
)
}
// a list of lists of lists (two levels of nesting)
#[test]
fn list_pro_max() {
let tok = lex(b"
[
[[1, 1], [2, 2]],
[[3, 3], [4, 4]],
[[5, 5], [6, 6]],
[[7, 7], [8, 8]]
]
")
.unwrap();
let r = parse_list_full(&tok[1..]).unwrap();
assert_eq!(
r.as_slice(),
into_array![
into_array![into_array![1, 1], into_array![2, 2]],
into_array![into_array![3, 3], into_array![4, 4]],
into_array![into_array![5, 5], into_array![6, 6]],
into_array![into_array![7, 7], into_array![8, 8]],
]
)
}
}
}

Loading…
Cancel
Save