next
Sayan Nandan 2 years ago
parent e34e5c87ca
commit 91704da4aa

@@ -0,0 +1,31 @@
/*
* Created on Mon Jun 13 2022
*
* This file is a part of Skytable
* Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
* NoSQL database written by Sayan Nandan ("the Author") with the
* vision to provide flexibility in data modelling without compromising
* on performance, queryability or scalability.
*
* Copyright (c) 2022, Sayan Nandan <ohsayan@outlook.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
use super::{LangResult, QueryProcessor};
pub trait AstNode: Sized {
fn parse_node(qp: &mut QueryProcessor) -> LangResult<Self>;
}
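
For context, the new `AstNode` trait lets each syntax-tree node drive its own parsing off the shared `QueryProcessor`. A minimal hypothetical sketch (the `SpaceName` node is illustrative and not part of this commit; `Ident` and `next::<T>()` come from the lexer changes below):

struct SpaceName(Ident);

impl AstNode for SpaceName {
    fn parse_node(qp: &mut QueryProcessor) -> LangResult<Self> {
        // `QueryProcessor::next::<T>()` simply forwards to `T::lex(qp)`,
        // so each field is lexed in declaration order
        let name = qp.next::<Ident>()?;
        Ok(Self(name))
    }
}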

@@ -92,7 +92,7 @@ impl LexItem for Ident {
}
if is_okay {
let len = find_ptr_distance(start_ptr, qp.cursor());
qp.skip_separator(); // skip whitespace (if any)
qp.skip_delimiter(); // skip whitespace (if any)
unsafe {
// UNSAFE(@ohsayan): The above procedure ensures validity
Ok(Self(Slice::new(start_ptr, len)))
@@ -140,7 +140,7 @@ impl LexItem for LitNum {
}
}
if is_okay {
qp.skip_separator();
qp.skip_delimiter();
Ok(Self(ret))
} else {
Err(LangError::TypeParseFailure)
@@ -184,7 +184,7 @@ impl<'a> LexItem for LitString<'a> {
if is_okay {
let len = find_ptr_distance(start_ptr, qp.cursor());
let string = str::from_utf8(unsafe { slice::from_raw_parts(start_ptr, len) })?;
qp.skip_separator();
qp.skip_delimiter();
Ok(Self(string))
} else {
Err(LangError::TypeParseFailure)
@@ -251,7 +251,7 @@ impl LexItem for LitStringEscaped {
// UNSAFE(@ohsayan): First operand guarantees correctness
qp.deref_cursor() == b'"'
};
qp.skip_separator();
qp.skip_delimiter();
match String::from_utf8(stringbuf) {
Ok(s) if is_okay => Ok(Self(s)),
_ => Err(LangError::TypeParseFailure),
@@ -277,7 +277,7 @@ macro_rules! impl_punctuation {
// UNSAFE(@ohsayan): The above condition guarantees safety
qp.incr_cursor()
};
qp.skip_separator();
qp.skip_delimiter();
Ok(Self)
} else {
Err(LangError::InvalidSyntax)
@@ -307,27 +307,28 @@ pub enum Type {
List,
}
impl LexItem for Type {
#[inline(always)]
fn lex(qp: &mut QueryProcessor) -> LangResult<Self> {
let ret = match qp.next::<Ident>() {
Ok(ret) => {
match unsafe {
impl Type {
pub fn try_from_ident(id: &Ident) -> LangResult<Self> {
let ret = match unsafe {
// UNSAFE(@ohsayan): The lifetime of the `qp` ensures validity
ret.as_slice()
id.as_slice()
} {
b"string" => Self::String,
b"binary" => Self::Binary,
b"list" => Self::List,
_ => return Err(LangError::UnknownType),
}
}
Err(_) => return Err(LangError::InvalidSyntax),
};
Ok(ret)
}
}
impl LexItem for Type {
#[inline(always)]
fn lex(qp: &mut QueryProcessor) -> LangResult<Self> {
qp.next::<Ident>().and_then(|id| Self::try_from_ident(&id))
}
}
#[derive(PartialEq, Debug)]
pub struct TypeExpression(pub Vec<Type>);
@@ -385,7 +386,7 @@ impl LexItem for TypeExpression {
}
valid_expr &= open_c == close_c;
if valid_expr {
qp.skip_separator();
qp.skip_delimiter();
Ok(Self(type_expr))
} else {
Err(LangError::BadExpression)
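
The main change in the lexer is splitting `Type::try_from_ident` out of `LexItem::lex`, so the ident-to-type mapping can be reused by callers that already hold an `Ident`. A rough sketch of what that enables (the helper name is illustrative, not from the diff):

fn lex_type_expr_item(qp: &mut QueryProcessor) -> LangResult<Type> {
    // equivalent to `qp.next::<Type>()`: lex an identifier, then map it
    let id = qp.next::<Ident>()?;
    // returns Err(LangError::UnknownType) for anything other than
    // b"string" | b"binary" | b"list"
    Type::try_from_ident(&id)
}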

@@ -29,6 +29,7 @@
#[cfg(test)]
mod tests;
// endof tests
mod ast;
mod error;
mod lex;
// imports
@@ -38,10 +39,11 @@ pub type LangResult<T> = Result<T, LangError>;
use {
crate::util::Life,
core::{marker::PhantomData, mem::discriminant, slice},
core::{marker::PhantomData, slice},
};
#[derive(Debug, Clone, Copy)]
/// A raw slice with the same structure as a fat ptr
pub struct Slice {
start_ptr: *const u8,
len: usize,
@@ -79,10 +81,12 @@ where
}
#[inline(always)]
/// Finds the distance between two pointers. Panics if the stop ptr is behind the start ptr
fn find_ptr_distance(start: *const u8, stop: *const u8) -> usize {
stop as usize - start as usize
}
/// A `QueryProcessor` provides functions to parse queries
pub struct QueryProcessor<'a> {
cursor: *const u8,
end_ptr: *const u8,
@@ -92,6 +96,7 @@ pub struct QueryProcessor<'a> {
// init
impl<'a> QueryProcessor<'a> {
#[inline(always)]
/// Init a new query processor
const fn new(buf: &[u8]) -> Self {
unsafe {
Self {
@@ -106,28 +111,42 @@ impl<'a> QueryProcessor<'a> {
// helpers
impl<'a> QueryProcessor<'a> {
#[inline(always)]
/// Check if we have exhausted the buffer
pub fn exhausted(&self) -> bool {
self.cursor >= self.end_ptr
}
#[inline(always)]
/// Check if we still have something left in the buffer
pub fn not_exhausted(&self) -> bool {
self.cursor < self.end_ptr
}
/// Move the cursor ahead by `by` positions
#[inline(always)]
unsafe fn incr_cursor_by(&mut self, by: usize) {
self.cursor = self.cursor.add(by);
}
/// Move the cursor ahead by 1
#[inline(always)]
unsafe fn incr_cursor(&mut self) {
self.incr_cursor_by(1)
}
/// Deref the cursor
#[inline(always)]
unsafe fn deref_cursor(&self) -> u8 {
*(self.cursor())
}
/// Returns the cursor
#[inline(always)]
const fn cursor(&self) -> *const u8 {
self.cursor
}
/// Returns the EOA ptr
#[inline(always)]
const fn end_ptr(&self) -> *const u8 {
self.end_ptr
}
/// Peeks at the byte ahead if it exists
#[inline(always)]
fn peek(&self) -> Option<u8> {
if self.not_exhausted() {
Some(unsafe { self.deref_cursor() })
@@ -135,9 +154,14 @@ impl<'a> QueryProcessor<'a> {
None
}
}
/// Peeks at the byte ahead to see if it matches the given byte. Returns false if
/// we've reached end of allocation
#[inline(always)]
fn peek_eq(&self, eq_byte: u8) -> bool {
unsafe { self.not_exhausted() && self.deref_cursor() == eq_byte }
}
/// Same as `Self::peek_eq`, but forwards the cursor on match
#[inline(always)]
fn peek_eq_and_forward(&mut self, eq_byte: u8) -> bool {
let eq = self.peek_eq(eq_byte);
unsafe {
@@ -145,36 +169,49 @@ impl<'a> QueryProcessor<'a> {
}
eq
}
/// Returns the byte at cursor and moves it ahead
#[inline(always)]
unsafe fn deref_cursor_and_forward(&mut self) -> u8 {
let ret = self.deref_cursor();
self.incr_cursor();
ret
}
}
// parsing
impl<'a> QueryProcessor<'a> {
/// Returns true if:
/// - The byte ahead matches the provided `byte`
/// - We have reached the end of the allocation
///
/// Meant to be used in places where you want to match a predicate, but still
/// return true if you've reached EOF
#[inline(always)]
fn peek_eq_and_forward_or_true(&mut self, byte: u8) -> bool {
self.peek_eq(byte) | self.exhausted()
}
#[inline(always)]
/// Peeks ahead and moves the cursor ahead if the peeked byte matches the predicate
fn skip_char_if_present(&mut self, ch: u8) {
self.cursor = unsafe {
self.cursor
.add((self.not_exhausted() && self.deref_cursor() == ch) as usize)
};
unsafe { self.incr_cursor_by(self.peek_eq(ch) as usize) }
}
#[inline(always)]
fn skip_separator(&mut self) {
self.skip_char_if_present(Self::SEPARATOR)
/// Skips the delimiter
fn skip_delimiter(&mut self) {
self.skip_char_if_present(Self::DELIMITER)
}
}
// parsing
impl<'a> QueryProcessor<'a> {
const DELIMITER: u8 = b' ';
#[inline(always)]
pub fn next<T: LexItem>(&mut self) -> LangResult<T> {
T::lex(self)
}
const SEPARATOR: u8 = b' ';
#[inline(always)]
/// Returns the next token separated by the separator
/// Returns the next token separated by the DELIMITER
pub fn next_token_tl(&mut self) -> Slice {
let start_ptr = self.cursor;
let mut ptr = self.cursor;
while self.end_ptr > ptr && unsafe { *ptr != Self::SEPARATOR } {
while self.end_ptr > ptr && unsafe { *ptr != Self::DELIMITER } {
ptr = unsafe {
// UNSAFE(@ohsayan): The loop init invariant ensures this is safe
ptr.add(1)
@@ -182,12 +219,13 @@ impl<'a> QueryProcessor<'a> {
}
// update the cursor
self.cursor = ptr;
self.skip_separator();
self.skip_delimiter();
unsafe {
// UNSAFE(@ohsayan): The start_ptr and size were verified by the above steps
Slice::new(start_ptr, find_ptr_distance(start_ptr, ptr))
}
}
#[inline(always)]
pub fn try_next_token(&mut self) -> LangResult<Slice> {
if self.not_exhausted() {
Ok(self.next_token_tl())
@@ -195,6 +233,7 @@ impl<'a> QueryProcessor<'a> {
Err(LangError::UnexpectedEOF)
}
}
#[inline(always)]
pub fn parse_into_tokens(buf: &'a [u8]) -> Vec<Life<'a, Slice>> {
let mut slf = QueryProcessor::new(buf);
let mut r = Vec::new();
@@ -204,29 +243,3 @@ impl<'a> QueryProcessor<'a> {
r
}
}
#[derive(Debug, Clone, Copy)]
pub enum Token<'a> {
Create,
Drop,
Model,
Space,
String,
Binary,
Ident(Life<'a, Slice>),
Number(Life<'a, Slice>),
}
impl<'a> PartialEq for Token<'a> {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(Self::Ident(ref id_a), Self::Ident(ref id_b)) => unsafe {
id_a.as_slice() == id_b.as_slice()
},
(Self::Number(ref id_a), Self::Number(ref id_b)) => unsafe {
id_a.as_slice() == id_b.as_slice()
},
(a, b) => discriminant(a) == discriminant(b),
}
}
}
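
With `LexItem` and `AstNode` parsing directly from the buffer, the old `Token` enum (and the `discriminant`-based equality it needed) is no longer used here and is dropped. The tokenizer itself keeps the same shape: `next_token_tl` takes bytes up to the next `DELIMITER` (a space), then `skip_delimiter` consumes at most one trailing delimiter. A standalone analogue on safe slices (illustrative only, not Skytable code) of what `parse_into_tokens` does:

fn split_tokens(buf: &[u8]) -> Vec<&[u8]> {
    let mut cursor = 0;
    let mut tokens = Vec::new();
    while cursor < buf.len() {
        let start = cursor;
        // advance to the next delimiter (or the end of the buffer)
        while cursor < buf.len() && buf[cursor] != b' ' {
            cursor += 1;
        }
        tokens.push(&buf[start..cursor]);
        // skip a single delimiter if one is present, mirroring `skip_delimiter`
        cursor += (cursor < buf.len()) as usize;
    }
    tokens
}

// e.g. split_tokens(b"create space myspace") yields ["create", "space", "myspace"]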
