From 3927940afd51bdc63c9f817b6dc34bb460cf7933 Mon Sep 17 00:00:00 2001 From: Sayan Nandan Date: Thu, 9 Jun 2022 00:34:16 -0700 Subject: [PATCH] Add `Scanner` --- server/src/blueql/mod.rs | 172 +++++++++++++++++++++++++++++++++++++++ server/src/main.rs | 17 ++-- server/src/util/mod.rs | 80 ++++++++++++++++-- 3 files changed, 253 insertions(+), 16 deletions(-) create mode 100644 server/src/blueql/mod.rs diff --git a/server/src/blueql/mod.rs b/server/src/blueql/mod.rs new file mode 100644 index 00000000..1c6d84fd --- /dev/null +++ b/server/src/blueql/mod.rs @@ -0,0 +1,172 @@ +/* + * Created on Thu Jun 09 2022 + * + * This file is a part of Skytable + * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source + * NoSQL database written by Sayan Nandan ("the Author") with the + * vision to provide flexibility in data modelling without compromising + * on performance, queryability or scalability. + * + * Copyright (c) 2022, Sayan Nandan + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ * +*/ + +#![allow(dead_code)] // TODO(@ohsayan): Remove this once we're done + +use { + crate::util::Life, + core::{marker::PhantomData, slice}, +}; + +#[derive(Debug, Clone, Copy)] +pub struct Slice { + start_ptr: *const u8, + len: usize, +} + +unsafe impl Send for Slice {} +unsafe impl Sync for Slice {} + +impl Slice { + /// ## Safety + /// Ensure that `start_ptr` and `len` are valid during construction and use + #[inline(always)] + pub const unsafe fn new(start_ptr: *const u8, len: usize) -> Self { + Slice { start_ptr, len } + } + /// ## Safety + /// Ensure that the slice is valid in this context + #[inline(always)] + pub unsafe fn as_slice(&self) -> &[u8] { + slice::from_raw_parts(self.start_ptr, self.len) + } +} + +impl<'a, T> From for Slice +where + T: AsRef<[u8]> + 'a, +{ + #[inline(always)] + fn from(oth: T) -> Self { + unsafe { + let oth = oth.as_ref(); + Self::new(oth.as_ptr(), oth.len()) + } + } +} + +#[inline(always)] +fn find_ptr_distance(start: *const u8, stop: *const u8) -> usize { + stop as usize - start as usize +} + +pub struct Scanner<'a> { + cursor: *const u8, + end_ptr: *const u8, + _lt: PhantomData<&'a [u8]>, +} + +// init +impl<'a> Scanner<'a> { + #[inline(always)] + const fn new(buf: &[u8]) -> Self { + unsafe { + Self { + cursor: buf.as_ptr(), + end_ptr: buf.as_ptr().add(buf.len()), + _lt: PhantomData {}, + } + } + } +} + +// helpers +impl<'a> Scanner<'a> { + #[inline(always)] + pub fn exhausted(&self) -> bool { + self.cursor >= self.end_ptr + } + #[inline(always)] + pub fn not_exhausted(&self) -> bool { + self.cursor < self.end_ptr + } +} + +// parsing +impl<'a> Scanner<'a> { + #[inline(always)] + pub fn next_token(&mut self) -> Slice { + let start_ptr = self.cursor; + let mut ptr = self.cursor; + while self.end_ptr > ptr && unsafe { *ptr != b' ' } { + ptr = unsafe { + // UNSAFE(@ohsayan): The loop init invariant ensures this is safe + ptr.add(1) + }; + } + // update the cursor + self.cursor = ptr; + // if self is not exhausted and the 
cursor is a whitespace + let ptr_is_whitespace = unsafe { + // UNSAFE(@ohsayan): The first operand ensures safety + self.not_exhausted() && *self.cursor == b' ' + }; + // if ptr is whitespace, then move the cursor ahead + self.cursor = unsafe { + // UNSAFE(@ohsayan): The definition of ptr_is_whitespace ensures correctness + self.cursor.add(ptr_is_whitespace as usize) + }; + unsafe { + // UNSAFE(@ohsayan): The start_ptr and size were verified by the above steps + Slice::new(start_ptr, find_ptr_distance(start_ptr, ptr)) + } + } + pub fn parse_into_tokens(buf: &'a [u8]) -> Vec> { + let mut slf = Scanner::new(buf); + let mut r = Vec::new(); + while slf.not_exhausted() { + r.push(Life::new(slf.next_token())); + } + r + } +} + +#[test] +fn scanner_tokenize() { + let tokens = b"create space app".to_vec(); + let scanned_tokens = Scanner::parse_into_tokens(&tokens); + let scanned_tokens: Vec = scanned_tokens + .into_iter() + .map(|tok| unsafe { String::from_utf8_lossy(tok.as_slice()).to_string() }) + .collect(); + assert_eq!(scanned_tokens, ["create", "space", "app"]); +} + +#[test] +fn scanner_step_by_step_tokenize() { + let tokens = b"create space app".to_vec(); + let mut scanner = Scanner::new(&tokens); + unsafe { + assert_eq!(scanner.next_token().as_slice(), b"create"); + assert_eq!(scanner.next_token().as_slice(), b"space"); + assert_eq!(scanner.next_token().as_slice(), b"app"); + assert!(scanner.exhausted()); + assert_eq!(scanner.next_token().as_slice(), b""); + assert_eq!(scanner.next_token().as_slice(), b""); + assert_eq!(scanner.next_token().as_slice(), b""); + } + assert!(scanner.exhausted()); +} diff --git a/server/src/main.rs b/server/src/main.rs index 34d62a2c..421a6009 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -35,19 +35,20 @@ //! is the most important part of the project. There are several modules within this crate; see //! the modules for their respective documentation. 
// Reconstructed from the patch "Add `Scanner`": the `Life` type appended to
// `server/src/util/mod.rs`.
//
// The same patch also (a) regroups the `use` declarations of `server/src/main.rs` and
// `server/src/util/mod.rs` into nested `use { … }` trees, adding `core::marker::PhantomData`
// to the latter, and (b) registers `mod blueql;` in `server/src/main.rs`.

use core::{marker::PhantomData, ops::Deref};

/// Binds a lifetime `'a` to a value of type `T` without changing how `T` is stored.
///
/// This is yet another compiler hack and has no "actual impact" in terms of memory
/// alignment: the only extra field is a zero-sized `PhantomData` marker.
///
/// Across the source, custom fat pointers with an unbounded lifetime are used wherever a
/// _split mutable borrow_ is hard to express. When a lifetime _can_ be attached without
/// causing a compiler error, it is better to do so, to avoid misuse of those pointers.
/// That is what this type does: it attaches a context-dependent lifetime to a type which
/// preferably has no lifetime of its own (something like an `UnsafeSlice`, for example).
///
/// ## Important notes
/// - the lifetime is inferred from the context the value is created in; if that does not
///   give the intended bound, annotate it explicitly
/// - this type derefs to the base type
#[derive(Debug, PartialEq)]
pub struct Life<'a, T> {
    _lt: PhantomData<&'a T>,
    v: T,
}

impl<'a, T> Life<'a, T> {
    /// Ensure compile-time alignment (this is just a sanity check)
    ///
    /// An associated const of a generic impl is only evaluated when something refers to
    /// it, so [`Life::new`] references it to make the assertion run for every `T` that is
    /// actually wrapped.
    const _ENSURE_COMPILETIME_ALIGN: () =
        assert!(std::mem::align_of::<Self>() == std::mem::align_of::<T>());

    /// Wraps `v`, binding it to the lifetime `'a` chosen by the caller's context.
    #[inline(always)]
    pub const fn new(v: T) -> Self {
        // Force evaluation of the layout sanity check for this `T`.
        let () = Self::_ENSURE_COMPILETIME_ALIGN;
        Life {
            v,
            _lt: PhantomData,
        }
    }
}

impl<'a, T> From<T> for Life<'a, T> {
    /// Equivalent to [`Life::new`].
    fn from(v: T) -> Self {
        Self::new(v)
    }
}

impl<'a, T> Deref for Life<'a, T> {
    type Target = T;
    /// Gives shared access to the wrapped value.
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        &self.v
    }
}

impl<'a, T> AsRef<T> for Life<'a, T> {
    /// Same reference as [`Deref::deref`].
    #[inline(always)]
    fn as_ref(&self) -> &T {
        Deref::deref(self)
    }
}

impl<'a, T: PartialEq> PartialEq<T> for Life<'a, T> {
    /// Compares the wrapped value directly against a bare `T`.
    #[inline(always)]
    fn eq(&self, other: &T) -> bool {
        PartialEq::eq(&self.v, other)
    }
}

// SAFETY: `Life` stores nothing but the `T` value itself; the `PhantomData<&'a T>` marker
// holds no reference. The wrapper is therefore exactly as thread-safe as `T`.
unsafe impl<'a, T: Send> Send for Life<'a, T> {}
unsafe impl<'a, T: Sync> Sync for Life<'a, T> {}