Add `Scanner`

next
Sayan Nandan 2 years ago
parent 06764d3462
commit 3927940afd
No known key found for this signature in database
GPG Key ID: 8BC07A0A4D41DD52

@ -0,0 +1,172 @@
/*
* Created on Thu Jun 09 2022
*
* This file is a part of Skytable
* Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
* NoSQL database written by Sayan Nandan ("the Author") with the
* vision to provide flexibility in data modelling without compromising
* on performance, queryability or scalability.
*
* Copyright (c) 2022, Sayan Nandan <ohsayan@outlook.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
#![allow(dead_code)] // TODO(@ohsayan): Remove this once we're done
use {
crate::util::Life,
core::{marker::PhantomData, slice},
};
#[derive(Debug, Clone, Copy)]
pub struct Slice {
start_ptr: *const u8,
len: usize,
}
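// Note: `Slice` is only a read-only (pointer, length) view into bytes owned elsewhere; the
// safety contracts on `new`/`as_slice` are what keep moving and sharing it across threads sound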
unsafe impl Send for Slice {}
unsafe impl Sync for Slice {}
impl Slice {
/// ## Safety
/// The caller must ensure that `start_ptr` is valid for reads of `len` bytes for as long as the
/// returned `Slice` is used
#[inline(always)]
pub const unsafe fn new(start_ptr: *const u8, len: usize) -> Self {
Slice { start_ptr, len }
}
/// ## Safety
/// The caller must ensure that the pointed-to buffer is still live and unmodified when the
/// returned slice is read
#[inline(always)]
pub unsafe fn as_slice(&self) -> &[u8] {
slice::from_raw_parts(self.start_ptr, self.len)
}
}
impl<'a, T> From<T> for Slice
where
T: AsRef<[u8]> + 'a,
{
#[inline(always)]
fn from(oth: T) -> Self {
unsafe {
let oth = oth.as_ref();
Self::new(oth.as_ptr(), oth.len())
}
}
}
#[inline(always)]
fn find_ptr_distance(start: *const u8, stop: *const u8) -> usize {
stop as usize - start as usize
}
pub struct Scanner<'a> {
cursor: *const u8,
end_ptr: *const u8,
_lt: PhantomData<&'a [u8]>,
}
// init
impl<'a> Scanner<'a> {
#[inline(always)]
const fn new(buf: &'a [u8]) -> Self {
unsafe {
// SAFETY: `add(buf.len())` stays within `buf` (one past the end is allowed)
Self {
cursor: buf.as_ptr(),
end_ptr: buf.as_ptr().add(buf.len()),
_lt: PhantomData,
}
}
}
}
// helpers
impl<'a> Scanner<'a> {
#[inline(always)]
pub fn exhausted(&self) -> bool {
self.cursor >= self.end_ptr
}
#[inline(always)]
pub fn not_exhausted(&self) -> bool {
self.cursor < self.end_ptr
}
}
// parsing
impl<'a> Scanner<'a> {
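/// Scans the next token: everything up to (but not including) the next ` ` byte, after which
/// the cursor is stepped past that separator. Consecutive spaces therefore yield empty tokens,
/// and calling this on an exhausted scanner returns an empty slice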
#[inline(always)]
pub fn next_token(&mut self) -> Slice {
let start_ptr = self.cursor;
let mut ptr = self.cursor;
while self.end_ptr > ptr && unsafe { *ptr != b' ' } {
ptr = unsafe {
// UNSAFE(@ohsayan): The loop init invariant ensures this is safe
ptr.add(1)
};
}
// update the cursor
self.cursor = ptr;
// check whether the scanner is not exhausted and the cursor points to a space
let ptr_is_whitespace = unsafe {
// UNSAFE(@ohsayan): The first operand ensures safety
self.not_exhausted() && *self.cursor == b' '
};
// if the cursor points to a space, move it past the separator
self.cursor = unsafe {
// UNSAFE(@ohsayan): The definition of ptr_is_whitespace ensures correctness
self.cursor.add(ptr_is_whitespace as usize)
};
unsafe {
// UNSAFE(@ohsayan): The start_ptr and size were verified by the above steps
Slice::new(start_ptr, find_ptr_distance(start_ptr, ptr))
}
}
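/// Tokenizes the full buffer into space-separated [`Slice`]s, each wrapped in a [`Life`]
/// bound to `buf`'s lifetime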
pub fn parse_into_tokens(buf: &'a [u8]) -> Vec<Life<'a, Slice>> {
let mut slf = Scanner::new(buf);
let mut r = Vec::new();
while slf.not_exhausted() {
r.push(Life::new(slf.next_token()));
}
r
}
}
#[test]
fn scanner_tokenize() {
let tokens = b"create space app".to_vec();
let scanned_tokens = Scanner::parse_into_tokens(&tokens);
let scanned_tokens: Vec<String> = scanned_tokens
.into_iter()
.map(|tok| unsafe { String::from_utf8_lossy(tok.as_slice()).to_string() })
.collect();
assert_eq!(scanned_tokens, ["create", "space", "app"]);
}
#[test]
fn scanner_step_by_step_tokenize() {
let tokens = b"create space app".to_vec();
let mut scanner = Scanner::new(&tokens);
unsafe {
assert_eq!(scanner.next_token().as_slice(), b"create");
assert_eq!(scanner.next_token().as_slice(), b"space");
assert_eq!(scanner.next_token().as_slice(), b"app");
assert!(scanner.exhausted());
assert_eq!(scanner.next_token().as_slice(), b"");
assert_eq!(scanner.next_token().as_slice(), b"");
assert_eq!(scanner.next_token().as_slice(), b"");
}
assert!(scanner.exhausted());
}

@ -35,19 +35,20 @@
//! is the most important part of the project. There are several modules within this crate; see
//! the modules for their respective documentation.
use crate::diskstore::flock::FileLock;
pub use crate::util::exit_error;
use env_logger::Builder;
use libsky::URL;
use libsky::VERSION;
use std::env;
use std::process;
use {
crate::{config::ConfigurationSet, diskstore::flock::FileLock, util::exit_error},
env_logger::Builder,
libsky::{URL, VERSION},
std::{env, process},
};
#[macro_use]
pub mod util;
mod actions;
mod admin;
mod arbiter;
mod auth;
mod blueql;
mod config;
mod corestore;
mod dbnet;
@ -124,8 +125,6 @@ fn main() {
}
}
use self::config::ConfigurationSet;
/// This function checks the command line arguments and either returns a config object
/// or prints an error to `stderr` and terminates the server
fn check_args_and_get_cfg() -> (ConfigurationSet, Option<String>) {

@ -29,13 +29,14 @@ mod macros;
pub mod compiler;
pub mod error;
pub mod os;
use crate::actions::{ActionError, ActionResult};
use crate::protocol::interface::ProtocolSpec;
use core::fmt::Debug;
use core::future::Future;
use core::ops::Deref;
use core::pin::Pin;
use std::process;
use {
crate::{
actions::{ActionError, ActionResult},
protocol::interface::ProtocolSpec,
},
core::{fmt::Debug, future::Future, marker::PhantomData, ops::Deref, pin::Pin},
std::process,
};
const EXITCODE_ONE: i32 = 0x01;
pub type FutureResult<'s, T> = Pin<Box<dyn Future<Output = T> + Send + Sync + 's>>;
@ -135,3 +136,68 @@ impl<T: Clone> Clone for Wrapper<T> {
}
}
}
#[derive(Debug, PartialEq)]
/// This is yet another compiler hack; it has no "actual impact" on the memory layout or
/// alignment of the wrapped type.
///
/// When it's hard to express a _split mutable borrow_, we use custom fat pointers all across
/// the source; these are inherently unbounded in their lifetimes, which is necessary in cases
/// where binding a lifetime is **impossible**. But whenever a lifetime _can_ be bound without
/// causing a compiler error, it is always good to do so, to avoid misuse of the aforementioned
/// fat pointers. That is exactly what this type does: it binds a context-dependent lifetime to
/// some type which preferably has no other lifetime of its own (something like an `UnsafeSlice`,
/// for example)
///
/// ## Important notes
/// - Lifetimes are captured from the surrounding context by the compiler, so if that ever fails
/// to work, we'll need to annotate the bounds explicitly
/// - This type derefs to the base type
pub struct Life<'a, T> {
_lt: PhantomData<&'a T>,
v: T,
}
impl<'a, T> Life<'a, T> {
/// Compile-time sanity check: `Life<T>` must have the same alignment as `T`
const _ENSURE_COMPILETIME_ALIGN: () =
assert!(std::mem::align_of::<Life<Vec<u8>>>() == std::mem::align_of::<Vec<u8>>());
#[inline(always)]
pub const fn new(v: T) -> Self {
Life {
v,
_lt: PhantomData,
}
}
}
impl<'a, T> From<T> for Life<'a, T> {
fn from(v: T) -> Self {
Self::new(v)
}
}
impl<'a, T> Deref for Life<'a, T> {
type Target = T;
#[inline(always)]
fn deref(&self) -> &Self::Target {
&self.v
}
}
impl<'a, T> AsRef<T> for Life<'a, T> {
#[inline(always)]
fn as_ref(&self) -> &T {
Deref::deref(self)
}
}
impl<'a, T: PartialEq> PartialEq<T> for Life<'a, T> {
#[inline(always)]
fn eq(&self, other: &T) -> bool {
PartialEq::eq(&self.v, other)
}
}
unsafe impl<'a, T: Send> Send for Life<'a, T> {}
unsafe impl<'a, T: Sync> Sync for Life<'a, T> {}
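To make the intent described in the `Life` doc comment above concrete, here is a minimal,
illustrative sketch (not part of this commit; the function name is made up for illustration) of
how `Life` ties the raw-pointer `Slice` tokens produced by the new `Scanner` back to their
backing buffer, assuming those items are in scope:

fn tokens_are_bound_to_their_buffer() {
    let buf = b"create space app".to_vec();
    // `parse_into_tokens` returns `Vec<Life<'_, Slice>>`, so every token borrows `buf`
    let tokens = Scanner::parse_into_tokens(&buf);
    // drop(buf); // <- would not compile: `buf` is still borrowed through the tokens
    unsafe {
        // SAFETY: `buf` is alive (and borrowed) here, so reading the slices is fine
        assert_eq!(tokens[0].as_slice(), b"create");
    }
}

Since `PhantomData` is zero-sized, wrapping a value in `Life` adds no runtime data; the
compile-time alignment assertion above is the sanity check for that.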
