From a390120231ab364c078e9a763bd592432e2e7d85 Mon Sep 17 00:00:00 2001 From: Sayan Nandan Date: Tue, 19 Sep 2023 18:57:05 +0000 Subject: [PATCH] Cleanup lexer impl --- Cargo.toml | 2 +- server/src/engine/core/dml/mod.rs | 4 +- server/src/engine/core/dml/upd.rs | 87 +- server/src/engine/core/index/key.rs | 41 +- server/src/engine/core/index/mod.rs | 6 +- server/src/engine/core/tests/dml/mod.rs | 4 +- server/src/engine/data/cell.rs | 29 +- server/src/engine/data/dict.rs | 15 +- server/src/engine/data/lit.rs | 508 +++++----- server/src/engine/data/macros.rs | 60 -- server/src/engine/data/mod.rs | 3 - server/src/engine/data/spec.rs | 310 ------- server/src/engine/data/tests/mod.rs | 16 +- server/src/engine/mem/mod.rs | 6 + server/src/engine/mem/scanner.rs | 326 +++++-- server/src/engine/mem/tests/mod.rs | 1 + server/src/engine/mem/tests/scanner.rs | 249 +++++ server/src/engine/net/mod.rs | 2 +- .../src/engine/net/protocol/data_exchange.rs | 10 +- server/src/engine/net/protocol/handshake.rs | 26 +- server/src/engine/ql/ast/mod.rs | 46 +- server/src/engine/ql/benches.rs | 2 +- server/src/engine/ql/dml/mod.rs | 8 +- server/src/engine/ql/dml/upd.rs | 6 +- server/src/engine/ql/lex/mod.rs | 878 ++++++++---------- server/src/engine/ql/lex/raw.rs | 181 +--- server/src/engine/ql/macros.rs | 8 + server/src/engine/ql/tests.rs | 8 +- server/src/engine/ql/tests/dml_tests.rs | 166 +--- server/src/engine/ql/tests/lexer_tests.rs | 510 ++++------ server/src/engine/ql/tests/schema_tests.rs | 101 +- server/src/engine/ql/tests/structure_syn.rs | 42 +- 32 files changed, 1619 insertions(+), 2042 deletions(-) delete mode 100644 server/src/engine/data/macros.rs delete mode 100644 server/src/engine/data/spec.rs create mode 100644 server/src/engine/mem/tests/scanner.rs diff --git a/Cargo.toml b/Cargo.toml index f3b41279..448c2321 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -resolver = "2" +resolver = "1" members = [ "cli", "server", diff --git 
a/server/src/engine/core/dml/mod.rs b/server/src/engine/core/dml/mod.rs index bb65edbf..4cf41c44 100644 --- a/server/src/engine/core/dml/mod.rs +++ b/server/src/engine/core/dml/mod.rs @@ -32,7 +32,7 @@ mod upd; use crate::{ engine::{ core::model::Model, - data::{lit::LitIR, spec::DataspecMeta1D, tag::DataTag}, + data::{lit::Lit, tag::DataTag}, error::{Error, QueryResult}, ql::dml::WhereClause, }, @@ -47,7 +47,7 @@ impl Model { pub(self) fn resolve_where<'a>( &self, where_clause: &mut WhereClause<'a>, - ) -> QueryResult> { + ) -> QueryResult> { match where_clause.clauses_mut().remove(self.p_key().as_bytes()) { Some(clause) if clause.filter_hint_none() diff --git a/server/src/engine/core/dml/upd.rs b/server/src/engine/core/dml/upd.rs index 3586168f..95bd6089 100644 --- a/server/src/engine/core/dml/upd.rs +++ b/server/src/engine/core/dml/upd.rs @@ -33,8 +33,7 @@ use { core::{self, model::delta::DataDeltaKind, query_meta::AssignmentOperator}, data::{ cell::Datacell, - lit::LitIR, - spec::{Dataspec1D, DataspecMeta1D}, + lit::Lit, tag::{DataTag, TagClass}, }, error::{Error, QueryResult}, @@ -49,51 +48,51 @@ use { }; #[inline(always)] -unsafe fn dc_op_fail(_: &Datacell, _: LitIR) -> (bool, Datacell) { +unsafe fn dc_op_fail(_: &Datacell, _: Lit) -> (bool, Datacell) { (false, Datacell::null()) } // bool -unsafe fn dc_op_bool_ass(_: &Datacell, rhs: LitIR) -> (bool, Datacell) { - (true, Datacell::new_bool(rhs.read_bool_uck())) +unsafe fn dc_op_bool_ass(_: &Datacell, rhs: Lit) -> (bool, Datacell) { + (true, Datacell::new_bool(rhs.bool())) } // uint -unsafe fn dc_op_uint_ass(_: &Datacell, rhs: LitIR) -> (bool, Datacell) { - (true, Datacell::new_uint(rhs.read_uint_uck())) +unsafe fn dc_op_uint_ass(_: &Datacell, rhs: Lit) -> (bool, Datacell) { + (true, Datacell::new_uint(rhs.uint())) } -unsafe fn dc_op_uint_add(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { - let (sum, of) = dc.read_uint().overflowing_add(rhs.read_uint_uck()); +unsafe fn dc_op_uint_add(dc: &Datacell, rhs: Lit) 
-> (bool, Datacell) { + let (sum, of) = dc.read_uint().overflowing_add(rhs.uint()); (of, Datacell::new_uint(sum)) } -unsafe fn dc_op_uint_sub(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { - let (diff, of) = dc.read_uint().overflowing_sub(rhs.read_uint_uck()); +unsafe fn dc_op_uint_sub(dc: &Datacell, rhs: Lit) -> (bool, Datacell) { + let (diff, of) = dc.read_uint().overflowing_sub(rhs.uint()); (of, Datacell::new_uint(diff)) } -unsafe fn dc_op_uint_mul(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { - let (prod, of) = dc.read_uint().overflowing_mul(rhs.read_uint_uck()); +unsafe fn dc_op_uint_mul(dc: &Datacell, rhs: Lit) -> (bool, Datacell) { + let (prod, of) = dc.read_uint().overflowing_mul(rhs.uint()); (of, Datacell::new_uint(prod)) } -unsafe fn dc_op_uint_div(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { - let (quo, of) = dc.read_uint().overflowing_div(rhs.read_uint_uck()); +unsafe fn dc_op_uint_div(dc: &Datacell, rhs: Lit) -> (bool, Datacell) { + let (quo, of) = dc.read_uint().overflowing_div(rhs.uint()); (of, Datacell::new_uint(quo)) } // sint -unsafe fn dc_op_sint_ass(_: &Datacell, rhs: LitIR) -> (bool, Datacell) { - (true, Datacell::new_sint(rhs.read_sint_uck())) +unsafe fn dc_op_sint_ass(_: &Datacell, rhs: Lit) -> (bool, Datacell) { + (true, Datacell::new_sint(rhs.sint())) } -unsafe fn dc_op_sint_add(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { - let (sum, of) = dc.read_sint().overflowing_add(rhs.read_sint_uck()); +unsafe fn dc_op_sint_add(dc: &Datacell, rhs: Lit) -> (bool, Datacell) { + let (sum, of) = dc.read_sint().overflowing_add(rhs.sint()); (of, Datacell::new_sint(sum)) } -unsafe fn dc_op_sint_sub(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { - let (diff, of) = dc.read_sint().overflowing_sub(rhs.read_sint_uck()); +unsafe fn dc_op_sint_sub(dc: &Datacell, rhs: Lit) -> (bool, Datacell) { + let (diff, of) = dc.read_sint().overflowing_sub(rhs.sint()); (of, Datacell::new_sint(diff)) } -unsafe fn dc_op_sint_mul(dc: &Datacell, rhs: LitIR) -> (bool, 
Datacell) { - let (prod, of) = dc.read_sint().overflowing_mul(rhs.read_sint_uck()); +unsafe fn dc_op_sint_mul(dc: &Datacell, rhs: Lit) -> (bool, Datacell) { + let (prod, of) = dc.read_sint().overflowing_mul(rhs.sint()); (of, Datacell::new_sint(prod)) } -unsafe fn dc_op_sint_div(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { - let (quo, of) = dc.read_sint().overflowing_div(rhs.read_sint_uck()); +unsafe fn dc_op_sint_div(dc: &Datacell, rhs: Lit) -> (bool, Datacell) { + let (quo, of) = dc.read_sint().overflowing_div(rhs.sint()); (of, Datacell::new_sint(quo)) } /* @@ -106,28 +105,28 @@ unsafe fn dc_op_sint_div(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { -- TODO(@ohsayan): account for float32 overflow */ -unsafe fn dc_op_float_ass(_: &Datacell, rhs: LitIR) -> (bool, Datacell) { - (true, Datacell::new_float(rhs.read_float_uck())) +unsafe fn dc_op_float_ass(_: &Datacell, rhs: Lit) -> (bool, Datacell) { + (true, Datacell::new_float(rhs.float())) } -unsafe fn dc_op_float_add(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { - let sum = dc.read_float() + rhs.read_float_uck(); +unsafe fn dc_op_float_add(dc: &Datacell, rhs: Lit) -> (bool, Datacell) { + let sum = dc.read_float() + rhs.float(); (true, Datacell::new_float(sum)) } -unsafe fn dc_op_float_sub(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { - let diff = dc.read_float() - rhs.read_float_uck(); +unsafe fn dc_op_float_sub(dc: &Datacell, rhs: Lit) -> (bool, Datacell) { + let diff = dc.read_float() - rhs.float(); (true, Datacell::new_float(diff)) } -unsafe fn dc_op_float_mul(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { - let prod = dc.read_float() - rhs.read_float_uck(); +unsafe fn dc_op_float_mul(dc: &Datacell, rhs: Lit) -> (bool, Datacell) { + let prod = dc.read_float() - rhs.float(); (true, Datacell::new_float(prod)) } -unsafe fn dc_op_float_div(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { - let quo = dc.read_float() * rhs.read_float_uck(); +unsafe fn dc_op_float_div(dc: &Datacell, rhs: Lit) -> (bool, 
Datacell) { + let quo = dc.read_float() * rhs.float(); (true, Datacell::new_float(quo)) } // binary -unsafe fn dc_op_bin_ass(_dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { - let new_bin = rhs.read_bin_uck(); +unsafe fn dc_op_bin_ass(_dc: &Datacell, rhs: Lit) -> (bool, Datacell) { + let new_bin = rhs.bin(); let mut v = Vec::new(); if v.try_reserve_exact(new_bin.len()).is_err() { return dc_op_fail(_dc, rhs); @@ -135,8 +134,8 @@ unsafe fn dc_op_bin_ass(_dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { v.extend_from_slice(new_bin); (true, Datacell::new_bin(v.into_boxed_slice())) } -unsafe fn dc_op_bin_add(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { - let push_into_bin = rhs.read_bin_uck(); +unsafe fn dc_op_bin_add(dc: &Datacell, rhs: Lit) -> (bool, Datacell) { + let push_into_bin = rhs.bin(); let mut bin = Vec::new(); if compiler::unlikely(bin.try_reserve_exact(push_into_bin.len()).is_err()) { return dc_op_fail(dc, rhs); @@ -146,8 +145,8 @@ unsafe fn dc_op_bin_add(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { (true, Datacell::new_bin(bin.into_boxed_slice())) } // string -unsafe fn dc_op_str_ass(_dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { - let new_str = rhs.read_str_uck(); +unsafe fn dc_op_str_ass(_dc: &Datacell, rhs: Lit) -> (bool, Datacell) { + let new_str = rhs.str(); let mut v = String::new(); if v.try_reserve_exact(new_str.len()).is_err() { return dc_op_fail(_dc, rhs); @@ -155,8 +154,8 @@ unsafe fn dc_op_str_ass(_dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { v.push_str(new_str); (true, Datacell::new_str(v.into_boxed_str())) } -unsafe fn dc_op_str_add(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { - let push_into_str = rhs.read_str_uck(); +unsafe fn dc_op_str_add(dc: &Datacell, rhs: Lit) -> (bool, Datacell) { + let push_into_str = rhs.str(); let mut str = String::new(); if compiler::unlikely(str.try_reserve_exact(push_into_str.len()).is_err()) { return dc_op_fail(dc, rhs); @@ -166,7 +165,7 @@ unsafe fn dc_op_str_add(dc: &Datacell, rhs: LitIR) 
-> (bool, Datacell) { (true, Datacell::new_str(str.into_boxed_str())) } -static OPERATOR: [unsafe fn(&Datacell, LitIR) -> (bool, Datacell); { +static OPERATOR: [unsafe fn(&Datacell, Lit) -> (bool, Datacell); { TagClass::MAX as usize * AssignmentOperator::VARIANTS }] = [ // bool diff --git a/server/src/engine/core/index/key.rs b/server/src/engine/core/index/key.rs index 470168c9..f8d8280e 100644 --- a/server/src/engine/core/index/key.rs +++ b/server/src/engine/core/index/key.rs @@ -26,13 +26,12 @@ use crate::engine::mem::ZERO_BLOCK; #[cfg(test)] -use crate::{engine::data::spec::Dataspec1D, util::test_utils}; +use crate::util::test_utils; use { crate::engine::{ data::{ cell::Datacell, - lit::LitIR, - spec::DataspecMeta1D, + lit::Lit, tag::{DataTag, TagUnique}, }, idx::meta::Comparable, @@ -245,22 +244,22 @@ impl Hash for PrimaryIndexKey { } } -impl<'a> PartialEq> for PrimaryIndexKey { - fn eq(&self, key: &LitIR<'a>) -> bool { +impl<'a> PartialEq> for PrimaryIndexKey { + fn eq(&self, key: &Lit<'a>) -> bool { debug_assert!(key.kind().tag_unique().is_unique()); self.tag == key.kind().tag_unique() && self.virtual_block() == key.__vdata() } } -impl<'a> Comparable> for PrimaryIndexKey { - fn cmp_eq(&self, key: &LitIR<'a>) -> bool { - >::eq(self, key) +impl<'a> Comparable> for PrimaryIndexKey { + fn cmp_eq(&self, key: &Lit<'a>) -> bool { + >::eq(self, key) } } -impl<'a> Comparable for LitIR<'a> { +impl<'a> Comparable for Lit<'a> { fn cmp_eq(&self, key: &PrimaryIndexKey) -> bool { - >::eq(key, self) + >::eq(key, self) } } @@ -333,16 +332,16 @@ fn check_pk_eq_hash() { fn check_pk_lit_eq_hash() { let state = test_utils::randomstate(); let data = [ - LitIR::UnsignedInt(100), - LitIR::SignedInt(-100), - LitIR::Bin(b"binary bro"), - LitIR::Str("string bro"), + Lit::new_uint(100), + Lit::new_sint(-100), + Lit::new_bin(b"binary bro"), + Lit::new_str("string bro"), ]; - for litir in data { - let pk = PrimaryIndexKey::try_from_dc(Datacell::from(litir.clone())).unwrap(); - 
assert_eq!(pk, litir); + for lit in data { + let pk = PrimaryIndexKey::try_from_dc(Datacell::from(lit.clone())).unwrap(); + assert_eq!(pk, lit); assert_eq!( - test_utils::hash_rs(&state, &litir), + test_utils::hash_rs(&state, &lit), test_utils::hash_rs(&state, &pk) ); } @@ -352,7 +351,7 @@ fn check_pk_lit_eq_hash() { fn check_pk_extremes() { let state = test_utils::randomstate(); let d1 = PrimaryIndexKey::try_from_dc(Datacell::new_uint(u64::MAX)).unwrap(); - let d2 = PrimaryIndexKey::try_from_dc(Datacell::from(LitIR::UnsignedInt(u64::MAX))).unwrap(); + let d2 = PrimaryIndexKey::try_from_dc(Datacell::from(Lit::new_uint(u64::MAX))).unwrap(); assert_eq!(d1, d2); assert_eq!(d1.uint().unwrap(), u64::MAX); assert_eq!(d2.uint().unwrap(), u64::MAX); @@ -360,7 +359,7 @@ fn check_pk_extremes() { test_utils::hash_rs(&state, &d1), test_utils::hash_rs(&state, &d2) ); - assert_eq!(d1, LitIR::UnsignedInt(u64::MAX)); - assert_eq!(d2, LitIR::UnsignedInt(u64::MAX)); + assert_eq!(d1, Lit::new_uint(u64::MAX)); + assert_eq!(d2, Lit::new_uint(u64::MAX)); assert_eq!(d1.uint().unwrap(), u64::MAX); } diff --git a/server/src/engine/core/index/mod.rs b/server/src/engine/core/index/mod.rs index 4fafe5f3..ab40fbc5 100644 --- a/server/src/engine/core/index/mod.rs +++ b/server/src/engine/core/index/mod.rs @@ -28,7 +28,7 @@ mod key; mod row; use crate::engine::{ - data::lit::LitIR, + data::lit::Lit, idx::{IndexBaseSpec, IndexMTRaw, MTIndex}, sync::atm::Guard, }; @@ -49,12 +49,12 @@ impl PrimaryIndex { data: IndexMTRaw::idx_init(), } } - pub fn remove<'a>(&self, key: LitIR<'a>, g: &Guard) -> bool { + pub fn remove<'a>(&self, key: Lit<'a>, g: &Guard) -> bool { self.data.mt_delete(&key, g) } pub fn select<'a, 'v, 't: 'v, 'g: 't>( &'t self, - key: LitIR<'a>, + key: Lit<'a>, g: &'g Guard, ) -> Option<&'v Row> { self.data.mt_get_element(&key, g) diff --git a/server/src/engine/core/tests/dml/mod.rs b/server/src/engine/core/tests/dml/mod.rs index b2bcdc0e..9d9fa230 100644 --- 
a/server/src/engine/core/tests/dml/mod.rs +++ b/server/src/engine/core/tests/dml/mod.rs @@ -31,7 +31,7 @@ mod update; use crate::engine::{ core::{dml, index::Row, model::Model}, - data::{cell::Datacell, lit::LitIR}, + data::{cell::Datacell, lit::Lit}, error::QueryResult, fractal::GlobalInstanceLike, ql::{ @@ -75,7 +75,7 @@ fn _exec_only_read_key_and_then( let _irm = mdl.intent_read_model(); let row = mdl .primary_index() - .select(LitIR::from(key_name), &guard) + .select(Lit::from(key_name), &guard) .unwrap() .clone(); drop(guard); diff --git a/server/src/engine/data/cell.rs b/server/src/engine/data/cell.rs index 95017988..1cce866d 100644 --- a/server/src/engine/data/cell.rs +++ b/server/src/engine/data/cell.rs @@ -30,8 +30,7 @@ use { crate::engine::{ self, data::{ - lit::{Lit, LitIR}, - spec::{Dataspec1D, DataspecMeta1D}, + lit::Lit, tag::{CUTag, DataTag, TagClass}, }, mem::{DwordNN, DwordQN, NativeQword, SpecialPaddedWord, WordIO}, @@ -228,8 +227,8 @@ direct_from! { } } -impl<'a> From> for Datacell { - fn from(l: LitIR<'a>) -> Self { +impl<'a> From> for Datacell { + fn from(l: Lit<'a>) -> Self { match l.kind().tag_class() { tag if tag < TagClass::Bin => unsafe { // UNSAFE(@ohsayan): Correct because we are using the same tag, and in this case the type doesn't need any advanced construction @@ -241,7 +240,7 @@ impl<'a> From> for Datacell { }, TagClass::Bin | TagClass::Str => unsafe { // UNSAFE(@ohsayan): Correct because we are using the same tag, and in this case the type requires a new heap for construction - let mut bin = ManuallyDrop::new(l.read_bin_uck().to_owned().into_boxed_slice()); + let mut bin = ManuallyDrop::new(l.bin().to_owned().into_boxed_slice()); Datacell::new( CUTag::from(l.kind()), DataRaw::word(DwordQN::dwordqn_store_qw_nw( @@ -269,12 +268,6 @@ impl From for Datacell { } } -impl<'a> From> for Datacell { - fn from(l: Lit<'a>) -> Self { - Self::from(l.as_ir()) - } -} - impl From<[Datacell; N]> for Datacell { fn from(l: [Datacell; N]) -> Self { 
Self::new_list(l.into()) @@ -459,17 +452,17 @@ impl Clone for Datacell { #[derive(Debug)] pub struct VirtualDatacell<'a> { dc: ManuallyDrop, - _lt: PhantomData>, + _lt: PhantomData>, } impl<'a> VirtualDatacell<'a> { - pub fn new(litir: LitIR<'a>) -> Self { + pub fn new(lit: Lit<'a>) -> Self { Self { dc: ManuallyDrop::new(unsafe { // UNSAFE(@ohsayan): this is a "reference" to a "virtual" aka fake DC. this just works because of memory layouts Datacell::new( - CUTag::from(litir.kind()), - DataRaw::word(litir.data().dwordqn_promote()), + CUTag::from(lit.kind()), + DataRaw::word(lit.data().dwordqn_promote()), ) }), _lt: PhantomData, @@ -477,8 +470,8 @@ impl<'a> VirtualDatacell<'a> { } } -impl<'a> From> for VirtualDatacell<'a> { - fn from(l: LitIR<'a>) -> Self { +impl<'a> From> for VirtualDatacell<'a> { + fn from(l: Lit<'a>) -> Self { Self::new(l) } } @@ -504,6 +497,6 @@ impl<'a> Clone for VirtualDatacell<'a> { #[test] fn virtual_dc_damn() { - let dc = LitIR::Str("hello, world"); + let dc = Lit::new_str("hello, world"); assert_eq!(VirtualDatacell::from(dc), Datacell::from("hello, world")); } diff --git a/server/src/engine/data/dict.rs b/server/src/engine/data/dict.rs index 6dc1894f..e67cd71d 100644 --- a/server/src/engine/data/dict.rs +++ b/server/src/engine/data/dict.rs @@ -26,10 +26,7 @@ use { crate::engine::{ - data::{ - cell::Datacell, - lit::{Lit, LitIR}, - }, + data::{cell::Datacell, lit::Lit}, idx::STIndex, }, std::collections::HashMap, @@ -181,15 +178,9 @@ fn rmerge_metadata_prepare_patch( impls */ -impl<'a> From> for DictEntryGeneric { - fn from(l: LitIR<'a>) -> Self { - Self::Data(Datacell::from(l)) - } -} - impl<'a> From> for DictEntryGeneric { - fn from(value: Lit<'a>) -> Self { - Self::Data(Datacell::from(value)) + fn from(l: Lit<'a>) -> Self { + Self::Data(Datacell::from(l)) } } diff --git a/server/src/engine/data/lit.rs b/server/src/engine/data/lit.rs index d40b02ff..e48e3d6d 100644 --- a/server/src/engine/data/lit.rs +++ b/server/src/engine/data/lit.rs @@ 
-1,5 +1,5 @@ /* - * Created on Sun Feb 26 2023 + * Created on Wed Sep 20 2023 * * This file is a part of Skytable * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source @@ -25,166 +25,186 @@ */ use { - super::{ - spec::{Dataspec1D, DataspecMeta1D, DataspecMethods1D, DataspecRaw1D}, - tag::{DataTag, FullTag, TagUnique}, + crate::engine::{ + data::tag::{DataTag, FullTag, TagClass, TagUnique}, + mem::{DwordQN, SpecialPaddedWord}, }, - crate::engine::mem::{DwordQN, SpecialPaddedWord, WordIO}, core::{ fmt, hash::{Hash, Hasher}, marker::PhantomData, - mem::{self, ManuallyDrop}, - slice, + mem::ManuallyDrop, + slice, str, }, }; /* - Lit + NOTE(@ohsayan): Heinous hackery that should not ever be repeated. Just don't touch anything here. */ +/// A literal representation pub struct Lit<'a> { - data: SpecialPaddedWord, tag: FullTag, + dtc: u8, + word: SpecialPaddedWord, _lt: PhantomData<&'a [u8]>, } impl<'a> Lit<'a> { - pub fn as_ir(&'a self) -> LitIR<'a> { - unsafe { - // UNSAFE(@ohsayan): 'tis the lifetime. 
'tis the savior - mem::transmute_copy(self) - } + /// Create a new bool literal + pub fn new_bool(b: bool) -> Self { + Self::_quad(b as _, FullTag::BOOL) } -} - -impl<'a> DataspecMeta1D for Lit<'a> { - type Tag = FullTag; - type Target = SpecialPaddedWord; - type StringItem = Box; - fn new(flag: Self::Tag, data: Self::Target) -> Self { - Self { - data, - tag: flag, - _lt: PhantomData, - } + /// Create a new unsigned integer + pub fn new_uint(u: u64) -> Self { + Self::_quad(u, FullTag::UINT) } - fn kind(&self) -> Self::Tag { - self.tag + /// Create a new signed integer + pub fn new_sint(s: i64) -> Self { + Self::_quad(s as _, FullTag::SINT) + } + /// Create a new float64 + pub fn new_float(f: f64) -> Self { + Self::_quad(f.to_bits(), FullTag::FLOAT) } - fn data(&self) -> Self::Target { + /// Returns a "shallow clone" + /// + /// This function will fall apart if lifetimes aren't handled correctly (aka will segfault) + pub fn as_ir(&'a self) -> Lit<'a> { unsafe { - // UNSAFE(@ohsayan): This function doesn't create any clones, so we're good - mem::transmute_copy(self) + // UNSAFE(@ohsayan): this is a dirty, uncanny and wild hack that everyone should be forbidden from doing + let mut slf: Lit<'a> = core::mem::transmute_copy(self); + slf.dtc = Self::DTC_NONE; + slf } } } -/* - UNSAFE(@ohsayan): Safety checks: - - Heap str: yes - - Heap bin: no - - Drop str: yes, dealloc - - Drop bin: not needed - - Clone str: yes, alloc - - Clone bin: not needed -*/ -unsafe impl<'a> DataspecRaw1D for Lit<'a> { - const HEAP_STR: bool = true; - const HEAP_BIN: bool = false; - unsafe fn drop_str(&mut self) { - let (len, ptr) = self.data().load(); - drop(String::from_raw_parts(ptr, len, len)); +impl<'a> Lit<'a> { + /// Attempt to read a bool + pub fn try_bool(&self) -> Option { + (self.tag.tag_class() == TagClass::Bool).then_some(unsafe { + // UNSAFE(@ohsayan): +tagck + self.bool() + }) } - unsafe fn drop_bin(&mut self) {} - unsafe fn clone_str(s: &str) -> Self::Target { - let new_string = 
ManuallyDrop::new(s.to_owned().into_boxed_str()); - WordIO::store((new_string.len(), new_string.as_ptr())) + /// Attempt to read an unsigned integer + pub fn try_uint(&self) -> Option { + (self.tag.tag_class() == TagClass::UnsignedInt).then_some(unsafe { + // UNSAFE(@ohsayan): +tagck + self.uint() + }) } - unsafe fn clone_bin(b: &[u8]) -> Self::Target { - WordIO::store((b.len(), b.as_ptr())) + /// Attempt to read a signed integer + pub fn try_sint(&self) -> Option { + (self.tag.tag_class() == TagClass::SignedInt).then_some(unsafe { + // UNSAFE(@ohsayan): +tagck + self.sint() + }) } -} - -/* - UNSAFE(@ohsayan): Safety checks: - - We LEAK memory because, duh - - We don't touch our own targets, ever (well, I'm a bad boy so I do touch it in fmt::Debug) -*/ -unsafe impl<'a> Dataspec1D for Lit<'a> { - fn Str(s: Box) -> Self { - let md = ManuallyDrop::new(s); - Self::new(FullTag::STR, WordIO::store((md.len(), md.as_ptr()))) + /// Attempt to read a float + pub fn try_float(&self) -> Option { + (self.tag.tag_class() == TagClass::Float).then_some(unsafe { + // UNSAFE(@ohsayan): +tagck + self.float() + }) } -} - -/* - UNSAFE(@ohsayan): - - No target touch -*/ -unsafe impl<'a> DataspecMethods1D for Lit<'a> {} - -impl<'a, T: DataspecMethods1D> PartialEq for Lit<'a> { - fn eq(&self, other: &T) -> bool { - ::self_eq(self, other) + /// Read a bool directly. This function isn't exactly unsafe, but we want to provide a type preserving API + pub unsafe fn bool(&self) -> bool { + self.uint() == 1 } -} -impl<'a> fmt::Debug for Lit<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut f = f.debug_struct("Lit"); - f.field("tag", &self.tag); - self.self_fmt_debug_data("data", &mut f); - f.field("_lt", &self._lt); - f.finish() + /// Read an unsigned integer directly. 
This function isn't exactly unsafe, but we want to provide a type + /// preserving API + pub unsafe fn uint(&self) -> u64 { + self.word.dwordqn_load_qw_nw().0 } -} - -impl<'a> Drop for Lit<'a> { - fn drop(&mut self) { - self.self_drop(); + /// Read a signed integer directly. This function isn't exactly unsafe, but we want to provide a type + /// preserving API + pub unsafe fn sint(&self) -> i64 { + self.uint() as _ } -} - -impl<'a> Clone for Lit<'a> { - fn clone(&self) -> Self { - self.self_clone() + /// Read a floating point number directly. This function isn't exactly unsafe, but we want to provide a type + /// preserving API + pub unsafe fn float(&self) -> f64 { + f64::from_bits(self.uint()) } } -impl<'a> ToString for Lit<'a> { - fn to_string(&self) -> String { - ::to_string_debug(self) +impl<'a> Lit<'a> { + /// Attempt to read a binary value + pub fn try_bin(&self) -> Option<&'a [u8]> { + (self.tag.tag_class() == TagClass::Bin).then(|| unsafe { + // UNSAFE(@ohsayan): +tagck + self.bin() + }) } -} - -direct_from! { - Lit<'a> => { - bool as Bool, - u64 as UnsignedInt, - i64 as SignedInt, - f64 as Float, - &'a str as Str, - String as Str, - Box as Str, - &'a [u8] as Bin, + /// Attempt to read a string value + pub fn try_str(&self) -> Option<&'a str> { + (self.tag.tag_class() == TagClass::Str).then(|| unsafe { + // UNSAFE(@ohsayan): +tagck + self.str() + }) + } + /// Read a string value directly + /// + /// ## Safety + /// The underlying repr MUST be a string. Otherwise you'll segfault or cause other library functions to misbehave + pub unsafe fn str(&self) -> &'a str { + str::from_utf8_unchecked(self.bin()) + } + /// Read a binary value directly + /// + /// ## Safety + /// The underlying repr MUST be a string. 
Otherwise you'll segfault + pub unsafe fn bin(&self) -> &'a [u8] { + let (q, n) = self.word.dwordqn_load_qw_nw(); + slice::from_raw_parts(n as *const u8 as *mut u8, q as _) } } -/* - LitIR -*/ - -/// ☒️TRAIT WARNING☒️: The [`Hash`] implementation is strictly intended for usage with [`crate::engine::core`] components ONLY. This will FAIL and PRODUCE INCORRECT results -/// when used elsewhere -pub struct LitIR<'a> { - tag: FullTag, - data: SpecialPaddedWord, - _lt: PhantomData<&'a str>, +impl<'a> Lit<'a> { + /// Create a new string (referenced) + pub fn new_str(s: &'a str) -> Self { + unsafe { + /* + UNSAFE(@ohsayan): the mut cast is just for typesake so it doesn't matter while we also set DTC + to none so it shouldn't matter anyway + */ + Self::_str(s.as_ptr() as *mut u8, s.len(), Self::DTC_NONE) + } + } + /// Create a new boxed string + pub fn new_boxed_str(s: Box) -> Self { + let mut md = ManuallyDrop::new(s); // mut -> aliasing! + unsafe { + // UNSAFE(@ohsayan): correct aliasing, and DTC to destroy heap + Self::_str(md.as_mut_ptr(), md.len(), Self::DTC_HSTR) + } + } + /// Create a new string + pub fn new_string(s: String) -> Self { + Self::new_boxed_str(s.into_boxed_str()) + } + /// Create a new binary (referenced) + pub fn new_bin(b: &'a [u8]) -> Self { + unsafe { + // UNSAFE(@ohsayan): mut cast is once again just a typesake change + Self::_wide_word(b.as_ptr() as *mut _, b.len(), Self::DTC_NONE, FullTag::BIN) + } + } } -impl<'a> LitIR<'a> { - pub fn __vdata(&self) -> &[u8] { - let (vlen, data) = self.data().dwordqn_load_qw_nw(); +impl<'a> Lit<'a> { + /// Returns the type of this literal + pub fn kind(&self) -> FullTag { + self.tag + } + /// Returns the internal representation of this type + pub unsafe fn data(&self) -> &SpecialPaddedWord { + &self.word + } + pub fn __vdata(&self) -> &'a [u8] { + let (vlen, data) = self.word.dwordqn_load_qw_nw(); let len = vlen as usize * (self.kind().tag_unique() >= TagUnique::Bin) as usize; unsafe { // UNSAFE(@ohsayan): either 
because of static or lt @@ -193,136 +213,192 @@ impl<'a> LitIR<'a> { } } -impl<'a> Hash for LitIR<'a> { - fn hash(&self, state: &mut H) { - self.tag.tag_unique().hash(state); - self.__vdata().hash(state); - } -} - -impl<'a> DataspecMeta1D for LitIR<'a> { - type Target = SpecialPaddedWord; - type StringItem = &'a str; - type Tag = FullTag; - fn new(flag: Self::Tag, data: Self::Target) -> Self { +impl<'a> Lit<'a> { + const DTC_NONE: u8 = 0; + const DTC_HSTR: u8 = 1; + unsafe fn _new(tag: FullTag, dtc: u8, word: SpecialPaddedWord) -> Self { Self { - tag: flag, - data, + tag, + dtc, + word, _lt: PhantomData, } } - fn kind(&self) -> Self::Tag { - self.tag - } - fn data(&self) -> Self::Target { + fn _quad(quad: u64, tag: FullTag) -> Self { unsafe { - // UNSAFE(@ohsayan): We can freely copy our stack because everything is already allocated - mem::transmute_copy(self) + // UNSAFE(@ohsayan): we initialize the correct bit pattern + Self::_new(tag, Self::DTC_NONE, SpecialPaddedWord::new_quad(quad)) } } -} - -/* - UNSAFE(@ohsayan): Safety: - - Heap str: no - - Heap bin: no - - Drop str: no - - Drop bin: no - - Clone str: stack - - Clone bin: stack -*/ -unsafe impl<'a> DataspecRaw1D for LitIR<'a> { - const HEAP_STR: bool = false; - const HEAP_BIN: bool = false; - unsafe fn drop_str(&mut self) {} - unsafe fn drop_bin(&mut self) {} - unsafe fn clone_str(s: &str) -> Self::Target { - WordIO::store((s.len(), s.as_ptr())) + unsafe fn _wide_word(ptr: *mut u8, len: usize, dtc: u8, tag: FullTag) -> Self { + Self::_new(tag, dtc, SpecialPaddedWord::new(len as _, ptr as _)) } - unsafe fn clone_bin(b: &[u8]) -> Self::Target { - WordIO::store((b.len(), b.as_ptr())) + unsafe fn _str(ptr: *mut u8, len: usize, dtc: u8) -> Self { + Self::_wide_word(ptr, len, dtc, FullTag::STR) } -} - -/* - UNSAFE(@ohsayan): Safety: - - No touches :) -*/ -unsafe impl<'a> Dataspec1D for LitIR<'a> { - fn Str(s: Self::StringItem) -> Self { - Self::new(FullTag::STR, WordIO::store((s.len(), s.as_ptr()))) + unsafe fn 
_drop_zero(_: SpecialPaddedWord) {} + unsafe fn _drop_hstr(word: SpecialPaddedWord) { + let (a, b) = word.dwordqn_load_qw_nw(); + drop(Vec::from_raw_parts( + b as *const u8 as *mut u8, + a as _, + a as _, + )); } } -impl<'a> ToString for LitIR<'a> { - fn to_string(&self) -> String { - ::to_string_debug(self) +impl<'a> Drop for Lit<'a> { + fn drop(&mut self) { + static DFN: [unsafe fn(SpecialPaddedWord); 2] = [Lit::_drop_zero, Lit::_drop_hstr]; + unsafe { DFN[self.dtc as usize](core::mem::transmute_copy(&self.word)) } } } -/* - UNSAFE(@ohsayan): Safety: - - No touches -*/ -unsafe impl<'a> DataspecMethods1D for LitIR<'a> {} - -impl<'a, T: DataspecMethods1D> PartialEq for LitIR<'a> { - fn eq(&self, other: &T) -> bool { - ::self_eq(self, other) +impl<'a> Clone for Lit<'a> { + fn clone(&self) -> Lit<'a> { + static CFN: [unsafe fn(SpecialPaddedWord) -> SpecialPaddedWord; 2] = unsafe { + [ + |stack| core::mem::transmute(stack), + |hstr| { + let (q, n) = hstr.dwordqn_load_qw_nw(); + let mut md = ManuallyDrop::new( + slice::from_raw_parts(n as *const u8, q as usize).to_owned(), + ); + md.shrink_to_fit(); + SpecialPaddedWord::new(q, md.as_mut_ptr() as _) + }, + ] + }; + unsafe { + Self::_new( + self.tag, + self.dtc, + CFN[self.dtc as usize](core::mem::transmute_copy(&self.word)), + ) + } } } -impl<'a> fmt::Debug for LitIR<'a> { +impl<'a> fmt::Debug for Lit<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut f = f.debug_struct("LitIR"); - f.field("tag", &self.tag); - self.self_fmt_debug_data("data", &mut f); - f.field("_lt", &self._lt); - f.finish() + let mut field = f.debug_struct("Lit"); + field.field("tag", &self.tag); + unsafe { + macro_rules! 
d { + ($expr:expr) => {{ + field.field("data", &$expr); + }}; + } + match self.tag.tag_class() { + TagClass::Bool => d!(self.bool()), + TagClass::UnsignedInt => d!(self.uint()), + TagClass::SignedInt => d!(self.sint()), + TagClass::Float => d!(self.float()), + TagClass::Bin => d!(self.bin()), + TagClass::Str => d!(self.str()), + TagClass::List => panic!("found 2D in 1D"), + } + } + field.finish() } } -impl<'a> Drop for LitIR<'a> { - fn drop(&mut self) { - self.self_drop(); +impl<'a> Hash for Lit<'a> { + fn hash(&self, state: &mut H) { + self.tag.tag_unique().hash(state); + self.__vdata().hash(state); } } -impl<'a> Clone for LitIR<'a> { - fn clone(&self) -> Self { - self.self_clone() +impl<'a> PartialEq for Lit<'a> { + fn eq(&self, other: &Self) -> bool { + unsafe { + // UNSAFE(@ohsayan): +tagck + match (self.tag.tag_class(), other.tag.tag_class()) { + (TagClass::Bool, TagClass::Bool) => self.bool() == other.bool(), + (TagClass::UnsignedInt, TagClass::UnsignedInt) => self.uint() == other.uint(), + (TagClass::SignedInt, TagClass::SignedInt) => self.sint() == other.sint(), + (TagClass::Float, TagClass::Float) => self.float() == other.float(), + (TagClass::Bin, TagClass::Bin) => self.bin() == other.bin(), + (TagClass::Str, TagClass::Str) => self.str() == other.str(), + _ => false, + } + } } } direct_from! 
{ - LitIR<'a> => { - bool as Bool, - u64 as UnsignedInt, - i64 as SignedInt, - f64 as Float, - &'a str as Str, - &'a [u8] as Bin, + Lit<'a> => { + bool as new_bool, + u64 as new_uint, + i64 as new_sint, + f64 as new_float, + &'a str as new_str, + String as new_string, + Box as new_boxed_str, + &'a [u8] as new_bin, + } +} + +impl<'a> ToString for Lit<'a> { + fn to_string(&self) -> String { + unsafe { + match self.kind().tag_class() { + TagClass::Bool => self.bool().to_string(), + TagClass::UnsignedInt => self.uint().to_string(), + TagClass::SignedInt => self.sint().to_string(), + TagClass::Float => self.float().to_string(), + TagClass::Bin => format!("{:?}", self.bin()), + TagClass::Str => format!("{:?}", self.str()), + TagClass::List => panic!("found 2D in 1D"), + } + } } } #[test] -fn tlit() { - let str1 = Lit::Str("hello".into()); - let str2 = str1.clone(); - assert_eq!(str1, str2); - assert_eq!(str1.str(), "hello"); - assert_eq!(str2.str(), "hello"); - drop(str1); - assert_eq!(str2.str(), "hello"); +fn stk_variants() { + let stk1 = [ + Lit::new_bool(true), + Lit::new_uint(u64::MAX), + Lit::new_sint(i64::MIN), + Lit::new_float(f64::MIN), + Lit::new_str("hello"), + Lit::new_bin(b"world"), + ]; + let stk2 = stk1.clone(); + assert_eq!(stk1, stk2); } #[test] -fn tlitir() { - let str1 = LitIR::Str("hello"); - let str2 = str1.clone(); - assert_eq!(str1, str2); - assert_eq!(str1.str(), "hello"); - assert_eq!(str2.str(), "hello"); - drop(str1); - assert_eq!(str2.str(), "hello"); +fn hp_variants() { + let hp1 = [ + Lit::new_string("hello".into()), + Lit::new_string("world".into()), + ]; + let hp2 = hp1.clone(); + assert_eq!(hp1, hp2); +} + +#[test] +fn lt_link() { + let l = Lit::new_string("hello".into()); + let l_ir = l.as_ir(); + assert_eq!(l, l_ir); +} + +#[test] +fn token_array_lt_test() { + let tokens = vec![Lit::new_string("hello".to_string()), Lit::new_str("hi")]; + #[derive(Debug)] + pub struct SelectStatement<'a> { + primary_key: Lit<'a>, + shorthand: Lit<'a>, + 
} + let select_stmt = SelectStatement { + primary_key: tokens[0].as_ir(), + shorthand: tokens[1].as_ir(), + }; + drop(select_stmt); + drop(tokens); } diff --git a/server/src/engine/data/macros.rs b/server/src/engine/data/macros.rs deleted file mode 100644 index 99e552b6..00000000 --- a/server/src/engine/data/macros.rs +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Created on Mon Feb 27 2023 - * - * This file is a part of Skytable - * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source - * NoSQL database written by Sayan Nandan ("the Author") with the - * vision to provide flexibility in data modelling without compromising - * on performance, queryability or scalability. - * - * Copyright (c) 2023, Sayan Nandan - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * -*/ - -/// This is a pretty complex macro that emulates the behavior of an enumeration by making use of flags and macro hacks. You might literally feel it's like a lang match, but nope, -/// there's a lot of wizardry beneath. Well, it's important to know that it works and you shouldn't touch it UNLESS YOU ABSOLUTELY KNOW what you're doing -macro_rules! 
match_data { - (match ref $dataitem:ident $tail:tt) => {match_data!(@branch [ #[deny(unreachable_patterns)] match crate::engine::data::tag::DataTag::tag_class(&crate::engine::data::spec::DataspecMeta1D::kind($dataitem))] $dataitem [] $tail)}; - (match $dataitem:ident $tail:tt) => {match_data!(@branch [ #[deny(unreachable_patterns)] match crate::engine::data::tag::DataTag::tag_class(&crate::engine::data::spec::DataspecMeta1D::kind(&$dataitem))] $dataitem [] $tail)}; - (@branch $decl:tt $dataitem:ident [$($branch:tt)*] {}) => {match_data!(@defeat0 $decl [$($branch)*])}; - (@branch $decl:tt $dataitem:ident [$($branch:tt)*] { $(#[$attr:meta])* $name:ident::$variant:ident($capture:ident) => $ret:expr, $($tail:tt)*}) => { - match_data!(@branch $decl $dataitem [$($branch)* $(#[$attr])* crate::engine::data::tag::TagClass::$variant => {let $capture = unsafe { /* UNSAFE(@ohsayan): flagck */ match_data!(@extract $name $dataitem $variant) }; $ret},] {$($tail)*}) - }; - (@branch $decl:tt $dataitem:ident [$($branch:tt)*] { $(#[$attr:meta])* $name:ident::$variant:ident(_) => $ret:expr, $($tail:tt)*}) => { - match_data!(@branch $decl $dataitem [$($branch)* $(#[$attr])* crate::engine::data::tag::TagClass::$variant => $ret,] {$($tail)*}) - }; - (@branch $decl:tt $dataitem:ident [$($branch:tt)*] { $(#[$attr:meta])* $name:ident::$variant:ident($capture:ident) if $guard:expr => $ret:expr, $($tail:tt)*}) => { - match_data!(@branch $decl $dataitem [$($branch)* $(#[$attr])* crate::engine::data::tag::TagClass::$variant if { let $capture = unsafe { /* UNSAFE(@ohsayan): flagck */ match_data!(@extract $name $dataitem $variant) }; $guard } => { - let $capture = unsafe { /* UNSAFE(@ohsayan): flagck */ match_data!(@extract $name $dataitem $variant) }; let _ = &$capture; $ret}, ] {$($tail)*} - ) - }; - (@branch $decl:tt $dataitem:ident [$($branch:tt)*] { $(#[$attr:meta])* $name:ident::$variant:ident(_) if $guard:expr => $ret:expr, $($tail:tt)*}) => { - match_data!(@branch $decl $dataitem 
[$($branch)* $(#[$attr])* crate::engine::data::tag::TagClass::$variant if $guard => $ret,] {$($tail)*}) - }; - (@branch $decl:tt $dataitem:ident [$($branch:tt)*] { $(#[$attr:meta])* _ => $ret:expr, $($tail:tt)*}) => { - match_data!(@branch $decl $dataitem [$($branch)* $(#[$attr])* _ => $ret,] {$($tail)*}) - }; - (@branch $decl:tt $dataitem:ident [$($branch:tt)*] { $(#[$attr:meta])* $capture:ident => $ret:expr, $($tail:tt)* }) => { - match_data!(@branch $decl $dataitem [ $($branch)* $(#[$attr])* $capture => { $ret},] {$($tail:tt)*}) - }; - (@defeat0 [$($decl:tt)*] [$($branch:tt)*]) => {$($decl)* { $($branch)* }}; - (@extract $name:ident $dataitem:ident Bool) => {<$name as crate::engine::data::spec::Dataspec1D>::read_bool_uck(&$dataitem)}; - (@extract $name:ident $dataitem:ident UnsignedInt) => {<$name as crate::engine::data::spec::Dataspec1D>::read_uint_uck(&$dataitem)}; - (@extract $name:ident $dataitem:ident SignedInt) => {<$name as crate::engine::data::spec::Dataspec1D>::read_sint_uck(&$dataitem)}; - (@extract $name:ident $dataitem:ident Float) => {<$name as crate::engine::data::spec::Dataspec1D>::read_float_uck(&$dataitem)}; - (@extract $name:ident $dataitem:ident Bin) => {<$name as crate::engine::data::spec::Dataspec1D>::read_bin_uck(&$dataitem)}; - (@extract $name:ident $dataitem:ident Str) => {<$name as crate::engine::data::spec::Dataspec1D>::read_str_uck(&$dataitem)}; -} diff --git a/server/src/engine/data/mod.rs b/server/src/engine/data/mod.rs index 97dedc69..be15bd35 100644 --- a/server/src/engine/data/mod.rs +++ b/server/src/engine/data/mod.rs @@ -24,12 +24,9 @@ * */ -#[macro_use] -mod macros; pub mod cell; pub mod dict; pub mod lit; -pub mod spec; pub mod tag; pub mod uuid; // test diff --git a/server/src/engine/data/spec.rs b/server/src/engine/data/spec.rs deleted file mode 100644 index 9a9e71c2..00000000 --- a/server/src/engine/data/spec.rs +++ /dev/null @@ -1,310 +0,0 @@ -/* - * Created on Sun Feb 26 2023 - * - * This file is a part of Skytable - * 
Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source - * NoSQL database written by Sayan Nandan ("the Author") with the - * vision to provide flexibility in data modelling without compromising - * on performance, queryability or scalability. - * - * Copyright (c) 2023, Sayan Nandan - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * -*/ - -/* - So, I woke up and chose violence. God bless me and the stack memory. What I've done here is a sin. Do not follow my footsteps here if you want to write safe and maintainable code. - -- @ohsayan -*/ - -use { - super::tag::{DataTag, TagClass}, - crate::engine::mem::{DwordQN, WordIO}, - core::{fmt, mem, slice}, -}; - -#[inline(always)] -fn when_then T>(cond: bool, then: F) -> Option { - cond.then(then) -} - -/// Information about the type that implements the dataspec traits -pub trait DataspecMeta1D: Sized { - // assoc - type Tag: DataTag; - /// The target must be able to store (atleast) a native dword - type Target: DwordQN; - /// The string item. This helps us remain correct with the dtors - type StringItem; - // fn - /// Create a new instance. 
Usually allocates zero memory *directly* - fn new(tag: Self::Tag, data: Self::Target) -> Self; - /// Returns the reduced dataflag - fn kind(&self) -> Self::Tag; - /// Returns the data stack - fn data(&self) -> Self::Target; -} - -/// Unsafe dtor/ctor impls for dataspec items. We have no clue about these things, the implementor must take care of them -/// -/// ## Safety -/// -/// - Your dtors MUST BE correct -pub unsafe trait DataspecRaw1D: DataspecMeta1D { - /// Is the string heap allocated...anywhere down the line? - const HEAP_STR: bool; - /// Is the binary heap allocated...anywhere down the line? - const HEAP_BIN: bool; - /// Drop the string, if you need a dtor - unsafe fn drop_str(&mut self); - /// Drop the binary, if you need a dtor - unsafe fn drop_bin(&mut self); - /// Clone the string object. Note, we literally HAVE NO IDEA about what you're doing here - unsafe fn clone_str(s: &str) -> Self::Target; - /// Clone the binary object. Again, NOT A DAMN CLUE about whay you're doing down there - unsafe fn clone_bin(b: &[u8]) -> Self::Target; -} - -/// Functions that can be used to read/write to/from dataspec objects -/// -/// ## Safety -/// - You must touch your targets by yourself -pub unsafe trait Dataspec1D: DataspecMeta1D + DataspecRaw1D { - // store - /// Store a new bool. This function is always safe to call - #[allow(non_snake_case)] - fn Bool(b: bool) -> Self { - Self::new(Self::Tag::BOOL, WordIO::store(b)) - } - /// Store a new uint. This function is always safe to call - #[allow(non_snake_case)] - fn UnsignedInt(u: u64) -> Self { - Self::new(Self::Tag::UINT, WordIO::store(u)) - } - /// Store a new sint. This function is always safe to call - #[allow(non_snake_case)] - fn SignedInt(s: i64) -> Self { - Self::new(Self::Tag::SINT, WordIO::store(s)) - } - /// Store a new float. This function is always safe to call - #[allow(non_snake_case)] - fn Float(f: f64) -> Self { - Self::new(Self::Tag::FLOAT, WordIO::store(f.to_bits())) - } - /// Store a new binary. 
This function is always safe to call - #[allow(non_snake_case)] - fn Bin(b: &[u8]) -> Self { - Self::new(Self::Tag::BIN, WordIO::store((b.len(), b.as_ptr()))) - } - - /// Store a new string. Now, I won't talk about this one's safety because it depends on the implementor - #[allow(non_snake_case)] - fn Str(s: Self::StringItem) -> Self; - - // load - // bool - /// Load a bool (this is unsafe for logical verity) - unsafe fn read_bool_uck(&self) -> bool { - self.data().load() - } - /// Load a bool - fn read_bool_try(&self) -> Option { - when_then(self.kind().tag_class() == TagClass::Bool, || unsafe { - // UNSAFE(@ohsayan): we've verified the flag. but lol because this isn't actually unsafe - self.read_bool_uck() - }) - } - /// Load a bool - /// ## Panics - /// If you're not a bool, you panic - fn bool(&self) -> bool { - self.read_bool_try().unwrap() - } - // uint - /// Load a uint (this is unsafe for logical verity) - unsafe fn read_uint_uck(&self) -> u64 { - self.data().load() - } - /// Load a uint - fn read_uint_try(&self) -> Option { - when_then( - self.kind().tag_class() == TagClass::UnsignedInt, - || unsafe { - // UNSAFE(@ohsayan): we've verified the flag. but lol because this isn't actually unsafe - self.read_uint_uck() - }, - ) - } - /// Load a uint - /// ## Panics - /// If you're not a uint, you panic - fn uint(&self) -> u64 { - self.read_uint_try().unwrap() - } - // sint - /// Load a sint (unsafe for logical verity) - unsafe fn read_sint_uck(&self) -> i64 { - self.data().load() - } - /// Load a sint - fn read_sint_try(&self) -> Option { - when_then(self.kind().tag_class() == TagClass::SignedInt, || unsafe { - // UNSAFE(@ohsayan): we've verified the flag. 
but lol because this isn't actually unsafe - self.read_sint_uck() - }) - } - /// Load a sint and panic if we're not a sint - fn sint(&self) -> i64 { - self.read_sint_try().unwrap() - } - // float - /// Load a float (unsafe for logical verity) - unsafe fn read_float_uck(&self) -> f64 { - self.data().load() - } - /// Load a float - fn read_float_try(&self) -> Option { - when_then(self.kind().tag_class() == TagClass::Float, || unsafe { - self.read_float_uck() - }) - } - /// Load a float and panic if we aren't one - fn float(&self) -> f64 { - self.read_float_try().unwrap() - } - // bin - /// Load a binary - /// - /// ## Safety - /// Are you a binary? Did you store it correctly? Are you a victim of segfaults? - unsafe fn read_bin_uck(&self) -> &[u8] { - let (l, p) = self.data().load(); - slice::from_raw_parts(p, l) - } - /// Load a bin - fn read_bin_try(&self) -> Option<&[u8]> { - when_then(self.kind().tag_class() == TagClass::Bin, || unsafe { - self.read_bin_uck() - }) - } - /// Load a bin or panic if we aren't one - fn bin(&self) -> &[u8] { - self.read_bin_try().unwrap() - } - // str - /// Load a str - /// - /// ## Safety - /// Are you a str? Did you store it correctly? Are you a victim of segfaults? 
- unsafe fn read_str_uck(&self) -> &str { - mem::transmute(self.read_bin_uck()) - } - /// Load a str - fn read_str_try(&self) -> Option<&str> { - when_then(self.kind().tag_class() == TagClass::Str, || unsafe { - self.read_str_uck() - }) - } - /// Load a str and panic if we aren't one - fn str(&self) -> &str { - self.read_str_try().unwrap() - } -} - -/// Common impls -/// -/// ## Safety -/// - You are not touching your target -pub unsafe trait DataspecMethods1D: Dataspec1D { - fn self_drop(&mut self) { - match self.kind().tag_class() { - TagClass::Str if ::HEAP_STR => unsafe { - // UNSAFE(@ohsayan): we are heap allocated, and we're calling the implementor's definition - ::drop_str(self) - }, - TagClass::Bin if ::HEAP_BIN => unsafe { - // UNSAFE(@ohsayan): we are heap allocated, and we're calling the implementor's definition - ::drop_bin(self) - }, - _ => {} - } - } - fn self_clone(&self) -> Self { - let data = match self.kind().tag_class() { - TagClass::Str if ::HEAP_STR => unsafe { - // UNSAFE(@ohsayan): we are heap allocated, and we're calling the implementor's definition - ::clone_str(Dataspec1D::read_str_uck(self)) - }, - TagClass::Bin if ::HEAP_BIN => unsafe { - // UNSAFE(@ohsayan): we are heap allocated, and we're calling the implementor's definition - ::clone_bin(Dataspec1D::read_bin_uck(self)) - }, - _ => self.data(), - }; - Self::new(self.kind(), data) - } - fn self_eq(&self, other: &impl DataspecMethods1D) -> bool { - unsafe { - // UNSAFE(@ohsayan): we are checking our flags - match (self.kind().tag_class(), other.kind().tag_class()) { - (TagClass::Bool, TagClass::Bool) => self.read_bool_uck() == other.read_bool_uck(), - (TagClass::UnsignedInt, TagClass::UnsignedInt) => { - self.read_uint_uck() == other.read_uint_uck() - } - (TagClass::SignedInt, TagClass::SignedInt) => { - self.read_sint_uck() == other.read_sint_uck() - } - (TagClass::Float, TagClass::Float) => { - self.read_float_uck() == other.read_float_uck() - } - (TagClass::Bin, TagClass::Bin) => 
self.read_bin_uck() == other.read_bin_uck(), - (TagClass::Str, TagClass::Str) => self.read_str_uck() == other.read_str_uck(), - _ => false, - } - } - } - fn self_fmt_debug_data(&self, data_field: &str, f: &mut fmt::DebugStruct) { - macro_rules! fmtdebug { - ($($(#[$attr:meta])* $match:pat => $ret:expr),* $(,)?) => { - match self.kind().tag_class() {$($(#[$attr])* $match => { let _x = $ret; f.field(data_field, &_x) },)*} - } - } - unsafe { - // UNSAFE(@ohsayan): we are checking our flags - fmtdebug!( - TagClass::Bool => self.read_bool_uck(), - TagClass::UnsignedInt => self.read_uint_uck(), - TagClass::SignedInt => self.read_sint_uck(), - TagClass::Float => self.read_float_uck(), - TagClass::Bin => self.read_bin_uck(), - TagClass::Str => self.read_str_uck(), - #[allow(unreachable_code)] - TagClass::List => unreachable!("found 2D data in 1D"), - ) - }; - } - #[rustfmt::skip] - fn to_string_debug(&self) -> String { - match_data!(match ref self { - Self::Bool(b) => b.to_string(), - Self::UnsignedInt(u) => u.to_string(), - Self::SignedInt(s) => s.to_string(), - Self::Float(f) => f.to_string(), - Self::Bin(b) => format!("{:?}", b), - Self::Str(s) => format!("{:?}", s), - Self::List(_) => unreachable!("found 2D data in 1D"), - }) - } -} diff --git a/server/src/engine/data/tests/mod.rs b/server/src/engine/data/tests/mod.rs index 4c931392..1586b8e4 100644 --- a/server/src/engine/data/tests/mod.rs +++ b/server/src/engine/data/tests/mod.rs @@ -25,21 +25,11 @@ */ mod md_dict_tests; -use super::{ - lit::{Lit, LitIR}, - spec::Dataspec1D, -}; - -#[test] -fn t_largest_int_litir() { - let x = LitIR::UnsignedInt(u64::MAX); - let y = LitIR::UnsignedInt(u64::MAX); - assert_eq!(x, y); -} +use super::lit::Lit; #[test] fn t_largest_int_lit() { - let x = Lit::UnsignedInt(u64::MAX); - let y = Lit::UnsignedInt(u64::MAX); + let x = Lit::new_uint(u64::MAX); + let y = Lit::new_uint(u64::MAX); assert_eq!(x, y); } diff --git a/server/src/engine/mem/mod.rs b/server/src/engine/mem/mod.rs index 
290c0597..eca293bc 100644 --- a/server/src/engine/mem/mod.rs +++ b/server/src/engine/mem/mod.rs @@ -69,6 +69,12 @@ impl SpecialPaddedWord { pub const unsafe fn new(a: u64, b: usize) -> Self { Self { a, b } } + pub fn new_quad(a: u64) -> Self { + Self { + a, + b: ZERO_BLOCK.as_ptr() as usize, + } + } } pub trait StatelessLen { diff --git a/server/src/engine/mem/scanner.rs b/server/src/engine/mem/scanner.rs index 892a4548..f761b340 100644 --- a/server/src/engine/mem/scanner.rs +++ b/server/src/engine/mem/scanner.rs @@ -29,190 +29,408 @@ use core::{ptr, slice}; pub type BufferedScanner<'a> = Scanner<'a, u8>; #[derive(Debug, PartialEq)] +/// A scanner over a slice buffer `[T]` pub struct Scanner<'a, T> { d: &'a [T], __cursor: usize, } impl<'a, T> Scanner<'a, T> { + /// Create a new scanner, starting at position 0 pub const fn new(d: &'a [T]) -> Self { - unsafe { Self::new_with_cursor(d, 0) } + unsafe { + // UNSAFE(@ohsayan): starting with 0 is always correct + Self::new_with_cursor(d, 0) + } } + /// Create a new scanner, starting with the given position + /// + /// ## Safety + /// + /// `i` must be a valid index into the given slice pub const unsafe fn new_with_cursor(d: &'a [T], i: usize) -> Self { Self { d, __cursor: i } } +} + +impl<'a, T> Scanner<'a, T> { + pub const fn buffer_len(&self) -> usize { + self.d.len() + } + /// Returns the remaining number of **items** pub const fn remaining(&self) -> usize { - self.d.len() - self.__cursor + self.buffer_len() - self.__cursor } + /// Returns the number of items consumed by the scanner pub const fn consumed(&self) -> usize { self.__cursor } + /// Returns the current cursor position pub const fn cursor(&self) -> usize { self.__cursor } - pub fn current(&self) -> &[T] { + /// Returns the buffer from the current position + pub fn current_buffer(&self) -> &[T] { &self.d[self.__cursor..] } + /// Returns the ptr to the cursor + /// + /// WARNING: The pointer might be invalid! 
pub const fn cursor_ptr(&self) -> *const T { - unsafe { self.d.as_ptr().add(self.__cursor) } + unsafe { + // UNSAFE(@ohsayan): assuming that the cursor is correctly initialized, this is always fine + self.d.as_ptr().add(self.__cursor) + } } + /// Returns true if the scanner has reached eof pub fn eof(&self) -> bool { self.remaining() == 0 } + /// Returns true if the scanner has atleast `sizeof` bytes remaining pub fn has_left(&self, sizeof: usize) -> bool { self.remaining() >= sizeof } - pub fn matches_cursor_rounded(&self, f: impl Fn(&T) -> bool) -> bool { - f(&self.d[(self.d.len() - 1).min(self.__cursor)]) + /// Returns true if the rounded cursor matches the predicate + pub fn rounded_cursor_matches(&self, f: impl Fn(&T) -> bool) -> bool { + f(&self.d[self.rounded_cursor()]) + } + /// Same as `rounded_cursor_matches`, but with the added guarantee that no rounding was done + pub fn rounded_cursor_not_eof_matches(&self, f: impl Fn(&T) -> bool) -> bool { + self.rounded_cursor_matches(f) & !self.eof() } - pub fn matches_cursor_rounded_and_not_eof(&self, f: impl Fn(&T) -> bool) -> bool { - self.matches_cursor_rounded(f) & !self.eof() + /// A shorthand for equality in `rounded_cursor_not_eof_matches` + pub fn rounded_cursor_not_eof_equals(&self, v_t: T) -> bool + where + T: PartialEq, + { + self.rounded_cursor_matches(|v| v_t.eq(v)) & !self.eof() } } impl<'a, T> Scanner<'a, T> { + /// Manually set the cursor position + /// + /// ## Safety + /// The index must be valid pub unsafe fn set_cursor(&mut self, i: usize) { self.__cursor = i; } - pub unsafe fn move_ahead(&mut self) { - self.move_back_by(1) + /// Increment the cursor + /// + /// ## Safety + /// The buffer must not have reached EOF + pub unsafe fn incr_cursor(&mut self) { + self.incr_cursor_by(1) } - pub unsafe fn move_ahead_by(&mut self, by: usize) { - self._incr(by) + /// Increment the cursor by the given amount + /// + /// ## Safety + /// The buffer must have atleast `by` remaining + pub unsafe fn 
incr_cursor_by(&mut self, by: usize) { + self.__cursor += by; } - pub unsafe fn move_back(&mut self) { - self.move_back_by(1) + /// Increment the cursor if the given the condition is satisfied + /// + /// ## Safety + /// Custom logic should ensure only legal cursor increments + pub unsafe fn incr_cursor_if(&mut self, iff: bool) { + self.incr_cursor_by(iff as _) } - pub unsafe fn move_back_by(&mut self, by: usize) { + /// Decrement the cursor + /// + /// ## Safety + /// The cursor must **not be at 0** + pub unsafe fn decr_cursor(&mut self) { + self.decr_cursor_by(1) + } + /// Decrement the cursor by the given amount + /// + /// ## Safety + /// Should not overflow (overflow safety is ... nevermind) + pub unsafe fn decr_cursor_by(&mut self, by: usize) { self.__cursor -= by; } - unsafe fn _incr(&mut self, by: usize) { - self.__cursor += by; + /// Returns the current cursor + /// + /// ## Safety + /// Buffer should NOT be at EOF + pub unsafe fn deref_cursor(&self) -> T + where + T: Copy, + { + *self.cursor_ptr() + } + /// Returns the rounded cursor + pub fn rounded_cursor(&self) -> usize { + (self.buffer_len() - 1).min(self.__cursor) } - unsafe fn _cursor(&self) -> *const T { - self.d.as_ptr().add(self.__cursor) + /// Returns the current cursor value with rounding + pub fn rounded_cursor_value(&self) -> T + where + T: Copy, + { + self.d[self.rounded_cursor()] } } impl<'a> Scanner<'a, u8> { + /// Attempt to parse the next byte pub fn try_next_byte(&mut self) -> Option { if self.eof() { None } else { - Some(unsafe { self.next_byte() }) + Some(unsafe { + // UNSAFE(@ohsayan): +remaining check + self.next_byte() + }) } } + /// Attempt to parse the next block pub fn try_next_block(&mut self) -> Option<[u8; N]> { if self.has_left(N) { - Some(unsafe { self.next_chunk() }) + Some(unsafe { + // UNSAFE(@ohsayan): +remaining check + self.next_chunk() + }) } else { None } } - pub fn try_next_variable_block(&'a mut self, len: usize) -> Option<&'a [u8]> { + /// Attempt to parse the 
next block (variable) + pub fn try_next_variable_block(&mut self, len: usize) -> Option<&'a [u8]> { if self.has_left(len) { - Some(unsafe { self.next_chunk_variable(len) }) + Some(unsafe { + // UNSAFE(@ohsayan): +remaining check + self.next_chunk_variable(len) + }) } else { None } } } -pub enum BufferedReadResult { +/// Incomplete buffered reads +#[derive(Debug, PartialEq)] +pub enum ScannerDecodeResult { + /// The value was decoded Value(T), + /// We need more data to determine if we have the correct value NeedMore, + /// Found an error while decoding a value Error, } impl<'a> Scanner<'a, u8> { + /// Keep moving the cursor ahead while the predicate returns true pub fn trim_ahead(&mut self, f: impl Fn(u8) -> bool) { - while self.matches_cursor_rounded_and_not_eof(|b| f(*b)) { - unsafe { self.move_ahead() } + while self.rounded_cursor_not_eof_matches(|b| f(*b)) { + unsafe { + // UNSAFE(@ohsayan): not eof + self.incr_cursor() + } } } - pub fn move_ahead_if_matches(&mut self, f: impl Fn(u8) -> bool) { - unsafe { self.move_back_by(self.matches_cursor_rounded_and_not_eof(|b| f(*b)) as _) } - } - /// Attempt to parse a `\n` terminated (we move past the LF, so you can't see it) + /// Attempt to parse a `\n` terminated integer (we move past the LF, so you can't see it) /// /// If we were unable to read in the integer, then the cursor will be restored to its starting position // TODO(@ohsayan): optimize - pub fn try_next_ascii_u64_lf_separated_with_result(&mut self) -> BufferedReadResult { + pub fn try_next_ascii_u64_lf_separated_with_result_or_restore_cursor( + &mut self, + ) -> ScannerDecodeResult { + self.try_next_ascii_u64_lf_separated_with_result_or::() + } + pub fn try_next_ascii_u64_lf_separated_with_result(&mut self) -> ScannerDecodeResult { + self.try_next_ascii_u64_lf_separated_with_result_or::() + } + pub fn try_next_ascii_u64_lf_separated_with_result_or( + &mut self, + ) -> ScannerDecodeResult { let mut okay = true; let start = self.cursor(); - let ret = 
self.extract_integer(&mut okay); + let ret = self.try_next_ascii_u64_stop_at_lf(&mut okay); let payload_ok = okay; - let lf = self.matches_cursor_rounded_and_not_eof(|b| *b == b'\n'); + let lf = self.rounded_cursor_not_eof_matches(|b| *b == b'\n'); okay &= lf; - unsafe { self._incr(okay as _) }; // skip LF + unsafe { + // UNSAFE(@ohsayan): not eof + // skip LF + self.incr_cursor_if(okay) + }; if okay { - BufferedReadResult::Value(ret) + ScannerDecodeResult::Value(ret) } else { - unsafe { self.set_cursor(start) } + if RESTORE_CURSOR { + unsafe { + // UNSAFE(@ohsayan): we correctly restore the cursor + self.set_cursor(start) + } + } if payload_ok { // payload was ok, but we missed a null - BufferedReadResult::NeedMore + ScannerDecodeResult::NeedMore } else { // payload was NOT ok - BufferedReadResult::Error + ScannerDecodeResult::Error } } } + /// Attempt to parse a LF terminated integer (we move past the LF) + /// If we were unable to read in the integer, then the cursor will be restored to its starting position + pub fn try_next_ascii_u64_lf_separated_or_restore_cursor(&mut self) -> Option { + self.try_next_ascii_u64_lf_separated_or::() + } pub fn try_next_ascii_u64_lf_separated(&mut self) -> Option { + self.try_next_ascii_u64_lf_separated_or::() + } + pub fn try_next_ascii_u64_lf_separated_or( + &mut self, + ) -> Option { let start = self.cursor(); let mut okay = true; - let ret = self.extract_integer(&mut okay); - let lf = self.matches_cursor_rounded_and_not_eof(|b| *b == b'\n'); + let ret = self.try_next_ascii_u64_stop_at_lf(&mut okay); + let lf = self.rounded_cursor_not_eof_matches(|b| *b == b'\n'); + unsafe { + // UNSAFE(@ohsayan): not eof + self.incr_cursor_if(lf & okay) + } if okay & lf { Some(ret) } else { - unsafe { self.set_cursor(start) } + if RESTORE_CURSOR { + unsafe { + // UNSAFE(@ohsayan): we correctly restore the cursor + self.set_cursor(start) + } + } None } } - pub fn extract_integer(&mut self, okay: &mut bool) -> u64 { + /// Extracts whatever 
integer is possible using the current bytestream, stopping at a LF (but **not** skipping it) + pub fn try_next_ascii_u64_stop_at_lf(&mut self, g_okay: &mut bool) -> u64 { + self.try_next_ascii_u64_stop_at::(g_okay, |byte| byte != b'\n') + } + /// Extracts whatever integer is possible using the current bytestream, stopping only when either an overflow occurs or when + /// the closure returns false + pub fn try_next_ascii_u64_stop_at( + &mut self, + g_okay: &mut bool, + keep_going_if: impl Fn(u8) -> bool, + ) -> u64 { let mut ret = 0u64; - while self.matches_cursor_rounded_and_not_eof(|b| *b != b'\n') & *okay { + let mut okay = true; + while self.rounded_cursor_not_eof_matches(|b| keep_going_if(*b)) & okay { let b = self.d[self.cursor()]; - *okay &= b.is_ascii_digit(); + if ASCII_CHECK { + okay &= b.is_ascii_digit(); + } ret = match ret.checked_mul(10) { Some(r) => r, None => { - *okay = false; + okay = false; break; } }; ret = match ret.checked_add((b & 0x0F) as u64) { Some(r) => r, None => { - *okay = false; + okay = false; break; } }; - unsafe { self._incr(1) } + unsafe { + // UNSAFE(@ohsayan): loop invariant + self.incr_cursor_by(1) + } } + *g_okay &= okay; ret } } impl<'a> Scanner<'a, u8> { + /// Attempt to parse the next [`i64`] value, stopping and skipping the STOP_BYTE + /// + /// WARNING: The cursor is NOT reversed + pub fn try_next_ascii_i64_separated_by(&mut self) -> (bool, i64) { + let (okay, int) = self.try_next_ascii_i64_stop_at(|b| b == STOP_BYTE); + let lf = self.rounded_cursor_not_eof_equals(STOP_BYTE); + unsafe { + // UNSAFE(@ohsayan): not eof + self.incr_cursor_if(lf & okay) + } + (lf & okay, int) + } + /// Attempt to parse the next [`i64`] value, stopping at the stop condition or stopping if an error occurred + /// + /// WARNING: It is NOT guaranteed that the stop condition was met + pub fn try_next_ascii_i64_stop_at(&mut self, stop_if: impl Fn(u8) -> bool) -> (bool, i64) { + let mut ret = 0i64; + // check if we have a direction + let current = 
self.rounded_cursor_value(); + let direction_negative = current == b'-'; + // skip negative + unsafe { + // UNSAFE(@ohsayan): not eof + self.incr_cursor_if(direction_negative) + } + let mut okay = direction_negative | current.is_ascii_digit() & !self.eof(); + while self.rounded_cursor_not_eof_matches(|b| !stop_if(*b)) & okay { + let byte = unsafe { + // UNSAFE(@ohsayan): loop invariant + self.next_byte() + }; + okay &= byte.is_ascii_digit(); + ret = match ret.checked_mul(10) { + Some(r) => r, + None => { + okay = false; + break; + } + }; + if direction_negative { + ret = match ret.checked_sub((byte & 0x0f) as i64) { + Some(r) => r, + None => { + okay = false; + break; + } + }; + } else { + ret = match ret.checked_add((byte & 0x0f) as i64) { + Some(r) => r, + None => { + okay = false; + break; + } + } + } + } + (okay, ret) + } +} + +impl<'a> Scanner<'a, u8> { + /// Load the next [`u64`] LE pub unsafe fn next_u64_le(&mut self) -> u64 { u64::from_le_bytes(self.next_chunk()) } + /// Load the next block pub unsafe fn next_chunk(&mut self) -> [u8; N] { let mut b = [0u8; N]; - ptr::copy_nonoverlapping(self._cursor(), b.as_mut_ptr(), N); - self._incr(N); + ptr::copy_nonoverlapping(self.cursor_ptr(), b.as_mut_ptr(), N); + self.incr_cursor_by(N); b } - pub unsafe fn next_chunk_variable(&mut self, size: usize) -> &[u8] { - let r = slice::from_raw_parts(self._cursor(), size); - self._incr(size); + /// Load the next variable-sized block + pub unsafe fn next_chunk_variable(&mut self, size: usize) -> &'a [u8] { + let r = slice::from_raw_parts(self.cursor_ptr(), size); + self.incr_cursor_by(size); r } + /// Load the next byte pub unsafe fn next_byte(&mut self) -> u8 { - let r = *self._cursor(); - self._incr(1); + let r = *self.cursor_ptr(); + self.incr_cursor_by(1); r } } diff --git a/server/src/engine/mem/tests/mod.rs b/server/src/engine/mem/tests/mod.rs index efe152a8..b689c053 100644 --- a/server/src/engine/mem/tests/mod.rs +++ b/server/src/engine/mem/tests/mod.rs @@ -25,6 
+25,7 @@ */ use super::*; +mod scanner; mod word; mod vinline { diff --git a/server/src/engine/mem/tests/scanner.rs b/server/src/engine/mem/tests/scanner.rs new file mode 100644 index 00000000..e1b2ee94 --- /dev/null +++ b/server/src/engine/mem/tests/scanner.rs @@ -0,0 +1,249 @@ +/* + * Created on Wed Sep 20 2023 + * + * This file is a part of Skytable + * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source + * NoSQL database written by Sayan Nandan ("the Author") with the + * vision to provide flexibility in data modelling without compromising + * on performance, queryability or scalability. + * + * Copyright (c) 2023, Sayan Nandan + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ * +*/ + +use crate::engine::mem::scanner::{BufferedScanner, ScannerDecodeResult}; + +fn s(b: &[u8]) -> BufferedScanner { + BufferedScanner::new(b) +} + +/* + lf separated +*/ + +#[test] +fn read_u64_lf_separated() { + let mut s = s(b"18446744073709551615\n"); + assert_eq!( + s.try_next_ascii_u64_lf_separated_or_restore_cursor() + .unwrap(), + u64::MAX + ); + assert_eq!(s.cursor(), s.buffer_len()); +} + +#[test] +fn read_u64_lf_separated_missing() { + let mut s = s(b"18446744073709551615"); + assert!(s + .try_next_ascii_u64_lf_separated_or_restore_cursor() + .is_none()); + assert_eq!(s.cursor(), 0); +} + +#[test] +fn read_u64_lf_separated_invalid() { + let mut scn = s(b"1844674407370955161A\n"); + assert!(scn + .try_next_ascii_u64_lf_separated_or_restore_cursor() + .is_none()); + assert_eq!(scn.cursor(), 0); + let mut scn = s(b"?1844674407370955161A\n"); + assert!(scn + .try_next_ascii_u64_lf_separated_or_restore_cursor() + .is_none()); + assert_eq!(scn.cursor(), 0); +} + +#[test] +fn read_u64_lf_separated_zero() { + let mut s = s(b"0\n"); + assert_eq!( + s.try_next_ascii_u64_lf_separated_or_restore_cursor() + .unwrap(), + 0 + ); + assert_eq!(s.cursor(), s.buffer_len()); +} + +#[test] +fn read_u64_lf_overflow() { + let mut s = s(b"184467440737095516155\n"); + assert!(s + .try_next_ascii_u64_lf_separated_or_restore_cursor() + .is_none()); + assert_eq!(s.cursor(), 0); +} + +/* + lf separated allow unbuffered +*/ + +#[test] +fn incomplete_read_u64_okay() { + let mut scn = s(b"18446744073709551615\n"); + assert_eq!( + scn.try_next_ascii_u64_lf_separated_with_result_or_restore_cursor(), + ScannerDecodeResult::Value(u64::MAX) + ); + assert_eq!(scn.cursor(), scn.buffer_len()); +} + +#[test] +fn incomplete_read_u64_missing_lf() { + let mut scn = s(b"18446744073709551615"); + assert_eq!( + scn.try_next_ascii_u64_lf_separated_with_result_or_restore_cursor(), + ScannerDecodeResult::NeedMore + ); + assert_eq!(scn.cursor(), 0); +} + +#[test] +fn incomplete_read_u64_lf_error() 
{ + let mut scn = s(b"1844674407370955161A\n"); + assert_eq!( + scn.try_next_ascii_u64_lf_separated_with_result_or_restore_cursor(), + ScannerDecodeResult::Error + ); + assert_eq!(scn.cursor(), 0); + let mut scn = s(b"?1844674407370955161A\n"); + assert_eq!( + scn.try_next_ascii_u64_lf_separated_with_result_or_restore_cursor(), + ScannerDecodeResult::Error + ); + assert_eq!(scn.cursor(), 0); +} + +#[test] +fn incomplete_read_u64_lf_zero() { + let mut scn = s(b"0\n"); + assert_eq!( + scn.try_next_ascii_u64_lf_separated_with_result_or_restore_cursor(), + ScannerDecodeResult::Value(0) + ) +} + +#[test] +fn incomplete_read_u64_lf_overflow() { + let mut s = s(b"184467440737095516155\n"); + assert_eq!( + s.try_next_ascii_u64_lf_separated_with_result_or_restore_cursor(), + ScannerDecodeResult::Error + ); + assert_eq!(s.cursor(), 0); +} + +/* + lf separated i64 +*/ + +fn concat(a: impl ToString, b: impl ToString) -> Vec { + let (a, b) = (a.to_string(), b.to_string()); + let mut s = String::with_capacity(a.len() + b.len()); + s.push_str(a.as_str()); + s.push_str(b.as_str()); + s.into_bytes() +} + +#[test] +fn read_i64_lf_separated_okay() { + let buf = concat(i64::MAX, "\n"); + let mut scn = s(&buf); + assert_eq!( + scn.try_next_ascii_i64_separated_by::(), + (true, i64::MAX) + ); + assert_eq!(scn.cursor(), scn.buffer_len()); + let buf = concat(i64::MIN, "\n"); + let mut scn = s(&buf); + assert_eq!( + scn.try_next_ascii_i64_separated_by::(), + (true, i64::MIN) + ); + assert_eq!(scn.cursor(), scn.buffer_len()); +} + +#[test] +fn read_i64_lf_separated_missing() { + let buf = concat(i64::MAX, ""); + let mut scn = s(&buf); + assert_eq!( + scn.try_next_ascii_i64_separated_by::(), + (false, i64::MAX) + ); + assert_eq!(scn.cursor(), scn.buffer_len()); + let buf = concat(i64::MIN, ""); + let mut scn = s(&buf); + assert_eq!( + scn.try_next_ascii_i64_separated_by::(), + (false, i64::MIN) + ); + assert_eq!(scn.cursor(), scn.buffer_len()); +} + +#[test] +fn 
read_i64_lf_separated_invalid() { + let buf = concat(i64::MAX, "A\n"); + let mut scn = s(&buf); + assert_eq!( + scn.try_next_ascii_i64_separated_by::(), + (false, i64::MAX) + ); + assert_eq!(scn.cursor(), scn.buffer_len() - 1); + let buf = concat("A", format!("{}\n", i64::MIN)); + let mut scn = s(&buf); + assert_eq!(scn.try_next_ascii_i64_separated_by::(), (false, 0)); + assert_eq!(scn.cursor(), 0); +} + +#[test] +fn read_i64_lf_overflow() { + let buf = concat(u64::MAX, "\n"); + let mut scn = s(&buf); + assert_eq!( + scn.try_next_ascii_i64_separated_by::(), + (false, 1844674407370955161) + ); + assert_eq!(scn.cursor(), scn.buffer_len() - 1); +} + +#[test] +fn read_i64_lf_underflow() { + let buf = concat(i64::MIN, "1\n"); + let mut scn = s(&buf); + assert_eq!( + scn.try_next_ascii_i64_separated_by::(), + (false, -9223372036854775808) + ); + assert_eq!(scn.cursor(), scn.buffer_len() - 1); +} + +#[test] +fn rounding() { + let mut scanner = s(b"123"); + for i in 1..=u8::MAX { + match i { + 1..=3 => { + assert_eq!(scanner.try_next_byte().unwrap(), (i + b'0')); + } + _ => { + assert_eq!(scanner.rounded_cursor_value(), b'3'); + } + } + } + assert_eq!(scanner.cursor(), scanner.buffer_len()); +} diff --git a/server/src/engine/net/mod.rs b/server/src/engine/net/mod.rs index 9975ae49..213135b9 100644 --- a/server/src/engine/net/mod.rs +++ b/server/src/engine/net/mod.rs @@ -30,7 +30,7 @@ mod protocol; pub trait Socket: AsyncWrite + AsyncRead + Unpin {} pub type IoResult = Result; -enum QLoopReturn { +pub enum QLoopReturn { Fin, ConnectionRst, } diff --git a/server/src/engine/net/protocol/data_exchange.rs b/server/src/engine/net/protocol/data_exchange.rs index 8b024e04..b964ca9c 100644 --- a/server/src/engine/net/protocol/data_exchange.rs +++ b/server/src/engine/net/protocol/data_exchange.rs @@ -98,7 +98,7 @@ fn parse_lf_separated( ) -> LFTIntParseResult { let mut ret = previously_buffered; let mut okay = true; - while scanner.matches_cursor_rounded_and_not_eof(|b| *b != b'\n') 
& okay { + while scanner.rounded_cursor_not_eof_matches(|b| *b != b'\n') & okay { let b = unsafe { scanner.next_byte() }; okay &= b.is_ascii_digit(); ret = match ret.checked_mul(10) { @@ -111,8 +111,8 @@ fn parse_lf_separated( }; } let payload_ok = okay; - let lf_ok = scanner.matches_cursor_rounded_and_not_eof(|b| *b == b'\n'); - unsafe { scanner.move_ahead_by(lf_ok as usize) } + let lf_ok = scanner.rounded_cursor_not_eof_matches(|b| *b == b'\n'); + unsafe { scanner.incr_cursor_by(lf_ok as usize) } if payload_ok & lf_ok { LFTIntParseResult::Value(ret) } else { @@ -181,8 +181,8 @@ impl<'a> CSQuery<'a> { let slice; unsafe { // UNSAFE(@ohsayan): checked len at branch - slice = slice::from_raw_parts(scanner.current().as_ptr(), size); - scanner.move_ahead_by(size); + slice = slice::from_raw_parts(scanner.current_buffer().as_ptr(), size); + scanner.incr_cursor_by(size); } CSQueryExchangeResult::Completed(CSQuery::new(slice)) } else { diff --git a/server/src/engine/net/protocol/handshake.rs b/server/src/engine/net/protocol/handshake.rs index a5d9616d..c1d0545b 100644 --- a/server/src/engine/net/protocol/handshake.rs +++ b/server/src/engine/net/protocol/handshake.rs @@ -26,7 +26,7 @@ use { crate::{ - engine::mem::scanner::{BufferedReadResult, BufferedScanner}, + engine::mem::scanner::{BufferedScanner, ScannerDecodeResult}, util::compiler, }, std::slice, @@ -320,9 +320,10 @@ impl<'a> CHandshake<'a> { // we're done here return unsafe { // UNSAFE(@ohsayan): we just checked buffered size - let uname = slice::from_raw_parts(scanner.current().as_ptr(), uname_l); - let pwd = slice::from_raw_parts(scanner.current().as_ptr().add(uname_l), pwd_l); - scanner.move_ahead_by(uname_l + pwd_l); + let uname = slice::from_raw_parts(scanner.current_buffer().as_ptr(), uname_l); + let pwd = + slice::from_raw_parts(scanner.current_buffer().as_ptr().add(uname_l), pwd_l); + scanner.incr_cursor_by(uname_l + pwd_l); HandshakeResult::Completed(Self::new( static_hs, Some(CHandshakeAuth::new(uname, 
pwd)), @@ -367,15 +368,16 @@ impl<'a> CHandshake<'a> { AuthMode::Password => {} } // let us see if we can parse the username length - let uname_l = match scanner.try_next_ascii_u64_lf_separated_with_result() { - BufferedReadResult::NeedMore => { + let uname_l = match scanner.try_next_ascii_u64_lf_separated_with_result_or_restore_cursor() + { + ScannerDecodeResult::NeedMore => { return HandshakeResult::ChangeState { new_state: HandshakeState::StaticBlock(static_header), expect: AuthMode::Password.min_payload_bytes(), // 2 for uname_l and 2 for pwd_l }; } - BufferedReadResult::Value(v) => v as usize, - BufferedReadResult::Error => { + ScannerDecodeResult::Value(v) => v as usize, + ScannerDecodeResult::Error => { return HandshakeResult::Error(ProtocolError::CorruptedHSPacket) } }; @@ -388,16 +390,16 @@ impl<'a> CHandshake<'a> { uname_l: usize, ) -> HandshakeResult<'a> { // we just have to get the password len - let pwd_l = match scanner.try_next_ascii_u64_lf_separated_with_result() { - BufferedReadResult::Value(v) => v as usize, - BufferedReadResult::NeedMore => { + let pwd_l = match scanner.try_next_ascii_u64_lf_separated_with_result_or_restore_cursor() { + ScannerDecodeResult::Value(v) => v as usize, + ScannerDecodeResult::NeedMore => { // newline missing (or maybe there's more?) 
return HandshakeResult::ChangeState { new_state: HandshakeState::ExpectingMetaForVariableBlock { static_hs, uname_l }, expect: uname_l + 2, // space for username + password len }; } - BufferedReadResult::Error => { + ScannerDecodeResult::Error => { return HandshakeResult::Error(ProtocolError::CorruptedHSPacket) } }; diff --git a/server/src/engine/ql/ast/mod.rs b/server/src/engine/ql/ast/mod.rs index 1ec298db..4fb2ebd6 100644 --- a/server/src/engine/ql/ast/mod.rs +++ b/server/src/engine/ql/ast/mod.rs @@ -36,7 +36,7 @@ use { }, crate::{ engine::{ - data::{cell::Datacell, lit::LitIR}, + data::{cell::Datacell, lit::Lit}, error::{Error, QueryResult}, }, util::{compiler, MaybeInit}, @@ -162,7 +162,7 @@ impl<'a, Qd: QueryData<'a>> State<'a, Qd> { /// /// ## Safety /// - Must ensure that `Self::can_read_lit_rounded` is true - pub unsafe fn read_cursor_lit_unchecked(&mut self) -> LitIR<'a> { + pub unsafe fn read_cursor_lit_unchecked(&mut self) -> Lit<'a> { let tok = self.read(); Qd::read_lit(&mut self.d, tok) } @@ -171,7 +171,7 @@ impl<'a, Qd: QueryData<'a>> State<'a, Qd> { /// /// ## Safety /// - Must ensure that `Self::can_read_lit_from` is true for the token - pub unsafe fn read_lit_unchecked_from(&mut self, tok: &'a Token<'a>) -> LitIR<'a> { + pub unsafe fn read_lit_unchecked_from(&mut self, tok: &'a Token<'a>) -> Lit<'a> { Qd::read_lit(&mut self.d, tok) } #[inline(always)] @@ -274,7 +274,7 @@ pub trait QueryData<'a> { /// /// ## Safety /// The current token **must match** the signature of a lit - unsafe fn read_lit(&mut self, tok: &'a Token) -> LitIR<'a>; + unsafe fn read_lit(&mut self, tok: &'a Token) -> Lit<'a>; /// Read a lit using the given token and then copy it into a [`DataType`] /// /// ## Safety @@ -299,7 +299,7 @@ impl<'a> QueryData<'a> for InplaceData { tok.is_lit() } #[inline(always)] - unsafe fn read_lit(&mut self, tok: &'a Token) -> LitIR<'a> { + unsafe fn read_lit(&mut self, tok: &'a Token) -> Lit<'a> { tok.uck_read_lit().as_ir() } #[inline(always)] @@ 
-312,42 +312,6 @@ impl<'a> QueryData<'a> for InplaceData { } } -#[derive(Debug)] -pub struct SubstitutedData<'a> { - data: &'a [LitIR<'a>], -} -impl<'a> SubstitutedData<'a> { - #[inline(always)] - pub const fn new(src: &'a [LitIR<'a>]) -> Self { - Self { data: src } - } -} - -impl<'a> QueryData<'a> for SubstitutedData<'a> { - #[inline(always)] - fn can_read_lit_from(&self, tok: &Token) -> bool { - Token![?].eq(tok) && self.nonzero() - } - #[inline(always)] - unsafe fn read_lit(&mut self, tok: &'a Token) -> LitIR<'a> { - debug_assert!(Token![?].eq(tok)); - let ret = self.data[0].clone(); - self.data = &self.data[1..]; - ret - } - #[inline(always)] - unsafe fn read_data_type(&mut self, tok: &'a Token) -> Datacell { - debug_assert!(Token![?].eq(tok)); - let ret = self.data[0].clone(); - self.data = &self.data[1..]; - Datacell::from(ret) - } - #[inline(always)] - fn nonzero(&self) -> bool { - !self.data.is_empty() - } -} - /* AST */ diff --git a/server/src/engine/ql/benches.rs b/server/src/engine/ql/benches.rs index 848c4d7d..53fd6613 100644 --- a/server/src/engine/ql/benches.rs +++ b/server/src/engine/ql/benches.rs @@ -77,7 +77,7 @@ mod lexer { #[bench] fn lex_raw_literal(b: &mut Bencher) { let src = b"\r44\ne69b10ffcc250ae5091dec6f299072e23b0b41d6a739"; - let expected = vec![Token::Lit(Lit::Bin( + let expected = vec![Token::Lit(Lit::new_bin( b"e69b10ffcc250ae5091dec6f299072e23b0b41d6a739", ))]; b.iter(|| assert_eq!(lex_insecure(src).unwrap(), expected)); diff --git a/server/src/engine/ql/dml/mod.rs b/server/src/engine/ql/dml/mod.rs index 11645848..76c9087c 100644 --- a/server/src/engine/ql/dml/mod.rs +++ b/server/src/engine/ql/dml/mod.rs @@ -39,7 +39,7 @@ use { ast::{QueryData, State}, lex::Ident, }, - crate::{engine::data::lit::LitIR, util::compiler}, + crate::{engine::data::lit::Lit, util::compiler}, std::collections::HashMap, }; @@ -59,13 +59,13 @@ fn u(b: bool) -> u8 { #[derive(Debug, PartialEq)] pub struct RelationalExpr<'a> { pub(super) lhs: Ident<'a>, - 
pub(super) rhs: LitIR<'a>, + pub(super) rhs: Lit<'a>, pub(super) opc: u8, } impl<'a> RelationalExpr<'a> { #[inline(always)] - pub(super) fn new(lhs: Ident<'a>, rhs: LitIR<'a>, opc: u8) -> RelationalExpr<'a> { + pub(super) fn new(lhs: Ident<'a>, rhs: Lit<'a>, opc: u8) -> RelationalExpr<'a> { Self { lhs, rhs, opc } } pub(super) const OP_EQ: u8 = 1; @@ -77,7 +77,7 @@ impl<'a> RelationalExpr<'a> { pub fn filter_hint_none(&self) -> bool { self.opc == Self::OP_EQ } - pub fn rhs(&self) -> LitIR<'a> { + pub fn rhs(&self) -> Lit<'a> { self.rhs.clone() } #[inline(always)] diff --git a/server/src/engine/ql/dml/upd.rs b/server/src/engine/ql/dml/upd.rs index 8e159a14..f6a59d4a 100644 --- a/server/src/engine/ql/dml/upd.rs +++ b/server/src/engine/ql/dml/upd.rs @@ -31,7 +31,7 @@ use { crate::{ engine::{ core::query_meta::AssignmentOperator, - data::lit::LitIR, + data::lit::Lit, error::{Error, QueryResult}, ql::{ ast::{Entity, QueryData, State}, @@ -60,13 +60,13 @@ pub struct AssignmentExpression<'a> { /// the LHS ident pub lhs: Ident<'a>, /// the RHS lit - pub rhs: LitIR<'a>, + pub rhs: Lit<'a>, /// operator pub operator_fn: AssignmentOperator, } impl<'a> AssignmentExpression<'a> { - pub fn new(lhs: Ident<'a>, rhs: LitIR<'a>, operator_fn: AssignmentOperator) -> Self { + pub fn new(lhs: Ident<'a>, rhs: Lit<'a>, operator_fn: AssignmentOperator) -> Self { Self { lhs, rhs, diff --git a/server/src/engine/ql/lex/mod.rs b/server/src/engine/ql/lex/mod.rs index 6c5ca70c..3d4f9363 100644 --- a/server/src/engine/ql/lex/mod.rs +++ b/server/src/engine/ql/lex/mod.rs @@ -25,553 +25,471 @@ */ mod raw; +pub use raw::{Ident, Keyword, Symbol, Token}; use { - self::raw::RawLexer, crate::{ engine::{ - data::{ - lit::{Lit, LitIR}, - spec::Dataspec1D, - }, + data::lit::Lit, error::{Error, QueryResult}, + mem::BufferedScanner, }, util::compiler, }, - core::{fmt, ops::BitOr, slice, str}, + core::slice, + raw::{kwof, symof}, }; -pub use self::raw::{Ident, Keyword, Symbol, Token}; -pub type Slice<'a> = &'a 
[u8]; - /* - Lexer impls + basic lexer definition */ -#[derive(Debug)] -/// This implements the `opmode-dev` for BlueQL -pub struct InsecureLexer<'a> { - base: RawLexer<'a>, +type Slice<'a> = &'a [u8]; + +#[derive(Debug, PartialEq)] +/// The internal lexer impl +pub struct Lexer<'a> { + token_buffer: BufferedScanner<'a>, + tokens: Vec>, + last_error: Option, } -impl<'a> InsecureLexer<'a> { - #[inline(always)] - pub const fn new(src: Slice<'a>) -> Self { +impl<'a> Lexer<'a> { + /// Initialize a new lexer + fn new(src: &'a [u8]) -> Self { Self { - base: RawLexer::new(src), + token_buffer: BufferedScanner::new(src), + tokens: Vec::new(), + last_error: None, } } - #[inline(always)] - pub fn lex(src: Slice<'a>) -> QueryResult>> { - let mut slf = Self::new(src); - slf._lex(); - let RawLexer { - tokens, last_error, .. - } = slf.base; - match last_error { - None => Ok(tokens), - Some(e) => Err(e), - } + /// set an error + #[inline(never)] + #[cold] + fn set_error(&mut self, e: Error) { + self.last_error = Some(e); } - #[inline(always)] - fn _lex(&mut self) { - let slf = &mut self.base; - while slf.not_exhausted() && slf.no_error() { - match unsafe { - // UNSAFE(@ohsayan): Verified non-null from pre - slf.deref_cursor() - } { - byte if byte.is_ascii_alphabetic() => slf.scan_ident_or_keyword(), - #[cfg(test)] - byte if byte == b'\x01' => { - slf.push_token(Token::IgnorableComma); - unsafe { - // UNSAFE(@ohsayan): All good here. 
Already read the token - slf.incr_cursor(); - } - } - byte if byte.is_ascii_digit() => Self::scan_unsigned_integer(slf), - b'\r' => Self::scan_binary_literal(slf), - b'-' => Self::scan_signed_integer(slf), - qs @ (b'\'' | b'"') => Self::scan_quoted_string(slf, qs), - // blank space or an arbitrary byte - b' ' | b'\n' | b'\t' => slf.trim_ahead(), - b => slf.scan_byte(b), - } - } + /// push in a new token + fn push_token(&mut self, t: impl Into>) { + self.tokens.push(t.into()) + } + fn no_error(&self) -> bool { + self.last_error.is_none() } } -// high-level methods -impl<'a> InsecureLexer<'a> { - #[inline(always)] - fn scan_signed_integer(slf: &mut RawLexer<'a>) { +impl<'a> Lexer<'a> { + /// Scan an identifier + fn scan_ident(&mut self) -> Slice<'a> { + let s = self.token_buffer.cursor_ptr(); unsafe { - // UNSAFE(@ohsayan): We hit an integer hence this was called - slf.incr_cursor(); - } - if slf.peek_is(|b| b.is_ascii_digit()) { - // we have some digits - let start = unsafe { - // UNSAFE(@ohsayan): Take the (-) into the parse - // TODO(@ohsayan): we can maybe look at a more efficient way later - slf.cursor().sub(1) - }; - while slf.peek_is_and_forward(|b| b.is_ascii_digit()) {} - let wseof = slf.peek_is(|char| !char.is_ascii_alphabetic()) || slf.exhausted(); - match unsafe { - // UNSAFE(@ohsayan): a sequence of ASCII bytes in the integer range will always be correct unicode - str::from_utf8_unchecked(slice::from_raw_parts( - start, - // UNSAFE(@ohsayan): valid cursor and start pointers - slf.cursor().offset_from(start) as usize, - )) - } - .parse::() + while self + .token_buffer + .rounded_cursor_not_eof_matches(|b| b.is_ascii_alphanumeric() || *b == b'_') { - Ok(num) if compiler::likely(wseof) => { - slf.push_token(Lit::SignedInt(num)); - } - _ => { - compiler::cold_call(|| slf.set_error(Error::LexInvalidLiteral)); - } + // UNSAFE(@ohsayan): increment cursor, this is valid + self.token_buffer.incr_cursor(); } - } else { - slf.push_token(Token![-]); + // 
UNSAFE(@ohsayan): valid slice and ptrs + slice::from_raw_parts( + s, + self.token_buffer.current_buffer().as_ptr().offset_from(s) as usize, + ) } } - #[inline(always)] - fn scan_unsigned_integer(slf: &mut RawLexer<'a>) { - let s = slf.cursor(); - - while slf.peek_is(|b| b.is_ascii_digit()) { - unsafe { - // UNSAFE(@ohsayan): since we're going ahead, this is correct (until EOA) - slf.incr_cursor(); + /// Scan an identifier or keyword + fn scan_ident_or_keyword(&mut self) { + let s = self.scan_ident(); + let st = s.to_ascii_lowercase(); + match kwof(&st) { + Some(kw) => self.tokens.push(kw.into()), + // FIXME(@ohsayan): Uh, mind fixing this? The only advantage is that I can keep the graph *memory* footprint small + None if st == b"true" || st == b"false" => { + self.push_token(Lit::new_bool(st == b"true")) } + None => self.tokens.push(unsafe { + // UNSAFE(@ohsayan): scan_ident only returns a valid ident which is always a string + Token::Ident(Ident::new(s)) + }), } - /* - 1234; // valid - 1234} // valid - 1234{ // invalid - 1234, // valid - 1234a // invalid - */ - let wseof = slf.peek_is(|char| !char.is_ascii_alphabetic()) || slf.exhausted(); - match unsafe { - /* - UNSAFE(@ohsayan): - (1) Valid cursor and start pointer (since we copy it from the cursor which is correct) - (2) All ASCII alphabetic bytes are captured, hence this will always be a correct unicode string - */ - str::from_utf8_unchecked(slice::from_raw_parts( - s, - slf.cursor().offset_from(s) as usize, - )) + } + fn scan_byte(&mut self, byte: u8) { + match symof(byte) { + Some(tok) => self.push_token(tok), + None => return self.set_error(Error::LexUnexpectedByte), } - .parse() - { - Ok(num) if compiler::likely(wseof) => { - slf.tokens.push(Token::Lit(Lit::UnsignedInt(num))) - } - _ => slf.set_error(Error::LexInvalidLiteral), + unsafe { + // UNSAFE(@ohsayan): we are sent a byte, so fw cursor + self.token_buffer.incr_cursor(); } } +} - #[inline(always)] - fn scan_binary_literal(slf: &mut RawLexer<'a>) { - 
unsafe { - // UNSAFE(@ohsayan): cursor increment since we hit the marker byte (CR) - slf.incr_cursor(); - } - let mut size = 0usize; - let mut okay = true; - while slf.not_exhausted() - && unsafe { - // UNSAFE(@ohsayan): verified non-exhaustion - slf.deref_cursor() != b'\n' - } - && okay - { - /* - Don't ask me how stupid this is. Like, I was probably in some "mood" when I wrote this - and it works duh, but isn't the most elegant of things (could I have just used a parse? - nah, I'm just a hardcore numeric normie) - -- Sayan - */ +impl<'a> Lexer<'a> { + fn trim_ahead(&mut self) { + self.token_buffer + .trim_ahead(|b| (b == b' ') | (b == b'\n') | (b == b'\t')) + } +} + +/* + Insecure lexer +*/ + +pub struct InsecureLexer<'a> { + l: Lexer<'a>, +} + +impl<'a> InsecureLexer<'a> { + pub fn lex(src: &'a [u8]) -> QueryResult>> { + let slf = Self { l: Lexer::new(src) }; + slf._lex() + } + fn _lex(mut self) -> QueryResult>> { + while !self.l.token_buffer.eof() & self.l.no_error() { let byte = unsafe { - // UNSAFE(@ohsayan): The pre invariant guarantees that this is correct - slf.deref_cursor() + // UNSAFE(@ohsayan): loop invariant + self.l.token_buffer.deref_cursor() }; - okay &= byte.is_ascii_digit(); - let (prod, of_flag) = size.overflowing_mul(10); - okay &= !of_flag; - let (sum, of_flag) = prod.overflowing_add((byte & 0x0F) as _); - size = sum; - okay &= !of_flag; - unsafe { - // UNSAFE(@ohsayan): We just read something, so this is fine (until EOA) - slf.incr_cursor(); + match byte { + #[cfg(test)] + byte if byte == b'\x01' => { + self.l.push_token(Token::IgnorableComma); + unsafe { + // UNSAFE(@ohsayan): All good here. 
Already read the token + self.l.token_buffer.incr_cursor(); + } + } + // ident + byte if byte.is_ascii_alphabetic() | (byte == b'_') => { + self.l.scan_ident_or_keyword() + } + // uint + byte if byte.is_ascii_digit() => self.scan_unsigned_integer(), + // sint + b'-' => { + unsafe { + // UNSAFE(@ohsayan): loop invariant + self.l.token_buffer.incr_cursor() + }; + self.scan_signed_integer(); + } + // binary + b'\r' => { + unsafe { + // UNSAFE(@ohsayan): loop invariant + self.l.token_buffer.incr_cursor() + } + self.scan_binary() + } + // string + quote_style @ (b'"' | b'\'') => { + unsafe { + // UNSAFE(@ohsayan): loop invariant + self.l.token_buffer.incr_cursor() + } + self.scan_quoted_string(quote_style) + } + // whitespace + b' ' | b'\n' | b'\t' => self.l.trim_ahead(), + // some random byte + byte => self.l.scan_byte(byte), } } - okay &= slf.peek_eq_and_forward(b'\n'); - okay &= slf.remaining() >= size; - if compiler::likely(okay) { - unsafe { - // UNSAFE(@ohsayan): Correct cursor and length (from above we know that we have enough bytes) - slf.push_token(Lit::Bin(slice::from_raw_parts(slf.cursor(), size))); - // UNSAFE(@ohsayan): Correct length increment - slf.incr_cursor_by(size); - } - } else { - slf.set_error(Error::LexInvalidLiteral); + match self.l.last_error { + None => Ok(self.l.tokens), + Some(e) => Err(e), } } - #[inline(always)] - fn scan_quoted_string(slf: &mut RawLexer<'a>, quote_style: u8) { - debug_assert!( - unsafe { - // UNSAFE(@ohsayan): yessir, we just hit this byte. 
if called elsewhere, this function will crash and burn (or simply, segfault) - slf.deref_cursor() - } == quote_style, - "illegal call to scan_quoted_string" - ); - unsafe { - // UNSAFE(@ohsayan): Increment this cursor (this is correct since we just hit the quote) - slf.incr_cursor() +} + +impl<'a> InsecureLexer<'a> { + fn scan_binary(&mut self) { + let Some(len) = self + .l + .token_buffer + .try_next_ascii_u64_lf_separated_or_restore_cursor() + else { + self.l.set_error(Error::LexInvalidLiteral); + return; + }; + let len = len as usize; + match self.l.token_buffer.try_next_variable_block(len) { + Some(block) => self.l.push_token(Lit::new_bin(block)), + None => self.l.set_error(Error::LexInvalidLiteral), } + } + fn scan_quoted_string(&mut self, quote_style: u8) { + // cursor is at beginning of `"`; we need to scan until the end of quote or an escape let mut buf = Vec::new(); - unsafe { - while slf.peek_neq(quote_style) { - // UNSAFE(@ohsayan): deref is good since peek passed - match slf.deref_cursor() { - b if b != b'\\' => { - buf.push(b); - } - _ => { - // UNSAFE(@ohsayan): we read one byte, so this should work - slf.incr_cursor(); - if slf.exhausted() { - break; + while self + .l + .token_buffer + .rounded_cursor_not_eof_matches(|b| *b != quote_style) + { + let byte = unsafe { + // UNSAFE(@ohsayan): loop invariant + self.l.token_buffer.next_byte() + }; + match byte { + b'\\' => { + // hmm, this might be an escape (either `\\` or `\"`) + if self + .l + .token_buffer + .rounded_cursor_not_eof_matches(|b| *b == quote_style || *b == b'\\') + { + // ignore escaped byte + unsafe { + buf.push(self.l.token_buffer.next_byte()); } - // UNSAFE(@ohsayan): correct because of the above branch - let b = slf.deref_cursor(); - let quote = b == quote_style; - let bs = b == b'\\'; - if quote | bs { - buf.push(b); - } else { - break; // what on good earth is that escape? 
+ } else { + // this is not allowed + unsafe { + // UNSAFE(@ohsayan): we move the cursor ahead, now we're moving it back + self.l.token_buffer.decr_cursor() } + self.l.set_error(Error::LexInvalidLiteral); + return; } } - /* - UNSAFE(@ohsayan): This is correct because: - (a) If we are in arm 1: we move the cursor ahead from the `\` byte (the branch doesn't do it) - (b) If we are in arm 2: we don't skip the second quote byte in the branch, hence this is correct - */ - slf.incr_cursor(); + _ => buf.push(byte), } - let terminated = slf.peek_eq_and_forward(quote_style); - match String::from_utf8(buf) { - Ok(st) if terminated => slf.tokens.push(Token::Lit(st.into_boxed_str().into())), - _ => slf.set_error(Error::LexInvalidLiteral), + } + let ended_with_quote = self + .l + .token_buffer + .rounded_cursor_not_eof_equals(quote_style); + // skip quote + unsafe { + // UNSAFE(@ohsayan): not eof + self.l.token_buffer.incr_cursor_if(ended_with_quote) + } + match String::from_utf8(buf) { + Ok(s) if ended_with_quote => self.l.push_token(Lit::new_string(s)), + Err(_) | Ok(_) => self.l.set_error(Error::LexInvalidLiteral), + } + } + fn scan_unsigned_integer(&mut self) { + let mut okay = true; + // extract integer + let int = self + .l + .token_buffer + .try_next_ascii_u64_stop_at::(&mut okay, |b| b.is_ascii_digit()); + /* + see if we ended at a correct byte: + iff the integer has an alphanumeric byte at the end is the integer invalid + */ + if compiler::unlikely( + !okay + | self + .l + .token_buffer + .rounded_cursor_not_eof_matches(u8::is_ascii_alphanumeric), + ) { + self.l.set_error(Error::LexInvalidLiteral); + } else { + self.l.push_token(Lit::new_uint(int)) + } + } + fn scan_signed_integer(&mut self) { + if self.l.token_buffer.rounded_cursor_value().is_ascii_digit() { + unsafe { + // UNSAFE(@ohsayan): the cursor was moved ahead, now we're moving it back + self.l.token_buffer.decr_cursor() } + let (okay, int) = self + .l + .token_buffer + .try_next_ascii_i64_stop_at(|b| 
!b.is_ascii_digit()); + if okay + & !self + .l + .token_buffer + .rounded_cursor_value() + .is_ascii_alphabetic() + { + self.l.push_token(Lit::new_sint(int)) + } else { + self.l.set_error(Error::LexInvalidLiteral) + } + } else { + self.l.push_token(Token![-]); } } } +/* + secure +*/ + #[derive(Debug)] -/// This lexer implements the `opmod-safe` for BlueQL -pub struct SafeLexer<'a> { - base: RawLexer<'a>, +pub struct SecureLexer<'a> { + l: Lexer<'a>, + param_buffer: BufferedScanner<'a>, } -impl<'a> SafeLexer<'a> { - #[inline(always)] - pub const fn new(src: Slice<'a>) -> Self { +impl<'a> SecureLexer<'a> { + pub fn new(src: &'a [u8], query_window: usize) -> Self { Self { - base: RawLexer::new(src), + l: Lexer::new(&src[..query_window]), + param_buffer: BufferedScanner::new(&src[query_window..]), } } - #[inline(always)] - pub fn lex(src: Slice<'a>) -> QueryResult> { - Self::new(src)._lex() + pub fn lex(src: &'a [u8], query_window: usize) -> QueryResult>> { + Self::new(src, query_window)._lex() } - #[inline(always)] - fn _lex(self) -> QueryResult>> { - let Self { base: mut l } = self; - while l.not_exhausted() && l.no_error() { +} + +impl<'a> SecureLexer<'a> { + fn _lex(mut self) -> QueryResult>> { + while self.l.no_error() & !self.l.token_buffer.eof() { let b = unsafe { - // UNSAFE(@ohsayan): This is correct because of the pre invariant - l.deref_cursor() + // UNSAFE(@ohsayan): loop invariant + self.l.token_buffer.deref_cursor() }; match b { - // ident or kw - b if b.is_ascii_alphabetic() => l.scan_ident_or_keyword(), - // extra terminal chars - b'\n' | b'\t' | b' ' => l.trim_ahead(), - // arbitrary byte - b => l.scan_byte(b), + b if b.is_ascii_alphabetic() | (b == b'_') => self.l.scan_ident_or_keyword(), + b'?' 
=> { + // a parameter: null, bool, sint, uint, float, binary, string + const TYPE: [&str; 8] = [ + "null", "bool", "uint", "sint", "float", "binary", "string", "ERROR", + ]; + // skip the param byte + unsafe { + // UNSAFE(@ohsayan): loop invariant + self.l.token_buffer.incr_cursor() + } + // find target + let ecc_code = SCAN_PARAM.len() - 1; + let target_code = self.param_buffer.rounded_cursor_value(); + let target_fn = target_code.min(ecc_code as u8); + // forward if we have target + unsafe { + self.param_buffer + .incr_cursor_by((target_code == target_fn) as _) + } + // check requirements + let has_enough = self + .param_buffer + .has_left(SCAN_PARAM_EXPECT[target_fn as usize] as _); + let final_target = + (has_enough as u8 * target_fn) | (!has_enough as u8 * ecc_code as u8); + // exec + let final_target = final_target as usize; + unsafe { + if final_target >= SCAN_PARAM.len() { + impossible!() + } + } + unsafe { + // UNSAFE(@ohsayan): our computation above ensures that we're meeting the expected target + SCAN_PARAM[final_target](&mut self) + } + } + b' ' | b'\t' | b'\n' => self.l.trim_ahead(), + sym => self.l.scan_byte(sym), } } - let RawLexer { - last_error, tokens, .. - } = l; - match last_error { - None => Ok(tokens), + match self.l.last_error { + None => Ok(self.l.tokens), Some(e) => Err(e), } } } -const ALLOW_UNSIGNED: bool = false; -const ALLOW_SIGNED: bool = true; - -pub trait NumberDefinition: Sized + fmt::Debug + Copy + Clone + BitOr { - const ALLOW_SIGNED: bool; - fn mul_of(&self, v: u8) -> (Self, bool); - fn add_of(&self, v: u8) -> (Self, bool); - fn sub_of(&self, v: u8) -> (Self, bool); - fn qualified_max_length() -> usize; - fn zero() -> Self; - fn b(self, b: bool) -> Self; -} - -macro_rules! impl_number_def { - ($( - $ty:ty {$supports_signed:ident, $qualified_max_length:expr}),* $(,)? 
- ) => { - $(impl NumberDefinition for $ty { - const ALLOW_SIGNED: bool = $supports_signed; - #[inline(always)] fn zero() -> Self { 0 } - #[inline(always)] fn b(self, b: bool) -> Self { b as Self * self } - #[inline(always)] - fn mul_of(&self, v: u8) -> ($ty, bool) { <$ty>::overflowing_mul(*self, v as $ty) } - #[inline(always)] - fn add_of(&self, v: u8) -> ($ty, bool) { <$ty>::overflowing_add(*self, v as $ty) } - #[inline(always)] - fn sub_of(&self, v: u8) -> ($ty, bool) { <$ty>::overflowing_sub(*self, v as $ty) } - #[inline(always)] fn qualified_max_length() -> usize { $qualified_max_length } - })* - } -} - -#[cfg(target_pointer_width = "64")] -const SZ_USIZE: usize = 20; -#[cfg(target_pointer_width = "32")] -const SZ_USIZE: usize = 10; -#[cfg(target_pointer_width = "64")] -const SZ_ISIZE: usize = 20; -#[cfg(target_pointer_width = "32")] -const SZ_ISIZE: usize = 11; - -impl_number_def! { - usize {ALLOW_SIGNED, SZ_USIZE}, - // 255 - u8 {ALLOW_UNSIGNED, 3}, - // 65536 - u16 {ALLOW_UNSIGNED, 5}, - // 4294967296 - u32 {ALLOW_UNSIGNED, 10}, - // 18446744073709551616 - u64 {ALLOW_UNSIGNED, 20}, - // signed - isize {ALLOW_SIGNED, SZ_ISIZE}, - // -128 - i8 {ALLOW_SIGNED, 4}, - // -32768 - i16 {ALLOW_SIGNED, 6}, - // -2147483648 - i32 {ALLOW_SIGNED, 11}, - // -9223372036854775808 - i64 {ALLOW_SIGNED, 20}, -} - -#[inline(always)] -pub(super) fn decode_num_ub(src: &[u8], flag: &mut bool, cnt: &mut usize) -> N -where - N: NumberDefinition, -{ - let l = src.len(); - let mut okay = !src.is_empty(); - let mut i = 0; - let mut number = N::zero(); - let mut nx_stop = false; - - let is_signed = if N::ALLOW_SIGNED { - let loc_s = i < l && src[i] == b'-'; - i += loc_s as usize; - okay &= (i + 2) <= l; // [-][digit][LF] - loc_s - } else { - false - }; - - while i < l && okay && !nx_stop { - // potential exit - nx_stop = src[i] == b'\n'; - // potential entry - let mut local_ok = src[i].is_ascii_digit(); - let (p, p_of) = number.mul_of(10); - local_ok &= !p_of; - let lfret = if 
N::ALLOW_SIGNED && is_signed { - let (d, d_of) = p.sub_of(src[i] & 0x0f); - local_ok &= !d_of; - d - } else { - let (s, s_of) = p.add_of(src[i] & 0x0f); - local_ok &= !s_of; - s - }; - // reassign or assign - let reassign = number.b(nx_stop); - let assign = lfret.b(!nx_stop); - number = reassign | assign; - okay &= local_ok | nx_stop; - i += okay as usize; - } - if N::ALLOW_SIGNED { - number = number.b(okay); - } - okay &= nx_stop; - *cnt += i; - *flag &= okay; - number -} - -#[derive(Debug, PartialEq)] -/// Data constructed from `opmode-safe` -pub struct SafeQueryData<'a> { - p: Box<[LitIR<'a>]>, - t: Vec>, -} - -impl<'a> SafeQueryData<'a> { - #[cfg(test)] - pub fn new_test(p: Box<[LitIR<'a>]>, t: Vec>) -> Self { - Self { p, t } - } - #[inline(always)] - pub fn parse_data(pf: Slice<'a>, pf_sz: usize) -> QueryResult]>> { - Self::p_revloop(pf, pf_sz) - } - #[inline(always)] - pub fn parse(qf: Slice<'a>, pf: Slice<'a>, pf_sz: usize) -> QueryResult { - let q = SafeLexer::lex(qf); - let p = Self::p_revloop(pf, pf_sz); - match (q, p) { - (Ok(q), Ok(p)) => Ok(Self { t: q, p }), - // first error - (Err(e), _) | (_, Err(e)) => Err(e), - } - } - #[inline] - pub(super) fn p_revloop(mut src: Slice<'a>, size: usize) -> QueryResult]>> { - static LITIR_TF: [for<'a> fn(Slice<'a>, &mut usize, &mut Vec>) -> bool; 7] = [ - SafeQueryData::uint, // tc: 0 - SafeQueryData::sint, // tc: 1 - SafeQueryData::bool, // tc: 2 - SafeQueryData::float, // tc: 3 - SafeQueryData::bin, // tc: 4 - SafeQueryData::str, // tc: 5 - |_, _, _| false, // ecc: 6 - ]; - let nonpadded_offset = (LITIR_TF.len() - 2) as u8; - let ecc_offset = LITIR_TF.len() - 1; - let mut okay = true; - let mut data = Vec::with_capacity(size); - while src.len() >= 3 && okay { - let tc = src[0]; - okay &= tc <= nonpadded_offset; - let mx = ecc_offset.min(tc as usize); - let mut i_ = 1; - okay &= LITIR_TF[mx](&src[1..], &mut i_, &mut data); - src = &src[i_..]; - } - okay &= src.is_empty() && data.len() == size; - if 
compiler::likely(okay) { - Ok(data.into_boxed_slice()) - } else { - Err(Error::LexInvalidEscapedLiteral) - } - } -} - -// low level methods -impl<'b> SafeQueryData<'b> { - #[inline(always)] - fn mxple<'a>(src: Slice<'a>, cnt: &mut usize, flag: &mut bool) -> Slice<'a> { - // find payload length - let mut i = 0; - let payload_len = decode_num_ub::(src, flag, &mut i); - let src = &src[i..]; - // find payload - *flag &= src.len() >= payload_len; - let mx_extract = payload_len.min(src.len()); - // incr cursor - i += mx_extract; - *cnt += i; - unsafe { - // UNSAFE(@ohsayan): src is correct (guaranteed). even if the decoded length returns an error we still remain within bounds of the EOA - slice::from_raw_parts(src.as_ptr(), mx_extract) - } - } - #[inline(always)] - pub(super) fn uint<'a>(src: Slice<'a>, cnt: &mut usize, data: &mut Vec>) -> bool { - let mut b = true; - let r = decode_num_ub(src, &mut b, cnt); - data.push(LitIR::UnsignedInt(r)); - b - } - #[inline(always)] - pub(super) fn sint<'a>(src: Slice<'a>, cnt: &mut usize, data: &mut Vec>) -> bool { - let mut b = true; - let r = decode_num_ub(src, &mut b, cnt); - data.push(LitIR::SignedInt(r)); - b - } - #[inline(always)] - pub(super) fn bool<'a>(src: Slice<'a>, cnt: &mut usize, data: &mut Vec>) -> bool { - // `true\n` or `false\n` - let mx = 6.min(src.len()); - let slice = &src[..mx]; - let v_true = slice.starts_with(b"true\n"); - let v_false = slice.starts_with(b"false\n"); - let incr = v_true as usize * 5 + v_false as usize * 6; - data.push(LitIR::Bool(v_true)); - *cnt += incr; - v_true | v_false - } - #[inline(always)] - pub(super) fn float<'a>(src: Slice<'a>, cnt: &mut usize, data: &mut Vec>) -> bool { - let mut okay = true; - let payload = Self::mxple(src, cnt, &mut okay); - match String::from_utf8_lossy(payload).parse() { - Ok(p) if compiler::likely(okay) => { - data.push(LitIR::Float(p)); +const SCAN_PARAM_EXPECT: [u8; 8] = [0, 1, 2, 2, 2, 2, 2, 0]; +static SCAN_PARAM: [unsafe fn(&mut SecureLexer); 8] = 
unsafe { + [ + // null + |s| s.l.push_token(Token![null]), + // bool + |slf| { + let nb = slf.param_buffer.next_byte(); + slf.l.push_token(Token::Lit(Lit::new_bool(nb == 1))); + if nb > 1 { + slf.l.set_error(Error::LexInvalidEscapedLiteral); } - _ => {} - } - okay - } - #[inline(always)] - pub(super) fn bin<'a>(src: Slice<'a>, cnt: &mut usize, data: &mut Vec>) -> bool { - let mut okay = true; - let payload = Self::mxple(src, cnt, &mut okay); - data.push(LitIR::Bin(payload)); - okay - } - #[inline(always)] - pub(super) fn str<'a>(src: Slice<'a>, cnt: &mut usize, data: &mut Vec>) -> bool { - let mut okay = true; - let payload = Self::mxple(src, cnt, &mut okay); - match str::from_utf8(payload) { - Ok(s) if compiler::likely(okay) => { - data.push(LitIR::Str(s)); - true + }, + // uint + |slf| match slf + .param_buffer + .try_next_ascii_u64_lf_separated_or_restore_cursor() + { + Some(int) => slf.l.push_token(Lit::new_uint(int)), + None => slf.l.set_error(Error::LexInvalidEscapedLiteral), + }, + // sint + |slf| { + let (okay, int) = slf.param_buffer.try_next_ascii_i64_separated_by::(); + if okay { + slf.l.push_token(Lit::new_sint(int)) + } else { + slf.l.set_error(Error::LexInvalidLiteral) } - _ => false, - } - } -} + }, + // float + |slf| { + let Some(size_of_body) = slf + .param_buffer + .try_next_ascii_u64_lf_separated_or_restore_cursor() + else { + slf.l.set_error(Error::LexInvalidEscapedLiteral); + return; + }; + let body = match slf + .param_buffer + .try_next_variable_block(size_of_body as usize) + { + Some(body) => body, + None => { + slf.l.set_error(Error::LexInvalidEscapedLiteral); + return; + } + }; + match core::str::from_utf8(body).map(core::str::FromStr::from_str) { + Ok(Ok(fp)) => slf.l.push_token(Lit::new_float(fp)), + _ => slf.l.set_error(Error::LexInvalidEscapedLiteral), + } + }, + // binary + |slf| { + let Some(size_of_body) = slf + .param_buffer + .try_next_ascii_u64_lf_separated_or_restore_cursor() + else { + 
slf.l.set_error(Error::LexInvalidEscapedLiteral); + return; + }; + match slf + .param_buffer + .try_next_variable_block(size_of_body as usize) + { + Some(block) => slf.l.push_token(Lit::new_bin(block)), + None => slf.l.set_error(Error::LexInvalidEscapedLiteral), + } + }, + // string + |slf| { + let Some(size_of_body) = slf + .param_buffer + .try_next_ascii_u64_lf_separated_or_restore_cursor() + else { + slf.l.set_error(Error::LexInvalidEscapedLiteral); + return; + }; + match slf + .param_buffer + .try_next_variable_block(size_of_body as usize) + .map(core::str::from_utf8) + { + // TODO(@ohsayan): obliterate this alloc + Some(Ok(s)) => slf.l.push_token(Lit::new_string(s.to_owned())), + _ => slf.l.set_error(Error::LexInvalidEscapedLiteral), + } + }, + // ecc + |s| s.l.set_error(Error::LexInvalidEscapedLiteral), + ] +}; diff --git a/server/src/engine/ql/lex/raw.rs b/server/src/engine/ql/lex/raw.rs index 74061272..3fb83abd 100644 --- a/server/src/engine/ql/lex/raw.rs +++ b/server/src/engine/ql/lex/raw.rs @@ -25,12 +25,8 @@ */ use { - super::Slice, - crate::engine::{ - data::{lit::Lit, spec::Dataspec1D}, - error::Error, - }, - core::{borrow::Borrow, fmt, ops::Deref, slice, str}, + crate::engine::data::lit::Lit, + core::{borrow::Borrow, fmt, ops::Deref, str}, }; #[repr(transparent)] @@ -367,176 +363,3 @@ impl<'a> AsRef> for Token<'a> { self } } - -#[derive(Debug)] -pub struct RawLexer<'a> { - c: *const u8, - e: *const u8, - pub(super) tokens: Vec>, - pub(super) last_error: Option, -} - -// ctor -impl<'a> RawLexer<'a> { - #[inline(always)] - pub(super) const fn new(src: Slice<'a>) -> Self { - Self { - c: src.as_ptr(), - e: unsafe { - // UNSAFE(@ohsayan): Always safe (<= EOA) - src.as_ptr().add(src.len()) - }, - last_error: None, - tokens: Vec::new(), - } - } -} - -// meta -impl<'a> RawLexer<'a> { - #[inline(always)] - pub(super) const fn cursor(&self) -> *const u8 { - self.c - } - #[inline(always)] - pub(super) const fn data_end_ptr(&self) -> *const u8 { - self.e - } - 
#[inline(always)] - pub(super) fn not_exhausted(&self) -> bool { - self.data_end_ptr() > self.cursor() - } - #[inline(always)] - pub(super) fn exhausted(&self) -> bool { - self.cursor() == self.data_end_ptr() - } - #[inline(always)] - pub(super) fn remaining(&self) -> usize { - unsafe { - // UNSAFE(@ohsayan): valid ptrs - self.e.offset_from(self.c) as usize - } - } - #[inline(always)] - pub(super) unsafe fn deref_cursor(&self) -> u8 { - *self.cursor() - } - #[inline(always)] - pub(super) unsafe fn incr_cursor_by(&mut self, by: usize) { - debug_assert!(self.remaining() >= by); - self.c = self.cursor().add(by) - } - #[inline(always)] - pub(super) unsafe fn incr_cursor(&mut self) { - self.incr_cursor_by(1) - } - #[inline(always)] - unsafe fn incr_cursor_if(&mut self, iff: bool) { - self.incr_cursor_by(iff as usize) - } - #[inline(always)] - pub(super) fn push_token(&mut self, token: impl Into>) { - self.tokens.push(token.into()) - } - #[inline(always)] - pub(super) fn peek_is(&mut self, f: impl FnOnce(u8) -> bool) -> bool { - self.not_exhausted() - && unsafe { - // UNSAFE(@ohsayan): verified cursor is nonnull - f(self.deref_cursor()) - } - } - #[inline(always)] - pub(super) fn peek_is_and_forward(&mut self, f: impl FnOnce(u8) -> bool) -> bool { - let did_fw = self.not_exhausted() - && unsafe { - // UNSAFE(@ohsayan): verified ptr - f(self.deref_cursor()) - }; - unsafe { - // UNSAFE(@ohsayan): increment cursor - self.incr_cursor_if(did_fw); - } - did_fw - } - #[inline(always)] - fn peek_eq_and_forward_or_eof(&mut self, eq: u8) -> bool { - unsafe { - // UNSAFE(@ohsayan): verified cursor - let eq = self.not_exhausted() && self.deref_cursor() == eq; - // UNSAFE(@ohsayan): incr cursor if matched - self.incr_cursor_if(eq); - eq | self.exhausted() - } - } - #[inline(always)] - pub(super) fn peek_neq(&self, b: u8) -> bool { - self.not_exhausted() - && unsafe { - // UNSAFE(@ohsayan): verified cursor - self.deref_cursor() != b - } - } - #[inline(always)] - pub(super) fn 
peek_eq_and_forward(&mut self, b: u8) -> bool { - unsafe { - // UNSAFE(@ohsayan): verified cursor - let r = self.not_exhausted() && self.deref_cursor() == b; - self.incr_cursor_if(r); - r - } - } - #[inline(always)] - pub(super) fn trim_ahead(&mut self) { - while self.peek_is_and_forward(|b| b == b' ' || b == b'\t' || b == b'\n') {} - } - #[inline(always)] - pub(super) fn set_error(&mut self, e: Error) { - self.last_error = Some(e); - } - #[inline(always)] - pub(super) fn no_error(&self) -> bool { - self.last_error.is_none() - } -} - -// high level methods -impl<'a> RawLexer<'a> { - #[inline(always)] - pub(super) fn scan_ident(&mut self) -> Slice<'a> { - let s = self.cursor(); - unsafe { - while self.peek_is(|b| b.is_ascii_alphanumeric() || b == b'_') { - // UNSAFE(@ohsayan): increment cursor, this is valid - self.incr_cursor(); - } - // UNSAFE(@ohsayan): valid slice and ptrs - slice::from_raw_parts(s, self.cursor().offset_from(s) as usize) - } - } - #[inline(always)] - pub(super) fn scan_ident_or_keyword(&mut self) { - let s = self.scan_ident(); - let st = s.to_ascii_lowercase(); - match kwof(&st) { - Some(kw) => self.tokens.push(kw.into()), - // FIXME(@ohsayan): Uh, mind fixing this? 
The only advantage is that I can keep the graph *memory* footprint small - None if st == b"true" || st == b"false" => self.push_token(Lit::Bool(st == b"true")), - None => self.tokens.push(unsafe { - // UNSAFE(@ohsayan): scan_ident only returns a valid ident which is always a string - Token::Ident(Ident::new(s)) - }), - } - } - #[inline(always)] - pub(super) fn scan_byte(&mut self, byte: u8) { - match symof(byte) { - Some(tok) => self.push_token(tok), - None => return self.set_error(Error::LexUnexpectedByte), - } - unsafe { - // UNSAFE(@ohsayan): we are sent a byte, so fw cursor - self.incr_cursor(); - } - } -} diff --git a/server/src/engine/ql/macros.rs b/server/src/engine/ql/macros.rs index 7daa66ac..b5e7cde5 100644 --- a/server/src/engine/ql/macros.rs +++ b/server/src/engine/ql/macros.rs @@ -331,3 +331,11 @@ macro_rules! build_lut { } } } + +#[cfg(test)] +macro_rules! into_vec { + ($ty:ty => ($($v:expr),* $(,)?)) => {{ + let v: Vec<$ty> = std::vec![$($v.into(),)*]; + v + }} +} diff --git a/server/src/engine/ql/tests.rs b/server/src/engine/ql/tests.rs index 259d97da..ebcef8d1 100644 --- a/server/src/engine/ql/tests.rs +++ b/server/src/engine/ql/tests.rs @@ -25,7 +25,7 @@ */ use { - super::lex::{InsecureLexer, SafeLexer, Symbol, Token}, + super::lex::{InsecureLexer, SecureLexer, Symbol, Token}, crate::{ engine::{data::cell::Datacell, error::QueryResult}, util::test_utils, @@ -44,10 +44,8 @@ mod structure_syn; pub fn lex_insecure(src: &[u8]) -> QueryResult>> { InsecureLexer::lex(src) } -#[inline(always)] -/// Uses the [`SafeLexer`] to lex the given input -pub fn lex_secure(src: &[u8]) -> QueryResult> { - SafeLexer::lex(src) +pub fn lex_secure<'a>(src: &'a [u8], query_window: usize) -> QueryResult>> { + SecureLexer::lex(src, query_window) } pub trait NullableData { diff --git a/server/src/engine/ql/tests/dml_tests.rs b/server/src/engine/ql/tests/dml_tests.rs index 2b0cc879..eeb7ec3a 100644 --- a/server/src/engine/ql/tests/dml_tests.rs +++ 
b/server/src/engine/ql/tests/dml_tests.rs @@ -27,13 +27,7 @@ use super::*; mod list_parse { use super::*; - use crate::engine::{ - data::{lit::LitIR, spec::Dataspec1D}, - ql::{ - ast::{parse_ast_node_full, traits::ASTNode, State, SubstitutedData}, - dml::ins::List, - }, - }; + use crate::engine::ql::{ast::parse_ast_node_full, dml::ins::List}; #[test] fn list_mini() { @@ -58,28 +52,6 @@ mod list_parse { assert_eq!(r.as_slice(), into_array![1, 2, 3, 4]) } #[test] - fn list_param() { - let tok = lex_secure( - b" - [?, ?, ?, ?] - ", - ) - .unwrap(); - let data = [ - LitIR::UnsignedInt(1), - LitIR::UnsignedInt(2), - LitIR::UnsignedInt(3), - LitIR::UnsignedInt(4), - ]; - let mut state = State::new(&tok[1..], SubstitutedData::new(&data)); - assert_eq!( - ::from_state(&mut state) - .unwrap() - .into_inner(), - into_array![1, 2, 3, 4] - ) - } - #[test] fn list_pro() { let tok = lex_insecure( b" @@ -104,40 +76,6 @@ mod list_parse { ) } #[test] - fn list_pro_param() { - let tok = lex_secure( - b" - [ - [?, ?], - [?, ?], - [?, ?], - [] - ] - ", - ) - .unwrap(); - let data = [ - LitIR::UnsignedInt(1), - LitIR::UnsignedInt(2), - LitIR::UnsignedInt(3), - LitIR::UnsignedInt(4), - LitIR::UnsignedInt(5), - LitIR::UnsignedInt(6), - ]; - let mut state = State::new(&tok[1..], SubstitutedData::new(&data)); - assert_eq!( - ::from_state(&mut state) - .unwrap() - .into_inner(), - into_array![ - into_array![1, 2], - into_array![3, 4], - into_array![5, 6], - into_array![] - ] - ) - } - #[test] fn list_pro_max() { let tok = lex_insecure( b" @@ -161,46 +99,6 @@ mod list_parse { ] ) } - #[test] - fn list_pro_max_param() { - let tok = lex_secure( - b" - [ - [[?, ?], [?, ?]], - [[], [?, ?]], - [[?, ?], [?, ?]], - [[?, ?], []] - ] - ", - ) - .unwrap(); - let data = [ - LitIR::UnsignedInt(1), - LitIR::UnsignedInt(1), - LitIR::UnsignedInt(2), - LitIR::UnsignedInt(2), - LitIR::UnsignedInt(4), - LitIR::UnsignedInt(4), - LitIR::UnsignedInt(5), - LitIR::UnsignedInt(5), - LitIR::UnsignedInt(6), - 
LitIR::UnsignedInt(6), - LitIR::UnsignedInt(7), - LitIR::UnsignedInt(7), - ]; - let mut state = State::new(&tok[1..], SubstitutedData::new(&data)); - assert_eq!( - ::from_state(&mut state) - .unwrap() - .into_inner(), - into_array![ - into_array![into_array![1, 1], into_array![2, 2]], - into_array![into_array![], into_array![4, 4]], - into_array![into_array![5, 5], into_array![6, 6]], - into_array![into_array![7, 7], into_array![]], - ] - ) - } } mod tuple_syntax { @@ -599,7 +497,7 @@ mod stmt_select { use { super::*, crate::engine::{ - data::{lit::LitIR, spec::Dataspec1D}, + data::lit::Lit, ql::{ ast::{parse_ast_node_full, Entity}, dml::{sel::SelectStatement, RelationalExpr}, @@ -622,7 +520,7 @@ mod stmt_select { true, dict! { Ident::from("username") => RelationalExpr::new( - Ident::from("username"), LitIR::Str("sayan"), RelationalExpr::OP_EQ + Ident::from("username"), Lit::new_str("sayan"), RelationalExpr::OP_EQ ), }, ); @@ -643,7 +541,7 @@ mod stmt_select { false, dict! { Ident::from("username") => RelationalExpr::new( - Ident::from("username"), LitIR::Str("sayan"), RelationalExpr::OP_EQ + Ident::from("username"), Lit::new_str("sayan"), RelationalExpr::OP_EQ ), }, ); @@ -664,7 +562,7 @@ mod stmt_select { false, dict! { Ident::from("username") => RelationalExpr::new( - Ident::from("username"), LitIR::Str("sayan"), RelationalExpr::OP_EQ + Ident::from("username"), Lit::new_str("sayan"), RelationalExpr::OP_EQ ), }, ); @@ -685,7 +583,7 @@ mod stmt_select { false, dict! 
{ Ident::from("username") => RelationalExpr::new( - Ident::from("username"), LitIR::Str("sayan"), RelationalExpr::OP_EQ + Ident::from("username"), Lit::new_str("sayan"), RelationalExpr::OP_EQ ), }, ); @@ -697,7 +595,7 @@ mod expression_tests { super::*, crate::engine::{ core::query_meta::AssignmentOperator, - data::{lit::LitIR, spec::Dataspec1D}, + data::lit::Lit, ql::{ast::parse_ast_node_full, dml::upd::AssignmentExpression, lex::Ident}, }, }; @@ -709,7 +607,7 @@ mod expression_tests { r, AssignmentExpression::new( Ident::from("username"), - LitIR::Str("sayan"), + Lit::new_str("sayan"), AssignmentOperator::Assign ) ); @@ -722,7 +620,7 @@ mod expression_tests { r, AssignmentExpression::new( Ident::from("followers"), - LitIR::UnsignedInt(100), + Lit::new_uint(100), AssignmentOperator::AddAssign ) ); @@ -735,7 +633,7 @@ mod expression_tests { r, AssignmentExpression::new( Ident::from("following"), - LitIR::UnsignedInt(150), + Lit::new_uint(150), AssignmentOperator::SubAssign ) ); @@ -748,7 +646,7 @@ mod expression_tests { r, AssignmentExpression::new( Ident::from("product_qty"), - LitIR::UnsignedInt(2), + Lit::new_uint(2), AssignmentOperator::MulAssign ) ); @@ -761,7 +659,7 @@ mod expression_tests { r, AssignmentExpression::new( Ident::from("image_crop_factor"), - LitIR::UnsignedInt(2), + Lit::new_uint(2), AssignmentOperator::DivAssign ) ); @@ -772,7 +670,7 @@ mod update_statement { super::*, crate::engine::{ core::query_meta::AssignmentOperator, - data::{lit::LitIR, spec::Dataspec1D}, + data::lit::Lit, ql::{ ast::{parse_ast_node_full, Entity}, dml::{ @@ -796,13 +694,13 @@ mod update_statement { Entity::Single(Ident::from("app")), vec![AssignmentExpression::new( Ident::from("notes"), - LitIR::Str("this is my new note"), + Lit::new_str("this is my new note"), AssignmentOperator::AddAssign, )], WhereClause::new(dict! 
{ Ident::from("username") => RelationalExpr::new( Ident::from("username"), - LitIR::Str("sayan"), + Lit::new_str("sayan"), RelationalExpr::OP_EQ ) }), @@ -829,19 +727,19 @@ mod update_statement { vec![ AssignmentExpression::new( Ident::from("notes"), - LitIR::Str("this is my new note"), + Lit::new_str("this is my new note"), AssignmentOperator::AddAssign, ), AssignmentExpression::new( Ident::from("email"), - LitIR::Str("sayan@example.com"), + Lit::new_str("sayan@example.com"), AssignmentOperator::Assign, ), ], WhereClause::new(dict! { Ident::from("username") => RelationalExpr::new( Ident::from("username"), - LitIR::Str("sayan"), + Lit::new_str("sayan"), RelationalExpr::OP_EQ ) }), @@ -853,7 +751,7 @@ mod delete_stmt { use { super::*, crate::engine::{ - data::{lit::LitIR, spec::Dataspec1D}, + data::lit::Lit, ql::{ ast::{parse_ast_node_full, Entity}, dml::{del::DeleteStatement, RelationalExpr}, @@ -875,7 +773,7 @@ mod delete_stmt { dict! { Ident::from("username") => RelationalExpr::new( Ident::from("username"), - LitIR::Str("sayan"), + Lit::new_str("sayan"), RelationalExpr::OP_EQ ) }, @@ -898,7 +796,7 @@ mod delete_stmt { dict! 
{ Ident::from("username") => RelationalExpr::new( Ident::from("username"), - LitIR::Str("sayan"), + Lit::new_str("sayan"), RelationalExpr::OP_EQ ) }, @@ -913,7 +811,7 @@ mod relational_expr { use { super::*, crate::engine::{ - data::{lit::LitIR, spec::Dataspec1D}, + data::lit::Lit, ql::{ast::parse_ast_node_full, dml::RelationalExpr, lex::Ident}, }, }; @@ -925,7 +823,7 @@ mod relational_expr { assert_eq!( r, RelationalExpr { - rhs: LitIR::UnsignedInt(10), + rhs: Lit::new_uint(10), lhs: Ident::from("primary_key"), opc: RelationalExpr::OP_EQ } @@ -938,7 +836,7 @@ mod relational_expr { assert_eq!( r, RelationalExpr { - rhs: LitIR::UnsignedInt(10), + rhs: Lit::new_uint(10), lhs: Ident::from("primary_key"), opc: RelationalExpr::OP_NE } @@ -951,7 +849,7 @@ mod relational_expr { assert_eq!( r, RelationalExpr { - rhs: LitIR::UnsignedInt(10), + rhs: Lit::new_uint(10), lhs: Ident::from("primary_key"), opc: RelationalExpr::OP_GT } @@ -964,7 +862,7 @@ mod relational_expr { assert_eq!( r, RelationalExpr { - rhs: LitIR::UnsignedInt(10), + rhs: Lit::new_uint(10), lhs: Ident::from("primary_key"), opc: RelationalExpr::OP_GE } @@ -977,7 +875,7 @@ mod relational_expr { assert_eq!( r, RelationalExpr { - rhs: LitIR::UnsignedInt(10), + rhs: Lit::new_uint(10), lhs: Ident::from("primary_key"), opc: RelationalExpr::OP_LT } @@ -991,7 +889,7 @@ mod relational_expr { r, RelationalExpr::new( Ident::from("primary_key"), - LitIR::UnsignedInt(10), + Lit::new_uint(10), RelationalExpr::OP_LE ) ); @@ -1001,7 +899,7 @@ mod where_clause { use { super::*, crate::engine::{ - data::{lit::LitIR, spec::Dataspec1D}, + data::lit::Lit, ql::{ ast::parse_ast_node_full, dml::{RelationalExpr, WhereClause}, @@ -1020,7 +918,7 @@ mod where_clause { let expected = WhereClause::new(dict! { Ident::from("x") => RelationalExpr::new( Ident::from("x"), - LitIR::UnsignedInt(100), + Lit::new_uint(100), RelationalExpr::OP_EQ ) }); @@ -1037,12 +935,12 @@ mod where_clause { let expected = WhereClause::new(dict! 
{ Ident::from("userid") => RelationalExpr::new( Ident::from("userid"), - LitIR::UnsignedInt(100), + Lit::new_uint(100), RelationalExpr::OP_EQ ), Ident::from("pass") => RelationalExpr::new( Ident::from("pass"), - LitIR::Str("password"), + Lit::new_str("password"), RelationalExpr::OP_EQ ) }); diff --git a/server/src/engine/ql/tests/lexer_tests.rs b/server/src/engine/ql/tests/lexer_tests.rs index b680a080..0af6afc0 100644 --- a/server/src/engine/ql/tests/lexer_tests.rs +++ b/server/src/engine/ql/tests/lexer_tests.rs @@ -27,12 +27,9 @@ use { super::{ super::lex::{Ident, Token}, - lex_insecure, - }, - crate::engine::{ - data::{lit::Lit, spec::Dataspec1D}, - error::Error, + lex_insecure, lex_secure, }, + crate::engine::{data::lit::Lit, error::Error}, }; macro_rules! v( @@ -59,7 +56,7 @@ fn lex_unsigned_int() { let number = v!("123456"); assert_eq!( lex_insecure(&number).unwrap(), - vec![Token::Lit(Lit::UnsignedInt(123456))] + vec![Token::Lit(Lit::new_uint(123456))] ); } #[test] @@ -67,16 +64,19 @@ fn lex_signed_int() { let number = v!("-123456"); assert_eq!( lex_insecure(&number).unwrap(), - vec![Token::Lit(Lit::SignedInt(-123456))] + vec![Token::Lit(Lit::new_sint(-123456))] ); } #[test] fn lex_bool() { let (t, f) = v!("true", "false"); - assert_eq!(lex_insecure(&t).unwrap(), vec![Token::Lit(Lit::Bool(true))]); + assert_eq!( + lex_insecure(&t).unwrap(), + vec![Token::Lit(Lit::new_bool(true))] + ); assert_eq!( lex_insecure(&f).unwrap(), - vec![Token::Lit(Lit::Bool(false))] + vec![Token::Lit(Lit::new_bool(false))] ); } #[test] @@ -84,12 +84,12 @@ fn lex_string() { let s = br#" "hello, world" "#; assert_eq!( lex_insecure(s).unwrap(), - vec![Token::Lit(Lit::Str("hello, world".into()))] + vec![Token::Lit(Lit::new_string("hello, world".into()))] ); let s = br#" 'hello, world' "#; assert_eq!( lex_insecure(s).unwrap(), - vec![Token::Lit(Lit::Str("hello, world".into()))] + vec![Token::Lit(Lit::new_string("hello, world".into()))] ); } #[test] @@ -97,12 +97,12 @@ fn 
lex_string_test_escape_quote() { let s = br#" "\"hello world\"" "#; // == "hello world" assert_eq!( lex_insecure(s).unwrap(), - vec![Token::Lit(Lit::Str("\"hello world\"".into()))] + vec![Token::Lit(Lit::new_string("\"hello world\"".into()))] ); let s = br#" '\'hello world\'' "#; // == 'hello world' assert_eq!( lex_insecure(s).unwrap(), - vec![Token::Lit(Lit::Str("'hello world'".into()))] + vec![Token::Lit(Lit::new_string("'hello world'".into()))] ); } #[test] @@ -110,12 +110,12 @@ fn lex_string_use_different_quote_style() { let s = br#" "he's on it" "#; assert_eq!( lex_insecure(s).unwrap(), - vec![Token::Lit(Lit::Str("he's on it".into()))] + vec![Token::Lit(Lit::new_string("he's on it".into()))] ); let s = br#" 'he thinks that "that girl" fixed it' "#; assert_eq!( lex_insecure(s).unwrap(), - vec![Token::Lit(Lit::Str( + vec![Token::Lit(Lit::new_string( "he thinks that \"that girl\" fixed it".into() ))] ) @@ -125,18 +125,18 @@ fn lex_string_escape_bs() { let s = v!(r#" "windows has c:\\" "#); assert_eq!( lex_insecure(&s).unwrap(), - vec![Token::Lit(Lit::Str("windows has c:\\".into()))] + vec![Token::Lit(Lit::new_string("windows has c:\\".into()))] ); let s = v!(r#" 'windows has c:\\' "#); assert_eq!( lex_insecure(&s).unwrap(), - vec![Token::Lit(Lit::Str("windows has c:\\".into()))] + vec![Token::Lit(Lit::new_string("windows has c:\\".into()))] ); let lol = v!(r#"'\\\\\\\\\\'"#); let lexed = lex_insecure(&lol).unwrap(); assert_eq!( lexed, - vec![Token::Lit(Lit::Str("\\".repeat(5).into_boxed_str()))], + vec![Token::Lit(Lit::new_string("\\".repeat(5)))], "lol" ) } @@ -156,352 +156,166 @@ fn lex_string_unclosed() { fn lex_unsafe_literal_mini() { let usl = lex_insecure("\r0\n".as_bytes()).unwrap(); assert_eq!(usl.len(), 1); - assert_eq!(Token::Lit(Lit::Bin(b"")), usl[0]); + assert_eq!(Token::Lit(Lit::new_bin(b"")), usl[0]); } #[test] fn lex_unsafe_literal() { let usl = lex_insecure("\r9\nabcdefghi".as_bytes()).unwrap(); assert_eq!(usl.len(), 1); - 
assert_eq!(Token::Lit(Lit::Bin(b"abcdefghi")), usl[0]); + assert_eq!(Token::Lit(Lit::new_bin(b"abcdefghi")), usl[0]); } #[test] fn lex_unsafe_literal_pro() { let usl = lex_insecure("\r18\nabcdefghi123456789".as_bytes()).unwrap(); assert_eq!(usl.len(), 1); - assert_eq!(Token::Lit(Lit::Bin(b"abcdefghi123456789")), usl[0]); + assert_eq!(Token::Lit(Lit::new_bin(b"abcdefghi123456789")), usl[0]); } -mod num_tests { - use crate::engine::ql::lex::decode_num_ub as ubdc; - mod uint8 { - use super::*; - #[test] - fn ndecub_u8_ok() { - const SRC: &[u8] = b"123\n"; - let mut i = 0; - let mut b = true; - let x = ubdc::(SRC, &mut b, &mut i); - assert!(b); - assert_eq!(i, SRC.len()); - assert_eq!(x, 123); - } - #[test] - fn ndecub_u8_lb() { - const SRC: &[u8] = b"0\n"; - let mut i = 0; - let mut b = true; - let x = ubdc::(SRC, &mut b, &mut i); - assert!(b); - assert_eq!(i, SRC.len()); - assert_eq!(x, 0); - } - #[test] - fn ndecub_u8_ub() { - const SRC: &[u8] = b"255\n"; - let mut i = 0; - let mut b = true; - let x = ubdc::(SRC, &mut b, &mut i); - assert!(b); - assert_eq!(i, SRC.len()); - assert_eq!(x, 255); - } - #[test] - fn ndecub_u8_ub_of() { - const SRC: &[u8] = b"256\n"; - let mut i = 0; - let mut b = true; - let x = ubdc::(SRC, &mut b, &mut i); - assert!(!b); - assert_eq!(i, 2); - assert_eq!(x, 0); - } - } - mod sint8 { - use super::*; - #[test] - pub(crate) fn ndecub_i8_ok() { - const SRC: &[u8] = b"-123\n"; - let mut i = 0; - let mut b = true; - let x = ubdc::(SRC, &mut b, &mut i); - assert!(b); - assert_eq!(i, SRC.len()); - assert_eq!(x, -123); - } - #[test] - pub(crate) fn ndecub_i8_lb() { - const SRC: &[u8] = b"-128\n"; - let mut i = 0; - let mut b = true; - let x = ubdc::(SRC, &mut b, &mut i); - assert!(b); - assert_eq!(i, SRC.len()); - assert_eq!(x, -128); - } - #[test] - pub(crate) fn ndecub_i8_lb_of() { - const SRC: &[u8] = b"-129\n"; - let mut i = 0; - let mut b = true; - let x = ubdc::(SRC, &mut b, &mut i); - assert!(!b); - assert_eq!(i, 3); - assert_eq!(x, 0); - 
} - #[test] - pub(crate) fn ndecub_i8_ub() { - const SRC: &[u8] = b"127\n"; - let mut i = 0; - let mut b = true; - let x = ubdc::(SRC, &mut b, &mut i); - assert!(b); - assert_eq!(i, SRC.len()); - assert_eq!(x, 127); - } - #[test] - pub(crate) fn ndecub_i8_ub_of() { - const SRC: &[u8] = b"128\n"; - let mut i = 0; - let mut b = true; - let x = ubdc::(SRC, &mut b, &mut i); - assert!(!b); - assert_eq!(i, 2); - assert_eq!(x, 0); - } - } +/* + safe query tests +*/ + +fn make_safe_query(a: &[u8], b: &[u8]) -> (Vec, usize) { + let mut s = Vec::with_capacity(a.len() + b.len()); + s.extend(a); + s.extend(b); + (s, a.len()) } -mod safequery_params { - use crate::engine::{ - data::{lit::LitIR, spec::Dataspec1D}, - ql::lex::SafeQueryData, - }; - use rand::seq::SliceRandom; - #[test] - fn param_uint() { - let src = b"12345\n"; - let mut d = Vec::new(); - let mut i = 0; - assert!(SafeQueryData::uint(src, &mut i, &mut d)); - assert_eq!(i, src.len()); - assert_eq!(d, vec![LitIR::UnsignedInt(12345)]); - } - #[test] - fn param_sint() { - let src = b"-12345\n"; - let mut d = Vec::new(); - let mut i = 0; - assert!(SafeQueryData::sint(src, &mut i, &mut d)); - assert_eq!(i, src.len()); - assert_eq!(d, vec![LitIR::SignedInt(-12345)]); - } - #[test] - fn param_bool_true() { - let src = b"true\n"; - let mut d = Vec::new(); - let mut i = 0; - assert!(SafeQueryData::bool(src, &mut i, &mut d)); - assert_eq!(i, src.len()); - assert_eq!(d, vec![LitIR::Bool(true)]); - } - #[test] - fn param_bool_false() { - let src = b"false\n"; - let mut d = Vec::new(); - let mut i = 0; - assert!(SafeQueryData::bool(src, &mut i, &mut d)); - assert_eq!(i, src.len()); - assert_eq!(d, vec![LitIR::Bool(false)]); - } - #[test] - fn param_float() { - let src = b"4\n3.14"; - let mut d = Vec::new(); - let mut i = 0; - assert!(SafeQueryData::float(src, &mut i, &mut d)); - assert_eq!(i, src.len()); - assert_eq!(d, vec![LitIR::Float(3.14)]); - } - #[test] - fn param_bin() { - let src = b"5\nsayan"; - let mut d = 
Vec::new(); - let mut i = 0; - assert!(SafeQueryData::bin(src, &mut i, &mut d)); - assert_eq!(i, src.len()); - assert_eq!(d, vec![LitIR::Bin(b"sayan")]); - } - #[test] - fn param_str() { - let src = b"5\nsayan"; - let mut d = Vec::new(); - let mut i = 0; - assert!(SafeQueryData::str(src, &mut i, &mut d)); - assert_eq!(i, src.len()); - assert_eq!(d, vec![LitIR::Str("sayan")]); - } - #[test] - fn param_full_uint() { - let src = b"\x0012345\n"; - let r = SafeQueryData::p_revloop(src, 1).unwrap(); - assert_eq!(r.as_ref(), [LitIR::UnsignedInt(12345)]); - } - #[test] - fn param_full_sint() { - let src = b"\x01-12345\n"; - let r = SafeQueryData::p_revloop(src, 1).unwrap(); - assert_eq!(r.as_ref(), [LitIR::SignedInt(-12345)]); - } - #[test] - fn param_full_bool() { - let src = b"\x02true\n"; - let r = SafeQueryData::p_revloop(src, 1).unwrap(); - assert_eq!(r.as_ref(), [LitIR::Bool(true)]); - let src = b"\x02false\n"; - let r = SafeQueryData::p_revloop(src, 1).unwrap(); - assert_eq!(r.as_ref(), [LitIR::Bool(false)]); - } - #[test] - fn param_full_float() { - let src = b"\x034\n3.14"; - let r = SafeQueryData::p_revloop(src, 1).unwrap(); - assert_eq!(r.as_ref(), [LitIR::Float(3.14)]); - let src = b"\x035\n-3.14"; - let r = SafeQueryData::p_revloop(src, 1).unwrap(); - assert_eq!(r.as_ref(), [LitIR::Float(-3.14)]); - } - #[test] - fn param_full_bin() { - let src = b"\x0412\nhello, world"; - let r = SafeQueryData::p_revloop(src, 1).unwrap(); - assert_eq!(r.as_ref(), [LitIR::Bin(b"hello, world")]); - } - #[test] - fn param_full_str() { - let src = b"\x0512\nhello, world"; - let r = SafeQueryData::p_revloop(src, 1).unwrap(); - assert_eq!(r.as_ref(), [LitIR::Str("hello, world")]); - } - #[test] - fn params_mix() { - let mut rng = rand::thread_rng(); - const DATA: [&[u8]; 6] = [ - b"\x0012345\n", - b"\x01-12345\n", - b"\x02true\n", - b"\x0311\n12345.67890", - b"\x0430\none two three four five binary", - b"\x0527\none two three four five str", - ]; - let retmap: [LitIR; 6] = [ - 
LitIR::UnsignedInt(12345), - LitIR::SignedInt(-12345), - LitIR::Bool(true), - LitIR::Float(12345.67890), - LitIR::Bin(b"one two three four five binary"), - LitIR::Str("one two three four five str"), - ]; - for _ in 0..DATA.len().pow(2) { - let mut local_data = DATA; - local_data.shuffle(&mut rng); - let ret: Vec = local_data - .iter() - .map(|v| retmap[v[0] as usize].clone()) - .collect(); - let src: Vec = local_data.into_iter().flat_map(|v| v.to_owned()).collect(); - let r = SafeQueryData::p_revloop(&src, 6).unwrap(); - assert_eq!(r.as_ref(), ret); - } - } +#[test] +fn safe_query_all_literals() { + let (query, query_window) = make_safe_query( + b"? ? ? ? ? ? ?", + b"\x00\x01\x01\x021234\n\x03-1234\n\x049\n1234.5678\x0513\nbinarywithlf\n\x065\nsayan", + ); + let ret = lex_secure(&query, query_window).unwrap(); + assert_eq!( + ret, + into_vec![Token<'static> => ( + Token![null], + Lit::new_bool(true), + Lit::new_uint(1234), + Lit::new_sint(-1234), + Lit::new_float(1234.5678), + Lit::new_bin(b"binarywithlf\n"), + Lit::new_string("sayan".into()), + )], + ); } -mod safequery_full_param { - use crate::engine::{ - data::{lit::LitIR, spec::Dataspec1D}, - ql::lex::{Ident, SafeQueryData, Token}, - }; - #[test] - fn p_mini() { - let query = b"select * from myapp where username = ?"; - let params = b"\x055\nsayan"; - let sq = SafeQueryData::parse(query, params, 1).unwrap(); - assert_eq!( - sq, - SafeQueryData::new_test( - vec![LitIR::Str("sayan")].into_boxed_slice(), - vec![ - Token![select], - Token![*], - Token![from], - Token::Ident(Ident::from("myapp")), - Token![where], - Token::Ident(Ident::from("username")), - Token![=], - Token![?] - ] - ) - ); - } - #[test] - fn p() { - let query = b"select * from myapp where username = ? 
and pass = ?"; - let params = b"\x055\nsayan\x048\npass1234"; - let sq = SafeQueryData::parse(query, params, 2).unwrap(); - assert_eq!( - sq, - SafeQueryData::new_test( - vec![LitIR::Str("sayan"), LitIR::Bin(b"pass1234")].into_boxed_slice(), - vec![ - Token![select], - Token![*], - Token![from], - Token::Ident(Ident::from("myapp")), - Token![where], - Token::Ident(Ident::from("username")), - Token![=], - Token![?], - Token![and], - Token::Ident(Ident::from("pass")), - Token![=], - Token![?] - ] - ) - ); - } - #[test] - fn p_pro() { - let query = b"select $notes[~?] from myapp where username = ? and pass = ?"; - let params = b"\x00100\n\x055\nsayan\x048\npass1234"; - let sq = SafeQueryData::parse(query, params, 3).unwrap(); +const SFQ_NULL: &[u8] = b"\x00"; +const SFQ_BOOL_FALSE: &[u8] = b"\x01\0"; +const SFQ_BOOL_TRUE: &[u8] = b"\x01\x01"; +const SFQ_UINT: &[u8] = b"\x0218446744073709551615\n"; +const SFQ_SINT: &[u8] = b"\x03-9223372036854775808\n"; +const SFQ_FLOAT: &[u8] = b"\x0411\n3.141592654"; +const SFQ_BINARY: &[u8] = "\x0546\ncringeπŸ˜ƒπŸ˜„πŸ˜πŸ˜†πŸ˜…πŸ˜‚πŸ€£πŸ˜ŠπŸ˜ΈπŸ˜Ί".as_bytes(); +const SFQ_STRING: &[u8] = "\x0646\ncringeπŸ˜ƒπŸ˜„πŸ˜πŸ˜†πŸ˜…πŸ˜‚πŸ€£πŸ˜ŠπŸ˜ΈπŸ˜Ί".as_bytes(); + +#[test] +fn safe_query_null() { + let (query, query_window) = make_safe_query(b"?", SFQ_NULL); + let r = lex_secure(&query, query_window).unwrap(); + assert_eq!(r, vec![Token![null]]) +} + +#[test] +fn safe_query_bool() { + let (query, query_window) = make_safe_query(b"?", SFQ_BOOL_FALSE); + let b_false = lex_secure(&query, query_window).unwrap(); + let (query, query_window) = make_safe_query(b"?", SFQ_BOOL_TRUE); + let b_true = lex_secure(&query, query_window).unwrap(); + assert_eq!( + [b_false, b_true].concat(), + vec![ + Token::from(Lit::new_bool(false)), + Token::from(Lit::new_bool(true)) + ] + ); +} + +#[test] +fn safe_query_uint() { + let (query, query_window) = make_safe_query(b"?", SFQ_UINT); + let int = lex_secure(&query, query_window).unwrap(); + assert_eq!(int, 
vec![Token::Lit(Lit::new_uint(u64::MAX))]); +} + +#[test] +fn safe_query_sint() { + let (query, query_window) = make_safe_query(b"?", SFQ_SINT); + let int = lex_secure(&query, query_window).unwrap(); + assert_eq!(int, vec![Token::Lit(Lit::new_sint(i64::MIN))]); +} + +#[test] +fn safe_query_float() { + let (query, query_window) = make_safe_query(b"?", SFQ_FLOAT); + let float = lex_secure(&query, query_window).unwrap(); + assert_eq!(float, vec![Token::Lit(Lit::new_float(3.141592654))]); +} + +#[test] +fn safe_query_binary() { + let (query, query_window) = make_safe_query(b"?", SFQ_BINARY); + let binary = lex_secure(&query, query_window).unwrap(); + assert_eq!( + binary, + vec![Token::Lit(Lit::new_bin( + "cringe😃😄😁😆😅😂🤣😊😸😺".as_bytes() + ))] + ); +} + +#[test] +fn safe_query_string() { + let (query, query_window) = make_safe_query(b"?", SFQ_STRING); + let binary = lex_secure(&query, query_window).unwrap(); + assert_eq!( + binary, + vec![Token::Lit(Lit::new_string( + "cringe😃😄😁😆😅😂🤣😊😸😺".to_owned().into() + ))] + ); +} + +#[test] +fn safe_params_shuffled() { + let expected = [ + (SFQ_NULL, Token![null]), + (SFQ_BOOL_FALSE, Token::Lit(Lit::new_bool(false))), + (SFQ_BOOL_TRUE, Token::Lit(Lit::new_bool(true))), + (SFQ_UINT, Token::Lit(Lit::new_uint(u64::MAX))), + (SFQ_SINT, Token::Lit(Lit::new_sint(i64::MIN))), + (SFQ_FLOAT, Token::Lit(Lit::new_float(3.141592654))), + ( + SFQ_BINARY, + Token::Lit(Lit::new_bin("cringe😃😄😁😆😅😂🤣😊😸😺".as_bytes())), + ), + ( + SFQ_STRING, + Token::Lit(Lit::new_string( + "cringe😃😄😁😆😅😂🤣😊😸😺".to_owned().into(), + )), + ), + ]; + let mut rng = crate::util::test_utils::randomizer(); + for _ in 0..expected.len().pow(2) { + let mut this_expected = expected.clone(); + crate::util::test_utils::shuffle_slice(&mut this_expected, &mut rng); + let param_segment: Vec<u8> = this_expected + .iter() + .map(|(raw, _)| raw.to_vec()) + .flatten() + 
.collect(); + let (query, query_window) = make_safe_query(b"? ? ? ? ? ? ? ?", &param_segment); + let ret = lex_secure(&query, query_window).unwrap(); assert_eq!( - sq, - SafeQueryData::new_test( - vec![ - LitIR::UnsignedInt(100), - LitIR::Str("sayan"), - LitIR::Bin(b"pass1234") - ] - .into_boxed_slice(), - vec![ - Token![select], - Token![$], - Token::Ident(Ident::from("notes")), - Token![open []], - Token![~], - Token![?], - Token![close []], - Token![from], - Token::Ident(Ident::from("myapp")), - Token![where], - Token::Ident(Ident::from("username")), - Token![=], - Token![?], - Token![and], - Token::Ident(Ident::from("pass")), - Token![=], - Token![?] - ] - ) - ); + ret, + this_expected + .into_iter() + .map(|(_, expected)| expected) + .collect::<Vec<_>>() + ) } } diff --git a/server/src/engine/ql/tests/schema_tests.rs b/server/src/engine/ql/tests/schema_tests.rs index 32f26f1a..d9927fe6 100644 --- a/server/src/engine/ql/tests/schema_tests.rs +++ b/server/src/engine/ql/tests/schema_tests.rs @@ -26,7 +26,7 @@ use { super::{super::lex::Ident, lex_insecure, *}, - crate::engine::data::{lit::Lit, spec::Dataspec1D}, + crate::engine::data::lit::Lit, }; mod inspect { use { @@ -71,7 +71,7 @@ mod alter_space { use { super::*, crate::engine::{ - data::{lit::Lit, spec::Dataspec1D}, + data::lit::Lit, ql::{ast::parse_ast_node_full, ddl::alt::AlterSpace}, }, }; @@ -98,8 +98,8 @@ mod alter_space { AlterSpace::new( Ident::from("mymodel"), null_dict! { - "max_entry" => Lit::UnsignedInt(1000), - "driver" => Lit::Str("ts-0.8".into()) + "max_entry" => Lit::new_uint(1000), + "driver" => Lit::new_string("ts-0.8".into()) } ) ); @@ -130,9 +130,9 @@ mod tymeta { assert_eq!( tymeta, null_dict! { - "hello" => Lit::Str("world".into()), - "loading" => Lit::Bool(true), - "size" => Lit::UnsignedInt(100) + "hello" => Lit::new_string("world".into()), + "loading" => Lit::new_bool(true), + "size" => Lit::new_uint(100) } ); } @@ -154,8 +154,8 @@ mod tymeta { assert_eq!( final_ret, null_dict! 
{ - "maxlen" => Lit::UnsignedInt(100), - "unique" => Lit::Bool(true) + "maxlen" => Lit::new_uint(100), + "unique" => Lit::new_bool(true) } ) } @@ -179,10 +179,10 @@ mod tymeta { assert_eq!( final_ret, null_dict! { - "maxlen" => Lit::UnsignedInt(100), - "unique" => Lit::Bool(true), + "maxlen" => Lit::new_uint(100), + "unique" => Lit::new_bool(true), "this" => null_dict! { - "is" => Lit::Str("cool".into()) + "is" => Lit::new_string("cool".into()) } } ) @@ -209,7 +209,7 @@ mod layer { vec![LayerSpec::new( Ident::from("string"), null_dict! { - "maxlen" => Lit::UnsignedInt(100) + "maxlen" => Lit::new_uint(100) } )] ); @@ -237,8 +237,8 @@ mod layer { LayerSpec::new( Ident::from("list"), null_dict! { - "unique" => Lit::Bool(true), - "maxlen" => Lit::UnsignedInt(10), + "unique" => Lit::new_bool(true), + "maxlen" => Lit::new_uint(10), } ) ] @@ -257,15 +257,15 @@ mod layer { LayerSpec::new( Ident::from("string"), null_dict! { - "ascii_only" => Lit::Bool(true), - "maxlen" => Lit::UnsignedInt(255) + "ascii_only" => Lit::new_bool(true), + "maxlen" => Lit::new_uint(255) } ), LayerSpec::new( Ident::from("list"), null_dict! { - "unique" => Lit::Bool(true), - "maxlen" => Lit::UnsignedInt(10), + "unique" => Lit::new_bool(true), + "maxlen" => Lit::new_uint(10), } ) ] @@ -289,10 +289,13 @@ mod layer { LayerSpec::new( Ident::from("list"), null_dict! { - "maxlen" => Lit::UnsignedInt(100), + "maxlen" => Lit::new_uint(100), }, ), - LayerSpec::new(Ident::from("list"), null_dict!("unique" => Lit::Bool(true))), + LayerSpec::new( + Ident::from("list"), + null_dict!("unique" => Lit::new_bool(true)), + ), ]; fuzz_tokens(tok.as_slice(), |should_pass, new_tok| { let layers = parse_ast_node_multiple_full::(&new_tok); @@ -360,8 +363,8 @@ mod fields { [LayerSpec::new( Ident::from("string"), null_dict! 
{ - "maxlen" => Lit::UnsignedInt(10), - "ascii_only" => Lit::Bool(true), + "maxlen" => Lit::new_uint(10), + "ascii_only" => Lit::new_bool(true), } )] .into(), @@ -393,14 +396,14 @@ mod fields { LayerSpec::new( Ident::from("string"), null_dict! { - "maxlen" => Lit::UnsignedInt(255), - "ascii_only" => Lit::Bool(true), + "maxlen" => Lit::new_uint(255), + "ascii_only" => Lit::new_bool(true), } ), LayerSpec::new( Ident::from("list"), null_dict! { - "unique" => Lit::Bool(true) + "unique" => Lit::new_bool(true) } ), ] @@ -555,7 +558,7 @@ mod schemas { LayerSpec::new( Ident::from("list"), null_dict! { - "unique" => Lit::Bool(true) + "unique" => Lit::new_bool(true) } ) ], @@ -624,7 +627,7 @@ mod schemas { LayerSpec::new( Ident::from("list"), null_dict! { - "unique" => Lit::Bool(true) + "unique" => Lit::new_bool(true) } ) ], @@ -634,9 +637,9 @@ mod schemas { ], null_dict! { "env" => null_dict! { - "free_user_limit" => Lit::UnsignedInt(100), + "free_user_limit" => Lit::new_uint(100), }, - "storage_driver" => Lit::Str("skyheap".into()), + "storage_driver" => Lit::new_string("skyheap".into()), } ) ) @@ -679,7 +682,7 @@ mod dict_field_syntax { Ident::from("username"), vec![LayerSpec::new(Ident::from("string"), null_dict! {})], null_dict! { - "nullable" => Lit::Bool(false), + "nullable" => Lit::new_bool(false), }, ) ); @@ -707,13 +710,13 @@ mod dict_field_syntax { vec![LayerSpec::new( Ident::from("string"), null_dict! { - "minlen" => Lit::UnsignedInt(6), - "maxlen" => Lit::UnsignedInt(255), + "minlen" => Lit::new_uint(6), + "maxlen" => Lit::new_uint(255), } )], null_dict! { - "nullable" => Lit::Bool(false), - "jingle_bells" => Lit::Str("snow".into()), + "nullable" => Lit::new_bool(false), + "jingle_bells" => Lit::new_string("snow".into()), }, ) ); @@ -744,19 +747,19 @@ mod dict_field_syntax { LayerSpec::new( Ident::from("string"), null_dict! { - "ascii_only" => Lit::Bool(true), + "ascii_only" => Lit::new_bool(true), } ), LayerSpec::new( Ident::from("list"), null_dict! 
{ - "unique" => Lit::Bool(true), + "unique" => Lit::new_bool(true), } ) ], null_dict! { - "nullable" => Lit::Bool(true), - "jingle_bells" => Lit::Str("snow".into()), + "nullable" => Lit::new_bool(true), + "jingle_bells" => Lit::new_string("snow".into()), }, ) ); @@ -863,7 +866,7 @@ mod alter_model_add { Ident::from("myfield"), [LayerSpec::new(Ident::from("string"), null_dict! {})].into(), null_dict! { - "nullable" => Lit::Bool(true) + "nullable" => Lit::new_bool(true) }, )] .into() @@ -889,7 +892,7 @@ mod alter_model_add { Ident::from("myfield"), [LayerSpec::new(Ident::from("string"), null_dict! {})].into(), null_dict! { - "nullable" => Lit::Bool(true) + "nullable" => Lit::new_bool(true) }, )] .into() @@ -930,7 +933,7 @@ mod alter_model_add { Ident::from("myfield"), [LayerSpec::new(Ident::from("string"), null_dict! {})].into(), null_dict! { - "nullable" => Lit::Bool(true) + "nullable" => Lit::new_bool(true) }, ), ExpandedField::new( @@ -939,19 +942,19 @@ mod alter_model_add { LayerSpec::new( Ident::from("string"), null_dict! { - "maxlen" => Lit::UnsignedInt(255) + "maxlen" => Lit::new_uint(255) } ), LayerSpec::new( Ident::from("list"), null_dict! { - "unique" => Lit::Bool(true) + "unique" => Lit::new_bool(true) }, ) ] .into(), null_dict! { - "nullable" => Lit::Bool(false) + "nullable" => Lit::new_bool(false) }, ) ] @@ -1042,7 +1045,7 @@ mod alter_model_update { Ident::from("myfield"), [LayerSpec::new(Ident::from("string"), null_dict! {})].into(), null_dict! { - "nullable" => Lit::Bool(true) + "nullable" => Lit::new_bool(true) }, )] .into() @@ -1077,7 +1080,7 @@ mod alter_model_update { Ident::from("myfield"), [LayerSpec::new(Ident::from("string"), null_dict! {})].into(), null_dict! { - "nullable" => Lit::Bool(true) + "nullable" => Lit::new_bool(true) }, ), ExpandedField::new( @@ -1120,14 +1123,14 @@ mod alter_model_update { Ident::from("myfield"), [LayerSpec::new(Ident::from("string"), null_dict! {})].into(), null_dict! 
{ - "nullable" => Lit::Bool(true) + "nullable" => Lit::new_bool(true) }, ), ExpandedField::new( Ident::from("myfield2"), [LayerSpec::new( Ident::from("string"), - null_dict! {"maxlen" => Lit::UnsignedInt(255)} + null_dict! {"maxlen" => Lit::new_uint(255)} )] .into(), null_dict! {}, diff --git a/server/src/engine/ql/tests/structure_syn.rs b/server/src/engine/ql/tests/structure_syn.rs index 3a17af63..960a0cac 100644 --- a/server/src/engine/ql/tests/structure_syn.rs +++ b/server/src/engine/ql/tests/structure_syn.rs @@ -27,7 +27,7 @@ use { super::*, crate::engine::{ - data::{lit::Lit, spec::Dataspec1D, DictGeneric}, + data::{lit::Lit, DictGeneric}, ql::{ast::parse_ast_node_full, ddl::syn::DictBasic}, }, }; @@ -56,7 +56,7 @@ mod dict { br#"{name: "sayan"}"#, br#"{name: "sayan",}"#, }; - let r = null_dict!("name" => Lit::Str("sayan".into())); + let r = null_dict!("name" => Lit::new_string("sayan".into())); multi_assert_eq!(d1, d2 => r); } #[test] @@ -78,9 +78,9 @@ mod dict { "#, }; let r = null_dict! ( - "name" => Lit::Str("sayan".into()), - "verified" => Lit::Bool(true), - "burgers" => Lit::UnsignedInt(152), + "name" => Lit::new_string("sayan".into()), + "verified" => Lit::new_bool(true), + "burgers" => Lit::new_uint(152), ); multi_assert_eq!(d1, d2 => r); } @@ -119,11 +119,11 @@ mod dict { }; multi_assert_eq!( d1, d2, d3 => null_dict! { - "name" => Lit::Str("sayan".into()), + "name" => Lit::new_string("sayan".into()), "notes" => null_dict! { - "burgers" => Lit::Str("all the time, extra mayo".into()), - "taco" => Lit::Bool(true), - "pretzels" => Lit::UnsignedInt(1), + "burgers" => Lit::new_string("all the time, extra mayo".into()), + "taco" => Lit::new_bool(true), + "pretzels" => Lit::new_uint(1), } } ); @@ -178,7 +178,7 @@ mod dict { "now" => null_dict! { "this" => null_dict! { "is" => null_dict! { - "ridiculous" => Lit::Bool(true), + "ridiculous" => Lit::new_bool(true), } } } @@ -207,16 +207,16 @@ mod dict { } "; let ret_dict = null_dict! 
{ - "the_tradition_is" => Lit::Str("hello, world".into()), + "the_tradition_is" => Lit::new_string("hello, world".into()), "could_have_been" => null_dict! { - "this" => Lit::Bool(true), - "or_maybe_this" => Lit::UnsignedInt(100), - "even_this" => Lit::Str("hello, universe!".into()), + "this" => Lit::new_bool(true), + "or_maybe_this" => Lit::new_uint(100), + "even_this" => Lit::new_string("hello, universe!".into()), }, - "but_oh_well" => Lit::Str("it continues to be the 'annoying' phrase".into()), + "but_oh_well" => Lit::new_string("it continues to be the 'annoying' phrase".into()), "lorem" => null_dict! { "ipsum" => null_dict! { - "dolor" => Lit::Str("sit amet".into()) + "dolor" => Lit::new_string("sit amet".into()) } } }; @@ -258,7 +258,7 @@ mod null_dict_tests { assert_eq!( d, null_dict! { - "this_is_non_null" => Lit::Str("hello".into()), + "this_is_non_null" => Lit::new_string("hello".into()), "but_this_is_null" => Null, } ) @@ -279,8 +279,8 @@ mod null_dict_tests { assert_eq!( d, null_dict! { - "a_string" => Lit::Str("this is a string".into()), - "num" => Lit::UnsignedInt(1234), + "a_string" => Lit::new_string("this is a string".into()), + "num" => Lit::new_uint(1234), "a_dict" => null_dict! { "a_null" => Null, } @@ -304,8 +304,8 @@ mod null_dict_tests { assert_eq!( d, null_dict! { - "a_string" => Lit::Str("this is a string".into()), - "num" => Lit::UnsignedInt(1234), + "a_string" => Lit::new_string("this is a string".into()), + "num" => Lit::new_uint(1234), "a_dict" => null_dict! { "a_null" => Null, },