Cleanup lexer impl

next
Sayan Nandan 1 year ago
parent 5e068c0e9b
commit a390120231
No known key found for this signature in database
GPG Key ID: 42EEDF4AE9D96B54

@ -1,5 +1,5 @@
[workspace] [workspace]
resolver = "2" resolver = "1"
members = [ members = [
"cli", "cli",
"server", "server",

@ -32,7 +32,7 @@ mod upd;
use crate::{ use crate::{
engine::{ engine::{
core::model::Model, core::model::Model,
data::{lit::LitIR, spec::DataspecMeta1D, tag::DataTag}, data::{lit::Lit, tag::DataTag},
error::{Error, QueryResult}, error::{Error, QueryResult},
ql::dml::WhereClause, ql::dml::WhereClause,
}, },
@ -47,7 +47,7 @@ impl Model {
pub(self) fn resolve_where<'a>( pub(self) fn resolve_where<'a>(
&self, &self,
where_clause: &mut WhereClause<'a>, where_clause: &mut WhereClause<'a>,
) -> QueryResult<LitIR<'a>> { ) -> QueryResult<Lit<'a>> {
match where_clause.clauses_mut().remove(self.p_key().as_bytes()) { match where_clause.clauses_mut().remove(self.p_key().as_bytes()) {
Some(clause) Some(clause)
if clause.filter_hint_none() if clause.filter_hint_none()

@ -33,8 +33,7 @@ use {
core::{self, model::delta::DataDeltaKind, query_meta::AssignmentOperator}, core::{self, model::delta::DataDeltaKind, query_meta::AssignmentOperator},
data::{ data::{
cell::Datacell, cell::Datacell,
lit::LitIR, lit::Lit,
spec::{Dataspec1D, DataspecMeta1D},
tag::{DataTag, TagClass}, tag::{DataTag, TagClass},
}, },
error::{Error, QueryResult}, error::{Error, QueryResult},
@ -49,51 +48,51 @@ use {
}; };
#[inline(always)] #[inline(always)]
unsafe fn dc_op_fail(_: &Datacell, _: LitIR) -> (bool, Datacell) { unsafe fn dc_op_fail(_: &Datacell, _: Lit) -> (bool, Datacell) {
(false, Datacell::null()) (false, Datacell::null())
} }
// bool // bool
unsafe fn dc_op_bool_ass(_: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_bool_ass(_: &Datacell, rhs: Lit) -> (bool, Datacell) {
(true, Datacell::new_bool(rhs.read_bool_uck())) (true, Datacell::new_bool(rhs.bool()))
} }
// uint // uint
unsafe fn dc_op_uint_ass(_: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_uint_ass(_: &Datacell, rhs: Lit) -> (bool, Datacell) {
(true, Datacell::new_uint(rhs.read_uint_uck())) (true, Datacell::new_uint(rhs.uint()))
} }
unsafe fn dc_op_uint_add(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_uint_add(dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let (sum, of) = dc.read_uint().overflowing_add(rhs.read_uint_uck()); let (sum, of) = dc.read_uint().overflowing_add(rhs.uint());
(of, Datacell::new_uint(sum)) (of, Datacell::new_uint(sum))
} }
unsafe fn dc_op_uint_sub(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_uint_sub(dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let (diff, of) = dc.read_uint().overflowing_sub(rhs.read_uint_uck()); let (diff, of) = dc.read_uint().overflowing_sub(rhs.uint());
(of, Datacell::new_uint(diff)) (of, Datacell::new_uint(diff))
} }
unsafe fn dc_op_uint_mul(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_uint_mul(dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let (prod, of) = dc.read_uint().overflowing_mul(rhs.read_uint_uck()); let (prod, of) = dc.read_uint().overflowing_mul(rhs.uint());
(of, Datacell::new_uint(prod)) (of, Datacell::new_uint(prod))
} }
unsafe fn dc_op_uint_div(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_uint_div(dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let (quo, of) = dc.read_uint().overflowing_div(rhs.read_uint_uck()); let (quo, of) = dc.read_uint().overflowing_div(rhs.uint());
(of, Datacell::new_uint(quo)) (of, Datacell::new_uint(quo))
} }
// sint // sint
unsafe fn dc_op_sint_ass(_: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_sint_ass(_: &Datacell, rhs: Lit) -> (bool, Datacell) {
(true, Datacell::new_sint(rhs.read_sint_uck())) (true, Datacell::new_sint(rhs.sint()))
} }
unsafe fn dc_op_sint_add(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_sint_add(dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let (sum, of) = dc.read_sint().overflowing_add(rhs.read_sint_uck()); let (sum, of) = dc.read_sint().overflowing_add(rhs.sint());
(of, Datacell::new_sint(sum)) (of, Datacell::new_sint(sum))
} }
unsafe fn dc_op_sint_sub(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_sint_sub(dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let (diff, of) = dc.read_sint().overflowing_sub(rhs.read_sint_uck()); let (diff, of) = dc.read_sint().overflowing_sub(rhs.sint());
(of, Datacell::new_sint(diff)) (of, Datacell::new_sint(diff))
} }
unsafe fn dc_op_sint_mul(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_sint_mul(dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let (prod, of) = dc.read_sint().overflowing_mul(rhs.read_sint_uck()); let (prod, of) = dc.read_sint().overflowing_mul(rhs.sint());
(of, Datacell::new_sint(prod)) (of, Datacell::new_sint(prod))
} }
unsafe fn dc_op_sint_div(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_sint_div(dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let (quo, of) = dc.read_sint().overflowing_div(rhs.read_sint_uck()); let (quo, of) = dc.read_sint().overflowing_div(rhs.sint());
(of, Datacell::new_sint(quo)) (of, Datacell::new_sint(quo))
} }
/* /*
@ -106,28 +105,28 @@ unsafe fn dc_op_sint_div(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) {
-- --
TODO(@ohsayan): account for float32 overflow TODO(@ohsayan): account for float32 overflow
*/ */
unsafe fn dc_op_float_ass(_: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_float_ass(_: &Datacell, rhs: Lit) -> (bool, Datacell) {
(true, Datacell::new_float(rhs.read_float_uck())) (true, Datacell::new_float(rhs.float()))
} }
unsafe fn dc_op_float_add(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_float_add(dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let sum = dc.read_float() + rhs.read_float_uck(); let sum = dc.read_float() + rhs.float();
(true, Datacell::new_float(sum)) (true, Datacell::new_float(sum))
} }
unsafe fn dc_op_float_sub(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_float_sub(dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let diff = dc.read_float() - rhs.read_float_uck(); let diff = dc.read_float() - rhs.float();
(true, Datacell::new_float(diff)) (true, Datacell::new_float(diff))
} }
unsafe fn dc_op_float_mul(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_float_mul(dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let prod = dc.read_float() - rhs.read_float_uck(); let prod = dc.read_float() - rhs.float();
(true, Datacell::new_float(prod)) (true, Datacell::new_float(prod))
} }
unsafe fn dc_op_float_div(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_float_div(dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let quo = dc.read_float() * rhs.read_float_uck(); let quo = dc.read_float() * rhs.float();
(true, Datacell::new_float(quo)) (true, Datacell::new_float(quo))
} }
// binary // binary
unsafe fn dc_op_bin_ass(_dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_bin_ass(_dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let new_bin = rhs.read_bin_uck(); let new_bin = rhs.bin();
let mut v = Vec::new(); let mut v = Vec::new();
if v.try_reserve_exact(new_bin.len()).is_err() { if v.try_reserve_exact(new_bin.len()).is_err() {
return dc_op_fail(_dc, rhs); return dc_op_fail(_dc, rhs);
@ -135,8 +134,8 @@ unsafe fn dc_op_bin_ass(_dc: &Datacell, rhs: LitIR) -> (bool, Datacell) {
v.extend_from_slice(new_bin); v.extend_from_slice(new_bin);
(true, Datacell::new_bin(v.into_boxed_slice())) (true, Datacell::new_bin(v.into_boxed_slice()))
} }
unsafe fn dc_op_bin_add(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_bin_add(dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let push_into_bin = rhs.read_bin_uck(); let push_into_bin = rhs.bin();
let mut bin = Vec::new(); let mut bin = Vec::new();
if compiler::unlikely(bin.try_reserve_exact(push_into_bin.len()).is_err()) { if compiler::unlikely(bin.try_reserve_exact(push_into_bin.len()).is_err()) {
return dc_op_fail(dc, rhs); return dc_op_fail(dc, rhs);
@ -146,8 +145,8 @@ unsafe fn dc_op_bin_add(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) {
(true, Datacell::new_bin(bin.into_boxed_slice())) (true, Datacell::new_bin(bin.into_boxed_slice()))
} }
// string // string
unsafe fn dc_op_str_ass(_dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_str_ass(_dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let new_str = rhs.read_str_uck(); let new_str = rhs.str();
let mut v = String::new(); let mut v = String::new();
if v.try_reserve_exact(new_str.len()).is_err() { if v.try_reserve_exact(new_str.len()).is_err() {
return dc_op_fail(_dc, rhs); return dc_op_fail(_dc, rhs);
@ -155,8 +154,8 @@ unsafe fn dc_op_str_ass(_dc: &Datacell, rhs: LitIR) -> (bool, Datacell) {
v.push_str(new_str); v.push_str(new_str);
(true, Datacell::new_str(v.into_boxed_str())) (true, Datacell::new_str(v.into_boxed_str()))
} }
unsafe fn dc_op_str_add(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) { unsafe fn dc_op_str_add(dc: &Datacell, rhs: Lit) -> (bool, Datacell) {
let push_into_str = rhs.read_str_uck(); let push_into_str = rhs.str();
let mut str = String::new(); let mut str = String::new();
if compiler::unlikely(str.try_reserve_exact(push_into_str.len()).is_err()) { if compiler::unlikely(str.try_reserve_exact(push_into_str.len()).is_err()) {
return dc_op_fail(dc, rhs); return dc_op_fail(dc, rhs);
@ -166,7 +165,7 @@ unsafe fn dc_op_str_add(dc: &Datacell, rhs: LitIR) -> (bool, Datacell) {
(true, Datacell::new_str(str.into_boxed_str())) (true, Datacell::new_str(str.into_boxed_str()))
} }
static OPERATOR: [unsafe fn(&Datacell, LitIR) -> (bool, Datacell); { static OPERATOR: [unsafe fn(&Datacell, Lit) -> (bool, Datacell); {
TagClass::MAX as usize * AssignmentOperator::VARIANTS TagClass::MAX as usize * AssignmentOperator::VARIANTS
}] = [ }] = [
// bool // bool

@ -26,13 +26,12 @@
use crate::engine::mem::ZERO_BLOCK; use crate::engine::mem::ZERO_BLOCK;
#[cfg(test)] #[cfg(test)]
use crate::{engine::data::spec::Dataspec1D, util::test_utils}; use crate::util::test_utils;
use { use {
crate::engine::{ crate::engine::{
data::{ data::{
cell::Datacell, cell::Datacell,
lit::LitIR, lit::Lit,
spec::DataspecMeta1D,
tag::{DataTag, TagUnique}, tag::{DataTag, TagUnique},
}, },
idx::meta::Comparable, idx::meta::Comparable,
@ -245,22 +244,22 @@ impl Hash for PrimaryIndexKey {
} }
} }
impl<'a> PartialEq<LitIR<'a>> for PrimaryIndexKey { impl<'a> PartialEq<Lit<'a>> for PrimaryIndexKey {
fn eq(&self, key: &LitIR<'a>) -> bool { fn eq(&self, key: &Lit<'a>) -> bool {
debug_assert!(key.kind().tag_unique().is_unique()); debug_assert!(key.kind().tag_unique().is_unique());
self.tag == key.kind().tag_unique() && self.virtual_block() == key.__vdata() self.tag == key.kind().tag_unique() && self.virtual_block() == key.__vdata()
} }
} }
impl<'a> Comparable<LitIR<'a>> for PrimaryIndexKey { impl<'a> Comparable<Lit<'a>> for PrimaryIndexKey {
fn cmp_eq(&self, key: &LitIR<'a>) -> bool { fn cmp_eq(&self, key: &Lit<'a>) -> bool {
<PrimaryIndexKey as PartialEq<LitIR>>::eq(self, key) <PrimaryIndexKey as PartialEq<Lit>>::eq(self, key)
} }
} }
impl<'a> Comparable<PrimaryIndexKey> for LitIR<'a> { impl<'a> Comparable<PrimaryIndexKey> for Lit<'a> {
fn cmp_eq(&self, key: &PrimaryIndexKey) -> bool { fn cmp_eq(&self, key: &PrimaryIndexKey) -> bool {
<PrimaryIndexKey as PartialEq<LitIR>>::eq(key, self) <PrimaryIndexKey as PartialEq<Lit>>::eq(key, self)
} }
} }
@ -333,16 +332,16 @@ fn check_pk_eq_hash() {
fn check_pk_lit_eq_hash() { fn check_pk_lit_eq_hash() {
let state = test_utils::randomstate(); let state = test_utils::randomstate();
let data = [ let data = [
LitIR::UnsignedInt(100), Lit::new_uint(100),
LitIR::SignedInt(-100), Lit::new_sint(-100),
LitIR::Bin(b"binary bro"), Lit::new_bin(b"binary bro"),
LitIR::Str("string bro"), Lit::new_str("string bro"),
]; ];
for litir in data { for lit in data {
let pk = PrimaryIndexKey::try_from_dc(Datacell::from(litir.clone())).unwrap(); let pk = PrimaryIndexKey::try_from_dc(Datacell::from(lit.clone())).unwrap();
assert_eq!(pk, litir); assert_eq!(pk, lit);
assert_eq!( assert_eq!(
test_utils::hash_rs(&state, &litir), test_utils::hash_rs(&state, &lit),
test_utils::hash_rs(&state, &pk) test_utils::hash_rs(&state, &pk)
); );
} }
@ -352,7 +351,7 @@ fn check_pk_lit_eq_hash() {
fn check_pk_extremes() { fn check_pk_extremes() {
let state = test_utils::randomstate(); let state = test_utils::randomstate();
let d1 = PrimaryIndexKey::try_from_dc(Datacell::new_uint(u64::MAX)).unwrap(); let d1 = PrimaryIndexKey::try_from_dc(Datacell::new_uint(u64::MAX)).unwrap();
let d2 = PrimaryIndexKey::try_from_dc(Datacell::from(LitIR::UnsignedInt(u64::MAX))).unwrap(); let d2 = PrimaryIndexKey::try_from_dc(Datacell::from(Lit::new_uint(u64::MAX))).unwrap();
assert_eq!(d1, d2); assert_eq!(d1, d2);
assert_eq!(d1.uint().unwrap(), u64::MAX); assert_eq!(d1.uint().unwrap(), u64::MAX);
assert_eq!(d2.uint().unwrap(), u64::MAX); assert_eq!(d2.uint().unwrap(), u64::MAX);
@ -360,7 +359,7 @@ fn check_pk_extremes() {
test_utils::hash_rs(&state, &d1), test_utils::hash_rs(&state, &d1),
test_utils::hash_rs(&state, &d2) test_utils::hash_rs(&state, &d2)
); );
assert_eq!(d1, LitIR::UnsignedInt(u64::MAX)); assert_eq!(d1, Lit::new_uint(u64::MAX));
assert_eq!(d2, LitIR::UnsignedInt(u64::MAX)); assert_eq!(d2, Lit::new_uint(u64::MAX));
assert_eq!(d1.uint().unwrap(), u64::MAX); assert_eq!(d1.uint().unwrap(), u64::MAX);
} }

@ -28,7 +28,7 @@ mod key;
mod row; mod row;
use crate::engine::{ use crate::engine::{
data::lit::LitIR, data::lit::Lit,
idx::{IndexBaseSpec, IndexMTRaw, MTIndex}, idx::{IndexBaseSpec, IndexMTRaw, MTIndex},
sync::atm::Guard, sync::atm::Guard,
}; };
@ -49,12 +49,12 @@ impl PrimaryIndex {
data: IndexMTRaw::idx_init(), data: IndexMTRaw::idx_init(),
} }
} }
pub fn remove<'a>(&self, key: LitIR<'a>, g: &Guard) -> bool { pub fn remove<'a>(&self, key: Lit<'a>, g: &Guard) -> bool {
self.data.mt_delete(&key, g) self.data.mt_delete(&key, g)
} }
pub fn select<'a, 'v, 't: 'v, 'g: 't>( pub fn select<'a, 'v, 't: 'v, 'g: 't>(
&'t self, &'t self,
key: LitIR<'a>, key: Lit<'a>,
g: &'g Guard, g: &'g Guard,
) -> Option<&'v Row> { ) -> Option<&'v Row> {
self.data.mt_get_element(&key, g) self.data.mt_get_element(&key, g)

@ -31,7 +31,7 @@ mod update;
use crate::engine::{ use crate::engine::{
core::{dml, index::Row, model::Model}, core::{dml, index::Row, model::Model},
data::{cell::Datacell, lit::LitIR}, data::{cell::Datacell, lit::Lit},
error::QueryResult, error::QueryResult,
fractal::GlobalInstanceLike, fractal::GlobalInstanceLike,
ql::{ ql::{
@ -75,7 +75,7 @@ fn _exec_only_read_key_and_then<T>(
let _irm = mdl.intent_read_model(); let _irm = mdl.intent_read_model();
let row = mdl let row = mdl
.primary_index() .primary_index()
.select(LitIR::from(key_name), &guard) .select(Lit::from(key_name), &guard)
.unwrap() .unwrap()
.clone(); .clone();
drop(guard); drop(guard);

@ -30,8 +30,7 @@ use {
crate::engine::{ crate::engine::{
self, self,
data::{ data::{
lit::{Lit, LitIR}, lit::Lit,
spec::{Dataspec1D, DataspecMeta1D},
tag::{CUTag, DataTag, TagClass}, tag::{CUTag, DataTag, TagClass},
}, },
mem::{DwordNN, DwordQN, NativeQword, SpecialPaddedWord, WordIO}, mem::{DwordNN, DwordQN, NativeQword, SpecialPaddedWord, WordIO},
@ -228,8 +227,8 @@ direct_from! {
} }
} }
impl<'a> From<LitIR<'a>> for Datacell { impl<'a> From<Lit<'a>> for Datacell {
fn from(l: LitIR<'a>) -> Self { fn from(l: Lit<'a>) -> Self {
match l.kind().tag_class() { match l.kind().tag_class() {
tag if tag < TagClass::Bin => unsafe { tag if tag < TagClass::Bin => unsafe {
// UNSAFE(@ohsayan): Correct because we are using the same tag, and in this case the type doesn't need any advanced construction // UNSAFE(@ohsayan): Correct because we are using the same tag, and in this case the type doesn't need any advanced construction
@ -241,7 +240,7 @@ impl<'a> From<LitIR<'a>> for Datacell {
}, },
TagClass::Bin | TagClass::Str => unsafe { TagClass::Bin | TagClass::Str => unsafe {
// UNSAFE(@ohsayan): Correct because we are using the same tag, and in this case the type requires a new heap for construction // UNSAFE(@ohsayan): Correct because we are using the same tag, and in this case the type requires a new heap for construction
let mut bin = ManuallyDrop::new(l.read_bin_uck().to_owned().into_boxed_slice()); let mut bin = ManuallyDrop::new(l.bin().to_owned().into_boxed_slice());
Datacell::new( Datacell::new(
CUTag::from(l.kind()), CUTag::from(l.kind()),
DataRaw::word(DwordQN::dwordqn_store_qw_nw( DataRaw::word(DwordQN::dwordqn_store_qw_nw(
@ -269,12 +268,6 @@ impl From<i32> for Datacell {
} }
} }
impl<'a> From<Lit<'a>> for Datacell {
fn from(l: Lit<'a>) -> Self {
Self::from(l.as_ir())
}
}
impl<const N: usize> From<[Datacell; N]> for Datacell { impl<const N: usize> From<[Datacell; N]> for Datacell {
fn from(l: [Datacell; N]) -> Self { fn from(l: [Datacell; N]) -> Self {
Self::new_list(l.into()) Self::new_list(l.into())
@ -459,17 +452,17 @@ impl Clone for Datacell {
#[derive(Debug)] #[derive(Debug)]
pub struct VirtualDatacell<'a> { pub struct VirtualDatacell<'a> {
dc: ManuallyDrop<Datacell>, dc: ManuallyDrop<Datacell>,
_lt: PhantomData<LitIR<'a>>, _lt: PhantomData<Lit<'a>>,
} }
impl<'a> VirtualDatacell<'a> { impl<'a> VirtualDatacell<'a> {
pub fn new(litir: LitIR<'a>) -> Self { pub fn new(lit: Lit<'a>) -> Self {
Self { Self {
dc: ManuallyDrop::new(unsafe { dc: ManuallyDrop::new(unsafe {
// UNSAFE(@ohsayan): this is a "reference" to a "virtual" aka fake DC. this just works because of memory layouts // UNSAFE(@ohsayan): this is a "reference" to a "virtual" aka fake DC. this just works because of memory layouts
Datacell::new( Datacell::new(
CUTag::from(litir.kind()), CUTag::from(lit.kind()),
DataRaw::word(litir.data().dwordqn_promote()), DataRaw::word(lit.data().dwordqn_promote()),
) )
}), }),
_lt: PhantomData, _lt: PhantomData,
@ -477,8 +470,8 @@ impl<'a> VirtualDatacell<'a> {
} }
} }
impl<'a> From<LitIR<'a>> for VirtualDatacell<'a> { impl<'a> From<Lit<'a>> for VirtualDatacell<'a> {
fn from(l: LitIR<'a>) -> Self { fn from(l: Lit<'a>) -> Self {
Self::new(l) Self::new(l)
} }
} }
@ -504,6 +497,6 @@ impl<'a> Clone for VirtualDatacell<'a> {
#[test] #[test]
fn virtual_dc_damn() { fn virtual_dc_damn() {
let dc = LitIR::Str("hello, world"); let dc = Lit::new_str("hello, world");
assert_eq!(VirtualDatacell::from(dc), Datacell::from("hello, world")); assert_eq!(VirtualDatacell::from(dc), Datacell::from("hello, world"));
} }

@ -26,10 +26,7 @@
use { use {
crate::engine::{ crate::engine::{
data::{ data::{cell::Datacell, lit::Lit},
cell::Datacell,
lit::{Lit, LitIR},
},
idx::STIndex, idx::STIndex,
}, },
std::collections::HashMap, std::collections::HashMap,
@ -181,15 +178,9 @@ fn rmerge_metadata_prepare_patch(
impls impls
*/ */
impl<'a> From<LitIR<'a>> for DictEntryGeneric {
fn from(l: LitIR<'a>) -> Self {
Self::Data(Datacell::from(l))
}
}
impl<'a> From<Lit<'a>> for DictEntryGeneric { impl<'a> From<Lit<'a>> for DictEntryGeneric {
fn from(value: Lit<'a>) -> Self { fn from(l: Lit<'a>) -> Self {
Self::Data(Datacell::from(value)) Self::Data(Datacell::from(l))
} }
} }

@ -1,5 +1,5 @@
/* /*
* Created on Sun Feb 26 2023 * Created on Wed Sep 20 2023
* *
* This file is a part of Skytable * This file is a part of Skytable
* Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
@ -25,166 +25,186 @@
*/ */
use { use {
super::{ crate::engine::{
spec::{Dataspec1D, DataspecMeta1D, DataspecMethods1D, DataspecRaw1D}, data::tag::{DataTag, FullTag, TagClass, TagUnique},
tag::{DataTag, FullTag, TagUnique}, mem::{DwordQN, SpecialPaddedWord},
}, },
crate::engine::mem::{DwordQN, SpecialPaddedWord, WordIO},
core::{ core::{
fmt, fmt,
hash::{Hash, Hasher}, hash::{Hash, Hasher},
marker::PhantomData, marker::PhantomData,
mem::{self, ManuallyDrop}, mem::ManuallyDrop,
slice, slice, str,
}, },
}; };
/* /*
Lit NOTE(@ohsayan): Heinous hackery that should not ever be repeated. Just don't touch anything here.
*/ */
/// A literal representation
pub struct Lit<'a> { pub struct Lit<'a> {
data: SpecialPaddedWord,
tag: FullTag, tag: FullTag,
dtc: u8,
word: SpecialPaddedWord,
_lt: PhantomData<&'a [u8]>, _lt: PhantomData<&'a [u8]>,
} }
impl<'a> Lit<'a> { impl<'a> Lit<'a> {
pub fn as_ir(&'a self) -> LitIR<'a> { /// Create a new bool literal
unsafe { pub fn new_bool(b: bool) -> Self {
// UNSAFE(@ohsayan): 'tis the lifetime. 'tis the savior Self::_quad(b as _, FullTag::BOOL)
mem::transmute_copy(self)
}
} }
} /// Create a new unsigned integer
pub fn new_uint(u: u64) -> Self {
impl<'a> DataspecMeta1D for Lit<'a> { Self::_quad(u, FullTag::UINT)
type Tag = FullTag;
type Target = SpecialPaddedWord;
type StringItem = Box<str>;
fn new(flag: Self::Tag, data: Self::Target) -> Self {
Self {
data,
tag: flag,
_lt: PhantomData,
}
} }
fn kind(&self) -> Self::Tag { /// Create a new signed integer
self.tag pub fn new_sint(s: i64) -> Self {
Self::_quad(s as _, FullTag::SINT)
}
/// Create a new float64
pub fn new_float(f: f64) -> Self {
Self::_quad(f.to_bits(), FullTag::FLOAT)
} }
fn data(&self) -> Self::Target { /// Returns a "shallow clone"
///
/// This function will fall apart if lifetimes aren't handled correctly (aka will segfault)
pub fn as_ir(&'a self) -> Lit<'a> {
unsafe { unsafe {
// UNSAFE(@ohsayan): This function doesn't create any clones, so we're good // UNSAFE(@ohsayan): this is a dirty, uncanny and wild hack that everyone should be forbidden from doing
mem::transmute_copy(self) let mut slf: Lit<'a> = core::mem::transmute_copy(self);
slf.dtc = Self::DTC_NONE;
slf
} }
} }
} }
/* impl<'a> Lit<'a> {
UNSAFE(@ohsayan): Safety checks: /// Attempt to read a bool
- Heap str: yes pub fn try_bool(&self) -> Option<bool> {
- Heap bin: no (self.tag.tag_class() == TagClass::Bool).then_some(unsafe {
- Drop str: yes, dealloc // UNSAFE(@ohsayan): +tagck
- Drop bin: not needed self.bool()
- Clone str: yes, alloc })
- Clone bin: not needed
*/
unsafe impl<'a> DataspecRaw1D for Lit<'a> {
const HEAP_STR: bool = true;
const HEAP_BIN: bool = false;
unsafe fn drop_str(&mut self) {
let (len, ptr) = self.data().load();
drop(String::from_raw_parts(ptr, len, len));
} }
unsafe fn drop_bin(&mut self) {} /// Attempt to read an unsigned integer
unsafe fn clone_str(s: &str) -> Self::Target { pub fn try_uint(&self) -> Option<u64> {
let new_string = ManuallyDrop::new(s.to_owned().into_boxed_str()); (self.tag.tag_class() == TagClass::UnsignedInt).then_some(unsafe {
WordIO::store((new_string.len(), new_string.as_ptr())) // UNSAFE(@ohsayan): +tagck
self.uint()
})
} }
unsafe fn clone_bin(b: &[u8]) -> Self::Target { /// Attempt to read a signed integer
WordIO::store((b.len(), b.as_ptr())) pub fn try_sint(&self) -> Option<i64> {
(self.tag.tag_class() == TagClass::SignedInt).then_some(unsafe {
// UNSAFE(@ohsayan): +tagck
self.sint()
})
} }
} /// Attempt to read a float
pub fn try_float(&self) -> Option<f64> {
/* (self.tag.tag_class() == TagClass::Float).then_some(unsafe {
UNSAFE(@ohsayan): Safety checks: // UNSAFE(@ohsayan): +tagck
- We LEAK memory because, duh self.float()
- We don't touch our own targets, ever (well, I'm a bad boy so I do touch it in fmt::Debug) })
*/
unsafe impl<'a> Dataspec1D for Lit<'a> {
fn Str(s: Box<str>) -> Self {
let md = ManuallyDrop::new(s);
Self::new(FullTag::STR, WordIO::store((md.len(), md.as_ptr())))
} }
} /// Read a bool directly. This function isn't exactly unsafe, but we want to provide a type preserving API
pub unsafe fn bool(&self) -> bool {
/* self.uint() == 1
UNSAFE(@ohsayan):
- No target touch
*/
unsafe impl<'a> DataspecMethods1D for Lit<'a> {}
impl<'a, T: DataspecMethods1D> PartialEq<T> for Lit<'a> {
fn eq(&self, other: &T) -> bool {
<Self as DataspecMethods1D>::self_eq(self, other)
} }
} /// Read an unsigned integer directly. This function isn't exactly unsafe, but we want to provide a type
impl<'a> fmt::Debug for Lit<'a> { /// preserving API
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { pub unsafe fn uint(&self) -> u64 {
let mut f = f.debug_struct("Lit"); self.word.dwordqn_load_qw_nw().0
f.field("tag", &self.tag);
self.self_fmt_debug_data("data", &mut f);
f.field("_lt", &self._lt);
f.finish()
} }
} /// Read a signed integer directly. This function isn't exactly unsafe, but we want to provide a type
/// preserving API
impl<'a> Drop for Lit<'a> { pub unsafe fn sint(&self) -> i64 {
fn drop(&mut self) { self.uint() as _
self.self_drop();
} }
} /// Read a floating point number directly. This function isn't exactly unsafe, but we want to provide a type
/// preserving API
impl<'a> Clone for Lit<'a> { pub unsafe fn float(&self) -> f64 {
fn clone(&self) -> Self { f64::from_bits(self.uint())
self.self_clone()
} }
} }
impl<'a> ToString for Lit<'a> { impl<'a> Lit<'a> {
fn to_string(&self) -> String { /// Attempt to read a binary value
<Self as DataspecMethods1D>::to_string_debug(self) pub fn try_bin(&self) -> Option<&'a [u8]> {
(self.tag.tag_class() == TagClass::Bin).then(|| unsafe {
// UNSAFE(@ohsayan): +tagck
self.bin()
})
} }
} /// Attempt to read a string value
pub fn try_str(&self) -> Option<&'a str> {
direct_from! { (self.tag.tag_class() == TagClass::Str).then(|| unsafe {
Lit<'a> => { // UNSAFE(@ohsayan): +tagck
bool as Bool, self.str()
u64 as UnsignedInt, })
i64 as SignedInt, }
f64 as Float, /// Read a string value directly
&'a str as Str, ///
String as Str, /// ## Safety
Box<str> as Str, /// The underlying repr MUST be a string. Otherwise you'll segfault or cause other library functions to misbehave
&'a [u8] as Bin, pub unsafe fn str(&self) -> &'a str {
str::from_utf8_unchecked(self.bin())
}
/// Read a binary value directly
///
/// ## Safety
/// The underlying repr MUST be a binary (or a string, which `str()` reads through this function). Otherwise you'll segfault
pub unsafe fn bin(&self) -> &'a [u8] {
let (q, n) = self.word.dwordqn_load_qw_nw();
slice::from_raw_parts(n as *const u8 as *mut u8, q as _)
} }
} }
/* impl<'a> Lit<'a> {
LitIR /// Create a new string (referenced)
*/ pub fn new_str(s: &'a str) -> Self {
unsafe {
/// ☢TRAIT WARNING☢: The [`Hash`] implementation is strictly intended for usage with [`crate::engine::core`] components ONLY. This will FAIL and PRODUCE INCORRECT results /*
/// when used elsewhere UNSAFE(@ohsayan): the mut cast is just for typesake so it doesn't matter while we also set DTC
pub struct LitIR<'a> { to none so it shouldn't matter anyway
tag: FullTag, */
data: SpecialPaddedWord, Self::_str(s.as_ptr() as *mut u8, s.len(), Self::DTC_NONE)
_lt: PhantomData<&'a str>, }
}
/// Create a new boxed string
pub fn new_boxed_str(s: Box<str>) -> Self {
let mut md = ManuallyDrop::new(s); // mut -> aliasing!
unsafe {
// UNSAFE(@ohsayan): correct aliasing, and DTC to destroy heap
Self::_str(md.as_mut_ptr(), md.len(), Self::DTC_HSTR)
}
}
/// Create a new string
pub fn new_string(s: String) -> Self {
Self::new_boxed_str(s.into_boxed_str())
}
/// Create a new binary (referenced)
pub fn new_bin(b: &'a [u8]) -> Self {
unsafe {
// UNSAFE(@ohsayan): mut cast is once again just a typesake change
Self::_wide_word(b.as_ptr() as *mut _, b.len(), Self::DTC_NONE, FullTag::BIN)
}
}
} }
impl<'a> LitIR<'a> { impl<'a> Lit<'a> {
pub fn __vdata(&self) -> &[u8] { /// Returns the type of this literal
let (vlen, data) = self.data().dwordqn_load_qw_nw(); pub fn kind(&self) -> FullTag {
self.tag
}
/// Returns the internal representation of this type
pub unsafe fn data(&self) -> &SpecialPaddedWord {
&self.word
}
pub fn __vdata(&self) -> &'a [u8] {
let (vlen, data) = self.word.dwordqn_load_qw_nw();
let len = vlen as usize * (self.kind().tag_unique() >= TagUnique::Bin) as usize; let len = vlen as usize * (self.kind().tag_unique() >= TagUnique::Bin) as usize;
unsafe { unsafe {
// UNSAFE(@ohsayan): either because of static or lt // UNSAFE(@ohsayan): either because of static or lt
@ -193,136 +213,192 @@ impl<'a> LitIR<'a> {
} }
} }
impl<'a> Hash for LitIR<'a> { impl<'a> Lit<'a> {
fn hash<H: Hasher>(&self, state: &mut H) { const DTC_NONE: u8 = 0;
self.tag.tag_unique().hash(state); const DTC_HSTR: u8 = 1;
self.__vdata().hash(state); unsafe fn _new(tag: FullTag, dtc: u8, word: SpecialPaddedWord) -> Self {
}
}
impl<'a> DataspecMeta1D for LitIR<'a> {
type Target = SpecialPaddedWord;
type StringItem = &'a str;
type Tag = FullTag;
fn new(flag: Self::Tag, data: Self::Target) -> Self {
Self { Self {
tag: flag, tag,
data, dtc,
word,
_lt: PhantomData, _lt: PhantomData,
} }
} }
fn kind(&self) -> Self::Tag { fn _quad(quad: u64, tag: FullTag) -> Self {
self.tag
}
fn data(&self) -> Self::Target {
unsafe { unsafe {
// UNSAFE(@ohsayan): We can freely copy our stack because everything is already allocated // UNSAFE(@ohsayan): we initialize the correct bit pattern
mem::transmute_copy(self) Self::_new(tag, Self::DTC_NONE, SpecialPaddedWord::new_quad(quad))
} }
} }
} unsafe fn _wide_word(ptr: *mut u8, len: usize, dtc: u8, tag: FullTag) -> Self {
Self::_new(tag, dtc, SpecialPaddedWord::new(len as _, ptr as _))
/*
UNSAFE(@ohsayan): Safety:
- Heap str: no
- Heap bin: no
- Drop str: no
- Drop bin: no
- Clone str: stack
- Clone bin: stack
*/
unsafe impl<'a> DataspecRaw1D for LitIR<'a> {
const HEAP_STR: bool = false;
const HEAP_BIN: bool = false;
unsafe fn drop_str(&mut self) {}
unsafe fn drop_bin(&mut self) {}
unsafe fn clone_str(s: &str) -> Self::Target {
WordIO::store((s.len(), s.as_ptr()))
} }
unsafe fn clone_bin(b: &[u8]) -> Self::Target { unsafe fn _str(ptr: *mut u8, len: usize, dtc: u8) -> Self {
WordIO::store((b.len(), b.as_ptr())) Self::_wide_word(ptr, len, dtc, FullTag::STR)
} }
} unsafe fn _drop_zero(_: SpecialPaddedWord) {}
unsafe fn _drop_hstr(word: SpecialPaddedWord) {
/* let (a, b) = word.dwordqn_load_qw_nw();
UNSAFE(@ohsayan): Safety: drop(Vec::from_raw_parts(
- No touches :) b as *const u8 as *mut u8,
*/ a as _,
unsafe impl<'a> Dataspec1D for LitIR<'a> { a as _,
fn Str(s: Self::StringItem) -> Self { ));
Self::new(FullTag::STR, WordIO::store((s.len(), s.as_ptr())))
} }
} }
impl<'a> ToString for LitIR<'a> { impl<'a> Drop for Lit<'a> {
fn to_string(&self) -> String { fn drop(&mut self) {
<Self as DataspecMethods1D>::to_string_debug(self) static DFN: [unsafe fn(SpecialPaddedWord); 2] = [Lit::_drop_zero, Lit::_drop_hstr];
unsafe { DFN[self.dtc as usize](core::mem::transmute_copy(&self.word)) }
} }
} }
/* impl<'a> Clone for Lit<'a> {
UNSAFE(@ohsayan): Safety: fn clone(&self) -> Lit<'a> {
- No touches static CFN: [unsafe fn(SpecialPaddedWord) -> SpecialPaddedWord; 2] = unsafe {
*/ [
unsafe impl<'a> DataspecMethods1D for LitIR<'a> {} |stack| core::mem::transmute(stack),
|hstr| {
impl<'a, T: DataspecMethods1D> PartialEq<T> for LitIR<'a> { let (q, n) = hstr.dwordqn_load_qw_nw();
fn eq(&self, other: &T) -> bool { let mut md = ManuallyDrop::new(
<Self as DataspecMethods1D>::self_eq(self, other) slice::from_raw_parts(n as *const u8, q as usize).to_owned(),
);
md.shrink_to_fit();
SpecialPaddedWord::new(q, md.as_mut_ptr() as _)
},
]
};
unsafe {
Self::_new(
self.tag,
self.dtc,
CFN[self.dtc as usize](core::mem::transmute_copy(&self.word)),
)
}
} }
} }
impl<'a> fmt::Debug for LitIR<'a> { impl<'a> fmt::Debug for Lit<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut f = f.debug_struct("LitIR"); let mut field = f.debug_struct("Lit");
f.field("tag", &self.tag); field.field("tag", &self.tag);
self.self_fmt_debug_data("data", &mut f); unsafe {
f.field("_lt", &self._lt); macro_rules! d {
f.finish() ($expr:expr) => {{
field.field("data", &$expr);
}};
}
match self.tag.tag_class() {
TagClass::Bool => d!(self.bool()),
TagClass::UnsignedInt => d!(self.uint()),
TagClass::SignedInt => d!(self.sint()),
TagClass::Float => d!(self.float()),
TagClass::Bin => d!(self.bin()),
TagClass::Str => d!(self.str()),
TagClass::List => panic!("found 2D in 1D"),
}
}
field.finish()
} }
} }
impl<'a> Drop for LitIR<'a> { impl<'a> Hash for Lit<'a> {
fn drop(&mut self) { fn hash<H: Hasher>(&self, state: &mut H) {
self.self_drop(); self.tag.tag_unique().hash(state);
self.__vdata().hash(state);
} }
} }
impl<'a> Clone for LitIR<'a> { impl<'a> PartialEq for Lit<'a> {
fn clone(&self) -> Self { fn eq(&self, other: &Self) -> bool {
self.self_clone() unsafe {
// UNSAFE(@ohsayan): +tagck
match (self.tag.tag_class(), other.tag.tag_class()) {
(TagClass::Bool, TagClass::Bool) => self.bool() == other.bool(),
(TagClass::UnsignedInt, TagClass::UnsignedInt) => self.uint() == other.uint(),
(TagClass::SignedInt, TagClass::SignedInt) => self.sint() == other.sint(),
(TagClass::Float, TagClass::Float) => self.float() == other.float(),
(TagClass::Bin, TagClass::Bin) => self.bin() == other.bin(),
(TagClass::Str, TagClass::Str) => self.str() == other.str(),
_ => false,
}
}
} }
} }
direct_from! { direct_from! {
LitIR<'a> => { Lit<'a> => {
bool as Bool, bool as new_bool,
u64 as UnsignedInt, u64 as new_uint,
i64 as SignedInt, i64 as new_sint,
f64 as Float, f64 as new_float,
&'a str as Str, &'a str as new_str,
&'a [u8] as Bin, String as new_string,
Box<str> as new_boxed_str,
&'a [u8] as new_bin,
}
}
impl<'a> ToString for Lit<'a> {
fn to_string(&self) -> String {
unsafe {
match self.kind().tag_class() {
TagClass::Bool => self.bool().to_string(),
TagClass::UnsignedInt => self.uint().to_string(),
TagClass::SignedInt => self.sint().to_string(),
TagClass::Float => self.float().to_string(),
TagClass::Bin => format!("{:?}", self.bin()),
TagClass::Str => format!("{:?}", self.str()),
TagClass::List => panic!("found 2D in 1D"),
}
}
} }
} }
#[test] #[test]
fn tlit() { fn stk_variants() {
let str1 = Lit::Str("hello".into()); let stk1 = [
let str2 = str1.clone(); Lit::new_bool(true),
assert_eq!(str1, str2); Lit::new_uint(u64::MAX),
assert_eq!(str1.str(), "hello"); Lit::new_sint(i64::MIN),
assert_eq!(str2.str(), "hello"); Lit::new_float(f64::MIN),
drop(str1); Lit::new_str("hello"),
assert_eq!(str2.str(), "hello"); Lit::new_bin(b"world"),
];
let stk2 = stk1.clone();
assert_eq!(stk1, stk2);
} }
#[test] #[test]
fn tlitir() { fn hp_variants() {
let str1 = LitIR::Str("hello"); let hp1 = [
let str2 = str1.clone(); Lit::new_string("hello".into()),
assert_eq!(str1, str2); Lit::new_string("world".into()),
assert_eq!(str1.str(), "hello"); ];
assert_eq!(str2.str(), "hello"); let hp2 = hp1.clone();
drop(str1); assert_eq!(hp1, hp2);
assert_eq!(str2.str(), "hello"); }
/// A heap-backed literal and the view obtained from it through `as_ir` must compare equal
#[test]
fn lt_link() {
    let owned = Lit::new_string("hello".into());
    let view = owned.as_ir();
    assert_eq!(owned, view);
}
#[test]
fn token_array_lt_test() {
let tokens = vec![Lit::new_string("hello".to_string()), Lit::new_str("hi")];
#[derive(Debug)]
pub struct SelectStatement<'a> {
primary_key: Lit<'a>,
shorthand: Lit<'a>,
}
let select_stmt = SelectStatement {
primary_key: tokens[0].as_ir(),
shorthand: tokens[1].as_ir(),
};
drop(select_stmt);
drop(tokens);
} }

@ -1,60 +0,0 @@
/*
* Created on Mon Feb 27 2023
*
* This file is a part of Skytable
* Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
* NoSQL database written by Sayan Nandan ("the Author") with the
* vision to provide flexibility in data modelling without compromising
* on performance, queryability or scalability.
*
* Copyright (c) 2023, Sayan Nandan <ohsayan@outlook.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
/// Emulates an enum-style `match` over a 1D data item by matching on the item's tag class
/// (`DataTag::tag_class` of `DataspecMeta1D::kind`) and, for arms that name a capture, reading
/// the payload with the corresponding unchecked `Dataspec1D::read_*_uck` accessor.
///
/// Accepted arm shapes (each arm must end with a `,`):
/// - `Name::Variant(capture) => expr,` -- binds the payload to `capture`
/// - `Name::Variant(_) => expr,` -- matches the tag class without reading the payload
/// - either of the above followed by `if guard` (with a capture, the payload is read once for the
///   guard and once more for the arm body)
/// - `_ => expr,` and `ident => expr,` -- catch-alls over the tag class
///
/// The generated `match` carries `#[deny(unreachable_patterns)]`.
///
/// This is a token muncher: arms are consumed one at a time from the input block and accumulated
/// in `[$($branch)*]`; do not touch it UNLESS YOU ABSOLUTELY KNOW what you're doing
macro_rules! match_data {
    // entry points: `match ref item { .. }` passes the item as-is, `match item { .. }` borrows it
    (match ref $dataitem:ident $tail:tt) => {match_data!(@branch [ #[deny(unreachable_patterns)] match crate::engine::data::tag::DataTag::tag_class(&crate::engine::data::spec::DataspecMeta1D::kind($dataitem))] $dataitem [] $tail)};
    (match $dataitem:ident $tail:tt) => {match_data!(@branch [ #[deny(unreachable_patterns)] match crate::engine::data::tag::DataTag::tag_class(&crate::engine::data::spec::DataspecMeta1D::kind(&$dataitem))] $dataitem [] $tail)};
    // no arms left: emit the accumulated match
    (@branch $decl:tt $dataitem:ident [$($branch:tt)*] {}) => {match_data!(@defeat0 $decl [$($branch)*])};
    // `Name::Variant(capture) => ret`
    (@branch $decl:tt $dataitem:ident [$($branch:tt)*] { $(#[$attr:meta])* $name:ident::$variant:ident($capture:ident) => $ret:expr, $($tail:tt)*}) => {
        match_data!(@branch $decl $dataitem [$($branch)* $(#[$attr])* crate::engine::data::tag::TagClass::$variant => {let $capture = unsafe { /* UNSAFE(@ohsayan): flagck */ match_data!(@extract $name $dataitem $variant) }; $ret},] {$($tail)*})
    };
    // `Name::Variant(_) => ret`
    (@branch $decl:tt $dataitem:ident [$($branch:tt)*] { $(#[$attr:meta])* $name:ident::$variant:ident(_) => $ret:expr, $($tail:tt)*}) => {
        match_data!(@branch $decl $dataitem [$($branch)* $(#[$attr])* crate::engine::data::tag::TagClass::$variant => $ret,] {$($tail)*})
    };
    // `Name::Variant(capture) if guard => ret`: the payload is extracted for the guard and again for the body
    (@branch $decl:tt $dataitem:ident [$($branch:tt)*] { $(#[$attr:meta])* $name:ident::$variant:ident($capture:ident) if $guard:expr => $ret:expr, $($tail:tt)*}) => {
        match_data!(@branch $decl $dataitem [$($branch)* $(#[$attr])* crate::engine::data::tag::TagClass::$variant if { let $capture = unsafe { /* UNSAFE(@ohsayan): flagck */ match_data!(@extract $name $dataitem $variant) }; $guard } => {
            let $capture = unsafe { /* UNSAFE(@ohsayan): flagck */ match_data!(@extract $name $dataitem $variant) }; let _ = &$capture; $ret}, ] {$($tail)*}
        )
    };
    // `Name::Variant(_) if guard => ret`
    (@branch $decl:tt $dataitem:ident [$($branch:tt)*] { $(#[$attr:meta])* $name:ident::$variant:ident(_) if $guard:expr => $ret:expr, $($tail:tt)*}) => {
        match_data!(@branch $decl $dataitem [$($branch)* $(#[$attr])* crate::engine::data::tag::TagClass::$variant if $guard => $ret,] {$($tail)*})
    };
    // `_ => ret`
    (@branch $decl:tt $dataitem:ident [$($branch:tt)*] { $(#[$attr:meta])* _ => $ret:expr, $($tail:tt)*}) => {
        match_data!(@branch $decl $dataitem [$($branch)* $(#[$attr])* _ => $ret,] {$($tail)*})
    };
    // `ident => ret`: the remaining arms are forwarded verbatim (a fragment specifier must not
    // appear in the transcriber, or stray `: tt` tokens are emitted after every forwarded token)
    (@branch $decl:tt $dataitem:ident [$($branch:tt)*] { $(#[$attr:meta])* $capture:ident => $ret:expr, $($tail:tt)* }) => {
        match_data!(@branch $decl $dataitem [ $($branch)* $(#[$attr])* $capture => { $ret},] {$($tail)*})
    };
    // final emission: `<decl> { <arms> }`
    (@defeat0 [$($decl:tt)*] [$($branch:tt)*]) => {$($decl)* { $($branch)* }};
    // payload readers, one per tag class
    (@extract $name:ident $dataitem:ident Bool) => {<$name as crate::engine::data::spec::Dataspec1D>::read_bool_uck(&$dataitem)};
    (@extract $name:ident $dataitem:ident UnsignedInt) => {<$name as crate::engine::data::spec::Dataspec1D>::read_uint_uck(&$dataitem)};
    (@extract $name:ident $dataitem:ident SignedInt) => {<$name as crate::engine::data::spec::Dataspec1D>::read_sint_uck(&$dataitem)};
    (@extract $name:ident $dataitem:ident Float) => {<$name as crate::engine::data::spec::Dataspec1D>::read_float_uck(&$dataitem)};
    (@extract $name:ident $dataitem:ident Bin) => {<$name as crate::engine::data::spec::Dataspec1D>::read_bin_uck(&$dataitem)};
    (@extract $name:ident $dataitem:ident Str) => {<$name as crate::engine::data::spec::Dataspec1D>::read_str_uck(&$dataitem)};
}

@ -24,12 +24,9 @@
* *
*/ */
#[macro_use]
mod macros;
pub mod cell; pub mod cell;
pub mod dict; pub mod dict;
pub mod lit; pub mod lit;
pub mod spec;
pub mod tag; pub mod tag;
pub mod uuid; pub mod uuid;
// test // test

@ -1,310 +0,0 @@
/*
* Created on Sun Feb 26 2023
*
* This file is a part of Skytable
* Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
* NoSQL database written by Sayan Nandan ("the Author") with the
* vision to provide flexibility in data modelling without compromising
* on performance, queryability or scalability.
*
* Copyright (c) 2023, Sayan Nandan <ohsayan@outlook.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
/*
So, I woke up and chose violence. God bless me and the stack memory. What I've done here is a sin. Do not follow my footsteps here if you want to write safe and maintainable code.
-- @ohsayan
*/
use {
super::tag::{DataTag, TagClass},
crate::engine::mem::{DwordQN, WordIO},
core::{fmt, mem, slice},
};
/// Calls `then` and wraps its result in `Some` when `cond` holds; otherwise returns `None`
/// without calling `then`
#[inline(always)]
fn when_then<T, F: FnOnce() -> T>(cond: bool, then: F) -> Option<T> {
    if cond {
        Some(then())
    } else {
        None
    }
}
/// Information about the type that implements the dataspec traits: its tag type, the word type
/// it keeps its payload in, and the constructor/getters over both
pub trait DataspecMeta1D: Sized {
// associated types
/// The tag type; `kind` returns a value of this type
type Tag: DataTag;
/// The target must be able to store (at least) a native dword
type Target: DwordQN;
/// The string item. This helps us remain correct with the dtors
type StringItem;
// functions
/// Create a new instance from a tag and a data word. Usually allocates zero memory *directly*
fn new(tag: Self::Tag, data: Self::Target) -> Self;
/// Returns the reduced dataflag
fn kind(&self) -> Self::Tag;
/// Returns the data stack
fn data(&self) -> Self::Target;
}
/// Unsafe dtor/ctor impls for dataspec items. We have no clue about these things, the implementor must take care of them
///
/// ## Safety
///
/// - Your dtors MUST BE correct
pub unsafe trait DataspecRaw1D: DataspecMeta1D {
/// Is the string heap allocated...anywhere down the line?
const HEAP_STR: bool;
/// Is the binary heap allocated...anywhere down the line?
const HEAP_BIN: bool;
/// Drop the string, if you need a dtor
unsafe fn drop_str(&mut self);
/// Drop the binary, if you need a dtor
unsafe fn drop_bin(&mut self);
/// Clone the string object, returning the data word for the clone. Note, we literally HAVE NO IDEA about what you're doing here
unsafe fn clone_str(s: &str) -> Self::Target;
/// Clone the binary object, returning the data word for the clone. Again, NOT A DAMN CLUE about what you're doing down there
unsafe fn clone_bin(b: &[u8]) -> Self::Target;
}
/// Typed constructors and readers for dataspec objects
///
/// Every data class has a constructor, an unchecked reader (`read_*_uck`), a checked reader
/// (`read_*_try`) and a panicking reader named after the class.
///
/// ## Safety
/// - You must touch your targets by yourself
pub unsafe trait Dataspec1D: DataspecMeta1D + DataspecRaw1D {
    // constructors
    /// Create a bool item. This function is always safe to call
    #[allow(non_snake_case)]
    fn Bool(b: bool) -> Self {
        let word: Self::Target = WordIO::store(b);
        Self::new(Self::Tag::BOOL, word)
    }
    /// Create a uint item. This function is always safe to call
    #[allow(non_snake_case)]
    fn UnsignedInt(u: u64) -> Self {
        let word: Self::Target = WordIO::store(u);
        Self::new(Self::Tag::UINT, word)
    }
    /// Create a sint item. This function is always safe to call
    #[allow(non_snake_case)]
    fn SignedInt(s: i64) -> Self {
        let word: Self::Target = WordIO::store(s);
        Self::new(Self::Tag::SINT, word)
    }
    /// Create a float item (stored as its bit pattern). This function is always safe to call
    #[allow(non_snake_case)]
    fn Float(f: f64) -> Self {
        let word: Self::Target = WordIO::store(f.to_bits());
        Self::new(Self::Tag::FLOAT, word)
    }
    /// Create a binary item that stores the length and pointer of `b`. This function is always safe to call
    #[allow(non_snake_case)]
    fn Bin(b: &[u8]) -> Self {
        let word: Self::Target = WordIO::store((b.len(), b.as_ptr()));
        Self::new(Self::Tag::BIN, word)
    }
    /// Create a string item. The safety of this one depends entirely on the implementor
    #[allow(non_snake_case)]
    fn Str(s: Self::StringItem) -> Self;

    // readers: bool
    /// Read the data word as a bool without checking the tag (unsafe for logical verity)
    unsafe fn read_bool_uck(&self) -> bool {
        let word = self.data();
        word.load()
    }
    /// Read a bool, or `None` if the tag class is not `Bool`
    fn read_bool_try(&self) -> Option<bool> {
        let is_bool = self.kind().tag_class() == TagClass::Bool;
        when_then(is_bool, || unsafe {
            // UNSAFE(@ohsayan): the flag was verified just above
            self.read_bool_uck()
        })
    }
    /// Read a bool
    /// ## Panics
    /// If the item is not a bool
    fn bool(&self) -> bool {
        let value = self.read_bool_try();
        value.unwrap()
    }

    // readers: uint
    /// Read the data word as a uint without checking the tag (unsafe for logical verity)
    unsafe fn read_uint_uck(&self) -> u64 {
        let word = self.data();
        word.load()
    }
    /// Read a uint, or `None` if the tag class is not `UnsignedInt`
    fn read_uint_try(&self) -> Option<u64> {
        let is_uint = self.kind().tag_class() == TagClass::UnsignedInt;
        when_then(is_uint, || unsafe {
            // UNSAFE(@ohsayan): the flag was verified just above
            self.read_uint_uck()
        })
    }
    /// Read a uint
    /// ## Panics
    /// If the item is not a uint
    fn uint(&self) -> u64 {
        let value = self.read_uint_try();
        value.unwrap()
    }

    // readers: sint
    /// Read the data word as a sint without checking the tag (unsafe for logical verity)
    unsafe fn read_sint_uck(&self) -> i64 {
        let word = self.data();
        word.load()
    }
    /// Read a sint, or `None` if the tag class is not `SignedInt`
    fn read_sint_try(&self) -> Option<i64> {
        let is_sint = self.kind().tag_class() == TagClass::SignedInt;
        when_then(is_sint, || unsafe {
            // UNSAFE(@ohsayan): the flag was verified just above
            self.read_sint_uck()
        })
    }
    /// Read a sint
    /// ## Panics
    /// If the item is not a sint
    fn sint(&self) -> i64 {
        let value = self.read_sint_try();
        value.unwrap()
    }

    // readers: float
    /// Read the data word as a float without checking the tag (unsafe for logical verity)
    unsafe fn read_float_uck(&self) -> f64 {
        let word = self.data();
        word.load()
    }
    /// Read a float, or `None` if the tag class is not `Float`
    fn read_float_try(&self) -> Option<f64> {
        let is_float = self.kind().tag_class() == TagClass::Float;
        when_then(is_float, || unsafe {
            // UNSAFE(@ohsayan): the flag was verified just above
            self.read_float_uck()
        })
    }
    /// Read a float
    /// ## Panics
    /// If the item is not a float
    fn float(&self) -> f64 {
        let value = self.read_float_try();
        value.unwrap()
    }

    // readers: bin
    /// Read the data word as a `(length, pointer)` pair and build a byte slice from it
    ///
    /// ## Safety
    /// The item must be a binary that was stored correctly, and the pointed-to bytes must still be valid
    unsafe fn read_bin_uck(&self) -> &[u8] {
        let (len, ptr) = self.data().load();
        slice::from_raw_parts(ptr, len)
    }
    /// Read a binary, or `None` if the tag class is not `Bin`
    fn read_bin_try(&self) -> Option<&[u8]> {
        let is_bin = self.kind().tag_class() == TagClass::Bin;
        when_then(is_bin, || unsafe {
            // UNSAFE(@ohsayan): the flag was verified just above
            self.read_bin_uck()
        })
    }
    /// Read a binary
    /// ## Panics
    /// If the item is not a binary
    fn bin(&self) -> &[u8] {
        let value = self.read_bin_try();
        value.unwrap()
    }

    // readers: str
    /// Read the payload as bytes (see `read_bin_uck`) and reinterpret them as a `str`
    ///
    /// ## Safety
    /// The item must be a string that was stored correctly, and the pointed-to bytes must still be valid
    unsafe fn read_str_uck(&self) -> &str {
        let bytes = self.read_bin_uck();
        mem::transmute::<&[u8], &str>(bytes)
    }
    /// Read a string, or `None` if the tag class is not `Str`
    fn read_str_try(&self) -> Option<&str> {
        let is_str = self.kind().tag_class() == TagClass::Str;
        when_then(is_str, || unsafe {
            // UNSAFE(@ohsayan): the flag was verified just above
            self.read_str_uck()
        })
    }
    /// Read a string
    /// ## Panics
    /// If the item is not a string
    fn str(&self) -> &str {
        let value = self.read_str_try();
        value.unwrap()
    }
}
/// Shared drop/clone/eq/debug implementations built on top of [`Dataspec1D`]
///
/// ## Safety
/// - You are not touching your target
pub unsafe trait DataspecMethods1D: Dataspec1D {
    /// Run the implementor's string/binary dtor, but only when the item is of that class and the
    /// implementor declared that class as heap allocated
    fn self_drop(&mut self) {
        let class = self.kind().tag_class();
        match (
            class,
            <Self as DataspecRaw1D>::HEAP_STR,
            <Self as DataspecRaw1D>::HEAP_BIN,
        ) {
            (TagClass::Str, true, _) => unsafe {
                // UNSAFE(@ohsayan): heap allocated string; defer to the implementor's dtor
                <Self as DataspecRaw1D>::drop_str(self)
            },
            (TagClass::Bin, _, true) => unsafe {
                // UNSAFE(@ohsayan): heap allocated binary; defer to the implementor's dtor
                <Self as DataspecRaw1D>::drop_bin(self)
            },
            _ => {}
        }
    }
    /// Clone the item: heap allocated strings/binaries go through the implementor's clone
    /// functions, everything else reuses the data word as-is
    fn self_clone(&self) -> Self {
        let class = self.kind().tag_class();
        let data = match (
            class,
            <Self as DataspecRaw1D>::HEAP_STR,
            <Self as DataspecRaw1D>::HEAP_BIN,
        ) {
            (TagClass::Str, true, _) => unsafe {
                // UNSAFE(@ohsayan): heap allocated string; defer to the implementor's clone
                <Self as DataspecRaw1D>::clone_str(Dataspec1D::read_str_uck(self))
            },
            (TagClass::Bin, _, true) => unsafe {
                // UNSAFE(@ohsayan): heap allocated binary; defer to the implementor's clone
                <Self as DataspecRaw1D>::clone_bin(Dataspec1D::read_bin_uck(self))
            },
            _ => self.data(),
        };
        Self::new(self.kind(), data)
    }
    /// Compare two items (possibly of different implementing types): they are equal only if they
    /// have the same tag class and equal payloads. Lists never compare equal here
    fn self_eq(&self, other: &impl DataspecMethods1D) -> bool {
        let ours = self.kind().tag_class();
        let theirs = other.kind().tag_class();
        if ours != theirs {
            return false;
        }
        unsafe {
            // UNSAFE(@ohsayan): both flags were checked (and are identical)
            match ours {
                TagClass::Bool => self.read_bool_uck() == other.read_bool_uck(),
                TagClass::UnsignedInt => self.read_uint_uck() == other.read_uint_uck(),
                TagClass::SignedInt => self.read_sint_uck() == other.read_sint_uck(),
                TagClass::Float => self.read_float_uck() == other.read_float_uck(),
                TagClass::Bin => self.read_bin_uck() == other.read_bin_uck(),
                TagClass::Str => self.read_str_uck() == other.read_str_uck(),
                _ => false,
            }
        }
    }
    /// Add the payload to a debug struct under the field name `data_field`
    ///
    /// ## Panics
    /// If the tag class is `List`
    fn self_fmt_debug_data(&self, data_field: &str, f: &mut fmt::DebugStruct) {
        unsafe {
            // UNSAFE(@ohsayan): every read is guarded by the matching flag
            match self.kind().tag_class() {
                TagClass::Bool => f.field(data_field, &self.read_bool_uck()),
                TagClass::UnsignedInt => f.field(data_field, &self.read_uint_uck()),
                TagClass::SignedInt => f.field(data_field, &self.read_sint_uck()),
                TagClass::Float => f.field(data_field, &self.read_float_uck()),
                TagClass::Bin => f.field(data_field, &self.read_bin_uck()),
                TagClass::Str => f.field(data_field, &self.read_str_uck()),
                TagClass::List => unreachable!("found 2D data in 1D"),
            }
        };
    }
    /// Render the payload as a string: scalars via `to_string`, binaries and strings via their
    /// `Debug` formatting
    ///
    /// ## Panics
    /// If the tag class is `List`
    #[rustfmt::skip]
    fn to_string_debug(&self) -> String {
        match_data!(match ref self {
            Self::Bool(b) => b.to_string(),
            Self::UnsignedInt(u) => u.to_string(),
            Self::SignedInt(s) => s.to_string(),
            Self::Float(f) => f.to_string(),
            Self::Bin(b) => format!("{:?}", b),
            Self::Str(s) => format!("{:?}", s),
            Self::List(_) => unreachable!("found 2D data in 1D"),
        })
    }
}

@ -25,21 +25,11 @@
*/ */
mod md_dict_tests; mod md_dict_tests;
use super::{ use super::lit::Lit;
lit::{Lit, LitIR},
spec::Dataspec1D,
};
#[test]
fn t_largest_int_litir() {
let x = LitIR::UnsignedInt(u64::MAX);
let y = LitIR::UnsignedInt(u64::MAX);
assert_eq!(x, y);
}
#[test] #[test]
fn t_largest_int_lit() { fn t_largest_int_lit() {
let x = Lit::UnsignedInt(u64::MAX); let x = Lit::new_uint(u64::MAX);
let y = Lit::UnsignedInt(u64::MAX); let y = Lit::new_uint(u64::MAX);
assert_eq!(x, y); assert_eq!(x, y);
} }

@ -69,6 +69,12 @@ impl SpecialPaddedWord {
pub const unsafe fn new(a: u64, b: usize) -> Self { pub const unsafe fn new(a: u64, b: usize) -> Self {
Self { a, b } Self { a, b }
} }
/// Create a word that stores the quad `a`; the pointer-sized half is set to the address of `ZERO_BLOCK`
pub fn new_quad(a: u64) -> Self {
    let b = ZERO_BLOCK.as_ptr() as usize;
    Self { a, b }
}
} }
pub trait StatelessLen { pub trait StatelessLen {

@ -29,190 +29,408 @@ use core::{ptr, slice};
pub type BufferedScanner<'a> = Scanner<'a, u8>; pub type BufferedScanner<'a> = Scanner<'a, u8>;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
/// A scanner over a slice buffer `[T]`
pub struct Scanner<'a, T> { pub struct Scanner<'a, T> {
d: &'a [T], d: &'a [T],
__cursor: usize, __cursor: usize,
} }
impl<'a, T> Scanner<'a, T> { impl<'a, T> Scanner<'a, T> {
/// Create a new scanner, starting at position 0
pub const fn new(d: &'a [T]) -> Self { pub const fn new(d: &'a [T]) -> Self {
unsafe { Self::new_with_cursor(d, 0) } unsafe {
// UNSAFE(@ohsayan): starting with 0 is always correct
Self::new_with_cursor(d, 0)
}
} }
/// Create a new scanner, starting with the given position
///
/// ## Safety
///
/// `i` must be a valid index into the given slice
pub const unsafe fn new_with_cursor(d: &'a [T], i: usize) -> Self { pub const unsafe fn new_with_cursor(d: &'a [T], i: usize) -> Self {
Self { d, __cursor: i } Self { d, __cursor: i }
} }
}
impl<'a, T> Scanner<'a, T> {
pub const fn buffer_len(&self) -> usize {
self.d.len()
}
/// Returns the remaining number of **items**
pub const fn remaining(&self) -> usize { pub const fn remaining(&self) -> usize {
self.d.len() - self.__cursor self.buffer_len() - self.__cursor
} }
/// Returns the number of items consumed by the scanner
pub const fn consumed(&self) -> usize { pub const fn consumed(&self) -> usize {
self.__cursor self.__cursor
} }
/// Returns the current cursor position
pub const fn cursor(&self) -> usize { pub const fn cursor(&self) -> usize {
self.__cursor self.__cursor
} }
pub fn current(&self) -> &[T] { /// Returns the buffer from the current position
pub fn current_buffer(&self) -> &[T] {
&self.d[self.__cursor..] &self.d[self.__cursor..]
} }
/// Returns the ptr to the cursor
///
/// WARNING: The pointer might be invalid!
pub const fn cursor_ptr(&self) -> *const T { pub const fn cursor_ptr(&self) -> *const T {
unsafe { self.d.as_ptr().add(self.__cursor) } unsafe {
// UNSAFE(@ohsayan): assuming that the cursor is correctly initialized, this is always fine
self.d.as_ptr().add(self.__cursor)
}
} }
/// Returns true if the scanner has reached eof
pub fn eof(&self) -> bool { pub fn eof(&self) -> bool {
self.remaining() == 0 self.remaining() == 0
} }
/// Returns true if the scanner has atleast `sizeof` bytes remaining
pub fn has_left(&self, sizeof: usize) -> bool { pub fn has_left(&self, sizeof: usize) -> bool {
self.remaining() >= sizeof self.remaining() >= sizeof
} }
pub fn matches_cursor_rounded(&self, f: impl Fn(&T) -> bool) -> bool { /// Returns true if the rounded cursor matches the predicate
f(&self.d[(self.d.len() - 1).min(self.__cursor)]) pub fn rounded_cursor_matches(&self, f: impl Fn(&T) -> bool) -> bool {
f(&self.d[self.rounded_cursor()])
}
/// Same as `rounded_cursor_matches`, but with the added guarantee that no rounding was done
pub fn rounded_cursor_not_eof_matches(&self, f: impl Fn(&T) -> bool) -> bool {
self.rounded_cursor_matches(f) & !self.eof()
} }
pub fn matches_cursor_rounded_and_not_eof(&self, f: impl Fn(&T) -> bool) -> bool { /// A shorthand for equality in `rounded_cursor_not_eof_matches`
self.matches_cursor_rounded(f) & !self.eof() pub fn rounded_cursor_not_eof_equals(&self, v_t: T) -> bool
where
T: PartialEq,
{
self.rounded_cursor_matches(|v| v_t.eq(v)) & !self.eof()
} }
} }
impl<'a, T> Scanner<'a, T> { impl<'a, T> Scanner<'a, T> {
/// Manually set the cursor position
///
/// ## Safety
/// The index must be valid
pub unsafe fn set_cursor(&mut self, i: usize) { pub unsafe fn set_cursor(&mut self, i: usize) {
self.__cursor = i; self.__cursor = i;
} }
pub unsafe fn move_ahead(&mut self) { /// Increment the cursor
self.move_back_by(1) ///
/// ## Safety
/// The buffer must not have reached EOF
pub unsafe fn incr_cursor(&mut self) {
self.incr_cursor_by(1)
} }
pub unsafe fn move_ahead_by(&mut self, by: usize) { /// Increment the cursor by the given amount
self._incr(by) ///
/// ## Safety
/// The buffer must have atleast `by` remaining
pub unsafe fn incr_cursor_by(&mut self, by: usize) {
self.__cursor += by;
} }
pub unsafe fn move_back(&mut self) { /// Increment the cursor if the given the condition is satisfied
self.move_back_by(1) ///
/// ## Safety
/// Custom logic should ensure only legal cursor increments
pub unsafe fn incr_cursor_if(&mut self, iff: bool) {
self.incr_cursor_by(iff as _)
} }
pub unsafe fn move_back_by(&mut self, by: usize) { /// Decrement the cursor
///
/// ## Safety
/// The cursor must **not be at 0**
pub unsafe fn decr_cursor(&mut self) {
self.decr_cursor_by(1)
}
/// Decrement the cursor by the given amount
///
/// ## Safety
/// Should not overflow (overflow safety is ... nevermind)
pub unsafe fn decr_cursor_by(&mut self, by: usize) {
self.__cursor -= by; self.__cursor -= by;
} }
unsafe fn _incr(&mut self, by: usize) { /// Returns the current cursor
self.__cursor += by; ///
/// ## Safety
/// Buffer should NOT be at EOF
pub unsafe fn deref_cursor(&self) -> T
where
T: Copy,
{
*self.cursor_ptr()
}
/// Returns the rounded cursor
pub fn rounded_cursor(&self) -> usize {
(self.buffer_len() - 1).min(self.__cursor)
} }
unsafe fn _cursor(&self) -> *const T { /// Returns the current cursor value with rounding
self.d.as_ptr().add(self.__cursor) pub fn rounded_cursor_value(&self) -> T
where
T: Copy,
{
self.d[self.rounded_cursor()]
} }
} }
impl<'a> Scanner<'a, u8> { impl<'a> Scanner<'a, u8> {
/// Attempt to parse the next byte
pub fn try_next_byte(&mut self) -> Option<u8> { pub fn try_next_byte(&mut self) -> Option<u8> {
if self.eof() { if self.eof() {
None None
} else { } else {
Some(unsafe { self.next_byte() }) Some(unsafe {
// UNSAFE(@ohsayan): +remaining check
self.next_byte()
})
} }
} }
/// Attempt to parse the next block
pub fn try_next_block<const N: usize>(&mut self) -> Option<[u8; N]> { pub fn try_next_block<const N: usize>(&mut self) -> Option<[u8; N]> {
if self.has_left(N) { if self.has_left(N) {
Some(unsafe { self.next_chunk() }) Some(unsafe {
// UNSAFE(@ohsayan): +remaining check
self.next_chunk()
})
} else { } else {
None None
} }
} }
pub fn try_next_variable_block(&'a mut self, len: usize) -> Option<&'a [u8]> { /// Attempt to parse the next block (variable)
pub fn try_next_variable_block(&mut self, len: usize) -> Option<&'a [u8]> {
if self.has_left(len) { if self.has_left(len) {
Some(unsafe { self.next_chunk_variable(len) }) Some(unsafe {
// UNSAFE(@ohsayan): +remaining check
self.next_chunk_variable(len)
})
} else { } else {
None None
} }
} }
} }
pub enum BufferedReadResult<T> { /// Incomplete buffered reads
#[derive(Debug, PartialEq)]
pub enum ScannerDecodeResult<T> {
/// The value was decoded
Value(T), Value(T),
/// We need more data to determine if we have the correct value
NeedMore, NeedMore,
/// Found an error while decoding a value
Error, Error,
} }
impl<'a> Scanner<'a, u8> { impl<'a> Scanner<'a, u8> {
/// Keep moving the cursor ahead while the predicate returns true
pub fn trim_ahead(&mut self, f: impl Fn(u8) -> bool) { pub fn trim_ahead(&mut self, f: impl Fn(u8) -> bool) {
while self.matches_cursor_rounded_and_not_eof(|b| f(*b)) { while self.rounded_cursor_not_eof_matches(|b| f(*b)) {
unsafe { self.move_ahead() } unsafe {
// UNSAFE(@ohsayan): not eof
self.incr_cursor()
}
} }
} }
pub fn move_ahead_if_matches(&mut self, f: impl Fn(u8) -> bool) { /// Attempt to parse a `\n` terminated integer (we move past the LF, so you can't see it)
unsafe { self.move_back_by(self.matches_cursor_rounded_and_not_eof(|b| f(*b)) as _) }
}
/// Attempt to parse a `\n` terminated (we move past the LF, so you can't see it)
/// ///
/// If we were unable to read in the integer, then the cursor will be restored to its starting position /// If we were unable to read in the integer, then the cursor will be restored to its starting position
// TODO(@ohsayan): optimize // TODO(@ohsayan): optimize
pub fn try_next_ascii_u64_lf_separated_with_result(&mut self) -> BufferedReadResult<u64> { pub fn try_next_ascii_u64_lf_separated_with_result_or_restore_cursor(
&mut self,
) -> ScannerDecodeResult<u64> {
self.try_next_ascii_u64_lf_separated_with_result_or::<true>()
}
pub fn try_next_ascii_u64_lf_separated_with_result(&mut self) -> ScannerDecodeResult<u64> {
self.try_next_ascii_u64_lf_separated_with_result_or::<false>()
}
pub fn try_next_ascii_u64_lf_separated_with_result_or<const RESTORE_CURSOR: bool>(
&mut self,
) -> ScannerDecodeResult<u64> {
let mut okay = true; let mut okay = true;
let start = self.cursor(); let start = self.cursor();
let ret = self.extract_integer(&mut okay); let ret = self.try_next_ascii_u64_stop_at_lf(&mut okay);
let payload_ok = okay; let payload_ok = okay;
let lf = self.matches_cursor_rounded_and_not_eof(|b| *b == b'\n'); let lf = self.rounded_cursor_not_eof_matches(|b| *b == b'\n');
okay &= lf; okay &= lf;
unsafe { self._incr(okay as _) }; // skip LF unsafe {
// UNSAFE(@ohsayan): not eof
// skip LF
self.incr_cursor_if(okay)
};
if okay { if okay {
BufferedReadResult::Value(ret) ScannerDecodeResult::Value(ret)
} else { } else {
unsafe { self.set_cursor(start) } if RESTORE_CURSOR {
unsafe {
// UNSAFE(@ohsayan): we correctly restore the cursor
self.set_cursor(start)
}
}
if payload_ok { if payload_ok {
// payload was ok, but we missed a null // payload was ok, but we missed a null
BufferedReadResult::NeedMore ScannerDecodeResult::NeedMore
} else { } else {
// payload was NOT ok // payload was NOT ok
BufferedReadResult::Error ScannerDecodeResult::Error
} }
} }
} }
/// Attempt to parse a LF terminated integer (we move past the LF)
/// If we were unable to read in the integer, then the cursor will be restored to its starting position
pub fn try_next_ascii_u64_lf_separated_or_restore_cursor(&mut self) -> Option<u64> {
self.try_next_ascii_u64_lf_separated_or::<true>()
}
pub fn try_next_ascii_u64_lf_separated(&mut self) -> Option<u64> { pub fn try_next_ascii_u64_lf_separated(&mut self) -> Option<u64> {
self.try_next_ascii_u64_lf_separated_or::<false>()
}
pub fn try_next_ascii_u64_lf_separated_or<const RESTORE_CURSOR: bool>(
&mut self,
) -> Option<u64> {
let start = self.cursor(); let start = self.cursor();
let mut okay = true; let mut okay = true;
let ret = self.extract_integer(&mut okay); let ret = self.try_next_ascii_u64_stop_at_lf(&mut okay);
let lf = self.matches_cursor_rounded_and_not_eof(|b| *b == b'\n'); let lf = self.rounded_cursor_not_eof_matches(|b| *b == b'\n');
unsafe {
// UNSAFE(@ohsayan): not eof
self.incr_cursor_if(lf & okay)
}
if okay & lf { if okay & lf {
Some(ret) Some(ret)
} else { } else {
unsafe { self.set_cursor(start) } if RESTORE_CURSOR {
unsafe {
// UNSAFE(@ohsayan): we correctly restore the cursor
self.set_cursor(start)
}
}
None None
} }
} }
pub fn extract_integer(&mut self, okay: &mut bool) -> u64 { /// Extracts whatever integer is possible using the current bytestream, stopping at a LF (but **not** skipping it)
pub fn try_next_ascii_u64_stop_at_lf(&mut self, g_okay: &mut bool) -> u64 {
self.try_next_ascii_u64_stop_at::<true>(g_okay, |byte| byte != b'\n')
}
/// Extracts whatever integer is possible using the current bytestream, stopping only when either an overflow occurs or when
/// the closure returns false
pub fn try_next_ascii_u64_stop_at<const ASCII_CHECK: bool>(
&mut self,
g_okay: &mut bool,
keep_going_if: impl Fn(u8) -> bool,
) -> u64 {
let mut ret = 0u64; let mut ret = 0u64;
while self.matches_cursor_rounded_and_not_eof(|b| *b != b'\n') & *okay { let mut okay = true;
while self.rounded_cursor_not_eof_matches(|b| keep_going_if(*b)) & okay {
let b = self.d[self.cursor()]; let b = self.d[self.cursor()];
*okay &= b.is_ascii_digit(); if ASCII_CHECK {
okay &= b.is_ascii_digit();
}
ret = match ret.checked_mul(10) { ret = match ret.checked_mul(10) {
Some(r) => r, Some(r) => r,
None => { None => {
*okay = false; okay = false;
break; break;
} }
}; };
ret = match ret.checked_add((b & 0x0F) as u64) { ret = match ret.checked_add((b & 0x0F) as u64) {
Some(r) => r, Some(r) => r,
None => { None => {
*okay = false; okay = false;
break; break;
} }
}; };
unsafe { self._incr(1) } unsafe {
// UNSAFE(@ohsayan): loop invariant
self.incr_cursor_by(1)
}
} }
*g_okay &= okay;
ret ret
} }
} }
impl<'a> Scanner<'a, u8> { impl<'a> Scanner<'a, u8> {
/// Attempt to parse the next [`i64`] value terminated by `STOP_BYTE`, moving past the
/// `STOP_BYTE` on success
///
/// Returns `(true, value)` only if the integer decoded cleanly and the cursor then sits on
/// `STOP_BYTE`.
///
/// WARNING: The cursor is NOT restored on failure
pub fn try_next_ascii_i64_separated_by<const STOP_BYTE: u8>(&mut self) -> (bool, i64) {
    let (parsed_ok, value) = self.try_next_ascii_i64_stop_at(|byte| byte == STOP_BYTE);
    let at_stop_byte = self.rounded_cursor_not_eof_equals(STOP_BYTE);
    let success = at_stop_byte & parsed_ok;
    unsafe {
        // UNSAFE(@ohsayan): we only move when the stop byte was found, which implies not eof
        self.incr_cursor_if(success)
    }
    (success, value)
}
/// Attempt to parse the next [`i64`] value (an optional leading `-` followed by ASCII digits),
/// stopping at the stop condition or stopping if an error occurred
///
/// Returns `(okay, value)`; `value` is only meaningful when `okay` is true. `okay` is false if:
/// - the scanner is already at EOF
/// - no digit follows the optional sign (a lone `-` is not an integer)
/// - a non-digit byte is found before the stop condition
/// - the value overflows an [`i64`]
///
/// WARNING: It is NOT guaranteed that the stop condition was met, and the cursor is not
/// restored on failure
pub fn try_next_ascii_i64_stop_at(&mut self, stop_if: impl Fn(u8) -> bool) -> (bool, i64) {
    // Nothing to decode. Bailing out here also keeps us from looking at the *rounded* cursor
    // value at EOF: that is the last byte of the buffer (not a byte we are positioned on), and
    // mistaking it for a sign would push the cursor past the end of the buffer
    if self.eof() {
        return (false, 0);
    }
    let mut ret = 0i64;
    // check if we have a direction
    let direction_negative = self.rounded_cursor_value() == b'-';
    // skip negative
    unsafe {
        // UNSAFE(@ohsayan): not eof (checked above), so the sign byte is really under the cursor
        self.incr_cursor_if(direction_negative)
    }
    // at least one digit must follow the optional sign
    let mut okay = self.rounded_cursor_not_eof_matches(|b| b.is_ascii_digit());
    while self.rounded_cursor_not_eof_matches(|b| !stop_if(*b)) & okay {
        let byte = unsafe {
            // UNSAFE(@ohsayan): loop invariant
            self.next_byte()
        };
        okay &= byte.is_ascii_digit();
        ret = match ret.checked_mul(10) {
            Some(r) => r,
            None => {
                okay = false;
                break;
            }
        };
        // accumulate towards the sign so that `i64::MIN` stays representable
        let digit = (byte & 0x0f) as i64;
        let accumulated = if direction_negative {
            ret.checked_sub(digit)
        } else {
            ret.checked_add(digit)
        };
        ret = match accumulated {
            Some(r) => r,
            None => {
                okay = false;
                break;
            }
        };
    }
    (okay, ret)
}
}
impl<'a> Scanner<'a, u8> {
/// Load the next [`u64`] LE
pub unsafe fn next_u64_le(&mut self) -> u64 { pub unsafe fn next_u64_le(&mut self) -> u64 {
u64::from_le_bytes(self.next_chunk()) u64::from_le_bytes(self.next_chunk())
} }
/// Load the next block
pub unsafe fn next_chunk<const N: usize>(&mut self) -> [u8; N] { pub unsafe fn next_chunk<const N: usize>(&mut self) -> [u8; N] {
let mut b = [0u8; N]; let mut b = [0u8; N];
ptr::copy_nonoverlapping(self._cursor(), b.as_mut_ptr(), N); ptr::copy_nonoverlapping(self.cursor_ptr(), b.as_mut_ptr(), N);
self._incr(N); self.incr_cursor_by(N);
b b
} }
pub unsafe fn next_chunk_variable(&mut self, size: usize) -> &[u8] { /// Load the next variable-sized block
let r = slice::from_raw_parts(self._cursor(), size); pub unsafe fn next_chunk_variable(&mut self, size: usize) -> &'a [u8] {
self._incr(size); let r = slice::from_raw_parts(self.cursor_ptr(), size);
self.incr_cursor_by(size);
r r
} }
/// Load the next byte
pub unsafe fn next_byte(&mut self) -> u8 { pub unsafe fn next_byte(&mut self) -> u8 {
let r = *self._cursor(); let r = *self.cursor_ptr();
self._incr(1); self.incr_cursor_by(1);
r r
} }
} }

@ -25,6 +25,7 @@
*/ */
use super::*; use super::*;
mod scanner;
mod word; mod word;
mod vinline { mod vinline {

@ -0,0 +1,249 @@
/*
* Created on Wed Sep 20 2023
*
* This file is a part of Skytable
* Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
* NoSQL database written by Sayan Nandan ("the Author") with the
* vision to provide flexibility in data modelling without compromising
* on performance, queryability or scalability.
*
* Copyright (c) 2023, Sayan Nandan <ohsayan@outlook.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
use crate::engine::mem::scanner::{BufferedScanner, ScannerDecodeResult};
/// Shorthand used by the tests below: wrap a byte slice in a [`BufferedScanner`]
fn s(b: &[u8]) -> BufferedScanner<'_> {
    let scanner = BufferedScanner::new(b);
    scanner
}
/*
lf separated
*/
/// `u64::MAX` followed by an LF decodes and the whole buffer (LF included) is consumed
#[test]
fn read_u64_lf_separated() {
    let mut scn = s(b"18446744073709551615\n");
    let decoded = scn.try_next_ascii_u64_lf_separated_or_restore_cursor();
    assert_eq!(decoded.unwrap(), u64::MAX);
    assert_eq!(scn.cursor(), scn.buffer_len());
}
/// Without the terminating LF nothing is returned and the cursor is back at the start
#[test]
fn read_u64_lf_separated_missing() {
    let mut scn = s(b"18446744073709551615");
    let decoded = scn.try_next_ascii_u64_lf_separated_or_restore_cursor();
    assert!(decoded.is_none());
    assert_eq!(scn.cursor(), 0);
}
/// A non-digit byte anywhere in the payload (trailing or leading) rejects the integer and
/// restores the cursor
#[test]
fn read_u64_lf_separated_invalid() {
    let payloads: [&[u8]; 2] = [b"1844674407370955161A\n", b"?1844674407370955161A\n"];
    for payload in payloads {
        let mut scn = s(payload);
        let decoded = scn.try_next_ascii_u64_lf_separated_or_restore_cursor();
        assert!(decoded.is_none());
        assert_eq!(scn.cursor(), 0);
    }
}
/// A single `0` digit followed by an LF decodes to zero and consumes the whole buffer
#[test]
fn read_u64_lf_separated_zero() {
    let mut scn = s(b"0\n");
    let decoded = scn.try_next_ascii_u64_lf_separated_or_restore_cursor();
    assert_eq!(decoded.unwrap(), 0);
    assert_eq!(scn.cursor(), scn.buffer_len());
}
/// One digit more than `u64::MAX` can hold is rejected and the cursor is restored
#[test]
fn read_u64_lf_overflow() {
    let mut scn = s(b"184467440737095516155\n");
    let decoded = scn.try_next_ascii_u64_lf_separated_or_restore_cursor();
    assert!(decoded.is_none());
    assert_eq!(scn.cursor(), 0);
}
/*
lf separated allow unbuffered
*/
/// A complete `u64::MAX\n` payload yields `Value` and consumes the whole buffer
#[test]
fn incomplete_read_u64_okay() {
    let mut scanner = s(b"18446744073709551615\n");
    let outcome = scanner.try_next_ascii_u64_lf_separated_with_result_or_restore_cursor();
    assert_eq!(outcome, ScannerDecodeResult::Value(u64::MAX));
    assert_eq!(scanner.cursor(), scanner.buffer_len());
}
/// Valid digits without the LF report `NeedMore` and leave the cursor at the start
#[test]
fn incomplete_read_u64_missing_lf() {
    let mut scanner = s(b"18446744073709551615");
    let outcome = scanner.try_next_ascii_u64_lf_separated_with_result_or_restore_cursor();
    assert_eq!(outcome, ScannerDecodeResult::NeedMore);
    assert_eq!(scanner.cursor(), 0);
}
/// A non-digit byte in the payload (trailing or leading) reports `Error` and restores the
/// cursor
#[test]
fn incomplete_read_u64_lf_error() {
    let payloads: [&[u8]; 2] = [b"1844674407370955161A\n", b"?1844674407370955161A\n"];
    for payload in payloads {
        let mut scanner = s(payload);
        let outcome = scanner.try_next_ascii_u64_lf_separated_with_result_or_restore_cursor();
        assert_eq!(outcome, ScannerDecodeResult::Error);
        assert_eq!(scanner.cursor(), 0);
    }
}
/// A single `0` digit followed by an LF yields `Value(0)` and consumes the whole buffer
#[test]
fn incomplete_read_u64_lf_zero() {
    let mut scn = s(b"0\n");
    assert_eq!(
        scn.try_next_ascii_u64_lf_separated_with_result_or_restore_cursor(),
        ScannerDecodeResult::Value(0)
    );
    // like every other success case in this file, the payload and its LF must be consumed
    assert_eq!(scn.cursor(), scn.buffer_len());
}
/// One digit more than `u64::MAX` can hold reports `Error` and restores the cursor
#[test]
fn incomplete_read_u64_lf_overflow() {
    let mut scanner = s(b"184467440737095516155\n");
    let outcome = scanner.try_next_ascii_u64_lf_separated_with_result_or_restore_cursor();
    assert_eq!(outcome, ScannerDecodeResult::Error);
    assert_eq!(scanner.cursor(), 0);
}
/*
lf separated i64
*/
/// Stringify both arguments (`a` first, then `b`) and return their concatenation as raw bytes
fn concat(a: impl ToString, b: impl ToString) -> Vec<u8> {
    let head = a.to_string();
    let tail = b.to_string();
    let mut joined = Vec::with_capacity(head.len() + tail.len());
    joined.extend_from_slice(head.as_bytes());
    joined.extend_from_slice(tail.as_bytes());
    joined
}
/// Both extremes of the `i64` range decode when LF-terminated and the whole buffer is consumed
#[test]
fn read_i64_lf_separated_okay() {
    for extreme in [i64::MAX, i64::MIN] {
        let payload = concat(extreme, "\n");
        let mut scanner = s(&payload);
        let outcome = scanner.try_next_ascii_i64_separated_by::<b'\n'>();
        assert_eq!(outcome, (true, extreme));
        assert_eq!(scanner.cursor(), scanner.buffer_len());
    }
}
/// Without the LF the digits are still decoded and consumed, but the call reports failure
#[test]
fn read_i64_lf_separated_missing() {
    for extreme in [i64::MAX, i64::MIN] {
        let payload = concat(extreme, "");
        let mut scanner = s(&payload);
        let outcome = scanner.try_next_ascii_i64_separated_by::<b'\n'>();
        assert_eq!(outcome, (false, extreme));
        assert_eq!(scanner.cursor(), scanner.buffer_len());
    }
}
/// Non-digit bytes make the call fail; the cursor is left wherever decoding stopped
#[test]
fn read_i64_lf_separated_invalid() {
    // trailing garbage: the bad byte is consumed, the LF is not
    let trailing = concat(i64::MAX, "A\n");
    let mut scanner = s(&trailing);
    let outcome = scanner.try_next_ascii_i64_separated_by::<b'\n'>();
    assert_eq!(outcome, (false, i64::MAX));
    assert_eq!(scanner.cursor(), scanner.buffer_len() - 1);
    // leading garbage: nothing is consumed at all
    let leading = concat("A", format!("{}\n", i64::MIN));
    let mut scanner = s(&leading);
    let outcome = scanner.try_next_ascii_i64_separated_by::<b'\n'>();
    assert_eq!(outcome, (false, 0));
    assert_eq!(scanner.cursor(), 0);
}
/// `u64::MAX` does not fit in an `i64`: decoding fails at the last digit, leaving the value
/// accumulated so far and the LF unconsumed
#[test]
fn read_i64_lf_overflow() {
    let payload = concat(u64::MAX, "\n");
    let mut scanner = s(&payload);
    let outcome = scanner.try_next_ascii_i64_separated_by::<b'\n'>();
    assert_eq!(outcome, (false, 1844674407370955161));
    assert_eq!(scanner.cursor(), scanner.buffer_len() - 1);
}
/// One extra digit after `i64::MIN` underflows: decoding fails at that digit, leaving
/// `i64::MIN` as the accumulated value and the LF unconsumed
#[test]
fn read_i64_lf_underflow() {
    let payload = concat(i64::MIN, "1\n");
    let mut scanner = s(&payload);
    let outcome = scanner.try_next_ascii_i64_separated_by::<b'\n'>();
    assert_eq!(outcome, (false, -9223372036854775808));
    assert_eq!(scanner.cursor(), scanner.buffer_len() - 1);
}
/// Once the buffer is exhausted, the rounded cursor read keeps returning the last byte and
/// the cursor itself stays at the end of the buffer
#[test]
fn rounding() {
    let mut scn = s(b"123");
    // drain the three bytes one by one
    for expected in b"123".iter().copied() {
        assert_eq!(scn.try_next_byte().unwrap(), expected);
    }
    // every further rounded read must land on the final byte
    for _ in 4..=u8::MAX {
        assert_eq!(scn.rounded_cursor_value(), b'3');
    }
    assert_eq!(scn.cursor(), scn.buffer_len());
}

@ -30,7 +30,7 @@ mod protocol;
pub trait Socket: AsyncWrite + AsyncRead + Unpin {} pub trait Socket: AsyncWrite + AsyncRead + Unpin {}
pub type IoResult<T> = Result<T, std::io::Error>; pub type IoResult<T> = Result<T, std::io::Error>;
enum QLoopReturn { pub enum QLoopReturn {
Fin, Fin,
ConnectionRst, ConnectionRst,
} }

@ -98,7 +98,7 @@ fn parse_lf_separated(
) -> LFTIntParseResult { ) -> LFTIntParseResult {
let mut ret = previously_buffered; let mut ret = previously_buffered;
let mut okay = true; let mut okay = true;
while scanner.matches_cursor_rounded_and_not_eof(|b| *b != b'\n') & okay { while scanner.rounded_cursor_not_eof_matches(|b| *b != b'\n') & okay {
let b = unsafe { scanner.next_byte() }; let b = unsafe { scanner.next_byte() };
okay &= b.is_ascii_digit(); okay &= b.is_ascii_digit();
ret = match ret.checked_mul(10) { ret = match ret.checked_mul(10) {
@ -111,8 +111,8 @@ fn parse_lf_separated(
}; };
} }
let payload_ok = okay; let payload_ok = okay;
let lf_ok = scanner.matches_cursor_rounded_and_not_eof(|b| *b == b'\n'); let lf_ok = scanner.rounded_cursor_not_eof_matches(|b| *b == b'\n');
unsafe { scanner.move_ahead_by(lf_ok as usize) } unsafe { scanner.incr_cursor_by(lf_ok as usize) }
if payload_ok & lf_ok { if payload_ok & lf_ok {
LFTIntParseResult::Value(ret) LFTIntParseResult::Value(ret)
} else { } else {
@ -181,8 +181,8 @@ impl<'a> CSQuery<'a> {
let slice; let slice;
unsafe { unsafe {
// UNSAFE(@ohsayan): checked len at branch // UNSAFE(@ohsayan): checked len at branch
slice = slice::from_raw_parts(scanner.current().as_ptr(), size); slice = slice::from_raw_parts(scanner.current_buffer().as_ptr(), size);
scanner.move_ahead_by(size); scanner.incr_cursor_by(size);
} }
CSQueryExchangeResult::Completed(CSQuery::new(slice)) CSQueryExchangeResult::Completed(CSQuery::new(slice))
} else { } else {

@ -26,7 +26,7 @@
use { use {
crate::{ crate::{
engine::mem::scanner::{BufferedReadResult, BufferedScanner}, engine::mem::scanner::{BufferedScanner, ScannerDecodeResult},
util::compiler, util::compiler,
}, },
std::slice, std::slice,
@ -320,9 +320,10 @@ impl<'a> CHandshake<'a> {
// we're done here // we're done here
return unsafe { return unsafe {
// UNSAFE(@ohsayan): we just checked buffered size // UNSAFE(@ohsayan): we just checked buffered size
let uname = slice::from_raw_parts(scanner.current().as_ptr(), uname_l); let uname = slice::from_raw_parts(scanner.current_buffer().as_ptr(), uname_l);
let pwd = slice::from_raw_parts(scanner.current().as_ptr().add(uname_l), pwd_l); let pwd =
scanner.move_ahead_by(uname_l + pwd_l); slice::from_raw_parts(scanner.current_buffer().as_ptr().add(uname_l), pwd_l);
scanner.incr_cursor_by(uname_l + pwd_l);
HandshakeResult::Completed(Self::new( HandshakeResult::Completed(Self::new(
static_hs, static_hs,
Some(CHandshakeAuth::new(uname, pwd)), Some(CHandshakeAuth::new(uname, pwd)),
@ -367,15 +368,16 @@ impl<'a> CHandshake<'a> {
AuthMode::Password => {} AuthMode::Password => {}
} }
// let us see if we can parse the username length // let us see if we can parse the username length
let uname_l = match scanner.try_next_ascii_u64_lf_separated_with_result() { let uname_l = match scanner.try_next_ascii_u64_lf_separated_with_result_or_restore_cursor()
BufferedReadResult::NeedMore => { {
ScannerDecodeResult::NeedMore => {
return HandshakeResult::ChangeState { return HandshakeResult::ChangeState {
new_state: HandshakeState::StaticBlock(static_header), new_state: HandshakeState::StaticBlock(static_header),
expect: AuthMode::Password.min_payload_bytes(), // 2 for uname_l and 2 for pwd_l expect: AuthMode::Password.min_payload_bytes(), // 2 for uname_l and 2 for pwd_l
}; };
} }
BufferedReadResult::Value(v) => v as usize, ScannerDecodeResult::Value(v) => v as usize,
BufferedReadResult::Error => { ScannerDecodeResult::Error => {
return HandshakeResult::Error(ProtocolError::CorruptedHSPacket) return HandshakeResult::Error(ProtocolError::CorruptedHSPacket)
} }
}; };
@ -388,16 +390,16 @@ impl<'a> CHandshake<'a> {
uname_l: usize, uname_l: usize,
) -> HandshakeResult<'a> { ) -> HandshakeResult<'a> {
// we just have to get the password len // we just have to get the password len
let pwd_l = match scanner.try_next_ascii_u64_lf_separated_with_result() { let pwd_l = match scanner.try_next_ascii_u64_lf_separated_with_result_or_restore_cursor() {
BufferedReadResult::Value(v) => v as usize, ScannerDecodeResult::Value(v) => v as usize,
BufferedReadResult::NeedMore => { ScannerDecodeResult::NeedMore => {
// newline missing (or maybe there's more?) // newline missing (or maybe there's more?)
return HandshakeResult::ChangeState { return HandshakeResult::ChangeState {
new_state: HandshakeState::ExpectingMetaForVariableBlock { static_hs, uname_l }, new_state: HandshakeState::ExpectingMetaForVariableBlock { static_hs, uname_l },
expect: uname_l + 2, // space for username + password len expect: uname_l + 2, // space for username + password len
}; };
} }
BufferedReadResult::Error => { ScannerDecodeResult::Error => {
return HandshakeResult::Error(ProtocolError::CorruptedHSPacket) return HandshakeResult::Error(ProtocolError::CorruptedHSPacket)
} }
}; };

@ -36,7 +36,7 @@ use {
}, },
crate::{ crate::{
engine::{ engine::{
data::{cell::Datacell, lit::LitIR}, data::{cell::Datacell, lit::Lit},
error::{Error, QueryResult}, error::{Error, QueryResult},
}, },
util::{compiler, MaybeInit}, util::{compiler, MaybeInit},
@ -162,7 +162,7 @@ impl<'a, Qd: QueryData<'a>> State<'a, Qd> {
/// ///
/// ## Safety /// ## Safety
/// - Must ensure that `Self::can_read_lit_rounded` is true /// - Must ensure that `Self::can_read_lit_rounded` is true
pub unsafe fn read_cursor_lit_unchecked(&mut self) -> LitIR<'a> { pub unsafe fn read_cursor_lit_unchecked(&mut self) -> Lit<'a> {
let tok = self.read(); let tok = self.read();
Qd::read_lit(&mut self.d, tok) Qd::read_lit(&mut self.d, tok)
} }
@ -171,7 +171,7 @@ impl<'a, Qd: QueryData<'a>> State<'a, Qd> {
/// ///
/// ## Safety /// ## Safety
/// - Must ensure that `Self::can_read_lit_from` is true for the token /// - Must ensure that `Self::can_read_lit_from` is true for the token
pub unsafe fn read_lit_unchecked_from(&mut self, tok: &'a Token<'a>) -> LitIR<'a> { pub unsafe fn read_lit_unchecked_from(&mut self, tok: &'a Token<'a>) -> Lit<'a> {
Qd::read_lit(&mut self.d, tok) Qd::read_lit(&mut self.d, tok)
} }
#[inline(always)] #[inline(always)]
@ -274,7 +274,7 @@ pub trait QueryData<'a> {
/// ///
/// ## Safety /// ## Safety
/// The current token **must match** the signature of a lit /// The current token **must match** the signature of a lit
unsafe fn read_lit(&mut self, tok: &'a Token) -> LitIR<'a>; unsafe fn read_lit(&mut self, tok: &'a Token) -> Lit<'a>;
/// Read a lit using the given token and then copy it into a [`DataType`] /// Read a lit using the given token and then copy it into a [`DataType`]
/// ///
/// ## Safety /// ## Safety
@ -299,7 +299,7 @@ impl<'a> QueryData<'a> for InplaceData {
tok.is_lit() tok.is_lit()
} }
#[inline(always)] #[inline(always)]
unsafe fn read_lit(&mut self, tok: &'a Token) -> LitIR<'a> { unsafe fn read_lit(&mut self, tok: &'a Token) -> Lit<'a> {
tok.uck_read_lit().as_ir() tok.uck_read_lit().as_ir()
} }
#[inline(always)] #[inline(always)]
@ -312,42 +312,6 @@ impl<'a> QueryData<'a> for InplaceData {
} }
} }
#[derive(Debug)]
pub struct SubstitutedData<'a> {
data: &'a [LitIR<'a>],
}
impl<'a> SubstitutedData<'a> {
#[inline(always)]
pub const fn new(src: &'a [LitIR<'a>]) -> Self {
Self { data: src }
}
}
impl<'a> QueryData<'a> for SubstitutedData<'a> {
#[inline(always)]
fn can_read_lit_from(&self, tok: &Token) -> bool {
Token![?].eq(tok) && self.nonzero()
}
#[inline(always)]
unsafe fn read_lit(&mut self, tok: &'a Token) -> LitIR<'a> {
debug_assert!(Token![?].eq(tok));
let ret = self.data[0].clone();
self.data = &self.data[1..];
ret
}
#[inline(always)]
unsafe fn read_data_type(&mut self, tok: &'a Token) -> Datacell {
debug_assert!(Token![?].eq(tok));
let ret = self.data[0].clone();
self.data = &self.data[1..];
Datacell::from(ret)
}
#[inline(always)]
fn nonzero(&self) -> bool {
!self.data.is_empty()
}
}
/* /*
AST AST
*/ */

@ -77,7 +77,7 @@ mod lexer {
#[bench] #[bench]
fn lex_raw_literal(b: &mut Bencher) { fn lex_raw_literal(b: &mut Bencher) {
let src = b"\r44\ne69b10ffcc250ae5091dec6f299072e23b0b41d6a739"; let src = b"\r44\ne69b10ffcc250ae5091dec6f299072e23b0b41d6a739";
let expected = vec![Token::Lit(Lit::Bin( let expected = vec![Token::Lit(Lit::new_bin(
b"e69b10ffcc250ae5091dec6f299072e23b0b41d6a739", b"e69b10ffcc250ae5091dec6f299072e23b0b41d6a739",
))]; ))];
b.iter(|| assert_eq!(lex_insecure(src).unwrap(), expected)); b.iter(|| assert_eq!(lex_insecure(src).unwrap(), expected));

@ -39,7 +39,7 @@ use {
ast::{QueryData, State}, ast::{QueryData, State},
lex::Ident, lex::Ident,
}, },
crate::{engine::data::lit::LitIR, util::compiler}, crate::{engine::data::lit::Lit, util::compiler},
std::collections::HashMap, std::collections::HashMap,
}; };
@ -59,13 +59,13 @@ fn u(b: bool) -> u8 {
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct RelationalExpr<'a> { pub struct RelationalExpr<'a> {
pub(super) lhs: Ident<'a>, pub(super) lhs: Ident<'a>,
pub(super) rhs: LitIR<'a>, pub(super) rhs: Lit<'a>,
pub(super) opc: u8, pub(super) opc: u8,
} }
impl<'a> RelationalExpr<'a> { impl<'a> RelationalExpr<'a> {
#[inline(always)] #[inline(always)]
pub(super) fn new(lhs: Ident<'a>, rhs: LitIR<'a>, opc: u8) -> RelationalExpr<'a> { pub(super) fn new(lhs: Ident<'a>, rhs: Lit<'a>, opc: u8) -> RelationalExpr<'a> {
Self { lhs, rhs, opc } Self { lhs, rhs, opc }
} }
pub(super) const OP_EQ: u8 = 1; pub(super) const OP_EQ: u8 = 1;
@ -77,7 +77,7 @@ impl<'a> RelationalExpr<'a> {
pub fn filter_hint_none(&self) -> bool { pub fn filter_hint_none(&self) -> bool {
self.opc == Self::OP_EQ self.opc == Self::OP_EQ
} }
pub fn rhs(&self) -> LitIR<'a> { pub fn rhs(&self) -> Lit<'a> {
self.rhs.clone() self.rhs.clone()
} }
#[inline(always)] #[inline(always)]

@ -31,7 +31,7 @@ use {
crate::{ crate::{
engine::{ engine::{
core::query_meta::AssignmentOperator, core::query_meta::AssignmentOperator,
data::lit::LitIR, data::lit::Lit,
error::{Error, QueryResult}, error::{Error, QueryResult},
ql::{ ql::{
ast::{Entity, QueryData, State}, ast::{Entity, QueryData, State},
@ -60,13 +60,13 @@ pub struct AssignmentExpression<'a> {
/// the LHS ident /// the LHS ident
pub lhs: Ident<'a>, pub lhs: Ident<'a>,
/// the RHS lit /// the RHS lit
pub rhs: LitIR<'a>, pub rhs: Lit<'a>,
/// operator /// operator
pub operator_fn: AssignmentOperator, pub operator_fn: AssignmentOperator,
} }
impl<'a> AssignmentExpression<'a> { impl<'a> AssignmentExpression<'a> {
pub fn new(lhs: Ident<'a>, rhs: LitIR<'a>, operator_fn: AssignmentOperator) -> Self { pub fn new(lhs: Ident<'a>, rhs: Lit<'a>, operator_fn: AssignmentOperator) -> Self {
Self { Self {
lhs, lhs,
rhs, rhs,

@ -25,553 +25,471 @@
*/ */
mod raw; mod raw;
pub use raw::{Ident, Keyword, Symbol, Token};
use { use {
self::raw::RawLexer,
crate::{ crate::{
engine::{ engine::{
data::{ data::lit::Lit,
lit::{Lit, LitIR},
spec::Dataspec1D,
},
error::{Error, QueryResult}, error::{Error, QueryResult},
mem::BufferedScanner,
}, },
util::compiler, util::compiler,
}, },
core::{fmt, ops::BitOr, slice, str}, core::slice,
raw::{kwof, symof},
}; };
pub use self::raw::{Ident, Keyword, Symbol, Token};
pub type Slice<'a> = &'a [u8];
/* /*
Lexer impls basic lexer definition
*/ */
#[derive(Debug)] type Slice<'a> = &'a [u8];
/// This implements the `opmode-dev` for BlueQL
pub struct InsecureLexer<'a> { #[derive(Debug, PartialEq)]
base: RawLexer<'a>, /// The internal lexer impl
pub struct Lexer<'a> {
token_buffer: BufferedScanner<'a>,
tokens: Vec<Token<'a>>,
last_error: Option<Error>,
} }
impl<'a> InsecureLexer<'a> { impl<'a> Lexer<'a> {
#[inline(always)] /// Initialize a new lexer
pub const fn new(src: Slice<'a>) -> Self { fn new(src: &'a [u8]) -> Self {
Self { Self {
base: RawLexer::new(src), token_buffer: BufferedScanner::new(src),
tokens: Vec::new(),
last_error: None,
} }
} }
#[inline(always)] /// set an error
pub fn lex(src: Slice<'a>) -> QueryResult<Vec<Token<'a>>> { #[inline(never)]
let mut slf = Self::new(src); #[cold]
slf._lex(); fn set_error(&mut self, e: Error) {
let RawLexer { self.last_error = Some(e);
tokens, last_error, ..
} = slf.base;
match last_error {
None => Ok(tokens),
Some(e) => Err(e),
}
} }
#[inline(always)] /// push in a new token
fn _lex(&mut self) { fn push_token(&mut self, t: impl Into<Token<'a>>) {
let slf = &mut self.base; self.tokens.push(t.into())
while slf.not_exhausted() && slf.no_error() { }
match unsafe { fn no_error(&self) -> bool {
// UNSAFE(@ohsayan): Verified non-null from pre self.last_error.is_none()
slf.deref_cursor()
} {
byte if byte.is_ascii_alphabetic() => slf.scan_ident_or_keyword(),
#[cfg(test)]
byte if byte == b'\x01' => {
slf.push_token(Token::IgnorableComma);
unsafe {
// UNSAFE(@ohsayan): All good here. Already read the token
slf.incr_cursor();
}
}
byte if byte.is_ascii_digit() => Self::scan_unsigned_integer(slf),
b'\r' => Self::scan_binary_literal(slf),
b'-' => Self::scan_signed_integer(slf),
qs @ (b'\'' | b'"') => Self::scan_quoted_string(slf, qs),
// blank space or an arbitrary byte
b' ' | b'\n' | b'\t' => slf.trim_ahead(),
b => slf.scan_byte(b),
}
}
} }
} }
// high-level methods impl<'a> Lexer<'a> {
impl<'a> InsecureLexer<'a> { /// Scan an identifier
#[inline(always)] fn scan_ident(&mut self) -> Slice<'a> {
fn scan_signed_integer(slf: &mut RawLexer<'a>) { let s = self.token_buffer.cursor_ptr();
unsafe { unsafe {
// UNSAFE(@ohsayan): We hit an integer hence this was called while self
slf.incr_cursor(); .token_buffer
} .rounded_cursor_not_eof_matches(|b| b.is_ascii_alphanumeric() || *b == b'_')
if slf.peek_is(|b| b.is_ascii_digit()) {
// we have some digits
let start = unsafe {
// UNSAFE(@ohsayan): Take the (-) into the parse
// TODO(@ohsayan): we can maybe look at a more efficient way later
slf.cursor().sub(1)
};
while slf.peek_is_and_forward(|b| b.is_ascii_digit()) {}
let wseof = slf.peek_is(|char| !char.is_ascii_alphabetic()) || slf.exhausted();
match unsafe {
// UNSAFE(@ohsayan): a sequence of ASCII bytes in the integer range will always be correct unicode
str::from_utf8_unchecked(slice::from_raw_parts(
start,
// UNSAFE(@ohsayan): valid cursor and start pointers
slf.cursor().offset_from(start) as usize,
))
}
.parse::<i64>()
{ {
Ok(num) if compiler::likely(wseof) => { // UNSAFE(@ohsayan): increment cursor, this is valid
slf.push_token(Lit::SignedInt(num)); self.token_buffer.incr_cursor();
}
_ => {
compiler::cold_call(|| slf.set_error(Error::LexInvalidLiteral));
}
} }
} else { // UNSAFE(@ohsayan): valid slice and ptrs
slf.push_token(Token![-]); slice::from_raw_parts(
s,
self.token_buffer.current_buffer().as_ptr().offset_from(s) as usize,
)
} }
} }
#[inline(always)] /// Scan an identifier or keyword
fn scan_unsigned_integer(slf: &mut RawLexer<'a>) { fn scan_ident_or_keyword(&mut self) {
let s = slf.cursor(); let s = self.scan_ident();
let st = s.to_ascii_lowercase();
while slf.peek_is(|b| b.is_ascii_digit()) { match kwof(&st) {
unsafe { Some(kw) => self.tokens.push(kw.into()),
// UNSAFE(@ohsayan): since we're going ahead, this is correct (until EOA) // FIXME(@ohsayan): Uh, mind fixing this? The only advantage is that I can keep the graph *memory* footprint small
slf.incr_cursor(); None if st == b"true" || st == b"false" => {
self.push_token(Lit::new_bool(st == b"true"))
} }
None => self.tokens.push(unsafe {
// UNSAFE(@ohsayan): scan_ident only returns a valid ident which is always a string
Token::Ident(Ident::new(s))
}),
} }
/* }
1234; // valid fn scan_byte(&mut self, byte: u8) {
1234} // valid match symof(byte) {
1234{ // invalid Some(tok) => self.push_token(tok),
1234, // valid None => return self.set_error(Error::LexUnexpectedByte),
1234a // invalid
*/
let wseof = slf.peek_is(|char| !char.is_ascii_alphabetic()) || slf.exhausted();
match unsafe {
/*
UNSAFE(@ohsayan):
(1) Valid cursor and start pointer (since we copy it from the cursor which is correct)
(2) All ASCII alphabetic bytes are captured, hence this will always be a correct unicode string
*/
str::from_utf8_unchecked(slice::from_raw_parts(
s,
slf.cursor().offset_from(s) as usize,
))
} }
.parse() unsafe {
{ // UNSAFE(@ohsayan): we are sent a byte, so fw cursor
Ok(num) if compiler::likely(wseof) => { self.token_buffer.incr_cursor();
slf.tokens.push(Token::Lit(Lit::UnsignedInt(num)))
}
_ => slf.set_error(Error::LexInvalidLiteral),
} }
} }
}
#[inline(always)] impl<'a> Lexer<'a> {
fn scan_binary_literal(slf: &mut RawLexer<'a>) { fn trim_ahead(&mut self) {
unsafe { self.token_buffer
// UNSAFE(@ohsayan): cursor increment since we hit the marker byte (CR) .trim_ahead(|b| (b == b' ') | (b == b'\n') | (b == b'\t'))
slf.incr_cursor(); }
} }
let mut size = 0usize;
let mut okay = true; /*
while slf.not_exhausted() Insecure lexer
&& unsafe { */
// UNSAFE(@ohsayan): verified non-exhaustion
slf.deref_cursor() != b'\n' pub struct InsecureLexer<'a> {
} l: Lexer<'a>,
&& okay }
{
/* impl<'a> InsecureLexer<'a> {
Don't ask me how stupid this is. Like, I was probably in some "mood" when I wrote this pub fn lex(src: &'a [u8]) -> QueryResult<Vec<Token<'a>>> {
and it works duh, but isn't the most elegant of things (could I have just used a parse? let slf = Self { l: Lexer::new(src) };
nah, I'm just a hardcore numeric normie) slf._lex()
-- Sayan }
*/ fn _lex(mut self) -> QueryResult<Vec<Token<'a>>> {
while !self.l.token_buffer.eof() & self.l.no_error() {
let byte = unsafe { let byte = unsafe {
// UNSAFE(@ohsayan): The pre invariant guarantees that this is correct // UNSAFE(@ohsayan): loop invariant
slf.deref_cursor() self.l.token_buffer.deref_cursor()
}; };
okay &= byte.is_ascii_digit(); match byte {
let (prod, of_flag) = size.overflowing_mul(10); #[cfg(test)]
okay &= !of_flag; byte if byte == b'\x01' => {
let (sum, of_flag) = prod.overflowing_add((byte & 0x0F) as _); self.l.push_token(Token::IgnorableComma);
size = sum; unsafe {
okay &= !of_flag; // UNSAFE(@ohsayan): All good here. Already read the token
unsafe { self.l.token_buffer.incr_cursor();
// UNSAFE(@ohsayan): We just read something, so this is fine (until EOA) }
slf.incr_cursor(); }
// ident
byte if byte.is_ascii_alphabetic() | (byte == b'_') => {
self.l.scan_ident_or_keyword()
}
// uint
byte if byte.is_ascii_digit() => self.scan_unsigned_integer(),
// sint
b'-' => {
unsafe {
// UNSAFE(@ohsayan): loop invariant
self.l.token_buffer.incr_cursor()
};
self.scan_signed_integer();
}
// binary
b'\r' => {
unsafe {
// UNSAFE(@ohsayan): loop invariant
self.l.token_buffer.incr_cursor()
}
self.scan_binary()
}
// string
quote_style @ (b'"' | b'\'') => {
unsafe {
// UNSAFE(@ohsayan): loop invariant
self.l.token_buffer.incr_cursor()
}
self.scan_quoted_string(quote_style)
}
// whitespace
b' ' | b'\n' | b'\t' => self.l.trim_ahead(),
// some random byte
byte => self.l.scan_byte(byte),
} }
} }
okay &= slf.peek_eq_and_forward(b'\n'); match self.l.last_error {
okay &= slf.remaining() >= size; None => Ok(self.l.tokens),
if compiler::likely(okay) { Some(e) => Err(e),
unsafe {
// UNSAFE(@ohsayan): Correct cursor and length (from above we know that we have enough bytes)
slf.push_token(Lit::Bin(slice::from_raw_parts(slf.cursor(), size)));
// UNSAFE(@ohsayan): Correct length increment
slf.incr_cursor_by(size);
}
} else {
slf.set_error(Error::LexInvalidLiteral);
} }
} }
#[inline(always)] }
fn scan_quoted_string(slf: &mut RawLexer<'a>, quote_style: u8) {
debug_assert!( impl<'a> InsecureLexer<'a> {
unsafe { fn scan_binary(&mut self) {
// UNSAFE(@ohsayan): yessir, we just hit this byte. if called elsewhere, this function will crash and burn (or simply, segfault) let Some(len) = self
slf.deref_cursor() .l
} == quote_style, .token_buffer
"illegal call to scan_quoted_string" .try_next_ascii_u64_lf_separated_or_restore_cursor()
); else {
unsafe { self.l.set_error(Error::LexInvalidLiteral);
// UNSAFE(@ohsayan): Increment this cursor (this is correct since we just hit the quote) return;
slf.incr_cursor() };
let len = len as usize;
match self.l.token_buffer.try_next_variable_block(len) {
Some(block) => self.l.push_token(Lit::new_bin(block)),
None => self.l.set_error(Error::LexInvalidLiteral),
} }
}
fn scan_quoted_string(&mut self, quote_style: u8) {
// cursor is at beginning of `"`; we need to scan until the end of quote or an escape
let mut buf = Vec::new(); let mut buf = Vec::new();
unsafe { while self
while slf.peek_neq(quote_style) { .l
// UNSAFE(@ohsayan): deref is good since peek passed .token_buffer
match slf.deref_cursor() { .rounded_cursor_not_eof_matches(|b| *b != quote_style)
b if b != b'\\' => { {
buf.push(b); let byte = unsafe {
} // UNSAFE(@ohsayan): loop invariant
_ => { self.l.token_buffer.next_byte()
// UNSAFE(@ohsayan): we read one byte, so this should work };
slf.incr_cursor(); match byte {
if slf.exhausted() { b'\\' => {
break; // hmm, this might be an escape (either `\\` or `\"`)
if self
.l
.token_buffer
.rounded_cursor_not_eof_matches(|b| *b == quote_style || *b == b'\\')
{
// ignore escaped byte
unsafe {
buf.push(self.l.token_buffer.next_byte());
} }
// UNSAFE(@ohsayan): correct because of the above branch } else {
let b = slf.deref_cursor(); // this is not allowed
let quote = b == quote_style; unsafe {
let bs = b == b'\\'; // UNSAFE(@ohsayan): we move the cursor ahead, now we're moving it back
if quote | bs { self.l.token_buffer.decr_cursor()
buf.push(b);
} else {
break; // what on good earth is that escape?
} }
self.l.set_error(Error::LexInvalidLiteral);
return;
} }
} }
/* _ => buf.push(byte),
UNSAFE(@ohsayan): This is correct because:
(a) If we are in arm 1: we move the cursor ahead from the `\` byte (the branch doesn't do it)
(b) If we are in arm 2: we don't skip the second quote byte in the branch, hence this is correct
*/
slf.incr_cursor();
} }
let terminated = slf.peek_eq_and_forward(quote_style); }
match String::from_utf8(buf) { let ended_with_quote = self
Ok(st) if terminated => slf.tokens.push(Token::Lit(st.into_boxed_str().into())), .l
_ => slf.set_error(Error::LexInvalidLiteral), .token_buffer
.rounded_cursor_not_eof_equals(quote_style);
// skip quote
unsafe {
// UNSAFE(@ohsayan): not eof
self.l.token_buffer.incr_cursor_if(ended_with_quote)
}
match String::from_utf8(buf) {
Ok(s) if ended_with_quote => self.l.push_token(Lit::new_string(s)),
Err(_) | Ok(_) => self.l.set_error(Error::LexInvalidLiteral),
}
}
fn scan_unsigned_integer(&mut self) {
let mut okay = true;
// extract integer
let int = self
.l
.token_buffer
.try_next_ascii_u64_stop_at::<false>(&mut okay, |b| b.is_ascii_digit());
/*
see if we ended at a correct byte:
iff the integer has an alphanumeric byte at the end is the integer invalid
*/
if compiler::unlikely(
!okay
| self
.l
.token_buffer
.rounded_cursor_not_eof_matches(u8::is_ascii_alphanumeric),
) {
self.l.set_error(Error::LexInvalidLiteral);
} else {
self.l.push_token(Lit::new_uint(int))
}
}
fn scan_signed_integer(&mut self) {
if self.l.token_buffer.rounded_cursor_value().is_ascii_digit() {
unsafe {
// UNSAFE(@ohsayan): the cursor was moved ahead, now we're moving it back
self.l.token_buffer.decr_cursor()
} }
let (okay, int) = self
.l
.token_buffer
.try_next_ascii_i64_stop_at(|b| !b.is_ascii_digit());
if okay
& !self
.l
.token_buffer
.rounded_cursor_value()
.is_ascii_alphabetic()
{
self.l.push_token(Lit::new_sint(int))
} else {
self.l.set_error(Error::LexInvalidLiteral)
}
} else {
self.l.push_token(Token![-]);
} }
} }
} }
/*
secure
*/
#[derive(Debug)] #[derive(Debug)]
/// This lexer implements the `opmod-safe` for BlueQL pub struct SecureLexer<'a> {
pub struct SafeLexer<'a> { l: Lexer<'a>,
base: RawLexer<'a>, param_buffer: BufferedScanner<'a>,
} }
impl<'a> SafeLexer<'a> { impl<'a> SecureLexer<'a> {
#[inline(always)] pub fn new(src: &'a [u8], query_window: usize) -> Self {
pub const fn new(src: Slice<'a>) -> Self {
Self { Self {
base: RawLexer::new(src), l: Lexer::new(&src[..query_window]),
param_buffer: BufferedScanner::new(&src[query_window..]),
} }
} }
#[inline(always)] pub fn lex(src: &'a [u8], query_window: usize) -> QueryResult<Vec<Token<'a>>> {
pub fn lex(src: Slice<'a>) -> QueryResult<Vec<Token>> { Self::new(src, query_window)._lex()
Self::new(src)._lex()
} }
#[inline(always)] }
fn _lex(self) -> QueryResult<Vec<Token<'a>>> {
let Self { base: mut l } = self; impl<'a> SecureLexer<'a> {
while l.not_exhausted() && l.no_error() { fn _lex(mut self) -> QueryResult<Vec<Token<'a>>> {
while self.l.no_error() & !self.l.token_buffer.eof() {
let b = unsafe { let b = unsafe {
// UNSAFE(@ohsayan): This is correct because of the pre invariant // UNSAFE(@ohsayan): loop invariant
l.deref_cursor() self.l.token_buffer.deref_cursor()
}; };
match b { match b {
// ident or kw b if b.is_ascii_alphabetic() | (b == b'_') => self.l.scan_ident_or_keyword(),
b if b.is_ascii_alphabetic() => l.scan_ident_or_keyword(), b'?' => {
// extra terminal chars // a parameter: null, bool, sint, uint, float, binary, string
b'\n' | b'\t' | b' ' => l.trim_ahead(), const TYPE: [&str; 8] = [
// arbitrary byte "null", "bool", "uint", "sint", "float", "binary", "string", "ERROR",
b => l.scan_byte(b), ];
// skip the param byte
unsafe {
// UNSAFE(@ohsayan): loop invariant
self.l.token_buffer.incr_cursor()
}
// find target
let ecc_code = SCAN_PARAM.len() - 1;
let target_code = self.param_buffer.rounded_cursor_value();
let target_fn = target_code.min(ecc_code as u8);
// forward if we have target
unsafe {
self.param_buffer
.incr_cursor_by((target_code == target_fn) as _)
}
// check requirements
let has_enough = self
.param_buffer
.has_left(SCAN_PARAM_EXPECT[target_fn as usize] as _);
let final_target =
(has_enough as u8 * target_fn) | (!has_enough as u8 * ecc_code as u8);
// exec
let final_target = final_target as usize;
unsafe {
if final_target >= SCAN_PARAM.len() {
impossible!()
}
}
unsafe {
// UNSAFE(@ohsayan): our computation above ensures that we're meeting the expected target
SCAN_PARAM[final_target](&mut self)
}
}
b' ' | b'\t' | b'\n' => self.l.trim_ahead(),
sym => self.l.scan_byte(sym),
} }
} }
let RawLexer { match self.l.last_error {
last_error, tokens, .. None => Ok(self.l.tokens),
} = l;
match last_error {
None => Ok(tokens),
Some(e) => Err(e), Some(e) => Err(e),
} }
} }
} }
const ALLOW_UNSIGNED: bool = false; const SCAN_PARAM_EXPECT: [u8; 8] = [0, 1, 2, 2, 2, 2, 2, 0];
const ALLOW_SIGNED: bool = true; static SCAN_PARAM: [unsafe fn(&mut SecureLexer); 8] = unsafe {
[
pub trait NumberDefinition: Sized + fmt::Debug + Copy + Clone + BitOr<Self, Output = Self> { // null
const ALLOW_SIGNED: bool; |s| s.l.push_token(Token![null]),
fn mul_of(&self, v: u8) -> (Self, bool); // bool
fn add_of(&self, v: u8) -> (Self, bool); |slf| {
fn sub_of(&self, v: u8) -> (Self, bool); let nb = slf.param_buffer.next_byte();
fn qualified_max_length() -> usize; slf.l.push_token(Token::Lit(Lit::new_bool(nb == 1)));
fn zero() -> Self; if nb > 1 {
fn b(self, b: bool) -> Self; slf.l.set_error(Error::LexInvalidEscapedLiteral);
}
/// Generates [`NumberDefinition`] impls for primitive integer types.
///
/// Every entry is written as `type {FLAG, MAX_LEN}`: `FLAG` is the name of a `bool` constant that
/// becomes `ALLOW_SIGNED`, and `MAX_LEN` is the expression returned by `qualified_max_length`.
macro_rules! impl_number_def {
    ($($int:ty { $signed_flag:ident, $max_len:expr }),* $(,)?) => {
        $(
            impl NumberDefinition for $int {
                const ALLOW_SIGNED: bool = $signed_flag;
                #[inline(always)]
                fn zero() -> Self {
                    0
                }
                // multiplying by the flag keeps the value for `true` and zeroes it for `false`
                #[inline(always)]
                fn b(self, b: bool) -> Self {
                    b as Self * self
                }
                // the arithmetic helpers return the wrapped result along with an overflow flag
                #[inline(always)]
                fn mul_of(&self, v: u8) -> ($int, bool) {
                    <$int>::overflowing_mul(*self, v as $int)
                }
                #[inline(always)]
                fn add_of(&self, v: u8) -> ($int, bool) {
                    <$int>::overflowing_add(*self, v as $int)
                }
                #[inline(always)]
                fn sub_of(&self, v: u8) -> ($int, bool) {
                    <$int>::overflowing_sub(*self, v as $int)
                }
                #[inline(always)]
                fn qualified_max_length() -> usize {
                    $max_len
                }
            }
        )*
    };
}
// Maximum number of ASCII characters in the decimal representation of a pointer-sized integer.
// These feed `qualified_max_length` for `usize`/`isize` in the `impl_number_def!` invocation.
// 64-bit `usize`: 18446744073709551615 (20 digits)
#[cfg(target_pointer_width = "64")]
const SZ_USIZE: usize = 20;
// 32-bit `usize`: 4294967295 (10 digits)
#[cfg(target_pointer_width = "32")]
const SZ_USIZE: usize = 10;
// 64-bit `isize`: -9223372036854775808 (19 digits and the sign)
#[cfg(target_pointer_width = "64")]
const SZ_ISIZE: usize = 20;
// 32-bit `isize`: -2147483648 (10 digits and the sign)
#[cfg(target_pointer_width = "32")]
const SZ_ISIZE: usize = 11;
// Registers every primitive integer type that `decode_num_ub` can decode. The flag selects whether
// a leading `-` is accepted; the second value is the longest decimal representation of the type
// (including the sign for signed types). The comment above each entry shows the value with the
// longest representation.
impl_number_def! {
    // unsigned
    // `usize` has no negative values, so it must not accept a sign. With `ALLOW_SIGNED` a payload
    // length such as `-0\n` was decoded successfully.
    usize {ALLOW_UNSIGNED, SZ_USIZE},
    // 255
    u8 {ALLOW_UNSIGNED, 3},
    // 65535
    u16 {ALLOW_UNSIGNED, 5},
    // 4294967295
    u32 {ALLOW_UNSIGNED, 10},
    // 18446744073709551615
    u64 {ALLOW_UNSIGNED, 20},
    // signed
    isize {ALLOW_SIGNED, SZ_ISIZE},
    // -128
    i8 {ALLOW_SIGNED, 4},
    // -32768
    i16 {ALLOW_SIGNED, 6},
    // -2147483648
    i32 {ALLOW_SIGNED, 11},
    // -9223372036854775808
    i64 {ALLOW_SIGNED, 20},
}
/// Decodes an LF-terminated ASCII decimal integer from the start of `src`.
///
/// - `src`: bytes starting at the number; the number must be terminated by `\n`
/// - `flag`: AND-ed with the outcome, so it is cleared on failure and never set back to `true`
/// - `cnt`: incremented by the number of bytes consumed. On success this includes the terminating
///   `\n`; on failure it is the number of bytes accepted before decoding stopped
///
/// If `N::ALLOW_SIGNED` is set, one leading `-` is accepted and the value is then accumulated by
/// subtraction. Overflow is detected through the flags returned by `mul_of`/`add_of`/`sub_of`.
///
/// The returned value is only meaningful if `flag` is still `true` afterwards: it is forced to zero
/// on failure only when `N::ALLOW_SIGNED` is set.
#[inline(always)]
pub(super) fn decode_num_ub<N>(src: &[u8], flag: &mut bool, cnt: &mut usize) -> N
where
N: NumberDefinition,
{
let l = src.len();
// an empty source can never hold a number
let mut okay = !src.is_empty();
let mut i = 0;
let mut number = N::zero();
// set once the terminating LF has been seen
let mut nx_stop = false;
let is_signed = if N::ALLOW_SIGNED {
// consume an optional sign
let loc_s = i < l && src[i] == b'-';
i += loc_s as usize;
// after the optional sign at least two more bytes are required
okay &= (i + 2) <= l; // [-][digit][LF]
loc_s
} else {
false
};
while i < l && okay && !nx_stop {
// potential exit
nx_stop = src[i] == b'\n';
// potential entry
let mut local_ok = src[i].is_ascii_digit();
// shift the accumulated value one decimal place to the left
let (p, p_of) = number.mul_of(10);
local_ok &= !p_of;
// the low nibble of an ASCII digit is its numeric value
let lfret = if N::ALLOW_SIGNED && is_signed {
// negative numbers are built by subtracting each digit
let (d, d_of) = p.sub_of(src[i] & 0x0f);
local_ok &= !d_of;
d
} else {
let (s, s_of) = p.add_of(src[i] & 0x0f);
local_ok &= !s_of;
s
};
// reassign or assign
// branchless select: `b(false)` yields zero, so exactly one side of the OR is non-zero.
// The old value is kept when the LF was hit, otherwise the newly computed value is taken
let reassign = number.b(nx_stop);
let assign = lfret.b(!nx_stop);
number = reassign | assign;
// the LF itself is not a digit, so it must not count as an error
okay &= local_ok | nx_stop;
// the cursor only moves past bytes that were accepted (this includes the LF)
i += okay as usize;
}
if N::ALLOW_SIGNED {
// zero the result on failure (only done for types that allow a sign)
number = number.b(okay);
}
// running out of input before seeing the LF is an error
okay &= nx_stop;
*cnt += i;
*flag &= okay;
number
}
#[derive(Debug, PartialEq)]
/// Data constructed from `opmode-safe`
///
/// Holds the two results produced by `SafeQueryData::parse`: the lexed query tokens and the
/// decoded parameter literals.
pub struct SafeQueryData<'a> {
// parameters decoded from the parameter frame (the output of `p_revloop`)
p: Box<[LitIR<'a>]>,
// tokens produced by `SafeLexer::lex` from the query frame
t: Vec<Token<'a>>,
}
impl<'a> SafeQueryData<'a> {
#[cfg(test)]
pub fn new_test(p: Box<[LitIR<'a>]>, t: Vec<Token<'a>>) -> Self {
Self { p, t }
}
#[inline(always)]
pub fn parse_data(pf: Slice<'a>, pf_sz: usize) -> QueryResult<Box<[LitIR<'a>]>> {
Self::p_revloop(pf, pf_sz)
}
#[inline(always)]
pub fn parse(qf: Slice<'a>, pf: Slice<'a>, pf_sz: usize) -> QueryResult<Self> {
let q = SafeLexer::lex(qf);
let p = Self::p_revloop(pf, pf_sz);
match (q, p) {
(Ok(q), Ok(p)) => Ok(Self { t: q, p }),
// first error
(Err(e), _) | (_, Err(e)) => Err(e),
}
}
#[inline]
pub(super) fn p_revloop(mut src: Slice<'a>, size: usize) -> QueryResult<Box<[LitIR<'a>]>> {
static LITIR_TF: [for<'a> fn(Slice<'a>, &mut usize, &mut Vec<LitIR<'a>>) -> bool; 7] = [
SafeQueryData::uint, // tc: 0
SafeQueryData::sint, // tc: 1
SafeQueryData::bool, // tc: 2
SafeQueryData::float, // tc: 3
SafeQueryData::bin, // tc: 4
SafeQueryData::str, // tc: 5
|_, _, _| false, // ecc: 6
];
let nonpadded_offset = (LITIR_TF.len() - 2) as u8;
let ecc_offset = LITIR_TF.len() - 1;
let mut okay = true;
let mut data = Vec::with_capacity(size);
while src.len() >= 3 && okay {
let tc = src[0];
okay &= tc <= nonpadded_offset;
let mx = ecc_offset.min(tc as usize);
let mut i_ = 1;
okay &= LITIR_TF[mx](&src[1..], &mut i_, &mut data);
src = &src[i_..];
}
okay &= src.is_empty() && data.len() == size;
if compiler::likely(okay) {
Ok(data.into_boxed_slice())
} else {
Err(Error::LexInvalidEscapedLiteral)
}
}
}
// low level methods
impl<'b> SafeQueryData<'b> {
/// Extracts a length-prefixed payload (`<len>\n<payload>`) from the start of `src`.
///
/// - `cnt` is incremented by the number of bytes consumed (length prefix plus payload)
/// - `flag` is cleared if the length prefix is invalid or if fewer than `len` bytes follow it
///
/// The returned slice is never longer than what is actually available: if the declared length
/// exceeds the remaining input, everything that is left is returned and `flag` is cleared.
#[inline(always)]
fn mxple<'a>(src: Slice<'a>, cnt: &mut usize, flag: &mut bool) -> Slice<'a> {
    // find payload length
    let mut i = 0;
    let payload_len = decode_num_ub::<usize>(src, flag, &mut i);
    let src = &src[i..];
    // find payload
    *flag &= src.len() >= payload_len;
    let mx_extract = payload_len.min(src.len());
    // incr cursor
    i += mx_extract;
    *cnt += i;
    // `mx_extract` is clamped to `src.len()`, so this safe sub-slice cannot go out of bounds and
    // yields the same slice the previous `slice::from_raw_parts` call did
    &src[..mx_extract]
}
/// Decodes an LF-terminated unsigned integer from `src` and pushes it as `LitIR::UnsignedInt`.
///
/// A literal is pushed even if decoding fails; the return value tells whether it is valid.
#[inline(always)]
pub(super) fn uint<'a>(src: Slice<'a>, cnt: &mut usize, data: &mut Vec<LitIR<'a>>) -> bool {
    let mut okay = true;
    let value = decode_num_ub(src, &mut okay, cnt);
    data.push(LitIR::UnsignedInt(value));
    okay
}
/// Decodes an LF-terminated signed integer from `src` and pushes it as `LitIR::SignedInt`.
///
/// A literal is pushed even if decoding fails; the return value tells whether it is valid.
#[inline(always)]
pub(super) fn sint<'a>(src: Slice<'a>, cnt: &mut usize, data: &mut Vec<LitIR<'a>>) -> bool {
    let mut okay = true;
    let value = decode_num_ub(src, &mut okay, cnt);
    data.push(LitIR::SignedInt(value));
    okay
}
/// Decodes a boolean written as `true\n` or `false\n` and pushes it as `LitIR::Bool`.
///
/// `cnt` is advanced by the length of the matched keyword (including the LF), or not at all if
/// neither matches. A literal is pushed in every case; the return value tells whether it is valid.
#[inline(always)]
pub(super) fn bool<'a>(src: Slice<'a>, cnt: &mut usize, data: &mut Vec<LitIR<'a>>) -> bool {
    let is_true = src.starts_with(b"true\n");
    let is_false = src.starts_with(b"false\n");
    data.push(LitIR::Bool(is_true));
    *cnt += if is_true {
        5
    } else if is_false {
        6
    } else {
        0
    };
    is_true | is_false
}
#[inline(always)]
pub(super) fn float<'a>(src: Slice<'a>, cnt: &mut usize, data: &mut Vec<LitIR<'a>>) -> bool {
let mut okay = true;
let payload = Self::mxple(src, cnt, &mut okay);
match String::from_utf8_lossy(payload).parse() {
Ok(p) if compiler::likely(okay) => {
data.push(LitIR::Float(p));
} }
_ => {} },
} // uint
okay |slf| match slf
} .param_buffer
#[inline(always)] .try_next_ascii_u64_lf_separated_or_restore_cursor()
pub(super) fn bin<'a>(src: Slice<'a>, cnt: &mut usize, data: &mut Vec<LitIR<'a>>) -> bool { {
let mut okay = true; Some(int) => slf.l.push_token(Lit::new_uint(int)),
let payload = Self::mxple(src, cnt, &mut okay); None => slf.l.set_error(Error::LexInvalidEscapedLiteral),
data.push(LitIR::Bin(payload)); },
okay // sint
} |slf| {
#[inline(always)] let (okay, int) = slf.param_buffer.try_next_ascii_i64_separated_by::<b'\n'>();
pub(super) fn str<'a>(src: Slice<'a>, cnt: &mut usize, data: &mut Vec<LitIR<'a>>) -> bool { if okay {
let mut okay = true; slf.l.push_token(Lit::new_sint(int))
let payload = Self::mxple(src, cnt, &mut okay); } else {
match str::from_utf8(payload) { slf.l.set_error(Error::LexInvalidLiteral)
Ok(s) if compiler::likely(okay) => {
data.push(LitIR::Str(s));
true
} }
_ => false, },
} // float
} |slf| {
} let Some(size_of_body) = slf
.param_buffer
.try_next_ascii_u64_lf_separated_or_restore_cursor()
else {
slf.l.set_error(Error::LexInvalidEscapedLiteral);
return;
};
let body = match slf
.param_buffer
.try_next_variable_block(size_of_body as usize)
{
Some(body) => body,
None => {
slf.l.set_error(Error::LexInvalidEscapedLiteral);
return;
}
};
match core::str::from_utf8(body).map(core::str::FromStr::from_str) {
Ok(Ok(fp)) => slf.l.push_token(Lit::new_float(fp)),
_ => slf.l.set_error(Error::LexInvalidEscapedLiteral),
}
},
// binary
|slf| {
let Some(size_of_body) = slf
.param_buffer
.try_next_ascii_u64_lf_separated_or_restore_cursor()
else {
slf.l.set_error(Error::LexInvalidEscapedLiteral);
return;
};
match slf
.param_buffer
.try_next_variable_block(size_of_body as usize)
{
Some(block) => slf.l.push_token(Lit::new_bin(block)),
None => slf.l.set_error(Error::LexInvalidEscapedLiteral),
}
},
// string
|slf| {
let Some(size_of_body) = slf
.param_buffer
.try_next_ascii_u64_lf_separated_or_restore_cursor()
else {
slf.l.set_error(Error::LexInvalidEscapedLiteral);
return;
};
match slf
.param_buffer
.try_next_variable_block(size_of_body as usize)
.map(core::str::from_utf8)
{
// TODO(@ohsayan): obliterate this alloc
Some(Ok(s)) => slf.l.push_token(Lit::new_string(s.to_owned())),
_ => slf.l.set_error(Error::LexInvalidEscapedLiteral),
}
},
// ecc
|s| s.l.set_error(Error::LexInvalidEscapedLiteral),
]
};

@ -25,12 +25,8 @@
*/ */
use { use {
super::Slice, crate::engine::data::lit::Lit,
crate::engine::{ core::{borrow::Borrow, fmt, ops::Deref, str},
data::{lit::Lit, spec::Dataspec1D},
error::Error,
},
core::{borrow::Borrow, fmt, ops::Deref, slice, str},
}; };
#[repr(transparent)] #[repr(transparent)]
@ -367,176 +363,3 @@ impl<'a> AsRef<Token<'a>> for Token<'a> {
self self
} }
} }
#[derive(Debug)]
/// A raw-pointer cursor over a source buffer, together with the tokens collected so far and the
/// last error that was recorded
pub struct RawLexer<'a> {
// cursor: points at the next byte to be read
c: *const u8,
// end: one past the last byte of the source
e: *const u8,
// tokens collected so far
pub(super) tokens: Vec<Token<'a>>,
// last recorded error (`None` while lexing is still healthy)
pub(super) last_error: Option<Error>,
}
// ctor
impl<'a> RawLexer<'a> {
    /// Creates a lexer over `src` with the cursor at the first byte, no tokens and no error
    #[inline(always)]
    pub(super) const fn new(src: Slice<'a>) -> Self {
        let start = src.as_ptr();
        let end = unsafe {
            // UNSAFE(@ohsayan): Always safe (<= EOA)
            start.add(src.len())
        };
        Self {
            c: start,
            e: end,
            tokens: Vec::new(),
            last_error: None,
        }
    }
}
// meta
impl<'a> RawLexer<'a> {
/// Returns the current cursor position
#[inline(always)]
pub(super) const fn cursor(&self) -> *const u8 {
self.c
}
/// Returns the end-of-source pointer (one past the last byte)
#[inline(always)]
pub(super) const fn data_end_ptr(&self) -> *const u8 {
self.e
}
/// Returns true if the cursor has not yet reached the end of the source
#[inline(always)]
pub(super) fn not_exhausted(&self) -> bool {
self.data_end_ptr() > self.cursor()
}
/// Returns true if the cursor is exactly at the end of the source
#[inline(always)]
pub(super) fn exhausted(&self) -> bool {
self.cursor() == self.data_end_ptr()
}
/// Returns the number of bytes between the cursor and the end of the source
#[inline(always)]
pub(super) fn remaining(&self) -> usize {
unsafe {
// UNSAFE(@ohsayan): valid ptrs
self.e.offset_from(self.c) as usize
}
}
/// Reads the byte at the cursor without moving it
///
/// # Safety
/// The cursor must point at a readable byte, i.e. the lexer must not be exhausted
#[inline(always)]
pub(super) unsafe fn deref_cursor(&self) -> u8 {
*self.cursor()
}
/// Moves the cursor ahead by `by` bytes
///
/// # Safety
/// At least `by` bytes must remain (this is only checked in debug builds)
#[inline(always)]
pub(super) unsafe fn incr_cursor_by(&mut self, by: usize) {
debug_assert!(self.remaining() >= by);
self.c = self.cursor().add(by)
}
/// Moves the cursor ahead by one byte
///
/// # Safety
/// At least one byte must remain
#[inline(always)]
pub(super) unsafe fn incr_cursor(&mut self) {
self.incr_cursor_by(1)
}
/// Moves the cursor ahead by one byte if `iff` is true, otherwise leaves it where it is
///
/// # Safety
/// If `iff` is true, at least one byte must remain
#[inline(always)]
unsafe fn incr_cursor_if(&mut self, iff: bool) {
self.incr_cursor_by(iff as usize)
}
/// Appends a token (anything convertible into a [`Token`]) to the token list
#[inline(always)]
pub(super) fn push_token(&mut self, token: impl Into<Token<'a>>) {
self.tokens.push(token.into())
}
/// Returns true if a byte is available and `f` returns true for it. The cursor is not moved
#[inline(always)]
pub(super) fn peek_is(&mut self, f: impl FnOnce(u8) -> bool) -> bool {
self.not_exhausted()
&& unsafe {
// UNSAFE(@ohsayan): verified cursor is nonnull
f(self.deref_cursor())
}
}
/// Like `peek_is`, but also moves the cursor ahead by one byte when the result is true
#[inline(always)]
pub(super) fn peek_is_and_forward(&mut self, f: impl FnOnce(u8) -> bool) -> bool {
let did_fw = self.not_exhausted()
&& unsafe {
// UNSAFE(@ohsayan): verified ptr
f(self.deref_cursor())
};
unsafe {
// UNSAFE(@ohsayan): increment cursor
self.incr_cursor_if(did_fw);
}
did_fw
}
/// Moves past the current byte if it equals `eq`. Returns true if the byte matched or if the
/// cursor is at the end of the source once the (possible) move has been made
#[inline(always)]
fn peek_eq_and_forward_or_eof(&mut self, eq: u8) -> bool {
unsafe {
// UNSAFE(@ohsayan): verified cursor
let eq = self.not_exhausted() && self.deref_cursor() == eq;
// UNSAFE(@ohsayan): incr cursor if matched
self.incr_cursor_if(eq);
eq | self.exhausted()
}
}
/// Returns true if a byte is available and it differs from `b`. The cursor is not moved
#[inline(always)]
pub(super) fn peek_neq(&self, b: u8) -> bool {
self.not_exhausted()
&& unsafe {
// UNSAFE(@ohsayan): verified cursor
self.deref_cursor() != b
}
}
/// Moves past the current byte if it equals `b` and returns whether it did
#[inline(always)]
pub(super) fn peek_eq_and_forward(&mut self, b: u8) -> bool {
unsafe {
// UNSAFE(@ohsayan): verified cursor
let r = self.not_exhausted() && self.deref_cursor() == b;
self.incr_cursor_if(r);
r
}
}
/// Skips over any run of spaces, tabs and line feeds at the cursor
#[inline(always)]
pub(super) fn trim_ahead(&mut self) {
while self.peek_is_and_forward(|b| b == b' ' || b == b'\t' || b == b'\n') {}
}
/// Records `e` as the last error, replacing any error recorded earlier
#[inline(always)]
pub(super) fn set_error(&mut self, e: Error) {
self.last_error = Some(e);
}
/// Returns true if no error has been recorded
#[inline(always)]
pub(super) fn no_error(&self) -> bool {
self.last_error.is_none()
}
}
// high level methods
impl<'a> RawLexer<'a> {
#[inline(always)]
pub(super) fn scan_ident(&mut self) -> Slice<'a> {
let s = self.cursor();
unsafe {
while self.peek_is(|b| b.is_ascii_alphanumeric() || b == b'_') {
// UNSAFE(@ohsayan): increment cursor, this is valid
self.incr_cursor();
}
// UNSAFE(@ohsayan): valid slice and ptrs
slice::from_raw_parts(s, self.cursor().offset_from(s) as usize)
}
}
#[inline(always)]
pub(super) fn scan_ident_or_keyword(&mut self) {
let s = self.scan_ident();
let st = s.to_ascii_lowercase();
match kwof(&st) {
Some(kw) => self.tokens.push(kw.into()),
// FIXME(@ohsayan): Uh, mind fixing this? The only advantage is that I can keep the graph *memory* footprint small
None if st == b"true" || st == b"false" => self.push_token(Lit::Bool(st == b"true")),
None => self.tokens.push(unsafe {
// UNSAFE(@ohsayan): scan_ident only returns a valid ident which is always a string
Token::Ident(Ident::new(s))
}),
}
}
#[inline(always)]
pub(super) fn scan_byte(&mut self, byte: u8) {
match symof(byte) {
Some(tok) => self.push_token(tok),
None => return self.set_error(Error::LexUnexpectedByte),
}
unsafe {
// UNSAFE(@ohsayan): we are sent a byte, so fw cursor
self.incr_cursor();
}
}
}

@ -331,3 +331,11 @@ macro_rules! build_lut {
} }
} }
} }
/// Builds a `Vec<T>` from a parenthesised list of expressions, converting each one with `.into()`.
///
/// Usage: `into_vec!(T => (a, b, c))`; a trailing comma and an empty list are both accepted.
#[cfg(test)]
macro_rules! into_vec {
    ($target:ty => ($($item:expr),* $(,)?)) => {{
        let converted: Vec<$target> = std::vec![$($item.into()),*];
        converted
    }};
}

@ -25,7 +25,7 @@
*/ */
use { use {
super::lex::{InsecureLexer, SafeLexer, Symbol, Token}, super::lex::{InsecureLexer, SecureLexer, Symbol, Token},
crate::{ crate::{
engine::{data::cell::Datacell, error::QueryResult}, engine::{data::cell::Datacell, error::QueryResult},
util::test_utils, util::test_utils,
@ -44,10 +44,8 @@ mod structure_syn;
pub fn lex_insecure(src: &[u8]) -> QueryResult<Vec<Token<'_>>> { pub fn lex_insecure(src: &[u8]) -> QueryResult<Vec<Token<'_>>> {
InsecureLexer::lex(src) InsecureLexer::lex(src)
} }
#[inline(always)] pub fn lex_secure<'a>(src: &'a [u8], query_window: usize) -> QueryResult<Vec<Token<'a>>> {
/// Uses the [`SafeLexer`] to lex the given input SecureLexer::lex(src, query_window)
pub fn lex_secure(src: &[u8]) -> QueryResult<Vec<Token>> {
SafeLexer::lex(src)
} }
pub trait NullableData<T> { pub trait NullableData<T> {

@ -27,13 +27,7 @@
use super::*; use super::*;
mod list_parse { mod list_parse {
use super::*; use super::*;
use crate::engine::{ use crate::engine::ql::{ast::parse_ast_node_full, dml::ins::List};
data::{lit::LitIR, spec::Dataspec1D},
ql::{
ast::{parse_ast_node_full, traits::ASTNode, State, SubstitutedData},
dml::ins::List,
},
};
#[test] #[test]
fn list_mini() { fn list_mini() {
@ -58,28 +52,6 @@ mod list_parse {
assert_eq!(r.as_slice(), into_array![1, 2, 3, 4]) assert_eq!(r.as_slice(), into_array![1, 2, 3, 4])
} }
#[test] #[test]
// A flat list made only of `?` placeholders must be filled, in order, from the substituted literals
fn list_param() {
let tok = lex_secure(
b"
[?, ?, ?, ?]
",
)
.unwrap();
// one literal per placeholder
let data = [
LitIR::UnsignedInt(1),
LitIR::UnsignedInt(2),
LitIR::UnsignedInt(3),
LitIR::UnsignedInt(4),
];
// the first token is skipped (presumably the opening `[` -- confirm against `List::from_state`)
let mut state = State::new(&tok[1..], SubstitutedData::new(&data));
assert_eq!(
<List as ASTNode>::from_state(&mut state)
.unwrap()
.into_inner(),
into_array![1, 2, 3, 4]
)
}
#[test]
fn list_pro() { fn list_pro() {
let tok = lex_insecure( let tok = lex_insecure(
b" b"
@ -104,40 +76,6 @@ mod list_parse {
) )
} }
#[test] #[test]
// Nested lists of `?` placeholders: the literals are handed out in source order across the inner
// lists, and the trailing empty list takes none
fn list_pro_param() {
let tok = lex_secure(
b"
[
[?, ?],
[?, ?],
[?, ?],
[]
]
",
)
.unwrap();
// six placeholders, six literals
let data = [
LitIR::UnsignedInt(1),
LitIR::UnsignedInt(2),
LitIR::UnsignedInt(3),
LitIR::UnsignedInt(4),
LitIR::UnsignedInt(5),
LitIR::UnsignedInt(6),
];
// the first token is skipped (presumably the outermost `[` -- confirm against `List::from_state`)
let mut state = State::new(&tok[1..], SubstitutedData::new(&data));
assert_eq!(
<List as ASTNode>::from_state(&mut state)
.unwrap()
.into_inner(),
into_array![
into_array![1, 2],
into_array![3, 4],
into_array![5, 6],
into_array![]
]
)
}
#[test]
fn list_pro_max() { fn list_pro_max() {
let tok = lex_insecure( let tok = lex_insecure(
b" b"
@ -161,46 +99,6 @@ mod list_parse {
] ]
) )
} }
// Two levels of nesting mixed with empty lists: the literals are still handed out in source order
// and the empty lists take none
#[test]
fn list_pro_max_param() {
let tok = lex_secure(
b"
[
[[?, ?], [?, ?]],
[[], [?, ?]],
[[?, ?], [?, ?]],
[[?, ?], []]
]
",
)
.unwrap();
// twelve placeholders, twelve literals
let data = [
LitIR::UnsignedInt(1),
LitIR::UnsignedInt(1),
LitIR::UnsignedInt(2),
LitIR::UnsignedInt(2),
LitIR::UnsignedInt(4),
LitIR::UnsignedInt(4),
LitIR::UnsignedInt(5),
LitIR::UnsignedInt(5),
LitIR::UnsignedInt(6),
LitIR::UnsignedInt(6),
LitIR::UnsignedInt(7),
LitIR::UnsignedInt(7),
];
// the first token is skipped (presumably the outermost `[` -- confirm against `List::from_state`)
let mut state = State::new(&tok[1..], SubstitutedData::new(&data));
assert_eq!(
<List as ASTNode>::from_state(&mut state)
.unwrap()
.into_inner(),
into_array![
into_array![into_array![1, 1], into_array![2, 2]],
into_array![into_array![], into_array![4, 4]],
into_array![into_array![5, 5], into_array![6, 6]],
into_array![into_array![7, 7], into_array![]],
]
)
}
} }
mod tuple_syntax { mod tuple_syntax {
@ -599,7 +497,7 @@ mod stmt_select {
use { use {
super::*, super::*,
crate::engine::{ crate::engine::{
data::{lit::LitIR, spec::Dataspec1D}, data::lit::Lit,
ql::{ ql::{
ast::{parse_ast_node_full, Entity}, ast::{parse_ast_node_full, Entity},
dml::{sel::SelectStatement, RelationalExpr}, dml::{sel::SelectStatement, RelationalExpr},
@ -622,7 +520,7 @@ mod stmt_select {
true, true,
dict! { dict! {
Ident::from("username") => RelationalExpr::new( Ident::from("username") => RelationalExpr::new(
Ident::from("username"), LitIR::Str("sayan"), RelationalExpr::OP_EQ Ident::from("username"), Lit::new_str("sayan"), RelationalExpr::OP_EQ
), ),
}, },
); );
@ -643,7 +541,7 @@ mod stmt_select {
false, false,
dict! { dict! {
Ident::from("username") => RelationalExpr::new( Ident::from("username") => RelationalExpr::new(
Ident::from("username"), LitIR::Str("sayan"), RelationalExpr::OP_EQ Ident::from("username"), Lit::new_str("sayan"), RelationalExpr::OP_EQ
), ),
}, },
); );
@ -664,7 +562,7 @@ mod stmt_select {
false, false,
dict! { dict! {
Ident::from("username") => RelationalExpr::new( Ident::from("username") => RelationalExpr::new(
Ident::from("username"), LitIR::Str("sayan"), RelationalExpr::OP_EQ Ident::from("username"), Lit::new_str("sayan"), RelationalExpr::OP_EQ
), ),
}, },
); );
@ -685,7 +583,7 @@ mod stmt_select {
false, false,
dict! { dict! {
Ident::from("username") => RelationalExpr::new( Ident::from("username") => RelationalExpr::new(
Ident::from("username"), LitIR::Str("sayan"), RelationalExpr::OP_EQ Ident::from("username"), Lit::new_str("sayan"), RelationalExpr::OP_EQ
), ),
}, },
); );
@ -697,7 +595,7 @@ mod expression_tests {
super::*, super::*,
crate::engine::{ crate::engine::{
core::query_meta::AssignmentOperator, core::query_meta::AssignmentOperator,
data::{lit::LitIR, spec::Dataspec1D}, data::lit::Lit,
ql::{ast::parse_ast_node_full, dml::upd::AssignmentExpression, lex::Ident}, ql::{ast::parse_ast_node_full, dml::upd::AssignmentExpression, lex::Ident},
}, },
}; };
@ -709,7 +607,7 @@ mod expression_tests {
r, r,
AssignmentExpression::new( AssignmentExpression::new(
Ident::from("username"), Ident::from("username"),
LitIR::Str("sayan"), Lit::new_str("sayan"),
AssignmentOperator::Assign AssignmentOperator::Assign
) )
); );
@ -722,7 +620,7 @@ mod expression_tests {
r, r,
AssignmentExpression::new( AssignmentExpression::new(
Ident::from("followers"), Ident::from("followers"),
LitIR::UnsignedInt(100), Lit::new_uint(100),
AssignmentOperator::AddAssign AssignmentOperator::AddAssign
) )
); );
@ -735,7 +633,7 @@ mod expression_tests {
r, r,
AssignmentExpression::new( AssignmentExpression::new(
Ident::from("following"), Ident::from("following"),
LitIR::UnsignedInt(150), Lit::new_uint(150),
AssignmentOperator::SubAssign AssignmentOperator::SubAssign
) )
); );
@ -748,7 +646,7 @@ mod expression_tests {
r, r,
AssignmentExpression::new( AssignmentExpression::new(
Ident::from("product_qty"), Ident::from("product_qty"),
LitIR::UnsignedInt(2), Lit::new_uint(2),
AssignmentOperator::MulAssign AssignmentOperator::MulAssign
) )
); );
@ -761,7 +659,7 @@ mod expression_tests {
r, r,
AssignmentExpression::new( AssignmentExpression::new(
Ident::from("image_crop_factor"), Ident::from("image_crop_factor"),
LitIR::UnsignedInt(2), Lit::new_uint(2),
AssignmentOperator::DivAssign AssignmentOperator::DivAssign
) )
); );
@ -772,7 +670,7 @@ mod update_statement {
super::*, super::*,
crate::engine::{ crate::engine::{
core::query_meta::AssignmentOperator, core::query_meta::AssignmentOperator,
data::{lit::LitIR, spec::Dataspec1D}, data::lit::Lit,
ql::{ ql::{
ast::{parse_ast_node_full, Entity}, ast::{parse_ast_node_full, Entity},
dml::{ dml::{
@ -796,13 +694,13 @@ mod update_statement {
Entity::Single(Ident::from("app")), Entity::Single(Ident::from("app")),
vec![AssignmentExpression::new( vec![AssignmentExpression::new(
Ident::from("notes"), Ident::from("notes"),
LitIR::Str("this is my new note"), Lit::new_str("this is my new note"),
AssignmentOperator::AddAssign, AssignmentOperator::AddAssign,
)], )],
WhereClause::new(dict! { WhereClause::new(dict! {
Ident::from("username") => RelationalExpr::new( Ident::from("username") => RelationalExpr::new(
Ident::from("username"), Ident::from("username"),
LitIR::Str("sayan"), Lit::new_str("sayan"),
RelationalExpr::OP_EQ RelationalExpr::OP_EQ
) )
}), }),
@ -829,19 +727,19 @@ mod update_statement {
vec![ vec![
AssignmentExpression::new( AssignmentExpression::new(
Ident::from("notes"), Ident::from("notes"),
LitIR::Str("this is my new note"), Lit::new_str("this is my new note"),
AssignmentOperator::AddAssign, AssignmentOperator::AddAssign,
), ),
AssignmentExpression::new( AssignmentExpression::new(
Ident::from("email"), Ident::from("email"),
LitIR::Str("sayan@example.com"), Lit::new_str("sayan@example.com"),
AssignmentOperator::Assign, AssignmentOperator::Assign,
), ),
], ],
WhereClause::new(dict! { WhereClause::new(dict! {
Ident::from("username") => RelationalExpr::new( Ident::from("username") => RelationalExpr::new(
Ident::from("username"), Ident::from("username"),
LitIR::Str("sayan"), Lit::new_str("sayan"),
RelationalExpr::OP_EQ RelationalExpr::OP_EQ
) )
}), }),
@ -853,7 +751,7 @@ mod delete_stmt {
use { use {
super::*, super::*,
crate::engine::{ crate::engine::{
data::{lit::LitIR, spec::Dataspec1D}, data::lit::Lit,
ql::{ ql::{
ast::{parse_ast_node_full, Entity}, ast::{parse_ast_node_full, Entity},
dml::{del::DeleteStatement, RelationalExpr}, dml::{del::DeleteStatement, RelationalExpr},
@ -875,7 +773,7 @@ mod delete_stmt {
dict! { dict! {
Ident::from("username") => RelationalExpr::new( Ident::from("username") => RelationalExpr::new(
Ident::from("username"), Ident::from("username"),
LitIR::Str("sayan"), Lit::new_str("sayan"),
RelationalExpr::OP_EQ RelationalExpr::OP_EQ
) )
}, },
@ -898,7 +796,7 @@ mod delete_stmt {
dict! { dict! {
Ident::from("username") => RelationalExpr::new( Ident::from("username") => RelationalExpr::new(
Ident::from("username"), Ident::from("username"),
LitIR::Str("sayan"), Lit::new_str("sayan"),
RelationalExpr::OP_EQ RelationalExpr::OP_EQ
) )
}, },
@ -913,7 +811,7 @@ mod relational_expr {
use { use {
super::*, super::*,
crate::engine::{ crate::engine::{
data::{lit::LitIR, spec::Dataspec1D}, data::lit::Lit,
ql::{ast::parse_ast_node_full, dml::RelationalExpr, lex::Ident}, ql::{ast::parse_ast_node_full, dml::RelationalExpr, lex::Ident},
}, },
}; };
@ -925,7 +823,7 @@ mod relational_expr {
assert_eq!( assert_eq!(
r, r,
RelationalExpr { RelationalExpr {
rhs: LitIR::UnsignedInt(10), rhs: Lit::new_uint(10),
lhs: Ident::from("primary_key"), lhs: Ident::from("primary_key"),
opc: RelationalExpr::OP_EQ opc: RelationalExpr::OP_EQ
} }
@ -938,7 +836,7 @@ mod relational_expr {
assert_eq!( assert_eq!(
r, r,
RelationalExpr { RelationalExpr {
rhs: LitIR::UnsignedInt(10), rhs: Lit::new_uint(10),
lhs: Ident::from("primary_key"), lhs: Ident::from("primary_key"),
opc: RelationalExpr::OP_NE opc: RelationalExpr::OP_NE
} }
@ -951,7 +849,7 @@ mod relational_expr {
assert_eq!( assert_eq!(
r, r,
RelationalExpr { RelationalExpr {
rhs: LitIR::UnsignedInt(10), rhs: Lit::new_uint(10),
lhs: Ident::from("primary_key"), lhs: Ident::from("primary_key"),
opc: RelationalExpr::OP_GT opc: RelationalExpr::OP_GT
} }
@ -964,7 +862,7 @@ mod relational_expr {
assert_eq!( assert_eq!(
r, r,
RelationalExpr { RelationalExpr {
rhs: LitIR::UnsignedInt(10), rhs: Lit::new_uint(10),
lhs: Ident::from("primary_key"), lhs: Ident::from("primary_key"),
opc: RelationalExpr::OP_GE opc: RelationalExpr::OP_GE
} }
@ -977,7 +875,7 @@ mod relational_expr {
assert_eq!( assert_eq!(
r, r,
RelationalExpr { RelationalExpr {
rhs: LitIR::UnsignedInt(10), rhs: Lit::new_uint(10),
lhs: Ident::from("primary_key"), lhs: Ident::from("primary_key"),
opc: RelationalExpr::OP_LT opc: RelationalExpr::OP_LT
} }
@ -991,7 +889,7 @@ mod relational_expr {
r, r,
RelationalExpr::new( RelationalExpr::new(
Ident::from("primary_key"), Ident::from("primary_key"),
LitIR::UnsignedInt(10), Lit::new_uint(10),
RelationalExpr::OP_LE RelationalExpr::OP_LE
) )
); );
@ -1001,7 +899,7 @@ mod where_clause {
use { use {
super::*, super::*,
crate::engine::{ crate::engine::{
data::{lit::LitIR, spec::Dataspec1D}, data::lit::Lit,
ql::{ ql::{
ast::parse_ast_node_full, ast::parse_ast_node_full,
dml::{RelationalExpr, WhereClause}, dml::{RelationalExpr, WhereClause},
@ -1020,7 +918,7 @@ mod where_clause {
let expected = WhereClause::new(dict! { let expected = WhereClause::new(dict! {
Ident::from("x") => RelationalExpr::new( Ident::from("x") => RelationalExpr::new(
Ident::from("x"), Ident::from("x"),
LitIR::UnsignedInt(100), Lit::new_uint(100),
RelationalExpr::OP_EQ RelationalExpr::OP_EQ
) )
}); });
@ -1037,12 +935,12 @@ mod where_clause {
let expected = WhereClause::new(dict! { let expected = WhereClause::new(dict! {
Ident::from("userid") => RelationalExpr::new( Ident::from("userid") => RelationalExpr::new(
Ident::from("userid"), Ident::from("userid"),
LitIR::UnsignedInt(100), Lit::new_uint(100),
RelationalExpr::OP_EQ RelationalExpr::OP_EQ
), ),
Ident::from("pass") => RelationalExpr::new( Ident::from("pass") => RelationalExpr::new(
Ident::from("pass"), Ident::from("pass"),
LitIR::Str("password"), Lit::new_str("password"),
RelationalExpr::OP_EQ RelationalExpr::OP_EQ
) )
}); });

@ -27,12 +27,9 @@
use { use {
super::{ super::{
super::lex::{Ident, Token}, super::lex::{Ident, Token},
lex_insecure, lex_insecure, lex_secure,
},
crate::engine::{
data::{lit::Lit, spec::Dataspec1D},
error::Error,
}, },
crate::engine::{data::lit::Lit, error::Error},
}; };
macro_rules! v( macro_rules! v(
@ -59,7 +56,7 @@ fn lex_unsigned_int() {
let number = v!("123456"); let number = v!("123456");
assert_eq!( assert_eq!(
lex_insecure(&number).unwrap(), lex_insecure(&number).unwrap(),
vec![Token::Lit(Lit::UnsignedInt(123456))] vec![Token::Lit(Lit::new_uint(123456))]
); );
} }
#[test] #[test]
@ -67,16 +64,19 @@ fn lex_signed_int() {
let number = v!("-123456"); let number = v!("-123456");
assert_eq!( assert_eq!(
lex_insecure(&number).unwrap(), lex_insecure(&number).unwrap(),
vec![Token::Lit(Lit::SignedInt(-123456))] vec![Token::Lit(Lit::new_sint(-123456))]
); );
} }
#[test] #[test]
fn lex_bool() { fn lex_bool() {
let (t, f) = v!("true", "false"); let (t, f) = v!("true", "false");
assert_eq!(lex_insecure(&t).unwrap(), vec![Token::Lit(Lit::Bool(true))]); assert_eq!(
lex_insecure(&t).unwrap(),
vec![Token::Lit(Lit::new_bool(true))]
);
assert_eq!( assert_eq!(
lex_insecure(&f).unwrap(), lex_insecure(&f).unwrap(),
vec![Token::Lit(Lit::Bool(false))] vec![Token::Lit(Lit::new_bool(false))]
); );
} }
#[test] #[test]
@ -84,12 +84,12 @@ fn lex_string() {
let s = br#" "hello, world" "#; let s = br#" "hello, world" "#;
assert_eq!( assert_eq!(
lex_insecure(s).unwrap(), lex_insecure(s).unwrap(),
vec![Token::Lit(Lit::Str("hello, world".into()))] vec![Token::Lit(Lit::new_string("hello, world".into()))]
); );
let s = br#" 'hello, world' "#; let s = br#" 'hello, world' "#;
assert_eq!( assert_eq!(
lex_insecure(s).unwrap(), lex_insecure(s).unwrap(),
vec![Token::Lit(Lit::Str("hello, world".into()))] vec![Token::Lit(Lit::new_string("hello, world".into()))]
); );
} }
#[test] #[test]
@ -97,12 +97,12 @@ fn lex_string_test_escape_quote() {
let s = br#" "\"hello world\"" "#; // == "hello world" let s = br#" "\"hello world\"" "#; // == "hello world"
assert_eq!( assert_eq!(
lex_insecure(s).unwrap(), lex_insecure(s).unwrap(),
vec![Token::Lit(Lit::Str("\"hello world\"".into()))] vec![Token::Lit(Lit::new_string("\"hello world\"".into()))]
); );
let s = br#" '\'hello world\'' "#; // == 'hello world' let s = br#" '\'hello world\'' "#; // == 'hello world'
assert_eq!( assert_eq!(
lex_insecure(s).unwrap(), lex_insecure(s).unwrap(),
vec![Token::Lit(Lit::Str("'hello world'".into()))] vec![Token::Lit(Lit::new_string("'hello world'".into()))]
); );
} }
#[test] #[test]
@ -110,12 +110,12 @@ fn lex_string_use_different_quote_style() {
let s = br#" "he's on it" "#; let s = br#" "he's on it" "#;
assert_eq!( assert_eq!(
lex_insecure(s).unwrap(), lex_insecure(s).unwrap(),
vec![Token::Lit(Lit::Str("he's on it".into()))] vec![Token::Lit(Lit::new_string("he's on it".into()))]
); );
let s = br#" 'he thinks that "that girl" fixed it' "#; let s = br#" 'he thinks that "that girl" fixed it' "#;
assert_eq!( assert_eq!(
lex_insecure(s).unwrap(), lex_insecure(s).unwrap(),
vec![Token::Lit(Lit::Str( vec![Token::Lit(Lit::new_string(
"he thinks that \"that girl\" fixed it".into() "he thinks that \"that girl\" fixed it".into()
))] ))]
) )
@ -125,18 +125,18 @@ fn lex_string_escape_bs() {
let s = v!(r#" "windows has c:\\" "#); let s = v!(r#" "windows has c:\\" "#);
assert_eq!( assert_eq!(
lex_insecure(&s).unwrap(), lex_insecure(&s).unwrap(),
vec![Token::Lit(Lit::Str("windows has c:\\".into()))] vec![Token::Lit(Lit::new_string("windows has c:\\".into()))]
); );
let s = v!(r#" 'windows has c:\\' "#); let s = v!(r#" 'windows has c:\\' "#);
assert_eq!( assert_eq!(
lex_insecure(&s).unwrap(), lex_insecure(&s).unwrap(),
vec![Token::Lit(Lit::Str("windows has c:\\".into()))] vec![Token::Lit(Lit::new_string("windows has c:\\".into()))]
); );
let lol = v!(r#"'\\\\\\\\\\'"#); let lol = v!(r#"'\\\\\\\\\\'"#);
let lexed = lex_insecure(&lol).unwrap(); let lexed = lex_insecure(&lol).unwrap();
assert_eq!( assert_eq!(
lexed, lexed,
vec![Token::Lit(Lit::Str("\\".repeat(5).into_boxed_str()))], vec![Token::Lit(Lit::new_string("\\".repeat(5)))],
"lol" "lol"
) )
} }
@ -156,352 +156,166 @@ fn lex_string_unclosed() {
fn lex_unsafe_literal_mini() { fn lex_unsafe_literal_mini() {
let usl = lex_insecure("\r0\n".as_bytes()).unwrap(); let usl = lex_insecure("\r0\n".as_bytes()).unwrap();
assert_eq!(usl.len(), 1); assert_eq!(usl.len(), 1);
assert_eq!(Token::Lit(Lit::Bin(b"")), usl[0]); assert_eq!(Token::Lit(Lit::new_bin(b"")), usl[0]);
} }
#[test] #[test]
fn lex_unsafe_literal() { fn lex_unsafe_literal() {
let usl = lex_insecure("\r9\nabcdefghi".as_bytes()).unwrap(); let usl = lex_insecure("\r9\nabcdefghi".as_bytes()).unwrap();
assert_eq!(usl.len(), 1); assert_eq!(usl.len(), 1);
assert_eq!(Token::Lit(Lit::Bin(b"abcdefghi")), usl[0]); assert_eq!(Token::Lit(Lit::new_bin(b"abcdefghi")), usl[0]);
} }
#[test] #[test]
fn lex_unsafe_literal_pro() { fn lex_unsafe_literal_pro() {
let usl = lex_insecure("\r18\nabcdefghi123456789".as_bytes()).unwrap(); let usl = lex_insecure("\r18\nabcdefghi123456789".as_bytes()).unwrap();
assert_eq!(usl.len(), 1); assert_eq!(usl.len(), 1);
assert_eq!(Token::Lit(Lit::Bin(b"abcdefghi123456789")), usl[0]); assert_eq!(Token::Lit(Lit::new_bin(b"abcdefghi123456789")), usl[0]);
} }
mod num_tests { /*
use crate::engine::ql::lex::decode_num_ub as ubdc; safe query tests
mod uint8 { */
use super::*;
#[test] fn make_safe_query(a: &[u8], b: &[u8]) -> (Vec<u8>, usize) {
fn ndecub_u8_ok() { let mut s = Vec::with_capacity(a.len() + b.len());
const SRC: &[u8] = b"123\n"; s.extend(a);
let mut i = 0; s.extend(b);
let mut b = true; (s, a.len())
let x = ubdc::<u8>(SRC, &mut b, &mut i);
assert!(b);
assert_eq!(i, SRC.len());
assert_eq!(x, 123);
}
#[test]
fn ndecub_u8_lb() {
const SRC: &[u8] = b"0\n";
let mut i = 0;
let mut b = true;
let x = ubdc::<u8>(SRC, &mut b, &mut i);
assert!(b);
assert_eq!(i, SRC.len());
assert_eq!(x, 0);
}
#[test]
fn ndecub_u8_ub() {
const SRC: &[u8] = b"255\n";
let mut i = 0;
let mut b = true;
let x = ubdc::<u8>(SRC, &mut b, &mut i);
assert!(b);
assert_eq!(i, SRC.len());
assert_eq!(x, 255);
}
#[test]
fn ndecub_u8_ub_of() {
const SRC: &[u8] = b"256\n";
let mut i = 0;
let mut b = true;
let x = ubdc::<u8>(SRC, &mut b, &mut i);
assert!(!b);
assert_eq!(i, 2);
assert_eq!(x, 0);
}
}
mod sint8 {
use super::*;
#[test]
pub(crate) fn ndecub_i8_ok() {
const SRC: &[u8] = b"-123\n";
let mut i = 0;
let mut b = true;
let x = ubdc::<i8>(SRC, &mut b, &mut i);
assert!(b);
assert_eq!(i, SRC.len());
assert_eq!(x, -123);
}
#[test]
pub(crate) fn ndecub_i8_lb() {
const SRC: &[u8] = b"-128\n";
let mut i = 0;
let mut b = true;
let x = ubdc::<i8>(SRC, &mut b, &mut i);
assert!(b);
assert_eq!(i, SRC.len());
assert_eq!(x, -128);
}
#[test]
pub(crate) fn ndecub_i8_lb_of() {
const SRC: &[u8] = b"-129\n";
let mut i = 0;
let mut b = true;
let x = ubdc::<i8>(SRC, &mut b, &mut i);
assert!(!b);
assert_eq!(i, 3);
assert_eq!(x, 0);
}
#[test]
pub(crate) fn ndecub_i8_ub() {
const SRC: &[u8] = b"127\n";
let mut i = 0;
let mut b = true;
let x = ubdc::<i8>(SRC, &mut b, &mut i);
assert!(b);
assert_eq!(i, SRC.len());
assert_eq!(x, 127);
}
#[test]
pub(crate) fn ndecub_i8_ub_of() {
const SRC: &[u8] = b"128\n";
let mut i = 0;
let mut b = true;
let x = ubdc::<i8>(SRC, &mut b, &mut i);
assert!(!b);
assert_eq!(i, 2);
assert_eq!(x, 0);
}
}
} }
mod safequery_params { #[test]
use crate::engine::{ fn safe_query_all_literals() {
data::{lit::LitIR, spec::Dataspec1D}, let (query, query_window) = make_safe_query(
ql::lex::SafeQueryData, b"? ? ? ? ? ? ?",
}; b"\x00\x01\x01\x021234\n\x03-1234\n\x049\n1234.5678\x0513\nbinarywithlf\n\x065\nsayan",
use rand::seq::SliceRandom; );
#[test] let ret = lex_secure(&query, query_window).unwrap();
fn param_uint() { assert_eq!(
let src = b"12345\n"; ret,
let mut d = Vec::new(); into_vec![Token<'static> => (
let mut i = 0; Token![null],
assert!(SafeQueryData::uint(src, &mut i, &mut d)); Lit::new_bool(true),
assert_eq!(i, src.len()); Lit::new_uint(1234),
assert_eq!(d, vec![LitIR::UnsignedInt(12345)]); Lit::new_sint(-1234),
} Lit::new_float(1234.5678),
#[test] Lit::new_bin(b"binarywithlf\n"),
fn param_sint() { Lit::new_string("sayan".into()),
let src = b"-12345\n"; )],
let mut d = Vec::new(); );
let mut i = 0;
assert!(SafeQueryData::sint(src, &mut i, &mut d));
assert_eq!(i, src.len());
assert_eq!(d, vec![LitIR::SignedInt(-12345)]);
}
#[test]
fn param_bool_true() {
let src = b"true\n";
let mut d = Vec::new();
let mut i = 0;
assert!(SafeQueryData::bool(src, &mut i, &mut d));
assert_eq!(i, src.len());
assert_eq!(d, vec![LitIR::Bool(true)]);
}
#[test]
fn param_bool_false() {
let src = b"false\n";
let mut d = Vec::new();
let mut i = 0;
assert!(SafeQueryData::bool(src, &mut i, &mut d));
assert_eq!(i, src.len());
assert_eq!(d, vec![LitIR::Bool(false)]);
}
#[test]
fn param_float() {
let src = b"4\n3.14";
let mut d = Vec::new();
let mut i = 0;
assert!(SafeQueryData::float(src, &mut i, &mut d));
assert_eq!(i, src.len());
assert_eq!(d, vec![LitIR::Float(3.14)]);
}
#[test]
fn param_bin() {
let src = b"5\nsayan";
let mut d = Vec::new();
let mut i = 0;
assert!(SafeQueryData::bin(src, &mut i, &mut d));
assert_eq!(i, src.len());
assert_eq!(d, vec![LitIR::Bin(b"sayan")]);
}
#[test]
fn param_str() {
let src = b"5\nsayan";
let mut d = Vec::new();
let mut i = 0;
assert!(SafeQueryData::str(src, &mut i, &mut d));
assert_eq!(i, src.len());
assert_eq!(d, vec![LitIR::Str("sayan")]);
}
#[test]
fn param_full_uint() {
let src = b"\x0012345\n";
let r = SafeQueryData::p_revloop(src, 1).unwrap();
assert_eq!(r.as_ref(), [LitIR::UnsignedInt(12345)]);
}
#[test]
fn param_full_sint() {
let src = b"\x01-12345\n";
let r = SafeQueryData::p_revloop(src, 1).unwrap();
assert_eq!(r.as_ref(), [LitIR::SignedInt(-12345)]);
}
#[test]
fn param_full_bool() {
let src = b"\x02true\n";
let r = SafeQueryData::p_revloop(src, 1).unwrap();
assert_eq!(r.as_ref(), [LitIR::Bool(true)]);
let src = b"\x02false\n";
let r = SafeQueryData::p_revloop(src, 1).unwrap();
assert_eq!(r.as_ref(), [LitIR::Bool(false)]);
}
#[test]
fn param_full_float() {
let src = b"\x034\n3.14";
let r = SafeQueryData::p_revloop(src, 1).unwrap();
assert_eq!(r.as_ref(), [LitIR::Float(3.14)]);
let src = b"\x035\n-3.14";
let r = SafeQueryData::p_revloop(src, 1).unwrap();
assert_eq!(r.as_ref(), [LitIR::Float(-3.14)]);
}
#[test]
fn param_full_bin() {
let src = b"\x0412\nhello, world";
let r = SafeQueryData::p_revloop(src, 1).unwrap();
assert_eq!(r.as_ref(), [LitIR::Bin(b"hello, world")]);
}
#[test]
fn param_full_str() {
let src = b"\x0512\nhello, world";
let r = SafeQueryData::p_revloop(src, 1).unwrap();
assert_eq!(r.as_ref(), [LitIR::Str("hello, world")]);
}
#[test]
fn params_mix() {
let mut rng = rand::thread_rng();
const DATA: [&[u8]; 6] = [
b"\x0012345\n",
b"\x01-12345\n",
b"\x02true\n",
b"\x0311\n12345.67890",
b"\x0430\none two three four five binary",
b"\x0527\none two three four five str",
];
let retmap: [LitIR; 6] = [
LitIR::UnsignedInt(12345),
LitIR::SignedInt(-12345),
LitIR::Bool(true),
LitIR::Float(12345.67890),
LitIR::Bin(b"one two three four five binary"),
LitIR::Str("one two three four five str"),
];
for _ in 0..DATA.len().pow(2) {
let mut local_data = DATA;
local_data.shuffle(&mut rng);
let ret: Vec<LitIR> = local_data
.iter()
.map(|v| retmap[v[0] as usize].clone())
.collect();
let src: Vec<u8> = local_data.into_iter().flat_map(|v| v.to_owned()).collect();
let r = SafeQueryData::p_revloop(&src, 6).unwrap();
assert_eq!(r.as_ref(), ret);
}
}
} }
mod safequery_full_param { const SFQ_NULL: &[u8] = b"\x00";
use crate::engine::{ const SFQ_BOOL_FALSE: &[u8] = b"\x01\0";
data::{lit::LitIR, spec::Dataspec1D}, const SFQ_BOOL_TRUE: &[u8] = b"\x01\x01";
ql::lex::{Ident, SafeQueryData, Token}, const SFQ_UINT: &[u8] = b"\x0218446744073709551615\n";
}; const SFQ_SINT: &[u8] = b"\x03-9223372036854775808\n";
#[test] const SFQ_FLOAT: &[u8] = b"\x0411\n3.141592654";
fn p_mini() { const SFQ_BINARY: &[u8] = "\x0546\ncringe😃😄😁😆😅😂🤣😊😸😺".as_bytes();
let query = b"select * from myapp where username = ?"; const SFQ_STRING: &[u8] = "\x0646\ncringe😃😄😁😆😅😂🤣😊😸😺".as_bytes();
let params = b"\x055\nsayan";
let sq = SafeQueryData::parse(query, params, 1).unwrap(); #[test]
assert_eq!( fn safe_query_null() {
sq, let (query, query_window) = make_safe_query(b"?", SFQ_NULL);
SafeQueryData::new_test( let r = lex_secure(&query, query_window).unwrap();
vec![LitIR::Str("sayan")].into_boxed_slice(), assert_eq!(r, vec![Token![null]])
vec![ }
Token![select],
Token![*], #[test]
Token![from], fn safe_query_bool() {
Token::Ident(Ident::from("myapp")), let (query, query_window) = make_safe_query(b"?", SFQ_BOOL_FALSE);
Token![where], let b_false = lex_secure(&query, query_window).unwrap();
Token::Ident(Ident::from("username")), let (query, query_window) = make_safe_query(b"?", SFQ_BOOL_TRUE);
Token![=], let b_true = lex_secure(&query, query_window).unwrap();
Token![?] assert_eq!(
] [b_false, b_true].concat(),
) vec![
); Token::from(Lit::new_bool(false)),
} Token::from(Lit::new_bool(true))
#[test] ]
fn p() { );
let query = b"select * from myapp where username = ? and pass = ?"; }
let params = b"\x055\nsayan\x048\npass1234";
let sq = SafeQueryData::parse(query, params, 2).unwrap(); #[test]
assert_eq!( fn safe_query_uint() {
sq, let (query, query_window) = make_safe_query(b"?", SFQ_UINT);
SafeQueryData::new_test( let int = lex_secure(&query, query_window).unwrap();
vec![LitIR::Str("sayan"), LitIR::Bin(b"pass1234")].into_boxed_slice(), assert_eq!(int, vec![Token::Lit(Lit::new_uint(u64::MAX))]);
vec![ }
Token![select],
Token![*], #[test]
Token![from], fn safe_query_sint() {
Token::Ident(Ident::from("myapp")), let (query, query_window) = make_safe_query(b"?", SFQ_SINT);
Token![where], let int = lex_secure(&query, query_window).unwrap();
Token::Ident(Ident::from("username")), assert_eq!(int, vec![Token::Lit(Lit::new_sint(i64::MIN))]);
Token![=], }
Token![?],
Token![and], #[test]
Token::Ident(Ident::from("pass")), fn safe_query_float() {
Token![=], let (query, query_window) = make_safe_query(b"?", SFQ_FLOAT);
Token![?] let float = lex_secure(&query, query_window).unwrap();
] assert_eq!(float, vec![Token::Lit(Lit::new_float(3.141592654))]);
) }
);
} #[test]
#[test] fn safe_query_binary() {
fn p_pro() { let (query, query_window) = make_safe_query(b"?", SFQ_BINARY);
let query = b"select $notes[~?] from myapp where username = ? and pass = ?"; let binary = lex_secure(&query, query_window).unwrap();
let params = b"\x00100\n\x055\nsayan\x048\npass1234"; assert_eq!(
let sq = SafeQueryData::parse(query, params, 3).unwrap(); binary,
vec![Token::Lit(Lit::new_bin(
"cringe😃😄😁😆😅😂🤣😊😸😺".as_bytes()
))]
);
}
// A lone `?` placeholder bound to the `SFQ_STRING` parameter must lex into
// exactly one string literal token carrying the parameter's UTF-8 payload.
#[test]
fn safe_query_string() {
    let (query, query_window) = make_safe_query(b"?", SFQ_STRING);
    // `query_window` is the boundary value returned by `make_safe_query`
    // alongside the combined buffer; it is passed through to the lexer as-is.
    let string = lex_secure(&query, query_window).unwrap();
    assert_eq!(
        string,
        vec![Token::Lit(Lit::new_string(
            "cringe😃😄😁😆😅😂🤣😊😸😺".into()
        ))]
    );
}
#[test]
fn safe_params_shuffled() {
let expected = [
(SFQ_NULL, Token![null]),
(SFQ_BOOL_FALSE, Token::Lit(Lit::new_bool(false))),
(SFQ_BOOL_TRUE, Token::Lit(Lit::new_bool(true))),
(SFQ_UINT, Token::Lit(Lit::new_uint(u64::MAX))),
(SFQ_SINT, Token::Lit(Lit::new_sint(i64::MIN))),
(SFQ_FLOAT, Token::Lit(Lit::new_float(3.141592654))),
(
SFQ_BINARY,
Token::Lit(Lit::new_bin("cringe😃😄😁😆😅😂🤣😊😸😺".as_bytes())),
),
(
SFQ_STRING,
Token::Lit(Lit::new_string(
"cringe😃😄😁😆😅😂🤣😊😸😺".to_owned().into(),
)),
),
];
let mut rng = crate::util::test_utils::randomizer();
for _ in 0..expected.len().pow(2) {
let mut this_expected = expected.clone();
crate::util::test_utils::shuffle_slice(&mut this_expected, &mut rng);
let param_segment: Vec<u8> = this_expected
.iter()
.map(|(raw, _)| raw.to_vec())
.flatten()
.collect();
let (query, query_window) = make_safe_query(b"? ? ? ? ? ? ? ?", &param_segment);
let ret = lex_secure(&query, query_window).unwrap();
assert_eq!( assert_eq!(
sq, ret,
SafeQueryData::new_test( this_expected
vec![ .into_iter()
LitIR::UnsignedInt(100), .map(|(_, expected)| expected)
LitIR::Str("sayan"), .collect::<Vec<_>>()
LitIR::Bin(b"pass1234") )
]
.into_boxed_slice(),
vec![
Token![select],
Token![$],
Token::Ident(Ident::from("notes")),
Token![open []],
Token![~],
Token![?],
Token![close []],
Token![from],
Token::Ident(Ident::from("myapp")),
Token![where],
Token::Ident(Ident::from("username")),
Token![=],
Token![?],
Token![and],
Token::Ident(Ident::from("pass")),
Token![=],
Token![?]
]
)
);
} }
} }

@ -26,7 +26,7 @@
use { use {
super::{super::lex::Ident, lex_insecure, *}, super::{super::lex::Ident, lex_insecure, *},
crate::engine::data::{lit::Lit, spec::Dataspec1D}, crate::engine::data::lit::Lit,
}; };
mod inspect { mod inspect {
use { use {
@ -71,7 +71,7 @@ mod alter_space {
use { use {
super::*, super::*,
crate::engine::{ crate::engine::{
data::{lit::Lit, spec::Dataspec1D}, data::lit::Lit,
ql::{ast::parse_ast_node_full, ddl::alt::AlterSpace}, ql::{ast::parse_ast_node_full, ddl::alt::AlterSpace},
}, },
}; };
@ -98,8 +98,8 @@ mod alter_space {
AlterSpace::new( AlterSpace::new(
Ident::from("mymodel"), Ident::from("mymodel"),
null_dict! { null_dict! {
"max_entry" => Lit::UnsignedInt(1000), "max_entry" => Lit::new_uint(1000),
"driver" => Lit::Str("ts-0.8".into()) "driver" => Lit::new_string("ts-0.8".into())
} }
) )
); );
@ -130,9 +130,9 @@ mod tymeta {
assert_eq!( assert_eq!(
tymeta, tymeta,
null_dict! { null_dict! {
"hello" => Lit::Str("world".into()), "hello" => Lit::new_string("world".into()),
"loading" => Lit::Bool(true), "loading" => Lit::new_bool(true),
"size" => Lit::UnsignedInt(100) "size" => Lit::new_uint(100)
} }
); );
} }
@ -154,8 +154,8 @@ mod tymeta {
assert_eq!( assert_eq!(
final_ret, final_ret,
null_dict! { null_dict! {
"maxlen" => Lit::UnsignedInt(100), "maxlen" => Lit::new_uint(100),
"unique" => Lit::Bool(true) "unique" => Lit::new_bool(true)
} }
) )
} }
@ -179,10 +179,10 @@ mod tymeta {
assert_eq!( assert_eq!(
final_ret, final_ret,
null_dict! { null_dict! {
"maxlen" => Lit::UnsignedInt(100), "maxlen" => Lit::new_uint(100),
"unique" => Lit::Bool(true), "unique" => Lit::new_bool(true),
"this" => null_dict! { "this" => null_dict! {
"is" => Lit::Str("cool".into()) "is" => Lit::new_string("cool".into())
} }
} }
) )
@ -209,7 +209,7 @@ mod layer {
vec![LayerSpec::new( vec![LayerSpec::new(
Ident::from("string"), Ident::from("string"),
null_dict! { null_dict! {
"maxlen" => Lit::UnsignedInt(100) "maxlen" => Lit::new_uint(100)
} }
)] )]
); );
@ -237,8 +237,8 @@ mod layer {
LayerSpec::new( LayerSpec::new(
Ident::from("list"), Ident::from("list"),
null_dict! { null_dict! {
"unique" => Lit::Bool(true), "unique" => Lit::new_bool(true),
"maxlen" => Lit::UnsignedInt(10), "maxlen" => Lit::new_uint(10),
} }
) )
] ]
@ -257,15 +257,15 @@ mod layer {
LayerSpec::new( LayerSpec::new(
Ident::from("string"), Ident::from("string"),
null_dict! { null_dict! {
"ascii_only" => Lit::Bool(true), "ascii_only" => Lit::new_bool(true),
"maxlen" => Lit::UnsignedInt(255) "maxlen" => Lit::new_uint(255)
} }
), ),
LayerSpec::new( LayerSpec::new(
Ident::from("list"), Ident::from("list"),
null_dict! { null_dict! {
"unique" => Lit::Bool(true), "unique" => Lit::new_bool(true),
"maxlen" => Lit::UnsignedInt(10), "maxlen" => Lit::new_uint(10),
} }
) )
] ]
@ -289,10 +289,13 @@ mod layer {
LayerSpec::new( LayerSpec::new(
Ident::from("list"), Ident::from("list"),
null_dict! { null_dict! {
"maxlen" => Lit::UnsignedInt(100), "maxlen" => Lit::new_uint(100),
}, },
), ),
LayerSpec::new(Ident::from("list"), null_dict!("unique" => Lit::Bool(true))), LayerSpec::new(
Ident::from("list"),
null_dict!("unique" => Lit::new_bool(true)),
),
]; ];
fuzz_tokens(tok.as_slice(), |should_pass, new_tok| { fuzz_tokens(tok.as_slice(), |should_pass, new_tok| {
let layers = parse_ast_node_multiple_full::<LayerSpec>(&new_tok); let layers = parse_ast_node_multiple_full::<LayerSpec>(&new_tok);
@ -360,8 +363,8 @@ mod fields {
[LayerSpec::new( [LayerSpec::new(
Ident::from("string"), Ident::from("string"),
null_dict! { null_dict! {
"maxlen" => Lit::UnsignedInt(10), "maxlen" => Lit::new_uint(10),
"ascii_only" => Lit::Bool(true), "ascii_only" => Lit::new_bool(true),
} }
)] )]
.into(), .into(),
@ -393,14 +396,14 @@ mod fields {
LayerSpec::new( LayerSpec::new(
Ident::from("string"), Ident::from("string"),
null_dict! { null_dict! {
"maxlen" => Lit::UnsignedInt(255), "maxlen" => Lit::new_uint(255),
"ascii_only" => Lit::Bool(true), "ascii_only" => Lit::new_bool(true),
} }
), ),
LayerSpec::new( LayerSpec::new(
Ident::from("list"), Ident::from("list"),
null_dict! { null_dict! {
"unique" => Lit::Bool(true) "unique" => Lit::new_bool(true)
} }
), ),
] ]
@ -555,7 +558,7 @@ mod schemas {
LayerSpec::new( LayerSpec::new(
Ident::from("list"), Ident::from("list"),
null_dict! { null_dict! {
"unique" => Lit::Bool(true) "unique" => Lit::new_bool(true)
} }
) )
], ],
@ -624,7 +627,7 @@ mod schemas {
LayerSpec::new( LayerSpec::new(
Ident::from("list"), Ident::from("list"),
null_dict! { null_dict! {
"unique" => Lit::Bool(true) "unique" => Lit::new_bool(true)
} }
) )
], ],
@ -634,9 +637,9 @@ mod schemas {
], ],
null_dict! { null_dict! {
"env" => null_dict! { "env" => null_dict! {
"free_user_limit" => Lit::UnsignedInt(100), "free_user_limit" => Lit::new_uint(100),
}, },
"storage_driver" => Lit::Str("skyheap".into()), "storage_driver" => Lit::new_string("skyheap".into()),
} }
) )
) )
@ -679,7 +682,7 @@ mod dict_field_syntax {
Ident::from("username"), Ident::from("username"),
vec![LayerSpec::new(Ident::from("string"), null_dict! {})], vec![LayerSpec::new(Ident::from("string"), null_dict! {})],
null_dict! { null_dict! {
"nullable" => Lit::Bool(false), "nullable" => Lit::new_bool(false),
}, },
) )
); );
@ -707,13 +710,13 @@ mod dict_field_syntax {
vec![LayerSpec::new( vec![LayerSpec::new(
Ident::from("string"), Ident::from("string"),
null_dict! { null_dict! {
"minlen" => Lit::UnsignedInt(6), "minlen" => Lit::new_uint(6),
"maxlen" => Lit::UnsignedInt(255), "maxlen" => Lit::new_uint(255),
} }
)], )],
null_dict! { null_dict! {
"nullable" => Lit::Bool(false), "nullable" => Lit::new_bool(false),
"jingle_bells" => Lit::Str("snow".into()), "jingle_bells" => Lit::new_string("snow".into()),
}, },
) )
); );
@ -744,19 +747,19 @@ mod dict_field_syntax {
LayerSpec::new( LayerSpec::new(
Ident::from("string"), Ident::from("string"),
null_dict! { null_dict! {
"ascii_only" => Lit::Bool(true), "ascii_only" => Lit::new_bool(true),
} }
), ),
LayerSpec::new( LayerSpec::new(
Ident::from("list"), Ident::from("list"),
null_dict! { null_dict! {
"unique" => Lit::Bool(true), "unique" => Lit::new_bool(true),
} }
) )
], ],
null_dict! { null_dict! {
"nullable" => Lit::Bool(true), "nullable" => Lit::new_bool(true),
"jingle_bells" => Lit::Str("snow".into()), "jingle_bells" => Lit::new_string("snow".into()),
}, },
) )
); );
@ -863,7 +866,7 @@ mod alter_model_add {
Ident::from("myfield"), Ident::from("myfield"),
[LayerSpec::new(Ident::from("string"), null_dict! {})].into(), [LayerSpec::new(Ident::from("string"), null_dict! {})].into(),
null_dict! { null_dict! {
"nullable" => Lit::Bool(true) "nullable" => Lit::new_bool(true)
}, },
)] )]
.into() .into()
@ -889,7 +892,7 @@ mod alter_model_add {
Ident::from("myfield"), Ident::from("myfield"),
[LayerSpec::new(Ident::from("string"), null_dict! {})].into(), [LayerSpec::new(Ident::from("string"), null_dict! {})].into(),
null_dict! { null_dict! {
"nullable" => Lit::Bool(true) "nullable" => Lit::new_bool(true)
}, },
)] )]
.into() .into()
@ -930,7 +933,7 @@ mod alter_model_add {
Ident::from("myfield"), Ident::from("myfield"),
[LayerSpec::new(Ident::from("string"), null_dict! {})].into(), [LayerSpec::new(Ident::from("string"), null_dict! {})].into(),
null_dict! { null_dict! {
"nullable" => Lit::Bool(true) "nullable" => Lit::new_bool(true)
}, },
), ),
ExpandedField::new( ExpandedField::new(
@ -939,19 +942,19 @@ mod alter_model_add {
LayerSpec::new( LayerSpec::new(
Ident::from("string"), Ident::from("string"),
null_dict! { null_dict! {
"maxlen" => Lit::UnsignedInt(255) "maxlen" => Lit::new_uint(255)
} }
), ),
LayerSpec::new( LayerSpec::new(
Ident::from("list"), Ident::from("list"),
null_dict! { null_dict! {
"unique" => Lit::Bool(true) "unique" => Lit::new_bool(true)
}, },
) )
] ]
.into(), .into(),
null_dict! { null_dict! {
"nullable" => Lit::Bool(false) "nullable" => Lit::new_bool(false)
}, },
) )
] ]
@ -1042,7 +1045,7 @@ mod alter_model_update {
Ident::from("myfield"), Ident::from("myfield"),
[LayerSpec::new(Ident::from("string"), null_dict! {})].into(), [LayerSpec::new(Ident::from("string"), null_dict! {})].into(),
null_dict! { null_dict! {
"nullable" => Lit::Bool(true) "nullable" => Lit::new_bool(true)
}, },
)] )]
.into() .into()
@ -1077,7 +1080,7 @@ mod alter_model_update {
Ident::from("myfield"), Ident::from("myfield"),
[LayerSpec::new(Ident::from("string"), null_dict! {})].into(), [LayerSpec::new(Ident::from("string"), null_dict! {})].into(),
null_dict! { null_dict! {
"nullable" => Lit::Bool(true) "nullable" => Lit::new_bool(true)
}, },
), ),
ExpandedField::new( ExpandedField::new(
@ -1120,14 +1123,14 @@ mod alter_model_update {
Ident::from("myfield"), Ident::from("myfield"),
[LayerSpec::new(Ident::from("string"), null_dict! {})].into(), [LayerSpec::new(Ident::from("string"), null_dict! {})].into(),
null_dict! { null_dict! {
"nullable" => Lit::Bool(true) "nullable" => Lit::new_bool(true)
}, },
), ),
ExpandedField::new( ExpandedField::new(
Ident::from("myfield2"), Ident::from("myfield2"),
[LayerSpec::new( [LayerSpec::new(
Ident::from("string"), Ident::from("string"),
null_dict! {"maxlen" => Lit::UnsignedInt(255)} null_dict! {"maxlen" => Lit::new_uint(255)}
)] )]
.into(), .into(),
null_dict! {}, null_dict! {},

@ -27,7 +27,7 @@
use { use {
super::*, super::*,
crate::engine::{ crate::engine::{
data::{lit::Lit, spec::Dataspec1D, DictGeneric}, data::{lit::Lit, DictGeneric},
ql::{ast::parse_ast_node_full, ddl::syn::DictBasic}, ql::{ast::parse_ast_node_full, ddl::syn::DictBasic},
}, },
}; };
@ -56,7 +56,7 @@ mod dict {
br#"{name: "sayan"}"#, br#"{name: "sayan"}"#,
br#"{name: "sayan",}"#, br#"{name: "sayan",}"#,
}; };
let r = null_dict!("name" => Lit::Str("sayan".into())); let r = null_dict!("name" => Lit::new_string("sayan".into()));
multi_assert_eq!(d1, d2 => r); multi_assert_eq!(d1, d2 => r);
} }
#[test] #[test]
@ -78,9 +78,9 @@ mod dict {
"#, "#,
}; };
let r = null_dict! ( let r = null_dict! (
"name" => Lit::Str("sayan".into()), "name" => Lit::new_string("sayan".into()),
"verified" => Lit::Bool(true), "verified" => Lit::new_bool(true),
"burgers" => Lit::UnsignedInt(152), "burgers" => Lit::new_uint(152),
); );
multi_assert_eq!(d1, d2 => r); multi_assert_eq!(d1, d2 => r);
} }
@ -119,11 +119,11 @@ mod dict {
}; };
multi_assert_eq!( multi_assert_eq!(
d1, d2, d3 => null_dict! { d1, d2, d3 => null_dict! {
"name" => Lit::Str("sayan".into()), "name" => Lit::new_string("sayan".into()),
"notes" => null_dict! { "notes" => null_dict! {
"burgers" => Lit::Str("all the time, extra mayo".into()), "burgers" => Lit::new_string("all the time, extra mayo".into()),
"taco" => Lit::Bool(true), "taco" => Lit::new_bool(true),
"pretzels" => Lit::UnsignedInt(1), "pretzels" => Lit::new_uint(1),
} }
} }
); );
@ -178,7 +178,7 @@ mod dict {
"now" => null_dict! { "now" => null_dict! {
"this" => null_dict! { "this" => null_dict! {
"is" => null_dict! { "is" => null_dict! {
"ridiculous" => Lit::Bool(true), "ridiculous" => Lit::new_bool(true),
} }
} }
} }
@ -207,16 +207,16 @@ mod dict {
} }
"; ";
let ret_dict = null_dict! { let ret_dict = null_dict! {
"the_tradition_is" => Lit::Str("hello, world".into()), "the_tradition_is" => Lit::new_string("hello, world".into()),
"could_have_been" => null_dict! { "could_have_been" => null_dict! {
"this" => Lit::Bool(true), "this" => Lit::new_bool(true),
"or_maybe_this" => Lit::UnsignedInt(100), "or_maybe_this" => Lit::new_uint(100),
"even_this" => Lit::Str("hello, universe!".into()), "even_this" => Lit::new_string("hello, universe!".into()),
}, },
"but_oh_well" => Lit::Str("it continues to be the 'annoying' phrase".into()), "but_oh_well" => Lit::new_string("it continues to be the 'annoying' phrase".into()),
"lorem" => null_dict! { "lorem" => null_dict! {
"ipsum" => null_dict! { "ipsum" => null_dict! {
"dolor" => Lit::Str("sit amet".into()) "dolor" => Lit::new_string("sit amet".into())
} }
} }
}; };
@ -258,7 +258,7 @@ mod null_dict_tests {
assert_eq!( assert_eq!(
d, d,
null_dict! { null_dict! {
"this_is_non_null" => Lit::Str("hello".into()), "this_is_non_null" => Lit::new_string("hello".into()),
"but_this_is_null" => Null, "but_this_is_null" => Null,
} }
) )
@ -279,8 +279,8 @@ mod null_dict_tests {
assert_eq!( assert_eq!(
d, d,
null_dict! { null_dict! {
"a_string" => Lit::Str("this is a string".into()), "a_string" => Lit::new_string("this is a string".into()),
"num" => Lit::UnsignedInt(1234), "num" => Lit::new_uint(1234),
"a_dict" => null_dict! { "a_dict" => null_dict! {
"a_null" => Null, "a_null" => Null,
} }
@ -304,8 +304,8 @@ mod null_dict_tests {
assert_eq!( assert_eq!(
d, d,
null_dict! { null_dict! {
"a_string" => Lit::Str("this is a string".into()), "a_string" => Lit::new_string("this is a string".into()),
"num" => Lit::UnsignedInt(1234), "num" => Lit::new_uint(1234),
"a_dict" => null_dict! { "a_dict" => null_dict! {
"a_null" => Null, "a_null" => Null,
}, },

Loading…
Cancel
Save