Unsafe code review (partial)

Unsafe code review for March, 2023.

NEEDCHECK: Still need to verify unsafe code in index implementations
next
Sayan Nandan 1 year ago
parent f0f67a98fc
commit ccfb7b2e12
No known key found for this signature in database
GPG Key ID: 42EEDF4AE9D96B54

@ -49,52 +49,76 @@ pub struct Datacell {
impl Datacell {
// bool
pub fn new_bool(b: bool) -> Self {
unsafe { Self::new(TagClass::Bool, DataRaw::word(SystemDword::store(b))) }
unsafe {
// UNSAFE(@ohsayan): Correct because we are initializing Self with the correct tag
Self::new(TagClass::Bool, DataRaw::word(SystemDword::store(b)))
}
}
pub unsafe fn read_bool(&self) -> bool {
self.load_word()
}
pub fn try_bool(&self) -> Option<bool> {
self.checked_tag(TagClass::Bool, || unsafe { self.read_bool() })
self.checked_tag(TagClass::Bool, || unsafe {
// UNSAFE(@ohsayan): correct because we just verified the tag
self.read_bool()
})
}
pub fn bool(&self) -> bool {
self.try_bool().unwrap()
}
// uint
pub fn new_uint(u: u64) -> Self {
unsafe { Self::new(TagClass::UnsignedInt, DataRaw::word(SystemDword::store(u))) }
unsafe {
// UNSAFE(@ohsayan): Correct because we are initializing Self with the correct tag
Self::new(TagClass::UnsignedInt, DataRaw::word(SystemDword::store(u)))
}
}
pub unsafe fn read_uint(&self) -> u64 {
self.load_word()
}
pub fn try_uint(&self) -> Option<u64> {
self.checked_tag(TagClass::UnsignedInt, || unsafe { self.read_uint() })
self.checked_tag(TagClass::UnsignedInt, || unsafe {
// UNSAFE(@ohsayan): correct because we just verified the tag
self.read_uint()
})
}
pub fn uint(&self) -> u64 {
self.try_uint().unwrap()
}
// sint
pub fn new_sint(u: i64) -> Self {
unsafe { Self::new(TagClass::SignedInt, DataRaw::word(SystemDword::store(u))) }
unsafe {
// UNSAFE(@ohsayan): Correct because we are initializing Self with the correct tag
Self::new(TagClass::SignedInt, DataRaw::word(SystemDword::store(u)))
}
}
pub unsafe fn read_sint(&self) -> i64 {
self.load_word()
}
pub fn try_sint(&self) -> Option<i64> {
self.checked_tag(TagClass::SignedInt, || unsafe { self.read_sint() })
self.checked_tag(TagClass::SignedInt, || unsafe {
// UNSAFE(@ohsayan): Correct because we just verified the tag
self.read_sint()
})
}
pub fn sint(&self) -> i64 {
self.try_sint().unwrap()
}
// float
pub fn new_float(f: f64) -> Self {
unsafe { Self::new(TagClass::Float, DataRaw::word(SystemDword::store(f))) }
unsafe {
// UNSAFE(@ohsayan): Correct because we are initializing Self with the correct tag
Self::new(TagClass::Float, DataRaw::word(SystemDword::store(f)))
}
}
pub unsafe fn read_float(&self) -> f64 {
self.load_word()
}
pub fn try_float(&self) -> Option<f64> {
self.checked_tag(TagClass::Float, || unsafe { self.read_float() })
self.checked_tag(TagClass::Float, || unsafe {
// UNSAFE(@ohsayan): Correct because we just verified the tag
self.read_float()
})
}
pub fn float(&self) -> f64 {
self.try_float().unwrap()
@ -103,6 +127,7 @@ impl Datacell {
pub fn new_bin(s: Box<[u8]>) -> Self {
let mut md = ManuallyDrop::new(s);
unsafe {
// UNSAFE(@ohsayan): Correct because we are initializing Self with the correct tag
Self::new(
TagClass::Bin,
DataRaw::word(SystemDword::store((md.as_mut_ptr(), md.len()))),
@ -114,7 +139,10 @@ impl Datacell {
slice::from_raw_parts::<u8>(p, l)
}
pub fn try_bin(&self) -> Option<&[u8]> {
self.checked_tag(TagClass::Bin, || unsafe { self.read_bin() })
self.checked_tag(TagClass::Bin, || unsafe {
// UNSAFE(@ohsayan): Correct because we just verified the tag
self.read_bin()
})
}
pub fn bin(&self) -> &[u8] {
self.try_bin().unwrap()
@ -123,6 +151,7 @@ impl Datacell {
pub fn new_str(s: Box<str>) -> Self {
let mut md = ManuallyDrop::new(s.into_boxed_bytes());
unsafe {
// UNSAFE(@ohsayan): Correct because we are initializing Self with the correct tag
Self::new(
TagClass::Str,
DataRaw::word(SystemDword::store((md.as_mut_ptr(), md.len()))),
@ -134,20 +163,29 @@ impl Datacell {
str::from_utf8_unchecked(slice::from_raw_parts(p, l))
}
pub fn try_str(&self) -> Option<&str> {
self.checked_tag(TagClass::Str, || unsafe { self.read_str() })
self.checked_tag(TagClass::Str, || unsafe {
// UNSAFE(@ohsayan): Correct because we just verified the tag
self.read_str()
})
}
pub fn str(&self) -> &str {
self.try_str().unwrap()
}
// list
pub fn new_list(l: Vec<Self>) -> Self {
unsafe { Self::new(TagClass::List, DataRaw::rwl(RwLock::new(l))) }
unsafe {
// UNSAFE(@ohsayan): Correct because we are initializing Self with the correct tag
Self::new(TagClass::List, DataRaw::rwl(RwLock::new(l)))
}
}
pub unsafe fn read_list(&self) -> &RwLock<Vec<Self>> {
&self.data.rwl
}
pub fn try_list(&self) -> Option<&RwLock<Vec<Self>>> {
self.checked_tag(TagClass::List, || unsafe { self.read_list() })
self.checked_tag(TagClass::List, || unsafe {
// UNSAFE(@ohsayan): Correct because we just verified the tag
self.read_list()
})
}
pub fn list(&self) -> &RwLock<Vec<Self>> {
self.try_list().unwrap()
@ -176,20 +214,25 @@ impl<'a> From<LitIR<'a>> for Datacell {
fn from(l: LitIR<'a>) -> Self {
match l.kind().tag_class() {
tag if tag < TagClass::Bin => unsafe {
// DO NOT RELY ON the payload's bit pattern; it's padded
// UNSAFE(@ohsayan): Correct because we are using the same tag, and in this case the type doesn't need any advanced construction
Datacell::new(
l.kind().tag_class(),
// DO NOT RELY ON the payload's bit pattern; it's padded
DataRaw::word(SystemDword::store_qw(l.data().load_qw())),
)
},
tag @ (TagClass::Bin | TagClass::Str) => unsafe {
// UNSAFE(@ohsayan): Correct because we are using the same tag, and in this case the type requires a new heap for construction
let mut bin = ManuallyDrop::new(l.read_bin_uck().to_owned().into_boxed_slice());
Datacell::new(
tag,
DataRaw::word(SystemDword::store((bin.as_mut_ptr(), bin.len()))),
)
},
_ => unreachable!(),
_ => unsafe {
// UNSAFE(@ohsayan): a Lit will never be higher than a string
impossible!()
},
}
}
}
@ -223,6 +266,7 @@ impl Datacell {
}
pub fn null() -> Self {
unsafe {
// UNSAFE(@ohsayan): This is a hack. It's safe because we set init to false
Self::_new(
TagClass::Bool,
DataRaw::word(NativeQword::store_qw(0)),
@ -333,10 +377,14 @@ impl Drop for Datacell {
fn drop(&mut self) {
match self.tag {
TagClass::Str | TagClass::Bin => unsafe {
// UNSAFE(@ohsayan): we have checked that the cell is initialized (uninit will not satisfy this class), and we have checked its class
let (p, l) = self.load_word();
engine::mem::dealloc_array::<u8>(p, l)
},
TagClass::List => unsafe { ManuallyDrop::drop(&mut self.data.rwl) },
TagClass::List => unsafe {
// UNSAFE(@ohsayan): we have checked that the cell is initialized (uninit will not satisfy this class), and we have checked its class
ManuallyDrop::drop(&mut self.data.rwl)
},
_ => {}
}
}
@ -347,23 +395,29 @@ impl Clone for Datacell {
fn clone(&self) -> Self {
let data = match self.tag {
TagClass::Str | TagClass::Bin => unsafe {
let block = ManuallyDrop::new(self.read_bin().to_owned().into_boxed_slice());
// UNSAFE(@ohsayan): we have checked that the cell is initialized (uninit will not satisfy this class), and we have checked its class
let mut block = ManuallyDrop::new(self.read_bin().to_owned().into_boxed_slice());
DataRaw {
word: ManuallyDrop::new(SystemDword::store((block.as_ptr(), block.len()))),
word: ManuallyDrop::new(SystemDword::store((block.as_mut_ptr(), block.len()))),
}
},
TagClass::List => unsafe {
// UNSAFE(@ohsayan): we have checked that the cell is initialized (uninit will not satisfy this class), and we have checked its class
let data = self.read_list().read().iter().cloned().collect();
DataRaw {
rwl: ManuallyDrop::new(RwLock::new(data)),
}
},
_ => unsafe {
// UNSAFE(@ohsayan): we have checked that the cell is initialized (uninit will not satisfy this class), and we have checked its class
DataRaw {
word: ManuallyDrop::new(mem::transmute_copy(&self.data.word)),
}
},
};
unsafe { Self::_new(self.tag, data, self.init) }
unsafe {
// UNSAFE(@ohsayan): same tag, we correctly init data and also return the same init state
Self::_new(self.tag, data, self.init)
}
}
}

@ -245,20 +245,22 @@ impl Field {
}
}
#[inline(always)]
fn single_pass_for(&self, dc: &Datacell) -> bool {
((self.layers().len() == 1) & (self.layers()[0].tag.tag_class() == dc.kind()))
| (self.nullable & dc.is_null())
}
#[inline(always)]
fn compute_index(&self, dc: &Datacell) -> usize {
// escape check if it makes sense to
!(self.nullable & dc.is_null()) as usize * self.layers()[0].tag.tag_class().word()
if ((!self.is_nullable()) & dc.is_null()) | (self.layers[0].tag.tag_class() != dc.kind()) {
// illegal states: (1) bad null (2) tags don't match
7
} else {
self.layers()[0].tag.tag_class().word()
}
}
pub fn validate_data_fpath(&self, data: &Datacell) -> bool {
// if someone sends a PR with an added check, I'll personally come to your house and throw a brick on your head
if self.single_pass_for(data) {
if self.layers.len() == 1 {
layertrace("fpath");
unsafe { LVERIFY[self.compute_index(data)](self.layers()[0], data) }
unsafe {
// UNSAFE(@ohsayan): checked for non-null, and used correct class
LVERIFY[self.compute_index(data)](self.layers()[0], data)
}
} else {
Self::rverify_layers(self.layers(), data)
}
@ -268,19 +270,29 @@ impl Field {
let layer = layers[0];
let layers = &layers[1..];
match (layer.tag.tag_class(), data.kind()) {
(layer_tag, data_tag) if (layer_tag == data_tag) & (layer_tag < TagClass::List) => {
// time to go home
(unsafe { LVERIFY[layer.tag.tag_class().word()](layer, data) } & layers.is_empty())
}
(TagClass::List, TagClass::List) => unsafe {
let mut okay = !layers.is_empty() & LVERIFY[TagClass::List.word()](layer, data);
let list = data.read_list().read();
let mut it = list.iter();
while (it.len() != 0) & okay {
okay &= Self::rverify_layers(layers, it.next().unwrap());
(TagClass::List, TagClass::List) if !layers.is_empty() => {
let mut okay = unsafe {
// UNSAFE(@ohsayan): we've verified this
LVERIFY[TagClass::List.word()](layer, data)
};
let list = unsafe {
// UNSAFE(@ohsayan): we verified tags
data.read_list()
};
let lread = list.read();
let mut i = 0;
while (i < lread.len()) & okay {
okay &= Self::rverify_layers(layers, &lread[i]);
i += 1;
}
okay
},
}
(tag_a, tag_b) if tag_a == tag_b => {
unsafe {
// UNSAFE(@ohsayan): same tags; not-null for now so no extra handling required here
LVERIFY[tag_a.word()](layer, data)
}
}
_ => false,
}
}
@ -374,7 +386,7 @@ impl Layer {
}
}
static LVERIFY: [unsafe fn(Layer, &Datacell) -> bool; 7] = [
static LVERIFY: [unsafe fn(Layer, &Datacell) -> bool; 8] = [
lverify_bool,
lverify_uint,
lverify_sint,
@ -382,6 +394,7 @@ static LVERIFY: [unsafe fn(Layer, &Datacell) -> bool; 7] = [
lverify_bin,
lverify_str,
lverify_list,
|_, _| false,
];
#[cfg(test)]

@ -244,7 +244,7 @@ pub unsafe trait DataspecMethods1D: Dataspec1D {
// UNSAFE(@ohsayan): we are heap allocated, and we're calling the implementor's definition
<Self as DataspecRaw1D>::clone_str(Dataspec1D::read_str_uck(self))
},
TagClass::Str if <Self as DataspecRaw1D>::HEAP_STR => unsafe {
TagClass::Bin if <Self as DataspecRaw1D>::HEAP_BIN => unsafe {
// UNSAFE(@ohsayan): we are heap allocated, and we're calling the implementor's definition
<Self as DataspecRaw1D>::clone_bin(Dataspec1D::read_bin_uck(self))
},

@ -75,11 +75,17 @@ impl<const N: usize> AStr<N> {
}
#[inline(always)]
pub fn _as_str(&self) -> &str {
unsafe { mem::transmute(self._as_bytes()) }
unsafe {
// UNSAFE(@ohsayan): same layout
mem::transmute(self._as_bytes())
}
}
#[inline(always)]
pub fn _as_mut_str(&mut self) -> &mut str {
unsafe { mem::transmute(self._as_bytes_mut()) }
unsafe {
// UNSAFE(@ohsayan): same layout
mem::transmute(self._as_bytes_mut())
}
}
pub fn _as_bytes(&self) -> &[u8] {
self.base.as_slice()

@ -151,7 +151,9 @@ impl<const N: usize, T: Copy> UArray<N, T> {
debug_assert!(s.len() <= N);
let mut new = Self::new();
unsafe {
// UNSAFE(@ohsayan): the src pointer *will* be correct and the dst is us, and we own our stack here
ptr::copy_nonoverlapping(s.as_ptr(), new.a.as_mut_ptr() as *mut T, s.len());
// UNSAFE(@ohsayan): and here goes the call; same length as the origin buffer
new.set_len(s.len());
}
new

@ -82,7 +82,7 @@ impl<const N: usize, T> VInline<N, T> {
}
#[inline(always)]
pub fn remove(&mut self, idx: usize) -> T {
if idx >= self.len() {
if !(idx < self.len()) {
panic!("index out of range");
}
unsafe {
@ -190,11 +190,13 @@ impl<const N: usize, T> VInline<N, T> {
return;
}
if self.l <= N {
// the current can be fit into the stack, and we aren't on the stack. so copy data from heap and move it to the stack
unsafe {
// UNSAFE(@ohsayan): non-null heap
self.mv_to_stack();
}
} else {
// in this case, we can't move to stack but can optimize the heap size. so create a new heap, memcpy old heap and destroy old heap (NO dtor)
let nb = Self::alloc_block(self.len());
unsafe {
// UNSAFE(@ohsayan): non-overlapping; non-null
@ -217,28 +219,27 @@ impl<const N: usize, T> VInline<N, T> {
}
#[inline]
fn grow(&mut self) {
if !(self.l == self.capacity()) {
return;
}
// allocate new block
let nc = self.ncap();
let nb = Self::alloc_block(nc);
if self.on_stack() {
// stack -> heap
unsafe {
// UNSAFE(@ohsayan): non-null; valid len
ptr::copy_nonoverlapping(self.d.s.as_ptr() as *const T, nb, self.l);
}
} else {
unsafe {
// UNSAFE(@ohsayan): non-null; valid len
ptr::copy_nonoverlapping(self.d.h.cast_const(), nb, self.l);
// UNSAFE(@ohsayan): non-null heap
self.dealloc_heap(self.d.h);
if self.l == self.capacity() {
// allocate new block because we've run out of capacity
let nc = self.ncap();
let nb = Self::alloc_block(nc);
if self.on_stack() {
// stack -> heap
unsafe {
// UNSAFE(@ohsayan): non-null; valid len
ptr::copy_nonoverlapping(self.d.s.as_ptr() as *const T, nb, self.l);
}
} else {
unsafe {
// UNSAFE(@ohsayan): non-null; valid len
ptr::copy_nonoverlapping(self.d.h.cast_const(), nb, self.l);
// UNSAFE(@ohsayan): non-null heap
self.dealloc_heap(self.d.h);
}
}
self.d.h = nb;
self.c = nc;
}
self.d.h = nb;
self.c = nc;
}
#[inline(always)]
unsafe fn dealloc_heap(&mut self, heap: *mut T) {
@ -338,6 +339,7 @@ impl<const N: usize, T> IntoIter<N, T> {
return None;
}
unsafe {
// UNSAFE(@ohsayan): we get the back pointer and move back; always behind EOA so we're chill
self.l -= 1;
ptr::read(self.v._as_ptr().add(self.l).cast())
}

@ -105,7 +105,10 @@ impl SystemDword for NativeDword {
let x;
#[cfg(target_pointer_width = "32")]
{
x = unsafe { core::mem::transmute(u) };
x = unsafe {
// UNSAFE(@ohsayan): same layout and this is a stupidly simple cast and it's wild that the rust std doesn't have a simpler way to do it
core::mem::transmute(u)
};
}
#[cfg(target_pointer_width = "64")]
{
@ -122,7 +125,10 @@ impl SystemDword for NativeDword {
let x;
#[cfg(target_pointer_width = "32")]
{
x = unsafe { core::mem::transmute_copy(self) }
x = unsafe {
// UNSAFE(@ohsayan): same layout and this is a stupidly simple cast and it's wild that the rust std doesn't have a simpler way to do it
core::mem::transmute_copy(self)
}
}
#[cfg(target_pointer_width = "64")]
{
@ -153,7 +159,10 @@ impl SystemDword for NativeTword {
let x;
#[cfg(target_pointer_width = "32")]
{
let [a, b]: [usize; 2] = unsafe { core::mem::transmute(u) };
let [a, b]: [usize; 2] = unsafe {
// UNSAFE(@ohsayan): same layout and this is a stupidly simple cast and it's wild that the rust std doesn't have a simpler way to do it
core::mem::transmute(u)
};
x = [a, b, 0];
}
#[cfg(target_pointer_width = "64")]
@ -172,7 +181,10 @@ impl SystemDword for NativeTword {
#[cfg(target_pointer_width = "32")]
{
let ab = [self.0[0], self.0[1]];
x = unsafe { core::mem::transmute(ab) };
x = unsafe {
// UNSAFE(@ohsayan): same layout and this is a stupidly simple cast and it's wild that the rust std doesn't have a simpler way to do it
core::mem::transmute(ab)
};
}
#[cfg(target_pointer_width = "64")]
{
@ -209,7 +221,10 @@ impl SystemDword for NativeQword {
let ret;
#[cfg(target_pointer_width = "32")]
{
let [a, b]: [usize; 2] = unsafe { core::mem::transmute(u) };
let [a, b]: [usize; 2] = unsafe {
// UNSAFE(@ohsayan): same layout and this is a stupidly simple cast and it's wild that the rust std doesn't have a simpler way to do it
core::mem::transmute(u)
};
ret = <Self as SystemQword>::store_full(a, b, 0, 0);
}
#[cfg(target_pointer_width = "64")]
@ -225,7 +240,10 @@ impl SystemDword for NativeQword {
let ret;
#[cfg(target_pointer_width = "32")]
{
ret = unsafe { core::mem::transmute([self.0[0], self.0[1]]) };
ret = unsafe {
// UNSAFE(@ohsayan): same layout and this is a stupidly simple cast and it's wild that the rust std doesn't have a simpler way to do it
core::mem::transmute([self.0[0], self.0[1]])
};
}
#[cfg(target_pointer_width = "64")]
{

@ -416,10 +416,12 @@ impl<'a> Entity<'a> {
let is_full = Self::tokens_with_full(tok);
let r = match () {
_ if is_full => unsafe {
// UNSAFE(@ohsayan): just verified signature
*c += 3;
Self::full_entity_from_slice(tok)
},
_ if is_current => unsafe {
// UNSAFE(@ohsayan): just verified signature
*c += 1;
Self::single_entity_from_slice(tok)
},
@ -451,6 +453,7 @@ impl<'a> Entity<'a> {
let is_full = state.cursor_signature_match_entity_full_rounded();
let is_single = state.cursor_has_ident_rounded();
unsafe {
// UNSAFE(@ohsayan): verified signatures
if is_full {
state.cursor_ahead_by(3);
*d = MaybeInit::new(Entity::full_entity_from_slice(tok));
@ -469,6 +472,7 @@ impl<'a> Entity<'a> {
let is_full = tok[0].is_ident() && tok[1] == Token![.] && tok[2].is_ident();
let is_single = tok[0].is_ident();
unsafe {
// UNSAFE(@ohsayan): verified signatures
if is_full {
state.cursor_ahead_by(3);
*d = MaybeInit::new(Entity::full_entity_from_slice(tok));

@ -117,7 +117,10 @@ impl<'a> AlterModel<'a> {
return compiler::cold_rerr(LangError::BadSyntax);
// FIXME(@ohsayan): bad because no specificity
}
let model_name = unsafe { state.fw_read().uck_read_ident() };
let model_name = unsafe {
// UNSAFE(@ohsayan): did rounded check for ident in the above branch
state.fw_read().uck_read_ident()
};
let kind = match state.fw_read() {
Token![add] => AlterKind::alter_add(state),
Token![remove] => AlterKind::alter_remove(state),

@ -55,7 +55,7 @@ impl<'a> DropSpace<'a> {
if state.exhausted() {
return Ok(DropSpace::new(
unsafe {
// UNSAFE(@ohsayan): Safe because the match predicate ensures that tok[1] is indeed an ident
// UNSAFE(@ohsayan): Safe because the if predicate ensures that tok[0] (relative) is indeed an ident
ident.uck_read_ident()
},
force,

@ -151,18 +151,35 @@ where
}
(tok, DictFoldState::LIT_OR_OB) if state.can_read_lit_from(tok) => {
// found lit
unsafe {
let v = Some(state.read_lit_unchecked_from(tok).into());
state.poison_if_not(dict.insert(key.take().as_str().into(), v).is_none());
}
let v = Some(unsafe {
// UNSAFE(@ohsayan): verified at guard
state.read_lit_unchecked_from(tok).into()
});
state.poison_if_not(
dict.insert(
unsafe {
// UNSAFE(@ohsayan): we switch to this state only when we are in the LIT_OR_OB state. this means that we've already read in a key
key.take().as_str().into()
},
v,
)
.is_none(),
);
// after lit we're either done or expect something else
mstate = DictFoldState::COMMA_OR_CB;
}
(Token![null], DictFoldState::LIT_OR_OB) => {
// found a null
unsafe {
state.poison_if_not(dict.insert(key.take().as_str().into(), None).is_none());
}
state.poison_if_not(
dict.insert(
unsafe {
// UNSAFE(@ohsayan): we only switch to this when we've already read in a key
key.take().as_str().into()
},
None,
)
.is_none(),
);
// after a null (essentially counts as a lit) we're either done or expect something else
mstate = DictFoldState::COMMA_OR_CB;
}
@ -170,12 +187,16 @@ where
// found a nested dict
let mut ndict = DictGeneric::new();
_rfold_dict::<Qd, NoBreakpoint>(DictFoldState::CB_OR_IDENT, state, &mut ndict);
unsafe {
state.poison_if_not(
dict.insert(key.take().as_str().into(), Some(ndict.into()))
.is_none(),
);
}
state.poison_if_not(
dict.insert(
unsafe {
// UNSAFE(@ohsayan): correct again because whenever we hit an expression position, we've already read in a key (ident)
key.take().as_str().into()
},
Some(ndict.into()),
)
.is_none(),
);
mstate = DictFoldState::COMMA_OR_CB;
}
(Token![,], DictFoldState::COMMA_OR_CB) => {
@ -240,11 +261,8 @@ states! {
}
}
fn rfold_layers<'a, Qd: QueryData<'a>>(
mut mstate: LayerFoldState,
state: &mut State<'a, Qd>,
layers: &mut Vec<LayerSpec<'a>>,
) {
fn rfold_layers<'a, Qd: QueryData<'a>>(state: &mut State<'a, Qd>, layers: &mut Vec<LayerSpec<'a>>) {
let mut mstate = LayerFoldState::BEGIN_IDENT;
let mut ty = MaybeInit::uninit();
let mut props = Default::default();
while state.loop_tt() {
@ -260,7 +278,7 @@ fn rfold_layers<'a, Qd: QueryData<'a>>(
// but we first need a colon
state.poison_if_not(state.cursor_rounded_eq(Token![:]));
state.cursor_ahead_if(state.okay());
rfold_layers(LayerFoldState::BEGIN_IDENT, state, layers);
rfold_layers(state, layers);
// we are yet to parse the remaining props
mstate = LayerFoldState::FOLD_INCOMPLETE;
} else {
@ -297,7 +315,10 @@ fn rfold_layers<'a, Qd: QueryData<'a>>(
if ((mstate == LayerFoldState::FINAL) | (mstate == LayerFoldState::FINAL_OR_OB)) & state.okay()
{
layers.push(LayerSpec {
ty: unsafe { ty.take() },
ty: unsafe {
// UNSAFE(@ohsayan): our start state always looks for an ident
ty.take()
},
props,
});
} else {
@ -351,7 +372,7 @@ impl<'a> FieldSpec<'a> {
};
// layers
let mut layers = Vec::new();
rfold_layers(LayerFoldState::BEGIN_IDENT, state, &mut layers);
rfold_layers(state, &mut layers);
if state.okay() {
Ok(FieldSpec {
field_name: field_name.clone(),
@ -403,7 +424,7 @@ impl<'a> ExpandedField<'a> {
}
state.poison_if_not(state.cursor_eq(Token![:]));
state.cursor_ahead();
rfold_layers(LayerFoldState::BEGIN_IDENT, state, &mut layers);
rfold_layers(state, &mut layers);
match state.fw_read() {
Token![,] => {
rfold_dict(DictFoldState::CB_OR_IDENT, state, &mut props);
@ -489,7 +510,7 @@ mod impls {
use {
super::{
rfold_dict, rfold_layers, rfold_tymeta, DictFoldState, DictGeneric, ExpandedField,
FieldSpec, LayerFoldState, LayerSpec,
FieldSpec, LayerSpec,
},
crate::engine::{
error::LangResult,
@ -511,7 +532,7 @@ mod impls {
const VERIFY: bool = true;
fn _from_state<Qd: QueryData<'a>>(state: &mut State<'a, Qd>) -> LangResult<Self> {
let mut layers = Vec::new();
rfold_layers(LayerFoldState::BEGIN_IDENT, state, &mut layers);
rfold_layers(state, &mut layers);
assert!(layers.len() == 1);
Ok(layers.swap_remove(0))
}
@ -519,7 +540,7 @@ mod impls {
state: &mut State<'a, Qd>,
) -> LangResult<Vec<Self>> {
let mut l = Vec::new();
rfold_layers(LayerFoldState::BEGIN_IDENT, state, &mut l);
rfold_layers(state, &mut l);
Ok(l)
}
}

@ -368,12 +368,12 @@ impl<'a> InsertStatement<'a> {
}
if state.okay() {
let data = unsafe {
// UNSAFE(@ohsayan): state's flag guarantees correctness
// UNSAFE(@ohsayan): state's flag guarantees correctness (see wildcard branch)
data.unwrap_unchecked()
};
Ok(InsertStatement {
entity: unsafe {
// UNSAFE(@ohsayan): state's flag ensures correctness
// UNSAFE(@ohsayan): state's flag ensures correctness (see Entity::parse_entity)
entity.assume_init()
},
data,

@ -77,7 +77,10 @@ impl<'a> InsecureLexer<'a> {
fn _lex(&mut self) {
let ref mut slf = self.base;
while slf.not_exhausted() && slf.no_error() {
match unsafe { slf.deref_cursor() } {
match unsafe {
// UNSAFE(@ohsayan): Verified non-null from pre
slf.deref_cursor()
} {
byte if byte.is_ascii_alphabetic() => slf.scan_ident_or_keyword(),
#[cfg(test)]
byte if byte == b'\x01' => {
@ -104,6 +107,7 @@ impl<'a> InsecureLexer<'a> {
#[inline(always)]
fn scan_signed_integer(slf: &mut RawLexer<'a>) {
unsafe {
// UNSAFE(@ohsayan): We hit an integer hence this was called
slf.incr_cursor();
}
if slf.peek_is(|b| b.is_ascii_digit()) {
@ -116,8 +120,10 @@ impl<'a> InsecureLexer<'a> {
while slf.peek_is_and_forward(|b| b.is_ascii_digit()) {}
let wseof = slf.peek_is(|char| !char.is_ascii_alphabetic()) || slf.exhausted();
match unsafe {
// UNSAFE(@ohsayan): a sequence of ASCII bytes in the integer range will always be correct unicode
str::from_utf8_unchecked(slice::from_raw_parts(
start,
// UNSAFE(@ohsayan): valid cursor and start pointers
slf.cursor().offset_from(start) as usize,
))
}
@ -137,47 +143,66 @@ impl<'a> InsecureLexer<'a> {
#[inline(always)]
fn scan_unsigned_integer(slf: &mut RawLexer<'a>) {
let s = slf.cursor();
unsafe {
while slf.peek_is(|b| b.is_ascii_digit()) {
while slf.peek_is(|b| b.is_ascii_digit()) {
unsafe {
// UNSAFE(@ohsayan): since we're going ahead, this is correct (until EOA)
slf.incr_cursor();
}
}
/*
1234; // valid
1234} // valid
1234{ // invalid
1234, // valid
1234a // invalid
*/
let wseof = slf.peek_is(|char| !char.is_ascii_alphabetic()) || slf.exhausted();
match unsafe {
/*
1234; // valid
1234} // valid
1234{ // invalid
1234, // valid
1234a // invalid
UNSAFE(@ohsayan):
(1) Valid cursor and start pointer (since we copy it from the cursor which is correct)
(2) All ASCII alphabetic bytes are captured, hence this will always be a correct unicode string
*/
let wseof = slf.peek_is(|char| !char.is_ascii_alphabetic()) || slf.exhausted();
match str::from_utf8_unchecked(slice::from_raw_parts(
str::from_utf8_unchecked(slice::from_raw_parts(
s,
slf.cursor().offset_from(s) as usize,
))
.parse()
{
Ok(num) if compiler::likely(wseof) => {
slf.tokens.push(Token::Lit(Lit::UnsignedInt(num)))
}
_ => slf.set_error(LexError::InvalidUnsignedLiteral),
}
.parse()
{
Ok(num) if compiler::likely(wseof) => {
slf.tokens.push(Token::Lit(Lit::UnsignedInt(num)))
}
_ => slf.set_error(LexError::InvalidUnsignedLiteral),
}
}
#[inline(always)]
fn scan_binary_literal(slf: &mut RawLexer<'a>) {
unsafe {
// UNSAFE(@ohsayan): cursor increment since we hit the marker byte (CR)
slf.incr_cursor();
}
let mut size = 0usize;
let mut okay = true;
while slf.not_exhausted() && unsafe { slf.deref_cursor() != b'\n' } && okay {
while slf.not_exhausted()
&& unsafe {
// UNSAFE(@ohsayan): verified non-exhaustion
slf.deref_cursor() != b'\n'
}
&& okay
{
/*
Don't ask me how stupid this is. Like, I was probably in some "mood" when I wrote this
and it works duh, but isn't the most elegant of things (could I have just used a parse?
nah, I'm just a hardcore numeric normie)
-- Sayan
*/
let byte = unsafe { slf.deref_cursor() };
let byte = unsafe {
// UNSAFE(@ohsayan): The pre invariant guarantees that this is correct
slf.deref_cursor()
};
okay &= byte.is_ascii_digit();
let (prod, of_flag) = size.overflowing_mul(10);
okay &= !of_flag;
@ -185,6 +210,7 @@ impl<'a> InsecureLexer<'a> {
size = sum;
okay &= !of_flag;
unsafe {
// UNSAFE(@ohsayan): We just read something, so this is fine (until EOA)
slf.incr_cursor();
}
}
@ -192,7 +218,9 @@ impl<'a> InsecureLexer<'a> {
okay &= slf.remaining() >= size;
if compiler::likely(okay) {
unsafe {
// UNSAFE(@ohsayan): Correct cursor and length (from above we know that we have enough bytes)
slf.push_token(Lit::Bin(slice::from_raw_parts(slf.cursor(), size)));
// UNSAFE(@ohsayan): Correct length increment
slf.incr_cursor_by(size);
}
} else {
@ -202,22 +230,31 @@ impl<'a> InsecureLexer<'a> {
#[inline(always)]
fn scan_quoted_string(slf: &mut RawLexer<'a>, quote_style: u8) {
debug_assert!(
unsafe { slf.deref_cursor() } == quote_style,
unsafe {
// UNSAFE(@ohsayan): yessir, we just hit this byte. if called elsewhere, this function will crash and burn (or simply, segfault)
slf.deref_cursor()
} == quote_style,
"illegal call to scan_quoted_string"
);
unsafe { slf.incr_cursor() }
unsafe {
// UNSAFE(@ohsayan): Increment this cursor (this is correct since we just hit the quote)
slf.incr_cursor()
}
let mut buf = Vec::new();
unsafe {
while slf.peek_neq(quote_style) {
// UNSAFE(@ohsayan): deref is good since peek passed
match slf.deref_cursor() {
b if b != b'\\' => {
buf.push(b);
}
_ => {
// UNSAFE(@ohsayan): we read one byte, so this should work
slf.incr_cursor();
if slf.exhausted() {
break;
}
// UNSAFE(@ohsayan): correct because of the above branch
let b = slf.deref_cursor();
let quote = b == quote_style;
let bs = b == b'\\';
@ -228,6 +265,11 @@ impl<'a> InsecureLexer<'a> {
}
}
}
/*
UNSAFE(@ohsayan): This is correct because:
(a) If we are in arm 1: we move the cursor ahead from the `\` byte (the branch doesn't do it)
(b) If we are in arm 2: we don't skip the second quote byte in the branch, hence this is correct
*/
slf.incr_cursor();
}
let terminated = slf.peek_eq_and_forward(quote_style);
@ -260,7 +302,10 @@ impl<'a> SafeLexer<'a> {
fn _lex(self) -> LexResult<Vec<Token<'a>>> {
let Self { base: mut l } = self;
while l.not_exhausted() && l.no_error() {
let b = unsafe { l.deref_cursor() };
let b = unsafe {
// UNSAFE(@ohsayan): This is correct because of the pre invariant
l.deref_cursor()
};
match b {
// ident or kw
b if b.is_ascii_alphabetic() => l.scan_ident_or_keyword(),
@ -469,7 +514,10 @@ impl<'b> SafeQueryData<'b> {
// incr cursor
i += mx_extract;
*cnt += i;
unsafe { slice::from_raw_parts(src.as_ptr(), mx_extract) }
unsafe {
// UNSAFE(@ohsayan): src is correct (guaranteed). even if the decoded length returns an error we still remain within bounds of the EOA
slice::from_raw_parts(src.as_ptr(), mx_extract)
}
}
#[inline(always)]
pub(super) fn uint<'a>(src: Slice<'a>, cnt: &mut usize, data: &mut Vec<LitIR<'a>>) -> bool {

@ -412,7 +412,10 @@ impl<'a> RawLexer<'a> {
}
#[inline(always)]
pub(super) fn remaining(&self) -> usize {
unsafe { self.e.offset_from(self.c) as usize }
unsafe {
// UNSAFE(@ohsayan): valid ptrs
self.e.offset_from(self.c) as usize
}
}
#[inline(always)]
pub(super) unsafe fn deref_cursor(&self) -> u8 {
@ -437,12 +440,21 @@ impl<'a> RawLexer<'a> {
}
#[inline(always)]
pub(super) fn peek_is(&mut self, f: impl FnOnce(u8) -> bool) -> bool {
self.not_exhausted() && unsafe { f(self.deref_cursor()) }
self.not_exhausted()
&& unsafe {
// UNSAFE(@ohsayan): verified cursor is nonnull
f(self.deref_cursor())
}
}
#[inline(always)]
pub(super) fn peek_is_and_forward(&mut self, f: impl FnOnce(u8) -> bool) -> bool {
let did_fw = self.not_exhausted() && unsafe { f(self.deref_cursor()) };
let did_fw = self.not_exhausted()
&& unsafe {
// UNSAFE(@ohsayan): verified ptr
f(self.deref_cursor())
};
unsafe {
// UNSAFE(@ohsayan): increment cursor
self.incr_cursor_if(did_fw);
}
did_fw
@ -450,18 +462,25 @@ impl<'a> RawLexer<'a> {
#[inline(always)]
fn peek_eq_and_forward_or_eof(&mut self, eq: u8) -> bool {
unsafe {
// UNSAFE(@ohsayan): verified cursor
let eq = self.not_exhausted() && self.deref_cursor() == eq;
// UNSAFE(@ohsayan): incr cursor if matched
self.incr_cursor_if(eq);
eq | self.exhausted()
}
}
#[inline(always)]
pub(super) fn peek_neq(&self, b: u8) -> bool {
self.not_exhausted() && unsafe { self.deref_cursor() != b }
self.not_exhausted()
&& unsafe {
// UNSAFE(@ohsayan): verified cursor
self.deref_cursor() != b
}
}
#[inline(always)]
pub(super) fn peek_eq_and_forward(&mut self, b: u8) -> bool {
unsafe {
// UNSAFE(@ohsayan): verified cursor
let r = self.not_exhausted() && self.deref_cursor() == b;
self.incr_cursor_if(r);
r
@ -488,8 +507,10 @@ impl<'a> RawLexer<'a> {
let s = self.cursor();
unsafe {
while self.peek_is(|b| b.is_ascii_alphanumeric() || b == b'_') {
// UNSAFE(@ohsayan): increment cursor, this is valid
self.incr_cursor();
}
// UNSAFE(@ohsayan): valid slice and ptrs
slice::from_raw_parts(s, self.cursor().offset_from(s) as usize)
}
}
@ -514,6 +535,7 @@ impl<'a> RawLexer<'a> {
None => return self.set_error(LexError::UnexpectedByte),
}
unsafe {
// UNSAFE(@ohsayan): we are sent a byte, so fw cursor
self.incr_cursor();
}
}

Loading…
Cancel
Save