Unsafe code review (partial)

Unsafe code review for March, 2023.

NEEDCHECK: Still need to verify unsafe code in index implementations
next
Sayan Nandan 1 year ago
parent f0f67a98fc
commit ccfb7b2e12
No known key found for this signature in database
GPG Key ID: 42EEDF4AE9D96B54

@ -49,52 +49,76 @@ pub struct Datacell {
impl Datacell {
// bool
pub fn new_bool(b: bool) -> Self {
unsafe { Self::new(TagClass::Bool, DataRaw::word(SystemDword::store(b))) }
unsafe {
// UNSAFE(@ohsayan): Correct because we are initializing Self with the correct tag
Self::new(TagClass::Bool, DataRaw::word(SystemDword::store(b)))
}
}
pub unsafe fn read_bool(&self) -> bool {
self.load_word()
}
pub fn try_bool(&self) -> Option<bool> {
self.checked_tag(TagClass::Bool, || unsafe { self.read_bool() })
self.checked_tag(TagClass::Bool, || unsafe {
// UNSAFE(@ohsayan): correct because we just verified the tag
self.read_bool()
})
}
pub fn bool(&self) -> bool {
self.try_bool().unwrap()
}
// uint
pub fn new_uint(u: u64) -> Self {
unsafe { Self::new(TagClass::UnsignedInt, DataRaw::word(SystemDword::store(u))) }
unsafe {
// UNSAFE(@ohsayan): Correct because we are initializing Self with the correct tag
Self::new(TagClass::UnsignedInt, DataRaw::word(SystemDword::store(u)))
}
}
pub unsafe fn read_uint(&self) -> u64 {
self.load_word()
}
pub fn try_uint(&self) -> Option<u64> {
self.checked_tag(TagClass::UnsignedInt, || unsafe { self.read_uint() })
self.checked_tag(TagClass::UnsignedInt, || unsafe {
// UNSAFE(@ohsayan): correct because we just verified the tag
self.read_uint()
})
}
pub fn uint(&self) -> u64 {
self.try_uint().unwrap()
}
// sint
pub fn new_sint(u: i64) -> Self {
unsafe { Self::new(TagClass::SignedInt, DataRaw::word(SystemDword::store(u))) }
unsafe {
// UNSAFE(@ohsayan): Correct because we are initializing Self with the correct tag
Self::new(TagClass::SignedInt, DataRaw::word(SystemDword::store(u)))
}
}
pub unsafe fn read_sint(&self) -> i64 {
self.load_word()
}
pub fn try_sint(&self) -> Option<i64> {
self.checked_tag(TagClass::SignedInt, || unsafe { self.read_sint() })
self.checked_tag(TagClass::SignedInt, || unsafe {
// UNSAFE(@ohsayan): Correct because we just verified the tag
self.read_sint()
})
}
pub fn sint(&self) -> i64 {
self.try_sint().unwrap()
}
// float
pub fn new_float(f: f64) -> Self {
unsafe { Self::new(TagClass::Float, DataRaw::word(SystemDword::store(f))) }
unsafe {
// UNSAFE(@ohsayan): Correct because we are initializing Self with the correct tag
Self::new(TagClass::Float, DataRaw::word(SystemDword::store(f)))
}
}
pub unsafe fn read_float(&self) -> f64 {
self.load_word()
}
pub fn try_float(&self) -> Option<f64> {
self.checked_tag(TagClass::Float, || unsafe { self.read_float() })
self.checked_tag(TagClass::Float, || unsafe {
// UNSAFE(@ohsayan): Correct because we just verified the tag
self.read_float()
})
}
pub fn float(&self) -> f64 {
self.try_float().unwrap()
@ -103,6 +127,7 @@ impl Datacell {
pub fn new_bin(s: Box<[u8]>) -> Self {
let mut md = ManuallyDrop::new(s);
unsafe {
// UNSAFE(@ohsayan): Correct because we are initializing Self with the correct tag
Self::new(
TagClass::Bin,
DataRaw::word(SystemDword::store((md.as_mut_ptr(), md.len()))),
@ -114,7 +139,10 @@ impl Datacell {
slice::from_raw_parts::<u8>(p, l)
}
pub fn try_bin(&self) -> Option<&[u8]> {
self.checked_tag(TagClass::Bin, || unsafe { self.read_bin() })
self.checked_tag(TagClass::Bin, || unsafe {
// UNSAFE(@ohsayan): Correct because we just verified the tag
self.read_bin()
})
}
pub fn bin(&self) -> &[u8] {
self.try_bin().unwrap()
@ -123,6 +151,7 @@ impl Datacell {
pub fn new_str(s: Box<str>) -> Self {
let mut md = ManuallyDrop::new(s.into_boxed_bytes());
unsafe {
// UNSAFE(@ohsayan): Correct because we are initializing Self with the correct tag
Self::new(
TagClass::Str,
DataRaw::word(SystemDword::store((md.as_mut_ptr(), md.len()))),
@ -134,20 +163,29 @@ impl Datacell {
str::from_utf8_unchecked(slice::from_raw_parts(p, l))
}
pub fn try_str(&self) -> Option<&str> {
self.checked_tag(TagClass::Str, || unsafe { self.read_str() })
self.checked_tag(TagClass::Str, || unsafe {
// UNSAFE(@ohsayan): Correct because we just verified the tag
self.read_str()
})
}
pub fn str(&self) -> &str {
self.try_str().unwrap()
}
// list
pub fn new_list(l: Vec<Self>) -> Self {
unsafe { Self::new(TagClass::List, DataRaw::rwl(RwLock::new(l))) }
unsafe {
// UNSAFE(@ohsayan): Correct because we are initializing Self with the correct tag
Self::new(TagClass::List, DataRaw::rwl(RwLock::new(l)))
}
}
pub unsafe fn read_list(&self) -> &RwLock<Vec<Self>> {
&self.data.rwl
}
pub fn try_list(&self) -> Option<&RwLock<Vec<Self>>> {
self.checked_tag(TagClass::List, || unsafe { self.read_list() })
self.checked_tag(TagClass::List, || unsafe {
// UNSAFE(@ohsayan): Correct because we just verified the tag
self.read_list()
})
}
pub fn list(&self) -> &RwLock<Vec<Self>> {
self.try_list().unwrap()
@ -176,20 +214,25 @@ impl<'a> From<LitIR<'a>> for Datacell {
fn from(l: LitIR<'a>) -> Self {
match l.kind().tag_class() {
tag if tag < TagClass::Bin => unsafe {
// DO NOT RELY ON the payload's bit pattern; it's padded
// UNSAFE(@ohsayan): Correct because we are using the same tag, and in this case the type doesn't need any advanced construction
Datacell::new(
l.kind().tag_class(),
// DO NOT RELY ON the payload's bit pattern; it's padded
DataRaw::word(SystemDword::store_qw(l.data().load_qw())),
)
},
tag @ (TagClass::Bin | TagClass::Str) => unsafe {
// UNSAFE(@ohsayan): Correct because we are using the same tag, and in this case the type requires a new heap for construction
let mut bin = ManuallyDrop::new(l.read_bin_uck().to_owned().into_boxed_slice());
Datacell::new(
tag,
DataRaw::word(SystemDword::store((bin.as_mut_ptr(), bin.len()))),
)
},
_ => unreachable!(),
_ => unsafe {
// UNSAFE(@ohsayan): a Lit will never be higher than a string
impossible!()
},
}
}
}
@ -223,6 +266,7 @@ impl Datacell {
}
pub fn null() -> Self {
unsafe {
// UNSAFE(@ohsayan): This is a hack. It's safe because we set init to false
Self::_new(
TagClass::Bool,
DataRaw::word(NativeQword::store_qw(0)),
@ -333,10 +377,14 @@ impl Drop for Datacell {
fn drop(&mut self) {
match self.tag {
TagClass::Str | TagClass::Bin => unsafe {
// UNSAFE(@ohsayan): we have checked that the cell is initialized (uninit will not satisfy this class), and we have checked its class
let (p, l) = self.load_word();
engine::mem::dealloc_array::<u8>(p, l)
},
TagClass::List => unsafe { ManuallyDrop::drop(&mut self.data.rwl) },
TagClass::List => unsafe {
// UNSAFE(@ohsayan): we have checked that the cell is initialized (uninit will not satisfy this class), and we have checked its class
ManuallyDrop::drop(&mut self.data.rwl)
},
_ => {}
}
}
@ -347,23 +395,29 @@ impl Clone for Datacell {
fn clone(&self) -> Self {
let data = match self.tag {
TagClass::Str | TagClass::Bin => unsafe {
let block = ManuallyDrop::new(self.read_bin().to_owned().into_boxed_slice());
// UNSAFE(@ohsayan): we have checked that the cell is initialized (uninit will not satisfy this class), and we have checked its class
let mut block = ManuallyDrop::new(self.read_bin().to_owned().into_boxed_slice());
DataRaw {
word: ManuallyDrop::new(SystemDword::store((block.as_ptr(), block.len()))),
word: ManuallyDrop::new(SystemDword::store((block.as_mut_ptr(), block.len()))),
}
},
TagClass::List => unsafe {
// UNSAFE(@ohsayan): we have checked that the cell is initialized (uninit will not satisfy this class), and we have checked its class
let data = self.read_list().read().iter().cloned().collect();
DataRaw {
rwl: ManuallyDrop::new(RwLock::new(data)),
}
},
_ => unsafe {
// UNSAFE(@ohsayan): we have checked that the cell is initialized (uninit will not satisfy this class), and we have checked its class
DataRaw {
word: ManuallyDrop::new(mem::transmute_copy(&self.data.word)),
}
},
};
unsafe { Self::_new(self.tag, data, self.init) }
unsafe {
// UNSAFE(@ohsayan): same tag, we correctly init data and also return the same init state
Self::_new(self.tag, data, self.init)
}
}
}

@ -245,20 +245,22 @@ impl Field {
}
}
#[inline(always)]
fn single_pass_for(&self, dc: &Datacell) -> bool {
((self.layers().len() == 1) & (self.layers()[0].tag.tag_class() == dc.kind()))
| (self.nullable & dc.is_null())
}
#[inline(always)]
fn compute_index(&self, dc: &Datacell) -> usize {
// escape check if it makes sense to
!(self.nullable & dc.is_null()) as usize * self.layers()[0].tag.tag_class().word()
if ((!self.is_nullable()) & dc.is_null()) | (self.layers[0].tag.tag_class() != dc.kind()) {
// illegal states: (1) bad null (2) tags don't match
7
} else {
self.layers()[0].tag.tag_class().word()
}
}
pub fn validate_data_fpath(&self, data: &Datacell) -> bool {
// if someone sends a PR with an added check, I'll personally come to your house and throw a brick on your head
if self.single_pass_for(data) {
if self.layers.len() == 1 {
layertrace("fpath");
unsafe { LVERIFY[self.compute_index(data)](self.layers()[0], data) }
unsafe {
// UNSAFE(@ohsayan): checked for non-null, and used correct class
LVERIFY[self.compute_index(data)](self.layers()[0], data)
}
} else {
Self::rverify_layers(self.layers(), data)
}
@ -268,19 +270,29 @@ impl Field {
let layer = layers[0];
let layers = &layers[1..];
match (layer.tag.tag_class(), data.kind()) {
(layer_tag, data_tag) if (layer_tag == data_tag) & (layer_tag < TagClass::List) => {
// time to go home
(unsafe { LVERIFY[layer.tag.tag_class().word()](layer, data) } & layers.is_empty())
}
(TagClass::List, TagClass::List) => unsafe {
let mut okay = !layers.is_empty() & LVERIFY[TagClass::List.word()](layer, data);
let list = data.read_list().read();
let mut it = list.iter();
while (it.len() != 0) & okay {
okay &= Self::rverify_layers(layers, it.next().unwrap());
(TagClass::List, TagClass::List) if !layers.is_empty() => {
let mut okay = unsafe {
// UNSAFE(@ohsayan): we've verified this
LVERIFY[TagClass::List.word()](layer, data)
};
let list = unsafe {
// UNSAFE(@ohsayan): we verified tags
data.read_list()
};
let lread = list.read();
let mut i = 0;
while (i < lread.len()) & okay {
okay &= Self::rverify_layers(layers, &lread[i]);
i += 1;
}
okay
},
}
(tag_a, tag_b) if tag_a == tag_b => {
unsafe {
// UNSAFE(@ohsayan): same tags; not-null for now so no extra handling required here
LVERIFY[tag_a.word()](layer, data)
}
}
_ => false,
}
}
@ -374,7 +386,7 @@ impl Layer {
}
}
static LVERIFY: [unsafe fn(Layer, &Datacell) -> bool; 7] = [
static LVERIFY: [unsafe fn(Layer, &Datacell) -> bool; 8] = [
lverify_bool,
lverify_uint,
lverify_sint,
@ -382,6 +394,7 @@ static LVERIFY: [unsafe fn(Layer, &Datacell) -> bool; 7] = [
lverify_bin,
lverify_str,
lverify_list,
|_, _| false,
];
#[cfg(test)]

@ -244,7 +244,7 @@ pub unsafe trait DataspecMethods1D: Dataspec1D {
// UNSAFE(@ohsayan): we are heap allocated, and we're calling the implementor's definition
<Self as DataspecRaw1D>::clone_str(Dataspec1D::read_str_uck(self))
},
TagClass::Str if <Self as DataspecRaw1D>::HEAP_STR => unsafe {
TagClass::Bin if <Self as DataspecRaw1D>::HEAP_BIN => unsafe {
// UNSAFE(@ohsayan): we are heap allocated, and we're calling the implementor's definition
<Self as DataspecRaw1D>::clone_bin(Dataspec1D::read_bin_uck(self))
},

@ -75,11 +75,17 @@ impl<const N: usize> AStr<N> {
}
#[inline(always)]
pub fn _as_str(&self) -> &str {
unsafe { mem::transmute(self._as_bytes()) }
unsafe {
// UNSAFE(@ohsayan): same layout
mem::transmute(self._as_bytes())
}
}
#[inline(always)]
pub fn _as_mut_str(&mut self) -> &mut str {
unsafe { mem::transmute(self._as_bytes_mut()) }
unsafe {
// UNSAFE(@ohsayan): same layout
mem::transmute(self._as_bytes_mut())
}
}
pub fn _as_bytes(&self) -> &[u8] {
self.base.as_slice()

@ -151,7 +151,9 @@ impl<const N: usize, T: Copy> UArray<N, T> {
debug_assert!(s.len() <= N);
let mut new = Self::new();
unsafe {
// UNSAFE(@ohsayan): the src pointer *will* be correct and the dst is us, and we own our stack here
ptr::copy_nonoverlapping(s.as_ptr(), new.a.as_mut_ptr() as *mut T, s.len());
// UNSAFE(@ohsayan): and here goes the call; same length as the origin buffer
new.set_len(s.len());
}
new

@ -82,7 +82,7 @@ impl<const N: usize, T> VInline<N, T> {
}
#[inline(always)]
pub fn remove(&mut self, idx: usize) -> T {
if idx >= self.len() {
if !(idx < self.len()) {
panic!("index out of range");
}
unsafe {
@ -190,11 +190,13 @@ impl<const N: usize, T> VInline<N, T> {
return;
}
if self.l <= N {
// the current can be fit into the stack, and we aren't on the stack. so copy data from heap and move it to the stack
unsafe {
// UNSAFE(@ohsayan): non-null heap
self.mv_to_stack();
}
} else {
// in this case, we can't move to stack but can optimize the heap size. so create a new heap, memcpy old heap and destroy old heap (NO dtor)
let nb = Self::alloc_block(self.len());
unsafe {
// UNSAFE(@ohsayan): non-overlapping; non-null
@ -217,28 +219,27 @@ impl<const N: usize, T> VInline<N, T> {
}
#[inline]
fn grow(&mut self) {
if !(self.l == self.capacity()) {
return;
}
// allocate new block
let nc = self.ncap();
let nb = Self::alloc_block(nc);
if self.on_stack() {
// stack -> heap
unsafe {
// UNSAFE(@ohsayan): non-null; valid len
ptr::copy_nonoverlapping(self.d.s.as_ptr() as *const T, nb, self.l);
}
} else {
unsafe {
// UNSAFE(@ohsayan): non-null; valid len
ptr::copy_nonoverlapping(self.d.h.cast_const(), nb, self.l);
// UNSAFE(@ohsayan): non-null heap
self.dealloc_heap(self.d.h);
if self.l == self.capacity() {
// allocate new block because we've run out of capacity
let nc = self.ncap();
let nb = Self::alloc_block(nc);
if self.on_stack() {
// stack -> heap
unsafe {
// UNSAFE(@ohsayan): non-null; valid len
ptr::copy_nonoverlapping(self.d.s.as_ptr() as *const T, nb, self.l);
}
} else {
unsafe {
// UNSAFE(@ohsayan): non-null; valid len
ptr::copy_nonoverlapping(self.d.h.cast_const(), nb, self.l);
// UNSAFE(@ohsayan): non-null heap
self.dealloc_heap(self.d.h);
}
}
self.d.h = nb;
self.c = nc;
}
self.d.h = nb;
self.c = nc;
}
#[inline(always)]
unsafe fn dealloc_heap(&mut self, heap: *mut T) {
@ -338,6 +339,7 @@ impl<const N: usize, T> IntoIter<N, T> {
return None;
}
unsafe {
// UNSAFE(@ohsayan): we get the back pointer and move back; always behind EOA so we're chill
self.l -= 1;
ptr::read(self.v._as_ptr().add(self.l).cast())
}

@ -105,7 +105,10 @@ impl SystemDword for NativeDword {
let x;
#[cfg(target_pointer_width = "32")]
{
x = unsafe { core::mem::transmute(u) };
x = unsafe {
// UNSAFE(@ohsayan): same layout and this is a stupidly simple cast and it's wild that the rust std doesn't have a simpler way to do it
core::mem::transmute(u)
};
}
#[cfg(target_pointer_width = "64")]
{
@ -122,7 +125,10 @@ impl SystemDword for NativeDword {
let x;
#[cfg(target_pointer_width = "32")]
{
x = unsafe { core::mem::transmute_copy(self) }
x = unsafe {
// UNSAFE(@ohsayan): same layout and this is a stupidly simple cast and it's wild that the rust std doesn't have a simpler way to do it
core::mem::transmute_copy(self)
}
}
#[cfg(target_pointer_width = "64")]
{
@ -153,7 +159,10 @@ impl SystemDword for NativeTword {
let x;
#[cfg(target_pointer_width = "32")]
{
let [a, b]: [usize; 2] = unsafe { core::mem::transmute(u) };
let [a, b]: [usize; 2] = unsafe {
// UNSAFE(@ohsayan): same layout and this is a stupidly simple cast and it's wild that the rust std doesn't have a simpler way to do it
core::mem::transmute(u)
};
x = [a, b, 0];
}
#[cfg(target_pointer_width = "64")]
@ -172,7 +181,10 @@ impl SystemDword for NativeTword {
#[cfg(target_pointer_width = "32")]
{
let ab = [self.0[0], self.0[1]];
x = unsafe { core::mem::transmute(ab) };
x = unsafe {
// UNSAFE(@ohsayan): same layout and this is a stupidly simple cast and it's wild that the rust std doesn't have a simpler way to do it
core::mem::transmute(ab)
};
}
#[cfg(target_pointer_width = "64")]
{
@ -209,7 +221,10 @@ impl SystemDword for NativeQword {
let ret;
#[cfg(target_pointer_width = "32")]
{
let [a, b]: [usize; 2] = unsafe { core::mem::transmute(u) };
let [a, b]: [usize; 2] = unsafe {
// UNSAFE(@ohsayan): same layout and this is a stupidly simple cast and it's wild that the rust std doesn't have a simpler way to do it
core::mem::transmute(u)
};
ret = <Self as SystemQword>::store_full(a, b, 0, 0);
}
#[cfg(target_pointer_width = "64")]
@ -225,7 +240,10 @@ impl SystemDword for NativeQword {
let ret;
#[cfg(target_pointer_width = "32")]
{
ret = unsafe { core::mem::transmute([self.0[0], self.0[1]]) };
ret = unsafe {
// UNSAFE(@ohsayan): same layout and this is a stupidly simple cast and it's wild that the rust std doesn't have a simpler way to do it
core::mem::transmute([self.0[0], self.0[1]])
};
}
#[cfg(target_pointer_width = "64")]
{

@ -416,10 +416,12 @@ impl<'a> Entity<'a> {
let is_full = Self::tokens_with_full(tok);
let r = match () {
_ if is_full => unsafe {
// UNSAFE(@ohsayan): just verified signature
*c += 3;
Self::full_entity_from_slice(tok)
},
_ if is_current => unsafe {
// UNSAFE(@ohsayan): just verified signature
*c += 1;
Self::single_entity_from_slice(tok)
},
@ -451,6 +453,7 @@ impl<'a> Entity<'a> {
let is_full = state.cursor_signature_match_entity_full_rounded();
let is_single = state.cursor_has_ident_rounded();
unsafe {
// UNSAFE(@ohsayan): verified signatures
if is_full {
state.cursor_ahead_by(3);
*d = MaybeInit::new(Entity::full_entity_from_slice(tok));
@ -469,6 +472,7 @@ impl<'a> Entity<'a> {
let is_full = tok[0].is_ident() && tok[1] == Token![.] && tok[2].is_ident();
let is_single = tok[0].is_ident();
unsafe {
// UNSAFE(@ohsayan): verified signatures
if is_full {
state.cursor_ahead_by(3);
*d = MaybeInit::new(Entity::full_entity_from_slice(tok));

@ -117,7 +117,10 @@ impl<'a> AlterModel<'a> {
return compiler::cold_rerr(LangError::BadSyntax);
// FIXME(@ohsayan): bad because no specificity
}
let model_name = unsafe { state.fw_read().uck_read_ident() };
let model_name = unsafe {
// UNSAFE(@ohsayan): did rounded check for ident in the above branch
state.fw_read().uck_read_ident()
};
let kind = match state.fw_read() {
Token![add] => AlterKind::alter_add(state),
Token![remove] => AlterKind::alter_remove(state),

@ -55,7 +55,7 @@ impl<'a> DropSpace<'a> {
if state.exhausted() {
return Ok(DropSpace::new(
unsafe {
// UNSAFE(@ohsayan): Safe because the match predicate ensures that tok[1] is indeed an ident
// UNSAFE(@ohsayan): Safe because the if predicate ensures that tok[0] (relative) is indeed an ident
ident.uck_read_ident()
},
force,

@ -151,18 +151,35 @@ where
}
(tok, DictFoldState::LIT_OR_OB) if state.can_read_lit_from(tok) => {
// found lit
unsafe {
let v = Some(state.read_lit_unchecked_from(tok).into());
state.poison_if_not(dict.insert(key.take().as_str().into(), v).is_none());
}
let v = Some(unsafe {
// UNSAFE(@ohsayan): verified at guard
state.read_lit_unchecked_from(tok).into()
});
state.poison_if_not(
dict.insert(
unsafe {
// UNSAFE(@ohsayan): we switch to this state only when we are in the LIT_OR_OB state. this means that we've already read in a key
key.take().as_str().into()
},
v,
)
.is_none(),
);
// after lit we're either done or expect something else
mstate = DictFoldState::COMMA_OR_CB;
}
(Token![null], DictFoldState::LIT_OR_OB) => {
// found a null
unsafe {
state.poison_if_not(dict.insert(key.take().as_str().into(), None).is_none());
}
state.poison_if_not(
dict.insert(
unsafe {
// UNSAFE(@ohsayan): we only switch to this when we've already read in a key
key.take().as_str().into()
},
None,
)
.is_none(),
);
// after a null (essentially counts as a lit) we're either done or expect something else
mstate = DictFoldState::COMMA_OR_CB;
}
@ -170,12 +187,16 @@ where
// found a nested dict
let mut ndict = DictGeneric::new();
_rfold_dict::<Qd, NoBreakpoint>(DictFoldState::CB_OR_IDENT, state, &mut ndict);
unsafe {
state.poison_if_not(
dict.insert(key.take().as_str().into(), Some(ndict.into()))
.is_none(),
);
}
state.poison_if_not(
dict.insert(
unsafe {
// UNSAFE(@ohsayan): correct again because whenever we hit an expression position, we've already read in a key (ident)
key.take().as_str().into()
},
Some(ndict.into()),
)
.is_none(),
);
mstate = DictFoldState::COMMA_OR_CB;
}
(Token![,], DictFoldState::COMMA_OR_CB) => {
@ -240,11 +261,8 @@ states! {
}
}
fn rfold_layers<'a, Qd: QueryData<'a>>(
mut mstate: LayerFoldState,
state: &mut State<'a, Qd>,
layers: &mut Vec<LayerSpec<'a>>,
) {
fn rfold_layers<'a, Qd: QueryData<'a>>(state: &mut State<'a, Qd>, layers: &mut Vec<LayerSpec<'a>>) {
let mut mstate = LayerFoldState::BEGIN_IDENT;
let mut ty = MaybeInit::uninit();
let mut props = Default::default();
while state.loop_tt() {
@ -260,7 +278,7 @@ fn rfold_layers<'a, Qd: QueryData<'a>>(
// but we first need a colon
state.poison_if_not(state.cursor_rounded_eq(Token![:]));
state.cursor_ahead_if(state.okay());
rfold_layers(LayerFoldState::BEGIN_IDENT, state, layers);
rfold_layers(state, layers);
// we are yet to parse the remaining props
mstate = LayerFoldState::FOLD_INCOMPLETE;
} else {
@ -297,7 +315,10 @@ fn rfold_layers<'a, Qd: QueryData<'a>>(
if ((mstate == LayerFoldState::FINAL) | (mstate == LayerFoldState::FINAL_OR_OB)) & state.okay()
{
layers.push(LayerSpec {
ty: unsafe { ty.take() },
ty: unsafe {
// UNSAFE(@ohsayan): our start state always looks for an ident
ty.take()
},
props,
});
} else {
@ -351,7 +372,7 @@ impl<'a> FieldSpec<'a> {
};
// layers
let mut layers = Vec::new();
rfold_layers(LayerFoldState::BEGIN_IDENT, state, &mut layers);
rfold_layers(state, &mut layers);
if state.okay() {
Ok(FieldSpec {
field_name: field_name.clone(),
@ -403,7 +424,7 @@ impl<'a> ExpandedField<'a> {
}
state.poison_if_not(state.cursor_eq(Token![:]));
state.cursor_ahead();
rfold_layers(LayerFoldState::BEGIN_IDENT, state, &mut layers);
rfold_layers(state, &mut layers);
match state.fw_read() {
Token![,] => {
rfold_dict(DictFoldState::CB_OR_IDENT, state, &mut props);
@ -489,7 +510,7 @@ mod impls {
use {
super::{
rfold_dict, rfold_layers, rfold_tymeta, DictFoldState, DictGeneric, ExpandedField,
FieldSpec, LayerFoldState, LayerSpec,
FieldSpec, LayerSpec,
},
crate::engine::{
error::LangResult,
@ -511,7 +532,7 @@ mod impls {
const VERIFY: bool = true;
fn _from_state<Qd: QueryData<'a>>(state: &mut State<'a, Qd>) -> LangResult<Self> {
let mut layers = Vec::new();
rfold_layers(LayerFoldState::BEGIN_IDENT, state, &mut layers);
rfold_layers(state, &mut layers);
assert!(layers.len() == 1);
Ok(layers.swap_remove(0))
}
@ -519,7 +540,7 @@ mod impls {
state: &mut State<'a, Qd>,
) -> LangResult<Vec<Self>> {
let mut l = Vec::new();
rfold_layers(LayerFoldState::BEGIN_IDENT, state, &mut l);
rfold_layers(state, &mut l);
Ok(l)
}
}

@ -368,12 +368,12 @@ impl<'a> InsertStatement<'a> {
}
if state.okay() {
let data = unsafe {
// UNSAFE(@ohsayan): state's flag guarantees correctness
// UNSAFE(@ohsayan): state's flag guarantees correctness (see wildcard branch)
data.unwrap_unchecked()
};
Ok(InsertStatement {
entity: unsafe {
// UNSAFE(@ohsayan): state's flag ensures correctness
// UNSAFE(@ohsayan): state's flag ensures correctness (see Entity::parse_entity)
entity.assume_init()
},
data,

@ -77,7 +77,10 @@ impl<'a> InsecureLexer<'a> {
fn _lex(&mut self) {
let ref mut slf = self.base;
while slf.not_exhausted() && slf.no_error() {
match unsafe { slf.deref_cursor() } {
match unsafe {
// UNSAFE(@ohsayan): Verified non-null from pre
slf.deref_cursor()
} {
byte if byte.is_ascii_alphabetic() => slf.scan_ident_or_keyword(),
#[cfg(test)]
byte if byte == b'\x01' => {
@ -104,6 +107,7 @@ impl<'a> InsecureLexer<'a> {
#[inline(always)]
fn scan_signed_integer(slf: &mut RawLexer<'a>) {
unsafe {
// UNSAFE(@ohsayan): We hit an integer hence this was called
slf.incr_cursor();
}
if slf.peek_is(|b| b.is_ascii_digit()) {
@ -116,8 +120,10 @@ impl<'a> InsecureLexer<'a> {
while slf.peek_is_and_forward(|b| b.is_ascii_digit()) {}
let wseof = slf.peek_is(|char| !char.is_ascii_alphabetic()) || slf.exhausted();
match unsafe {
// UNSAFE(@ohsayan): a sequence of ASCII bytes in the integer range will always be correct unicode
str::from_utf8_unchecked(slice::from_raw_parts(
start,
// UNSAFE(@ohsayan): valid cursor and start pointers
slf.cursor().offset_from(start) as usize,
))
}
@ -137,47 +143,66 @@ impl<'a> InsecureLexer<'a> {
#[inline(always)]
fn scan_unsigned_integer(slf: &mut RawLexer<'a>) {
let s = slf.cursor();
unsafe {
while slf.peek_is(|b| b.is_ascii_digit()) {
while slf.peek_is(|b| b.is_ascii_digit()) {
unsafe {
// UNSAFE(@ohsayan): since we're going ahead, this is correct (until EOA)
slf.incr_cursor();
}
}
/*
1234; // valid
1234} // valid
1234{ // invalid
1234, // valid
1234a // invalid
*/
let wseof = slf.peek_is(|char| !char.is_ascii_alphabetic()) || slf.exhausted();
match unsafe {
/*
1234; // valid
1234} // valid
1234{ // invalid
1234, // valid
1234a // invalid
UNSAFE(@ohsayan):
(1) Valid cursor and start pointer (since we copy it from the cursor which is correct)
(2) All ASCII alphabetic bytes are captured, hence this will always be a correct unicode string
*/
let wseof = slf.peek_is(|char| !char.is_ascii_alphabetic()) || slf.exhausted();
match str::from_utf8_unchecked(slice::from_raw_parts(
str::from_utf8_unchecked(slice::from_raw_parts(
s,
slf.cursor().offset_from(s) as usize,
))
.parse()
{
Ok(num) if compiler::likely(wseof) => {
slf.tokens.push(Token::Lit(Lit::UnsignedInt(num)))
}
_ => slf.set_error(LexError::InvalidUnsignedLiteral),
}
.parse()
{
Ok(num) if compiler::likely(wseof) => {
slf.tokens.push(Token::Lit(Lit::UnsignedInt(num)))
}
_ => slf.set_error(LexError::InvalidUnsignedLiteral),
}
}
#[inline(always)]
fn scan_binary_literal(slf: &mut RawLexer<'a>) {
unsafe {
// UNSAFE(@ohsayan): cursor increment since we hit the marker byte (CR)
slf.incr_cursor();
}
let mut size = 0usize;
let mut okay = true;
while slf.not_exhausted() && unsafe { slf.deref_cursor() != b'\n' } && okay {
while slf.not_exhausted()
&& unsafe {
// UNSAFE(@ohsayan): verified non-exhaustion
slf.deref_cursor() != b'\n'
}
&& okay
{
/*
Don't ask me how stupid this is. Like, I was probably in some "mood" when I wrote this
and it works duh, but isn't the most elegant of things (could I have just used a parse?
nah, I'm just a hardcore numeric normie)
-- Sayan
*/
let byte = unsafe { slf.deref_cursor() };
let byte = unsafe {
// UNSAFE(@ohsayan): The pre invariant guarantees that this is correct
slf.deref_cursor()
};
okay &= byte.is_ascii_digit();
let (prod, of_flag) = size.overflowing_mul(10);
okay &= !of_flag;
@ -185,6 +210,7 @@ impl<'a> InsecureLexer<'a> {
size = sum;
okay &= !of_flag;
unsafe {
// UNSAFE(@ohsayan): We just read something, so this is fine (until EOA)
slf.incr_cursor();
}
}
@ -192,7 +218,9 @@ impl<'a> InsecureLexer<'a> {
okay &= slf.remaining() >= size;
if compiler::likely(okay) {
unsafe {
// UNSAFE(@ohsayan): Correct cursor and length (from above we know that we have enough bytes)
slf.push_token(Lit::Bin(slice::from_raw_parts(slf.cursor(), size)));
// UNSAFE(@ohsayan): Correct length increment
slf.incr_cursor_by(size);
}
} else {
@ -202,22 +230,31 @@ impl<'a> InsecureLexer<'a> {
#[inline(always)]
fn scan_quoted_string(slf: &mut RawLexer<'a>, quote_style: u8) {
debug_assert!(
unsafe { slf.deref_cursor() } == quote_style,
unsafe {
// UNSAFE(@ohsayan): yessir, we just hit this byte. if called elsewhere, this function will crash and burn (or simply, segfault)
slf.deref_cursor()
} == quote_style,
"illegal call to scan_quoted_string"
);
unsafe { slf.incr_cursor() }
unsafe {
// UNSAFE(@ohsayan): Increment this cursor (this is correct since we just hit the quote)
slf.incr_cursor()
}
let mut buf = Vec::new();
unsafe {
while slf.peek_neq(quote_style) {
// UNSAFE(@ohsayan): deref is good since peek passed
match slf.deref_cursor() {
b if b != b'\\' => {
buf.push(b);
}
_ => {
// UNSAFE(@ohsayan): we read one byte, so this should work
slf.incr_cursor();
if slf.exhausted() {
break;
}
// UNSAFE(@ohsayan): correct because of the above branch
let b = slf.deref_cursor();
let quote = b == quote_style;
let bs = b == b'\\';
@ -228,6 +265,11 @@ impl<'a> InsecureLexer<'a> {
}
}
}
/*
UNSAFE(@ohsayan): This is correct because:
(a) If we are in arm 1: we move the cursor ahead from the `\` byte (the branch doesn't do it)
(b) If we are in arm 2: we don't skip the second quote byte in the branch, hence this is correct
*/
slf.incr_cursor();
}
let terminated = slf.peek_eq_and_forward(quote_style);
@ -260,7 +302,10 @@ impl<'a> SafeLexer<'a> {
fn _lex(self) -> LexResult<Vec<Token<'a>>> {
let Self { base: mut l } = self;
while l.not_exhausted() && l.no_error() {
let b = unsafe { l.deref_cursor() };
let b = unsafe {
// UNSAFE(@ohsayan): This is correct because of the pre invariant
l.deref_cursor()
};
match b {
// ident or kw
b if b.is_ascii_alphabetic() => l.scan_ident_or_keyword(),
@ -469,7 +514,10 @@ impl<'b> SafeQueryData<'b> {
// incr cursor
i += mx_extract;
*cnt += i;
unsafe { slice::from_raw_parts(src.as_ptr(), mx_extract) }
unsafe {
// UNSAFE(@ohsayan): src is correct (guaranteed). even if the decoded length returns an error we still remain within bounds of the EOA
slice::from_raw_parts(src.as_ptr(), mx_extract)
}
}
#[inline(always)]
pub(super) fn uint<'a>(src: Slice<'a>, cnt: &mut usize, data: &mut Vec<LitIR<'a>>) -> bool {

@ -412,7 +412,10 @@ impl<'a> RawLexer<'a> {
}
#[inline(always)]
pub(super) fn remaining(&self) -> usize {
unsafe { self.e.offset_from(self.c) as usize }
unsafe {
// UNSAFE(@ohsayan): valid ptrs
self.e.offset_from(self.c) as usize
}
}
#[inline(always)]
pub(super) unsafe fn deref_cursor(&self) -> u8 {
@ -437,12 +440,21 @@ impl<'a> RawLexer<'a> {
}
#[inline(always)]
pub(super) fn peek_is(&mut self, f: impl FnOnce(u8) -> bool) -> bool {
self.not_exhausted() && unsafe { f(self.deref_cursor()) }
self.not_exhausted()
&& unsafe {
// UNSAFE(@ohsayan): verified cursor is nonnull
f(self.deref_cursor())
}
}
#[inline(always)]
pub(super) fn peek_is_and_forward(&mut self, f: impl FnOnce(u8) -> bool) -> bool {
let did_fw = self.not_exhausted() && unsafe { f(self.deref_cursor()) };
let did_fw = self.not_exhausted()
&& unsafe {
// UNSAFE(@ohsayan): verified ptr
f(self.deref_cursor())
};
unsafe {
// UNSAFE(@ohsayan): increment cursor
self.incr_cursor_if(did_fw);
}
did_fw
@ -450,18 +462,25 @@ impl<'a> RawLexer<'a> {
#[inline(always)]
fn peek_eq_and_forward_or_eof(&mut self, eq: u8) -> bool {
unsafe {
// UNSAFE(@ohsayan): verified cursor
let eq = self.not_exhausted() && self.deref_cursor() == eq;
// UNSAFE(@ohsayan): incr cursor if matched
self.incr_cursor_if(eq);
eq | self.exhausted()
}
}
#[inline(always)]
pub(super) fn peek_neq(&self, b: u8) -> bool {
self.not_exhausted() && unsafe { self.deref_cursor() != b }
self.not_exhausted()
&& unsafe {
// UNSAFE(@ohsayan): verified cursor
self.deref_cursor() != b
}
}
#[inline(always)]
pub(super) fn peek_eq_and_forward(&mut self, b: u8) -> bool {
unsafe {
// UNSAFE(@ohsayan): verified cursor
let r = self.not_exhausted() && self.deref_cursor() == b;
self.incr_cursor_if(r);
r
@ -488,8 +507,10 @@ impl<'a> RawLexer<'a> {
let s = self.cursor();
unsafe {
while self.peek_is(|b| b.is_ascii_alphanumeric() || b == b'_') {
// UNSAFE(@ohsayan): increment cursor, this is valid
self.incr_cursor();
}
// UNSAFE(@ohsayan): valid slice and ptrs
slice::from_raw_parts(s, self.cursor().offset_from(s) as usize)
}
}
@ -514,6 +535,7 @@ impl<'a> RawLexer<'a> {
None => return self.set_error(LexError::UnexpectedByte),
}
unsafe {
// UNSAFE(@ohsayan): we are sent a byte, so fw cursor
self.incr_cursor();
}
}

Loading…
Cancel
Save