Generalize scanner

next
Sayan Nandan 1 year ago
parent 7238f0c0e8
commit 5e068c0e9b
No known key found for this signature in database
GPG Key ID: 42EEDF4AE9D96B54

@ -25,8 +25,8 @@
*/
mod astr;
pub mod buf;
mod ll;
pub mod scanner;
mod stackop;
mod uarray;
mod vinline;
@ -37,8 +37,8 @@ mod tests;
// re-exports
pub use {
astr::AStr,
buf::BufferedScanner,
ll::CachePadded,
scanner::BufferedScanner,
stackop::ByteStack,
uarray::UArray,
vinline::VInline,

@ -26,17 +26,19 @@
use core::{ptr, slice};
#[derive(Debug)]
pub struct BufferedScanner<'a> {
d: &'a [u8],
/// A [`Scanner`] specialised to byte slices.
pub type BufferedScanner<'a> = Scanner<'a, u8>;
/// A read cursor over a borrowed slice of `T`.
#[derive(Debug, PartialEq)]
pub struct Scanner<'a, T> {
// the borrowed slice being scanned
d: &'a [T],
// offset into `d` at which the unread portion begins
__cursor: usize,
}
impl<'a> BufferedScanner<'a> {
pub const fn new(d: &'a [u8]) -> Self {
impl<'a, T> Scanner<'a, T> {
pub const fn new(d: &'a [T]) -> Self {
unsafe { Self::new_with_cursor(d, 0) }
}
pub const unsafe fn new_with_cursor(d: &'a [u8], i: usize) -> Self {
/// Create a scanner over `d` whose cursor starts at index `i`.
///
/// ## Safety
/// No bounds check is performed here.
/// NOTE(review): presumably `i` must not exceed `d.len()` — confirm against callers.
pub const unsafe fn new_with_cursor(d: &'a [T], i: usize) -> Self {
    let __cursor = i;
    Self { d, __cursor }
}
pub const fn remaining(&self) -> usize {
@ -48,27 +50,33 @@ impl<'a> BufferedScanner<'a> {
pub const fn cursor(&self) -> usize {
self.__cursor
}
pub fn current(&self) -> &[u8] {
/// Returns the part of the slice from the cursor to the end.
///
/// Panics if the cursor lies beyond the end of the slice.
pub fn current(&self) -> &[T] {
    let start = self.__cursor;
    &self.d[start..]
}
/// Returns a raw pointer to the element at the cursor.
pub const fn cursor_ptr(&self) -> *const T {
    let base = self.d.as_ptr();
    // SAFETY: sound only while the cursor stays within (or one past) the slice.
    // NOTE(review): presumably the unsafe cursor mutators uphold this — confirm.
    unsafe { base.add(self.__cursor) }
}
/// Returns `true` when no elements remain.
pub fn eof(&self) -> bool {
    matches!(self.remaining(), 0)
}
/// Returns `true` if at least `sizeof` elements remain.
pub fn has_left(&self, sizeof: usize) -> bool {
    let available = self.remaining();
    sizeof <= available
}
pub fn matches_cursor_rounded(&self, f: impl Fn(u8) -> bool) -> bool {
f(self.d[(self.d.len() - 1).min(self.__cursor)])
/// Apply `f` to the element at the cursor, clamping the index to the last
/// element of the slice (so at EOF the final element is the one tested).
///
/// Returns `false` for an empty slice, since there is no element to test.
pub fn matches_cursor_rounded(&self, f: impl Fn(&T) -> bool) -> bool {
    // `saturating_sub` avoids the `len() - 1` underflow on an empty slice, and
    // `get` turns the resulting out-of-range index into `None` instead of a panic
    let idx = self.d.len().saturating_sub(1).min(self.__cursor);
    self.d.get(idx).map_or(false, f)
}
pub fn matches_cursor_rounded_and_not_eof(&self, f: impl Fn(u8) -> bool) -> bool {
/// Returns `true` only if the (clamped) element at the cursor satisfies `f`
/// and the scanner is not at EOF.
pub fn matches_cursor_rounded_and_not_eof(&self, f: impl Fn(&T) -> bool) -> bool {
    // both operands are always evaluated (non-short-circuit `&`), predicate first
    let matched = self.matches_cursor_rounded(f);
    let has_data = !self.eof();
    matched & has_data
}
}
impl<'a> BufferedScanner<'a> {
impl<'a, T> Scanner<'a, T> {
pub unsafe fn set_cursor(&mut self, i: usize) {
self.__cursor = i;
}
/// Advance the cursor by exactly one element.
///
/// ## Safety
/// No bounds check is performed.
/// NOTE(review): presumably the scanner must not already be at EOF — confirm.
pub unsafe fn move_ahead(&mut self) {
    // step forwards via the forward helper; stepping back here would make
    // `trim_ahead`-style loops walk the cursor in the wrong direction
    self.move_ahead_by(1)
}
/// Advance the cursor by `by` elements.
///
/// ## Safety
/// No bounds check is performed.
/// NOTE(review): presumably at least `by` elements must remain — confirm.
pub unsafe fn move_ahead_by(&mut self, by: usize) {
    self.__cursor += by;
}
@ -81,12 +89,12 @@ impl<'a> BufferedScanner<'a> {
/// Internal helper: add `by` to the cursor without any bounds check.
unsafe fn _incr(&mut self, by: usize) {
    let advanced = self.__cursor + by;
    self.__cursor = advanced;
}
unsafe fn _cursor(&self) -> *const u8 {
/// Internal helper: raw pointer to the element at the cursor.
///
/// No bounds check is performed on the offset.
unsafe fn _cursor(&self) -> *const T {
    let base: *const T = self.d.as_ptr();
    base.add(self.__cursor)
}
}
impl<'a> BufferedScanner<'a> {
impl<'a> Scanner<'a, u8> {
pub fn try_next_byte(&mut self) -> Option<u8> {
if self.eof() {
None
@ -116,54 +124,78 @@ pub enum BufferedReadResult<T> {
Error,
}
impl<'a> BufferedScanner<'a> {
impl<'a> Scanner<'a, u8> {
/// Keep advancing the cursor while the byte under it satisfies `f` and the
/// scanner has not reached EOF.
pub fn trim_ahead(&mut self, f: impl Fn(u8) -> bool) {
    loop {
        let keep_going = self.matches_cursor_rounded_and_not_eof(|byte| f(*byte));
        if !keep_going {
            break;
        }
        // the check above guarantees we are not at EOF
        unsafe { self.move_ahead() }
    }
}
/// Advance the cursor by one byte if the byte under it satisfies `f` (and the
/// scanner is not at EOF); otherwise leave the cursor where it is.
pub fn move_ahead_if_matches(&mut self, f: impl Fn(u8) -> bool) {
    // 1 on a match, 0 otherwise
    let step = self.matches_cursor_rounded_and_not_eof(|b| f(*b)) as usize;
    // SAFETY: `step` is 1 only when the scanner is not at EOF, so moving
    // forward by `step` cannot pass the end of the buffer
    unsafe { self.move_ahead_by(step) }
}
/// Attempt to parse a `\n` terminated integer (we move past the LF, so you can't see it)
///
/// If we were unable to read in the integer, then the cursor will be restored to its starting position
// TODO(@ohsayan): optimize
pub fn try_next_ascii_u64_lf_separated(&mut self) -> BufferedReadResult<u64> {
/// Parse an ASCII decimal `u64` terminated by `\n`, reporting why a failure happened.
///
/// - `Value(n)`: digits and the LF were read; the cursor is left just past the LF.
/// - `NeedMore`: the digits were fine but no LF followed; the cursor is restored.
/// - `Error`: the digits were invalid or overflowed; the cursor is restored.
pub fn try_next_ascii_u64_lf_separated_with_result(&mut self) -> BufferedReadResult<u64> {
    let origin = self.cursor();
    let mut digits_ok = true;
    let value = self.extract_integer(&mut digits_ok);
    let at_lf = self.matches_cursor_rounded_and_not_eof(|b| *b == b'\n');
    if digits_ok & at_lf {
        // skip LF
        unsafe { self._incr(1) };
        return BufferedReadResult::Value(value);
    }
    // something went wrong: rewind to where we started
    unsafe { self.set_cursor(origin) }
    if digits_ok {
        // payload was ok, but the terminating LF is missing
        BufferedReadResult::NeedMore
    } else {
        // payload was NOT ok
        BufferedReadResult::Error
    }
}
/// Parse an ASCII decimal `u64` terminated by `\n`.
///
/// On success returns `Some(n)` and leaves the cursor just past the LF, matching
/// the `_with_result` variant. On any failure (bad digit, overflow, or missing
/// LF) returns `None` and restores the cursor to where it started.
pub fn try_next_ascii_u64_lf_separated(&mut self) -> Option<u64> {
    let start = self.cursor();
    let mut okay = true;
    let ret = self.extract_integer(&mut okay);
    let lf = self.matches_cursor_rounded_and_not_eof(|b| *b == b'\n');
    if okay & lf {
        // SAFETY: `lf` is true only when the cursor is not at EOF, so stepping
        // over the LF keeps the cursor within the buffer
        unsafe { self._incr(1) }
        Some(ret)
    } else {
        unsafe { self.set_cursor(start) }
        None
    }
}
/// Accumulate the run of ASCII decimal digits starting at the cursor into a `u64`.
///
/// Scanning stops at a `\n`, at EOF, or once `*okay` has been cleared. `*okay` is
/// set to `false` when a non-digit byte is seen or when the value overflows `u64`.
/// The cursor is advanced past every byte that was folded in and is NOT restored
/// here; the callers in this file rewind it themselves on failure.
///
/// Returns the accumulated value, which is only meaningful while `*okay` is `true`.
/// NOTE(review): with no digits before the LF this yields 0 with `*okay` still
/// `true` — confirm that an empty number is meant to be accepted.
pub fn extract_integer(&mut self, okay: &mut bool) -> u64 {
let mut ret = 0u64;
while self.matches_cursor_rounded_and_not_eof(|b| b != b'\n') & okay {
while self.matches_cursor_rounded_and_not_eof(|b| *b != b'\n') & *okay {
let b = self.d[self.cursor()];
okay &= b.is_ascii_digit();
*okay &= b.is_ascii_digit();
ret = match ret.checked_mul(10) {
Some(r) => r,
None => {
okay = false;
*okay = false;
break;
}
};
ret = match ret.checked_add((b & 0x0F) as u64) {
Some(r) => r,
None => {
okay = false;
*okay = false;
break;
}
};
unsafe { self._incr(1) }
}
let payload_ok = okay;
let null_ok = self.matches_cursor_rounded_and_not_eof(|b| b == b'\n');
okay &= null_ok;
unsafe { self._incr(okay as _) }; // skip LF
if okay {
BufferedReadResult::Value(ret)
} else {
unsafe { self.set_cursor(start) }
if payload_ok {
// payload was ok, but we missed the LF
BufferedReadResult::NeedMore
} else {
// payload was NOT ok
BufferedReadResult::Error
}
}
ret
}
}
impl<'a> BufferedScanner<'a> {
impl<'a> Scanner<'a, u8> {
/// Read the next 8 bytes via `next_chunk` and decode them as a little-endian `u64`.
///
/// ## Safety
/// NOTE(review): presumably at least 8 bytes must remain — confirm against `next_chunk`.
pub unsafe fn next_u64_le(&mut self) -> u64 {
    let raw: [u8; 8] = self.next_chunk();
    u64::from_le_bytes(raw)
}

@ -98,7 +98,7 @@ fn parse_lf_separated(
) -> LFTIntParseResult {
let mut ret = previously_buffered;
let mut okay = true;
while scanner.matches_cursor_rounded_and_not_eof(|b| b != b'\n') & okay {
while scanner.matches_cursor_rounded_and_not_eof(|b| *b != b'\n') & okay {
let b = unsafe { scanner.next_byte() };
okay &= b.is_ascii_digit();
ret = match ret.checked_mul(10) {
@ -111,7 +111,7 @@ fn parse_lf_separated(
};
}
let payload_ok = okay;
let lf_ok = scanner.matches_cursor_rounded_and_not_eof(|b| b == b'\n');
let lf_ok = scanner.matches_cursor_rounded_and_not_eof(|b| *b == b'\n');
unsafe { scanner.move_ahead_by(lf_ok as usize) }
if payload_ok & lf_ok {
LFTIntParseResult::Value(ret)

@ -26,7 +26,7 @@
use {
crate::{
engine::mem::buf::{BufferedReadResult, BufferedScanner},
engine::mem::scanner::{BufferedReadResult, BufferedScanner},
util::compiler,
},
std::slice,
@ -367,7 +367,7 @@ impl<'a> CHandshake<'a> {
AuthMode::Password => {}
}
// let us see if we can parse the username length
let uname_l = match scanner.try_next_ascii_u64_lf_separated() {
let uname_l = match scanner.try_next_ascii_u64_lf_separated_with_result() {
BufferedReadResult::NeedMore => {
return HandshakeResult::ChangeState {
new_state: HandshakeState::StaticBlock(static_header),
@ -388,7 +388,7 @@ impl<'a> CHandshake<'a> {
uname_l: usize,
) -> HandshakeResult<'a> {
// we just have to get the password len
let pwd_l = match scanner.try_next_ascii_u64_lf_separated() {
let pwd_l = match scanner.try_next_ascii_u64_lf_separated_with_result() {
BufferedReadResult::Value(v) => v as usize,
BufferedReadResult::NeedMore => {
// newline missing (or maybe there's more?)

Loading…
Cancel
Save