Reduce code redundancy by using `RawParser` and `RawParserExt`

Also added changelog
next
Sayan Nandan 2 years ago
parent 20f039cb85
commit 231dd53341
No known key found for this signature in database
GPG Key ID: 8BC07A0A4D41DD52

@ -2,6 +2,18 @@
All changes in this project will be noted in this file.
## Version 0.8.0
### Additions
- New protocol: Skyhash 2.0
- Reduced bandwidth usage (as much as 50%)
- Even simpler client implementations
- Backward compatibility with Skyhash 1.0:
- Simply set the protocol version you want to use in the config file, through env vars, or by passing
it as a CLI argument
- Even faster implementation, even for Skyhash 1.0
## Version 0.7.5
### Additions

@ -33,6 +33,8 @@ use {
// pub mods
pub mod interface;
pub mod iter;
// internal mods
mod raw_parser;
// versions
mod v1;
mod v2;

@ -0,0 +1,148 @@
/*
* Created on Tue May 03 2022
*
* This file is a part of Skytable
* Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
* NoSQL database written by Sayan Nandan ("the Author") with the
* vision to provide flexibility in data modelling without compromising
* on performance, queryability or scalability.
*
* Copyright (c) 2022, Sayan Nandan <ohsayan@outlook.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
use {
super::{ParseError, ParseResult, UnsafeSlice},
core::mem::transmute,
};
/// Low-level cursor primitives shared by the protocol parsers.
///
/// The `RawParser` trait has three methods that implementors must define:
/// - `cursor_ptr` -> Should point to the current position in the buffer for the parser
/// - `cursor_ptr_mut` -> a mutable reference to the cursor
/// - `data_end_ptr` -> a ptr to one byte past the allocated area of the buffer
///
/// Every other method is provided and is built on top of those three.
///
/// # Safety
/// - `cursor_ptr` must point to a valid location in memory
/// - `data_end_ptr` must point to a valid location in memory, in the **same allocated area**
pub(super) unsafe trait RawParser {
    /// Returns the current position of the cursor
    fn cursor_ptr(&self) -> *const u8;
    /// Returns a mutable reference to the cursor so that it can be moved
    fn cursor_ptr_mut(&mut self) -> &mut *const u8;
    /// Returns a ptr to one byte past the allocated area of the buffer
    fn data_end_ptr(&self) -> *const u8;
    /// Check how many bytes we have left
    ///
    /// Returns `0` if the cursor is at (or has somehow moved past) the end of the
    /// buffer. A plain subtraction would panic in debug builds and wrap around to a
    /// huge value in release builds, which would make `has_remaining` report `true`.
    fn remaining(&self) -> usize {
        (self.data_end_ptr() as usize).saturating_sub(self.cursor_ptr() as usize)
    }
    /// Check if we have `size` bytes remaining
    fn has_remaining(&self, size: usize) -> bool {
        self.remaining() >= size
    }
    /// Check if we have exhausted the buffer
    fn exhausted(&self) -> bool {
        self.cursor_ptr() >= self.data_end_ptr()
    }
    /// Check if the buffer is not exhausted
    fn not_exhausted(&self) -> bool {
        self.cursor_ptr() < self.data_end_ptr()
    }
    /// Attempts to return the byte pointed at by the cursor.
    ///
    /// # Safety
    /// The cursor is dereferenced without any bounds check. The caller must make sure
    /// that the buffer is not exhausted (see `not_exhausted`) before calling this.
    unsafe fn get_byte_at_cursor(&self) -> u8 {
        *self.cursor_ptr()
    }
    /// Increment the cursor by `by` positions
    ///
    /// # Safety
    /// No bounds check is done. The caller must make sure that at least `by` bytes
    /// remain (see `has_remaining`).
    unsafe fn incr_cursor_by(&mut self, by: usize) {
        let cursor = self.cursor_ptr_mut();
        *cursor = (*cursor).add(by);
    }
    /// Increment the position of the cursor by one position
    ///
    /// # Safety
    /// Same contract as `incr_cursor_by` with `by == 1`.
    unsafe fn incr_cursor(&mut self) {
        self.incr_cursor_by(1);
    }
}
/// Higher-level read helpers built purely on top of the [`RawParser`] primitives.
///
/// All methods are provided; the blanket impl below makes them available on every
/// `RawParser` implementor.
pub(super) trait RawParserExt: RawParser {
    /// Take the next `len` bytes from the buffer and move the cursor past them.
    ///
    /// Fails with `ParseError::NotEnough` if fewer than `len` bytes are left.
    fn read_until(&mut self, len: usize) -> ParseResult<UnsafeSlice> {
        if !self.has_remaining(len) {
            return Err(ParseError::NotEnough);
        }
        unsafe {
            // UNSAFE(@ohsayan): Already verified lengths
            let taken = UnsafeSlice::new(self.cursor_ptr(), len);
            self.incr_cursor_by(len);
            Ok(taken)
        }
    }
    #[cfg(test)]
    /// Read everything up to (but excluding) the next LF, then step over the LF.
    ///
    /// Fails with `ParseError::NotEnough` if the buffer ends before an LF is seen.
    fn read_line(&mut self) -> ParseResult<UnsafeSlice> {
        let line_start = self.cursor_ptr();
        unsafe {
            // the bounds check short-circuits, so the cursor is only read while in bounds
            while self.not_exhausted() && self.get_byte_at_cursor() != b'\n' {
                self.incr_cursor();
            }
            let found_lf = self.not_exhausted() && self.get_byte_at_cursor() == b'\n';
            if !found_lf {
                return Err(ParseError::NotEnough);
            }
            let line_len = self.cursor_ptr() as usize - line_start as usize;
            self.incr_cursor(); // skip LF
            Ok(UnsafeSlice::new(line_start, line_len))
        }
    }
    /// Read a line like `read_line` does, but **reject an empty payload**.
    ///
    /// On failure the cursor is left wherever the scan stopped.
    fn read_line_pedantic(&mut self) -> ParseResult<UnsafeSlice> {
        let line_start = self.cursor_ptr();
        unsafe {
            while self.not_exhausted() && self.get_byte_at_cursor() != b'\n' {
                self.incr_cursor();
            }
            let payload_len = self.cursor_ptr() as usize - line_start as usize;
            let found_lf = self.not_exhausted() && self.get_byte_at_cursor() == b'\n';
            if !found_lf || payload_len == 0 {
                // just some silly hackery: the bool is reinterpreted as a `ParseError`
                // (no LF -> discriminant 0, LF with an empty payload -> discriminant 1).
                // NOTE(review): this relies on `ParseError` being a one-byte enum whose
                // discriminants 0 and 1 are the intended variants -- confirm against
                // the `ParseError` definition.
                return Err(transmute(found_lf));
            }
            self.incr_cursor(); // skip LF
            Ok(UnsafeSlice::new(line_start, payload_len))
        }
    }
    /// Read a non-empty, LF-terminated line and parse it as a base-10 `usize`.
    ///
    /// Any non-digit byte, or a value that does not fit into an `usize`, results in
    /// `ParseError::DatatypeParseFailure`.
    fn read_usize(&mut self) -> ParseResult<usize> {
        let line = self.read_line_pedantic()?;
        line.as_slice().iter().try_fold(0usize, |acc, &digit| {
            if !digit.is_ascii_digit() {
                return Err(ParseError::DatatypeParseFailure);
            }
            // the low nibble of an ASCII digit is its numeric value
            acc.checked_mul(10)
                .and_then(|shifted| shifted.checked_add((digit & 0x0F) as usize))
                .ok_or(ParseError::DatatypeParseFailure)
        })
    }
}
/// Every `RawParser` gets the extension methods for free
impl<T> RawParserExt for T where T: RawParser {}

@ -24,12 +24,16 @@
*
*/
use super::{ParseError, ParseResult, PipelinedQuery, Query, SimpleQuery, UnsafeSlice};
use crate::{
corestore::heap_array::{HeapArray, HeapArrayWriter},
dbnet::connection::QueryWithAdvance,
use {
super::{
raw_parser::{RawParser, RawParserExt},
ParseError, ParseResult, PipelinedQuery, Query, SimpleQuery, UnsafeSlice,
},
crate::{
corestore::heap_array::{HeapArray, HeapArrayWriter},
dbnet::connection::QueryWithAdvance,
},
};
use core::mem::transmute;
mod interface_impls;
// test and bench modules
@ -56,6 +60,18 @@ pub struct Parser {
cursor: *const u8,
}
// Hook this parser into the shared `RawParser` helpers by exposing its cursor and
// its end pointer.
// NOTE(review): the `unsafe impl` promises that `cursor` and `end` point into the
// same allocated area; the constructor is not visible here -- confirm.
unsafe impl RawParser for Parser {
// current read position
fn cursor_ptr(&self) -> *const u8 {
self.cursor
}
// mutable handle to the read position, used by the trait to advance the cursor
fn cursor_ptr_mut(&mut self) -> &mut *const u8 {
&mut self.cursor
}
// end-of-buffer pointer
fn data_end_ptr(&self) -> *const u8 {
self.end
}
}
// `Parser` holds raw pointers, so `Send` and `Sync` are not derived automatically.
// NOTE(review): presumably sound because the underlying buffer outlives the parser
// and is not mutated while it is parsed -- confirm against the callers.
unsafe impl Send for Parser {}
unsafe impl Sync for Parser {}
@ -71,52 +87,6 @@ impl Parser {
}
}
// basic methods
impl Parser {
    /// Returns a ptr one byte past the allocation of the buffer
    const fn data_end_ptr(&self) -> *const u8 {
        self.end
    }
    /// Returns the position of the cursor
    /// WARNING: Deref might lead to a segfault
    const fn cursor_ptr(&self) -> *const u8 {
        self.cursor
    }
    /// Check how many bytes we have left
    ///
    /// Returns `0` if the cursor is at (or has somehow moved past) the end of the
    /// buffer. A plain subtraction would panic in debug builds and wrap around to a
    /// huge value in release builds, which would make `has_remaining` report `true`.
    fn remaining(&self) -> usize {
        (self.data_end_ptr() as usize).saturating_sub(self.cursor_ptr() as usize)
    }
    /// Check if we have `size` bytes remaining
    fn has_remaining(&self, size: usize) -> bool {
        self.remaining() >= size
    }
    /// Check if we have exhausted the buffer
    fn exhausted(&self) -> bool {
        self.cursor_ptr() >= self.data_end_ptr()
    }
    /// Check if the buffer is not exhausted
    fn not_exhausted(&self) -> bool {
        self.cursor_ptr() < self.data_end_ptr()
    }
    /// Attempts to return the byte pointed at by the cursor.
    ///
    /// # Safety
    /// The cursor is dereferenced without any bounds check. The caller must make sure
    /// that the buffer is not exhausted before calling this.
    const unsafe fn get_byte_at_cursor(&self) -> u8 {
        *self.cursor_ptr()
    }
}
// mutable refs
impl Parser {
    /// Move the cursor forward by exactly one byte
    unsafe fn incr_cursor(&mut self) {
        self.cursor = self.cursor.add(1);
    }
    /// Move the cursor forward by `by` bytes (no bounds check is done here)
    unsafe fn incr_cursor_by(&mut self, by: usize) {
        let advanced = self.cursor.add(by);
        self.cursor = advanced;
    }
}
// utility methods
impl Parser {
/// Returns true if the cursor will give a char, but if `this_if_nothing_ahead` is set
@ -148,58 +118,6 @@ impl Parser {
// higher level abstractions
impl Parser {
/// Take the next `len` bytes from the buffer and move the cursor past them.
///
/// Fails with `ParseError::NotEnough` if fewer than `len` bytes are left.
fn read_until(&mut self, len: usize) -> ParseResult<UnsafeSlice> {
    if !self.has_remaining(len) {
        return Err(ParseError::NotEnough);
    }
    unsafe {
        // UNSAFE(@ohsayan): Already verified lengths
        let taken = UnsafeSlice::new(self.cursor_ptr(), len);
        self.incr_cursor_by(len);
        Ok(taken)
    }
}
/// Read everything up to the next LF and step over it, **rejecting an empty payload**.
///
/// On failure the cursor is left wherever the scan stopped.
fn read_line_pedantic(&mut self) -> ParseResult<UnsafeSlice> {
    let line_start = self.cursor_ptr();
    unsafe {
        // the bounds check short-circuits, so the cursor is only read while in bounds
        while self.not_exhausted() && self.get_byte_at_cursor() != b'\n' {
            self.incr_cursor();
        }
        let payload_len = self.cursor_ptr() as usize - line_start as usize;
        let found_lf = self.not_exhausted() && self.get_byte_at_cursor() == b'\n';
        if !found_lf || payload_len == 0 {
            // just some silly hackery: the bool is reinterpreted as a `ParseError`
            // (no LF -> discriminant 0, LF with an empty payload -> discriminant 1).
            // NOTE(review): relies on the layout of `ParseError` -- confirm against
            // its definition.
            return Err(transmute(found_lf));
        }
        self.incr_cursor(); // skip LF
        Ok(UnsafeSlice::new(line_start, payload_len))
    }
}
/// Read a non-empty, LF-terminated line and parse it as a base-10 `usize`.
///
/// Any non-digit byte, or a value that does not fit into an `usize`, results in
/// `ParseError::DatatypeParseFailure`.
fn read_usize(&mut self) -> ParseResult<usize> {
    let line = self.read_line_pedantic()?;
    line.as_slice().iter().try_fold(0usize, |acc, &digit| {
        if !digit.is_ascii_digit() {
            return Err(ParseError::DatatypeParseFailure);
        }
        // the low nibble of an ASCII digit is its numeric value
        acc.checked_mul(10)
            .and_then(|shifted| shifted.checked_add((digit & 0x0F) as usize))
            .ok_or(ParseError::DatatypeParseFailure)
    })
}
/// Parse the next blob. **The cursor should be at the tsymbol (passed)**
fn parse_next_blob(&mut self) -> ParseResult<UnsafeSlice> {
{

@ -26,12 +26,14 @@
mod interface_impls;
use crate::{
corestore::heap_array::HeapArray,
dbnet::connection::QueryWithAdvance,
protocol::{ParseError, ParseResult, PipelinedQuery, Query, SimpleQuery, UnsafeSlice},
use {
super::{
raw_parser::{RawParser, RawParserExt},
ParseError, ParseResult, PipelinedQuery, Query, SimpleQuery, UnsafeSlice,
},
crate::{corestore::heap_array::HeapArray, dbnet::connection::QueryWithAdvance},
};
use core::mem::transmute;
#[cfg(feature = "nightly")]
mod benches;
#[cfg(test)]
@ -43,6 +45,18 @@ pub struct Parser {
cursor: *const u8,
}
// Hook this parser into the shared `RawParser` helpers by exposing its cursor and
// its end pointer.
// NOTE(review): the `unsafe impl` promises that `cursor` and `end` point into the
// same allocated area; the constructor is not visible here -- confirm.
unsafe impl RawParser for Parser {
// current read position
fn cursor_ptr(&self) -> *const u8 {
self.cursor
}
// mutable handle to the read position, used by the trait to advance the cursor
fn cursor_ptr_mut(&mut self) -> &mut *const u8 {
&mut self.cursor
}
// end-of-buffer pointer
fn data_end_ptr(&self) -> *const u8 {
self.end
}
}
// `Parser` holds raw pointers, so `Sync` and `Send` are not derived automatically.
// NOTE(review): presumably sound because the underlying buffer outlives the parser
// and is not mutated while it is parsed -- confirm against the callers.
unsafe impl Sync for Parser {}
unsafe impl Send for Parser {}
@ -58,126 +72,6 @@ impl Parser {
}
}
// basic methods
impl Parser {
    /// Returns a ptr one byte past the allocation of the buffer
    const fn data_end_ptr(&self) -> *const u8 {
        self.end
    }
    /// Returns the position of the cursor
    /// WARNING: Deref might lead to a segfault
    const fn cursor_ptr(&self) -> *const u8 {
        self.cursor
    }
    /// Check how many bytes we have left
    ///
    /// Returns `0` if the cursor is at (or has somehow moved past) the end of the
    /// buffer. A plain subtraction would panic in debug builds and wrap around to a
    /// huge value in release builds, which would make `has_remaining` report `true`.
    fn remaining(&self) -> usize {
        (self.data_end_ptr() as usize).saturating_sub(self.cursor_ptr() as usize)
    }
    /// Check if we have `size` bytes remaining
    fn has_remaining(&self, size: usize) -> bool {
        self.remaining() >= size
    }
    #[cfg(test)]
    /// Check if we have exhausted the buffer
    fn exhausted(&self) -> bool {
        self.cursor_ptr() >= self.data_end_ptr()
    }
    /// Check if the buffer is not exhausted
    fn not_exhausted(&self) -> bool {
        self.cursor_ptr() < self.data_end_ptr()
    }
    /// Attempts to return the byte pointed at by the cursor.
    ///
    /// # Safety
    /// The cursor is dereferenced without any bounds check. The caller must make sure
    /// that the buffer is not exhausted before calling this.
    const unsafe fn get_byte_at_cursor(&self) -> u8 {
        *self.cursor_ptr()
    }
}
// mutable refs
impl Parser {
    /// Move the cursor forward by exactly one byte
    unsafe fn incr_cursor(&mut self) {
        self.cursor = self.cursor.add(1);
    }
    /// Move the cursor forward by `by` bytes (no bounds check is done here)
    unsafe fn incr_cursor_by(&mut self, by: usize) {
        let advanced = self.cursor.add(by);
        self.cursor = advanced;
    }
}
// higher level abstractions
impl Parser {
    /// Take the next `len` bytes from the buffer and move the cursor past them.
    ///
    /// Fails with `ParseError::NotEnough` if fewer than `len` bytes are left.
    fn read_until(&mut self, len: usize) -> ParseResult<UnsafeSlice> {
        if !self.has_remaining(len) {
            return Err(ParseError::NotEnough);
        }
        unsafe {
            // UNSAFE(@ohsayan): Already verified lengths
            let taken = UnsafeSlice::new(self.cursor_ptr(), len);
            self.incr_cursor_by(len);
            Ok(taken)
        }
    }
    #[cfg(test)]
    /// Read everything up to (but excluding) the next LF, then step over the LF.
    ///
    /// Fails with `ParseError::NotEnough` if the buffer ends before an LF is seen.
    fn read_line(&mut self) -> ParseResult<UnsafeSlice> {
        let line_start = self.cursor_ptr();
        unsafe {
            // the bounds check short-circuits, so the cursor is only read while in bounds
            while self.not_exhausted() && self.get_byte_at_cursor() != b'\n' {
                self.incr_cursor();
            }
            let found_lf = self.not_exhausted() && self.get_byte_at_cursor() == b'\n';
            if !found_lf {
                return Err(ParseError::NotEnough);
            }
            let line_len = self.cursor_ptr() as usize - line_start as usize;
            self.incr_cursor(); // skip LF
            Ok(UnsafeSlice::new(line_start, line_len))
        }
    }
    /// Read a line like `read_line` does, but **reject an empty payload**.
    ///
    /// On failure the cursor is left wherever the scan stopped.
    fn read_line_pedantic(&mut self) -> ParseResult<UnsafeSlice> {
        let line_start = self.cursor_ptr();
        unsafe {
            while self.not_exhausted() && self.get_byte_at_cursor() != b'\n' {
                self.incr_cursor();
            }
            let payload_len = self.cursor_ptr() as usize - line_start as usize;
            let found_lf = self.not_exhausted() && self.get_byte_at_cursor() == b'\n';
            if !found_lf || payload_len == 0 {
                // just some silly hackery: the bool is reinterpreted as a `ParseError`
                // (no LF -> discriminant 0, LF with an empty payload -> discriminant 1).
                // NOTE(review): relies on the layout of `ParseError` -- confirm against
                // its definition.
                return Err(transmute(found_lf));
            }
            self.incr_cursor(); // skip LF
            Ok(UnsafeSlice::new(line_start, payload_len))
        }
    }
    /// Read a non-empty, LF-terminated line and parse it as a base-10 `usize`.
    ///
    /// Any non-digit byte, or a value that does not fit into an `usize`, results in
    /// `ParseError::DatatypeParseFailure`.
    fn read_usize(&mut self) -> ParseResult<usize> {
        let line = self.read_line_pedantic()?;
        line.as_slice().iter().try_fold(0usize, |acc, &digit| {
            if !digit.is_ascii_digit() {
                return Err(ParseError::DatatypeParseFailure);
            }
            // the low nibble of an ASCII digit is its numeric value
            acc.checked_mul(10)
                .and_then(|shifted| shifted.checked_add((digit & 0x0F) as usize))
                .ok_or(ParseError::DatatypeParseFailure)
        })
    }
}
// query impls
impl Parser {
/// Parse the next simple query. This should have passed the `*` tsymbol

@ -24,7 +24,10 @@
*
*/
use super::{Parser, PipelinedQuery, Query, SimpleQuery};
use super::{
super::raw_parser::{RawParser, RawParserExt},
Parser, PipelinedQuery, Query, SimpleQuery,
};
use crate::protocol::{iter::AnyArrayIter, ParseError};
use std::iter::Map;
use std::vec::IntoIter as VecIntoIter;

Loading…
Cancel
Save