Add `Integer32Buffer` for faster encoding/decoding

next
Sayan Nandan 3 years ago
parent 5790e99a98
commit 48e29b6ec6

@ -107,12 +107,21 @@ impl<T, const N: usize> Array<T, N> {
init_len: 0,
}
}
/// Create an array whose backing storage is filled with zero bytes and whose
/// `init_len` is set to the full capacity `N`
///
/// NOTE(review): this marks every slot as initialized, so it presumably is only
/// meaningful for `T` where the all-zero bit pattern is a valid value — confirm
/// against callers. `N` is narrowed to `u16` with `as`.
pub fn new_zeroed() -> Self {
    let init_len = N as u16;
    // zero-fill the whole backing storage in one go
    let stack = unsafe { std::mem::zeroed() };
    Self { stack, init_len }
}
/// Build an array in a `const` context from an already prepared backing store
/// (`array`) and the value to record as `init_len`
///
/// NOTE(review): `init_len` is stored as given and is not checked against `N`
/// here — presumably the caller guarantees it is accurate.
pub const fn from_const(array: [MaybeUninit<T>; N], init_len: u16) -> Self {
    let stack = array;
    Self { stack, init_len }
}
/// Increase the recorded `init_len` by `bump`
///
/// # Safety
/// Nothing is written to the backing storage here; only the counter changes.
/// NOTE(review): presumably the caller must have initialized the `bump`
/// additional slots beforehand and must keep the total within `N` — confirm.
pub unsafe fn bump_init_len(&mut self, bump: u16) {
    let bumped = self.init_len + bump;
    self.init_len = bumped;
}
/// This literally turns [T; M] into [T; N]. How can you expect it to be safe?
/// This function is extremely unsafe. I mean, I don't even know how to call it safe.
/// There's one way though: make M == N. This will panic in debug mode if M > N. In

@ -0,0 +1,195 @@
/*
* Created on Mon Jul 12 2021
*
* This file is a part of Skytable
* Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
* NoSQL database written by Sayan Nandan ("the Author") with the
* vision to provide flexibility in data modelling without compromising
* on performance, queryability or scalability.
*
* Copyright (c) 2021, Sayan Nandan <ohsayan@outlook.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
use super::array::Array;
use core::ops::Deref;
use core::str;
/// Two-digit lookup table: for every `n` in `0..100`, the bytes at indices
/// `2 * n` and `2 * n + 1` are the ASCII tens and ones digits of `n`
/// (e.g. `n = 42` maps to `0x34, 0x32`, i.e. `"42"`). Each pair of rows below
/// covers one tens digit; the trailing comment names that digit's ASCII code.
const PAIR_MAP_LUT: [u8; 200] = [
0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, 0x30, 0x36, 0x30, 0x37,
0x30, 0x38, 0x30, 0x39, // 0x30
0x31, 0x30, 0x31, 0x31, 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37,
0x31, 0x38, 0x31, 0x39, // 0x31
0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37,
0x32, 0x38, 0x32, 0x39, // 0x32
0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, 0x33, 0x36, 0x33, 0x37,
0x33, 0x38, 0x33, 0x39, // 0x33
0x34, 0x30, 0x34, 0x31, 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37,
0x34, 0x38, 0x34, 0x39, // 0x34
0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37,
0x35, 0x38, 0x35, 0x39, // 0x35
0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, 0x36, 0x36, 0x36, 0x37,
0x36, 0x38, 0x36, 0x39, // 0x36
0x37, 0x30, 0x37, 0x31, 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37,
0x37, 0x38, 0x37, 0x39, // 0x37
0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37,
0x38, 0x38, 0x38, 0x39, // 0x38
0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, 0x39, 0x36, 0x39, 0x37,
0x39, 0x38, 0x39, 0x39, // 0x39
];
#[derive(Debug)]
/// A fixed-capacity buffer holding the decimal ASCII encoding of an unsigned 32-bit
/// integer, with one _extra byte_ of memory reserved for appending a character.
///
/// The largest `u32` (`4294967295`) needs 10 digits, so the 11-byte backing array
/// leaves room for exactly one additional byte. The integer is encoded when the
/// buffer is created through [`Self::init`].
pub struct Integer32Buffer {
// encoded digits, plus the optional extra byte
inner_stack: Array<u8, 11>,
}
impl Integer32Buffer {
    /// Initialize a buffer containing the decimal ASCII encoding of `integer`
    pub fn init(integer: u32) -> Self {
        let mut slf = Self {
            inner_stack: Array::new(),
        };
        unsafe {
            // SAFETY: a `u32` encodes to at most 10 digits, which fits into the
            // 11-byte stack (assuming `Array::new()` hands back an empty array)
            slf._init_integer(integer);
        }
        slf
    }
    /// Push the two ASCII digits stored at `PAIR_MAP_LUT[idx]` and `PAIR_MAP_LUT[idx + 1]`
    ///
    /// # Safety
    /// The pushes are unchecked: the caller must guarantee room for two more bytes.
    unsafe fn _push_pair(&mut self, idx: u32) {
        self.inner_stack.push_unchecked(PAIR_MAP_LUT[idx as usize]);
        self.inner_stack
            .push_unchecked(PAIR_MAP_LUT[(idx + 1) as usize]);
    }
    /// Push `quad` (which must be below 10_000) as exactly four zero-padded ASCII digits
    ///
    /// # Safety
    /// The pushes are unchecked: the caller must guarantee room for four more bytes.
    unsafe fn _push_quad(&mut self, quad: u32) {
        self._push_pair((quad / 100) << 1);
        self._push_pair((quad % 100) << 1);
    }
    /// Initialize an integer. This is unsafe to be called outside because you'll be
    /// pushing in another integer and might end up corrupting your own stack as all
    /// pushes are unchecked!
    ///
    /// The value is split into groups of at most four digits; every two-digit group is
    /// turned into ASCII through `PAIR_MAP_LUT`. Leading zeroes of the most significant
    /// group are skipped, all following groups are written zero-padded.
    unsafe fn _init_integer(&mut self, mut val: u32) {
        if val < 10_000 {
            // 1 to 4 digits
            let d1 = (val / 100) << 1;
            let d2 = (val % 100) << 1;
            if val >= 1000 {
                self.inner_stack.push_unchecked(PAIR_MAP_LUT[d1 as usize]);
            }
            if val >= 100 {
                self.inner_stack
                    .push_unchecked(PAIR_MAP_LUT[(d1 + 1) as usize]);
            }
            if val >= 10 {
                self.inner_stack.push_unchecked(PAIR_MAP_LUT[d2 as usize]);
            }
            self.inner_stack
                .push_unchecked(PAIR_MAP_LUT[(d2 + 1) as usize]);
        } else if val < 100_000_000 {
            // 5 to 8 digits: val = bbbbcccc
            let b = val / 10_000;
            let c = val % 10_000;
            let d1 = (b / 100) << 1;
            let d2 = (b % 100) << 1;
            // The comparisons must be inclusive: with a strict `>` the exact powers of
            // ten (100_000, 1_000_000, 10_000_000) would lose their leading `1`
            if val >= 10_000_000 {
                self.inner_stack.push_unchecked(PAIR_MAP_LUT[d1 as usize]);
            }
            if val >= 1_000_000 {
                self.inner_stack
                    .push_unchecked(PAIR_MAP_LUT[(d1 + 1) as usize]);
            }
            if val >= 100_000 {
                self.inner_stack.push_unchecked(PAIR_MAP_LUT[d2 as usize]);
            }
            self.inner_stack
                .push_unchecked(PAIR_MAP_LUT[(d2 + 1) as usize]);
            // the lower four digits are always written in full
            self._push_quad(c);
        } else {
            // worst, 9 or 10 digits: val = aabbbbcccc with `a` in 1..=42
            let a = val / 100_000_000;
            val %= 100_000_000;
            if a >= 10 {
                self._push_pair(a << 1);
            } else {
                // single leading digit: emit the digit itself, not a constant `0`
                self.inner_stack.push_unchecked(b'0' + (a as u8));
            }
            // write back the remaining eight digits, zero-padded
            self._push_quad(val / 10_000);
            self._push_quad(val % 10_000);
        }
    }
    /// **This is very unsafe** Only push something when you know that the capacity won't overflow
    /// your allowance of 11 bytes. Oh no, there's no panic for you because you'll silently
    /// corrupt your own memory (or others' :/)
    ///
    /// # Safety
    /// The push is unchecked, so the buffer must currently hold fewer than 11 bytes.
    /// The buffer is also exposed as a `str` through `Deref` without validation, so
    /// the stored bytes must remain valid UTF-8.
    pub unsafe fn push(&mut self, val: u8) {
        self.inner_stack.push_unchecked(val)
    }
}
impl Deref for Integer32Buffer {
    type Target = str;
    /// View the encoded bytes as a string slice
    fn deref(&self) -> &Self::Target {
        let bytes: &[u8] = &self.inner_stack;
        // The bytes are not validated here: `_init_integer` only pushes ASCII digits
        // taken from the lookup table, and callers of the unsafe `push` are trusted
        // to keep the content valid UTF-8
        unsafe { str::from_utf8_unchecked(bytes) }
    }
}
impl AsRef<str> for Integer32Buffer {
fn as_ref(&self) -> &str {
&self
}
}
impl<T> PartialEq<T> for Integer32Buffer
where
    T: AsRef<str>,
{
    /// Compare the encoded text against anything that can be viewed as a `str`
    fn eq(&self, other_str: &T) -> bool {
        let ours: &str = self.as_ref();
        let theirs: &str = other_str.as_ref();
        ours == theirs
    }
}
#[test]
fn test_int32_buffer() {
    // the buffer must render exactly like the standard library does
    let expected = 256.to_string();
    let encoded = Integer32Buffer::init(256);
    assert_eq!(encoded, expected);
}
#[test]
fn test_push() {
    let mut encoded = Integer32Buffer::init(278);
    // "278" occupies 3 of the 11 bytes, so one extra byte fits
    unsafe { encoded.push(b'?') };
    assert_eq!(encoded, "278?");
}

@ -39,6 +39,7 @@ pub use htable::Data;
use libsky::TResult;
use std::sync::Arc;
pub mod array;
pub mod buffers;
pub mod htable;
pub mod iarray;
pub mod lazy;

@ -27,9 +27,9 @@
//! Interfaces with the file system
use super::PartitionID;
use crate::coredb::buffers::Integer32Buffer;
use crate::coredb::htable::Coremap;
use crate::coredb::htable::Data;
use crate::util;
use std::fs;
use std::io::Result as IoResult;
use std::io::{BufWriter, Write};
@ -49,17 +49,20 @@ pub fn serialize_map_into_slow_buffer<T: Write>(
}
/// Get the file for COW. If the partition ID is 0000
fn cow_file(id: PartitionID) -> String {
let mut id = util::it32_to_str(id);
id.push('_');
id
fn cow_file(id: PartitionID) -> Integer32Buffer {
    // encode the partition ID, then append a trailing underscore
    let mut fname = Integer32Buffer::init(id);
    // Only a single byte is pushed after the digits, which is what the buffer's
    // one reserved extra byte is for
    unsafe { fname.push(b'_') };
    fname
}
// Checks that the COW file name for partition 10 is "10_", and that dropping the
// last byte yields the plain partition name "10"
#[test]
fn test_cowfile() {
let cow_file = cow_file(10);
assert_eq!(cow_file, "10_".to_owned());
// everything except the trailing '_' must be the partition ID itself
assert_eq!(&cow_file[..cow_file.len()-1], "10".to_owned());
assert_eq!(&cow_file[..cow_file.len() - 1], "10".to_owned());
}
/// Returns a handle to a thread that was spawned to handle this specific flush routine
@ -69,10 +72,10 @@ pub fn threaded_se(
) -> JoinHandle<IoResult<()>> {
thread::spawn(move || {
let fname = cow_file(partition_id);
let mut f = fs::File::create(&fname)?;
let mut f = fs::File::create(&*fname)?;
self::serialize_map_into_slow_buffer(&mut f, &tblref)?;
f.sync_all()?;
fs::rename(&fname, &fname[..fname.len() - 1])?;
fs::rename(&*fname, &fname[..fname.len() - 1])?;
Ok(())
})
}

@ -24,9 +24,6 @@
*
*/
use core::ptr;
use core::slice;
/// # Unsafe unwrapping
///
/// This trait provides a method `unsafe_unwrap` that is potentially unsafe and has
@ -93,116 +90,3 @@ macro_rules! cfg_test {
$(#[cfg(test)] $item)*
};
}
/*
    32-bit integer to String conversion. This algorithm was "invented" by Ben Voigt and written in C++
    as a part of a "challenge" and was ported to Rust with some modifications by Sayan.
    The digits are produced two at a time, from the least significant pair upwards, using a
    lookup table; all copies are bounds-checked, so no `unsafe` is required.
*/
/// Two-digit lookup table: for every `n` in `0..100`, the bytes at indices
/// `2 * n` and `2 * n + 1` are the ASCII tens and ones digits of `n`
const PAIR_MAP_LUT: [u8; 200] = [
    0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, 0x30, 0x36, 0x30, 0x37,
    0x30, 0x38, 0x30, 0x39, // 0x30
    0x31, 0x30, 0x31, 0x31, 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37,
    0x31, 0x38, 0x31, 0x39, // 0x31
    0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37,
    0x32, 0x38, 0x32, 0x39, // 0x32
    0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, 0x33, 0x36, 0x33, 0x37,
    0x33, 0x38, 0x33, 0x39, // 0x33
    0x34, 0x30, 0x34, 0x31, 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37,
    0x34, 0x38, 0x34, 0x39, // 0x34
    0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37,
    0x35, 0x38, 0x35, 0x39, // 0x35
    0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, 0x36, 0x36, 0x36, 0x37,
    0x36, 0x38, 0x36, 0x39, // 0x36
    0x37, 0x30, 0x37, 0x31, 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37,
    0x37, 0x38, 0x37, 0x39, // 0x37
    0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37,
    0x38, 0x38, 0x38, 0x39, // 0x38
    0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, 0x39, 0x36, 0x39, 0x37,
    0x39, 0x38, 0x39, 0x39, // 0x39
];
/// Size of the scratch buffer: the largest `u32` (`4294967295`) has 10 decimal digits
const BUFSIZE: usize = 10;
#[inline]
/// Convert a 32-bit unsigned integer to a String.
///
/// The scratch buffer is filled from its end towards its start: every loop iteration
/// writes the two least significant remaining digits, then the final one or two
/// leading digits are written without zero padding.
pub fn it32_to_str(mut val: u32) -> String {
    let mut buf = [0u8; BUFSIZE];
    // index of the first byte written so far; digits occupy `buf[start..]`
    let mut start = BUFSIZE;
    while val >= 100 {
        let pair = (val % 100) as usize * 2;
        start -= 2;
        buf[start..start + 2].copy_from_slice(&PAIR_MAP_LUT[pair..pair + 2]);
        val /= 100;
    }
    if val >= 10 {
        // two leading digits left
        let pair = val as usize * 2;
        start -= 2;
        buf[start..start + 2].copy_from_slice(&PAIR_MAP_LUT[pair..pair + 2]);
    } else {
        // a single leading digit (this also covers `0`)
        start -= 1;
        buf[start] = b'0' + val as u8;
    }
    // every written byte is an ASCII digit, so a byte-to-char mapping is lossless
    buf[start..].iter().map(|&digit| char::from(digit)).collect()
}
// Test helper: asserts that `it32_to_str` produces the same text as the standard
// library's `to_string` for the given expression. The expression is expanded twice,
// so it should be side-effect free (the tests only pass integer literals).
#[cfg(test)]
macro_rules! assert_itoa32 {
($e:expr) => {
assert_eq!($e.to_string(), self::it32_to_str($e));
};
}
#[test]
fn test_numbers() {
    // just some random funny varying length integers (including zeroes) to test the
    // function; every entry is compared against the standard library's formatting
    let cases: &[u32] = &[
        // repeated ones, 1 to 10 digits
        1,
        11,
        111,
        1111,
        11111,
        111111,
        1111111,
        11111111,
        111111111,
        1111111111,
        // zero written with leading zeroes
        0000000000,
        // descending lengths
        888888888,
        77777777,
        6666666,
        555555,
        44444,
        3333,
        222,
        11,
        0,
        // all nines, 1 to 9 digits
        9,
        99,
        999,
        9999,
        99999,
        999999,
        9999999,
        99999999,
        999999999,
        // mixed digits
        123456789,
        12345678,
        1234567,
        123456,
        12345,
        1234,
        123,
        12,
    ];
    for &n in cases {
        assert_itoa32!(n);
    }
}

Loading…
Cancel
Save