diff --git a/server/src/coredb/array.rs b/server/src/coredb/array.rs
index a2b44d9f..89c9ca31 100644
--- a/server/src/coredb/array.rs
+++ b/server/src/coredb/array.rs
@@ -107,12 +107,31 @@ impl Array {
             init_len: 0,
         }
     }
+    /// Create an array whose backing storage is zero-filled and whose `init_len` is already
+    /// `N`.
+    ///
+    /// NOTE(review): this is a safe function, yet reading the slots as elements is presumably
+    /// only sound when all-zero bytes form a valid element value -- confirm with the callers
+    pub fn new_zeroed() -> Self {
+        Self {
+            stack: unsafe { std::mem::zeroed() },
+            init_len: N as u16,
+        }
+    }
     pub const fn from_const(array: [MaybeUninit; N], init_len: u16) -> Self {
         Self {
             stack: array,
             init_len,
         }
     }
+    /// Add `bump` to `init_len`. Nothing else is touched and nothing is checked.
+    ///
+    /// # Safety
+    /// NOTE(review): presumably the caller must already have initialized the next `bump`
+    /// slots and must keep `init_len` within the capacity `N` -- confirm against `Array`'s docs
+    pub unsafe fn bump_init_len(&mut self, bump: u16) {
+        self.init_len += bump
+    }
     /// This literally turns [T; M] into [T; N]. How can you expect it to be safe?
     /// This function is extremely unsafe. I mean, I don't even know how to call it safe.
     /// There's one way though: make M == N. This will panic in debug mode if M > N. In
diff --git a/server/src/coredb/buffers.rs b/server/src/coredb/buffers.rs
new file mode 100644
index 00000000..31212c7b
--- /dev/null
+++ b/server/src/coredb/buffers.rs
@@ -0,0 +1,220 @@
+/*
+ * Created on Mon Jul 12 2021
+ *
+ * This file is a part of Skytable
+ * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
+ * NoSQL database written by Sayan Nandan ("the Author") with the
+ * vision to provide flexibility in data modelling without compromising
+ * on performance, queryability or scalability.
+ *
+ * Copyright (c) 2021, Sayan Nandan
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+*/
+
+use super::array::Array;
+use core::ops::Deref;
+use core::str;
+
+/// ASCII digit pairs for `00..=99`: bytes `2 * n` and `2 * n + 1` hold the tens and ones
+/// digits of `n`
+const PAIR_MAP_LUT: [u8; 200] = [
+    0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, 0x30, 0x36, 0x30, 0x37,
+    0x30, 0x38, 0x30, 0x39, // 0x30
+    0x31, 0x30, 0x31, 0x31, 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37,
+    0x31, 0x38, 0x31, 0x39, // 0x31
+    0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37,
+    0x32, 0x38, 0x32, 0x39, // 0x32
+    0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, 0x33, 0x36, 0x33, 0x37,
+    0x33, 0x38, 0x33, 0x39, // 0x33
+    0x34, 0x30, 0x34, 0x31, 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37,
+    0x34, 0x38, 0x34, 0x39, // 0x34
+    0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37,
+    0x35, 0x38, 0x35, 0x39, // 0x35
+    0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, 0x36, 0x36, 0x36, 0x37,
+    0x36, 0x38, 0x36, 0x39, // 0x36
+    0x37, 0x30, 0x37, 0x31, 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37,
+    0x37, 0x38, 0x37, 0x39, // 0x37
+    0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37,
+    0x38, 0x38, 0x38, 0x39, // 0x38
+    0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, 0x39, 0x36, 0x39, 0x37,
+    0x39, 0x38, 0x39, 0x39, // 0x39
+];
+
+#[derive(Debug)]
+/// A buffer for unsigned 32-bit integers with one _extra byte_ of memory reserved for
+/// adding characters. On initialization (through [`Self::init`]), your integer will be
+/// encoded and stored into the _unsafe array_
+pub struct Integer32Buffer {
+    /// Up to 10 ASCII digits (the longest `u32`) plus the one reserved byte
+    inner_stack: Array<u8, 11>,
+}
+
+impl Integer32Buffer {
+    /// Initialize a buffer holding the decimal ASCII representation of `integer`
+    pub fn init(integer: u32) -> Self {
+        let mut slf = Self {
+            inner_stack: Array::new(),
+        };
+        unsafe {
+            // UNSAFE: the stack was just created and a u32 encodes to at most 10 of its 11 bytes
+            slf._init_integer(integer);
+        }
+        slf
+    }
+    /// Initialize an integer. This is unsafe to be called outside because you'll be
+    /// pushing in another integer and might end up corrupting your own stack as all
+    /// pushes are unchecked!
+    ///
+    /// Digits are emitted most-significant first, two at a time, through `PAIR_MAP_LUT`;
+    /// leading zeroes are skipped by comparing `val` against the matching power of ten
+    unsafe fn _init_integer(&mut self, mut val: u32) {
+        if val < 10_000 {
+            let d1 = (val / 100) << 1;
+            let d2 = (val % 100) << 1;
+            if val >= 1000 {
+                self.inner_stack.push_unchecked(PAIR_MAP_LUT[d1 as usize]);
+            }
+            if val >= 100 {
+                self.inner_stack
+                    .push_unchecked(PAIR_MAP_LUT[(d1 + 1) as usize]);
+            }
+            if val >= 10 {
+                self.inner_stack.push_unchecked(PAIR_MAP_LUT[d2 as usize]);
+            }
+            self.inner_stack
+                .push_unchecked(PAIR_MAP_LUT[(d2 + 1) as usize]);
+        } else if val < 100_000_000 {
+            let b = val / 10000;
+            let c = val % 10000;
+            let d1 = (b / 100) << 1;
+            let d2 = (b % 100) << 1;
+            let d3 = (c / 100) << 1;
+            let d4 = (c % 100) << 1;
+
+            // `>=`, not `>`: an exact power of ten must still emit its leading `1`
+            if val >= 10_000_000 {
+                self.inner_stack.push_unchecked(PAIR_MAP_LUT[d1 as usize]);
+            }
+            if val >= 1_000_000 {
+                self.inner_stack
+                    .push_unchecked(PAIR_MAP_LUT[(d1 + 1) as usize]);
+            }
+            if val >= 100_000 {
+                self.inner_stack.push_unchecked(PAIR_MAP_LUT[d2 as usize]);
+            }
+            self.inner_stack
+                .push_unchecked(PAIR_MAP_LUT[(d2 + 1) as usize]);
+            self.inner_stack.push_unchecked(PAIR_MAP_LUT[d3 as usize]);
+            self.inner_stack
+                .push_unchecked(PAIR_MAP_LUT[(d3 + 1) as usize]);
+            self.inner_stack.push_unchecked(PAIR_MAP_LUT[d4 as usize]);
+            self.inner_stack
+                .push_unchecked(PAIR_MAP_LUT[(d4 + 1) as usize]);
+        } else {
+            // worst, 100M or more: `a` is the leading one or two digits (1..=42)
+            let a = val / 100000000;
+            val %= 100000000;
+
+            // `a < 10` sits in the first LUT row, so `i + 1` is its single digit
+            let i = a << 1;
+            if a >= 10 {
+                self.inner_stack.push_unchecked(PAIR_MAP_LUT[i as usize]);
+            }
+            self.inner_stack
+                .push_unchecked(PAIR_MAP_LUT[(i + 1) as usize]);
+            let b = val / 10000;
+            let c = val % 10000;
+            let d1 = (b / 100) << 1;
+            let d2 = (b % 100) << 1;
+            let d3 = (c / 100) << 1;
+            let d4 = (c % 100) << 1;
+            // write back
+            self.inner_stack.push_unchecked(PAIR_MAP_LUT[d1 as usize]);
+            self.inner_stack
+                .push_unchecked(PAIR_MAP_LUT[(d1 + 1) as usize]);
+            self.inner_stack.push_unchecked(PAIR_MAP_LUT[d2 as usize]);
+            self.inner_stack
+                .push_unchecked(PAIR_MAP_LUT[(d2 + 1) as usize]);
+            self.inner_stack.push_unchecked(PAIR_MAP_LUT[d3 as usize]);
+            self.inner_stack
+                .push_unchecked(PAIR_MAP_LUT[(d3 + 1) as usize]);
+            self.inner_stack.push_unchecked(PAIR_MAP_LUT[d4 as usize]);
+            self.inner_stack
+                .push_unchecked(PAIR_MAP_LUT[(d4 + 1) as usize]);
+        }
+    }
+    /// Append one raw byte without any bounds check.
+    ///
+    /// # Safety
+    /// **This is very unsafe** Only push something when you know that the capacity won't overflow
+    /// your allowance of 11 bytes. Oh no, there's no panic for you because you'll silently
+    /// corrupt your own memory (or others' :/). The byte must also keep the contents valid
+    /// UTF-8, because the `Deref` impl hands them out as a `str` without checking
+    pub unsafe fn push(&mut self, val: u8) {
+        self.inner_stack.push_unchecked(val)
+    }
+}
+
+impl Deref for Integer32Buffer {
+    type Target = str;
+    fn deref(&self) -> &Self::Target {
+        // UNSAFE: `init` only stores ASCII digits; `push` callers must keep it UTF-8
+        unsafe { str::from_utf8_unchecked(&self.inner_stack) }
+    }
+}
+
+impl AsRef<str> for Integer32Buffer {
+    fn as_ref(&self) -> &str {
+        self
+    }
+}
+
+impl<T> PartialEq<T> for Integer32Buffer
+where
+    T: AsRef<str>,
+{
+    fn eq(&self, other_str: &T) -> bool {
+        self.as_ref() == other_str.as_ref()
+    }
+}
+
+#[test]
+fn test_int32_buffer() {
+    let buffer = Integer32Buffer::init(256);
+    assert_eq!(buffer, 256.to_string());
+}
+
+#[test]
+fn test_int32_buffer_boundaries() {
+    // each power of ten and its predecessor, i.e. every digit-count boundary of a u32
+    let ints = [
+        0, 9, 10, 99, 100, 999, 1_000, 9_999, 10_000, 99_999, 100_000, 999_999, 1_000_000,
+        9_999_999, 10_000_000, 99_999_999, 100_000_000, 123_456_789, 999_999_999,
+        1_000_000_000, u32::MAX,
+    ];
+    for int in ints.iter() {
+        assert_eq!(Integer32Buffer::init(*int), int.to_string());
+    }
+}
+
+#[test]
+fn test_push() {
+    let mut buffer = Integer32Buffer::init(278);
+    unsafe {
+        buffer.push(b'?');
+    }
+    assert_eq!(buffer, "278?");
+}
diff --git a/server/src/coredb/mod.rs b/server/src/coredb/mod.rs
index 947d3957..e9f36039 100644
--- a/server/src/coredb/mod.rs
+++ b/server/src/coredb/mod.rs
@@ -39,6 +39,7 @@ pub use htable::Data;
 use libsky::TResult;
 use std::sync::Arc;
 pub mod array;
+pub mod buffers;
 pub mod htable;
 pub mod iarray;
 pub mod lazy;
diff --git a/server/src/storage/interface.rs b/server/src/storage/interface.rs
index be830499..f026a02f 100644
--- a/server/src/storage/interface.rs
+++ b/server/src/storage/interface.rs
@@ -27,9 +27,9 @@
 //! 
Interfaces with the file system
 
 use super::PartitionID;
+use crate::coredb::buffers::Integer32Buffer;
 use crate::coredb::htable::Coremap;
 use crate::coredb::htable::Data;
-use crate::util;
 use std::fs;
 use std::io::Result as IoResult;
 use std::io::{BufWriter, Write};
@@ -49,17 +49,20 @@ pub fn serialize_map_into_slow_buffer(
 }
 
 /// Get the file for COW. If the parition ID is 0000
-fn cow_file(id: PartitionID) -> String {
-    let mut id = util::it32_to_str(id);
-    id.push('_');
-    id
+fn cow_file(id: PartitionID) -> Integer32Buffer {
+    let mut buffer = Integer32Buffer::init(id);
+    unsafe {
+        // UNSAFE(@ohsayan): a u32 fills at most 10 of the 11 bytes, so one push fits
+        buffer.push(b'_');
+    }
+    buffer
 }
 
 #[test]
 fn test_cowfile() {
     let cow_file = cow_file(10);
     assert_eq!(cow_file, "10_".to_owned());
-    assert_eq!(&cow_file[..cow_file.len()-1], "10".to_owned());
+    assert_eq!(&cow_file[..cow_file.len() - 1], "10".to_owned());
 }
 
 /// Returns a handle to a thread that was spawned to handle this specific flush routine
@@ -69,10 +72,10 @@ pub fn threaded_se(
 ) -> JoinHandle> {
     thread::spawn(move || {
         let fname = cow_file(partition_id);
-        let mut f = fs::File::create(&fname)?;
+        let mut f = fs::File::create(&*fname)?;
         self::serialize_map_into_slow_buffer(&mut f, &tblref)?;
         f.sync_all()?;
-        fs::rename(&fname, &fname[..fname.len() - 1])?;
+        fs::rename(&*fname, &fname[..fname.len() - 1])?;
         Ok(())
     })
 }
diff --git a/server/src/util.rs b/server/src/util.rs
index 560c51e5..b0e6811b 100644
--- a/server/src/util.rs
+++ b/server/src/util.rs
@@ -24,9 +24,6 @@
  *
 */
 
-use core::ptr;
-use core::slice;
-
 /// # Unsafe unwrapping
 ///
 /// This trait provides a method `unsafe_unwrap` that is potentially unsafe and has
@@ -93,116 +90,3 @@ macro_rules! cfg_test {
         $(#[cfg(test)] $item)*
     };
 }
-
-/*
-    32-bit integer to String parsing. This algorithm was "invented" by Ben Voigt and written in C++
-    as a part of a "challenge" and was ported to Rust with some modifications by Sayan.
-    NOTE: This might occassionally blow up. 
-*/ - -const PAIR_MAP_LUT: [u8; 200] = [ - 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, 0x30, 0x36, 0x30, 0x37, - 0x30, 0x38, 0x30, 0x39, // 0x30 - 0x31, 0x30, 0x31, 0x31, 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, - 0x31, 0x38, 0x31, 0x39, // 0x31 - 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, - 0x32, 0x38, 0x32, 0x39, // 0x32 - 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, 0x33, 0x36, 0x33, 0x37, - 0x33, 0x38, 0x33, 0x39, // 0x33 - 0x34, 0x30, 0x34, 0x31, 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, - 0x34, 0x38, 0x34, 0x39, // 0x34 - 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, - 0x35, 0x38, 0x35, 0x39, // 0x35 - 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, 0x36, 0x36, 0x36, 0x37, - 0x36, 0x38, 0x36, 0x39, // 0x36 - 0x37, 0x30, 0x37, 0x31, 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, - 0x37, 0x38, 0x37, 0x39, // 0x37 - 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, - 0x38, 0x38, 0x38, 0x39, // 0x38 - 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, 0x39, 0x36, 0x39, 0x37, - 0x39, 0x38, 0x39, 0x39, // 0x39 -]; - -const BUFSIZE: usize = 10; - -#[inline] -/// Convert a 32-bit unsigned integer to a String. 
-/// **Warning: This function will ocassionally blow up for some inputs** -pub fn it32_to_str(mut val: u32) -> String { - unsafe { - let mut buf: [u8; BUFSIZE] = [0u8; BUFSIZE]; - let mut it = buf.as_mut_ptr().add(BUFSIZE - 2) as *mut u8; - let mut div = val / 100; - while div != 0 { - ptr::copy_nonoverlapping( - &PAIR_MAP_LUT[(2 * (val - div * 100)) as usize] as *const u8, - it, - 2, - ); - val = div; - it = it.sub(2); - div = val / 100; - } - ptr::copy_nonoverlapping(&PAIR_MAP_LUT[(2 * val) as usize] as *const u8, it, 2); - - if val < 10 { - // let y = *it; - // *it = y + 1; - it = it.add(1); - } - - String::from_utf8_unchecked( - slice::from_raw_parts(it, buf.as_ptr().add(BUFSIZE).offset_from(it) as usize) - .to_owned(), - ) - } -} - -#[cfg(test)] -macro_rules! assert_itoa32 { - ($e:expr) => { - assert_eq!($e.to_string(), self::it32_to_str($e)); - }; -} - -#[test] -fn test_numbers() { - // just some random funny varying length integers (except 0s) to test the function - assert_itoa32!(1); - assert_itoa32!(11); - assert_itoa32!(111); - assert_itoa32!(1111); - assert_itoa32!(11111); - assert_itoa32!(111111); - assert_itoa32!(1111111); - assert_itoa32!(11111111); - assert_itoa32!(111111111); - assert_itoa32!(1111111111); - assert_itoa32!(0000000000); - assert_itoa32!(888888888); - assert_itoa32!(77777777); - assert_itoa32!(6666666); - assert_itoa32!(555555); - assert_itoa32!(44444); - assert_itoa32!(3333); - assert_itoa32!(222); - assert_itoa32!(11); - assert_itoa32!(0); - assert_itoa32!(9); - assert_itoa32!(99); - assert_itoa32!(999); - assert_itoa32!(9999); - assert_itoa32!(99999); - assert_itoa32!(999999); - assert_itoa32!(9999999); - assert_itoa32!(99999999); - assert_itoa32!(999999999); - assert_itoa32!(123456789); - assert_itoa32!(12345678); - assert_itoa32!(1234567); - assert_itoa32!(123456); - assert_itoa32!(12345); - assert_itoa32!(1234); - assert_itoa32!(123); - assert_itoa32!(12); -}