Sync row deltas to delta state
parent
f230bc7920
commit
e4848e645e
@ -0,0 +1,103 @@
|
||||
/*
|
||||
* Created on Fri Sep 01 2023
|
||||
*
|
||||
* This file is a part of Skytable
|
||||
* Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
|
||||
* NoSQL database written by Sayan Nandan ("the Author") with the
|
||||
* vision to provide flexibility in data modelling without compromising
|
||||
* on performance, queryability or scalability.
|
||||
*
|
||||
* Copyright (c) 2023, Sayan Nandan <ohsayan@outlook.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
use core::ops::{Deref, DerefMut};
|
||||
|
||||
/// Cache line padding (to avoid unintended cache line invalidation).
///
/// The wrapped value is aligned to the assumed cache line size of the target. All
/// sizes below are in **bytes** (they are the values passed to `repr(align(..))`):
///
/// - 256 bytes (on a side note, good lord):
///     - s390x: <https://community.ibm.com/community/user/ibmz-and-linuxone/viewdocument/microprocessor-optimization-primer>
/// - 128 bytes:
///     - aarch64: ARM64's big.LITTLE (it's a funny situation because one set of cores
///       has one cache line size while the other set has a different size; see this
///       excellent article: <https://www.mono-project.com/news/2016/09/12/arm64-icache/>)
///     - powerpc64: <https://reviews.llvm.org/D33656>
///     - x86_64: Intel's Sandy Bridge+
///       (<https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf>)
/// - 64 bytes: default for every target that is not listed here
/// - 32 bytes: arm, hexagon, mips, mips64, riscv32, riscv64 and sparc
/// - 16 bytes: m68k (not very useful for us, but yeah)
#[derive(Debug, Clone, Copy, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(target_arch = "s390x", repr(align(256)))]
#[cfg_attr(
    any(
        target_arch = "aarch64",
        target_arch = "powerpc64",
        target_arch = "x86_64",
    ),
    repr(align(128))
)]
#[cfg_attr(
    any(
        target_arch = "arm",
        target_arch = "hexagon",
        target_arch = "mips",
        target_arch = "mips64",
        target_arch = "riscv32",
        target_arch = "riscv64",
        target_arch = "sparc"
    ),
    repr(align(32))
)]
#[cfg_attr(
    not(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "hexagon",
        target_arch = "m68k",
        target_arch = "mips",
        target_arch = "mips64",
        target_arch = "powerpc64",
        target_arch = "riscv32",
        target_arch = "riscv64",
        target_arch = "s390x",
        target_arch = "sparc",
        target_arch = "x86_64",
    )),
    repr(align(64))
)]
#[cfg_attr(target_arch = "m68k", repr(align(16)))]
pub struct CachePadded<T> {
    /// The wrapped value; the surrounding struct supplies the alignment/padding.
    data: T,
}
|
||||
|
||||
impl<T> CachePadded<T> {
|
||||
pub const fn new(data: T) -> Self {
|
||||
Self { data }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Deref for CachePadded<T> {
|
||||
type Target = T;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.data
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> DerefMut for CachePadded<T> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.data
|
||||
}
|
||||
}
|
@ -0,0 +1,236 @@
|
||||
/*
|
||||
* Created on Wed Aug 30 2023
|
||||
*
|
||||
* This file is a part of Skytable
|
||||
* Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
|
||||
* NoSQL database written by Sayan Nandan ("the Author") with the
|
||||
* vision to provide flexibility in data modelling without compromising
|
||||
* on performance, queryability or scalability.
|
||||
*
|
||||
* Copyright (c) 2023, Sayan Nandan <ohsayan@outlook.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
use {
|
||||
super::atm::Atomic,
|
||||
crate::engine::mem::CachePadded,
|
||||
crossbeam_epoch::{pin, unprotected, Guard, Owned, Shared},
|
||||
std::{mem::MaybeUninit, sync::atomic::Ordering},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
struct QNode<T> {
|
||||
data: MaybeUninit<T>,
|
||||
next: Atomic<Self>,
|
||||
}
|
||||
|
||||
impl<T> QNode<T> {
|
||||
fn new(data: MaybeUninit<T>, next: Atomic<Self>) -> Self {
|
||||
Self { data, next }
|
||||
}
|
||||
fn null() -> Self {
|
||||
Self::new(MaybeUninit::uninit(), Atomic::null())
|
||||
}
|
||||
fn new_data(val: T) -> Self {
|
||||
Self::new(MaybeUninit::new(val), Atomic::null())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Queue<T> {
|
||||
head: CachePadded<Atomic<QNode<T>>>,
|
||||
tail: CachePadded<Atomic<QNode<T>>>,
|
||||
}
|
||||
|
||||
impl<T> Queue<T> {
|
||||
pub fn new() -> Self {
|
||||
let slf = Self {
|
||||
head: CachePadded::new(Atomic::null()),
|
||||
tail: CachePadded::new(Atomic::null()),
|
||||
};
|
||||
let g = unsafe { unprotected() };
|
||||
let sentinel = Owned::new(QNode::null()).into_shared(&g);
|
||||
slf.head.store(sentinel, Ordering::Relaxed);
|
||||
slf.tail.store(sentinel, Ordering::Relaxed);
|
||||
slf
|
||||
}
|
||||
pub fn blocking_enqueue_autopin(&self, new: T) {
|
||||
let g = pin();
|
||||
self.blocking_enqueue(new, &g);
|
||||
}
|
||||
pub fn blocking_enqueue(&self, new: T, g: &Guard) {
|
||||
let newptr = Owned::new(QNode::new_data(new)).into_shared(g);
|
||||
loop {
|
||||
// get current tail
|
||||
let tailptr = self.tail.load(Ordering::Acquire, g);
|
||||
let tail = unsafe { tailptr.deref() };
|
||||
let tail_nextptr = tail.next.load(Ordering::Acquire, g);
|
||||
if tail_nextptr.is_null() {
|
||||
// tail points to null which means this should ideally by the last LL node
|
||||
if tail
|
||||
.next
|
||||
.compare_exchange(
|
||||
Shared::null(),
|
||||
newptr,
|
||||
Ordering::Release,
|
||||
Ordering::Relaxed,
|
||||
g,
|
||||
)
|
||||
.is_ok()
|
||||
{
|
||||
/*
|
||||
CAS'd in but tail is *probably* lagging behind. This CAS might fail but we don't care since we're allowed to have a lagging tail
|
||||
*/
|
||||
let _ = self.tail.compare_exchange(
|
||||
tailptr,
|
||||
newptr,
|
||||
Ordering::Release,
|
||||
Ordering::Relaxed,
|
||||
g,
|
||||
);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// tail is lagging behind; attempt to help update it
|
||||
let _ = self.tail.compare_exchange(
|
||||
tailptr,
|
||||
tail_nextptr,
|
||||
Ordering::Release,
|
||||
Ordering::Relaxed,
|
||||
g,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn blocking_try_dequeue_autopin(&self) -> Option<T> {
|
||||
let g = pin();
|
||||
self.blocking_try_dequeue(&g)
|
||||
}
|
||||
pub fn blocking_try_dequeue(&self, g: &Guard) -> Option<T> {
|
||||
loop {
|
||||
// get current head
|
||||
let headptr = self.head.load(Ordering::Acquire, g);
|
||||
let head = unsafe { headptr.deref() };
|
||||
let head_nextptr = head.next.load(Ordering::Acquire, g);
|
||||
if head_nextptr.is_null() {
|
||||
// this is the sentinel; queue is empty
|
||||
return None;
|
||||
}
|
||||
// we observe at this point in time that there is atleast one element in the list
|
||||
// let us swing that into sentinel position
|
||||
if self
|
||||
.head
|
||||
.compare_exchange(
|
||||
headptr,
|
||||
head_nextptr,
|
||||
Ordering::Release,
|
||||
Ordering::Relaxed,
|
||||
g,
|
||||
)
|
||||
.is_ok()
|
||||
{
|
||||
// good so we were able to update the head
|
||||
let tailptr = self.tail.load(Ordering::Acquire, g);
|
||||
// but wait, was this the last node? in that case, we need to update the tail before we destroy it.
|
||||
// this is fine though, as nothing will go boom right now since the tail is allowed to lag by one
|
||||
if headptr == tailptr {
|
||||
// right so this was the last node uh oh
|
||||
let _ = self.tail.compare_exchange(
|
||||
tailptr,
|
||||
head_nextptr,
|
||||
Ordering::Release,
|
||||
Ordering::Relaxed,
|
||||
g,
|
||||
);
|
||||
}
|
||||
// now we're in a position to happily destroy this
|
||||
unsafe { g.defer_destroy(headptr) }
|
||||
// read out the ptr
|
||||
return Some(unsafe { head_nextptr.deref().data.as_ptr().read() });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Drop for Queue<T> {
|
||||
fn drop(&mut self) {
|
||||
let g = unsafe { unprotected() };
|
||||
while self.blocking_try_dequeue(g).is_some() {}
|
||||
// dealloc sentinel
|
||||
unsafe {
|
||||
self.head.load(Ordering::Relaxed, g).into_owned();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Queue instantiation used by the tests below.
#[cfg(test)]
type StringQueue = Queue<String>;

/// Creating a queue and dropping it right away must work.
#[test]
fn empty() {
    let _ = StringQueue::new();
}
|
||||
|
||||
/// Dequeuing from a freshly created queue yields nothing.
#[test]
fn empty_deq() {
    let guard = pin();
    let queue = StringQueue::new();
    let popped = queue.blocking_try_dequeue(&guard);
    assert_eq!(popped, None);
}
|
||||
|
||||
/// Enqueuing into a fresh queue works, and dropping the queue with an element
/// still inside is fine.
#[test]
fn empty_enq() {
    let guard = pin();
    let queue = StringQueue::new();
    queue.blocking_enqueue(String::from("hello"), &guard);
}
|
||||
|
||||
/// One producer thread and one consumer thread share a queue; the consumer must
/// receive every item in exactly the order the producer sent them.
#[test]
fn multi_eq_dq() {
    use std::{sync::Arc, thread};
    const ITEMS_L: usize = 100;
    let queue = Arc::new(StringQueue::new());
    let (producer_q, consumer_q) = (queue.clone(), queue.clone());
    let producer = thread::spawn(move || {
        let mut sent = Vec::new();
        let guard = pin();
        for i in 0..ITEMS_L {
            let item = format!("time-{i}");
            // enqueue a copy and remember what was sent
            producer_q.blocking_enqueue(item.clone(), &guard);
            sent.push(item);
        }
        sent
    });
    let consumer = thread::spawn(move || {
        let guard = pin();
        let mut received = Vec::new();
        // keep polling until every item has arrived
        while received.len() != ITEMS_L {
            if let Some(item) = consumer_q.blocking_try_dequeue(&guard) {
                received.push(item);
            }
        }
        received
    });
    let sent = producer.join().unwrap();
    let received = consumer.join().unwrap();
    assert_eq!(sent, received);
}
|
@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Created on Mon Aug 28 2023
|
||||
*
|
||||
* This file is a part of Skytable
|
||||
* Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
|
||||
* NoSQL database written by Sayan Nandan ("the Author") with the
|
||||
* vision to provide flexibility in data modelling without compromising
|
||||
* on performance, queryability or scalability.
|
||||
*
|
||||
* Copyright (c) 2023, Sayan Nandan <ohsayan@outlook.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
use crate::{
|
||||
engine::{
|
||||
core::{index::PrimaryIndexKey, GlobalNS},
|
||||
data::cell::Datacell,
|
||||
storage::v1::inf::obj,
|
||||
},
|
||||
util::{os, EndianQW},
|
||||
};
|
||||
|
||||
type Buf = Vec<u8>;
|
||||
|
||||
static mut CAP_PER_LL: usize = 0;
|
||||
static mut FREEMEM: u64 = 0;
|
||||
|
||||
/// Set the free memory and cap for deltas so that we don't bust through memory
|
||||
///
|
||||
/// ## Safety
|
||||
/// - All models must have been loaded
|
||||
/// - This must be called **before** the arbiter spawns threads for connections
|
||||
pub unsafe fn set_limits(gns: &GlobalNS) {
|
||||
let model_cnt: usize = gns
|
||||
.spaces()
|
||||
.read()
|
||||
.values()
|
||||
.map(|space| space.models().read().len())
|
||||
.sum();
|
||||
let available_mem = os::free_memory_in_bytes();
|
||||
FREEMEM = available_mem;
|
||||
CAP_PER_LL = ((available_mem as usize / core::cmp::max(1, model_cnt)) as f64 * 0.01) as usize;
|
||||
}
|
||||
|
||||
/*
|
||||
misc. methods
|
||||
*/
|
||||
|
||||
fn encode_primary_key(buf: &mut Buf, pk: &PrimaryIndexKey) {
|
||||
buf.push(pk.tag().d());
|
||||
static EXEC: [unsafe fn(&mut Buf, &PrimaryIndexKey); 2] = [
|
||||
|buf, pk| unsafe { buf.extend(pk.read_uint().to_le_bytes()) },
|
||||
|buf, pk| unsafe {
|
||||
let bin = pk.read_bin();
|
||||
buf.extend(bin.len().u64_bytes_le());
|
||||
buf.extend(bin);
|
||||
},
|
||||
];
|
||||
unsafe {
|
||||
// UNSAFE(@ohsayan): tag map
|
||||
assert!((pk.tag().d() / 2) < 2);
|
||||
EXEC[(pk.tag().d() / 2) as usize](buf, pk);
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_dc(buf: &mut Buf, dc: &Datacell) {
|
||||
obj::encode_element(buf, dc)
|
||||
}
|
@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Created on Sat Sep 02 2023
|
||||
*
|
||||
* This file is a part of Skytable
|
||||
* Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
|
||||
* NoSQL database written by Sayan Nandan ("the Author") with the
|
||||
* vision to provide flexibility in data modelling without compromising
|
||||
* on performance, queryability or scalability.
|
||||
*
|
||||
* Copyright (c) 2023, Sayan Nandan <ohsayan@outlook.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#[cfg(target_os = "windows")]
|
||||
extern crate winapi;
|
||||
|
||||
#[cfg(any(target_os = "linux", target_os = "macos"))]
|
||||
extern crate libc;
|
||||
|
||||
pub fn free_memory_in_bytes() -> u64 {
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
use winapi::um::sysinfoapi::{GlobalMemoryStatusEx, MEMORYSTATUSEX};
|
||||
|
||||
let mut statex: MEMORYSTATUSEX = unsafe { std::mem::zeroed() };
|
||||
statex.dwLength = std::mem::size_of::<MEMORYSTATUSEX>() as u32;
|
||||
|
||||
unsafe {
|
||||
GlobalMemoryStatusEx(&mut statex);
|
||||
}
|
||||
|
||||
// Return free physical memory
|
||||
return statex.ullAvailPhys;
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
use libc::sysinfo;
|
||||
let mut info: libc::sysinfo = unsafe { core::mem::zeroed() };
|
||||
|
||||
unsafe {
|
||||
if sysinfo(&mut info) == 0 {
|
||||
// Return free memory
|
||||
return (info.freeram as u64) * (info.mem_unit as u64);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
use std::mem;
|
||||
unsafe {
|
||||
let page_size = libc::sysconf(libc::_SC_PAGESIZE);
|
||||
let mut count: u32 = libc::HOST_VM_INFO64_COUNT as _;
|
||||
let mut stat: libc::vm_statistics64 = mem::zeroed();
|
||||
libc::host_statistics64(
|
||||
libc::mach_host_self(),
|
||||
libc::HOST_VM_INFO64,
|
||||
&mut stat as *mut libc::vm_statistics64 as *mut _,
|
||||
&mut count,
|
||||
);
|
||||
|
||||
// see this: https://opensource.apple.com/source/xnu/xnu-4570.31.3/osfmk/mach/vm_statistics.h.auto.html
|
||||
return (stat.free_count as u64)
|
||||
.saturating_add(stat.inactive_count as _)
|
||||
.saturating_add(stat.compressor_page_count as u64)
|
||||
.saturating_mul(page_size as _);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(any(target_os = "windows", target_os = "linux", target_os = "macos")))]
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue