Implement batched persistence system
parent ee9ccd5a30
commit 20c937451f
@@ -0,0 +1,58 @@
/*
 * Created on Sun Sep 03 2023
 *
 * This file is a part of Skytable
 * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
 * NoSQL database written by Sayan Nandan ("the Author") with the
 * vision to provide flexibility in data modelling without compromising
 * on performance, queryability or scalability.
 *
 * Copyright (c) 2023, Sayan Nandan <ohsayan@outlook.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 *
*/

use crc::{Crc, Digest, CRC_64_XZ};

/*
    NOTE(@ohsayan): we're currently using crc's impl. but the reason I decided to make a wrapper is because I have a
    different impl in mind
*/

const CRC64: Crc<u64> = Crc::<u64>::new(&CRC_64_XZ);

pub struct SCrc {
    digest: Digest<'static, u64>,
}

impl SCrc {
    pub const fn new() -> Self {
        Self {
            digest: CRC64.digest(),
        }
    }
    pub fn recompute_with_new_byte(&mut self, b: u8) {
        self.digest.update(&[b])
    }
    pub fn recompute_with_new_block<const N: usize>(&mut self, b: [u8; N]) {
        self.digest.update(&b);
    }
    pub fn recompute_with_new_var_block(&mut self, b: &[u8]) {
        self.digest.update(b)
    }
    pub fn finish(self) -> u64 {
        self.digest.finalize()
    }
}
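A minimal illustrative sketch (not from the diff) of the property the wrapper relies on: the digest is incremental, so feeding the same bytes one at a time or as a single variable-length block yields the same CRC-64/XZ value. The test name and payload below are hypothetical.

#[test]
fn scrc_incremental_updates_agree() {
    // hypothetical payload; any byte sequence works
    let payload = b"skytable-batch";
    // feed the payload one byte at a time
    let mut byte_at_a_time = SCrc::new();
    for b in payload {
        byte_at_a_time.recompute_with_new_byte(*b);
    }
    // feed the same payload as one variable-length block
    let mut one_block = SCrc::new();
    one_block.recompute_with_new_var_block(payload);
    // both digests observed the same byte stream, so the checksums agree
    assert_eq!(byte_at_a_time.finish(), one_block.finish());
}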
@@ -0,0 +1,45 @@
/*
 * Created on Sun Sep 03 2023
 *
 * This file is a part of Skytable
 * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
 * NoSQL database written by Sayan Nandan ("the Author") with the
 * vision to provide flexibility in data modelling without compromising
 * on performance, queryability or scalability.
 *
 * Copyright (c) 2023, Sayan Nandan <ohsayan@outlook.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 *
*/

mod persist;
mod restore;

/// the data batch file was reopened
const MARKER_BATCH_REOPEN: u8 = 0xFB;
/// the data batch file was closed
const MARKER_BATCH_CLOSED: u8 = 0xFC;
/// end of batch marker
const MARKER_END_OF_BATCH: u8 = 0xFD;
/// "real" batch event marker
const MARKER_ACTUAL_BATCH_EVENT: u8 = 0xFE;
/// recovery batch event marker
const MARKER_RECOVERY_EVENT: u8 = 0xFF;
/// recovery threshold
const RECOVERY_THRESHOLD: usize = 10;

#[cfg(test)]
pub(super) use restore::{DecodedBatchEvent, DecodedBatchEventKind, NormalBatch};
pub use {persist::DataBatchPersistDriver, restore::DataBatchRestoreDriver};
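An illustrative sketch (not from the diff) of how these markers frame a batch on disk, following write_batch_start and append_batch_summary in the persist driver below; the function and its arguments are hypothetical, and the checksum argument stands in for the CRC-64 the tracked writer computes over the batch body.

fn sketch_minimal_batch_frame(schema_version: u64, checksum: u64) -> Vec<u8> {
    let mut frame = Vec::new();
    frame.push(MARKER_ACTUAL_BATCH_EVENT); // 0xFE: a real batch follows
    frame.extend(0u64.to_le_bytes()); // expected commit (observed_len), LE
    frame.extend(schema_version.to_le_bytes()); // schema version, LE
    // ... zero or more per-row events would be encoded here ...
    frame.push(MARKER_END_OF_BATCH); // 0xFD: batch summary follows
    frame.extend(0u64.to_le_bytes()); // actual commit (observed_len - inconsistent reads), LE
    frame.extend(checksum.to_le_bytes()); // checksum of the batch body, LE
    frame
}

The remaining markers sit around these frames: MARKER_BATCH_REOPEN (0xFB) is written once when an existing file is reopened, MARKER_BATCH_CLOSED (0xFC) when the driver shuts down cleanly, and MARKER_RECOVERY_EVENT (0xFF) is appended after a failed batch write to mark the file recoverable.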
@@ -0,0 +1,290 @@
/*
 * Created on Tue Sep 05 2023
 *
 * This file is a part of Skytable
 * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
 * NoSQL database written by Sayan Nandan ("the Author") with the
 * vision to provide flexibility in data modelling without compromising
 * on performance, queryability or scalability.
 *
 * Copyright (c) 2023, Sayan Nandan <ohsayan@outlook.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 *
*/

use {
    super::{
        MARKER_ACTUAL_BATCH_EVENT, MARKER_BATCH_CLOSED, MARKER_BATCH_REOPEN, MARKER_END_OF_BATCH,
        MARKER_RECOVERY_EVENT,
    },
    crate::{
        engine::{
            core::{
                index::{PrimaryIndexKey, RowData},
                model::{
                    delta::{DataDelta, DataDeltaKind, DeltaVersion, IRModel},
                    Model,
                },
            },
            data::{
                cell::Datacell,
                tag::{DataTag, TagClass, TagUnique},
            },
            idx::STIndexSeq,
            storage::v1::{
                inf::PersistTypeDscr,
                rw::{RawFileIOInterface, SDSSFileIO, SDSSFileTrackedWriter},
                SDSSError, SDSSResult,
            },
        },
        util::EndianQW,
    },
    crossbeam_epoch::pin,
};

pub struct DataBatchPersistDriver<F> {
    f: SDSSFileTrackedWriter<F>,
}

impl<F: RawFileIOInterface> DataBatchPersistDriver<F> {
    pub fn new(mut file: SDSSFileIO<F>, is_new: bool) -> SDSSResult<Self> {
        if !is_new {
            file.fsynced_write(&[MARKER_BATCH_REOPEN])?;
        }
        Ok(Self {
            f: SDSSFileTrackedWriter::new(file),
        })
    }
    pub fn close(mut self) -> SDSSResult<()> {
        if self
            .f
            .inner_file()
            .fsynced_write(&[MARKER_BATCH_CLOSED])
            .is_ok()
        {
            return Ok(());
        } else {
            return Err(SDSSError::DataBatchCloseError);
        }
    }
    pub fn write_new_batch(&mut self, model: &Model, observed_len: usize) -> SDSSResult<()> {
        // pin model
        let irm = model.intent_read_model();
        let schema_version = model.delta_state().schema_current_version();
        let data_q = model.delta_state().__data_delta_queue();
        let g = pin();
        // init restore list
        let mut restore_list = Vec::new();
        // prepare computations
        let mut i = 0;
        let mut inconsistent_reads = 0;
        let mut exec = || -> SDSSResult<()> {
            // write batch start
            self.write_batch_start(observed_len, schema_version)?;
            while i < observed_len {
                let delta = data_q.blocking_try_dequeue(&g).unwrap();
                restore_list.push(delta.clone()); // TODO(@ohsayan): avoid this
                match delta.change() {
                    DataDeltaKind::Delete => {
                        self.write_batch_item_common_row_data(&delta)?;
                        self.encode_pk_only(delta.row().d_key())?;
                    }
                    DataDeltaKind::Insert | DataDeltaKind::Update => {
                        // resolve deltas (this is yet another opportunity for us to reclaim memory from deleted items)
                        let row_data = delta
                            .row()
                            .resolve_schema_deltas_and_freeze_if(&model.delta_state(), |row| {
                                row.get_txn_revised() <= delta.data_version()
                            });
                        if row_data.get_txn_revised() > delta.data_version() {
                            // we made an inconsistent (stale) read; someone updated the state after our snapshot
                            inconsistent_reads += 1;
                            i += 1;
                            continue;
                        }
                        self.write_batch_item_common_row_data(&delta)?;
                        // encode data
                        self.encode_pk_only(delta.row().d_key())?;
                        self.encode_row_data(model, &irm, &row_data)?;
                    }
                }
                // fsync now; we're good to go
                self.f.fsync_all()?;
                i += 1;
            }
            return self.append_batch_summary(observed_len, inconsistent_reads);
        };
        match exec() {
            Ok(()) => Ok(()),
            Err(_) => {
                // republish changes since we failed to commit
                restore_list
                    .into_iter()
                    .for_each(|delta| model.delta_state().append_new_data_delta(delta, &g));
                // now attempt to fix the file
                return self.attempt_fix_data_batchfile();
            }
        }
    }
    /// Write the batch start block:
    /// - Batch start magic
    /// - Expected commit
    /// - Schema version
    fn write_batch_start(
        &mut self,
        observed_len: usize,
        schema_version: DeltaVersion,
    ) -> Result<(), SDSSError> {
        self.f.unfsynced_write(&[MARKER_ACTUAL_BATCH_EVENT])?;
        let observed_len_bytes = observed_len.u64_bytes_le();
        self.f.unfsynced_write(&observed_len_bytes)?;
        self.f
            .unfsynced_write(&schema_version.value_u64().to_le_bytes())?;
        Ok(())
    }
    /// Append a summary of this batch
    fn append_batch_summary(
        &mut self,
        observed_len: usize,
        inconsistent_reads: usize,
    ) -> Result<(), SDSSError> {
        // [0xFD][actual_commit][checksum]
        self.f.unfsynced_write(&[MARKER_END_OF_BATCH])?;
        let actual_commit = (observed_len - inconsistent_reads).u64_bytes_le();
        self.f.unfsynced_write(&actual_commit)?;
        let cs = self.f.reset_and_finish_checksum().to_le_bytes();
        self.f.inner_file().fsynced_write(&cs)?;
        Ok(())
    }
    /// Attempt to fix the batch journal
    // TODO(@ohsayan): declare an "international system disaster" when this happens
    fn attempt_fix_data_batchfile(&mut self) -> SDSSResult<()> {
        /*
            attempt to append 0xFF to the part of the file where a corruption likely occurred, marking
            it recoverable
        */
        let f = self.f.inner_file();
        if f.fsynced_write(&[MARKER_RECOVERY_EVENT]).is_ok() {
            return Ok(());
        }
        Err(SDSSError::DataBatchRecoveryFailStageOne)
    }
}

impl<F: RawFileIOInterface> DataBatchPersistDriver<F> {
    /// encode the primary key only. this means NO TAG is encoded.
    fn encode_pk_only(&mut self, pk: &PrimaryIndexKey) -> SDSSResult<()> {
        let buf = &mut self.f;
        match pk.tag() {
            TagUnique::UnsignedInt | TagUnique::SignedInt => {
                let data = unsafe {
                    // UNSAFE(@ohsayan): +tagck
                    pk.read_uint()
                }
                .to_le_bytes();
                buf.unfsynced_write(&data)?;
            }
            TagUnique::Str | TagUnique::Bin => {
                let slice = unsafe {
                    // UNSAFE(@ohsayan): +tagck
                    pk.read_bin()
                };
                let slice_l = slice.len().u64_bytes_le();
                buf.unfsynced_write(&slice_l)?;
                buf.unfsynced_write(slice)?;
            }
            TagUnique::Illegal => unsafe {
                // UNSAFE(@ohsayan): a pk can't be constructed with illegal
                impossible!()
            },
        }
        Ok(())
    }
    /// Encode a single cell
    fn encode_cell(&mut self, value: &Datacell) -> SDSSResult<()> {
        let ref mut buf = self.f;
        buf.unfsynced_write(&[
            PersistTypeDscr::translate_from_class(value.tag().tag_class()).value_u8(),
        ])?;
        match value.tag().tag_class() {
            TagClass::Bool if value.is_null() => {}
            TagClass::Bool => {
                let bool = unsafe {
                    // UNSAFE(@ohsayan): +tagck
                    value.read_bool()
                } as u8;
                buf.unfsynced_write(&[bool])?;
            }
            TagClass::SignedInt | TagClass::UnsignedInt | TagClass::Float => {
                let chunk = unsafe {
                    // UNSAFE(@ohsayan): +tagck
                    value.read_uint()
                }
                .to_le_bytes();
                buf.unfsynced_write(&chunk)?;
            }
            TagClass::Str | TagClass::Bin => {
                let slice = unsafe {
                    // UNSAFE(@ohsayan): +tagck
                    value.read_bin()
                };
                let slice_l = slice.len().u64_bytes_le();
                buf.unfsynced_write(&slice_l)?;
                buf.unfsynced_write(slice)?;
            }
            TagClass::List => {
                let list = unsafe {
                    // UNSAFE(@ohsayan): +tagck
                    value.read_list()
                }
                .read();
                let list_l = list.len().u64_bytes_le();
                buf.unfsynced_write(&list_l)?;
                for item in list.iter() {
                    self.encode_cell(item)?;
                }
            }
        }
        Ok(())
    }
    /// Encode row data
    fn encode_row_data(
        &mut self,
        mdl: &Model,
        irm: &IRModel,
        row_data: &RowData,
    ) -> SDSSResult<()> {
        // nasty hack; we need to avoid the pk
        self.f
            .unfsynced_write(&(row_data.fields().len()).to_le_bytes())?;
        for field_name in irm.fields().stseq_ord_key() {
            match row_data.fields().get(field_name) {
                Some(cell) => {
                    self.encode_cell(cell)?;
                }
                None if field_name.as_ref() == mdl.p_key() => {}
                None => self.f.unfsynced_write(&[0])?,
            }
        }
        Ok(())
    }
    fn write_batch_item_common_row_data(&mut self, delta: &DataDelta) -> Result<(), SDSSError> {
        let p1_dc_pk_ty = [delta.change().value_u8(), delta.row().d_key().tag().d()];
        self.f.unfsynced_write(&p1_dc_pk_ty)?;
        let txn_id = delta.data_version().value_u64().to_le_bytes();
        self.f.unfsynced_write(&txn_id)?;
        Ok(())
    }
}
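An illustrative sketch (not from the diff) of the shape of a single event inside a batch, mirroring write_batch_item_common_row_data and encode_pk_only above; the helper and its arguments are hypothetical. A delete event stops at the primary key, while insert/update events go on to append a column count and one encoded cell per column (see encode_row_data and encode_cell).

fn sketch_delete_event_bytes(change: u8, pk_tag: u8, txn_id: u64, pk: u64) -> Vec<u8> {
    let mut event = Vec::new();
    event.push(change); // DataDeltaKind discriminant (a delete here)
    event.push(pk_tag); // TagUnique of the primary key
    event.extend(txn_id.to_le_bytes()); // data version of the delta, LE
    event.extend(pk.to_le_bytes()); // uint/sint keys: one u64 LE; str/bin keys: [len u64 LE][bytes]
    event
}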
@@ -0,0 +1,445 @@
/*
 * Created on Tue Sep 05 2023
 *
 * This file is a part of Skytable
 * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
 * NoSQL database written by Sayan Nandan ("the Author") with the
 * vision to provide flexibility in data modelling without compromising
 * on performance, queryability or scalability.
 *
 * Copyright (c) 2023, Sayan Nandan <ohsayan@outlook.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 *
*/

use super::{MARKER_BATCH_CLOSED, MARKER_BATCH_REOPEN};

use {
    super::{
        MARKER_ACTUAL_BATCH_EVENT, MARKER_END_OF_BATCH, MARKER_RECOVERY_EVENT, RECOVERY_THRESHOLD,
    },
    crate::{
        engine::{
            core::{index::PrimaryIndexKey, model::Model},
            data::{
                cell::Datacell,
                tag::{CUTag, TagClass, TagUnique},
            },
            storage::v1::{
                inf::PersistTypeDscr,
                rw::{RawFileIOInterface, SDSSFileIO, SDSSFileTrackedReader},
                SDSSError, SDSSResult,
            },
        },
        util::copy_slice_to_array,
    },
    std::mem::ManuallyDrop,
};

#[derive(Debug, PartialEq)]
pub(in crate::engine::storage::v1) struct DecodedBatchEvent {
    txn_id: u64,
    pk: PrimaryIndexKey,
    kind: DecodedBatchEventKind,
}

impl DecodedBatchEvent {
    pub(in crate::engine::storage::v1) const fn new(
        txn_id: u64,
        pk: PrimaryIndexKey,
        kind: DecodedBatchEventKind,
    ) -> Self {
        Self { txn_id, pk, kind }
    }
}

#[derive(Debug, PartialEq)]
pub(in crate::engine::storage::v1) enum DecodedBatchEventKind {
    Delete,
    Insert(Vec<Datacell>),
    Update(Vec<Datacell>),
}

#[derive(Debug, PartialEq)]
pub(in crate::engine::storage::v1) struct NormalBatch {
    events: Vec<DecodedBatchEvent>,
    schema_version: u64,
}

impl NormalBatch {
    pub(in crate::engine::storage::v1) fn new(
        events: Vec<DecodedBatchEvent>,
        schema_version: u64,
    ) -> Self {
        Self {
            events,
            schema_version,
        }
    }
}

enum Batch {
    RecoveredFromerror,
    Normal(NormalBatch),
    FinishedEarly(NormalBatch),
    BatchClosed,
}

pub struct DataBatchRestoreDriver<F> {
    f: SDSSFileTrackedReader<F>,
}

impl<F: RawFileIOInterface> DataBatchRestoreDriver<F> {
    pub fn new(f: SDSSFileIO<F>) -> SDSSResult<Self> {
        Ok(Self {
            f: SDSSFileTrackedReader::new(f)?,
        })
    }
    pub fn into_file(self) -> SDSSFileIO<F> {
        self.f.into_inner_file()
    }
    pub(in crate::engine::storage::v1) fn read_data_batch_into_model(
        &mut self,
        model: &Model,
    ) -> SDSSResult<()> {
        self.read_all_batches_and_for_each(|batch| {
            // apply the batch
            Self::apply_batch(model, batch)
        })
    }
    pub fn read_all_batches(&mut self) -> SDSSResult<Vec<NormalBatch>> {
        let mut all_batches = vec![];
        self.read_all_batches_and_for_each(|batch| {
            all_batches.push(batch);
            Ok(())
        })?;
        Ok(all_batches)
    }
}

impl<F: RawFileIOInterface> DataBatchRestoreDriver<F> {
    fn read_all_batches_and_for_each(
        &mut self,
        mut f: impl FnMut(NormalBatch) -> SDSSResult<()>,
    ) -> SDSSResult<()> {
        // begin
        let mut closed = false;
        while !self.f.is_eof() && !closed {
            // try to decode this batch
            let Ok(batch) = self.read_batch() else {
                self.attempt_recover_data_batch()?;
                continue;
            };
            // see what happened when decoding it
            let finished_early = matches!(batch, Batch::FinishedEarly { .. });
            let batch = match batch {
                Batch::RecoveredFromerror => {
                    // there was an error, but it was safely "handled" because of a recovery byte mark
                    continue;
                }
                Batch::FinishedEarly(batch) | Batch::Normal(batch) => batch,
                Batch::BatchClosed => {
                    // the batch was closed; this means that we probably are done with this round; but was it re-opened?
                    closed = self.handle_reopen_is_actual_close()?;
                    continue;
                }
            };
            // now we need to read the batch summary
            let Ok(actual_commit) = self.read_batch_summary(finished_early) else {
                self.attempt_recover_data_batch()?;
                continue;
            };
            // check if we have the expected batch size
            if batch.events.len() as u64 != actual_commit {
                // corrupted
                self.attempt_recover_data_batch()?;
                continue;
            }
            f(batch)?;
            // apply the batch
        }
        if closed {
            if self.f.is_eof() {
                // that was the last batch
                return Ok(());
            }
        }
        // nope, this is a corrupted file
        Err(SDSSError::DataBatchRestoreCorruptedBatchFile)
    }
    fn handle_reopen_is_actual_close(&mut self) -> SDSSResult<bool> {
        if self.f.is_eof() {
            // yup, it was closed
            Ok(true)
        } else {
            // maybe not
            if self.f.read_byte()? == MARKER_BATCH_REOPEN {
                // driver was closed, but reopened
                Ok(false)
            } else {
                // that's just a nice bug
                Err(SDSSError::DataBatchRestoreCorruptedBatchFile)
            }
        }
    }
}

impl<F: RawFileIOInterface> DataBatchRestoreDriver<F> {
    fn apply_batch(_: &Model, _: NormalBatch) -> SDSSResult<()> {
        todo!()
    }
}

impl<F: RawFileIOInterface> DataBatchRestoreDriver<F> {
    fn read_batch_summary(&mut self, finished_early: bool) -> SDSSResult<u64> {
        if !finished_early {
            // we must read the batch termination signature
            let b = self.f.read_byte()?;
            if b != MARKER_END_OF_BATCH {
                return Err(SDSSError::DataBatchRestoreCorruptedBatch);
            }
        }
        // read actual commit
        let mut actual_commit = [0; sizeof!(u64)];
        self.f.read_into_buffer(&mut actual_commit)?;
        // find actual checksum
        let actual_checksum = self.f.__reset_checksum();
        // find hardcoded checksum
        let mut hardcoded_checksum = [0; sizeof!(u64)];
        self.f
            .inner_file()
            .read_to_buffer(&mut hardcoded_checksum)?;
        // move file cursor ahead
        self.f.__cursor_ahead_by(sizeof!(u64));
        if actual_checksum == u64::from_le_bytes(hardcoded_checksum) {
            Ok(u64::from_le_bytes(actual_commit))
        } else {
            Err(SDSSError::DataBatchRestoreCorruptedBatch)
        }
    }
    fn read_batch(&mut self) -> SDSSResult<Batch> {
        let mut this_batch = vec![];
        // check batch type
        let batch_type = self.f.read_byte()?;
        match batch_type {
            MARKER_ACTUAL_BATCH_EVENT => {}
            MARKER_RECOVERY_EVENT => {
                // while attempting to write this batch, some sort of an error occurred but we got a nice recovery byte
                // so proceed that way
                return Ok(Batch::RecoveredFromerror);
            }
            MARKER_BATCH_CLOSED => {
                // this isn't a batch; it has been closed
                return Ok(Batch::BatchClosed);
            }
            _ => {
                // this is the only singular byte that is expected to be intact. If this isn't intact either, I'm sorry
                return Err(SDSSError::DataBatchRestoreCorruptedBatch);
            }
        }
        // we're expecting a "good batch"
        let mut batch_size_schema_version = [0; sizeof!(u64, 2)];
        self.f.read_into_buffer(&mut batch_size_schema_version)?;
        // we have the batch length
        let batch_size = u64::from_le_bytes(copy_slice_to_array(&batch_size_schema_version[..8]));
        let schema_version =
            u64::from_le_bytes(copy_slice_to_array(&batch_size_schema_version[8..]));
        let mut processed_in_this_batch = 0;
        while (processed_in_this_batch != batch_size) & !self.f.is_eof() {
            // decode common row data
            let change_type = self.f.read_byte()?;
            // now decode event
            match change_type {
                MARKER_END_OF_BATCH => {
                    // the file tells us that we've reached the end of this batch; hmmm
                    return Ok(Batch::FinishedEarly(NormalBatch::new(
                        this_batch,
                        schema_version,
                    )));
                }
                normal_event => {
                    let (pk_type, txnid) = self.read_normal_event_metadata()?;
                    match normal_event {
                        0 => {
                            // delete
                            let pk = self.decode_primary_key(pk_type)?;
                            this_batch.push(DecodedBatchEvent::new(
                                txnid,
                                pk,
                                DecodedBatchEventKind::Delete,
                            ));
                            processed_in_this_batch += 1;
                        }
                        1 | 2 => {
                            // insert or update
                            // get pk
                            let pk = self.decode_primary_key(pk_type)?;
                            // get column count
                            let mut column_count = [0; sizeof!(u64)];
                            self.f.read_into_buffer(&mut column_count)?;
                            let mut column_count = u64::from_le_bytes(column_count);
                            // prepare row
                            let mut row = vec![];
                            while column_count != 0 && !self.f.is_eof() {
                                row.push(self.decode_cell()?);
                                column_count -= 1;
                            }
                            if column_count != 0 {
                                return Err(SDSSError::DataBatchRestoreCorruptedEntry);
                            }
                            if change_type == 1 {
                                this_batch.push(DecodedBatchEvent::new(
                                    txnid,
                                    pk,
                                    DecodedBatchEventKind::Insert(row),
                                ));
                            } else {
                                this_batch.push(DecodedBatchEvent::new(
                                    txnid,
                                    pk,
                                    DecodedBatchEventKind::Update(row),
                                ));
                            }
                            processed_in_this_batch += 1;
                        }
                        _ => {
                            return Err(SDSSError::DataBatchRestoreCorruptedBatch);
                        }
                    }
                }
            }
        }
        Ok(Batch::Normal(NormalBatch::new(this_batch, schema_version)))
    }
    fn read_normal_event_metadata(&mut self) -> Result<(u8, u64), SDSSError> {
        let pk_type = self.f.read_byte()?;
        let mut txnid = [0; sizeof!(u64)];
        self.f.read_into_buffer(&mut txnid)?;
        let txnid = u64::from_le_bytes(txnid);
        Ok((pk_type, txnid))
    }
    fn attempt_recover_data_batch(&mut self) -> SDSSResult<()> {
        let mut max_threshold = RECOVERY_THRESHOLD;
        while max_threshold != 0 && self.f.has_left(1) {
            if let Ok(MARKER_RECOVERY_EVENT) = self.f.inner_file().read_byte() {
                return Ok(());
            }
            max_threshold -= 1;
        }
        Err(SDSSError::DataBatchRestoreCorruptedBatch)
    }
}

impl<F: RawFileIOInterface> DataBatchRestoreDriver<F> {
    fn decode_primary_key(&mut self, pk_type: u8) -> SDSSResult<PrimaryIndexKey> {
        let Some(pk_type) = TagUnique::try_from_raw(pk_type) else {
            return Err(SDSSError::DataBatchRestoreCorruptedEntry);
        };
        Ok(match pk_type {
            TagUnique::SignedInt | TagUnique::UnsignedInt => {
                let mut chunk = [0; sizeof!(u64)];
                self.f.read_into_buffer(&mut chunk)?;
                unsafe {
                    // UNSAFE(@ohsayan): +tagck
                    PrimaryIndexKey::new_from_qw(pk_type, u64::from_le_bytes(chunk))
                }
            }
            TagUnique::Str | TagUnique::Bin => {
                let mut len = [0; sizeof!(u64)];
                self.f.read_into_buffer(&mut len)?;
                let mut data = vec![0; u64::from_le_bytes(len) as usize];
                self.f.read_into_buffer(&mut data)?;
                if pk_type == TagUnique::Str {
                    if core::str::from_utf8(&data).is_err() {
                        return Err(SDSSError::DataBatchRestoreCorruptedEntry);
                    }
                }
                unsafe {
                    // UNSAFE(@ohsayan): +tagck +verityck
                    let mut md = ManuallyDrop::new(data);
                    PrimaryIndexKey::new_from_dual(
                        pk_type,
                        u64::from_le_bytes(len),
                        md.as_mut_ptr() as usize,
                    )
                }
            }
            _ => unsafe {
                // UNSAFE(@ohsayan): TagUnique::try_from_raw rejects an construction with Invalid as the dscr
                impossible!()
            },
        })
    }
    fn decode_cell(&mut self) -> SDSSResult<Datacell> {
        let cell_type_sig = self.f.read_byte()?;
        let Some(cell_type) = PersistTypeDscr::try_from_raw(cell_type_sig) else {
            return Err(SDSSError::DataBatchRestoreCorruptedEntry);
        };
        Ok(match cell_type {
            PersistTypeDscr::Null => Datacell::null(),
            PersistTypeDscr::Bool => {
                let bool = self.f.read_byte()?;
                if bool > 1 {
                    return Err(SDSSError::DataBatchRestoreCorruptedEntry);
                }
                Datacell::new_bool(bool == 1)
            }
            PersistTypeDscr::UnsignedInt | PersistTypeDscr::SignedInt | PersistTypeDscr::Float => {
                let mut block = [0; sizeof!(u64)];
                self.f.read_into_buffer(&mut block)?;
                unsafe {
                    // UNSAFE(@ohsayan): choosing the correct type and tag
                    let tc = TagClass::from_raw(cell_type.value_u8() - 1);
                    Datacell::new_qw(u64::from_le_bytes(block), CUTag::new(tc, tc.tag_unique()))
                }
            }
            PersistTypeDscr::Str | PersistTypeDscr::Bin => {
                let mut len_block = [0; sizeof!(u64)];
                self.f.read_into_buffer(&mut len_block)?;
                let len = u64::from_le_bytes(len_block) as usize;
                let mut data = vec![0; len];
                self.f.read_into_buffer(&mut data)?;
                unsafe {
                    // UNSAFE(@ohsayan): +tagck
                    if cell_type == PersistTypeDscr::Str {
                        if core::str::from_utf8(&data).is_err() {
                            return Err(SDSSError::DataBatchRestoreCorruptedEntry);
                        }
                        Datacell::new_str(String::from_utf8_unchecked(data).into_boxed_str())
                    } else {
                        Datacell::new_bin(data.into_boxed_slice())
                    }
                }
            }
            PersistTypeDscr::List => {
                let mut len_block = [0; sizeof!(u64)];
                self.f.read_into_buffer(&mut len_block)?;
                let len = u64::from_le_bytes(len_block);
                let mut list = Vec::new();
                while !self.f.is_eof() && list.len() as u64 != len {
                    list.push(self.decode_cell()?);
                }
                if len != list.len() as u64 {
                    return Err(SDSSError::DataBatchRestoreCorruptedEntry);
                }
                Datacell::new_list(list)
            }
            PersistTypeDscr::Dict => {
                // we don't support dicts just yet
                return Err(SDSSError::DataBatchRestoreCorruptedEntry);
            }
        })
    }
}
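An illustrative round-trip sketch (not from the diff) of how the restore driver mirrors the persist driver, assuming a model whose delta queue already holds n deltas and two handles to the same batch file; the test module later in this commit performs essentially this sequence against VirtualFS.

fn sketch_batch_roundtrip<F: RawFileIOInterface>(
    file: SDSSFileIO<F>,
    reopened: SDSSFileIO<F>,
    model: &Model,
    n: usize,
) -> SDSSResult<Vec<NormalBatch>> {
    // write one batch of n observed deltas, then close with the 0xFC marker
    let mut persist = DataBatchPersistDriver::new(file, true)?;
    persist.write_new_batch(model, n)?;
    persist.close()?;
    // read every batch back; undecodable batches fall back to the 0xFF recovery scan
    let mut restore = DataBatchRestoreDriver::new(reopened)?;
    restore.read_all_batches()
}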
@@ -0,0 +1,210 @@
/*
 * Created on Wed Sep 06 2023
 *
 * This file is a part of Skytable
 * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
 * NoSQL database written by Sayan Nandan ("the Author") with the
 * vision to provide flexibility in data modelling without compromising
 * on performance, queryability or scalability.
 *
 * Copyright (c) 2023, Sayan Nandan <ohsayan@outlook.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 *
*/

use {
    crate::engine::{
        core::{
            index::{DcFieldIndex, PrimaryIndexKey, Row},
            model::{
                delta::{DataDelta, DataDeltaKind, DeltaVersion},
                Field, Layer, Model,
            },
        },
        data::{cell::Datacell, tag::TagSelector, uuid::Uuid},
        storage::v1::{
            batch_jrnl::{
                DataBatchPersistDriver, DataBatchRestoreDriver, DecodedBatchEvent,
                DecodedBatchEventKind, NormalBatch,
            },
            header_meta::{FileScope, FileSpecifier, FileSpecifierVersion, HostRunMode},
            rw::{FileOpen, SDSSFileIO},
            test_util::VirtualFS,
        },
    },
    crossbeam_epoch::pin,
};

fn pkey(v: impl Into<Datacell>) -> PrimaryIndexKey {
    PrimaryIndexKey::try_from_dc(v.into()).unwrap()
}

fn open_file(fpath: &str) -> FileOpen<SDSSFileIO<VirtualFS>> {
    SDSSFileIO::open_or_create_perm_rw::<false>(
        fpath,
        FileScope::DataBatch,
        FileSpecifier::TableDataBatch,
        FileSpecifierVersion::__new(0),
        0,
        HostRunMode::Dev,
        1,
    )
    .unwrap()
}

fn open_batch_data(fpath: &str, mdl: &Model) -> DataBatchPersistDriver<VirtualFS> {
    match open_file(fpath) {
        FileOpen::Created(f) => DataBatchPersistDriver::new(f, true),
        FileOpen::Existing(f, _) => {
            let mut dbr = DataBatchRestoreDriver::new(f).unwrap();
            dbr.read_data_batch_into_model(mdl).unwrap();
            DataBatchPersistDriver::new(dbr.into_file(), false)
        }
    }
    .unwrap()
}

fn new_delta(
    schema: u64,
    txnid: u64,
    pk: Datacell,
    data: DcFieldIndex,
    change: DataDeltaKind,
) -> DataDelta {
    new_delta_with_row(
        schema,
        txnid,
        Row::new(
            pkey(pk),
            data,
            DeltaVersion::test_new(schema),
            DeltaVersion::test_new(txnid),
        ),
        change,
    )
}

fn new_delta_with_row(schema: u64, txnid: u64, row: Row, change: DataDeltaKind) -> DataDelta {
    DataDelta::new(
        DeltaVersion::test_new(schema),
        DeltaVersion::test_new(txnid),
        row,
        change,
    )
}

#[test]
fn deltas_only_insert() {
    // prepare model definition
    let uuid = Uuid::new();
    let mdl = Model::new_restore(
        uuid,
        "catname".into(),
        TagSelector::Str.into_full(),
        into_dict!(
            "catname" => Field::new([Layer::str()].into(), false),
            "is_good" => Field::new([Layer::bool()].into(), false),
            "magical" => Field::new([Layer::bool()].into(), false),
        ),
    );
    let row = Row::new(
        pkey("Schrödinger's cat"),
        into_dict!("is_good" => Datacell::new_bool(true), "magical" => Datacell::new_bool(false)),
        DeltaVersion::test_new(0),
        DeltaVersion::test_new(2),
    );
    {
        // update the row
        let mut wl = row.d_data().write();
        wl.set_txn_revised(DeltaVersion::test_new(3));
        *wl.fields_mut().get_mut("magical").unwrap() = Datacell::new_bool(true);
    }
    // prepare deltas
    let deltas = [
        // insert catname: Schrödinger's cat, is_good: true
        new_delta_with_row(0, 0, row.clone(), DataDeltaKind::Insert),
        // insert catname: good cat, is_good: true, magical: false
        new_delta(
            0,
            1,
            Datacell::new_str("good cat".into()),
            into_dict!("is_good" => Datacell::new_bool(true), "magical" => Datacell::new_bool(false)),
            DataDeltaKind::Insert,
        ),
        // insert catname: bad cat, is_good: false, magical: false
        new_delta(
            0,
            2,
            Datacell::new_str("bad cat".into()),
            into_dict!("is_good" => Datacell::new_bool(false), "magical" => Datacell::new_bool(false)),
            DataDeltaKind::Insert,
        ),
        // update catname: Schrödinger's cat, is_good: true, magical: true
        new_delta_with_row(0, 3, row.clone(), DataDeltaKind::Update),
    ];
    // delta queue
    let g = pin();
    for delta in deltas.clone() {
        mdl.delta_state().append_new_data_delta(delta, &g);
    }
    let file = open_file("deltas_only_insert.db-btlog")
        .into_created()
        .unwrap();
    {
        let mut persist_driver = DataBatchPersistDriver::new(file, true).unwrap();
        persist_driver.write_new_batch(&mdl, deltas.len()).unwrap();
        persist_driver.close().unwrap();
    }
    let mut restore_driver = DataBatchRestoreDriver::new(
        open_file("deltas_only_insert.db-btlog")
            .into_existing()
            .unwrap()
            .0,
    )
    .unwrap();
    let batch = restore_driver.read_all_batches().unwrap();
    assert_eq!(
        batch,
        vec![NormalBatch::new(
            vec![
                DecodedBatchEvent::new(
                    1,
                    pkey("good cat"),
                    DecodedBatchEventKind::Insert(vec![
                        Datacell::new_bool(true),
                        Datacell::new_bool(false)
                    ])
                ),
                DecodedBatchEvent::new(
                    2,
                    pkey("bad cat"),
                    DecodedBatchEventKind::Insert(vec![
                        Datacell::new_bool(false),
                        Datacell::new_bool(false)
                    ])
                ),
                DecodedBatchEvent::new(
                    3,
                    pkey("Schrödinger's cat"),
                    DecodedBatchEventKind::Update(vec![
                        Datacell::new_bool(true),
                        Datacell::new_bool(true)
                    ])
                )
            ],
            0
        )]
    )
}
@@ -0,0 +1,68 @@
/*
 * Created on Tue Sep 05 2023
 *
 * This file is a part of Skytable
 * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
 * NoSQL database written by Sayan Nandan ("the Author") with the
 * vision to provide flexibility in data modelling without compromising
 * on performance, queryability or scalability.
 *
 * Copyright (c) 2023, Sayan Nandan <ohsayan@outlook.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 *
*/

use crate::engine::storage::v1::{
    header_impl::{FileScope, FileSpecifier, FileSpecifierVersion, HostRunMode},
    rw::{FileOpen, SDSSFileIO},
};

#[test]
fn create_delete() {
    let f = SDSSFileIO::<super::VirtualFS>::open_or_create_perm_rw::<false>(
        "hello_world.db-tlog",
        FileScope::Journal,
        FileSpecifier::GNSTxnLog,
        FileSpecifierVersion::__new(0),
        0,
        HostRunMode::Prod,
        0,
    )
    .unwrap();
    match f {
        FileOpen::Existing(_, _) => panic!(),
        FileOpen::Created(_) => {}
    };
    let open = SDSSFileIO::<super::VirtualFS>::open_or_create_perm_rw::<false>(
        "hello_world.db-tlog",
        FileScope::Journal,
        FileSpecifier::GNSTxnLog,
        FileSpecifierVersion::__new(0),
        0,
        HostRunMode::Prod,
        0,
    )
    .unwrap();
    let h = match open {
        FileOpen::Existing(_, header) => header,
        _ => panic!(),
    };
    assert_eq!(h.gr_mdr().file_scope(), FileScope::Journal);
    assert_eq!(h.gr_mdr().file_spec(), FileSpecifier::GNSTxnLog);
    assert_eq!(h.gr_mdr().file_spec_id(), FileSpecifierVersion::__new(0));
    assert_eq!(h.gr_hr().run_mode(), HostRunMode::Prod);
    assert_eq!(h.gr_hr().setting_version(), 0);
    assert_eq!(h.gr_hr().startup_counter(), 0);
}
@@ -0,0 +1,210 @@
/*
 * Created on Tue Sep 05 2023
 *
 * This file is a part of Skytable
 * Skytable (formerly known as TerrabaseDB or Skybase) is a free and open-source
 * NoSQL database written by Sayan Nandan ("the Author") with the
 * vision to provide flexibility in data modelling without compromising
 * on performance, queryability or scalability.
 *
 * Copyright (c) 2023, Sayan Nandan <ohsayan@outlook.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 *
*/

use {
    crate::{
        engine::storage::v1::{
            header_impl::{FileSpecifier, FileSpecifierVersion, HostRunMode},
            journal::{self, JournalAdapter, JournalWriter},
            SDSSError, SDSSResult,
        },
        util,
    },
    std::cell::RefCell,
};
pub struct Database {
    data: RefCell<[u8; 10]>,
}
impl Database {
    fn copy_data(&self) -> [u8; 10] {
        *self.data.borrow()
    }
    fn new() -> Self {
        Self {
            data: RefCell::new([0; 10]),
        }
    }
    fn reset(&self) {
        *self.data.borrow_mut() = [0; 10];
    }
    fn txn_reset(
        &self,
        txn_writer: &mut JournalWriter<super::VirtualFS, DatabaseTxnAdapter>,
    ) -> SDSSResult<()> {
        self.reset();
        txn_writer.append_event(TxEvent::Reset)
    }
    fn set(&self, pos: usize, val: u8) {
        self.data.borrow_mut()[pos] = val;
    }
    fn txn_set(
        &self,
        pos: usize,
        val: u8,
        txn_writer: &mut JournalWriter<super::VirtualFS, DatabaseTxnAdapter>,
    ) -> SDSSResult<()> {
        self.set(pos, val);
        txn_writer.append_event(TxEvent::Set(pos, val))
    }
}
pub enum TxEvent {
    Reset,
    Set(usize, u8),
}
#[derive(Debug)]
pub enum TxError {
    SDSS(SDSSError),
}
direct_from! {
    TxError => {
        SDSSError as SDSS
    }
}
#[derive(Debug)]
pub struct DatabaseTxnAdapter;
impl JournalAdapter for DatabaseTxnAdapter {
    const RECOVERY_PLUGIN: bool = false;
    type Error = TxError;
    type JournalEvent = TxEvent;
    type GlobalState = Database;

    fn encode(event: Self::JournalEvent) -> Box<[u8]> {
        /*
            [1B: opcode][8B:Index][1B: New value]
        */
        let opcode = match event {
            TxEvent::Reset => 0u8,
            TxEvent::Set(_, _) => 1u8,
        };
        let index = match event {
            TxEvent::Reset => 0u64,
            TxEvent::Set(index, _) => index as u64,
        };
        let new_value = match event {
            TxEvent::Reset => 0,
            TxEvent::Set(_, val) => val,
        };
        let mut ret = Vec::with_capacity(10);
        ret.push(opcode);
        ret.extend(index.to_le_bytes());
        ret.push(new_value);
        ret.into_boxed_slice()
    }

    fn decode_and_update_state(payload: &[u8], gs: &Self::GlobalState) -> Result<(), TxError> {
        if payload.len() != 10 {
            return Err(SDSSError::CorruptedFile("testtxn.log").into());
        }
        let opcode = payload[0];
        let index = u64::from_le_bytes(util::copy_slice_to_array(&payload[1..9]));
        let new_value = payload[9];
        match opcode {
            0 if index == 0 && new_value == 0 => gs.reset(),
            1 if index < 10 && index < isize::MAX as u64 => gs.set(index as usize, new_value),
            _ => return Err(SDSSError::JournalLogEntryCorrupted.into()),
        }
        Ok(())
    }
}

fn open_log(
    log_name: &str,
    db: &Database,
) -> SDSSResult<JournalWriter<super::VirtualFS, DatabaseTxnAdapter>> {
    journal::open_journal::<DatabaseTxnAdapter, super::VirtualFS>(
        log_name,
        FileSpecifier::TestTransactionLog,
        FileSpecifierVersion::__new(0),
        0,
        HostRunMode::Prod,
        1,
        &db,
    )
}

#[test]
fn first_boot_second_readonly() {
    // create log
    let db1 = Database::new();
    let x = || -> SDSSResult<()> {
        let mut log = open_log("testtxn.log", &db1)?;
        db1.txn_set(0, 20, &mut log)?;
        db1.txn_set(9, 21, &mut log)?;
        log.append_journal_close_and_close()
    };
    x().unwrap();
    // backup original data
    let original_data = db1.copy_data();
    // restore log
    let empty_db2 = Database::new();
    open_log("testtxn.log", &empty_db2)
        .unwrap()
        .append_journal_close_and_close()
        .unwrap();
    assert_eq!(original_data, empty_db2.copy_data());
}
#[test]
fn oneboot_mod_twoboot_mod_thirdboot_read() {
    // first boot: set all to 1
    let db1 = Database::new();
    let x = || -> SDSSResult<()> {
        let mut log = open_log("duatxn.db-tlog", &db1)?;
        for i in 0..10 {
            db1.txn_set(i, 1, &mut log)?;
        }
        log.append_journal_close_and_close()
    };
    x().unwrap();
    let bkp_db1 = db1.copy_data();
    drop(db1);
    // second boot
    let db2 = Database::new();
    let x = || -> SDSSResult<()> {
        let mut log = open_log("duatxn.db-tlog", &db2)?;
        assert_eq!(bkp_db1, db2.copy_data());
        for i in 0..10 {
            let current_val = db2.data.borrow()[i];
            db2.txn_set(i, current_val + i as u8, &mut log)?;
        }
        log.append_journal_close_and_close()
    };
    x().unwrap();
    let bkp_db2 = db2.copy_data();
    drop(db2);
    // third boot
    let db3 = Database::new();
    let log = open_log("duatxn.db-tlog", &db3).unwrap();
    log.append_journal_close_and_close().unwrap();
    assert_eq!(bkp_db2, db3.copy_data());
    assert_eq!(
        db3.copy_data(),
        (1..=10)
            .into_iter()
            .map(u8::from)
            .collect::<Box<[u8]>>()
            .as_ref()
    );
}
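A worked sketch (not from the diff) of the fixed 10-byte payload produced by DatabaseTxnAdapter::encode: TxEvent::Set(3, 7) becomes opcode 1, the index 3 as a little-endian u64, then the new value 7. The test name below is hypothetical.

#[test]
fn sketch_txevent_set_encoding() {
    let encoded = <DatabaseTxnAdapter as JournalAdapter>::encode(TxEvent::Set(3, 7));
    // [opcode][index as u64 LE][new value]
    assert_eq!(encoded.as_ref(), &[1, 3, 0, 0, 0, 0, 0, 0, 0, 7]);
}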