Poison the database by default if snapshotting fails (#160)

* Stop accepting writes if snapshotting fails

This is an important consideration: if BGSAVE fails and poisons the
database, snapshotting can and should too. But this is debatable in some
parts. For example, users may configure snapshots to be on a network
file system (symlinked maybe) and this can fail.
Now in some cases, this failure 'may be acceptable'. This commit adds a
way to customize this behavior through the `failsafe` key in the
snapshots section of the cfg file and through the --stop-write-on-fail
option passed to `skyd` on startup. However, BGSAVE remains unchanged:
it will always poison the database if it fails. If the user doesn't want
this, they can simply disable BGSAVE.

* Add changelog
next
Sayan 3 years ago committed by GitHub
parent ac336ff821
commit 952c5caa86
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -2,6 +2,14 @@
All changes in this project will be noted in this file.
<details>
<summary><b>Unreleased</b></summary>
* Snapshotting failure will now poison the database (customizable through CLI options or the configuration file)
[see [#160](https://github.com/skytable/skytable/pull/160)]
</details>
## Version 0.6.0 [2021-05-27]
> Breaking changes!

@ -23,6 +23,7 @@ every = 120
[snapshot]
every = 3600 # Make a snapshot after every 1 hour (60min * 60sec= 3600secs)
atmost = 4 # Keep the 4 most recent snapshots
failsafe = true # stops accepting writes if snapshotting fails
# This key is *OPTIONAL*, used for TLS/SSL config
[ssl]

@ -3,90 +3,95 @@ version: 0.6.0
author: Sayan N. <ohsayan@outlook.com>
about: The Skytable Database server
args:
- config:
short: c
required: false
long: withconfig
value_name: cfgfile
help: Sets a configuration file to start skyd
takes_value: true
- restore:
short: r
required: false
long: restore
value_name: snapshotfile
help: Restores data from a previous snapshot
takes_value: true
- host:
short: h
required: false
long: host
value_name: host
help: Sets the host to which the server will bind
takes_value: true
- port:
short: p
required: false
long: port
value_name: port
help: Sets the port to which the server will bind
takes_value: true
- noart:
required: false
long: noart
help: Disables terminal artwork
takes_value: false
- nosave:
required: false
long: nosave
help: Disables automated background saving
takes_value: false
- saveduration:
required: false
long: saveduration
value_name: duration
short: S
takes_value: true
help: Set the BGSAVE duration
- snapevery:
required: false
long: snapevery
value_name: duration
help: Set the periodic snapshot duration
takes_value: true
- snapkeep:
required: false
long: snapkeep
value_name: count
help: Sets the number of most recent snapshots to keep
takes_value: true
- sslkey:
required: false
long: sslkey
short: k
value_name: key
help: Sets the PEM key file to use for SSL/TLS
takes_value: true
- sslchain:
required: false
long: sslchain
short: z
value_name: chain
help: Sets the PEM chain file to use for SSL/TLS
takes_value: true
- sslonly:
required: false
long: sslonly
takes_value: false
help: Tells the server to only accept SSL connections and disables the non-SSL port
- config:
short: c
required: false
long: withconfig
value_name: cfgfile
help: Sets a configuration file to start skyd
takes_value: true
- restore:
short: r
required: false
long: restore
value_name: snapshotfile
help: Restores data from a previous snapshot
takes_value: true
- host:
short: h
required: false
long: host
value_name: host
help: Sets the host to which the server will bind
takes_value: true
- port:
short: p
required: false
long: port
value_name: port
help: Sets the port to which the server will bind
takes_value: true
- noart:
required: false
long: noart
help: Disables terminal artwork
takes_value: false
- nosave:
required: false
long: nosave
help: Disables automated background saving
takes_value: false
- saveduration:
required: false
long: saveduration
value_name: duration
short: S
takes_value: true
help: Set the BGSAVE duration
- snapevery:
required: false
long: snapevery
value_name: duration
help: Set the periodic snapshot duration
takes_value: true
- snapkeep:
required: false
long: snapkeep
value_name: count
help: Sets the number of most recent snapshots to keep
takes_value: true
- sslkey:
required: false
long: sslkey
short: k
value_name: key
help: Sets the PEM key file to use for SSL/TLS
takes_value: true
- sslchain:
required: false
long: sslchain
short: z
value_name: chain
help: Sets the PEM chain file to use for SSL/TLS
takes_value: true
- sslonly:
required: false
long: sslonly
takes_value: false
help: Tells the server to only accept SSL connections and disables the non-SSL port
- stopwriteonfail:
required: false
long: stop-write-on-fail
takes_value: true
help: Stop accepting writes if any persistence method except BGSAVE fails (defaults to true)
subcommands:
- upgrade:
about: Upgrades old datsets to the latest format supported by this server edition
args:
- format:
short: f
long: from
takes_value: true
value_name: format
required: true
help: The format of the old files which need to be upgraded
- upgrade:
about: Upgrades old datsets to the latest format supported by this server edition
args:
- format:
short: f
long: from
takes_value: true
value_name: format
required: true
help: The format of the old files which need to be upgraded

@ -135,6 +135,8 @@ pub struct ConfigKeySnapshot {
///
/// If atmost is set to `0`, then all the snapshots will be kept
atmost: usize,
/// Prevent writes to the database if snapshotting fails
failsafe: Option<bool>,
}
/// Port configuration
@ -214,16 +216,22 @@ pub struct SnapshotPref {
pub every: u64,
/// The maximum numeber of snapshots to be kept
pub atmost: usize,
/// Lock writes if snapshotting fails
pub poison: bool,
}
impl SnapshotPref {
/// Create a new a new `SnapshotPref` instance
pub const fn new(every: u64, atmost: usize) -> Self {
SnapshotPref { every, atmost }
pub const fn new(every: u64, atmost: usize, poison: bool) -> Self {
SnapshotPref {
every,
atmost,
poison,
}
}
/// Returns `every,almost` as a tuple for pattern matching
pub const fn decompose(self) -> (u64, usize) {
(self.every, self.atmost)
pub const fn decompose(self) -> (u64, usize, bool) {
(self.every, self.atmost, self.poison)
}
}
@ -295,7 +303,11 @@ impl ParsedConfig {
snapshot: cfg_info
.snapshot
.map(|snapshot| {
SnapshotConfig::Enabled(SnapshotPref::new(snapshot.every, snapshot.atmost))
SnapshotConfig::Enabled(SnapshotPref::new(
snapshot.every,
snapshot.atmost,
snapshot.failsafe.unwrap_or(true),
))
})
.unwrap_or(SnapshotConfig::default()),
ports: if let Some(sslopts) = cfg_info.ssl {
@ -306,7 +318,6 @@ impl ParsedConfig {
chain: sslopts.chain,
port: sslopts.port,
},
host: cfg_info.server.host,
}
} else {
@ -526,8 +537,21 @@ pub fn get_config_file_or_return_cfg() -> Result<ConfigType<ParsedConfig, String
},
None => None,
};
let failsafe = if let Ok(failsafe) = matches
.value_of("stop-write-on-fail")
.map(|val| val.parse::<bool>())
.unwrap_or(Ok(true))
{
failsafe
} else {
return Err(ConfigError::CliArgErr(
"Please provide a boolean `true` or `false` value to --stop-write-on-fail",
));
};
let snapcfg = match (snapevery, snapkeep) {
(Some(every), Some(keep)) => SnapshotConfig::Enabled(SnapshotPref::new(every, keep)),
(Some(every), Some(keep)) => {
SnapshotConfig::Enabled(SnapshotPref::new(every, keep, failsafe))
}
(Some(_), None) => {
return Err(ConfigError::CliArgErr(
"No value supplied for `--snapkeep`. When you supply `--snapevery`, you also need to specify `--snapkeep`"
@ -702,7 +726,7 @@ mod tests {
ParsedConfig::new(
false,
BGSave::default(),
SnapshotConfig::Enabled(SnapshotPref::new(3600, 4)),
SnapshotConfig::Enabled(SnapshotPref::new(3600, 4, true)),
PortConfig::new_secure_only(
DEFAULT_IPV4,
SslOpts::new(
@ -782,7 +806,7 @@ mod tests {
assert_eq!(
cfg,
ParsedConfig {
snapshot: SnapshotConfig::Enabled(SnapshotPref::new(3600, 4)),
snapshot: SnapshotConfig::Enabled(SnapshotPref::new(3600, 4, true)),
bgsave: BGSave::default(),
noart: false,
ports: PortConfig::default()

@ -160,7 +160,10 @@ impl CoreDB {
pub fn new(snapshot_cfg: &SnapshotConfig, restore_file: Option<String>) -> TResult<Self> {
let coremap = diskstore::get_saved(restore_file)?;
let mut snap_count = None;
if let SnapshotConfig::Enabled(SnapshotPref { every: _, atmost }) = snapshot_cfg {
if let SnapshotConfig::Enabled(SnapshotPref {
every: _, atmost, ..
}) = snapshot_cfg
{
snap_count = Some(atmost);
}
let snapcfg = snap_count

@ -35,7 +35,8 @@ use tokio::time::{self, Duration};
/// This service calls `SnapEngine::mksnap()` periodically to create snapshots. Whenever
/// the interval for snapshotting expires or elapses, we create a snapshot. The snapshot service
/// keeps creating snapshots, as long as the database keeps running. Once [`dbnet::run`] broadcasts
/// a termination signal, we're ready to quit
/// a termination signal, we're ready to quit. This function will, by default, poison the database
/// if snapshotting fails, unless customized by the user.
pub async fn snapshot_service(
handle: CoreDB,
ss_config: SnapshotConfig,
@ -47,7 +48,7 @@ pub async fn snapshot_service(
return;
}
SnapshotConfig::Enabled(configuration) => {
let (duration, atmost) = configuration.decompose();
let (duration, atmost, failsafe) = configuration.decompose();
let duration = Duration::from_secs(duration);
let mut sengine = match SnapshotEngine::new(atmost, &handle, None) {
Ok(ss) => ss,
@ -59,7 +60,14 @@ pub async fn snapshot_service(
loop {
tokio::select! {
_ = time::sleep_until(time::Instant::now() + duration) => {
let _ = sengine.mksnap().await;
if sengine.mksnap().await {
// it passed, so unpoison the handle
drop(handle.unpoison());
} else if failsafe {
// mksnap returned false and we are set to stop writes if snapshotting failed
// so let's poison the handle
drop(handle.poison());
}
},
_ = termination_signal.receive_signal() => {
// time to terminate; goodbye!

Loading…
Cancel
Save