1
0
Fork 0

Merge pull request #1253 from rqlite/remove-on-shutdown

Support removing self from cluster on shutdown
master
Philip O'Toole 1 year ago committed by GitHub
commit 75481a5dbd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,4 +1,7 @@
## 7.16.1 (unreleased)
## 7.17.0 (unreleased)
### New features
- [PR #1253](https://github.com/rqlite/rqlite/pull/1253): Node can remove itself from cluster automatically when shutting down.
### Implementation changes and bug fixes
- [PR #1252](https://github.com/rqlite/rqlite/pull/1252): Stop the HTTP server first on shutdown.

@ -183,6 +183,9 @@ type Config struct {
// RaftShutdownOnRemove sets whether Raft should be shutdown if the node is removed
RaftShutdownOnRemove bool
// RaftClusterRemoveOnShutdown sets whether the node should remove itself from the cluster on shutdown
RaftClusterRemoveOnShutdown bool
// RaftStepdownOnShutdown sets whether Leadership should be relinquished on shutdown
RaftStepdownOnShutdown bool
@ -475,6 +478,7 @@ func ParseFlags(name, desc string, build *BuildInfo) (*Config, error) {
flag.DurationVar(&config.RaftLeaderLeaseTimeout, "raft-leader-lease-timeout", 0, "Raft leader lease timeout. Use 0s for Raft default")
flag.BoolVar(&config.RaftStepdownOnShutdown, "raft-shutdown-stepdown", true, "Stepdown as leader before shutting down. Enabled by default")
flag.BoolVar(&config.RaftShutdownOnRemove, "raft-remove-shutdown", false, "Shutdown Raft if node removed")
flag.BoolVar(&config.RaftClusterRemoveOnShutdown, "raft-cluster-remove-shutdown", false, "Node removes itself from cluster on shutdown")
flag.BoolVar(&config.RaftNoFreelistSync, "raft-no-freelist-sync", false, "Do not sync Raft log database freelist to disk")
flag.StringVar(&config.RaftLogLevel, "raft-log-level", "INFO", "Minimum log level for Raft module")
flag.DurationVar(&config.RaftReapNodeTimeout, "raft-reap-node-timeout", 0*time.Hour, "Time after which a non-reachable voting node will be reaped. If not set, no reaping takes place")

@ -24,6 +24,7 @@ import (
"github.com/rqlite/rqlite/aws"
"github.com/rqlite/rqlite/cluster"
"github.com/rqlite/rqlite/cmd"
"github.com/rqlite/rqlite/command"
"github.com/rqlite/rqlite/db"
"github.com/rqlite/rqlite/disco"
httpd "github.com/rqlite/rqlite/http"
@ -198,6 +199,14 @@ func main() {
// possible that the node is going away.
httpServ.Close()
if cfg.RaftClusterRemoveOnShutdown {
if err := removeSelf(cfg, str, clstrClient); err != nil {
log.Printf("failed to remove self from cluster: %s", err.Error())
} else {
log.Printf("removed self successfully from cluster")
}
}
if cfg.RaftStepdownOnShutdown {
if str.IsLeader() {
// Don't log a confusing message if not (probably) Leader
@ -628,3 +637,15 @@ func createHTTPTLSConfig(cfg *Config) (*tls.Config, error) {
return rtls.CreateClientConfig(cfg.HTTPx509Cert, cfg.HTTPx509Key, cfg.HTTPx509CACert,
cfg.NoHTTPVerify, cfg.TLS1011)
}
// removeSelf asks the cluster Leader to remove this node from the cluster.
// It is called during shutdown when RaftClusterRemoveOnShutdown is enabled.
// Any failure is returned to the caller, which decides how to proceed with
// the rest of the shutdown sequence.
func removeSelf(cfg *Config, str *store.Store, client *cluster.Client) error {
	rn := &command.RemoveNodeRequest{
		Id: cfg.NodeID,
	}
	laddr, err := str.LeaderAddr()
	if err != nil {
		// Return the error rather than calling log.Fatalf: exiting the
		// process here would bypass the caller's error handling and abort
		// the remainder of the graceful shutdown.
		return err
	}
	return client.RemoveNode(rn, laddr, nil, 30*time.Second)
}

@ -647,8 +647,8 @@ func (s *Store) LeaderAddr() (string, error) {
if !s.open {
return "", nil
}
return string(s.raft.Leader()), nil
addr, _ := s.raft.LeaderWithID()
return string(addr), nil
}
// LeaderID returns the node ID of the Raft leader. Returns a

@ -74,6 +74,7 @@ class Node(object):
raft_addr=None, raft_adv=None,
raft_voter=True,
raft_snap_threshold=8192, raft_snap_int="1s",
raft_cluster_remove_shutdown=False,
http_cert=None, http_key=None, http_no_verify=False,
node_cert=None, node_key=None, node_no_verify=False,
auth=None, auto_backup=None, auto_restore=None,
@ -113,6 +114,7 @@ class Node(object):
self.raft_voter = raft_voter
self.raft_snap_threshold = raft_snap_threshold
self.raft_snap_int = raft_snap_int
self.raft_cluster_remove_shutdown = raft_cluster_remove_shutdown
self.http_cert = http_cert
self.http_key = http_key
self.http_no_verify = http_no_verify
@ -171,6 +173,7 @@ class Node(object):
'-raft-addr', self.raft_addr,
'-raft-snap', str(self.raft_snap_threshold),
'-raft-snap-int', self.raft_snap_int,
'-raft-cluster-remove-shutdown=%s' % str(self.raft_cluster_remove_shutdown).lower(),
'-raft-non-voter=%s' % str(not self.raft_voter).lower()]
if self.api_adv is not None:
command += ['-http-adv-addr', self.api_adv]
@ -226,10 +229,13 @@ class Node(object):
break
return self
def stop(self):
def stop(self, graceful=False):
if self.process is None:
return
self.process.kill()
if graceful:
self.process.terminate()
else:
self.process.kill()
self.process.wait()
self.process = None
return self

@ -600,6 +600,33 @@ class TestEndToEndSnapRestoreCluster(unittest.TestCase):
deprovision_node(self.n1)
deprovision_node(self.n2)
class TestShutdown(unittest.TestCase):
    def test_cluster_remove_on_shutdown(self):
        '''Test that removing a node on shutdown leaves a good cluster'''
        leader = Node(RQLITED_PATH, '0', raft_cluster_remove_shutdown=True)
        leader.start()
        leader.wait_for_leader()

        follower = Node(RQLITED_PATH, '1', raft_cluster_remove_shutdown=True)
        follower.start(join=leader.APIAddr())
        follower.wait_for_leader()

        # Both nodes should be visible before shutdown.
        self.assertEqual(len(leader.nodes()), 2)

        # Gracefully stop the first node; it should remove itself,
        # leaving a single-node cluster behind.
        leader.stop(graceful=True)
        self.assertEqual(len(follower.nodes()), 1)

        # Check that we have a working single-node cluster with a leader by doing
        # a write.
        follower.wait_for_ready()
        j = follower.execute('CREATE TABLE foo (id INTEGER NOT NULL PRIMARY KEY, name TEXT)')
        self.assertEqual(j, d_("{'results': [{}]}"))

        deprovision_node(leader)
        deprovision_node(follower)
if __name__ == "__main__":
    # Run the full end-to-end suite with verbose per-test output.
    unittest.main(verbosity=2)

Loading…
Cancel
Save