1
0
Fork 0

Merge remote-tracking branch 'origin' into count-failed-snapshots

master
Philip O'Toole 9 months ago
commit 889fbd6c59

@ -4,6 +4,7 @@
- [PR #1523](https://github.com/rqlite/rqlite/pull/1523): Move download functionality into _restore_ module.
- [PR #1524](https://github.com/rqlite/rqlite/pull/1524): Disco mode not supported when explicitly joining.
- [PR #1525](https://github.com/rqlite/rqlite/pull/1525): Make Store _Notify_ logic clearer.
- [PR #1526](https://github.com/rqlite/rqlite/pull/1526): Bootstrapper explicitly supports Voting nodes.
## 8.13.4 (December 23rd 2023)
This release makes sure the version information is correctly recorded in the released binaries. There are no functional changes.

@ -35,6 +35,45 @@ const (
BootTimeout
)
// Suffrage is the type of suffrage -- voting or non-voting -- a node has.
type Suffrage int

const (
	// SuffrageUnknown is the zero value of Suffrage. It is neither a Voter
	// nor a NonVoter.
	SuffrageUnknown Suffrage = iota

	// Voter is the suffrage of a voting node.
	Voter

	// NonVoter is the suffrage of a non-voting node.
	NonVoter
)

// VoterSuffrage returns a Suffrage based on the given boolean: Voter if b is
// true, NonVoter otherwise. It never returns SuffrageUnknown.
func VoterSuffrage(b bool) Suffrage {
	if b {
		return Voter
	}
	return NonVoter
}

// String returns a string representation of the Suffrage: "voter" for Voter,
// "non-voter" for NonVoter, and "unknown" for SuffrageUnknown or any other
// value.
//
// String is total on purpose. SuffrageUnknown is the zero value of the type,
// so formatting or logging an unset Suffrage must not bring the process down.
func (s Suffrage) String() string {
	switch s {
	case Voter:
		return "voter"
	case NonVoter:
		return "non-voter"
	default:
		return "unknown"
	}
}

// IsVoter returns whether the Suffrage is a Voter.
func (s Suffrage) IsVoter() bool {
	return s == Voter
}

// IsNonVoter returns whether the Suffrage is a NonVoter. Note that this is
// not the negation of IsVoter: both return false for SuffrageUnknown.
func (s Suffrage) IsNonVoter() bool {
	return s == NonVoter
}
const (
requestTimeout = 5 * time.Second
numJoinAttempts = 1
@ -94,19 +133,20 @@ func (b *Bootstrapper) SetCredentials(creds *Credentials) {
}
// Boot performs the bootstrapping process for this node. This means it will
// ensure this node becomes part of a cluster. It does this by either joining
// an existing cluster by explicitly joining it through one of these nodes,
// or by notifying those nodes that it exists, allowing a cluster-wide bootstrap to
// take place.
// ensure this node becomes part of a cluster. It does this by either:
// - joining an existing cluster by explicitly joining it through a node returned
// by the AddressProvider, or
// - if it's a Voting node, notifying all nodes returned by the AddressProvider
// that it exists, allowing a cluster-wide bootstrap to take place.
//
// Returns nil if the boot operation was successful, or if done() ever returns
// true. done() is periodically polled by the boot process. Returns an error
// if the boot process encounters an unrecoverable error, or booting does not
// occur within the given timeout.
//
// id and raftAddr are those of the node calling Boot. All operations
// performed by this function are done as a voting node.
func (b *Bootstrapper) Boot(id, raftAddr string, done func() bool, timeout time.Duration) error {
// id and raftAddr are those of the node calling Boot. suf is whether this node
// is a Voter or NonVoter.
func (b *Bootstrapper) Boot(id, raftAddr string, suf Suffrage, done func() bool, timeout time.Duration) error {
timeoutT := time.NewTimer(timeout)
defer timeoutT.Stop()
tickerT := time.NewTimer(random.Jitter(time.Millisecond))
@ -132,31 +172,32 @@ func (b *Bootstrapper) Boot(id, raftAddr string, done func() bool, timeout time.
if err != nil {
b.logger.Printf("provider lookup failed %s", err.Error())
}
if len(targets) == 0 {
continue
}
// Try an explicit join first. Joining an existing cluster is always given priority
// over trying to form a new cluster.
if j, err := joiner.Do(targets, id, raftAddr, true); err == nil {
if j, err := joiner.Do(targets, id, raftAddr, suf); err == nil {
b.logger.Printf("succeeded directly joining cluster via node at %s", j)
b.setBootStatus(BootJoin)
return nil
}
// This is where we have to be careful. This node failed to join with any node
// in the targets list. This could be because none of the nodes are contactable,
// or none of the nodes are in a functioning cluster with a leader. That means that
// this node could be part of a set of nodes that are bootstrapping to form a cluster
// de novo. For that to happen it needs to now let the other nodes know it is here.
// If this is a new cluster, some node will then reach the bootstrap-expect value
// first, form the cluster, beating all other nodes to it.
if err := b.notify(targets, id, raftAddr); err != nil {
b.logger.Printf("failed to notify all targets: %s (%s, will retry)", targets,
err.Error())
} else {
b.logger.Printf("succeeded notifying all targets: %s", targets)
if suf.IsVoter() {
// This is where we have to be careful. This node failed to join with any node
// in the targets list. This could be because none of the nodes are contactable,
// or none of the nodes are in a functioning cluster with a leader. That means that
// this node could be part of a set of nodes that are bootstrapping to form a cluster
// de novo. For that to happen it needs to now let the other nodes know it is here.
// If this is a new cluster, some node will then reach the bootstrap-expect value
// first, form the cluster, beating all other nodes to it.
if err := b.notify(targets, id, raftAddr); err != nil {
b.logger.Printf("failed to notify all targets: %s (%s, will retry)", targets,
err.Error())
} else {
b.logger.Printf("succeeded notifying all targets: %s", targets)
}
}
}
}

@ -48,7 +48,7 @@ func Test_BootstrapperBootDoneImmediately(t *testing.T) {
}
p := NewAddressProviderString([]string{srv.Addr()})
bs := NewBootstrapper(p, nil)
if err := bs.Boot("node1", "192.168.1.1:1234", done, 10*time.Second); err != nil {
if err := bs.Boot("node1", "192.168.1.1:1234", Voter, done, 10*time.Second); err != nil {
t.Fatalf("failed to boot: %s", err)
}
if exp, got := BootDone, bs.Status(); exp != got {
@ -69,7 +69,7 @@ func Test_BootstrapperBootTimeout(t *testing.T) {
p := NewAddressProviderString([]string{srv.Addr()})
bs := NewBootstrapper(p, NewClient(&simpleDialer{}, 0))
bs.Interval = time.Second
err := bs.Boot("node1", "192.168.1.1:1234", done, 5*time.Second)
err := bs.Boot("node1", "192.168.1.1:1234", Voter, done, 5*time.Second)
if err == nil {
t.Fatalf("no error returned from timed-out boot")
}
@ -120,7 +120,7 @@ func Test_BootstrapperBootSingleJoin(t *testing.T) {
bs := NewBootstrapper(p, NewClient(&simpleDialer{}, 0))
bs.Interval = time.Second
err := bs.Boot("node1", "192.168.1.1:1234", done, 5*time.Second)
err := bs.Boot("node1", "192.168.1.1:1234", Voter, done, 5*time.Second)
if err != nil {
t.Fatalf("failed to boot: %s", err)
}
@ -166,7 +166,7 @@ func Test_BootstrapperBootSingleNotify(t *testing.T) {
bs := NewBootstrapper(p, NewClient(&simpleDialer{}, 0))
bs.Interval = time.Second
err := bs.Boot("node1", "192.168.1.1:1234", done, 60*time.Second)
err := bs.Boot("node1", "192.168.1.1:1234", Voter, done, 60*time.Second)
if err != nil {
t.Fatalf("failed to boot: %s", err)
}
@ -255,7 +255,7 @@ func Test_BootstrapperBootMultiJoinNotify(t *testing.T) {
bs := NewBootstrapper(p, NewClient(&simpleDialer{}, 0))
bs.Interval = time.Second
err := bs.Boot("node1", "192.168.1.1:1234", done, 60*time.Second)
err := bs.Boot("node1", "192.168.1.1:1234", Voter, done, 60*time.Second)
if err != nil {
t.Fatalf("failed to boot: %s", err)
}

@ -47,7 +47,7 @@ func (j *Joiner) SetCredentials(creds *Credentials) {
// Do makes the actual join request. If the join is successful with any address,
// that address is returned. Otherwise, an error is returned.
func (j *Joiner) Do(targetAddrs []string, id, addr string, voter bool) (string, error) {
func (j *Joiner) Do(targetAddrs []string, id, addr string, suf Suffrage) (string, error) {
if id == "" {
return "", ErrNodeIDRequired
}
@ -56,7 +56,7 @@ func (j *Joiner) Do(targetAddrs []string, id, addr string, voter bool) (string,
var joinee string
for i := 0; i < j.numAttempts; i++ {
for _, ta := range targetAddrs {
joinee, err = j.join(ta, id, addr, voter)
joinee, err = j.join(ta, id, addr, suf)
if err == nil {
// Success!
return joinee, nil
@ -73,11 +73,11 @@ func (j *Joiner) Do(targetAddrs []string, id, addr string, voter bool) (string,
return "", ErrJoinFailed
}
func (j *Joiner) join(targetAddr, id, addr string, voter bool) (string, error) {
func (j *Joiner) join(targetAddr, id, addr string, suf Suffrage) (string, error) {
req := &command.JoinRequest{
Id: id,
Address: addr,
Voter: voter,
Voter: suf.IsVoter(),
}
// Attempt to join.

@ -49,7 +49,7 @@ func Test_SingleJoinOK(t *testing.T) {
c := NewClient(&simpleDialer{}, 0)
joiner := NewJoiner(c, numAttempts, attemptInterval)
addr, err := joiner.Do([]string{srv.Addr()}, "id0", "1.2.3.4", true)
addr, err := joiner.Do([]string{srv.Addr()}, "id0", "1.2.3.4", Voter)
if err != nil {
t.Fatal(err)
}
@ -68,7 +68,7 @@ func Test_SingleJoinZeroAttempts(t *testing.T) {
c := NewClient(&simpleDialer{}, 0)
joiner := NewJoiner(c, 0, attemptInterval)
_, err := joiner.Do([]string{srv.Addr()}, "id0", "1.2.3.4", true)
_, err := joiner.Do([]string{srv.Addr()}, "id0", "1.2.3.4", Voter)
if err != ErrJoinFailed {
t.Fatalf("Incorrect error returned when zero attempts specified")
}
@ -99,7 +99,7 @@ func Test_SingleJoinFail(t *testing.T) {
c := NewClient(&simpleDialer{}, 0)
joiner := NewJoiner(c, numAttempts, attemptInterval)
_, err := joiner.Do([]string{srv.Addr()}, "id0", "1.2.3.4", true)
_, err := joiner.Do([]string{srv.Addr()}, "id0", "1.2.3.4", Voter)
if err == nil {
t.Fatalf("expected error when joining bad node")
}
@ -153,7 +153,7 @@ func Test_DoubleJoinOKSecondNode(t *testing.T) {
c := NewClient(&simpleDialer{}, 0)
joiner := NewJoiner(c, numAttempts, attemptInterval)
addr, err := joiner.Do([]string{srv1.Addr(), srv2.Addr()}, "id0", "1.2.3.4", true)
addr, err := joiner.Do([]string{srv1.Addr(), srv2.Addr()}, "id0", "1.2.3.4", Voter)
if err != nil {
t.Fatal(err)
}

@ -447,7 +447,7 @@ func createCluster(cfg *Config, hasPeers bool, client *cluster.Client, str *stor
joiner.SetCredentials(cluster.CredentialsFor(credStr, cfg.JoinAs))
if joins != nil && cfg.BootstrapExpect == 0 {
// Explicit join operation requested, so do it.
j, err := joiner.Do(joins, str.ID(), cfg.RaftAdv, !cfg.RaftNonVoter)
j, err := joiner.Do(joins, str.ID(), cfg.RaftAdv, cluster.VoterSuffrage(!cfg.RaftNonVoter))
if err != nil {
return fmt.Errorf("failed to join cluster: %s", err.Error())
}
@ -459,7 +459,7 @@ func createCluster(cfg *Config, hasPeers bool, client *cluster.Client, str *stor
// Bootstrap with explicit join addresses requests.
bs := cluster.NewBootstrapper(cluster.NewAddressProviderString(joins), client)
bs.SetCredentials(cluster.CredentialsFor(credStr, cfg.JoinAs))
return bs.Boot(str.ID(), cfg.RaftAdv, isClustered, cfg.BootstrapExpectTimeout)
return bs.Boot(str.ID(), cfg.RaftAdv, cluster.Voter, isClustered, cfg.BootstrapExpectTimeout)
}
if cfg.DiscoMode == "" {
@ -503,7 +503,7 @@ func createCluster(cfg *Config, hasPeers bool, client *cluster.Client, str *stor
bs := cluster.NewBootstrapper(provider, client)
bs.SetCredentials(cluster.CredentialsFor(credStr, cfg.JoinAs))
httpServ.RegisterStatus("disco", provider)
return bs.Boot(str.ID(), cfg.RaftAdv, isClustered, cfg.BootstrapExpectTimeout)
return bs.Boot(str.ID(), cfg.RaftAdv, cluster.Voter, isClustered, cfg.BootstrapExpectTimeout)
case DiscoModeEtcdKV, DiscoModeConsulKV:
discoService, err := createDiscoService(cfg, str)
@ -534,7 +534,7 @@ func createCluster(cfg *Config, hasPeers bool, client *cluster.Client, str *stor
} else {
for {
log.Printf("discovery service returned %s as join address", addr)
if j, err := joiner.Do([]string{addr}, str.ID(), cfg.RaftAdv, !cfg.RaftNonVoter); err != nil {
if j, err := joiner.Do([]string{addr}, str.ID(), cfg.RaftAdv, cluster.VoterSuffrage(!cfg.RaftNonVoter)); err != nil {
log.Printf("failed to join cluster at %s: %s", addr, err.Error())
time.Sleep(time.Second)

@ -256,7 +256,7 @@ func Test_MultiNodeClusterBootstrap(t *testing.T) {
addr, _ := node1.Store.LeaderAddr()
return addr != ""
}
node1Bs.Boot(node1.ID, node1.RaftAddr, done, 10*time.Second)
node1Bs.Boot(node1.ID, node1.RaftAddr, cluster.Voter, done, 10*time.Second)
wg.Done()
}()
go func() {
@ -264,7 +264,7 @@ func Test_MultiNodeClusterBootstrap(t *testing.T) {
addr, _ := node2.Store.LeaderAddr()
return addr != ""
}
node2Bs.Boot(node2.ID, node2.RaftAddr, done, 10*time.Second)
node2Bs.Boot(node2.ID, node2.RaftAddr, cluster.Voter, done, 10*time.Second)
wg.Done()
}()
go func() {
@ -272,7 +272,7 @@ func Test_MultiNodeClusterBootstrap(t *testing.T) {
addr, _ := node3.Store.LeaderAddr()
return addr != ""
}
node3Bs.Boot(node3.ID, node3.RaftAddr, done, 10*time.Second)
node3Bs.Boot(node3.ID, node3.RaftAddr, cluster.Voter, done, 10*time.Second)
wg.Done()
}()
wg.Wait()
@ -424,7 +424,7 @@ func Test_MultiNodeClusterBootstrapLaterJoin(t *testing.T) {
addr, _ := node1.Store.LeaderAddr()
return addr != ""
}
node1Bs.Boot(node1.ID, node1.RaftAddr, done, 10*time.Second)
node1Bs.Boot(node1.ID, node1.RaftAddr, cluster.Voter, done, 10*time.Second)
wg.Done()
}()
go func() {
@ -432,7 +432,7 @@ func Test_MultiNodeClusterBootstrapLaterJoin(t *testing.T) {
addr, _ := node2.Store.LeaderAddr()
return addr != ""
}
node2Bs.Boot(node2.ID, node2.RaftAddr, done, 10*time.Second)
node2Bs.Boot(node2.ID, node2.RaftAddr, cluster.Voter, done, 10*time.Second)
wg.Done()
}()
go func() {
@ -440,7 +440,7 @@ func Test_MultiNodeClusterBootstrapLaterJoin(t *testing.T) {
addr, _ := node3.Store.LeaderAddr()
return addr != ""
}
node3Bs.Boot(node3.ID, node3.RaftAddr, done, 10*time.Second)
node3Bs.Boot(node3.ID, node3.RaftAddr, cluster.Voter, done, 10*time.Second)
wg.Done()
}()
wg.Wait()
@ -477,7 +477,7 @@ func Test_MultiNodeClusterBootstrapLaterJoin(t *testing.T) {
addr, _ := node4.Store.LeaderAddr()
return addr != ""
}
if err := node4Bs.Boot(node4.ID, node4.RaftAddr, done, 10*time.Second); err != nil {
if err := node4Bs.Boot(node4.ID, node4.RaftAddr, cluster.Voter, done, 10*time.Second); err != nil {
t.Fatalf("node 4 failed to boot")
}
node4Leader, err := node4.WaitForLeader()
@ -525,7 +525,7 @@ func Test_MultiNodeClusterBootstrapLaterJoinTLS(t *testing.T) {
addr, _ := node1.Store.LeaderAddr()
return addr != ""
}
node1Bs.Boot(node1.ID, node1.RaftAddr, done, 10*time.Second)
node1Bs.Boot(node1.ID, node1.RaftAddr, cluster.Voter, done, 10*time.Second)
wg.Done()
}()
go func() {
@ -533,7 +533,7 @@ func Test_MultiNodeClusterBootstrapLaterJoinTLS(t *testing.T) {
addr, _ := node2.Store.LeaderAddr()
return addr != ""
}
node2Bs.Boot(node2.ID, node2.RaftAddr, done, 10*time.Second)
node2Bs.Boot(node2.ID, node2.RaftAddr, cluster.Voter, done, 10*time.Second)
wg.Done()
}()
go func() {
@ -541,7 +541,7 @@ func Test_MultiNodeClusterBootstrapLaterJoinTLS(t *testing.T) {
addr, _ := node3.Store.LeaderAddr()
return addr != ""
}
node3Bs.Boot(node3.ID, node3.RaftAddr, done, 10*time.Second)
node3Bs.Boot(node3.ID, node3.RaftAddr, cluster.Voter, done, 10*time.Second)
wg.Done()
}()
wg.Wait()
@ -579,7 +579,7 @@ func Test_MultiNodeClusterBootstrapLaterJoinTLS(t *testing.T) {
addr, _ := node4.Store.LeaderAddr()
return addr != ""
}
if err := node4Bs.Boot(node4.ID, node4.RaftAddr, done, 10*time.Second); err != nil {
if err := node4Bs.Boot(node4.ID, node4.RaftAddr, cluster.Voter, done, 10*time.Second); err != nil {
t.Fatalf("node 4 failed to boot")
}
node4Leader, err := node4.WaitForLeader()

@ -222,14 +222,14 @@ func (n *Node) EnableTLSClient() {
// Join instructs this node to join the leader.
func (n *Node) Join(leader *Node) error {
joiner := cluster.NewJoiner(n.Client, 3, 1*time.Second)
_, err := joiner.Do([]string{leader.RaftAddr}, n.Store.ID(), n.RaftAddr, true)
_, err := joiner.Do([]string{leader.RaftAddr}, n.Store.ID(), n.RaftAddr, cluster.Voter)
return err
}
// JoinAsNonVoter instructs this node to join the leader, but as a non-voting node.
func (n *Node) JoinAsNonVoter(leader *Node) error {
joiner := cluster.NewJoiner(n.Client, 3, 1*time.Second)
_, err := joiner.Do([]string{leader.RaftAddr}, n.Store.ID(), n.RaftAddr, false)
_, err := joiner.Do([]string{leader.RaftAddr}, n.Store.ID(), n.RaftAddr, cluster.NonVoter)
return err
}

Loading…
Cancel
Save