commit 6c5c7d60b5
@@ -0,0 +1,16 @@
module github.com/rqlite/rqlite

go 1.13

require (
	github.com/Bowery/prompt v0.0.0-20190916142128-fa8279994f75
	github.com/hashicorp/raft v1.1.1
	github.com/hashicorp/raft-boltdb v0.0.0-20191021154308-4207f1bf0617
	github.com/labstack/gommon v0.3.0 // indirect
	github.com/mattn/go-colorable v0.1.4 // indirect
	github.com/mattn/go-isatty v0.0.11 // indirect
	github.com/mattn/go-sqlite3 v2.0.2+incompatible
	github.com/mkideal/cli v0.0.3
	github.com/mkideal/pkg v0.0.0-20170503154153-3e188c9e7ecc
	golang.org/x/crypto v0.0.0-20191219195013-becbf705a915
)
@@ -0,0 +1,78 @@
|
||||
github.com/Bowery/prompt v0.0.0-20190916142128-fa8279994f75 h1:xGHheKK44eC6K0u5X+DZW/fRaR1LnDdqPHMZMWx5fv8=
|
||||
github.com/Bowery/prompt v0.0.0-20190916142128-fa8279994f75/go.mod h1:4/6eNcqZ09BZ9wLK3tZOjBA1nDj+B0728nlX5YRlSmQ=
|
||||
github.com/DataDog/datadog-go v2.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
|
||||
github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878 h1:EFSB7Zo9Eg91v7MJPVsifUysc/wPdN+NOnVe6bWbdBM=
|
||||
github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878/go.mod h1:3AMJUQhVx52RsWOnlkpikZr01T/yAVN2gn0861vByNg=
|
||||
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
|
||||
github.com/boltdb/bolt v1.3.1 h1:JQmyP4ZBrce+ZQu0dY660FMfatumYDLun9hBCUVIkF4=
|
||||
github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps=
|
||||
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
|
||||
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
|
||||
github.com/hashicorp/go-hclog v0.9.1 h1:9PZfAcVEvez4yhLH2TBU64/h/z4xlFI80cWXRrxuKuM=
|
||||
github.com/hashicorp/go-hclog v0.9.1/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
|
||||
github.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0=
|
||||
github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
|
||||
github.com/hashicorp/go-msgpack v0.5.5 h1:i9R9JSrqIz0QVLz3sz+i3YJdT7TTSLcfLLzJi9aZTuI=
|
||||
github.com/hashicorp/go-msgpack v0.5.5/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=
|
||||
github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=
|
||||
github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
|
||||
github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo=
|
||||
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
||||
github.com/hashicorp/raft v1.1.0/go.mod h1:4Ak7FSPnuvmb0GV6vgIAJ4vYT4bek9bb6Q+7HVbyzqM=
|
||||
github.com/hashicorp/raft v1.1.1 h1:HJr7UE1x/JrJSc9Oy6aDBHtNHUUBHjcQjTgvUVihoZs=
|
||||
github.com/hashicorp/raft v1.1.1/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8=
|
||||
github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea/go.mod h1:pNv7Wc3ycL6F5oOWn+tPGo2gWD4a5X+yp/ntwdKLjRk=
|
||||
github.com/hashicorp/raft-boltdb v0.0.0-20191021154308-4207f1bf0617 h1:CJDRE/2tBNFOrcoexD2nvTRbQEox3FDxl4NxIezp1b8=
|
||||
github.com/hashicorp/raft-boltdb v0.0.0-20191021154308-4207f1bf0617/go.mod h1:aUF6HQr8+t3FC/ZHAC+pZreUBhTaxumuu3L+d37uRxk=
|
||||
github.com/labstack/gommon v0.3.0 h1:JEeO0bvc78PKdyHxloTKiF8BD5iGrH8T6MSeGvSgob0=
|
||||
github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k=
|
||||
github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
|
||||
github.com/mattn/go-colorable v0.1.4 h1:snbPLB8fVfU9iwbbo30TPtbLRzwWu6aJS6Xh4eaaviA=
|
||||
github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
|
||||
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
|
||||
github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ=
|
||||
github.com/mattn/go-isatty v0.0.11 h1:FxPOTFNqGkuDUGi3H/qkUbQO4ZiBa2brKq5r0l8TGeM=
|
||||
github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE=
|
||||
github.com/mattn/go-sqlite3 v2.0.2+incompatible h1:qzw9c2GNT8UFrgWNDhCTqRqYUSmu/Dav/9Z58LGpk7U=
|
||||
github.com/mattn/go-sqlite3 v2.0.2+incompatible/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
|
||||
github.com/mkideal/cli v0.0.3 h1:Y1OXyfTVI9eQ9RTiXq12h7q88y22Q9ZU4VI09ifz6lE=
|
||||
github.com/mkideal/cli v0.0.3/go.mod h1:HLuSls75T7LFlTgByGeuLwcvdUmmx/aUQxnnEKxoZzY=
|
||||
github.com/mkideal/pkg v0.0.0-20170503154153-3e188c9e7ecc h1:eyN9UWVX+CeeCQZPudCUAPc84xQYTjEu9MWNa2HuJrs=
|
||||
github.com/mkideal/pkg v0.0.0-20170503154153-3e188c9e7ecc/go.mod h1:DECgB56amjU/mmmsKuooNPQ1856HASOMC3D4ntSVU70=
|
||||
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
|
||||
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_golang v0.9.2/go.mod h1:OsXs2jCmiKlQ1lTBmv21f2mNfw4xf/QclQDMrYNZzcM=
|
||||
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
|
||||
github.com/prometheus/common v0.0.0-20181126121408-4724e9255275/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
|
||||
github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=
|
||||
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
||||
github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20191219195013-becbf705a915 h1:aJ0ex187qoXrJHPo8ZasVTASQB7llQP6YeNzgDALPRk=
|
||||
golang.org/x/crypto v0.0.0-20191219195013-becbf705a915/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190523142557-0e01d883c5c5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190602015325-4c4f7f33c9ed h1:uPxWBzB3+mlnjy9W58qY1j/cjyFjutgw/Vhan2zLy/A=
|
||||
golang.org/x/sys v0.0.0-20190602015325-4c4f7f33c9ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037 h1:YyJpGZS1sBuBCzLAR1VEpK193GlqGZbnPFnPV/5Rsb4=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
@@ -0,0 +1,41 @@
package store

import (
	"encoding/json"
)

// commandType are commands that affect the state of the cluster, and must go through Raft.
type commandType int

const (
	execute commandType = iota // Commands which modify the database.
	query                      // Commands which query the database.
	peer                       // Commands that modify peers map.
)

type command struct {
	Typ commandType     `json:"typ,omitempty"`
	Sub json.RawMessage `json:"sub,omitempty"`
}

func newCommand(t commandType, d interface{}) (*command, error) {
	b, err := json.Marshal(d)
	if err != nil {
		return nil, err
	}
	return &command{
		Typ: t,
		Sub: b,
	}, nil

}

// databaseSub is a command sub which involves interaction with the database.
type databaseSub struct {
	Tx      bool     `json:"tx,omitempty"`
	Queries []string `json:"queries,omitempty"`
	Timings bool     `json:"timings,omitempty"`
}

// peersSub is a command which sets the API address for a Raft address.
type peersSub map[string]string
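Editor's note: the command struct above is the envelope serialized into each Raft log entry, a type tag plus a raw JSON sub-payload whose shape depends on the type. The following is a minimal sketch of how a query might be wrapped, as it could be written inside package store; the exampleEncodeQuery name and the assumption that the marshaled bytes are what get handed to Raft's Apply are illustrative, not part of this commit.

// Editor's sketch, not part of the commit.
func exampleEncodeQuery(queries []string) ([]byte, error) {
	sub := &databaseSub{
		Tx:      false,
		Queries: queries,
		Timings: true,
	}
	c, err := newCommand(query, sub)
	if err != nil {
		return nil, err
	}
	// These bytes are what would be proposed to the Raft log.
	return json.Marshal(c)
}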
@@ -1,99 +0,0 @@
package store

import (
	"os"
	"sort"
	"testing"
	"time"
)

// Test_NumPeersEnableSingle tests that a single node reports
// itself as capable of joining a cluster.
func Test_NumPeersEnableSingle(t *testing.T) {
	s0 := mustNewStore(true)
	defer os.RemoveAll(s0.Path())
	if err := s0.Open(true); err != nil {
		t.Fatalf("failed to open node for num peers test: %s", err.Error())
	}
	s0.WaitForLeader(5 * time.Second)
	s0.Close(true)

	j, err := JoinAllowed(s0.Path())
	if err != nil {
		t.Fatalf("failed to check join status of %s: %s", s0.Path(), err.Error())
	}
	if !j {
		t.Fatalf("config files at %s indicate joining is not allowed", s0.Path())
	}
}

// Test_NumPeersDisableSingle tests that a single node reports
// itself as capable of joining a cluster, when explicitly configured
// as not capable of self-electing.
func Test_NumPeersDisableSingle(t *testing.T) {
	s0 := mustNewStore(true)
	defer os.RemoveAll(s0.Path())
	if err := s0.Open(false); err != nil {
		t.Fatalf("failed to open node for num peers test: %s", err.Error())
	}
	s0.Close(true)

	j, err := JoinAllowed(s0.Path())
	if err != nil {
		t.Fatalf("failed to check join status of %s: %s", s0.Path(), err.Error())
	}
	if !j {
		t.Fatalf("config files at %s indicate joining is not allowed", s0.Path())
	}
}

// Test_NumPeersJoin tests that the correct number of nodes are recorded by
// nodes in a cluster.
func Test_NumPeersJoin(t *testing.T) {
	s0 := mustNewStore(true)
	defer os.RemoveAll(s0.Path())
	if err := s0.Open(true); err != nil {
		t.Fatalf("failed to open node for num peers test: %s", err.Error())
	}
	s0.WaitForLeader(5 * time.Second)

	s1 := mustNewStore(true)
	defer os.RemoveAll(s1.Path())
	if err := s1.Open(false); err != nil {
		t.Fatalf("failed to open node for num peers test: %s", err.Error())
	}

	// Get sorted list of cluster nodes.
	storeNodes := []string{s0.Addr().String(), s1.Addr().String()}
	sort.StringSlice(storeNodes).Sort()

	// Join the second node to the first.
	if err := s0.Join(s1.Addr().String()); err != nil {
		t.Fatalf("failed to join to node at %s: %s", s0.Addr().String(), err.Error())
	}
	s1.WaitForLeader(5 * time.Second)
	s1.Close(true)
	s0.Close(true)

	// Check that peers are set as expected.
	m, _ := NumPeers(s0.Path())
	if m != 2 {
		t.Fatalf("got wrong value for number of peers, exp %d, got %d", 2, m)
	}

	j, err := JoinAllowed(s0.Path())
	if err != nil {
		t.Fatalf("failed to check join status of %s: %s", s0.Path(), err.Error())
	}
	if j {
		t.Fatalf("config files at %s indicate joining is allowed", s0.Path())
	}

	k, err := JoinAllowed(s1.Path())
	if err != nil {
		t.Fatalf("failed to check join status of %s: %s", s1.Path(), err.Error())
	}
	if k {
		t.Fatalf("config files at %s indicate joining is allowed", s1.Path())
	}
}
@@ -0,0 +1,38 @@
package store

import (
	"net"
	"time"

	"github.com/hashicorp/raft"
)

// Transport is the interface the network service must provide.
type Transport interface {
	net.Listener

	// Dial is used to create a new outgoing connection
	Dial(address string, timeout time.Duration) (net.Conn, error)
}

// raftTransport takes a Transport and makes it suitable for use by the Raft
// networking system.
type raftTransport struct {
	tn Transport
}

func (r *raftTransport) Dial(address raft.ServerAddress, timeout time.Duration) (net.Conn, error) {
	return r.tn.Dial(string(address), timeout)
}

func (r *raftTransport) Accept() (net.Conn, error) {
	return r.tn.Accept()
}

func (r *raftTransport) Addr() net.Addr {
	return r.tn.Addr()
}

func (r *raftTransport) Close() error {
	return r.tn.Close()
}
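Editor's note: the Transport interface above is just a net.Listener plus an outgoing Dial, and raftTransport adapts it to the raft.ServerAddress type the Raft networking layer uses. As a rough sketch only, a plain-TCP implementation satisfying that interface could look like the following; the tcpTransport and newTCPTransport names are illustrative and not part of this commit.

// Editor's sketch, not part of the commit; package store already imports net and time.
type tcpTransport struct {
	net.Listener
}

func newTCPTransport(bindAddr string) (*tcpTransport, error) {
	ln, err := net.Listen("tcp", bindAddr)
	if err != nil {
		return nil, err
	}
	return &tcpTransport{Listener: ln}, nil
}

// Dial opens an outgoing connection within the given timeout.
func (t *tcpTransport) Dial(address string, timeout time.Duration) (net.Conn, error) {
	return net.DialTimeout("tcp", address, timeout)
}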
@@ -1,10 +0,0 @@
language: go

go:
  - 1.6
  - 1.7
  - tip

install: make deps
script:
  - make test
@@ -1,362 +0,0 @@
|
||||
Mozilla Public License, version 2.0
|
||||
|
||||
1. Definitions
|
||||
|
||||
1.1. "Contributor"
|
||||
|
||||
means each individual or legal entity that creates, contributes to the
|
||||
creation of, or owns Covered Software.
|
||||
|
||||
1.2. "Contributor Version"
|
||||
|
||||
means the combination of the Contributions of others (if any) used by a
|
||||
Contributor and that particular Contributor's Contribution.
|
||||
|
||||
1.3. "Contribution"
|
||||
|
||||
means Covered Software of a particular Contributor.
|
||||
|
||||
1.4. "Covered Software"
|
||||
|
||||
means Source Code Form to which the initial Contributor has attached the
|
||||
notice in Exhibit A, the Executable Form of such Source Code Form, and
|
||||
Modifications of such Source Code Form, in each case including portions
|
||||
thereof.
|
||||
|
||||
1.5. "Incompatible With Secondary Licenses"
|
||||
means
|
||||
|
||||
a. that the initial Contributor has attached the notice described in
|
||||
Exhibit B to the Covered Software; or
|
||||
|
||||
b. that the Covered Software was made available under the terms of
|
||||
version 1.1 or earlier of the License, but not also under the terms of
|
||||
a Secondary License.
|
||||
|
||||
1.6. "Executable Form"
|
||||
|
||||
means any form of the work other than Source Code Form.
|
||||
|
||||
1.7. "Larger Work"
|
||||
|
||||
means a work that combines Covered Software with other material, in a
|
||||
separate file or files, that is not Covered Software.
|
||||
|
||||
1.8. "License"
|
||||
|
||||
means this document.
|
||||
|
||||
1.9. "Licensable"
|
||||
|
||||
means having the right to grant, to the maximum extent possible, whether
|
||||
at the time of the initial grant or subsequently, any and all of the
|
||||
rights conveyed by this License.
|
||||
|
||||
1.10. "Modifications"
|
||||
|
||||
means any of the following:
|
||||
|
||||
a. any file in Source Code Form that results from an addition to,
|
||||
deletion from, or modification of the contents of Covered Software; or
|
||||
|
||||
b. any new file in Source Code Form that contains any Covered Software.
|
||||
|
||||
1.11. "Patent Claims" of a Contributor
|
||||
|
||||
means any patent claim(s), including without limitation, method,
|
||||
process, and apparatus claims, in any patent Licensable by such
|
||||
Contributor that would be infringed, but for the grant of the License,
|
||||
by the making, using, selling, offering for sale, having made, import,
|
||||
or transfer of either its Contributions or its Contributor Version.
|
||||
|
||||
1.12. "Secondary License"
|
||||
|
||||
means either the GNU General Public License, Version 2.0, the GNU Lesser
|
||||
General Public License, Version 2.1, the GNU Affero General Public
|
||||
License, Version 3.0, or any later versions of those licenses.
|
||||
|
||||
1.13. "Source Code Form"
|
||||
|
||||
means the form of the work preferred for making modifications.
|
||||
|
||||
1.14. "You" (or "Your")
|
||||
|
||||
means an individual or a legal entity exercising rights under this
|
||||
License. For legal entities, "You" includes any entity that controls, is
|
||||
controlled by, or is under common control with You. For purposes of this
|
||||
definition, "control" means (a) the power, direct or indirect, to cause
|
||||
the direction or management of such entity, whether by contract or
|
||||
otherwise, or (b) ownership of more than fifty percent (50%) of the
|
||||
outstanding shares or beneficial ownership of such entity.
|
||||
|
||||
|
||||
2. License Grants and Conditions
|
||||
|
||||
2.1. Grants
|
||||
|
||||
Each Contributor hereby grants You a world-wide, royalty-free,
|
||||
non-exclusive license:
|
||||
|
||||
a. under intellectual property rights (other than patent or trademark)
|
||||
Licensable by such Contributor to use, reproduce, make available,
|
||||
modify, display, perform, distribute, and otherwise exploit its
|
||||
Contributions, either on an unmodified basis, with Modifications, or
|
||||
as part of a Larger Work; and
|
||||
|
||||
b. under Patent Claims of such Contributor to make, use, sell, offer for
|
||||
sale, have made, import, and otherwise transfer either its
|
||||
Contributions or its Contributor Version.
|
||||
|
||||
2.2. Effective Date
|
||||
|
||||
The licenses granted in Section 2.1 with respect to any Contribution
|
||||
become effective for each Contribution on the date the Contributor first
|
||||
distributes such Contribution.
|
||||
|
||||
2.3. Limitations on Grant Scope
|
||||
|
||||
The licenses granted in this Section 2 are the only rights granted under
|
||||
this License. No additional rights or licenses will be implied from the
|
||||
distribution or licensing of Covered Software under this License.
|
||||
Notwithstanding Section 2.1(b) above, no patent license is granted by a
|
||||
Contributor:
|
||||
|
||||
a. for any code that a Contributor has removed from Covered Software; or
|
||||
|
||||
b. for infringements caused by: (i) Your and any other third party's
|
||||
modifications of Covered Software, or (ii) the combination of its
|
||||
Contributions with other software (except as part of its Contributor
|
||||
Version); or
|
||||
|
||||
c. under Patent Claims infringed by Covered Software in the absence of
|
||||
its Contributions.
|
||||
|
||||
This License does not grant any rights in the trademarks, service marks,
|
||||
or logos of any Contributor (except as may be necessary to comply with
|
||||
the notice requirements in Section 3.4).
|
||||
|
||||
2.4. Subsequent Licenses
|
||||
|
||||
No Contributor makes additional grants as a result of Your choice to
|
||||
distribute the Covered Software under a subsequent version of this
|
||||
License (see Section 10.2) or under the terms of a Secondary License (if
|
||||
permitted under the terms of Section 3.3).
|
||||
|
||||
2.5. Representation
|
||||
|
||||
Each Contributor represents that the Contributor believes its
|
||||
Contributions are its original creation(s) or it has sufficient rights to
|
||||
grant the rights to its Contributions conveyed by this License.
|
||||
|
||||
2.6. Fair Use
|
||||
|
||||
This License is not intended to limit any rights You have under
|
||||
applicable copyright doctrines of fair use, fair dealing, or other
|
||||
equivalents.
|
||||
|
||||
2.7. Conditions
|
||||
|
||||
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
|
||||
Section 2.1.
|
||||
|
||||
|
||||
3. Responsibilities
|
||||
|
||||
3.1. Distribution of Source Form
|
||||
|
||||
All distribution of Covered Software in Source Code Form, including any
|
||||
Modifications that You create or to which You contribute, must be under
|
||||
the terms of this License. You must inform recipients that the Source
|
||||
Code Form of the Covered Software is governed by the terms of this
|
||||
License, and how they can obtain a copy of this License. You may not
|
||||
attempt to alter or restrict the recipients' rights in the Source Code
|
||||
Form.
|
||||
|
||||
3.2. Distribution of Executable Form
|
||||
|
||||
If You distribute Covered Software in Executable Form then:
|
||||
|
||||
a. such Covered Software must also be made available in Source Code Form,
|
||||
as described in Section 3.1, and You must inform recipients of the
|
||||
Executable Form how they can obtain a copy of such Source Code Form by
|
||||
reasonable means in a timely manner, at a charge no more than the cost
|
||||
of distribution to the recipient; and
|
||||
|
||||
b. You may distribute such Executable Form under the terms of this
|
||||
License, or sublicense it under different terms, provided that the
|
||||
license for the Executable Form does not attempt to limit or alter the
|
||||
recipients' rights in the Source Code Form under this License.
|
||||
|
||||
3.3. Distribution of a Larger Work
|
||||
|
||||
You may create and distribute a Larger Work under terms of Your choice,
|
||||
provided that You also comply with the requirements of this License for
|
||||
the Covered Software. If the Larger Work is a combination of Covered
|
||||
Software with a work governed by one or more Secondary Licenses, and the
|
||||
Covered Software is not Incompatible With Secondary Licenses, this
|
||||
License permits You to additionally distribute such Covered Software
|
||||
under the terms of such Secondary License(s), so that the recipient of
|
||||
the Larger Work may, at their option, further distribute the Covered
|
||||
Software under the terms of either this License or such Secondary
|
||||
License(s).
|
||||
|
||||
3.4. Notices
|
||||
|
||||
You may not remove or alter the substance of any license notices
|
||||
(including copyright notices, patent notices, disclaimers of warranty, or
|
||||
limitations of liability) contained within the Source Code Form of the
|
||||
Covered Software, except that You may alter any license notices to the
|
||||
extent required to remedy known factual inaccuracies.
|
||||
|
||||
3.5. Application of Additional Terms
|
||||
|
||||
You may choose to offer, and to charge a fee for, warranty, support,
|
||||
indemnity or liability obligations to one or more recipients of Covered
|
||||
Software. However, You may do so only on Your own behalf, and not on
|
||||
behalf of any Contributor. You must make it absolutely clear that any
|
||||
such warranty, support, indemnity, or liability obligation is offered by
|
||||
You alone, and You hereby agree to indemnify every Contributor for any
|
||||
liability incurred by such Contributor as a result of warranty, support,
|
||||
indemnity or liability terms You offer. You may include additional
|
||||
disclaimers of warranty and limitations of liability specific to any
|
||||
jurisdiction.
|
||||
|
||||
4. Inability to Comply Due to Statute or Regulation
|
||||
|
||||
If it is impossible for You to comply with any of the terms of this License
|
||||
with respect to some or all of the Covered Software due to statute,
|
||||
judicial order, or regulation then You must: (a) comply with the terms of
|
||||
this License to the maximum extent possible; and (b) describe the
|
||||
limitations and the code they affect. Such description must be placed in a
|
||||
text file included with all distributions of the Covered Software under
|
||||
this License. Except to the extent prohibited by statute or regulation,
|
||||
such description must be sufficiently detailed for a recipient of ordinary
|
||||
skill to be able to understand it.
|
||||
|
||||
5. Termination
|
||||
|
||||
5.1. The rights granted under this License will terminate automatically if You
|
||||
fail to comply with any of its terms. However, if You become compliant,
|
||||
then the rights granted under this License from a particular Contributor
|
||||
are reinstated (a) provisionally, unless and until such Contributor
|
||||
explicitly and finally terminates Your grants, and (b) on an ongoing
|
||||
basis, if such Contributor fails to notify You of the non-compliance by
|
||||
some reasonable means prior to 60 days after You have come back into
|
||||
compliance. Moreover, Your grants from a particular Contributor are
|
||||
reinstated on an ongoing basis if such Contributor notifies You of the
|
||||
non-compliance by some reasonable means, this is the first time You have
|
||||
received notice of non-compliance with this License from such
|
||||
Contributor, and You become compliant prior to 30 days after Your receipt
|
||||
of the notice.
|
||||
|
||||
5.2. If You initiate litigation against any entity by asserting a patent
|
||||
infringement claim (excluding declaratory judgment actions,
|
||||
counter-claims, and cross-claims) alleging that a Contributor Version
|
||||
directly or indirectly infringes any patent, then the rights granted to
|
||||
You by any and all Contributors for the Covered Software under Section
|
||||
2.1 of this License shall terminate.
|
||||
|
||||
5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
|
||||
license agreements (excluding distributors and resellers) which have been
|
||||
validly granted by You or Your distributors under this License prior to
|
||||
termination shall survive termination.
|
||||
|
||||
6. Disclaimer of Warranty
|
||||
|
||||
Covered Software is provided under this License on an "as is" basis,
|
||||
without warranty of any kind, either expressed, implied, or statutory,
|
||||
including, without limitation, warranties that the Covered Software is free
|
||||
of defects, merchantable, fit for a particular purpose or non-infringing.
|
||||
The entire risk as to the quality and performance of the Covered Software
|
||||
is with You. Should any Covered Software prove defective in any respect,
|
||||
You (not any Contributor) assume the cost of any necessary servicing,
|
||||
repair, or correction. This disclaimer of warranty constitutes an essential
|
||||
part of this License. No use of any Covered Software is authorized under
|
||||
this License except under this disclaimer.
|
||||
|
||||
7. Limitation of Liability
|
||||
|
||||
Under no circumstances and under no legal theory, whether tort (including
|
||||
negligence), contract, or otherwise, shall any Contributor, or anyone who
|
||||
distributes Covered Software as permitted above, be liable to You for any
|
||||
direct, indirect, special, incidental, or consequential damages of any
|
||||
character including, without limitation, damages for lost profits, loss of
|
||||
goodwill, work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses, even if such party shall have been
|
||||
informed of the possibility of such damages. This limitation of liability
|
||||
shall not apply to liability for death or personal injury resulting from
|
||||
such party's negligence to the extent applicable law prohibits such
|
||||
limitation. Some jurisdictions do not allow the exclusion or limitation of
|
||||
incidental or consequential damages, so this exclusion and limitation may
|
||||
not apply to You.
|
||||
|
||||
8. Litigation
|
||||
|
||||
Any litigation relating to this License may be brought only in the courts
|
||||
of a jurisdiction where the defendant maintains its principal place of
|
||||
business and such litigation shall be governed by laws of that
|
||||
jurisdiction, without reference to its conflict-of-law provisions. Nothing
|
||||
in this Section shall prevent a party's ability to bring cross-claims or
|
||||
counter-claims.
|
||||
|
||||
9. Miscellaneous
|
||||
|
||||
This License represents the complete agreement concerning the subject
|
||||
matter hereof. If any provision of this License is held to be
|
||||
unenforceable, such provision shall be reformed only to the extent
|
||||
necessary to make it enforceable. Any law or regulation which provides that
|
||||
the language of a contract shall be construed against the drafter shall not
|
||||
be used to construe this License against a Contributor.
|
||||
|
||||
|
||||
10. Versions of the License
|
||||
|
||||
10.1. New Versions
|
||||
|
||||
Mozilla Foundation is the license steward. Except as provided in Section
|
||||
10.3, no one other than the license steward has the right to modify or
|
||||
publish new versions of this License. Each version will be given a
|
||||
distinguishing version number.
|
||||
|
||||
10.2. Effect of New Versions
|
||||
|
||||
You may distribute the Covered Software under the terms of the version
|
||||
of the License under which You originally received the Covered Software,
|
||||
or under the terms of any subsequent version published by the license
|
||||
steward.
|
||||
|
||||
10.3. Modified Versions
|
||||
|
||||
If you create software not governed by this License, and you want to
|
||||
create a new license for such software, you may create and use a
|
||||
modified version of this License if you rename the license and remove
|
||||
any references to the name of the license steward (except to note that
|
||||
such modified license differs from this License).
|
||||
|
||||
10.4. Distributing Source Code Form that is Incompatible With Secondary
|
||||
Licenses If You choose to distribute Source Code Form that is
|
||||
Incompatible With Secondary Licenses under the terms of this version of
|
||||
the License, the notice described in Exhibit B of this License must be
|
||||
attached.
|
||||
|
||||
Exhibit A - Source Code Form License Notice
|
||||
|
||||
This Source Code Form is subject to the
|
||||
terms of the Mozilla Public License, v.
|
||||
2.0. If a copy of the MPL was not
|
||||
distributed with this file, You can
|
||||
obtain one at
|
||||
http://mozilla.org/MPL/2.0/.
|
||||
|
||||
If it is not possible or desirable to put the notice in a particular file,
|
||||
then You may include the notice in a location (such as a LICENSE file in a
|
||||
relevant directory) where a recipient would be likely to look for such a
|
||||
notice.
|
||||
|
||||
You may add additional accurate notices of copyright ownership.
|
||||
|
||||
Exhibit B - "Incompatible With Secondary Licenses" Notice
|
||||
|
||||
This Source Code Form is "Incompatible
|
||||
With Secondary Licenses", as defined by
|
||||
the Mozilla Public License, v. 2.0.
|
@@ -1,11 +0,0 @@
DEPS = $(go list -f '{{range .TestImports}}{{.}} {{end}}' ./...)

.PHONY: test deps

test:
	go test -timeout=30s ./...

deps:
	go get -d -v ./...
	echo $(DEPS) | xargs -n1 go get -d
@@ -1,11 +0,0 @@
raft-boltdb
===========

This repository provides the `raftboltdb` package. The package exports the
`BoltStore` which is an implementation of both a `LogStore` and `StableStore`.

It is meant to be used as a backend for the `raft` [package
here](https://github.com/hashicorp/raft).

This implementation uses [BoltDB](https://github.com/boltdb/bolt). BoltDB is
a simple key/value store implemented in pure Go, and inspired by LMDB.
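Editor's note: since BoltStore implements both raft.LogStore and raft.StableStore, a single Bolt file can back both roles when constructing a Raft node. A minimal usage sketch follows; the file path is illustrative, and the FSM, snapshot store, and transport that raft.NewRaft also requires are omitted for brevity.

// Editor's sketch, not part of the commit.
package main

import (
	"log"

	"github.com/hashicorp/raft"
	raftboltdb "github.com/hashicorp/raft-boltdb"
)

func main() {
	store, err := raftboltdb.NewBoltStore("/tmp/raft.db")
	if err != nil {
		log.Fatalf("failed to open bolt store: %s", err)
	}
	defer store.Close()

	// One store can serve as both the log store and the stable store.
	var _ raft.LogStore = store
	var _ raft.StableStore = store
}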
@@ -1,88 +0,0 @@
package raftboltdb

import (
	"os"
	"testing"

	"github.com/hashicorp/raft/bench"
)

func BenchmarkBoltStore_FirstIndex(b *testing.B) {
	store := testBoltStore(b)
	defer store.Close()
	defer os.Remove(store.path)

	raftbench.FirstIndex(b, store)
}

func BenchmarkBoltStore_LastIndex(b *testing.B) {
	store := testBoltStore(b)
	defer store.Close()
	defer os.Remove(store.path)

	raftbench.LastIndex(b, store)
}

func BenchmarkBoltStore_GetLog(b *testing.B) {
	store := testBoltStore(b)
	defer store.Close()
	defer os.Remove(store.path)

	raftbench.GetLog(b, store)
}

func BenchmarkBoltStore_StoreLog(b *testing.B) {
	store := testBoltStore(b)
	defer store.Close()
	defer os.Remove(store.path)

	raftbench.StoreLog(b, store)
}

func BenchmarkBoltStore_StoreLogs(b *testing.B) {
	store := testBoltStore(b)
	defer store.Close()
	defer os.Remove(store.path)

	raftbench.StoreLogs(b, store)
}

func BenchmarkBoltStore_DeleteRange(b *testing.B) {
	store := testBoltStore(b)
	defer store.Close()
	defer os.Remove(store.path)

	raftbench.DeleteRange(b, store)
}

func BenchmarkBoltStore_Set(b *testing.B) {
	store := testBoltStore(b)
	defer store.Close()
	defer os.Remove(store.path)

	raftbench.Set(b, store)
}

func BenchmarkBoltStore_Get(b *testing.B) {
	store := testBoltStore(b)
	defer store.Close()
	defer os.Remove(store.path)

	raftbench.Get(b, store)
}

func BenchmarkBoltStore_SetUint64(b *testing.B) {
	store := testBoltStore(b)
	defer store.Close()
	defer os.Remove(store.path)

	raftbench.SetUint64(b, store)
}

func BenchmarkBoltStore_GetUint64(b *testing.B) {
	store := testBoltStore(b)
	defer store.Close()
	defer os.Remove(store.path)

	raftbench.GetUint64(b, store)
}
@@ -1,255 +0,0 @@
package raftboltdb

import (
	"errors"

	"github.com/boltdb/bolt"
	"github.com/hashicorp/raft"
)

const (
	// Permissions to use on the db file. This is only used if the
	// database file does not exist and needs to be created.
	dbFileMode = 0600
)

var (
	// Bucket names we perform transactions in
	dbLogs = []byte("logs")
	dbConf = []byte("conf")

	// An error indicating a given key does not exist
	ErrKeyNotFound = errors.New("not found")
)

// BoltStore provides access to BoltDB for Raft to store and retrieve
// log entries. It also provides key/value storage, and can be used as
// a LogStore and StableStore.
type BoltStore struct {
	// conn is the underlying handle to the db.
	conn *bolt.DB

	// The path to the Bolt database file
	path string
}

// Options contains all the configuration used to open the BoltDB
type Options struct {
	// Path is the file path to the BoltDB to use
	Path string

	// BoltOptions contains any specific BoltDB options you might
	// want to specify [e.g. open timeout]
	BoltOptions *bolt.Options
}

// readOnly returns true if the contained bolt options say to open
// the DB in readOnly mode [this can be useful to tools that want
// to examine the log]
func (o *Options) readOnly() bool {
	return o != nil && o.BoltOptions != nil && o.BoltOptions.ReadOnly
}

// NewBoltStore takes a file path and returns a connected Raft backend.
func NewBoltStore(path string) (*BoltStore, error) {
	return New(Options{Path: path})
}

// New uses the supplied options to open the BoltDB and prepare it for use as a raft backend.
func New(options Options) (*BoltStore, error) {
	// Try to connect
	handle, err := bolt.Open(options.Path, dbFileMode, options.BoltOptions)
	if err != nil {
		return nil, err
	}

	// Create the new store
	store := &BoltStore{
		conn: handle,
		path: options.Path,
	}

	// If the store was opened read-only, don't try and create buckets
	if !options.readOnly() {
		// Set up our buckets
		if err := store.initialize(); err != nil {
			store.Close()
			return nil, err
		}
	}
	return store, nil
}

// initialize is used to set up all of the buckets.
func (b *BoltStore) initialize() error {
	tx, err := b.conn.Begin(true)
	if err != nil {
		return err
	}
	defer tx.Rollback()

	// Create all the buckets
	if _, err := tx.CreateBucketIfNotExists(dbLogs); err != nil {
		return err
	}
	if _, err := tx.CreateBucketIfNotExists(dbConf); err != nil {
		return err
	}

	return tx.Commit()
}

// Close is used to gracefully close the DB connection.
func (b *BoltStore) Close() error {
	return b.conn.Close()
}

// FirstIndex returns the first known index from the Raft log.
func (b *BoltStore) FirstIndex() (uint64, error) {
	tx, err := b.conn.Begin(false)
	if err != nil {
		return 0, err
	}
	defer tx.Rollback()

	curs := tx.Bucket(dbLogs).Cursor()
	if first, _ := curs.First(); first == nil {
		return 0, nil
	} else {
		return bytesToUint64(first), nil
	}
}

// LastIndex returns the last known index from the Raft log.
func (b *BoltStore) LastIndex() (uint64, error) {
	tx, err := b.conn.Begin(false)
	if err != nil {
		return 0, err
	}
	defer tx.Rollback()

	curs := tx.Bucket(dbLogs).Cursor()
	if last, _ := curs.Last(); last == nil {
		return 0, nil
	} else {
		return bytesToUint64(last), nil
	}
}

// GetLog is used to retrieve a log from BoltDB at a given index.
func (b *BoltStore) GetLog(idx uint64, log *raft.Log) error {
	tx, err := b.conn.Begin(false)
	if err != nil {
		return err
	}
	defer tx.Rollback()

	bucket := tx.Bucket(dbLogs)
	val := bucket.Get(uint64ToBytes(idx))

	if val == nil {
		return raft.ErrLogNotFound
	}
	return decodeMsgPack(val, log)
}

// StoreLog is used to store a single raft log
func (b *BoltStore) StoreLog(log *raft.Log) error {
	return b.StoreLogs([]*raft.Log{log})
}

// StoreLogs is used to store a set of raft logs
func (b *BoltStore) StoreLogs(logs []*raft.Log) error {
	tx, err := b.conn.Begin(true)
	if err != nil {
		return err
	}
	defer tx.Rollback()

	for _, log := range logs {
		key := uint64ToBytes(log.Index)
		val, err := encodeMsgPack(log)
		if err != nil {
			return err
		}
		bucket := tx.Bucket(dbLogs)
		if err := bucket.Put(key, val.Bytes()); err != nil {
			return err
		}
	}

	return tx.Commit()
}

// DeleteRange is used to delete logs within a given range inclusively.
func (b *BoltStore) DeleteRange(min, max uint64) error {
	minKey := uint64ToBytes(min)

	tx, err := b.conn.Begin(true)
	if err != nil {
		return err
	}
	defer tx.Rollback()

	curs := tx.Bucket(dbLogs).Cursor()
	for k, _ := curs.Seek(minKey); k != nil; k, _ = curs.Next() {
		// Handle out-of-range log index
		if bytesToUint64(k) > max {
			break
		}

		// Delete in-range log index
		if err := curs.Delete(); err != nil {
			return err
		}
	}

	return tx.Commit()
}

// Set is used to set a key/value set outside of the raft log
func (b *BoltStore) Set(k, v []byte) error {
	tx, err := b.conn.Begin(true)
	if err != nil {
		return err
	}
	defer tx.Rollback()

	bucket := tx.Bucket(dbConf)
	if err := bucket.Put(k, v); err != nil {
		return err
	}

	return tx.Commit()
}

// Get is used to retrieve a value from the k/v store by key
func (b *BoltStore) Get(k []byte) ([]byte, error) {
	tx, err := b.conn.Begin(false)
	if err != nil {
		return nil, err
	}
	defer tx.Rollback()

	bucket := tx.Bucket(dbConf)
	val := bucket.Get(k)

	if val == nil {
		return nil, ErrKeyNotFound
	}
	return append([]byte(nil), val...), nil
}

// SetUint64 is like Set, but handles uint64 values
func (b *BoltStore) SetUint64(key []byte, val uint64) error {
	return b.Set(key, uint64ToBytes(val))
}

// GetUint64 is like Get, but handles uint64 values
func (b *BoltStore) GetUint64(key []byte) (uint64, error) {
	val, err := b.Get(key)
	if err != nil {
		return 0, err
	}
	return bytesToUint64(val), nil
}
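Editor's note: the readOnly handling above means New skips bucket creation when the caller asks for a read-only handle, which is aimed at tools that inspect an existing log without touching it. A sketch of such an inspection tool follows, using only the Options and index methods shown in this file; the database path is illustrative.

// Editor's sketch, not part of the commit.
package main

import (
	"fmt"
	"log"

	"github.com/boltdb/bolt"
	raftboltdb "github.com/hashicorp/raft-boltdb"
)

func main() {
	store, err := raftboltdb.New(raftboltdb.Options{
		Path:        "/path/to/raft.db",
		BoltOptions: &bolt.Options{ReadOnly: true},
	})
	if err != nil {
		log.Fatalf("failed to open store read-only: %s", err)
	}
	defer store.Close()

	first, _ := store.FirstIndex()
	last, _ := store.LastIndex()
	fmt.Printf("log spans indexes %d through %d\n", first, last)
}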
@@ -1,416 +0,0 @@
|
||||
package raftboltdb
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/boltdb/bolt"
|
||||
"github.com/hashicorp/raft"
|
||||
)
|
||||
|
||||
func testBoltStore(t testing.TB) *BoltStore {
|
||||
fh, err := ioutil.TempFile("", "bolt")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
os.Remove(fh.Name())
|
||||
|
||||
// Successfully creates and returns a store
|
||||
store, err := NewBoltStore(fh.Name())
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
return store
|
||||
}
|
||||
|
||||
func testRaftLog(idx uint64, data string) *raft.Log {
|
||||
return &raft.Log{
|
||||
Data: []byte(data),
|
||||
Index: idx,
|
||||
}
|
||||
}
|
||||
|
||||
func TestBoltStore_Implements(t *testing.T) {
|
||||
var store interface{} = &BoltStore{}
|
||||
if _, ok := store.(raft.StableStore); !ok {
|
||||
t.Fatalf("BoltStore does not implement raft.StableStore")
|
||||
}
|
||||
if _, ok := store.(raft.LogStore); !ok {
|
||||
t.Fatalf("BoltStore does not implement raft.LogStore")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBoltOptionsTimeout(t *testing.T) {
|
||||
fh, err := ioutil.TempFile("", "bolt")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
os.Remove(fh.Name())
|
||||
defer os.Remove(fh.Name())
|
||||
options := Options{
|
||||
Path: fh.Name(),
|
||||
BoltOptions: &bolt.Options{
|
||||
Timeout: time.Second / 10,
|
||||
},
|
||||
}
|
||||
store, err := New(options)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer store.Close()
|
||||
// trying to open it again should timeout
|
||||
doneCh := make(chan error, 1)
|
||||
go func() {
|
||||
_, err := New(options)
|
||||
doneCh <- err
|
||||
}()
|
||||
select {
|
||||
case err := <-doneCh:
|
||||
if err == nil || err.Error() != "timeout" {
|
||||
t.Errorf("Expected timeout error but got %v", err)
|
||||
}
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Errorf("Gave up waiting for timeout response")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBoltOptionsReadOnly(t *testing.T) {
|
||||
fh, err := ioutil.TempFile("", "bolt")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
defer os.Remove(fh.Name())
|
||||
store, err := NewBoltStore(fh.Name())
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
// Create the log
|
||||
log := &raft.Log{
|
||||
Data: []byte("log1"),
|
||||
Index: 1,
|
||||
}
|
||||
// Attempt to store the log
|
||||
if err := store.StoreLog(log); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
store.Close()
|
||||
options := Options{
|
||||
Path: fh.Name(),
|
||||
BoltOptions: &bolt.Options{
|
||||
Timeout: time.Second / 10,
|
||||
ReadOnly: true,
|
||||
},
|
||||
}
|
||||
roStore, err := New(options)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
defer roStore.Close()
|
||||
result := new(raft.Log)
|
||||
if err := roStore.GetLog(1, result); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Ensure the log comes back the same
|
||||
if !reflect.DeepEqual(log, result) {
|
||||
t.Errorf("bad: %v", result)
|
||||
}
|
||||
// Attempt to store the log, should fail on a read-only store
|
||||
err = roStore.StoreLog(log)
|
||||
if err != bolt.ErrDatabaseReadOnly {
|
||||
t.Errorf("expecting error %v, but got %v", bolt.ErrDatabaseReadOnly, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewBoltStore(t *testing.T) {
|
||||
fh, err := ioutil.TempFile("", "bolt")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
os.Remove(fh.Name())
|
||||
defer os.Remove(fh.Name())
|
||||
|
||||
// Successfully creates and returns a store
|
||||
store, err := NewBoltStore(fh.Name())
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Ensure the file was created
|
||||
if store.path != fh.Name() {
|
||||
t.Fatalf("unexpected file path %q", store.path)
|
||||
}
|
||||
if _, err := os.Stat(fh.Name()); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Close the store so we can open again
|
||||
if err := store.Close(); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Ensure our tables were created
|
||||
db, err := bolt.Open(fh.Name(), dbFileMode, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
tx, err := db.Begin(true)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if _, err := tx.CreateBucket([]byte(dbLogs)); err != bolt.ErrBucketExists {
|
||||
t.Fatalf("bad: %v", err)
|
||||
}
|
||||
if _, err := tx.CreateBucket([]byte(dbConf)); err != bolt.ErrBucketExists {
|
||||
t.Fatalf("bad: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBoltStore_FirstIndex(t *testing.T) {
|
||||
store := testBoltStore(t)
|
||||
defer store.Close()
|
||||
defer os.Remove(store.path)
|
||||
|
||||
// Should get 0 index on empty log
|
||||
idx, err := store.FirstIndex()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if idx != 0 {
|
||||
t.Fatalf("bad: %v", idx)
|
||||
}
|
||||
|
||||
// Set a mock raft log
|
||||
logs := []*raft.Log{
|
||||
testRaftLog(1, "log1"),
|
||||
testRaftLog(2, "log2"),
|
||||
testRaftLog(3, "log3"),
|
||||
}
|
||||
if err := store.StoreLogs(logs); err != nil {
|
||||
t.Fatalf("bad: %s", err)
|
||||
}
|
||||
|
||||
// Fetch the first Raft index
|
||||
idx, err = store.FirstIndex()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if idx != 1 {
|
||||
t.Fatalf("bad: %d", idx)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBoltStore_LastIndex(t *testing.T) {
|
||||
store := testBoltStore(t)
|
||||
defer store.Close()
|
||||
defer os.Remove(store.path)
|
||||
|
||||
// Should get 0 index on empty log
|
||||
idx, err := store.LastIndex()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if idx != 0 {
|
||||
t.Fatalf("bad: %v", idx)
|
||||
}
|
||||
|
||||
// Set a mock raft log
|
||||
logs := []*raft.Log{
|
||||
testRaftLog(1, "log1"),
|
||||
testRaftLog(2, "log2"),
|
||||
testRaftLog(3, "log3"),
|
||||
}
|
||||
if err := store.StoreLogs(logs); err != nil {
|
||||
t.Fatalf("bad: %s", err)
|
||||
}
|
||||
|
||||
// Fetch the last Raft index
|
||||
idx, err = store.LastIndex()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if idx != 3 {
|
||||
t.Fatalf("bad: %d", idx)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBoltStore_GetLog(t *testing.T) {
|
||||
store := testBoltStore(t)
|
||||
defer store.Close()
|
||||
defer os.Remove(store.path)
|
||||
|
||||
log := new(raft.Log)
|
||||
|
||||
// Should return an error on non-existent log
|
||||
if err := store.GetLog(1, log); err != raft.ErrLogNotFound {
|
||||
t.Fatalf("expected raft log not found error, got: %v", err)
|
||||
}
|
||||
|
||||
// Set a mock raft log
|
||||
logs := []*raft.Log{
|
||||
testRaftLog(1, "log1"),
|
||||
testRaftLog(2, "log2"),
|
||||
testRaftLog(3, "log3"),
|
||||
}
|
||||
if err := store.StoreLogs(logs); err != nil {
|
||||
t.Fatalf("bad: %s", err)
|
||||
}
|
||||
|
||||
// Should return the proper log
|
||||
if err := store.GetLog(2, log); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if !reflect.DeepEqual(log, logs[1]) {
|
||||
t.Fatalf("bad: %#v", log)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBoltStore_SetLog(t *testing.T) {
|
||||
store := testBoltStore(t)
|
||||
defer store.Close()
|
||||
defer os.Remove(store.path)
|
||||
|
||||
// Create the log
|
||||
log := &raft.Log{
|
||||
Data: []byte("log1"),
|
||||
Index: 1,
|
||||
}
|
||||
|
||||
// Attempt to store the log
|
||||
if err := store.StoreLog(log); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Retrieve the log again
|
||||
result := new(raft.Log)
|
||||
if err := store.GetLog(1, result); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Ensure the log comes back the same
|
||||
if !reflect.DeepEqual(log, result) {
|
||||
t.Fatalf("bad: %v", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBoltStore_SetLogs(t *testing.T) {
|
||||
store := testBoltStore(t)
|
||||
defer store.Close()
|
||||
defer os.Remove(store.path)
|
||||
|
||||
// Create a set of logs
|
||||
logs := []*raft.Log{
|
||||
testRaftLog(1, "log1"),
|
||||
testRaftLog(2, "log2"),
|
||||
}
|
||||
|
||||
// Attempt to store the logs
|
||||
if err := store.StoreLogs(logs); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Ensure we stored them all
|
||||
result1, result2 := new(raft.Log), new(raft.Log)
|
||||
if err := store.GetLog(1, result1); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if !reflect.DeepEqual(logs[0], result1) {
|
||||
t.Fatalf("bad: %#v", result1)
|
||||
}
|
||||
if err := store.GetLog(2, result2); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if !reflect.DeepEqual(logs[1], result2) {
|
||||
t.Fatalf("bad: %#v", result2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBoltStore_DeleteRange(t *testing.T) {
|
||||
store := testBoltStore(t)
|
||||
defer store.Close()
|
||||
defer os.Remove(store.path)
|
||||
|
||||
// Create a set of logs
|
||||
log1 := testRaftLog(1, "log1")
|
||||
log2 := testRaftLog(2, "log2")
|
||||
log3 := testRaftLog(3, "log3")
|
||||
logs := []*raft.Log{log1, log2, log3}
|
||||
|
||||
// Attempt to store the logs
|
||||
if err := store.StoreLogs(logs); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Attempt to delete a range of logs
|
||||
if err := store.DeleteRange(1, 2); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Ensure the logs were deleted
|
||||
if err := store.GetLog(1, new(raft.Log)); err != raft.ErrLogNotFound {
|
||||
t.Fatalf("should have deleted log1")
|
||||
}
|
||||
if err := store.GetLog(2, new(raft.Log)); err != raft.ErrLogNotFound {
|
||||
t.Fatalf("should have deleted log2")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBoltStore_Set_Get(t *testing.T) {
|
||||
store := testBoltStore(t)
|
||||
defer store.Close()
|
||||
defer os.Remove(store.path)
|
||||
|
||||
// Returns error on non-existent key
|
||||
if _, err := store.Get([]byte("bad")); err != ErrKeyNotFound {
|
||||
t.Fatalf("expected not found error, got: %q", err)
|
||||
}
|
||||
|
||||
k, v := []byte("hello"), []byte("world")
|
||||
|
||||
// Try to set a k/v pair
|
||||
if err := store.Set(k, v); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Try to read it back
|
||||
val, err := store.Get(k)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if !bytes.Equal(val, v) {
|
||||
t.Fatalf("bad: %v", val)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBoltStore_SetUint64_GetUint64(t *testing.T) {
|
||||
store := testBoltStore(t)
|
||||
defer store.Close()
|
||||
defer os.Remove(store.path)
|
||||
|
||||
// Returns error on non-existent key
|
||||
if _, err := store.GetUint64([]byte("bad")); err != ErrKeyNotFound {
|
||||
t.Fatalf("expected not found error, got: %q", err)
|
||||
}
|
||||
|
||||
k, v := []byte("abc"), uint64(123)
|
||||
|
||||
// Attempt to set the k/v pair
|
||||
if err := store.SetUint64(k, v); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Read back the value
|
||||
val, err := store.GetUint64(k)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if val != v {
|
||||
t.Fatalf("bad: %v", val)
|
||||
}
|
||||
}
|
@@ -1,37 +0,0 @@
package raftboltdb

import (
	"bytes"
	"encoding/binary"

	"github.com/hashicorp/go-msgpack/codec"
)

// Decode reverses the encode operation on a byte slice input
func decodeMsgPack(buf []byte, out interface{}) error {
	r := bytes.NewBuffer(buf)
	hd := codec.MsgpackHandle{}
	dec := codec.NewDecoder(r, &hd)
	return dec.Decode(out)
}

// Encode writes an encoded object to a new bytes buffer
func encodeMsgPack(in interface{}) (*bytes.Buffer, error) {
	buf := bytes.NewBuffer(nil)
	hd := codec.MsgpackHandle{}
	enc := codec.NewEncoder(buf, &hd)
	err := enc.Encode(in)
	return buf, err
}

// Converts bytes to an integer
func bytesToUint64(b []byte) uint64 {
	return binary.BigEndian.Uint64(b)
}

// Converts a uint to a byte slice
func uint64ToBytes(u uint64) []byte {
	buf := make([]byte, 8)
	binary.BigEndian.PutUint64(buf, u)
	return buf
}
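Editor's note: these helpers pair MsgPack encoding for log bodies with fixed-width big-endian keys, so BoltDB's byte-wise cursor order matches numeric log-index order. A small round-trip sketch follows, written as if it lived in the same raftboltdb package; the exampleRoundTrip name is illustrative, and it assumes the package's existing "errors" and "github.com/hashicorp/raft" imports.

// Editor's sketch, not part of the commit.
func exampleRoundTrip() error {
	in := &raft.Log{Index: 42, Term: 7, Data: []byte("payload")}

	buf, err := encodeMsgPack(in)
	if err != nil {
		return err
	}

	out := new(raft.Log)
	if err := decodeMsgPack(buf.Bytes(), out); err != nil {
		return err
	}

	// Big-endian keys preserve numeric ordering, which is why BoltStore can
	// walk log indexes with a simple cursor scan.
	key := uint64ToBytes(out.Index)
	if bytesToUint64(key) != in.Index {
		return errors.New("index did not survive the round trip")
	}
	return nil
}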
@@ -1,23 +0,0 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so

# Folders
_obj
_test

# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out

*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*

_testmain.go

*.exe
*.test
@@ -1,16 +0,0 @@
language: go

go:
  - 1.4
  - 1.5
  - 1.6
  - tip

install: make deps
script:
  - make integ

notifications:
  flowdock:
    secure: fZrcf9rlh2IrQrlch1sHkn3YI7SKvjGnAl/zyV5D6NROe1Bbr6d3QRMuCXWWdhJHzjKmXk5rIzbqJhUc0PNF7YjxGNKSzqWMQ56KcvN1k8DzlqxpqkcA3Jbs6fXCWo2fssRtZ7hj/wOP1f5n6cc7kzHDt9dgaYJ6nO2fqNPJiTc=
@@ -1,17 +0,0 @@
DEPS = $(go list -f '{{range .TestImports}}{{.}} {{end}}' ./...)

test:
	go test -timeout=30s ./...

integ: test
	INTEG_TESTS=yes go test -timeout=23s -run=Integ ./...

deps:
	go get -d -v ./...
	echo $(DEPS) | xargs -n1 go get -d

cov:
	INTEG_TESTS=yes gocov test github.com/hashicorp/raft | gocov-html > /tmp/coverage.html
	open /tmp/coverage.html

.PHONY: test cov integ deps
@ -1,107 +0,0 @@
raft [![Build Status](https://travis-ci.org/hashicorp/raft.png)](https://travis-ci.org/hashicorp/raft)
====

raft is a [Go](http://www.golang.org) library that manages a replicated
log and can be used with an FSM to manage replicated state machines. It
is a library for providing [consensus](http://en.wikipedia.org/wiki/Consensus_(computer_science)).

The use cases for such a library are far-reaching as replicated state
machines are a key component of many distributed systems. They enable
building Consistent, Partition Tolerant (CP) systems, with limited
fault tolerance as well.

## Building

If you wish to build raft you'll need Go version 1.2+ installed.

Please check your installation with:

```
go version
```

## Documentation

For complete documentation, see the associated [Godoc](http://godoc.org/github.com/hashicorp/raft).

To prevent complications with cgo, the primary backend `MDBStore` is in a separate repository,
called [raft-mdb](http://github.com/hashicorp/raft-mdb). That is the recommended implementation
for the `LogStore` and `StableStore`.

A pure Go backend using [BoltDB](https://github.com/boltdb/bolt) is also available called
[raft-boltdb](https://github.com/hashicorp/raft-boltdb). It can also be used as a `LogStore`
and `StableStore`.

## Tagged Releases

As of September 2017, Hashicorp will start using tags for this library to clearly indicate
major version updates. We recommend you vendor your application's dependency on this library.

* v0.1.0 is the original stable version of the library that was in master and has been maintained
with no breaking API changes. This was in use by Consul prior to version 0.7.0.

* v1.0.0 takes the changes that were staged in the library-v2-stage-one branch. This version
manages server identities using a UUID, so introduces some breaking API changes. It also versions
the Raft protocol, and requires some special steps when interoperating with Raft servers running
older versions of the library (see the detailed comment in config.go about version compatibility).
You can reference https://github.com/hashicorp/consul/pull/2222 for an idea of what was required
to port Consul to these new interfaces.

This version includes some new features as well, including non voting servers, a new address
provider abstraction in the transport layer, and more resilient snapshots.

## Protocol

raft is based on ["Raft: In Search of an Understandable Consensus Algorithm"](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf)

A high level overview of the Raft protocol is described below, but for details please read the full
[Raft paper](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf)
followed by the raft source. Any questions about the raft protocol should be sent to the
[raft-dev mailing list](https://groups.google.com/forum/#!forum/raft-dev).

### Protocol Description

Raft nodes are always in one of three states: follower, candidate or leader. All
nodes initially start out as a follower. In this state, nodes can accept log entries
from a leader and cast votes. If no entries are received for some time, nodes
self-promote to the candidate state. In the candidate state nodes request votes from
their peers. If a candidate receives a quorum of votes, then it is promoted to a leader.
The leader must accept new log entries and replicate to all the other followers.
In addition, if stale reads are not acceptable, all queries must also be performed on
the leader.

Once a cluster has a leader, it is able to accept new log entries. A client can
request that a leader append a new log entry, which is an opaque binary blob to
Raft. The leader then writes the entry to durable storage and attempts to replicate
to a quorum of followers. Once the log entry is considered *committed*, it can be
*applied* to a finite state machine. The finite state machine is application specific,
and is implemented using an interface.

An obvious question relates to the unbounded nature of a replicated log. Raft provides
a mechanism by which the current state is snapshotted, and the log is compacted. Because
of the FSM abstraction, restoring the state of the FSM must result in the same state
as a replay of old logs. This allows Raft to capture the FSM state at a point in time,
and then remove all the logs that were used to reach that state. This is performed automatically
without user intervention, and prevents unbounded disk usage as well as minimizing
time spent replaying logs.

Lastly, there is the issue of updating the peer set when new servers are joining
or existing servers are leaving. As long as a quorum of nodes is available, this
is not an issue as Raft provides mechanisms to dynamically update the peer set.
If a quorum of nodes is unavailable, then this becomes a very challenging issue.
For example, suppose there are only 2 peers, A and B. The quorum size is also
2, meaning both nodes must agree to commit a log entry. If either A or B fails,
it is now impossible to reach quorum. This means the cluster is unable to add,
or remove a node, or commit any additional log entries. This results in *unavailability*.
At this point, manual intervention would be required to remove either A or B,
and to restart the remaining node in bootstrap mode.

A Raft cluster of 3 nodes can tolerate a single node failure, while a cluster
of 5 can tolerate 2 node failures. The recommended configuration is to either
run 3 or 5 raft servers. This maximizes availability without
greatly sacrificing performance.
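To make the quorum arithmetic concrete, here is a small standalone Go sketch (not part of the library) that mirrors the same n/2 + 1 majority rule used by newMajorityQuorum in inflight.go further down in this change:

```
package main

import "fmt"

// quorumSize mirrors the majority rule: a cluster of n voting nodes needs
// n/2+1 matching acknowledgements, so it tolerates (n-1)/2 failures.
func quorumSize(n int) int { return n/2 + 1 }

func maxFailures(n int) int { return (n - 1) / 2 }

func main() {
	for _, n := range []int{1, 2, 3, 5, 7} {
		fmt.Printf("cluster=%d quorum=%d tolerates=%d failure(s)\n", n, quorumSize(n), maxFailures(n))
	}
}
```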
In terms of performance, Raft is comparable to Paxos. Assuming stable leadership,
committing a log entry requires a single round trip to half of the cluster.
Thus performance is bound by disk I/O and network latency.
@ -1,171 +0,0 @@
|
||||
package raftbench
|
||||
|
||||
// raftbench provides common benchmarking functions which can be used by
|
||||
// anything which implements the raft.LogStore and raft.StableStore interfaces.
|
||||
// All functions accept these interfaces and perform benchmarking. This
|
||||
// makes comparing backend performance easier by sharing the tests.
|
||||
|
||||
import (
|
||||
"github.com/hashicorp/raft"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func FirstIndex(b *testing.B, store raft.LogStore) {
|
||||
// Create some fake data
|
||||
var logs []*raft.Log
|
||||
for i := 1; i < 10; i++ {
|
||||
logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
|
||||
}
|
||||
if err := store.StoreLogs(logs); err != nil {
|
||||
b.Fatalf("err: %s", err)
|
||||
}
|
||||
b.ResetTimer()
|
||||
|
||||
// Run FirstIndex a number of times
|
||||
for n := 0; n < b.N; n++ {
|
||||
store.FirstIndex()
|
||||
}
|
||||
}
|
||||
|
||||
func LastIndex(b *testing.B, store raft.LogStore) {
|
||||
// Create some fake data
|
||||
var logs []*raft.Log
|
||||
for i := 1; i < 10; i++ {
|
||||
logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
|
||||
}
|
||||
if err := store.StoreLogs(logs); err != nil {
|
||||
b.Fatalf("err: %s", err)
|
||||
}
|
||||
b.ResetTimer()
|
||||
|
||||
// Run LastIndex a number of times
|
||||
for n := 0; n < b.N; n++ {
|
||||
store.LastIndex()
|
||||
}
|
||||
}
|
||||
|
||||
func GetLog(b *testing.B, store raft.LogStore) {
|
||||
// Create some fake data
|
||||
var logs []*raft.Log
|
||||
for i := 1; i < 10; i++ {
|
||||
logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
|
||||
}
|
||||
if err := store.StoreLogs(logs); err != nil {
|
||||
b.Fatalf("err: %s", err)
|
||||
}
|
||||
b.ResetTimer()
|
||||
|
||||
// Run GetLog a number of times
|
||||
for n := 0; n < b.N; n++ {
|
||||
if err := store.GetLog(5, new(raft.Log)); err != nil {
|
||||
b.Fatalf("err: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func StoreLog(b *testing.B, store raft.LogStore) {
|
||||
// Run StoreLog a number of times
|
||||
for n := 0; n < b.N; n++ {
|
||||
log := &raft.Log{Index: uint64(n), Data: []byte("data")}
|
||||
if err := store.StoreLog(log); err != nil {
|
||||
b.Fatalf("err: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func StoreLogs(b *testing.B, store raft.LogStore) {
|
||||
// Run StoreLogs a number of times. We want to set multiple logs each
|
||||
// run, so we create 3 logs with incrementing indexes for each iteration.
|
||||
for n := 0; n < b.N; n++ {
|
||||
b.StopTimer()
|
||||
offset := 3 * (n + 1)
|
||||
logs := []*raft.Log{
|
||||
&raft.Log{Index: uint64(offset - 2), Data: []byte("data")},
|
||||
&raft.Log{Index: uint64(offset - 1), Data: []byte("data")},
|
||||
&raft.Log{Index: uint64(offset), Data: []byte("data")},
|
||||
}
|
||||
b.StartTimer()
|
||||
|
||||
if err := store.StoreLogs(logs); err != nil {
|
||||
b.Fatalf("err: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func DeleteRange(b *testing.B, store raft.LogStore) {
|
||||
// Create some fake data. In this case, we create 3 new log entries for each
|
||||
// test case, and separate them by index in multiples of 10. This allows
|
||||
// some room so that we can test deleting ranges with "extra" logs to
|
||||
// ensure we stop going to the database once our max index is hit.
|
||||
var logs []*raft.Log
|
||||
for n := 0; n < b.N; n++ {
|
||||
offset := 10 * n
|
||||
for i := offset; i < offset+3; i++ {
|
||||
logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
|
||||
}
|
||||
}
|
||||
if err := store.StoreLogs(logs); err != nil {
|
||||
b.Fatalf("err: %s", err)
|
||||
}
|
||||
b.ResetTimer()
|
||||
|
||||
// Delete a range of the data
|
||||
for n := 0; n < b.N; n++ {
|
||||
offset := 10 * n
|
||||
if err := store.DeleteRange(uint64(offset), uint64(offset+9)); err != nil {
|
||||
b.Fatalf("err: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Set(b *testing.B, store raft.StableStore) {
|
||||
// Run Set a number of times
|
||||
for n := 0; n < b.N; n++ {
|
||||
if err := store.Set([]byte{byte(n)}, []byte("val")); err != nil {
|
||||
b.Fatalf("err: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Get(b *testing.B, store raft.StableStore) {
|
||||
// Create some fake data
|
||||
for i := 1; i < 10; i++ {
|
||||
if err := store.Set([]byte{byte(i)}, []byte("val")); err != nil {
|
||||
b.Fatalf("err: %s", err)
|
||||
}
|
||||
}
|
||||
b.ResetTimer()
|
||||
|
||||
// Run Get a number of times
|
||||
for n := 0; n < b.N; n++ {
|
||||
if _, err := store.Get([]byte{0x05}); err != nil {
|
||||
b.Fatalf("err: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func SetUint64(b *testing.B, store raft.StableStore) {
|
||||
// Run SetUint64 a number of times
|
||||
for n := 0; n < b.N; n++ {
|
||||
if err := store.SetUint64([]byte{byte(n)}, uint64(n)); err != nil {
|
||||
b.Fatalf("err: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func GetUint64(b *testing.B, store raft.StableStore) {
|
||||
// Create some fake data
|
||||
for i := 0; i < 10; i++ {
|
||||
if err := store.SetUint64([]byte{byte(i)}, uint64(i)); err != nil {
|
||||
b.Fatalf("err: %s", err)
|
||||
}
|
||||
}
|
||||
b.ResetTimer()
|
||||
|
||||
// Run GetUint64 a number of times
|
||||
for n := 0; n < b.N; n++ {
|
||||
if _, err := store.Get([]byte{0x05}); err != nil {
|
||||
b.Fatalf("err: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
@ -1,84 +0,0 @@
|
||||
package raft
|
||||
|
||||
// AppendEntriesRequest is the command used to append entries to the
|
||||
// replicated log.
|
||||
type AppendEntriesRequest struct {
|
||||
// Provide the current term and leader
|
||||
Term uint64
|
||||
Leader []byte
|
||||
|
||||
// Provide the previous entries for integrity checking
|
||||
PrevLogEntry uint64
|
||||
PrevLogTerm uint64
|
||||
|
||||
// New entries to commit
|
||||
Entries []*Log
|
||||
|
||||
// Commit index on the leader
|
||||
LeaderCommitIndex uint64
|
||||
}
|
||||
|
||||
// AppendEntriesResponse is the response returned from an
|
||||
// AppendEntriesRequest.
|
||||
type AppendEntriesResponse struct {
|
||||
// Newer term if leader is out of date
|
||||
Term uint64
|
||||
|
||||
// Last Log is a hint to help accelerate rebuilding slow nodes
|
||||
LastLog uint64
|
||||
|
||||
// We may not succeed if we have a conflicting entry
|
||||
Success bool
|
||||
|
||||
// There are scenarios where this request didn't succeed
|
||||
// but there's no need to wait/back-off the next attempt.
|
||||
NoRetryBackoff bool
|
||||
}
|
||||
|
||||
// RequestVoteRequest is the command used by a candidate to ask a Raft peer
|
||||
// for a vote in an election.
|
||||
type RequestVoteRequest struct {
|
||||
// Provide the term and our id
|
||||
Term uint64
|
||||
Candidate []byte
|
||||
|
||||
// Used to ensure safety
|
||||
LastLogIndex uint64
|
||||
LastLogTerm uint64
|
||||
}
|
||||
|
||||
// RequestVoteResponse is the response returned from a RequestVoteRequest.
|
||||
type RequestVoteResponse struct {
|
||||
// Newer term if leader is out of date
|
||||
Term uint64
|
||||
|
||||
// Return the peers, so that a node can shutdown on removal
|
||||
Peers []byte
|
||||
|
||||
// Is the vote granted
|
||||
Granted bool
|
||||
}
|
||||
|
||||
// InstallSnapshotRequest is the command sent to a Raft peer to bootstrap its
|
||||
// log (and state machine) from a snapshot on another peer.
|
||||
type InstallSnapshotRequest struct {
|
||||
Term uint64
|
||||
Leader []byte
|
||||
|
||||
// These are the last index/term included in the snapshot
|
||||
LastLogIndex uint64
|
||||
LastLogTerm uint64
|
||||
|
||||
// Peer Set in the snapshot
|
||||
Peers []byte
|
||||
|
||||
// Size of the snapshot
|
||||
Size int64
|
||||
}
|
||||
|
||||
// InstallSnapshotResponse is the response returned from an
|
||||
// InstallSnapshotRequest.
|
||||
type InstallSnapshotResponse struct {
|
||||
Term uint64
|
||||
Success bool
|
||||
}
|
@ -1,136 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Config provides any necessary configuration to
|
||||
// the Raft server
|
||||
type Config struct {
|
||||
// HeartbeatTimeout specifies the time in follower state without
|
||||
// a leader before we attempt an election.
|
||||
HeartbeatTimeout time.Duration
|
||||
|
||||
// ElectionTimeout specifies the time in candidate state without
|
||||
// a leader before we attempt an election.
|
||||
ElectionTimeout time.Duration
|
||||
|
||||
// CommitTimeout controls the time without an Apply() operation
|
||||
// before we heartbeat to ensure a timely commit. Due to random
|
||||
// staggering, may be delayed as much as 2x this value.
|
||||
CommitTimeout time.Duration
|
||||
|
||||
// MaxAppendEntries controls the maximum number of append entries
|
||||
// to send at once. We want to strike a balance between efficiency
|
||||
// and avoiding waste if the follower is going to reject because of
|
||||
// an inconsistent log.
|
||||
MaxAppendEntries int
|
||||
|
||||
// If we are a member of a cluster, and RemovePeer is invoked for the
|
||||
// local node, then we forget all peers and transition into the follower state.
|
||||
// If ShutdownOnRemove is set, we additionally shut down Raft. Otherwise,
|
||||
// we can become a leader of a cluster containing only this node.
|
||||
ShutdownOnRemove bool
|
||||
|
||||
// DisableBootstrapAfterElect is used to turn off EnableSingleNode
|
||||
// after the node is elected. This is used to prevent self-election
|
||||
// if the node is removed from the Raft cluster via RemovePeer. Setting
|
||||
// it to false will keep the bootstrap mode, allowing the node to self-elect
|
||||
// and potentially bootstrap a separate cluster.
|
||||
DisableBootstrapAfterElect bool
|
||||
|
||||
// TrailingLogs controls how many logs we leave after a snapshot. This is
|
||||
// used so that we can quickly replay logs on a follower instead of being
|
||||
// forced to send an entire snapshot.
|
||||
TrailingLogs uint64
|
||||
|
||||
// SnapshotInterval controls how often we check if we should perform a snapshot.
|
||||
// We randomly stagger between this value and 2x this value to avoid the entire
|
||||
// cluster from performing a snapshot at once.
|
||||
SnapshotInterval time.Duration
|
||||
|
||||
// SnapshotThreshold controls how many outstanding logs there must be before
|
||||
// we perform a snapshot. This is to prevent excessive snapshots when we can
|
||||
// just replay a small set of logs.
|
||||
SnapshotThreshold uint64
|
||||
|
||||
// EnableSingleNode allows for a single node mode of operation. This
|
||||
// is false by default, which prevents a lone node from electing itself
// leader.
|
||||
EnableSingleNode bool
|
||||
|
||||
// LeaderLeaseTimeout is used to control how long the "lease" lasts
|
||||
// for being the leader without being able to contact a quorum
|
||||
// of nodes. If we reach this interval without contact, we will
|
||||
// step down as leader.
|
||||
LeaderLeaseTimeout time.Duration
|
||||
|
||||
// StartAsLeader forces Raft to start in the leader state. This should
|
||||
// never be used except for testing purposes, as it can cause a split-brain.
|
||||
StartAsLeader bool
|
||||
|
||||
// NotifyCh is used to provide a channel that will be notified of leadership
|
||||
// changes. Raft will block writing to this channel, so it should either be
|
||||
// buffered or aggressively consumed.
|
||||
NotifyCh chan<- bool
|
||||
|
||||
// LogOutput is used as a sink for logs, unless Logger is specified.
|
||||
// Defaults to os.Stderr.
|
||||
LogOutput io.Writer
|
||||
|
||||
// Logger is a user-provided logger. If nil, a logger writing to LogOutput
|
||||
// is used.
|
||||
Logger *log.Logger
|
||||
}
|
||||
|
||||
// DefaultConfig returns a Config with usable defaults.
|
||||
func DefaultConfig() *Config {
|
||||
return &Config{
|
||||
HeartbeatTimeout: 1000 * time.Millisecond,
|
||||
ElectionTimeout: 1000 * time.Millisecond,
|
||||
CommitTimeout: 50 * time.Millisecond,
|
||||
MaxAppendEntries: 64,
|
||||
ShutdownOnRemove: true,
|
||||
DisableBootstrapAfterElect: true,
|
||||
TrailingLogs: 10240,
|
||||
SnapshotInterval: 120 * time.Second,
|
||||
SnapshotThreshold: 8192,
|
||||
EnableSingleNode: false,
|
||||
LeaderLeaseTimeout: 500 * time.Millisecond,
|
||||
}
|
||||
}
|
||||
|
||||
// ValidateConfig is used to validate a sane configuration
|
||||
func ValidateConfig(config *Config) error {
|
||||
if config.HeartbeatTimeout < 5*time.Millisecond {
|
||||
return fmt.Errorf("Heartbeat timeout is too low")
|
||||
}
|
||||
if config.ElectionTimeout < 5*time.Millisecond {
|
||||
return fmt.Errorf("Election timeout is too low")
|
||||
}
|
||||
if config.CommitTimeout < time.Millisecond {
|
||||
return fmt.Errorf("Commit timeout is too low")
|
||||
}
|
||||
if config.MaxAppendEntries <= 0 {
|
||||
return fmt.Errorf("MaxAppendEntries must be positive")
|
||||
}
|
||||
if config.MaxAppendEntries > 1024 {
|
||||
return fmt.Errorf("MaxAppendEntries is too large")
|
||||
}
|
||||
if config.SnapshotInterval < 5*time.Millisecond {
|
||||
return fmt.Errorf("Snapshot interval is too low")
|
||||
}
|
||||
if config.LeaderLeaseTimeout < 5*time.Millisecond {
|
||||
return fmt.Errorf("Leader lease timeout is too low")
|
||||
}
|
||||
if config.LeaderLeaseTimeout > config.HeartbeatTimeout {
|
||||
return fmt.Errorf("Leader lease timeout cannot be larger than heartbeat timeout")
|
||||
}
|
||||
if config.ElectionTimeout < config.HeartbeatTimeout {
|
||||
return fmt.Errorf("Election timeout must be equal or greater than Heartbeat Timeout")
|
||||
}
|
||||
return nil
|
||||
}
|
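As a usage note, not part of the file above: callers are expected to start from DefaultConfig and pass the adjusted result through ValidateConfig before handing it to Raft. A hedged sketch, with the helper name invented:

```
// exampleConfig is a hypothetical helper showing the intended flow:
// start from DefaultConfig, adjust, then validate.
func exampleConfig() (*Config, error) {
	conf := DefaultConfig()
	// Tighter timings for a low-latency LAN; still within ValidateConfig's bounds.
	conf.HeartbeatTimeout = 500 * time.Millisecond
	conf.ElectionTimeout = 500 * time.Millisecond
	conf.LeaderLeaseTimeout = 250 * time.Millisecond
	if err := ValidateConfig(conf); err != nil {
		return nil, err
	}
	return conf, nil
}
```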
@ -1,48 +0,0 @@
package raft

import (
	"fmt"
	"io"
)

// DiscardSnapshotStore is used to successfully snapshot while
// always discarding the snapshot. This is useful for when the
// log should be truncated but no snapshot should be retained.
// This should never be used in production, and is only
// suitable for testing.
type DiscardSnapshotStore struct{}

type DiscardSnapshotSink struct{}

// NewDiscardSnapshotStore is used to create a new DiscardSnapshotStore.
func NewDiscardSnapshotStore() *DiscardSnapshotStore {
	return &DiscardSnapshotStore{}
}

func (d *DiscardSnapshotStore) Create(index, term uint64, peers []byte) (SnapshotSink, error) {
	return &DiscardSnapshotSink{}, nil
}

func (d *DiscardSnapshotStore) List() ([]*SnapshotMeta, error) {
	return nil, nil
}

func (d *DiscardSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
	return nil, nil, fmt.Errorf("open is not supported")
}

func (d *DiscardSnapshotSink) Write(b []byte) (int, error) {
	return len(b), nil
}

func (d *DiscardSnapshotSink) Close() error {
	return nil
}

func (d *DiscardSnapshotSink) ID() string {
	return "discard"
}

func (d *DiscardSnapshotSink) Cancel() error {
	return nil
}
@ -1,17 +0,0 @@
package raft

import "testing"

func TestDiscardSnapshotStoreImpl(t *testing.T) {
	var impl interface{} = &DiscardSnapshotStore{}
	if _, ok := impl.(SnapshotStore); !ok {
		t.Fatalf("DiscardSnapshotStore not a SnapshotStore")
	}
}

func TestDiscardSnapshotSinkImpl(t *testing.T) {
	var impl interface{} = &DiscardSnapshotSink{}
	if _, ok := impl.(SnapshotSink); !ok {
		t.Fatalf("DiscardSnapshotSink not a SnapshotSink")
	}
}
@ -1,513 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"hash"
|
||||
"hash/crc64"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
|
||||
testPath = "permTest"
|
||||
snapPath = "snapshots"
|
||||
metaFilePath = "meta.json"
|
||||
stateFilePath = "state.bin"
|
||||
tmpSuffix = ".tmp"
|
||||
)
|
||||
|
||||
// FileSnapshotStore implements the SnapshotStore interface and allows
|
||||
// snapshots to be made on the local disk.
|
||||
type FileSnapshotStore struct {
|
||||
path string
|
||||
retain int
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
type snapMetaSlice []*fileSnapshotMeta
|
||||
|
||||
// FileSnapshotSink implements SnapshotSink with a file.
|
||||
type FileSnapshotSink struct {
|
||||
store *FileSnapshotStore
|
||||
logger *log.Logger
|
||||
dir string
|
||||
parentDir string
|
||||
meta fileSnapshotMeta
|
||||
|
||||
stateFile *os.File
|
||||
stateHash hash.Hash64
|
||||
buffered *bufio.Writer
|
||||
|
||||
closed bool
|
||||
}
|
||||
|
||||
// fileSnapshotMeta is stored on disk. We also put a CRC
|
||||
// on disk so that we can verify the snapshot.
|
||||
type fileSnapshotMeta struct {
|
||||
SnapshotMeta
|
||||
CRC []byte
|
||||
}
|
||||
|
||||
// bufferedFile is returned when we open a snapshot. This way
|
||||
// reads are buffered and the file still gets closed.
|
||||
type bufferedFile struct {
|
||||
bh *bufio.Reader
|
||||
fh *os.File
|
||||
}
|
||||
|
||||
func (b *bufferedFile) Read(p []byte) (n int, err error) {
|
||||
return b.bh.Read(p)
|
||||
}
|
||||
|
||||
func (b *bufferedFile) Close() error {
|
||||
return b.fh.Close()
|
||||
}
|
||||
|
||||
// NewFileSnapshotStoreWithLogger creates a new FileSnapshotStore based
|
||||
// on a base directory. The `retain` parameter controls how many
|
||||
// snapshots are retained. Must be at least 1.
|
||||
func NewFileSnapshotStoreWithLogger(base string, retain int, logger *log.Logger) (*FileSnapshotStore, error) {
|
||||
if retain < 1 {
|
||||
return nil, fmt.Errorf("must retain at least one snapshot")
|
||||
}
|
||||
if logger == nil {
|
||||
logger = log.New(os.Stderr, "", log.LstdFlags)
|
||||
}
|
||||
|
||||
// Ensure our path exists
|
||||
path := filepath.Join(base, snapPath)
|
||||
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
|
||||
return nil, fmt.Errorf("snapshot path not accessible: %v", err)
|
||||
}
|
||||
|
||||
// Setup the store
|
||||
store := &FileSnapshotStore{
|
||||
path: path,
|
||||
retain: retain,
|
||||
logger: logger,
|
||||
}
|
||||
|
||||
// Do a permissions test
|
||||
if err := store.testPermissions(); err != nil {
|
||||
return nil, fmt.Errorf("permissions test failed: %v", err)
|
||||
}
|
||||
return store, nil
|
||||
}
|
||||
|
||||
// NewFileSnapshotStore creates a new FileSnapshotStore based
|
||||
// on a base directory. The `retain` parameter controls how many
|
||||
// snapshots are retained. Must be at least 1.
|
||||
func NewFileSnapshotStore(base string, retain int, logOutput io.Writer) (*FileSnapshotStore, error) {
|
||||
if logOutput == nil {
|
||||
logOutput = os.Stderr
|
||||
}
|
||||
return NewFileSnapshotStoreWithLogger(base, retain, log.New(logOutput, "", log.LstdFlags))
|
||||
}
|
||||
|
||||
// testPermissions tries to touch a file in our path to see if it works.
|
||||
func (f *FileSnapshotStore) testPermissions() error {
|
||||
path := filepath.Join(f.path, testPath)
|
||||
fh, err := os.Create(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = fh.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = os.Remove(path); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// snapshotName generates a name for the snapshot.
|
||||
func snapshotName(term, index uint64) string {
|
||||
now := time.Now()
|
||||
msec := now.UnixNano() / int64(time.Millisecond)
|
||||
return fmt.Sprintf("%d-%d-%d", term, index, msec)
|
||||
}
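// Worked example (timestamp invented for illustration): snapshotName(3, 10)
// called when the wall clock reads Unix time 1500000000.123s returns
// "3-10-1500000000123", that is, term, then index, then creation time in milliseconds.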
|
||||
|
||||
// Create is used to start a new snapshot
|
||||
func (f *FileSnapshotStore) Create(index, term uint64, peers []byte) (SnapshotSink, error) {
|
||||
// Create a new path
|
||||
name := snapshotName(term, index)
|
||||
path := filepath.Join(f.path, name+tmpSuffix)
|
||||
f.logger.Printf("[INFO] snapshot: Creating new snapshot at %s", path)
|
||||
|
||||
// Make the directory
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to make snapshot directory: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create the sink
|
||||
sink := &FileSnapshotSink{
|
||||
store: f,
|
||||
logger: f.logger,
|
||||
dir: path,
|
||||
parentDir: f.path,
|
||||
meta: fileSnapshotMeta{
|
||||
SnapshotMeta: SnapshotMeta{
|
||||
ID: name,
|
||||
Index: index,
|
||||
Term: term,
|
||||
Peers: peers,
|
||||
},
|
||||
CRC: nil,
|
||||
},
|
||||
}
|
||||
|
||||
// Write out the meta data
|
||||
if err := sink.writeMeta(); err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Open the state file
|
||||
statePath := filepath.Join(path, stateFilePath)
|
||||
fh, err := os.Create(statePath)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to create state file: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
sink.stateFile = fh
|
||||
|
||||
// Create a CRC64 hash
|
||||
sink.stateHash = crc64.New(crc64.MakeTable(crc64.ECMA))
|
||||
|
||||
// Wrap both the hash and file in a MultiWriter with buffering
|
||||
multi := io.MultiWriter(sink.stateFile, sink.stateHash)
|
||||
sink.buffered = bufio.NewWriter(multi)
|
||||
|
||||
// Done
|
||||
return sink, nil
|
||||
}
|
||||
|
||||
// List returns available snapshots in the store.
|
||||
func (f *FileSnapshotStore) List() ([]*SnapshotMeta, error) {
|
||||
// Get the eligible snapshots
|
||||
snapshots, err := f.getSnapshots()
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var snapMeta []*SnapshotMeta
|
||||
for _, meta := range snapshots {
|
||||
snapMeta = append(snapMeta, &meta.SnapshotMeta)
|
||||
if len(snapMeta) == f.retain {
|
||||
break
|
||||
}
|
||||
}
|
||||
return snapMeta, nil
|
||||
}
|
||||
|
||||
// getSnapshots returns all the known snapshots.
|
||||
func (f *FileSnapshotStore) getSnapshots() ([]*fileSnapshotMeta, error) {
|
||||
// Get the eligible snapshots
|
||||
snapshots, err := ioutil.ReadDir(f.path)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to scan snapshot dir: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Populate the metadata
|
||||
var snapMeta []*fileSnapshotMeta
|
||||
for _, snap := range snapshots {
|
||||
// Ignore any files
|
||||
if !snap.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
// Ignore any temporary snapshots
|
||||
dirName := snap.Name()
|
||||
if strings.HasSuffix(dirName, tmpSuffix) {
|
||||
f.logger.Printf("[WARN] snapshot: Found temporary snapshot: %v", dirName)
|
||||
continue
|
||||
}
|
||||
|
||||
// Try to read the meta data
|
||||
meta, err := f.readMeta(dirName)
|
||||
if err != nil {
|
||||
f.logger.Printf("[WARN] snapshot: Failed to read metadata for %v: %v", dirName, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Append, but only return up to the retain count
|
||||
snapMeta = append(snapMeta, meta)
|
||||
}
|
||||
|
||||
// Sort the snapshot, reverse so we get new -> old
|
||||
sort.Sort(sort.Reverse(snapMetaSlice(snapMeta)))
|
||||
|
||||
return snapMeta, nil
|
||||
}
|
||||
|
||||
// readMeta is used to read the meta data for a given named backup
|
||||
func (f *FileSnapshotStore) readMeta(name string) (*fileSnapshotMeta, error) {
|
||||
// Open the meta file
|
||||
metaPath := filepath.Join(f.path, name, metaFilePath)
|
||||
fh, err := os.Open(metaPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
// Buffer the file IO
|
||||
buffered := bufio.NewReader(fh)
|
||||
|
||||
// Read in the JSON
|
||||
meta := &fileSnapshotMeta{}
|
||||
dec := json.NewDecoder(buffered)
|
||||
if err := dec.Decode(meta); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return meta, nil
|
||||
}
|
||||
|
||||
// Open takes a snapshot ID and returns a ReadCloser for that snapshot.
|
||||
func (f *FileSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
|
||||
// Get the metadata
|
||||
meta, err := f.readMeta(id)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to get meta data to open snapshot: %v", err)
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Open the state file
|
||||
statePath := filepath.Join(f.path, id, stateFilePath)
|
||||
fh, err := os.Open(statePath)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to open state file: %v", err)
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Create a CRC64 hash
|
||||
stateHash := crc64.New(crc64.MakeTable(crc64.ECMA))
|
||||
|
||||
// Compute the hash
|
||||
_, err = io.Copy(stateHash, fh)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to read state file: %v", err)
|
||||
fh.Close()
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Verify the hash
|
||||
computed := stateHash.Sum(nil)
|
||||
if bytes.Compare(meta.CRC, computed) != 0 {
|
||||
f.logger.Printf("[ERR] snapshot: CRC checksum failed (stored: %v computed: %v)",
|
||||
meta.CRC, computed)
|
||||
fh.Close()
|
||||
return nil, nil, fmt.Errorf("CRC mismatch")
|
||||
}
|
||||
|
||||
// Seek to the start
|
||||
if _, err := fh.Seek(0, 0); err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: State file seek failed: %v", err)
|
||||
fh.Close()
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Return a buffered file
|
||||
buffered := &bufferedFile{
|
||||
bh: bufio.NewReader(fh),
|
||||
fh: fh,
|
||||
}
|
||||
|
||||
return &meta.SnapshotMeta, buffered, nil
|
||||
}
|
||||
|
||||
// ReapSnapshots reaps any snapshots beyond the retain count.
|
||||
func (f *FileSnapshotStore) ReapSnapshots() error {
|
||||
snapshots, err := f.getSnapshots()
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
for i := f.retain; i < len(snapshots); i++ {
|
||||
path := filepath.Join(f.path, snapshots[i].ID)
|
||||
f.logger.Printf("[INFO] snapshot: reaping snapshot %v", path)
|
||||
if err := os.RemoveAll(path); err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to reap snapshot %v: %v", path, err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ID returns the ID of the snapshot, can be used with Open()
|
||||
// after the snapshot is finalized.
|
||||
func (s *FileSnapshotSink) ID() string {
|
||||
return s.meta.ID
|
||||
}
|
||||
|
||||
// Write is used to append to the state file. We write to the
|
||||
// buffered IO object to reduce the amount of context switches.
|
||||
func (s *FileSnapshotSink) Write(b []byte) (int, error) {
|
||||
return s.buffered.Write(b)
|
||||
}
|
||||
|
||||
// Close is used to indicate a successful end.
|
||||
func (s *FileSnapshotSink) Close() error {
|
||||
// Make sure close is idempotent
|
||||
if s.closed {
|
||||
return nil
|
||||
}
|
||||
s.closed = true
|
||||
|
||||
// Close the open handles
|
||||
if err := s.finalize(); err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
|
||||
if delErr := os.RemoveAll(s.dir); delErr != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to delete temporary snapshot at path %v: %v", s.dir, delErr)
|
||||
return delErr
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Write out the meta data
|
||||
if err := s.writeMeta(); err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Move the directory into place
|
||||
newPath := strings.TrimSuffix(s.dir, tmpSuffix)
|
||||
if err := os.Rename(s.dir, newPath); err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to move snapshot into place: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
if runtime.GOOS != "windows" { //skipping fsync for directory entry edits on Windows, only needed for *nix style file systems
|
||||
parentFH, err := os.Open(s.parentDir)
|
||||
defer parentFH.Close()
|
||||
if err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to open snapshot parent directory %v, error: %v", s.parentDir, err)
|
||||
return err
|
||||
}
|
||||
|
||||
if err = parentFH.Sync(); err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed syncing parent directory %v, error: %v", s.parentDir, err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Reap any old snapshots
|
||||
if err := s.store.ReapSnapshots(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Cancel is used to indicate an unsuccessful end.
|
||||
func (s *FileSnapshotSink) Cancel() error {
|
||||
// Make sure close is idempotent
|
||||
if s.closed {
|
||||
return nil
|
||||
}
|
||||
s.closed = true
|
||||
|
||||
// Close the open handles
|
||||
if err := s.finalize(); err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Attempt to remove all artifacts
|
||||
return os.RemoveAll(s.dir)
|
||||
}
|
||||
|
||||
// finalize is used to close all of our resources.
|
||||
func (s *FileSnapshotSink) finalize() error {
|
||||
// Flush any remaining data
|
||||
if err := s.buffered.Flush(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Sync to force fsync to disk
|
||||
if err := s.stateFile.Sync(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Get the file size
|
||||
stat, statErr := s.stateFile.Stat()
|
||||
|
||||
// Close the file
|
||||
if err := s.stateFile.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Set the file size, check after we close
|
||||
if statErr != nil {
|
||||
return statErr
|
||||
}
|
||||
s.meta.Size = stat.Size()
|
||||
|
||||
// Set the CRC
|
||||
s.meta.CRC = s.stateHash.Sum(nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeMeta is used to write out the metadata we have.
|
||||
func (s *FileSnapshotSink) writeMeta() error {
|
||||
// Open the meta file
|
||||
metaPath := filepath.Join(s.dir, metaFilePath)
|
||||
fh, err := os.Create(metaPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
// Buffer the file IO
|
||||
buffered := bufio.NewWriter(fh)
|
||||
|
||||
// Write out as JSON
|
||||
enc := json.NewEncoder(buffered)
|
||||
if err := enc.Encode(&s.meta); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = buffered.Flush(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = fh.Sync(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Implement the sort interface for []*fileSnapshotMeta.
|
||||
func (s snapMetaSlice) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s snapMetaSlice) Less(i, j int) bool {
|
||||
if s[i].Term != s[j].Term {
|
||||
return s[i].Term < s[j].Term
|
||||
}
|
||||
if s[i].Index != s[j].Index {
|
||||
return s[i].Index < s[j].Index
|
||||
}
|
||||
return s[i].ID < s[j].ID
|
||||
}
|
||||
|
||||
func (s snapMetaSlice) Swap(i, j int) {
|
||||
s[i], s[j] = s[j], s[i]
|
||||
}
|
@ -1,343 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"runtime"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func FileSnapTest(t *testing.T) (string, *FileSnapshotStore) {
|
||||
// Create a test dir
|
||||
dir, err := ioutil.TempDir("", "raft")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v ", err)
|
||||
}
|
||||
|
||||
snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
return dir, snap
|
||||
}
|
||||
|
||||
func TestFileSnapshotStoreImpl(t *testing.T) {
|
||||
var impl interface{} = &FileSnapshotStore{}
|
||||
if _, ok := impl.(SnapshotStore); !ok {
|
||||
t.Fatalf("FileSnapshotStore not a SnapshotStore")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFileSnapshotSinkImpl(t *testing.T) {
|
||||
var impl interface{} = &FileSnapshotSink{}
|
||||
if _, ok := impl.(SnapshotSink); !ok {
|
||||
t.Fatalf("FileSnapshotSink not a SnapshotSink")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFileSS_CreateSnapshotMissingParentDir(t *testing.T) {
|
||||
parent, err := ioutil.TempDir("", "raft")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v ", err)
|
||||
}
|
||||
defer os.RemoveAll(parent)
|
||||
|
||||
dir, err := ioutil.TempDir(parent, "raft")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v ", err)
|
||||
}
|
||||
|
||||
snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
os.RemoveAll(parent)
|
||||
peers := []byte("all my lovely friends")
|
||||
_, err = snap.Create(10, 3, peers)
|
||||
if err != nil {
|
||||
t.Fatalf("should not fail when using non existing parent")
|
||||
}
|
||||
|
||||
}
|
||||
func TestFileSS_CreateSnapshot(t *testing.T) {
|
||||
// Create a test dir
|
||||
dir, err := ioutil.TempDir("", "raft")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v ", err)
|
||||
}
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Check no snapshots
|
||||
snaps, err := snap.List()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if len(snaps) != 0 {
|
||||
t.Fatalf("did not expect any snapshots: %v", snaps)
|
||||
}
|
||||
|
||||
// Create a new sink
|
||||
peers := []byte("all my lovely friends")
|
||||
sink, err := snap.Create(10, 3, peers)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// The sink is not done, should not be in a list!
|
||||
snaps, err = snap.List()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if len(snaps) != 0 {
|
||||
t.Fatalf("did not expect any snapshots: %v", snaps)
|
||||
}
|
||||
|
||||
// Write to the sink
|
||||
_, err = sink.Write([]byte("first\n"))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
_, err = sink.Write([]byte("second\n"))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Done!
|
||||
err = sink.Close()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Should have a snapshot!
|
||||
snaps, err = snap.List()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if len(snaps) != 1 {
|
||||
t.Fatalf("expected one snapshot, got: %v", snaps)
|
||||
}
|
||||
|
||||
// Check the latest
|
||||
latest := snaps[0]
|
||||
if latest.Index != 10 {
|
||||
t.Fatalf("bad snapshot: %v", *latest)
|
||||
}
|
||||
if latest.Term != 3 {
|
||||
t.Fatalf("bad snapshot: %v", *latest)
|
||||
}
|
||||
if bytes.Compare(latest.Peers, peers) != 0 {
|
||||
t.Fatalf("bad snapshot: %v", *latest)
|
||||
}
|
||||
if latest.Size != 13 {
|
||||
t.Fatalf("bad snapshot: %v", *latest)
|
||||
}
|
||||
|
||||
// Read the snapshot
|
||||
_, r, err := snap.Open(latest.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Read out everything
|
||||
var buf bytes.Buffer
|
||||
if _, err := io.Copy(&buf, r); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if err := r.Close(); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Ensure a match
|
||||
if bytes.Compare(buf.Bytes(), []byte("first\nsecond\n")) != 0 {
|
||||
t.Fatalf("content mismatch")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFileSS_CancelSnapshot(t *testing.T) {
|
||||
// Create a test dir
|
||||
dir, err := ioutil.TempDir("", "raft")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v ", err)
|
||||
}
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Create a new sink
|
||||
peers := []byte("all my lovely friends")
|
||||
sink, err := snap.Create(10, 3, peers)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Cancel the snapshot! Should delete
|
||||
err = sink.Cancel()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// The sink is canceled, should not be in a list!
|
||||
snaps, err := snap.List()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if len(snaps) != 0 {
|
||||
t.Fatalf("did not expect any snapshots: %v", snaps)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFileSS_Retention(t *testing.T) {
|
||||
// Create a test dir
|
||||
dir, err := ioutil.TempDir("", "raft")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v ", err)
|
||||
}
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
snap, err := NewFileSnapshotStoreWithLogger(dir, 2, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Create a new sink
|
||||
peers := []byte("all my lovely friends")
|
||||
|
||||
// Create a few snapshots
|
||||
for i := 10; i < 15; i++ {
|
||||
sink, err := snap.Create(uint64(i), 3, peers)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
err = sink.Close()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Should only have 2 listed!
|
||||
snaps, err := snap.List()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if len(snaps) != 2 {
|
||||
t.Fatalf("expect 2 snapshots: %v", snaps)
|
||||
}
|
||||
|
||||
// Check they are the latest
|
||||
if snaps[0].Index != 14 {
|
||||
t.Fatalf("bad snap: %#v", *snaps[0])
|
||||
}
|
||||
if snaps[1].Index != 13 {
|
||||
t.Fatalf("bad snap: %#v", *snaps[1])
|
||||
}
|
||||
}
|
||||
|
||||
func TestFileSS_BadPerm(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("skipping file permission test on windows")
|
||||
}
|
||||
|
||||
// Create a temp dir
|
||||
dir1, err := ioutil.TempDir("", "raft")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
defer os.RemoveAll(dir1)
|
||||
|
||||
// Create a sub dir and remove all permissions
|
||||
dir2, err := ioutil.TempDir(dir1, "badperm")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if err := os.Chmod(dir2, 000); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
defer os.Chmod(dir2, 777) // Set perms back for delete
|
||||
|
||||
// Should fail
|
||||
if _, err := NewFileSnapshotStore(dir2, 3, nil); err == nil {
|
||||
t.Fatalf("should fail to use dir with bad perms")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFileSS_MissingParentDir(t *testing.T) {
|
||||
parent, err := ioutil.TempDir("", "raft")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v ", err)
|
||||
}
|
||||
defer os.RemoveAll(parent)
|
||||
|
||||
dir, err := ioutil.TempDir(parent, "raft")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v ", err)
|
||||
}
|
||||
|
||||
os.RemoveAll(parent)
|
||||
_, err = NewFileSnapshotStore(dir, 3, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("should not fail when using non existing parent")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFileSS_Ordering(t *testing.T) {
|
||||
// Create a test dir
|
||||
dir, err := ioutil.TempDir("", "raft")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v ", err)
|
||||
}
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Create a new sink
|
||||
peers := []byte("all my lovely friends")
|
||||
|
||||
sink, err := snap.Create(130350, 5, peers)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
err = sink.Close()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
sink, err = snap.Create(204917, 36, peers)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
err = sink.Close()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Should only have 2 listed!
|
||||
snaps, err := snap.List()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if len(snaps) != 2 {
|
||||
t.Fatalf("expect 2 snapshots: %v", snaps)
|
||||
}
|
||||
|
||||
// Check they are ordered
|
||||
if snaps[0].Term != 36 {
|
||||
t.Fatalf("bad snap: %#v", *snaps[0])
|
||||
}
|
||||
if snaps[1].Term != 5 {
|
||||
t.Fatalf("bad snap: %#v", *snaps[1])
|
||||
}
|
||||
}
|
@ -1,40 +0,0 @@
package raft

import (
	"io"
)

// FSM provides an interface that can be implemented by
// clients to make use of the replicated log.
type FSM interface {
	// Apply log is invoked once a log entry is committed.
	// It returns a value which will be made available in the
	// ApplyFuture returned by Raft.Apply method if that
	// method was called on the same Raft node as the FSM.
	Apply(*Log) interface{}

	// Snapshot is used to support log compaction. This call should
	// return an FSMSnapshot which can be used to save a point-in-time
	// snapshot of the FSM. Apply and Snapshot are not called in multiple
	// threads, but Apply will be called concurrently with Persist. This means
	// the FSM should be implemented in a fashion that allows for concurrent
	// updates while a snapshot is happening.
	Snapshot() (FSMSnapshot, error)

	// Restore is used to restore an FSM from a snapshot. It is not called
	// concurrently with any other command. The FSM must discard all previous
	// state.
	Restore(io.ReadCloser) error
}

// FSMSnapshot is returned by an FSM in response to a Snapshot
// It must be safe to invoke FSMSnapshot methods with concurrent
// calls to Apply.
type FSMSnapshot interface {
	// Persist should dump all necessary state to the WriteCloser 'sink',
	// and call sink.Close() when finished or call sink.Cancel() on error.
	Persist(sink SnapshotSink) error

	// Release is invoked when we are finished with the snapshot.
	Release()
}
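For orientation, a minimal sketch of a type satisfying both interfaces above; the counter FSM and its snapshot are invented for illustration and are not part of this package:

```
package raft

import (
	"encoding/json"
	"io"
	"sync"
)

// counterFSM is a hypothetical FSM that simply counts applied entries.
type counterFSM struct {
	mu    sync.Mutex
	count uint64
}

func (c *counterFSM) Apply(l *Log) interface{} {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.count++
	return c.count
}

func (c *counterFSM) Snapshot() (FSMSnapshot, error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	// Capture a point-in-time copy; Apply may keep running while Persist writes it out.
	return &counterSnapshot{count: c.count}, nil
}

func (c *counterFSM) Restore(rc io.ReadCloser) error {
	defer rc.Close()
	c.mu.Lock()
	defer c.mu.Unlock()
	return json.NewDecoder(rc).Decode(&c.count)
}

// counterSnapshot persists the captured count as JSON.
type counterSnapshot struct {
	count uint64
}

func (s *counterSnapshot) Persist(sink SnapshotSink) error {
	if err := json.NewEncoder(sink).Encode(s.count); err != nil {
		sink.Cancel()
		return err
	}
	return sink.Close()
}

func (s *counterSnapshot) Release() {}
```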
@ -1,203 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Future is used to represent an action that may occur in the future.
|
||||
type Future interface {
|
||||
// Error blocks until the future arrives and then
|
||||
// returns the error status of the future.
|
||||
// This may be called any number of times - all
|
||||
// calls will return the same value.
|
||||
// Note that it is not OK to call this method
|
||||
// twice concurrently on the same Future instance.
|
||||
Error() error
|
||||
}
|
||||
|
||||
// ApplyFuture is used for Apply() and may return the FSM response.
|
||||
type ApplyFuture interface {
|
||||
Future
|
||||
|
||||
// Response returns the FSM response as returned
|
||||
// by the FSM.Apply method. This must not be called
|
||||
// until after the Error method has returned.
|
||||
Response() interface{}
|
||||
|
||||
// Index holds the index of the newly applied log entry.
|
||||
// This must not be called
|
||||
// until after the Error method has returned.
|
||||
Index() uint64
|
||||
}
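// Illustration only (not part of the original file): how a caller typically
// consumes an ApplyFuture returned by Raft.Apply. The sketch relies solely on
// the interface defined above.
func waitApplied(f ApplyFuture) (interface{}, error) {
	// Error blocks until the entry is committed and applied, or returns the failure.
	if err := f.Error(); err != nil {
		return nil, err
	}
	// Response and Index are only valid once Error has returned.
	return f.Response(), nil
}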
|
||||
|
||||
// errorFuture is used to return a static error.
|
||||
type errorFuture struct {
|
||||
err error
|
||||
}
|
||||
|
||||
func (e errorFuture) Error() error {
|
||||
return e.err
|
||||
}
|
||||
|
||||
func (e errorFuture) Response() interface{} {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e errorFuture) Index() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// deferError can be embedded to allow a future
|
||||
// to provide an error in the future.
|
||||
type deferError struct {
|
||||
err error
|
||||
errCh chan error
|
||||
responded bool
|
||||
}
|
||||
|
||||
func (d *deferError) init() {
|
||||
d.errCh = make(chan error, 1)
|
||||
}
|
||||
|
||||
func (d *deferError) Error() error {
|
||||
if d.err != nil {
|
||||
// Note that when we've received a nil error, this
|
||||
// won't trigger, but the channel is closed after
|
||||
// send so we'll still return nil below.
|
||||
return d.err
|
||||
}
|
||||
if d.errCh == nil {
|
||||
panic("waiting for response on nil channel")
|
||||
}
|
||||
d.err = <-d.errCh
|
||||
return d.err
|
||||
}
|
||||
|
||||
func (d *deferError) respond(err error) {
|
||||
if d.errCh == nil {
|
||||
return
|
||||
}
|
||||
if d.responded {
|
||||
return
|
||||
}
|
||||
d.errCh <- err
|
||||
close(d.errCh)
|
||||
d.responded = true
|
||||
}
|
||||
|
||||
// logFuture is used to apply a log entry and waits until
|
||||
// the log is considered committed.
|
||||
type logFuture struct {
|
||||
deferError
|
||||
log Log
|
||||
policy quorumPolicy
|
||||
response interface{}
|
||||
dispatch time.Time
|
||||
}
|
||||
|
||||
func (l *logFuture) Response() interface{} {
|
||||
return l.response
|
||||
}
|
||||
|
||||
func (l *logFuture) Index() uint64 {
|
||||
return l.log.Index
|
||||
}
|
||||
|
||||
type peerFuture struct {
|
||||
deferError
|
||||
peers []string
|
||||
}
|
||||
|
||||
type shutdownFuture struct {
|
||||
raft *Raft
|
||||
}
|
||||
|
||||
func (s *shutdownFuture) Error() error {
|
||||
if s.raft == nil {
|
||||
return nil
|
||||
}
|
||||
s.raft.waitShutdown()
|
||||
if closeable, ok := s.raft.trans.(WithClose); ok {
|
||||
closeable.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// snapshotFuture is used for waiting on a snapshot to complete.
|
||||
type snapshotFuture struct {
|
||||
deferError
|
||||
}
|
||||
|
||||
// reqSnapshotFuture is used for requesting a snapshot start.
|
||||
// It is only used internally.
|
||||
type reqSnapshotFuture struct {
|
||||
deferError
|
||||
|
||||
// snapshot details provided by the FSM runner before responding
|
||||
index uint64
|
||||
term uint64
|
||||
peers []string
|
||||
snapshot FSMSnapshot
|
||||
}
|
||||
|
||||
// restoreFuture is used for requesting an FSM to perform a
|
||||
// snapshot restore. Used internally only.
|
||||
type restoreFuture struct {
|
||||
deferError
|
||||
ID string
|
||||
}
|
||||
|
||||
// verifyFuture is used to verify the current node is still
|
||||
// the leader. This is to prevent a stale read.
|
||||
type verifyFuture struct {
|
||||
deferError
|
||||
notifyCh chan *verifyFuture
|
||||
quorumSize int
|
||||
votes int
|
||||
voteLock sync.Mutex
|
||||
}
|
||||
|
||||
// vote is used to respond to a verifyFuture.
|
||||
// This may block when responding on the notifyCh.
|
||||
func (v *verifyFuture) vote(leader bool) {
|
||||
v.voteLock.Lock()
|
||||
defer v.voteLock.Unlock()
|
||||
|
||||
// Guard against having notified already
|
||||
if v.notifyCh == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if leader {
|
||||
v.votes++
|
||||
if v.votes >= v.quorumSize {
|
||||
v.notifyCh <- v
|
||||
v.notifyCh = nil
|
||||
}
|
||||
} else {
|
||||
v.notifyCh <- v
|
||||
v.notifyCh = nil
|
||||
}
|
||||
}
|
||||
|
||||
// appendFuture is used for waiting on a pipelined append
|
||||
// entries RPC.
|
||||
type appendFuture struct {
|
||||
deferError
|
||||
start time.Time
|
||||
args *AppendEntriesRequest
|
||||
resp *AppendEntriesResponse
|
||||
}
|
||||
|
||||
func (a *appendFuture) Start() time.Time {
|
||||
return a.start
|
||||
}
|
||||
|
||||
func (a *appendFuture) Request() *AppendEntriesRequest {
|
||||
return a.args
|
||||
}
|
||||
|
||||
func (a *appendFuture) Response() *AppendEntriesResponse {
|
||||
return a.resp
|
||||
}
|
@ -1,42 +0,0 @@
package raft

import (
	"errors"
	"testing"
)

func TestDeferFutureSuccess(t *testing.T) {
	var f deferError
	f.init()
	f.respond(nil)
	if err := f.Error(); err != nil {
		t.Fatalf("unexpected error result; got %#v want nil", err)
	}
	if err := f.Error(); err != nil {
		t.Fatalf("unexpected error result; got %#v want nil", err)
	}
}

func TestDeferFutureError(t *testing.T) {
	want := errors.New("x")
	var f deferError
	f.init()
	f.respond(want)
	if got := f.Error(); got != want {
		t.Fatalf("unexpected error result; got %#v want %#v", got, want)
	}
	if got := f.Error(); got != want {
		t.Fatalf("unexpected error result; got %#v want %#v", got, want)
	}
}

func TestDeferFutureConcurrent(t *testing.T) {
	// Food for the race detector.
	want := errors.New("x")
	var f deferError
	f.init()
	go f.respond(want)
	if got := f.Error(); got != want {
		t.Errorf("unexpected error result; got %#v want %#v", got, want)
	}
}
@ -1,213 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// QuorumPolicy allows individual logFutures to have different
|
||||
// commitment rules while still using the inflight mechanism.
|
||||
type quorumPolicy interface {
|
||||
// Checks if a commit from a given peer is enough to
|
||||
// satisfy the commitment rules
|
||||
Commit() bool
|
||||
|
||||
// Checks if a commit is committed
|
||||
IsCommitted() bool
|
||||
}
|
||||
|
||||
// MajorityQuorum is used by Apply transactions and requires
|
||||
// a simple majority of nodes.
|
||||
type majorityQuorum struct {
|
||||
count int
|
||||
votesNeeded int
|
||||
}
|
||||
|
||||
func newMajorityQuorum(clusterSize int) *majorityQuorum {
|
||||
votesNeeded := (clusterSize / 2) + 1
|
||||
return &majorityQuorum{count: 0, votesNeeded: votesNeeded}
|
||||
}
|
||||
|
||||
func (m *majorityQuorum) Commit() bool {
|
||||
m.count++
|
||||
return m.count >= m.votesNeeded
|
||||
}
|
||||
|
||||
func (m *majorityQuorum) IsCommitted() bool {
|
||||
return m.count >= m.votesNeeded
|
||||
}
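// Illustration (not part of the original file): with a 3-node cluster the
// quorum is 2, so the second Commit() call both returns true and flips
// IsCommitted() to true.
func exampleMajority() bool {
	q := newMajorityQuorum(3) // votesNeeded == 3/2 + 1 == 2
	q.Commit()                // leader's local append: count == 1, not yet committed
	q.Commit()                // first follower ack: count == 2, quorum reached
	return q.IsCommitted()    // true
}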
|
||||
|
||||
// Inflight is used to track operations that are still in-flight.
|
||||
type inflight struct {
|
||||
sync.Mutex
|
||||
committed *list.List
|
||||
commitCh chan struct{}
|
||||
minCommit uint64
|
||||
maxCommit uint64
|
||||
operations map[uint64]*logFuture
|
||||
stopCh chan struct{}
|
||||
}
|
||||
|
||||
// NewInflight returns an inflight struct that notifies
|
||||
// the provided channel when logs are finished committing.
|
||||
func newInflight(commitCh chan struct{}) *inflight {
|
||||
return &inflight{
|
||||
committed: list.New(),
|
||||
commitCh: commitCh,
|
||||
minCommit: 0,
|
||||
maxCommit: 0,
|
||||
operations: make(map[uint64]*logFuture),
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
// Start is used to mark a logFuture as being inflight. It
|
||||
// also commits the entry, as it is assumed the leader is
|
||||
// starting.
|
||||
func (i *inflight) Start(l *logFuture) {
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
i.start(l)
|
||||
}
|
||||
|
||||
// StartAll is used to mark a list of logFutures as being
|
||||
// inflight. It also commits each entry as the leader is
|
||||
// assumed to be starting.
|
||||
func (i *inflight) StartAll(logs []*logFuture) {
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
for _, l := range logs {
|
||||
i.start(l)
|
||||
}
|
||||
}
|
||||
|
||||
// start is used to mark a single entry as inflight,
|
||||
// must be invoked with the lock held.
|
||||
func (i *inflight) start(l *logFuture) {
|
||||
idx := l.log.Index
|
||||
i.operations[idx] = l
|
||||
|
||||
if idx > i.maxCommit {
|
||||
i.maxCommit = idx
|
||||
}
|
||||
if i.minCommit == 0 {
|
||||
i.minCommit = idx
|
||||
}
|
||||
i.commit(idx)
|
||||
}
|
||||
|
||||
// Cancel is used to cancel all in-flight operations.
|
||||
// This is done when the leader steps down, and all futures
|
||||
// are sent the given error.
|
||||
func (i *inflight) Cancel(err error) {
|
||||
// Close the channel first to unblock any pending commits
|
||||
close(i.stopCh)
|
||||
|
||||
// Lock after close to avoid deadlock
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
|
||||
// Respond to all inflight operations
|
||||
for _, op := range i.operations {
|
||||
op.respond(err)
|
||||
}
|
||||
|
||||
// Clear all the committed but not processed
|
||||
for e := i.committed.Front(); e != nil; e = e.Next() {
|
||||
e.Value.(*logFuture).respond(err)
|
||||
}
|
||||
|
||||
// Clear the map
|
||||
i.operations = make(map[uint64]*logFuture)
|
||||
|
||||
// Clear the list of committed
|
||||
i.committed = list.New()
|
||||
|
||||
// Close the commitCh
|
||||
close(i.commitCh)
|
||||
|
||||
// Reset indexes
|
||||
i.minCommit = 0
|
||||
i.maxCommit = 0
|
||||
}
|
||||
|
||||
// Committed returns all the committed operations in order.
|
||||
func (i *inflight) Committed() (l *list.List) {
|
||||
i.Lock()
|
||||
l, i.committed = i.committed, list.New()
|
||||
i.Unlock()
|
||||
return l
|
||||
}
|
||||
|
||||
// Commit is used by leader replication routines to indicate that
|
||||
// a follower has finished committing a log to disk.
|
||||
func (i *inflight) Commit(index uint64) {
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
i.commit(index)
|
||||
}
|
||||
|
||||
// CommitRange is used to commit a range of indexes inclusively.
|
||||
// It is optimized to avoid commits for indexes that are not tracked.
|
||||
func (i *inflight) CommitRange(minIndex, maxIndex uint64) {
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
|
||||
// Update the minimum index
|
||||
minIndex = max(i.minCommit, minIndex)
|
||||
|
||||
// Commit each index
|
||||
for idx := minIndex; idx <= maxIndex; idx++ {
|
||||
i.commit(idx)
|
||||
}
|
||||
}
|
||||
|
||||
// commit is used to commit a single index. Must be called with the lock held.
|
||||
func (i *inflight) commit(index uint64) {
|
||||
op, ok := i.operations[index]
|
||||
if !ok {
|
||||
// Ignore if not in the map, as it may be committed already
|
||||
return
|
||||
}
|
||||
|
||||
// Check if we've satisfied the commit
|
||||
if !op.policy.Commit() {
|
||||
return
|
||||
}
|
||||
|
||||
// Cannot commit if this is not the minimum inflight. This can happen
|
||||
// if the quorum size changes, meaning a previous commit requires a larger
|
||||
// quorum than this commit. We MUST block until the previous log is committed,
|
||||
// otherwise logs will be applied out of order.
|
||||
if index != i.minCommit {
|
||||
return
|
||||
}
|
||||
|
||||
NOTIFY:
|
||||
// Add the operation to the committed list
|
||||
i.committed.PushBack(op)
|
||||
|
||||
// Stop tracking since it is committed
|
||||
delete(i.operations, index)
|
||||
|
||||
// Update the indexes
|
||||
if index == i.maxCommit {
|
||||
i.minCommit = 0
|
||||
i.maxCommit = 0
|
||||
|
||||
} else {
|
||||
i.minCommit++
|
||||
}
|
||||
|
||||
// Check if the next in-flight operation is ready
|
||||
if i.minCommit != 0 {
|
||||
op = i.operations[i.minCommit]
|
||||
if op.policy.IsCommitted() {
|
||||
index = i.minCommit
|
||||
goto NOTIFY
|
||||
}
|
||||
}
|
||||
|
||||
// Async notify of ready operations
|
||||
asyncNotifyCh(i.commitCh)
|
||||
}
|
@ -1,150 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestInflight_StartCommit(t *testing.T) {
|
||||
commitCh := make(chan struct{}, 1)
|
||||
in := newInflight(commitCh)
|
||||
|
||||
// Commit a transaction as being in flight
|
||||
l := &logFuture{log: Log{Index: 1}}
|
||||
l.policy = newMajorityQuorum(5)
|
||||
in.Start(l)
|
||||
|
||||
// Commit 3 times
|
||||
in.Commit(1)
|
||||
if in.Committed().Len() != 0 {
|
||||
t.Fatalf("should not be committed")
|
||||
}
|
||||
|
||||
in.Commit(1)
|
||||
if in.Committed().Len() != 1 {
|
||||
t.Fatalf("should be committed")
|
||||
}
|
||||
|
||||
// Already committed but should work anyways
|
||||
in.Commit(1)
|
||||
}
|
||||
|
||||
func TestInflight_Cancel(t *testing.T) {
|
||||
commitCh := make(chan struct{}, 1)
|
||||
in := newInflight(commitCh)
|
||||
|
||||
// Commit a transaction as being in flight
|
||||
l := &logFuture{
|
||||
log: Log{Index: 1},
|
||||
}
|
||||
l.init()
|
||||
l.policy = newMajorityQuorum(3)
|
||||
in.Start(l)
|
||||
|
||||
// Cancel with an error
|
||||
err := fmt.Errorf("error 1")
|
||||
in.Cancel(err)
|
||||
|
||||
// Should get an error return
|
||||
if l.Error() != err {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInflight_StartAll(t *testing.T) {
|
||||
commitCh := make(chan struct{}, 1)
|
||||
in := newInflight(commitCh)
|
||||
|
||||
// Commit a few transactions as being in flight
|
||||
l1 := &logFuture{log: Log{Index: 2}}
|
||||
l1.policy = newMajorityQuorum(5)
|
||||
l2 := &logFuture{log: Log{Index: 3}}
|
||||
l2.policy = newMajorityQuorum(5)
|
||||
l3 := &logFuture{log: Log{Index: 4}}
|
||||
l3.policy = newMajorityQuorum(5)
|
||||
|
||||
// Start all the entries
|
||||
in.StartAll([]*logFuture{l1, l2, l3})
|
||||
|
||||
// Commit ranges
|
||||
in.CommitRange(1, 5)
|
||||
in.CommitRange(1, 4)
|
||||
in.CommitRange(1, 10)
|
||||
|
||||
// Should get 3 back
|
||||
if in.Committed().Len() != 3 {
|
||||
t.Fatalf("expected all 3 to commit")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInflight_CommitRange(t *testing.T) {
|
||||
commitCh := make(chan struct{}, 1)
|
||||
in := newInflight(commitCh)
|
||||
|
||||
// Commit a few transactions as being in flight
|
||||
l1 := &logFuture{log: Log{Index: 2}}
|
||||
l1.policy = newMajorityQuorum(5)
|
||||
in.Start(l1)
|
||||
|
||||
l2 := &logFuture{log: Log{Index: 3}}
|
||||
l2.policy = newMajorityQuorum(5)
|
||||
in.Start(l2)
|
||||
|
||||
l3 := &logFuture{log: Log{Index: 4}}
|
||||
l3.policy = newMajorityQuorum(5)
|
||||
in.Start(l3)
|
||||
|
||||
// Commit ranges
|
||||
in.CommitRange(1, 5)
|
||||
in.CommitRange(1, 4)
|
||||
in.CommitRange(1, 10)
|
||||
|
||||
// Should get 3 back
|
||||
if in.Committed().Len() != 3 {
|
||||
t.Fatalf("expected all 3 to commit")
|
||||
}
|
||||
}
|
||||
|
||||
// Should panic if we commit non-contiguously!
|
||||
func TestInflight_NonContiguous(t *testing.T) {
|
||||
commitCh := make(chan struct{}, 1)
|
||||
in := newInflight(commitCh)
|
||||
|
||||
// Commit a few transactions as being in flight
|
||||
l1 := &logFuture{log: Log{Index: 2}}
|
||||
l1.policy = newMajorityQuorum(5)
|
||||
in.Start(l1)
|
||||
|
||||
l2 := &logFuture{log: Log{Index: 3}}
|
||||
l2.policy = newMajorityQuorum(5)
|
||||
in.Start(l2)
|
||||
|
||||
in.Commit(3)
|
||||
in.Commit(3)
|
||||
in.Commit(3) // panic!
|
||||
|
||||
if in.Committed().Len() != 0 {
|
||||
t.Fatalf("should not commit")
|
||||
}
|
||||
|
||||
in.Commit(2)
|
||||
in.Commit(2)
|
||||
in.Commit(2) // panic!
|
||||
|
||||
committed := in.Committed()
|
||||
if committed.Len() != 2 {
|
||||
t.Fatalf("should commit both")
|
||||
}
|
||||
|
||||
current := committed.Front()
|
||||
l := current.Value.(*logFuture)
|
||||
if l.log.Index != 2 {
|
||||
t.Fatalf("bad: %v", *l)
|
||||
}
|
||||
|
||||
current = current.Next()
|
||||
l = current.Value.(*logFuture)
|
||||
if l.log.Index != 3 {
|
||||
t.Fatalf("bad: %v", *l)
|
||||
}
|
||||
}
|
@ -1,116 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// InmemStore implements the LogStore and StableStore interface.
|
||||
// It should NOT EVER be used for production. It is used only for
|
||||
// unit tests. Use the MDBStore implementation instead.
|
||||
type InmemStore struct {
|
||||
l sync.RWMutex
|
||||
lowIndex uint64
|
||||
highIndex uint64
|
||||
logs map[uint64]*Log
|
||||
kv map[string][]byte
|
||||
kvInt map[string]uint64
|
||||
}
|
||||
|
||||
// NewInmemStore returns a new in-memory backend. Do not ever
|
||||
// use for production. Only for testing.
|
||||
func NewInmemStore() *InmemStore {
|
||||
i := &InmemStore{
|
||||
logs: make(map[uint64]*Log),
|
||||
kv: make(map[string][]byte),
|
||||
kvInt: make(map[string]uint64),
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// FirstIndex implements the LogStore interface.
|
||||
func (i *InmemStore) FirstIndex() (uint64, error) {
|
||||
i.l.RLock()
|
||||
defer i.l.RUnlock()
|
||||
return i.lowIndex, nil
|
||||
}
|
||||
|
||||
// LastIndex implements the LogStore interface.
|
||||
func (i *InmemStore) LastIndex() (uint64, error) {
|
||||
i.l.RLock()
|
||||
defer i.l.RUnlock()
|
||||
return i.highIndex, nil
|
||||
}
|
||||
|
||||
// GetLog implements the LogStore interface.
|
||||
func (i *InmemStore) GetLog(index uint64, log *Log) error {
|
||||
i.l.RLock()
|
||||
defer i.l.RUnlock()
|
||||
l, ok := i.logs[index]
|
||||
if !ok {
|
||||
return ErrLogNotFound
|
||||
}
|
||||
*log = *l
|
||||
return nil
|
||||
}
|
||||
|
||||
// StoreLog implements the LogStore interface.
|
||||
func (i *InmemStore) StoreLog(log *Log) error {
|
||||
return i.StoreLogs([]*Log{log})
|
||||
}
|
||||
|
||||
// StoreLogs implements the LogStore interface.
|
||||
func (i *InmemStore) StoreLogs(logs []*Log) error {
|
||||
i.l.Lock()
|
||||
defer i.l.Unlock()
|
||||
for _, l := range logs {
|
||||
i.logs[l.Index] = l
|
||||
if i.lowIndex == 0 {
|
||||
i.lowIndex = l.Index
|
||||
}
|
||||
if l.Index > i.highIndex {
|
||||
i.highIndex = l.Index
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteRange implements the LogStore interface.
|
||||
func (i *InmemStore) DeleteRange(min, max uint64) error {
|
||||
i.l.Lock()
|
||||
defer i.l.Unlock()
|
||||
for j := min; j <= max; j++ {
|
||||
delete(i.logs, j)
|
||||
}
|
||||
i.lowIndex = max + 1
|
||||
return nil
|
||||
}
|
||||
|
||||
// Set implements the StableStore interface.
|
||||
func (i *InmemStore) Set(key []byte, val []byte) error {
|
||||
i.l.Lock()
|
||||
defer i.l.Unlock()
|
||||
i.kv[string(key)] = val
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get implements the StableStore interface.
|
||||
func (i *InmemStore) Get(key []byte) ([]byte, error) {
|
||||
i.l.RLock()
|
||||
defer i.l.RUnlock()
|
||||
return i.kv[string(key)], nil
|
||||
}
|
||||
|
||||
// SetUint64 implements the StableStore interface.
|
||||
func (i *InmemStore) SetUint64(key []byte, val uint64) error {
|
||||
i.l.Lock()
|
||||
defer i.l.Unlock()
|
||||
i.kvInt[string(key)] = val
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetUint64 implements the StableStore interface.
|
||||
func (i *InmemStore) GetUint64(key []byte) (uint64, error) {
|
||||
i.l.RLock()
|
||||
defer i.l.RUnlock()
|
||||
return i.kvInt[string(key)], nil
|
||||
}
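// Rough usage sketch for the test-only store above (values are arbitrary).
func exampleInmemStore() (first, last uint64, err error) {
	store := NewInmemStore()
	err = store.StoreLogs([]*Log{
		{Index: 1, Term: 1, Type: LogCommand, Data: []byte("a")},
		{Index: 2, Term: 1, Type: LogCommand, Data: []byte("b")},
	})
	if err != nil {
		return 0, 0, err
	}
	var out Log
	if err = store.GetLog(2, &out); err != nil {
		return 0, 0, err
	}
	first, _ = store.FirstIndex() // 1
	last, _ = store.LastIndex()   // 2
	return first, last, nil
}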
|
@ -1,324 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// NewInmemAddr returns a new in-memory addr with
|
||||
// a randomly generated UUID as the ID.
|
||||
func NewInmemAddr() string {
|
||||
return generateUUID()
|
||||
}
|
||||
|
||||
// inmemPipeline is used to pipeline requests for the in-mem transport.
|
||||
type inmemPipeline struct {
|
||||
trans *InmemTransport
|
||||
peer *InmemTransport
|
||||
peerAddr string
|
||||
|
||||
doneCh chan AppendFuture
|
||||
inprogressCh chan *inmemPipelineInflight
|
||||
|
||||
shutdown bool
|
||||
shutdownCh chan struct{}
|
||||
shutdownLock sync.Mutex
|
||||
}
|
||||
|
||||
type inmemPipelineInflight struct {
|
||||
future *appendFuture
|
||||
respCh <-chan RPCResponse
|
||||
}
|
||||
|
||||
// InmemTransport implements the Transport interface, allowing Raft to be
|
||||
// tested in-memory without going over a network.
|
||||
type InmemTransport struct {
|
||||
sync.RWMutex
|
||||
consumerCh chan RPC
|
||||
localAddr string
|
||||
peers map[string]*InmemTransport
|
||||
pipelines []*inmemPipeline
|
||||
timeout time.Duration
|
||||
}
|
||||
|
||||
// NewInmemTransport is used to initialize a new transport
|
||||
// and generates a random local address if none is specified
|
||||
func NewInmemTransport(addr string) (string, *InmemTransport) {
|
||||
if addr == "" {
|
||||
addr = NewInmemAddr()
|
||||
}
|
||||
trans := &InmemTransport{
|
||||
consumerCh: make(chan RPC, 16),
|
||||
localAddr: addr,
|
||||
peers: make(map[string]*InmemTransport),
|
||||
timeout: 50 * time.Millisecond,
|
||||
}
|
||||
return addr, trans
|
||||
}
|
||||
|
||||
// SetHeartbeatHandler is used to set optional fast-path for
|
||||
// heartbeats, not supported for this transport.
|
||||
func (i *InmemTransport) SetHeartbeatHandler(cb func(RPC)) {
|
||||
}
|
||||
|
||||
// Consumer implements the Transport interface.
|
||||
func (i *InmemTransport) Consumer() <-chan RPC {
|
||||
return i.consumerCh
|
||||
}
|
||||
|
||||
// LocalAddr implements the Transport interface.
|
||||
func (i *InmemTransport) LocalAddr() string {
|
||||
return i.localAddr
|
||||
}
|
||||
|
||||
// AppendEntriesPipeline returns an interface that can be used to pipeline
|
||||
// AppendEntries requests.
|
||||
func (i *InmemTransport) AppendEntriesPipeline(target string) (AppendPipeline, error) {
|
||||
i.RLock()
|
||||
peer, ok := i.peers[target]
|
||||
i.RUnlock()
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("failed to connect to peer: %v", target)
|
||||
}
|
||||
pipeline := newInmemPipeline(i, peer, target)
|
||||
i.Lock()
|
||||
i.pipelines = append(i.pipelines, pipeline)
|
||||
i.Unlock()
|
||||
return pipeline, nil
|
||||
}
|
||||
|
||||
// AppendEntries implements the Transport interface.
|
||||
func (i *InmemTransport) AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
|
||||
rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Copy the result back
|
||||
out := rpcResp.Response.(*AppendEntriesResponse)
|
||||
*resp = *out
|
||||
return nil
|
||||
}
|
||||
|
||||
// RequestVote implements the Transport interface.
|
||||
func (i *InmemTransport) RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error {
|
||||
rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Copy the result back
|
||||
out := rpcResp.Response.(*RequestVoteResponse)
|
||||
*resp = *out
|
||||
return nil
|
||||
}
|
||||
|
||||
// InstallSnapshot implements the Transport interface.
|
||||
func (i *InmemTransport) InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {
|
||||
rpcResp, err := i.makeRPC(target, args, data, 10*i.timeout)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Copy the result back
|
||||
out := rpcResp.Response.(*InstallSnapshotResponse)
|
||||
*resp = *out
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *InmemTransport) makeRPC(target string, args interface{}, r io.Reader, timeout time.Duration) (rpcResp RPCResponse, err error) {
|
||||
i.RLock()
|
||||
peer, ok := i.peers[target]
|
||||
i.RUnlock()
|
||||
|
||||
if !ok {
|
||||
err = fmt.Errorf("failed to connect to peer: %v", target)
|
||||
return
|
||||
}
|
||||
|
||||
// Send the RPC over
|
||||
respCh := make(chan RPCResponse)
|
||||
peer.consumerCh <- RPC{
|
||||
Command: args,
|
||||
Reader: r,
|
||||
RespChan: respCh,
|
||||
}
|
||||
|
||||
// Wait for a response
|
||||
select {
|
||||
case rpcResp = <-respCh:
|
||||
if rpcResp.Error != nil {
|
||||
err = rpcResp.Error
|
||||
}
|
||||
case <-time.After(timeout):
|
||||
err = fmt.Errorf("command timed out")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// EncodePeer implements the Transport interface. It uses the UUID as the
|
||||
// address directly.
|
||||
func (i *InmemTransport) EncodePeer(p string) []byte {
|
||||
return []byte(p)
|
||||
}
|
||||
|
||||
// DecodePeer implements the Transport interface. It returns the UUID as
|
||||
// the address directly.
|
||||
func (i *InmemTransport) DecodePeer(buf []byte) string {
|
||||
return string(buf)
|
||||
}
|
||||
|
||||
// Connect is used to connect this transport to another transport for
|
||||
// a given peer name. This allows for local routing.
|
||||
func (i *InmemTransport) Connect(peer string, t Transport) {
|
||||
trans := t.(*InmemTransport)
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
i.peers[peer] = trans
|
||||
}
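// Wiring sketch (illustrative; the generated addresses are UUID strings):
// connect two in-memory transports to each other so RPCs can be routed
// locally. A caller still has to drain each Consumer() channel and respond,
// otherwise makeRPC times out after the 50ms default.
func exampleConnectInmem() (*InmemTransport, *InmemTransport) {
	addr1, t1 := NewInmemTransport("")
	addr2, t2 := NewInmemTransport("")
	t1.Connect(addr2, t2) // t1 can now reach t2
	t2.Connect(addr1, t1) // and vice versa
	return t1, t2
}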
|
||||
|
||||
// Disconnect is used to remove the ability to route to a given peer.
|
||||
func (i *InmemTransport) Disconnect(peer string) {
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
delete(i.peers, peer)
|
||||
|
||||
// Disconnect any pipelines
|
||||
n := len(i.pipelines)
|
||||
for idx := 0; idx < n; idx++ {
|
||||
if i.pipelines[idx].peerAddr == peer {
|
||||
i.pipelines[idx].Close()
|
||||
i.pipelines[idx], i.pipelines[n-1] = i.pipelines[n-1], nil
|
||||
idx--
|
||||
n--
|
||||
}
|
||||
}
|
||||
i.pipelines = i.pipelines[:n]
|
||||
}
|
||||
|
||||
// DisconnectAll is used to remove all routes to peers.
|
||||
func (i *InmemTransport) DisconnectAll() {
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
i.peers = make(map[string]*InmemTransport)
|
||||
|
||||
// Handle pipelines
|
||||
for _, pipeline := range i.pipelines {
|
||||
pipeline.Close()
|
||||
}
|
||||
i.pipelines = nil
|
||||
}
|
||||
|
||||
// Close is used to permanently disable the transport
|
||||
func (i *InmemTransport) Close() error {
|
||||
i.DisconnectAll()
|
||||
return nil
|
||||
}
|
||||
|
||||
func newInmemPipeline(trans *InmemTransport, peer *InmemTransport, addr string) *inmemPipeline {
|
||||
i := &inmemPipeline{
|
||||
trans: trans,
|
||||
peer: peer,
|
||||
peerAddr: addr,
|
||||
doneCh: make(chan AppendFuture, 16),
|
||||
inprogressCh: make(chan *inmemPipelineInflight, 16),
|
||||
shutdownCh: make(chan struct{}),
|
||||
}
|
||||
go i.decodeResponses()
|
||||
return i
|
||||
}
|
||||
|
||||
func (i *inmemPipeline) decodeResponses() {
|
||||
timeout := i.trans.timeout
|
||||
for {
|
||||
select {
|
||||
case inp := <-i.inprogressCh:
|
||||
var timeoutCh <-chan time.Time
|
||||
if timeout > 0 {
|
||||
timeoutCh = time.After(timeout)
|
||||
}
|
||||
|
||||
select {
|
||||
case rpcResp := <-inp.respCh:
|
||||
// Copy the result back
|
||||
*inp.future.resp = *rpcResp.Response.(*AppendEntriesResponse)
|
||||
inp.future.respond(rpcResp.Error)
|
||||
|
||||
select {
|
||||
case i.doneCh <- inp.future:
|
||||
case <-i.shutdownCh:
|
||||
return
|
||||
}
|
||||
|
||||
case <-timeoutCh:
|
||||
inp.future.respond(fmt.Errorf("command timed out"))
|
||||
select {
|
||||
case i.doneCh <- inp.future:
|
||||
case <-i.shutdownCh:
|
||||
return
|
||||
}
|
||||
|
||||
case <-i.shutdownCh:
|
||||
return
|
||||
}
|
||||
case <-i.shutdownCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (i *inmemPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {
|
||||
// Create a new future
|
||||
future := &appendFuture{
|
||||
start: time.Now(),
|
||||
args: args,
|
||||
resp: resp,
|
||||
}
|
||||
future.init()
|
||||
|
||||
// Handle a timeout
|
||||
var timeout <-chan time.Time
|
||||
if i.trans.timeout > 0 {
|
||||
timeout = time.After(i.trans.timeout)
|
||||
}
|
||||
|
||||
// Send the RPC over
|
||||
respCh := make(chan RPCResponse, 1)
|
||||
rpc := RPC{
|
||||
Command: args,
|
||||
RespChan: respCh,
|
||||
}
|
||||
select {
|
||||
case i.peer.consumerCh <- rpc:
|
||||
case <-timeout:
|
||||
return nil, fmt.Errorf("command enqueue timeout")
|
||||
case <-i.shutdownCh:
|
||||
return nil, ErrPipelineShutdown
|
||||
}
|
||||
|
||||
// Send to be decoded
|
||||
select {
|
||||
case i.inprogressCh <- &inmemPipelineInflight{future, respCh}:
|
||||
return future, nil
|
||||
case <-i.shutdownCh:
|
||||
return nil, ErrPipelineShutdown
|
||||
}
|
||||
}
|
||||
|
||||
func (i *inmemPipeline) Consumer() <-chan AppendFuture {
|
||||
return i.doneCh
|
||||
}
|
||||
|
||||
func (i *inmemPipeline) Close() error {
|
||||
i.shutdownLock.Lock()
|
||||
defer i.shutdownLock.Unlock()
|
||||
if i.shutdown {
|
||||
return nil
|
||||
}
|
||||
|
||||
i.shutdown = true
|
||||
close(i.shutdownCh)
|
||||
return nil
|
||||
}
|
@ -1,18 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestInmemTransportImpl(t *testing.T) {
|
||||
var inm interface{} = &InmemTransport{}
|
||||
if _, ok := inm.(Transport); !ok {
|
||||
t.Fatalf("InmemTransport is not a Transport")
|
||||
}
|
||||
if _, ok := inm.(LoopbackTransport); !ok {
|
||||
t.Fatalf("InmemTransport is not a Loopback Transport")
|
||||
}
|
||||
if _, ok := inm.(WithPeers); !ok {
|
||||
t.Fatalf("InmemTransport is not a WithPeers Transport")
|
||||
}
|
||||
}
|
@ -1,336 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// CheckInteg will skip a test if integration testing is not enabled.
|
||||
func CheckInteg(t *testing.T) {
|
||||
if !IsInteg() {
|
||||
t.SkipNow()
|
||||
}
|
||||
}
|
||||
|
||||
// IsInteg returns a boolean telling you if we're in integ testing mode.
|
||||
func IsInteg() bool {
|
||||
return os.Getenv("INTEG_TESTS") != ""
|
||||
}
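// A typical (illustrative) invocation of the integration test gated by the
// check above would be:
//
//	INTEG_TESTS=1 go test -run TestRaft_Integ -timeout 120s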
|
||||
|
||||
type RaftEnv struct {
|
||||
dir string
|
||||
conf *Config
|
||||
fsm *MockFSM
|
||||
store *InmemStore
|
||||
snapshot *FileSnapshotStore
|
||||
peers *JSONPeers
|
||||
trans *NetworkTransport
|
||||
raft *Raft
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
// Release shuts down and cleans up any stored data; it's not restartable after this.
|
||||
func (r *RaftEnv) Release() {
|
||||
r.Shutdown()
|
||||
os.RemoveAll(r.dir)
|
||||
}
|
||||
|
||||
// Shutdown shuts down raft & transport, but keeps its data; it's restartable
|
||||
// after a Shutdown() by calling Restart().
|
||||
func (r *RaftEnv) Shutdown() {
|
||||
r.logger.Printf("[WARN] Shutdown node at %v", r.raft.localAddr)
|
||||
f := r.raft.Shutdown()
|
||||
if err := f.Error(); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
r.trans.Close()
|
||||
}
|
||||
|
||||
// Restart will start a raft node that was previously Shutdown()
|
||||
func (r *RaftEnv) Restart(t *testing.T) {
|
||||
trans, err := NewTCPTransport(r.raft.localAddr, nil, 2, time.Second, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
r.trans = trans
|
||||
r.logger.Printf("[INFO] Starting node at %v", trans.LocalAddr())
|
||||
raft, err := NewRaft(r.conf, r.fsm, r.store, r.store, r.snapshot, r.peers, r.trans)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
r.raft = raft
|
||||
}
|
||||
|
||||
func MakeRaft(t *testing.T, conf *Config) *RaftEnv {
|
||||
// Set the config
|
||||
if conf == nil {
|
||||
conf = inmemConfig(t)
|
||||
}
|
||||
|
||||
dir, err := ioutil.TempDir("", "raft")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v ", err)
|
||||
}
|
||||
|
||||
stable := NewInmemStore()
|
||||
|
||||
snap, err := NewFileSnapshotStore(dir, 3, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
env := &RaftEnv{
|
||||
conf: conf,
|
||||
dir: dir,
|
||||
store: stable,
|
||||
snapshot: snap,
|
||||
fsm: &MockFSM{},
|
||||
}
|
||||
|
||||
trans, err := NewTCPTransport("127.0.0.1:0", nil, 2, time.Second, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
env.logger = log.New(os.Stdout, trans.LocalAddr()+" :", log.Lmicroseconds)
|
||||
env.trans = trans
|
||||
|
||||
env.peers = NewJSONPeers(dir, trans)
|
||||
|
||||
env.logger.Printf("[INFO] Starting node at %v", trans.LocalAddr())
|
||||
conf.Logger = env.logger
|
||||
raft, err := NewRaft(conf, env.fsm, stable, stable, snap, env.peers, trans)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
env.raft = raft
|
||||
return env
|
||||
}
|
||||
|
||||
func WaitFor(env *RaftEnv, state RaftState) error {
|
||||
limit := time.Now().Add(200 * time.Millisecond)
|
||||
for env.raft.State() != state {
|
||||
if time.Now().Before(limit) {
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
} else {
|
||||
return fmt.Errorf("failed to transition to state %v", state)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func WaitForAny(state RaftState, envs []*RaftEnv) (*RaftEnv, error) {
|
||||
limit := time.Now().Add(200 * time.Millisecond)
|
||||
CHECK:
|
||||
for _, env := range envs {
|
||||
if env.raft.State() == state {
|
||||
return env, nil
|
||||
}
|
||||
}
|
||||
if time.Now().Before(limit) {
|
||||
goto WAIT
|
||||
}
|
||||
return nil, fmt.Errorf("failed to find node in %v state", state)
|
||||
WAIT:
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
goto CHECK
|
||||
}
|
||||
|
||||
func WaitFuture(f Future, t *testing.T) error {
|
||||
timer := time.AfterFunc(200*time.Millisecond, func() {
|
||||
panic(fmt.Errorf("timeout waiting for future %v", f))
|
||||
})
|
||||
defer timer.Stop()
|
||||
return f.Error()
|
||||
}
|
||||
|
||||
func NoErr(err error, t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func CheckConsistent(envs []*RaftEnv, t *testing.T) {
|
||||
limit := time.Now().Add(400 * time.Millisecond)
|
||||
first := envs[0]
|
||||
first.fsm.Lock()
|
||||
defer first.fsm.Unlock()
|
||||
var err error
|
||||
CHECK:
|
||||
l1 := len(first.fsm.logs)
|
||||
for i := 1; i < len(envs); i++ {
|
||||
env := envs[i]
|
||||
env.fsm.Lock()
|
||||
l2 := len(env.fsm.logs)
|
||||
if l1 != l2 {
|
||||
err = fmt.Errorf("log length mismatch %d %d", l1, l2)
|
||||
env.fsm.Unlock()
|
||||
goto ERR
|
||||
}
|
||||
for idx, log := range first.fsm.logs {
|
||||
other := env.fsm.logs[idx]
|
||||
if !bytes.Equal(log, other) {
|
||||
err = fmt.Errorf("log entry %d mismatch between %s/%s : '%s' / '%s'", idx, first.raft.localAddr, env.raft.localAddr, log, other)
|
||||
env.fsm.Unlock()
|
||||
goto ERR
|
||||
}
|
||||
}
|
||||
env.fsm.Unlock()
|
||||
}
|
||||
return
|
||||
ERR:
|
||||
if time.Now().After(limit) {
|
||||
t.Fatalf("%v", err)
|
||||
}
|
||||
first.fsm.Unlock()
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
first.fsm.Lock()
|
||||
goto CHECK
|
||||
}
|
||||
|
||||
// logBytes returns a log entry that's at least sz bytes long and has the prefix 'test i '.
|
||||
func logBytes(i, sz int) []byte {
|
||||
var logBuffer bytes.Buffer
|
||||
fmt.Fprintf(&logBuffer, "test %d ", i)
|
||||
for logBuffer.Len() < sz {
|
||||
logBuffer.WriteByte('x')
|
||||
}
|
||||
return logBuffer.Bytes()
|
||||
|
||||
}
|
||||
|
||||
// Tests Raft by creating a cluster, growing it to 5 nodes while
|
||||
// causing various stressful conditions
|
||||
func TestRaft_Integ(t *testing.T) {
|
||||
CheckInteg(t)
|
||||
conf := DefaultConfig()
|
||||
conf.HeartbeatTimeout = 50 * time.Millisecond
|
||||
conf.ElectionTimeout = 50 * time.Millisecond
|
||||
conf.LeaderLeaseTimeout = 50 * time.Millisecond
|
||||
conf.CommitTimeout = 5 * time.Millisecond
|
||||
conf.SnapshotThreshold = 100
|
||||
conf.TrailingLogs = 10
|
||||
conf.EnableSingleNode = true
|
||||
|
||||
// Create a single node
|
||||
env1 := MakeRaft(t, conf)
|
||||
NoErr(WaitFor(env1, Leader), t)
|
||||
|
||||
totalApplied := 0
|
||||
applyAndWait := func(leader *RaftEnv, n int, sz int) {
|
||||
// Do some commits
|
||||
var futures []ApplyFuture
|
||||
for i := 0; i < n; i++ {
|
||||
futures = append(futures, leader.raft.Apply(logBytes(i, sz), 0))
|
||||
}
|
||||
for _, f := range futures {
|
||||
NoErr(WaitFuture(f, t), t)
|
||||
leader.logger.Printf("[DEBUG] Applied at %d, size %d", f.Index(), sz)
|
||||
}
|
||||
totalApplied += n
|
||||
}
|
||||
// Do some commits
|
||||
applyAndWait(env1, 100, 10)
|
||||
|
||||
// Do a snapshot
|
||||
NoErr(WaitFuture(env1.raft.Snapshot(), t), t)
|
||||
|
||||
// Join a few nodes!
|
||||
var envs []*RaftEnv
|
||||
for i := 0; i < 4; i++ {
|
||||
env := MakeRaft(t, conf)
|
||||
addr := env.trans.LocalAddr()
|
||||
NoErr(WaitFuture(env1.raft.AddPeer(addr), t), t)
|
||||
envs = append(envs, env)
|
||||
}
|
||||
|
||||
// Wait for a leader
|
||||
leader, err := WaitForAny(Leader, append([]*RaftEnv{env1}, envs...))
|
||||
NoErr(err, t)
|
||||
|
||||
// Do some more commits
|
||||
applyAndWait(leader, 100, 10)
|
||||
|
||||
// snapshot the leader
|
||||
NoErr(WaitFuture(leader.raft.Snapshot(), t), t)
|
||||
|
||||
CheckConsistent(append([]*RaftEnv{env1}, envs...), t)
|
||||
|
||||
// shutdown a follower
|
||||
disconnected := envs[len(envs)-1]
|
||||
disconnected.Shutdown()
|
||||
|
||||
// Do some more commits [make sure the resulting snapshot will be a reasonable size]
|
||||
applyAndWait(leader, 100, 10000)
|
||||
|
||||
// snapshot the leader [leader's log should be compacted past the disconnected follower log now]
|
||||
NoErr(WaitFuture(leader.raft.Snapshot(), t), t)
|
||||
|
||||
// Unfortunately we need to wait for the leader to start backing off RPCs to the down follower
|
||||
// such that when the follower comes back up it'll run an election before it gets an rpc from
|
||||
// the leader
|
||||
time.Sleep(time.Second * 5)
|
||||
|
||||
// start the now out of date follower back up
|
||||
disconnected.Restart(t)
|
||||
|
||||
// wait for it to get caught up
|
||||
timeout := time.Now().Add(time.Second * 10)
|
||||
for disconnected.raft.getLastApplied() < leader.raft.getLastApplied() {
|
||||
time.Sleep(time.Millisecond)
|
||||
if time.Now().After(timeout) {
|
||||
t.Fatalf("Gave up waiting for follower to get caught up to leader")
|
||||
}
|
||||
}
|
||||
|
||||
CheckConsistent(append([]*RaftEnv{env1}, envs...), t)
|
||||
|
||||
// Shoot two nodes in the head!
|
||||
rm1, rm2 := envs[0], envs[1]
|
||||
rm1.Release()
|
||||
rm2.Release()
|
||||
envs = envs[2:]
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
|
||||
// Wait for a leader
|
||||
leader, err = WaitForAny(Leader, append([]*RaftEnv{env1}, envs...))
|
||||
NoErr(err, t)
|
||||
|
||||
// Do some more commits
|
||||
applyAndWait(leader, 100, 10)
|
||||
|
||||
// Join a few new nodes!
|
||||
for i := 0; i < 2; i++ {
|
||||
env := MakeRaft(t, conf)
|
||||
addr := env.trans.LocalAddr()
|
||||
NoErr(WaitFuture(leader.raft.AddPeer(addr), t), t)
|
||||
envs = append(envs, env)
|
||||
}
|
||||
|
||||
// Remove the old nodes
|
||||
NoErr(WaitFuture(leader.raft.RemovePeer(rm1.raft.localAddr), t), t)
|
||||
NoErr(WaitFuture(leader.raft.RemovePeer(rm2.raft.localAddr), t), t)
|
||||
|
||||
// Shoot the leader
|
||||
env1.Release()
|
||||
time.Sleep(3 * conf.HeartbeatTimeout)
|
||||
|
||||
// Wait for a leader
|
||||
leader, err = WaitForAny(Leader, envs)
|
||||
NoErr(err, t)
|
||||
|
||||
allEnvs := append([]*RaftEnv{env1}, envs...)
|
||||
CheckConsistent(allEnvs, t)
|
||||
|
||||
if len(env1.fsm.logs) != totalApplied {
|
||||
t.Fatalf("should apply %d logs! %d", totalApplied, len(env1.fsm.logs))
|
||||
}
|
||||
|
||||
for _, e := range envs {
|
||||
e.Release()
|
||||
}
|
||||
}
|
@ -1,67 +0,0 @@
|
||||
package raft
|
||||
|
||||
// LogType describes various types of log entries.
|
||||
type LogType uint8
|
||||
|
||||
const (
|
||||
// LogCommand is applied to a user FSM.
|
||||
LogCommand LogType = iota
|
||||
|
||||
// LogNoop is used to assert leadership.
|
||||
LogNoop
|
||||
|
||||
// LogAddPeer is used to add a new peer.
|
||||
LogAddPeer
|
||||
|
||||
// LogRemovePeer is used to remove an existing peer.
|
||||
LogRemovePeer
|
||||
|
||||
// LogBarrier is used to ensure all preceding operations have been
|
||||
// applied to the FSM. It is similar to LogNoop, but instead of returning
|
||||
// once committed, it only returns once the FSM manager acks it. Otherwise
|
||||
// it is possible there are operations committed but not yet applied to
|
||||
// the FSM.
|
||||
LogBarrier
|
||||
)
|
||||
|
||||
// Log entries are replicated to all members of the Raft cluster
|
||||
// and form the heart of the replicated state machine.
|
||||
type Log struct {
|
||||
// Index holds the index of the log entry.
|
||||
Index uint64
|
||||
|
||||
// Term holds the election term of the log entry.
|
||||
Term uint64
|
||||
|
||||
// Type holds the type of the log entry.
|
||||
Type LogType
|
||||
|
||||
// Data holds the log entry's type-specific data.
|
||||
Data []byte
|
||||
|
||||
// peer is not exported since it is not transmitted, only used
|
||||
// internally to construct the Data field.
|
||||
peer string
|
||||
}
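// Illustrative only (index, term, and payload are arbitrary): a command
// entry destined for the user FSM is built from the exported fields; the
// unexported peer field is only populated internally when constructing Data.
var exampleCommandEntry = Log{
	Index: 42,
	Term:  3,
	Type:  LogCommand,
	Data:  []byte("SET x 1"),
}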
|
||||
|
||||
// LogStore is used to provide an interface for storing
|
||||
// and retrieving logs in a durable fashion.
|
||||
type LogStore interface {
|
||||
// FirstIndex returns the first index written. 0 for no entries.
|
||||
FirstIndex() (uint64, error)
|
||||
|
||||
// LastIndex returns the last index written. 0 for no entries.
|
||||
LastIndex() (uint64, error)
|
||||
|
||||
// GetLog gets a log entry at a given index.
|
||||
GetLog(index uint64, log *Log) error
|
||||
|
||||
// StoreLog stores a log entry.
|
||||
StoreLog(log *Log) error
|
||||
|
||||
// StoreLogs stores multiple log entries.
|
||||
StoreLogs(logs []*Log) error
|
||||
|
||||
// DeleteRange deletes a range of log entries. The range is inclusive.
|
||||
DeleteRange(min, max uint64) error
|
||||
}
|
@ -1,79 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// LogCache wraps any LogStore implementation to provide an
|
||||
// in-memory ring buffer. This is used to cache access to
|
||||
// the recently written entries. For implementations that do not
|
||||
// cache themselves, this can provide a substantial boost by
|
||||
// avoiding disk I/O on recent entries.
|
||||
type LogCache struct {
|
||||
store LogStore
|
||||
|
||||
cache []*Log
|
||||
l sync.RWMutex
|
||||
}
|
||||
|
||||
// NewLogCache is used to create a new LogCache with the
|
||||
// given capacity and backend store.
|
||||
func NewLogCache(capacity int, store LogStore) (*LogCache, error) {
|
||||
if capacity <= 0 {
|
||||
return nil, fmt.Errorf("capacity must be positive")
|
||||
}
|
||||
c := &LogCache{
|
||||
store: store,
|
||||
cache: make([]*Log, capacity),
|
||||
}
|
||||
return c, nil
|
||||
}
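// Typical wiring sketch (capacity and backing store chosen arbitrarily):
// wrap any LogStore so recently written entries are served from the ring
// buffer instead of hitting the backend.
func exampleLogCache() (LogStore, error) {
	store := NewInmemStore() // any LogStore implementation would do
	return NewLogCache(512, store)
}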
|
||||
|
||||
func (c *LogCache) GetLog(idx uint64, log *Log) error {
|
||||
// Check the buffer for an entry
|
||||
c.l.RLock()
|
||||
cached := c.cache[idx%uint64(len(c.cache))]
|
||||
c.l.RUnlock()
|
||||
|
||||
// Check if entry is valid
|
||||
if cached != nil && cached.Index == idx {
|
||||
*log = *cached
|
||||
return nil
|
||||
}
|
||||
|
||||
// Forward request on cache miss
|
||||
return c.store.GetLog(idx, log)
|
||||
}
|
||||
|
||||
func (c *LogCache) StoreLog(log *Log) error {
|
||||
return c.StoreLogs([]*Log{log})
|
||||
}
|
||||
|
||||
func (c *LogCache) StoreLogs(logs []*Log) error {
|
||||
// Insert the logs into the ring buffer
|
||||
c.l.Lock()
|
||||
for _, l := range logs {
|
||||
c.cache[l.Index%uint64(len(c.cache))] = l
|
||||
}
|
||||
c.l.Unlock()
|
||||
|
||||
return c.store.StoreLogs(logs)
|
||||
}
|
||||
|
||||
func (c *LogCache) FirstIndex() (uint64, error) {
|
||||
return c.store.FirstIndex()
|
||||
}
|
||||
|
||||
func (c *LogCache) LastIndex() (uint64, error) {
|
||||
return c.store.LastIndex()
|
||||
}
|
||||
|
||||
func (c *LogCache) DeleteRange(min, max uint64) error {
|
||||
// Invalidate the cache on deletes
|
||||
c.l.Lock()
|
||||
c.cache = make([]*Log, len(c.cache))
|
||||
c.l.Unlock()
|
||||
|
||||
return c.store.DeleteRange(min, max)
|
||||
}
|
@ -1,88 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestLogCache(t *testing.T) {
|
||||
store := NewInmemStore()
|
||||
c, _ := NewLogCache(16, store)
|
||||
|
||||
// Insert into the in-mem store
|
||||
for i := 0; i < 32; i++ {
|
||||
log := &Log{Index: uint64(i) + 1}
|
||||
store.StoreLog(log)
|
||||
}
|
||||
|
||||
// Check the indexes
|
||||
if idx, _ := c.FirstIndex(); idx != 1 {
|
||||
t.Fatalf("bad: %d", idx)
|
||||
}
|
||||
if idx, _ := c.LastIndex(); idx != 32 {
|
||||
t.Fatalf("bad: %d", idx)
|
||||
}
|
||||
|
||||
// Try get log with a miss
|
||||
var out Log
|
||||
err := c.GetLog(1, &out)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if out.Index != 1 {
|
||||
t.Fatalf("bad: %#v", out)
|
||||
}
|
||||
|
||||
// Store logs
|
||||
l1 := &Log{Index: 33}
|
||||
l2 := &Log{Index: 34}
|
||||
err = c.StoreLogs([]*Log{l1, l2})
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
if idx, _ := c.LastIndex(); idx != 34 {
|
||||
t.Fatalf("bad: %d", idx)
|
||||
}
|
||||
|
||||
// Check that it wrote-through
|
||||
err = store.GetLog(33, &out)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
err = store.GetLog(34, &out)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Delete in the backend
|
||||
err = store.DeleteRange(33, 34)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Should be in the ring buffer
|
||||
err = c.GetLog(33, &out)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
err = c.GetLog(34, &out)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Purge the ring buffer
|
||||
err = c.DeleteRange(33, 34)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Should not be in the ring buffer
|
||||
err = c.GetLog(33, &out)
|
||||
if err != ErrLogNotFound {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
err = c.GetLog(34, &out)
|
||||
if err != ErrLogNotFound {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
}
|
@ -1,622 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-msgpack/codec"
|
||||
)
|
||||
|
||||
const (
|
||||
rpcAppendEntries uint8 = iota
|
||||
rpcRequestVote
|
||||
rpcInstallSnapshot
|
||||
|
||||
// DefaultTimeoutScale is the default TimeoutScale in a NetworkTransport.
|
||||
DefaultTimeoutScale = 256 * 1024 // 256KB
|
||||
|
||||
// rpcMaxPipeline controls the maximum number of outstanding
|
||||
// AppendEntries RPC calls.
|
||||
rpcMaxPipeline = 128
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrTransportShutdown is returned when operations on a transport are
|
||||
// invoked after it's been terminated.
|
||||
ErrTransportShutdown = errors.New("transport shutdown")
|
||||
|
||||
// ErrPipelineShutdown is returned when the pipeline is closed.
|
||||
ErrPipelineShutdown = errors.New("append pipeline closed")
|
||||
)
|
||||
|
||||
/*
|
||||
|
||||
NetworkTransport provides a network based transport that can be
|
||||
used to communicate with Raft on remote machines. It requires
|
||||
an underlying stream layer to provide a stream abstraction, which can
|
||||
be simple TCP, TLS, etc.
|
||||
|
||||
This transport is very simple and lightweight. Each RPC request is
|
||||
framed by sending a byte that indicates the message type, followed
|
||||
by the MsgPack encoded request.
|
||||
|
||||
The response is an error string followed by the response object,
|
||||
both are encoded using MsgPack.
|
||||
|
||||
InstallSnapshot is special, in that after the RPC request we stream
|
||||
the entire state. That socket is not re-used as the connection state
|
||||
is not known if there is an error.
|
||||
|
||||
*/
|
||||
type NetworkTransport struct {
|
||||
connPool map[string][]*netConn
|
||||
connPoolLock sync.Mutex
|
||||
|
||||
consumeCh chan RPC
|
||||
|
||||
heartbeatFn func(RPC)
|
||||
heartbeatFnLock sync.Mutex
|
||||
|
||||
logger *log.Logger
|
||||
|
||||
maxPool int
|
||||
|
||||
shutdown bool
|
||||
shutdownCh chan struct{}
|
||||
shutdownLock sync.Mutex
|
||||
|
||||
stream StreamLayer
|
||||
|
||||
timeout time.Duration
|
||||
TimeoutScale int
|
||||
}
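// A standalone sketch of the request framing described above (the package's
// own helper for this is sendRPC further down in this file): a single type
// byte, then the MsgPack-encoded body, flushed through the buffered writer.
func writeFramedRequest(w *bufio.Writer, rpcType uint8, args interface{}) error {
	if err := w.WriteByte(rpcType); err != nil {
		return err
	}
	enc := codec.NewEncoder(w, &codec.MsgpackHandle{})
	if err := enc.Encode(args); err != nil {
		return err
	}
	return w.Flush()
}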
|
||||
|
||||
// StreamLayer is used with the NetworkTransport to provide
|
||||
// the low level stream abstraction.
|
||||
type StreamLayer interface {
|
||||
net.Listener
|
||||
|
||||
// Dial is used to create a new outgoing connection
|
||||
Dial(address string, timeout time.Duration) (net.Conn, error)
|
||||
}
|
||||
|
||||
type netConn struct {
|
||||
target string
|
||||
conn net.Conn
|
||||
r *bufio.Reader
|
||||
w *bufio.Writer
|
||||
dec *codec.Decoder
|
||||
enc *codec.Encoder
|
||||
}
|
||||
|
||||
func (n *netConn) Release() error {
|
||||
return n.conn.Close()
|
||||
}
|
||||
|
||||
type netPipeline struct {
|
||||
conn *netConn
|
||||
trans *NetworkTransport
|
||||
|
||||
doneCh chan AppendFuture
|
||||
inprogressCh chan *appendFuture
|
||||
|
||||
shutdown bool
|
||||
shutdownCh chan struct{}
|
||||
shutdownLock sync.Mutex
|
||||
}
|
||||
|
||||
// NewNetworkTransport creates a new network transport with the given dialer
|
||||
// and listener. The maxPool controls how many connections we will pool. The
|
||||
// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply
|
||||
// the timeout by (SnapshotSize / TimeoutScale).
|
||||
func NewNetworkTransport(
|
||||
stream StreamLayer,
|
||||
maxPool int,
|
||||
timeout time.Duration,
|
||||
logOutput io.Writer,
|
||||
) *NetworkTransport {
|
||||
if logOutput == nil {
|
||||
logOutput = os.Stderr
|
||||
}
|
||||
return NewNetworkTransportWithLogger(stream, maxPool, timeout, log.New(logOutput, "", log.LstdFlags))
|
||||
}
|
||||
|
||||
// NewNetworkTransportWithLogger creates a new network transport with the given dialer
|
||||
// and listener. The maxPool controls how many connections we will pool. The
|
||||
// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply
|
||||
// the timeout by (SnapshotSize / TimeoutScale).
|
||||
func NewNetworkTransportWithLogger(
|
||||
stream StreamLayer,
|
||||
maxPool int,
|
||||
timeout time.Duration,
|
||||
logger *log.Logger,
|
||||
) *NetworkTransport {
|
||||
if logger == nil {
|
||||
logger = log.New(os.Stderr, "", log.LstdFlags)
|
||||
}
|
||||
trans := &NetworkTransport{
|
||||
connPool: make(map[string][]*netConn),
|
||||
consumeCh: make(chan RPC),
|
||||
logger: logger,
|
||||
maxPool: maxPool,
|
||||
shutdownCh: make(chan struct{}),
|
||||
stream: stream,
|
||||
timeout: timeout,
|
||||
TimeoutScale: DefaultTimeoutScale,
|
||||
}
|
||||
go trans.listen()
|
||||
return trans
|
||||
}
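// Rough arithmetic for the InstallSnapshot deadline scaling mentioned above
// (illustrative numbers only): with timeout = 10s and the default
// TimeoutScale of 256 KB, a 512 MB snapshot yields a deadline of
// 10s * (512 MB / 256 KB) = 10s * 2048, i.e. roughly 5.7 hours.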
|
||||
|
||||
// SetHeartbeatHandler is used to set up a heartbeat handler
|
||||
// as a fast path. This is to avoid head-of-line blocking from
|
||||
// disk IO.
|
||||
func (n *NetworkTransport) SetHeartbeatHandler(cb func(rpc RPC)) {
|
||||
n.heartbeatFnLock.Lock()
|
||||
defer n.heartbeatFnLock.Unlock()
|
||||
n.heartbeatFn = cb
|
||||
}
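// Minimal registration sketch (hypothetical handler; a real one should
// validate the request's term before acknowledging):
func exampleFastPath(n *NetworkTransport) {
	n.SetHeartbeatHandler(func(rpc RPC) {
		rpc.Respond(&AppendEntriesResponse{Success: true}, nil)
	})
}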
|
||||
|
||||
// Close is used to stop the network transport.
|
||||
func (n *NetworkTransport) Close() error {
|
||||
n.shutdownLock.Lock()
|
||||
defer n.shutdownLock.Unlock()
|
||||
|
||||
if !n.shutdown {
|
||||
close(n.shutdownCh)
|
||||
n.stream.Close()
|
||||
n.shutdown = true
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Consumer implements the Transport interface.
|
||||
func (n *NetworkTransport) Consumer() <-chan RPC {
|
||||
return n.consumeCh
|
||||
}
|
||||
|
||||
// LocalAddr implements the Transport interface.
|
||||
func (n *NetworkTransport) LocalAddr() string {
|
||||
return n.stream.Addr().String()
|
||||
}
|
||||
|
||||
// IsShutdown is used to check if the transport is shutdown.
|
||||
func (n *NetworkTransport) IsShutdown() bool {
|
||||
select {
|
||||
case <-n.shutdownCh:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// getPooledConn is used to grab a pooled connection.
|
||||
func (n *NetworkTransport) getPooledConn(target string) *netConn {
|
||||
n.connPoolLock.Lock()
|
||||
defer n.connPoolLock.Unlock()
|
||||
|
||||
conns, ok := n.connPool[target]
|
||||
if !ok || len(conns) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var conn *netConn
|
||||
num := len(conns)
|
||||
conn, conns[num-1] = conns[num-1], nil
|
||||
n.connPool[target] = conns[:num-1]
|
||||
return conn
|
||||
}
|
||||
|
||||
// getConn is used to get a connection from the pool.
|
||||
func (n *NetworkTransport) getConn(target string) (*netConn, error) {
|
||||
// Check for a pooled conn
|
||||
if conn := n.getPooledConn(target); conn != nil {
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
// Dial a new connection
|
||||
conn, err := n.stream.Dial(target, n.timeout)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Wrap the conn
|
||||
netConn := &netConn{
|
||||
target: target,
|
||||
conn: conn,
|
||||
r: bufio.NewReader(conn),
|
||||
w: bufio.NewWriter(conn),
|
||||
}
|
||||
|
||||
// Setup encoder/decoders
|
||||
netConn.dec = codec.NewDecoder(netConn.r, &codec.MsgpackHandle{})
|
||||
netConn.enc = codec.NewEncoder(netConn.w, &codec.MsgpackHandle{})
|
||||
|
||||
// Done
|
||||
return netConn, nil
|
||||
}
|
||||
|
||||
// returnConn returns a connection back to the pool.
|
||||
func (n *NetworkTransport) returnConn(conn *netConn) {
|
||||
n.connPoolLock.Lock()
|
||||
defer n.connPoolLock.Unlock()
|
||||
|
||||
key := conn.target
|
||||
conns := n.connPool[key]
|
||||
|
||||
if !n.IsShutdown() && len(conns) < n.maxPool {
|
||||
n.connPool[key] = append(conns, conn)
|
||||
} else {
|
||||
conn.Release()
|
||||
}
|
||||
}
|
||||
|
||||
// AppendEntriesPipeline returns an interface that can be used to pipeline
|
||||
// AppendEntries requests.
|
||||
func (n *NetworkTransport) AppendEntriesPipeline(target string) (AppendPipeline, error) {
|
||||
// Get a connection
|
||||
conn, err := n.getConn(target)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create the pipeline
|
||||
return newNetPipeline(n, conn), nil
|
||||
}
|
||||
|
||||
// AppendEntries implements the Transport interface.
|
||||
func (n *NetworkTransport) AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
|
||||
return n.genericRPC(target, rpcAppendEntries, args, resp)
|
||||
}
|
||||
|
||||
// RequestVote implements the Transport interface.
|
||||
func (n *NetworkTransport) RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error {
|
||||
return n.genericRPC(target, rpcRequestVote, args, resp)
|
||||
}
|
||||
|
||||
// genericRPC handles a simple request/response RPC.
|
||||
func (n *NetworkTransport) genericRPC(target string, rpcType uint8, args interface{}, resp interface{}) error {
|
||||
// Get a conn
|
||||
conn, err := n.getConn(target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Set a deadline
|
||||
if n.timeout > 0 {
|
||||
conn.conn.SetDeadline(time.Now().Add(n.timeout))
|
||||
}
|
||||
|
||||
// Send the RPC
|
||||
if err = sendRPC(conn, rpcType, args); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Decode the response
|
||||
canReturn, err := decodeResponse(conn, resp)
|
||||
if canReturn {
|
||||
n.returnConn(conn)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// InstallSnapshot implements the Transport interface.
|
||||
func (n *NetworkTransport) InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {
|
||||
// Get a conn, always close for InstallSnapshot
|
||||
conn, err := n.getConn(target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer conn.Release()
|
||||
|
||||
// Set a deadline, scaled by request size
|
||||
if n.timeout > 0 {
|
||||
timeout := n.timeout * time.Duration(args.Size/int64(n.TimeoutScale))
|
||||
if timeout < n.timeout {
|
||||
timeout = n.timeout
|
||||
}
|
||||
conn.conn.SetDeadline(time.Now().Add(timeout))
|
||||
}
|
||||
|
||||
// Send the RPC
|
||||
if err = sendRPC(conn, rpcInstallSnapshot, args); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Stream the state
|
||||
if _, err = io.Copy(conn.w, data); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Flush
|
||||
if err = conn.w.Flush(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Decode the response, do not return conn
|
||||
_, err = decodeResponse(conn, resp)
|
||||
return err
|
||||
}
|
||||
|
||||
// EncodePeer implements the Transport interface.
|
||||
func (n *NetworkTransport) EncodePeer(p string) []byte {
|
||||
return []byte(p)
|
||||
}
|
||||
|
||||
// DecodePeer implements the Transport interface.
|
||||
func (n *NetworkTransport) DecodePeer(buf []byte) string {
|
||||
return string(buf)
|
||||
}
|
||||
|
||||
// listen is used to handle incoming connections.
|
||||
func (n *NetworkTransport) listen() {
|
||||
for {
|
||||
// Accept incoming connections
|
||||
conn, err := n.stream.Accept()
|
||||
if err != nil {
|
||||
if n.IsShutdown() {
|
||||
return
|
||||
}
|
||||
n.logger.Printf("[ERR] raft-net: Failed to accept connection: %v", err)
|
||||
continue
|
||||
}
|
||||
n.logger.Printf("[DEBUG] raft-net: %v accepted connection from: %v", n.LocalAddr(), conn.RemoteAddr())
|
||||
|
||||
// Handle the connection in dedicated routine
|
||||
go n.handleConn(conn)
|
||||
}
|
||||
}
|
||||
|
||||
// handleConn is used to handle an inbound connection for its lifespan.
|
||||
func (n *NetworkTransport) handleConn(conn net.Conn) {
|
||||
defer conn.Close()
|
||||
r := bufio.NewReader(conn)
|
||||
w := bufio.NewWriter(conn)
|
||||
dec := codec.NewDecoder(r, &codec.MsgpackHandle{})
|
||||
enc := codec.NewEncoder(w, &codec.MsgpackHandle{})
|
||||
|
||||
for {
|
||||
if err := n.handleCommand(r, dec, enc); err != nil {
|
||||
if err != io.EOF {
|
||||
n.logger.Printf("[ERR] raft-net: Failed to decode incoming command: %v", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
if err := w.Flush(); err != nil {
|
||||
n.logger.Printf("[ERR] raft-net: Failed to flush response: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleCommand is used to decode and dispatch a single command.
|
||||
func (n *NetworkTransport) handleCommand(r *bufio.Reader, dec *codec.Decoder, enc *codec.Encoder) error {
|
||||
// Get the rpc type
|
||||
rpcType, err := r.ReadByte()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create the RPC object
|
||||
respCh := make(chan RPCResponse, 1)
|
||||
rpc := RPC{
|
||||
RespChan: respCh,
|
||||
}
|
||||
|
||||
// Decode the command
|
||||
isHeartbeat := false
|
||||
switch rpcType {
|
||||
case rpcAppendEntries:
|
||||
var req AppendEntriesRequest
|
||||
if err := dec.Decode(&req); err != nil {
|
||||
return err
|
||||
}
|
||||
rpc.Command = &req
|
||||
|
||||
// Check if this is a heartbeat
|
||||
if req.Term != 0 && req.Leader != nil &&
|
||||
req.PrevLogEntry == 0 && req.PrevLogTerm == 0 &&
|
||||
len(req.Entries) == 0 && req.LeaderCommitIndex == 0 {
|
||||
isHeartbeat = true
|
||||
}
|
||||
|
||||
case rpcRequestVote:
|
||||
var req RequestVoteRequest
|
||||
if err := dec.Decode(&req); err != nil {
|
||||
return err
|
||||
}
|
||||
rpc.Command = &req
|
||||
|
||||
case rpcInstallSnapshot:
|
||||
var req InstallSnapshotRequest
|
||||
if err := dec.Decode(&req); err != nil {
|
||||
return err
|
||||
}
|
||||
rpc.Command = &req
|
||||
rpc.Reader = io.LimitReader(r, req.Size)
|
||||
|
||||
default:
|
||||
return fmt.Errorf("unknown rpc type %d", rpcType)
|
||||
}
|
||||
|
||||
// Check for heartbeat fast-path
|
||||
if isHeartbeat {
|
||||
n.heartbeatFnLock.Lock()
|
||||
fn := n.heartbeatFn
|
||||
n.heartbeatFnLock.Unlock()
|
||||
if fn != nil {
|
||||
fn(rpc)
|
||||
goto RESP
|
||||
}
|
||||
}
|
||||
|
||||
// Dispatch the RPC
|
||||
select {
|
||||
case n.consumeCh <- rpc:
|
||||
case <-n.shutdownCh:
|
||||
return ErrTransportShutdown
|
||||
}
|
||||
|
||||
// Wait for response
|
||||
RESP:
|
||||
select {
|
||||
case resp := <-respCh:
|
||||
// Send the error first
|
||||
respErr := ""
|
||||
if resp.Error != nil {
|
||||
respErr = resp.Error.Error()
|
||||
}
|
||||
if err := enc.Encode(respErr); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Send the response
|
||||
if err := enc.Encode(resp.Response); err != nil {
|
||||
return err
|
||||
}
|
||||
case <-n.shutdownCh:
|
||||
return ErrTransportShutdown
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// decodeResponse is used to decode an RPC response and reports whether
|
||||
// the connection can be reused.
|
||||
func decodeResponse(conn *netConn, resp interface{}) (bool, error) {
|
||||
// Decode the error if any
|
||||
var rpcError string
|
||||
if err := conn.dec.Decode(&rpcError); err != nil {
|
||||
conn.Release()
|
||||
return false, err
|
||||
}
|
||||
|
||||
// Decode the response
|
||||
if err := conn.dec.Decode(resp); err != nil {
|
||||
conn.Release()
|
||||
return false, err
|
||||
}
|
||||
|
||||
// Format an error if any
|
||||
if rpcError != "" {
|
||||
return true, errors.New(rpcError)
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// sendRPC is used to encode and send the RPC.
|
||||
func sendRPC(conn *netConn, rpcType uint8, args interface{}) error {
|
||||
// Write the request type
|
||||
if err := conn.w.WriteByte(rpcType); err != nil {
|
||||
conn.Release()
|
||||
return err
|
||||
}
|
||||
|
||||
// Send the request
|
||||
if err := conn.enc.Encode(args); err != nil {
|
||||
conn.Release()
|
||||
return err
|
||||
}
|
||||
|
||||
// Flush
|
||||
if err := conn.w.Flush(); err != nil {
|
||||
conn.Release()
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// newNetPipeline is used to construct a netPipeline from a given
|
||||
// transport and connection.
|
||||
func newNetPipeline(trans *NetworkTransport, conn *netConn) *netPipeline {
|
||||
n := &netPipeline{
|
||||
conn: conn,
|
||||
trans: trans,
|
||||
doneCh: make(chan AppendFuture, rpcMaxPipeline),
|
||||
inprogressCh: make(chan *appendFuture, rpcMaxPipeline),
|
||||
shutdownCh: make(chan struct{}),
|
||||
}
|
||||
go n.decodeResponses()
|
||||
return n
|
||||
}
|
||||
|
||||
// decodeResponses is a long running routine that decodes the responses
|
||||
// sent on the connection.
|
||||
func (n *netPipeline) decodeResponses() {
|
||||
timeout := n.trans.timeout
|
||||
for {
|
||||
select {
|
||||
case future := <-n.inprogressCh:
|
||||
if timeout > 0 {
|
||||
n.conn.conn.SetReadDeadline(time.Now().Add(timeout))
|
||||
}
|
||||
|
||||
_, err := decodeResponse(n.conn, future.resp)
|
||||
future.respond(err)
|
||||
select {
|
||||
case n.doneCh <- future:
|
||||
case <-n.shutdownCh:
|
||||
return
|
||||
}
|
||||
case <-n.shutdownCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AppendEntries is used to pipeline a new append entries request.
|
||||
func (n *netPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {
|
||||
// Create a new future
|
||||
future := &appendFuture{
|
||||
start: time.Now(),
|
||||
args: args,
|
||||
resp: resp,
|
||||
}
|
||||
future.init()
|
||||
|
||||
// Add a send timeout
|
||||
if timeout := n.trans.timeout; timeout > 0 {
|
||||
n.conn.conn.SetWriteDeadline(time.Now().Add(timeout))
|
||||
}
|
||||
|
||||
// Send the RPC
|
||||
if err := sendRPC(n.conn, rpcAppendEntries, future.args); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Hand-off for decoding, this can also cause back-pressure
|
||||
// to prevent too many inflight requests
|
||||
select {
|
||||
case n.inprogressCh <- future:
|
||||
return future, nil
|
||||
case <-n.shutdownCh:
|
||||
return nil, ErrPipelineShutdown
|
||||
}
|
||||
}
|
||||
|
||||
// Consumer returns a channel that can be used to consume complete futures.
|
||||
func (n *netPipeline) Consumer() <-chan AppendFuture {
|
||||
return n.doneCh
|
||||
}
|
||||
|
||||
// Close is used to shut down the pipeline connection.
|
||||
func (n *netPipeline) Close() error {
|
||||
n.shutdownLock.Lock()
|
||||
defer n.shutdownLock.Unlock()
|
||||
if n.shutdown {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Release the connection
|
||||
n.conn.Release()
|
||||
|
||||
n.shutdown = true
|
||||
close(n.shutdownCh)
|
||||
return nil
|
||||
}
|
@ -1,449 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestNetworkTransport_StartStop(t *testing.T) {
|
||||
trans, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
trans.Close()
|
||||
}
|
||||
|
||||
func TestNetworkTransport_Heartbeat_FastPath(t *testing.T) {
|
||||
// Transport 1 is consumer
|
||||
trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer trans1.Close()
|
||||
|
||||
// Make the RPC request
|
||||
args := AppendEntriesRequest{
|
||||
Term: 10,
|
||||
Leader: []byte("cartman"),
|
||||
}
|
||||
resp := AppendEntriesResponse{
|
||||
Term: 4,
|
||||
LastLog: 90,
|
||||
Success: true,
|
||||
}
|
||||
|
||||
invoked := false
|
||||
fastpath := func(rpc RPC) {
|
||||
// Verify the command
|
||||
req := rpc.Command.(*AppendEntriesRequest)
|
||||
if !reflect.DeepEqual(req, &args) {
|
||||
t.Fatalf("command mismatch: %#v %#v", *req, args)
|
||||
}
|
||||
|
||||
rpc.Respond(&resp, nil)
|
||||
invoked = true
|
||||
}
|
||||
trans1.SetHeartbeatHandler(fastpath)
|
||||
|
||||
// Transport 2 makes outbound request
|
||||
trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer trans2.Close()
|
||||
|
||||
var out AppendEntriesResponse
|
||||
if err := trans2.AppendEntries(trans1.LocalAddr(), &args, &out); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Verify the response
|
||||
if !reflect.DeepEqual(resp, out) {
|
||||
t.Fatalf("command mismatch: %#v %#v", resp, out)
|
||||
}
|
||||
|
||||
// Ensure fast-path is used
|
||||
if !invoked {
|
||||
t.Fatalf("fast-path not used")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNetworkTransport_AppendEntries(t *testing.T) {
|
||||
// Transport 1 is consumer
|
||||
trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer trans1.Close()
|
||||
rpcCh := trans1.Consumer()
|
||||
|
||||
// Make the RPC request
|
||||
args := AppendEntriesRequest{
|
||||
Term: 10,
|
||||
Leader: []byte("cartman"),
|
||||
PrevLogEntry: 100,
|
||||
PrevLogTerm: 4,
|
||||
Entries: []*Log{
|
||||
&Log{
|
||||
Index: 101,
|
||||
Term: 4,
|
||||
Type: LogNoop,
|
||||
},
|
||||
},
|
||||
LeaderCommitIndex: 90,
|
||||
}
|
||||
resp := AppendEntriesResponse{
|
||||
Term: 4,
|
||||
LastLog: 90,
|
||||
Success: true,
|
||||
}
|
||||
|
||||
// Listen for a request
|
||||
go func() {
|
||||
select {
|
||||
case rpc := <-rpcCh:
|
||||
// Verify the command
|
||||
req := rpc.Command.(*AppendEntriesRequest)
|
||||
if !reflect.DeepEqual(req, &args) {
|
||||
t.Fatalf("command mismatch: %#v %#v", *req, args)
|
||||
}
|
||||
|
||||
rpc.Respond(&resp, nil)
|
||||
|
||||
case <-time.After(200 * time.Millisecond):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
}()
|
||||
|
||||
// Transport 2 makes outbound request
|
||||
trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer trans2.Close()
|
||||
|
||||
var out AppendEntriesResponse
|
||||
if err := trans2.AppendEntries(trans1.LocalAddr(), &args, &out); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Verify the response
|
||||
if !reflect.DeepEqual(resp, out) {
|
||||
t.Fatalf("command mismatch: %#v %#v", resp, out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNetworkTransport_AppendEntriesPipeline(t *testing.T) {
|
||||
// Transport 1 is consumer
|
||||
trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer trans1.Close()
|
||||
rpcCh := trans1.Consumer()
|
||||
|
||||
// Make the RPC request
|
||||
args := AppendEntriesRequest{
|
||||
Term: 10,
|
||||
Leader: []byte("cartman"),
|
||||
PrevLogEntry: 100,
|
||||
PrevLogTerm: 4,
|
||||
Entries: []*Log{
|
||||
&Log{
|
||||
Index: 101,
|
||||
Term: 4,
|
||||
Type: LogNoop,
|
||||
},
|
||||
},
|
||||
LeaderCommitIndex: 90,
|
||||
}
|
||||
resp := AppendEntriesResponse{
|
||||
Term: 4,
|
||||
LastLog: 90,
|
||||
Success: true,
|
||||
}
|
||||
|
||||
// Listen for a request
|
||||
go func() {
|
||||
for i := 0; i < 10; i++ {
|
||||
select {
|
||||
case rpc := <-rpcCh:
|
||||
// Verify the command
|
||||
req := rpc.Command.(*AppendEntriesRequest)
|
||||
if !reflect.DeepEqual(req, &args) {
|
||||
t.Fatalf("command mismatch: %#v %#v", *req, args)
|
||||
}
|
||||
rpc.Respond(&resp, nil)
|
||||
|
||||
case <-time.After(200 * time.Millisecond):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Transport 2 makes outbound request
|
||||
trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer trans2.Close()
|
||||
|
||||
pipeline, err := trans2.AppendEntriesPipeline(trans1.LocalAddr())
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer pipeline.Close()
|
||||
for i := 0; i < 10; i++ {
|
||||
out := new(AppendEntriesResponse)
|
||||
if _, err := pipeline.AppendEntries(&args, out); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
respCh := pipeline.Consumer()
|
||||
for i := 0; i < 10; i++ {
|
||||
select {
|
||||
case ready := <-respCh:
|
||||
// Verify the response
|
||||
if !reflect.DeepEqual(&resp, ready.Response()) {
|
||||
t.Fatalf("command mismatch: %#v %#v", &resp, ready.Response())
|
||||
}
|
||||
case <-time.After(200 * time.Millisecond):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNetworkTransport_RequestVote(t *testing.T) {
|
||||
// Transport 1 is consumer
|
||||
trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer trans1.Close()
|
||||
rpcCh := trans1.Consumer()
|
||||
|
||||
// Make the RPC request
|
||||
args := RequestVoteRequest{
|
||||
Term: 20,
|
||||
Candidate: []byte("butters"),
|
||||
LastLogIndex: 100,
|
||||
LastLogTerm: 19,
|
||||
}
|
||||
resp := RequestVoteResponse{
|
||||
Term: 100,
|
||||
Peers: []byte("blah"),
|
||||
Granted: false,
|
||||
}
|
||||
|
||||
// Listen for a request
|
||||
go func() {
|
||||
select {
|
||||
case rpc := <-rpcCh:
|
||||
// Verify the command
|
||||
req := rpc.Command.(*RequestVoteRequest)
|
||||
if !reflect.DeepEqual(req, &args) {
|
||||
t.Fatalf("command mismatch: %#v %#v", *req, args)
|
||||
}
|
||||
|
||||
rpc.Respond(&resp, nil)
|
||||
|
||||
case <-time.After(200 * time.Millisecond):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
}()
|
||||
|
||||
// Transport 2 makes outbound request
|
||||
trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer trans2.Close()
|
||||
|
||||
var out RequestVoteResponse
|
||||
if err := trans2.RequestVote(trans1.LocalAddr(), &args, &out); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Verify the response
|
||||
if !reflect.DeepEqual(resp, out) {
|
||||
t.Fatalf("command mismatch: %#v %#v", resp, out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNetworkTransport_InstallSnapshot(t *testing.T) {
|
||||
// Transport 1 is consumer
|
||||
trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer trans1.Close()
|
||||
rpcCh := trans1.Consumer()
|
||||
|
||||
// Make the RPC request
|
||||
args := InstallSnapshotRequest{
|
||||
Term: 10,
|
||||
Leader: []byte("kyle"),
|
||||
LastLogIndex: 100,
|
||||
LastLogTerm: 9,
|
||||
Peers: []byte("blah blah"),
|
||||
Size: 10,
|
||||
}
|
||||
resp := InstallSnapshotResponse{
|
||||
Term: 10,
|
||||
Success: true,
|
||||
}
|
||||
|
||||
// Listen for a request
|
||||
go func() {
|
||||
select {
|
||||
case rpc := <-rpcCh:
|
||||
// Verify the command
|
||||
req := rpc.Command.(*InstallSnapshotRequest)
|
||||
if !reflect.DeepEqual(req, &args) {
|
||||
t.Fatalf("command mismatch: %#v %#v", *req, args)
|
||||
}
|
||||
|
||||
// Try to read the bytes
|
||||
buf := make([]byte, 10)
|
||||
rpc.Reader.Read(buf)
|
||||
|
||||
// Compare
|
||||
if bytes.Compare(buf, []byte("0123456789")) != 0 {
|
||||
t.Fatalf("bad buf %v", buf)
|
||||
}
|
||||
|
||||
rpc.Respond(&resp, nil)
|
||||
|
||||
case <-time.After(200 * time.Millisecond):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
}()
|
||||
|
||||
// Transport 2 makes outbound request
|
||||
trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer trans2.Close()
|
||||
|
||||
// Create a buffer
|
||||
buf := bytes.NewBuffer([]byte("0123456789"))
|
||||
|
||||
var out InstallSnapshotResponse
|
||||
if err := trans2.InstallSnapshot(trans1.LocalAddr(), &args, &out, buf); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Verify the response
|
||||
if !reflect.DeepEqual(resp, out) {
|
||||
t.Fatalf("command mismatch: %#v %#v", resp, out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNetworkTransport_EncodeDecode(t *testing.T) {
|
||||
// Transport 1 is consumer
|
||||
trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer trans1.Close()
|
||||
|
||||
local := trans1.LocalAddr()
|
||||
enc := trans1.EncodePeer(local)
|
||||
dec := trans1.DecodePeer(enc)
|
||||
|
||||
if dec != local {
|
||||
t.Fatalf("enc/dec fail: %v %v", dec, local)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNetworkTransport_PooledConn(t *testing.T) {
|
||||
// Transport 1 is consumer
|
||||
trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer trans1.Close()
|
||||
rpcCh := trans1.Consumer()
|
||||
|
||||
// Make the RPC request
|
||||
args := AppendEntriesRequest{
|
||||
Term: 10,
|
||||
Leader: []byte("cartman"),
|
||||
PrevLogEntry: 100,
|
||||
PrevLogTerm: 4,
|
||||
Entries: []*Log{
|
||||
&Log{
|
||||
Index: 101,
|
||||
Term: 4,
|
||||
Type: LogNoop,
|
||||
},
|
||||
},
|
||||
LeaderCommitIndex: 90,
|
||||
}
|
||||
resp := AppendEntriesResponse{
|
||||
Term: 4,
|
||||
LastLog: 90,
|
||||
Success: true,
|
||||
}
|
||||
|
||||
// Listen for a request
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case rpc := <-rpcCh:
|
||||
// Verify the command
|
||||
req := rpc.Command.(*AppendEntriesRequest)
|
||||
if !reflect.DeepEqual(req, &args) {
|
||||
t.Fatalf("command mismatch: %#v %#v", *req, args)
|
||||
}
|
||||
rpc.Respond(&resp, nil)
|
||||
|
||||
case <-time.After(200 * time.Millisecond):
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Transport 2 makes outbound request, 3 conn pool
|
||||
trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 3, time.Second, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer trans2.Close()
|
||||
|
||||
// Create wait group
|
||||
wg := &sync.WaitGroup{}
|
||||
wg.Add(5)
|
||||
|
||||
appendFunc := func() {
|
||||
defer wg.Done()
|
||||
var out AppendEntriesResponse
|
||||
if err := trans2.AppendEntries(trans1.LocalAddr(), &args, &out); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Verify the response
|
||||
if !reflect.DeepEqual(resp, out) {
|
||||
t.Fatalf("command mismatch: %#v %#v", resp, out)
|
||||
}
|
||||
}
|
||||
|
||||
// Try to do parallel appends, should stress the conn pool
|
||||
for i := 0; i < 5; i++ {
|
||||
go appendFunc()
|
||||
}
|
||||
|
||||
// Wait for the routines to finish
|
||||
wg.Wait()
|
||||
|
||||
// Check the conn pool size
|
||||
addr := trans1.LocalAddr()
|
||||
if len(trans2.connPool[addr]) != 3 {
|
||||
t.Fatalf("Expected 2 pooled conns!")
|
||||
}
|
||||
}
|
@ -1,122 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// Observation is sent along the given channel to observers when an event occurs.
|
||||
type Observation struct {
|
||||
// Raft holds the Raft instance generating the observation.
|
||||
Raft *Raft
|
||||
// Data holds observation-specific data. Possible types are
|
||||
// *RequestVoteRequest, RaftState and LeaderObservation.
|
||||
Data interface{}
|
||||
}
|
||||
|
||||
// LeaderObservation is used in Observation.Data when leadership changes.
|
||||
type LeaderObservation struct {
|
||||
Leader string
|
||||
}
|
||||
|
||||
// nextObserverID is used to provide a unique ID for each observer to aid in
|
||||
// deregistration.
|
||||
var nextObserverID uint64
|
||||
|
||||
// FilterFn is a function that can be registered in order to filter observations.
|
||||
// The function reports whether the observation should be included - if
|
||||
// it returns false, the observation will be filtered out.
|
||||
type FilterFn func(o *Observation) bool
|
||||
|
||||
// Observer describes what to do with a given observation.
|
||||
type Observer struct {
|
||||
// numObserved and numDropped are performance counters for this observer.
|
||||
// 64 bit types must be 64 bit aligned to use with atomic operations on
|
||||
// 32 bit platforms, so keep them at the top of the struct.
|
||||
numObserved uint64
|
||||
numDropped uint64
|
||||
|
||||
// channel receives observations.
|
||||
channel chan Observation
|
||||
|
||||
// blocking, if true, will cause Raft to block when sending an observation
|
||||
// to this observer. This should generally be set to false.
|
||||
blocking bool
|
||||
|
||||
// filter will be called to determine if an observation should be sent to
|
||||
// the channel.
|
||||
filter FilterFn
|
||||
|
||||
// id is the ID of this observer in the Raft map.
|
||||
id uint64
|
||||
}
|
||||
|
||||
// NewObserver creates a new observer that can be registered
|
||||
// to make observations on a Raft instance. Observations
|
||||
// will be sent on the given channel if they satisfy the
|
||||
// given filter.
|
||||
//
|
||||
// If blocking is true, the observer will block when it can't
|
||||
// send on the channel, otherwise it may discard events.
|
||||
func NewObserver(channel chan Observation, blocking bool, filter FilterFn) *Observer {
|
||||
return &Observer{
|
||||
channel: channel,
|
||||
blocking: blocking,
|
||||
filter: filter,
|
||||
id: atomic.AddUint64(&nextObserverID, 1),
|
||||
}
|
||||
}
|
||||
|
||||
// GetNumObserved returns the number of observations.
|
||||
func (or *Observer) GetNumObserved() uint64 {
|
||||
return atomic.LoadUint64(&or.numObserved)
|
||||
}
|
||||
|
||||
// GetNumDropped returns the number of dropped observations due to blocking.
|
||||
func (or *Observer) GetNumDropped() uint64 {
|
||||
return atomic.LoadUint64(&or.numDropped)
|
||||
}
|
||||
|
||||
// RegisterObserver registers a new observer.
|
||||
func (r *Raft) RegisterObserver(or *Observer) {
|
||||
r.observersLock.Lock()
|
||||
defer r.observersLock.Unlock()
|
||||
r.observers[or.id] = or
|
||||
}
|
||||
|
||||
// DeregisterObserver deregisters an observer.
|
||||
func (r *Raft) DeregisterObserver(or *Observer) {
|
||||
r.observersLock.Lock()
|
||||
defer r.observersLock.Unlock()
|
||||
delete(r.observers, or.id)
|
||||
}
|
||||
|
||||
// observe sends an observation to every observer.
|
||||
func (r *Raft) observe(o interface{}) {
|
||||
// In general observers should not block. But in any case this isn't
|
||||
// disastrous as we only hold a read lock, which merely prevents
|
||||
// registration / deregistration of observers.
|
||||
r.observersLock.RLock()
|
||||
defer r.observersLock.RUnlock()
|
||||
for _, or := range r.observers {
|
||||
// It's wasteful to do this in the loop, but for the common case
|
||||
// where there are no observers we won't create any objects.
|
||||
ob := Observation{Raft: r, Data: o}
|
||||
if or.filter != nil && !or.filter(&ob) {
|
||||
continue
|
||||
}
|
||||
if or.channel == nil {
|
||||
continue
|
||||
}
|
||||
if or.blocking {
|
||||
or.channel <- ob
|
||||
atomic.AddUint64(&or.numObserved, 1)
|
||||
} else {
|
||||
select {
|
||||
case or.channel <- ob:
|
||||
atomic.AddUint64(&or.numObserved, 1)
|
||||
default:
|
||||
atomic.AddUint64(&or.numDropped, 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,122 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
)
|
||||
|
||||
const (
|
||||
jsonPeerPath = "peers.json"
|
||||
)
|
||||
|
||||
// PeerStore provides an interface for persistent storage and
|
||||
// retrieval of peers. We use a separate interface from StableStore
|
||||
// since the peers may need to be edited by a human operator. For example,
|
||||
// in a two node cluster, the failure of either node requires human intervention
|
||||
// since consensus is impossible.
|
||||
type PeerStore interface {
|
||||
// Peers returns the list of known peers.
|
||||
Peers() ([]string, error)
|
||||
|
||||
// SetPeers sets the list of known peers. This is invoked when a peer is
|
||||
// added or removed.
|
||||
SetPeers([]string) error
|
||||
}
|
||||
|
||||
// StaticPeers is used to provide a static list of peers.
|
||||
type StaticPeers struct {
|
||||
StaticPeers []string
|
||||
l sync.Mutex
|
||||
}
|
||||
|
||||
// Peers implements the PeerStore interface.
|
||||
func (s *StaticPeers) Peers() ([]string, error) {
|
||||
s.l.Lock()
|
||||
peers := s.StaticPeers
|
||||
s.l.Unlock()
|
||||
return peers, nil
|
||||
}
|
||||
|
||||
// SetPeers implements the PeerStore interface.
|
||||
func (s *StaticPeers) SetPeers(p []string) error {
|
||||
s.l.Lock()
|
||||
s.StaticPeers = p
|
||||
s.l.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// JSONPeers is used to provide peer persistence on disk in the form
|
||||
// of a JSON file. This allows human operators to manipulate the file.
|
||||
type JSONPeers struct {
|
||||
l sync.Mutex
|
||||
path string
|
||||
trans Transport
|
||||
}
|
||||
|
||||
// NewJSONPeers creates a new JSONPeers store. Requires a transport
|
||||
// to handle the serialization of network addresses.
|
||||
func NewJSONPeers(base string, trans Transport) *JSONPeers {
|
||||
path := filepath.Join(base, jsonPeerPath)
|
||||
store := &JSONPeers{
|
||||
path: path,
|
||||
trans: trans,
|
||||
}
|
||||
return store
|
||||
}
|
||||
|
||||
// Peers implements the PeerStore interface.
|
||||
func (j *JSONPeers) Peers() ([]string, error) {
|
||||
j.l.Lock()
|
||||
defer j.l.Unlock()
|
||||
|
||||
// Read the file
|
||||
buf, err := ioutil.ReadFile(j.path)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Check for no peers
|
||||
if len(buf) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Decode the peers
|
||||
var peerSet []string
|
||||
dec := json.NewDecoder(bytes.NewReader(buf))
|
||||
if err := dec.Decode(&peerSet); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Deserialize each peer
|
||||
var peers []string
|
||||
for _, p := range peerSet {
|
||||
peers = append(peers, j.trans.DecodePeer([]byte(p)))
|
||||
}
|
||||
return peers, nil
|
||||
}
|
||||
|
||||
// SetPeers implements the PeerStore interface.
|
||||
func (j *JSONPeers) SetPeers(peers []string) error {
|
||||
j.l.Lock()
|
||||
defer j.l.Unlock()
|
||||
|
||||
// Encode each peer
|
||||
var peerSet []string
|
||||
for _, p := range peers {
|
||||
peerSet = append(peerSet, string(j.trans.EncodePeer(p)))
|
||||
}
|
||||
|
||||
// Convert to JSON
|
||||
var buf bytes.Buffer
|
||||
enc := json.NewEncoder(&buf)
|
||||
if err := enc.Encode(peerSet); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Write out as JSON
|
||||
return ioutil.WriteFile(j.path, buf.Bytes(), 0755)
|
||||
}
|
@ -1,44 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestJSONPeers(t *testing.T) {
|
||||
// Create a test dir
|
||||
dir, err := ioutil.TempDir("", "raft")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v ", err)
|
||||
}
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
// Create the store
|
||||
_, trans := NewInmemTransport("")
|
||||
store := NewJSONPeers(dir, trans)
|
||||
|
||||
// Try a read, should get nothing
|
||||
peers, err := store.Peers()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if len(peers) != 0 {
|
||||
t.Fatalf("peers: %v", peers)
|
||||
}
|
||||
|
||||
// Initialize some peers
|
||||
newPeers := []string{NewInmemAddr(), NewInmemAddr(), NewInmemAddr()}
|
||||
if err := store.SetPeers(newPeers); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Try a read, should get the peers
|
||||
peers, err = store.Peers()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if len(peers) != 3 {
|
||||
t.Fatalf("peers: %v", peers)
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -1,522 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
)
|
||||
|
||||
const (
|
||||
maxFailureScale = 12
|
||||
failureWait = 10 * time.Millisecond
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrLogNotFound indicates a given log entry is not available.
|
||||
ErrLogNotFound = errors.New("log not found")
|
||||
|
||||
// ErrPipelineReplicationNotSupported can be returned by the transport to
|
||||
// signal that pipeline replication is not supported in general, and that
|
||||
// no error message should be produced.
|
||||
ErrPipelineReplicationNotSupported = errors.New("pipeline replication not supported")
|
||||
)
|
||||
|
||||
type followerReplication struct {
|
||||
peer string
|
||||
inflight *inflight
|
||||
|
||||
stopCh chan uint64
|
||||
triggerCh chan struct{}
|
||||
|
||||
currentTerm uint64
|
||||
matchIndex uint64
|
||||
nextIndex uint64
|
||||
|
||||
lastContact time.Time
|
||||
lastContactLock sync.RWMutex
|
||||
|
||||
failures uint64
|
||||
|
||||
notifyCh chan struct{}
|
||||
notify []*verifyFuture
|
||||
notifyLock sync.Mutex
|
||||
|
||||
// stepDown is used to indicate to the leader that we
|
||||
// should step down based on information from a follower.
|
||||
stepDown chan struct{}
|
||||
|
||||
// allowPipeline is used to control whether
|
||||
// pipeline replication should be enabled.
|
||||
allowPipeline bool
|
||||
}
|
||||
|
||||
// notifyAll is used to notify all the waiting verify futures
|
||||
// if the follower believes we are still the leader.
|
||||
func (s *followerReplication) notifyAll(leader bool) {
|
||||
// Clear the waiting notifies minimizing lock time
|
||||
s.notifyLock.Lock()
|
||||
n := s.notify
|
||||
s.notify = nil
|
||||
s.notifyLock.Unlock()
|
||||
|
||||
// Submit our votes
|
||||
for _, v := range n {
|
||||
v.vote(leader)
|
||||
}
|
||||
}
|
||||
|
||||
// LastContact returns the time of last contact.
|
||||
func (s *followerReplication) LastContact() time.Time {
|
||||
s.lastContactLock.RLock()
|
||||
last := s.lastContact
|
||||
s.lastContactLock.RUnlock()
|
||||
return last
|
||||
}
|
||||
|
||||
// setLastContact sets the last contact to the current time.
|
||||
func (s *followerReplication) setLastContact() {
|
||||
s.lastContactLock.Lock()
|
||||
s.lastContact = time.Now()
|
||||
s.lastContactLock.Unlock()
|
||||
}
|
||||
|
||||
// replicate is a long running routine that is used to manage
|
||||
// the process of replicating logs to our followers.
|
||||
func (r *Raft) replicate(s *followerReplication) {
|
||||
// Start an async heartbeating routine
|
||||
stopHeartbeat := make(chan struct{})
|
||||
defer close(stopHeartbeat)
|
||||
r.goFunc(func() { r.heartbeat(s, stopHeartbeat) })
|
||||
|
||||
RPC:
|
||||
shouldStop := false
|
||||
for !shouldStop {
|
||||
select {
|
||||
case maxIndex := <-s.stopCh:
|
||||
// Make a best effort to replicate up to this index
|
||||
if maxIndex > 0 {
|
||||
r.replicateTo(s, maxIndex)
|
||||
}
|
||||
return
|
||||
case <-s.triggerCh:
|
||||
lastLogIdx, _ := r.getLastLog()
|
||||
shouldStop = r.replicateTo(s, lastLogIdx)
|
||||
case <-randomTimeout(r.conf.CommitTimeout):
|
||||
lastLogIdx, _ := r.getLastLog()
|
||||
shouldStop = r.replicateTo(s, lastLogIdx)
|
||||
}
|
||||
|
||||
// If things looks healthy, switch to pipeline mode
|
||||
if !shouldStop && s.allowPipeline {
|
||||
goto PIPELINE
|
||||
}
|
||||
}
|
||||
return
|
||||
|
||||
PIPELINE:
|
||||
// Disable until re-enabled
|
||||
s.allowPipeline = false
|
||||
|
||||
// Replicates using a pipeline for high performance. This method
|
||||
// is not able to gracefully recover from errors, and so we fall back
|
||||
// to standard mode on failure.
|
||||
if err := r.pipelineReplicate(s); err != nil {
|
||||
if err != ErrPipelineReplicationNotSupported {
|
||||
r.logger.Printf("[ERR] raft: Failed to start pipeline replication to %s: %s", s.peer, err)
|
||||
}
|
||||
}
|
||||
goto RPC
|
||||
}
|
||||
|
||||
// replicateTo is used to replicate the logs up to a given last index.
|
||||
// If the follower log is behind, we take care to bring them up to date.
|
||||
func (r *Raft) replicateTo(s *followerReplication, lastIndex uint64) (shouldStop bool) {
|
||||
// Create the base request
|
||||
var req AppendEntriesRequest
|
||||
var resp AppendEntriesResponse
|
||||
var start time.Time
|
||||
START:
|
||||
// Prevent an excessive retry rate on errors
|
||||
if s.failures > 0 {
|
||||
select {
|
||||
case <-time.After(backoff(failureWait, s.failures, maxFailureScale)):
|
||||
case <-r.shutdownCh:
|
||||
}
|
||||
}
|
||||
|
||||
// Setup the request
|
||||
if err := r.setupAppendEntries(s, &req, s.nextIndex, lastIndex); err == ErrLogNotFound {
|
||||
goto SEND_SNAP
|
||||
} else if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Make the RPC call
|
||||
start = time.Now()
|
||||
if err := r.trans.AppendEntries(s.peer, &req, &resp); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to AppendEntries to %v: %v", s.peer, err)
|
||||
s.failures++
|
||||
return
|
||||
}
|
||||
appendStats(s.peer, start, float32(len(req.Entries)))
|
||||
|
||||
// Check for a newer term, stop running
|
||||
if resp.Term > req.Term {
|
||||
r.handleStaleTerm(s)
|
||||
return true
|
||||
}
|
||||
|
||||
// Update the last contact
|
||||
s.setLastContact()
|
||||
|
||||
// Update s based on success
|
||||
if resp.Success {
|
||||
// Update our replication state
|
||||
updateLastAppended(s, &req)
|
||||
|
||||
// Clear any failures, allow pipelining
|
||||
s.failures = 0
|
||||
s.allowPipeline = true
|
||||
} else {
|
||||
s.nextIndex = max(min(s.nextIndex-1, resp.LastLog+1), 1)
|
||||
s.matchIndex = s.nextIndex - 1
|
||||
if resp.NoRetryBackoff {
|
||||
s.failures = 0
|
||||
} else {
|
||||
s.failures++
|
||||
}
|
||||
r.logger.Printf("[WARN] raft: AppendEntries to %v rejected, sending older logs (next: %d)", s.peer, s.nextIndex)
|
||||
}
|
||||
|
||||
CHECK_MORE:
|
||||
// Check if there are more logs to replicate
|
||||
if s.nextIndex <= lastIndex {
|
||||
goto START
|
||||
}
|
||||
return
|
||||
|
||||
// SEND_SNAP is used when we fail to get a log, usually because the follower
|
||||
// is too far behind, and we must ship a snapshot down instead
|
||||
SEND_SNAP:
|
||||
if stop, err := r.sendLatestSnapshot(s); stop {
|
||||
return true
|
||||
} else if err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to send snapshot to %v: %v", s.peer, err)
|
||||
return
|
||||
}
|
||||
|
||||
// Check if there is more to replicate
|
||||
goto CHECK_MORE
|
||||
}
|
||||
|
||||
// sendLatestSnapshot is used to send the latest snapshot we have
|
||||
// down to our follower.
|
||||
func (r *Raft) sendLatestSnapshot(s *followerReplication) (bool, error) {
|
||||
// Get the snapshots
|
||||
snapshots, err := r.snapshots.List()
|
||||
if err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to list snapshots: %v", err)
|
||||
return false, err
|
||||
}
|
||||
|
||||
// Check we have at least a single snapshot
|
||||
if len(snapshots) == 0 {
|
||||
return false, fmt.Errorf("no snapshots found")
|
||||
}
|
||||
|
||||
// Open the most recent snapshot
|
||||
snapID := snapshots[0].ID
|
||||
meta, snapshot, err := r.snapshots.Open(snapID)
|
||||
if err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to open snapshot %v: %v", snapID, err)
|
||||
return false, err
|
||||
}
|
||||
defer snapshot.Close()
|
||||
|
||||
// Setup the request
|
||||
req := InstallSnapshotRequest{
|
||||
Term: s.currentTerm,
|
||||
Leader: r.trans.EncodePeer(r.localAddr),
|
||||
LastLogIndex: meta.Index,
|
||||
LastLogTerm: meta.Term,
|
||||
Peers: meta.Peers,
|
||||
Size: meta.Size,
|
||||
}
|
||||
|
||||
// Make the call
|
||||
start := time.Now()
|
||||
var resp InstallSnapshotResponse
|
||||
if err := r.trans.InstallSnapshot(s.peer, &req, &resp, snapshot); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to install snapshot %v: %v", snapID, err)
|
||||
s.failures++
|
||||
return false, err
|
||||
}
|
||||
metrics.MeasureSince([]string{"raft", "replication", "installSnapshot", s.peer}, start)
|
||||
|
||||
// Check for a newer term, stop running
|
||||
if resp.Term > req.Term {
|
||||
r.handleStaleTerm(s)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// Update the last contact
|
||||
s.setLastContact()
|
||||
|
||||
// Check for success
|
||||
if resp.Success {
|
||||
// Mark any inflight logs as committed
|
||||
s.inflight.CommitRange(s.matchIndex+1, meta.Index)
|
||||
|
||||
// Update the indexes
|
||||
s.matchIndex = meta.Index
|
||||
s.nextIndex = s.matchIndex + 1
|
||||
|
||||
// Clear any failures
|
||||
s.failures = 0
|
||||
|
||||
// Notify we are still leader
|
||||
s.notifyAll(true)
|
||||
} else {
|
||||
s.failures++
|
||||
r.logger.Printf("[WARN] raft: InstallSnapshot to %v rejected", s.peer)
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// heartbeat is used to periodically invoke AppendEntries on a peer
|
||||
// to ensure they don't time out. This is done asynchronously from replicate(),
|
||||
// since that routine could potentially be blocked on disk IO.
|
||||
func (r *Raft) heartbeat(s *followerReplication, stopCh chan struct{}) {
|
||||
var failures uint64
|
||||
req := AppendEntriesRequest{
|
||||
Term: s.currentTerm,
|
||||
Leader: r.trans.EncodePeer(r.localAddr),
|
||||
}
|
||||
var resp AppendEntriesResponse
|
||||
for {
|
||||
// Wait for the next heartbeat interval or forced notify
|
||||
select {
|
||||
case <-s.notifyCh:
|
||||
case <-randomTimeout(r.conf.HeartbeatTimeout / 10):
|
||||
case <-stopCh:
|
||||
return
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
if err := r.trans.AppendEntries(s.peer, &req, &resp); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to heartbeat to %v: %v", s.peer, err)
|
||||
failures++
|
||||
select {
|
||||
case <-time.After(backoff(failureWait, failures, maxFailureScale)):
|
||||
case <-stopCh:
|
||||
}
|
||||
} else {
|
||||
s.setLastContact()
|
||||
failures = 0
|
||||
metrics.MeasureSince([]string{"raft", "replication", "heartbeat", s.peer}, start)
|
||||
s.notifyAll(resp.Success)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pipelineReplicate is used when we have synchronized our state with the follower,
|
||||
// and want to switch to a higher performance pipeline mode of replication.
|
||||
// We only pipeline AppendEntries commands, and if we ever hit an error, we fall
|
||||
// back to the standard replication which can handle more complex situations.
|
||||
func (r *Raft) pipelineReplicate(s *followerReplication) error {
|
||||
// Create a new pipeline
|
||||
pipeline, err := r.trans.AppendEntriesPipeline(s.peer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer pipeline.Close()
|
||||
|
||||
// Log start and stop of pipeline
|
||||
r.logger.Printf("[INFO] raft: pipelining replication to peer %v", s.peer)
|
||||
defer r.logger.Printf("[INFO] raft: aborting pipeline replication to peer %v", s.peer)
|
||||
|
||||
// Create a shutdown and finish channel
|
||||
stopCh := make(chan struct{})
|
||||
finishCh := make(chan struct{})
|
||||
|
||||
// Start a dedicated decoder
|
||||
r.goFunc(func() { r.pipelineDecode(s, pipeline, stopCh, finishCh) })
|
||||
|
||||
// Start pipeline sends at the last good nextIndex
|
||||
nextIndex := s.nextIndex
|
||||
|
||||
shouldStop := false
|
||||
SEND:
|
||||
for !shouldStop {
|
||||
select {
|
||||
case <-finishCh:
|
||||
break SEND
|
||||
case maxIndex := <-s.stopCh:
|
||||
if maxIndex > 0 {
|
||||
r.pipelineSend(s, pipeline, &nextIndex, maxIndex)
|
||||
}
|
||||
break SEND
|
||||
case <-s.triggerCh:
|
||||
lastLogIdx, _ := r.getLastLog()
|
||||
shouldStop = r.pipelineSend(s, pipeline, &nextIndex, lastLogIdx)
|
||||
case <-randomTimeout(r.conf.CommitTimeout):
|
||||
lastLogIdx, _ := r.getLastLog()
|
||||
shouldStop = r.pipelineSend(s, pipeline, &nextIndex, lastLogIdx)
|
||||
}
|
||||
}
|
||||
|
||||
// Stop our decoder, and wait for it to finish
|
||||
close(stopCh)
|
||||
select {
|
||||
case <-finishCh:
|
||||
case <-r.shutdownCh:
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// pipelineSend is used to send data over a pipeline.
|
||||
func (r *Raft) pipelineSend(s *followerReplication, p AppendPipeline, nextIdx *uint64, lastIndex uint64) (shouldStop bool) {
|
||||
// Create a new append request
|
||||
req := new(AppendEntriesRequest)
|
||||
if err := r.setupAppendEntries(s, req, *nextIdx, lastIndex); err != nil {
|
||||
return true
|
||||
}
|
||||
|
||||
// Pipeline the append entries
|
||||
if _, err := p.AppendEntries(req, new(AppendEntriesResponse)); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to pipeline AppendEntries to %v: %v", s.peer, err)
|
||||
return true
|
||||
}
|
||||
|
||||
// Increase the next send log to avoid re-sending old logs
|
||||
if n := len(req.Entries); n > 0 {
|
||||
last := req.Entries[n-1]
|
||||
*nextIdx = last.Index + 1
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// pipelineDecode is used to decode the responses of pipelined requests.
|
||||
func (r *Raft) pipelineDecode(s *followerReplication, p AppendPipeline, stopCh, finishCh chan struct{}) {
|
||||
defer close(finishCh)
|
||||
respCh := p.Consumer()
|
||||
for {
|
||||
select {
|
||||
case ready := <-respCh:
|
||||
req, resp := ready.Request(), ready.Response()
|
||||
appendStats(s.peer, ready.Start(), float32(len(req.Entries)))
|
||||
|
||||
// Check for a newer term, stop running
|
||||
if resp.Term > req.Term {
|
||||
r.handleStaleTerm(s)
|
||||
return
|
||||
}
|
||||
|
||||
// Update the last contact
|
||||
s.setLastContact()
|
||||
|
||||
// Abort pipeline if not successful
|
||||
if !resp.Success {
|
||||
return
|
||||
}
|
||||
|
||||
// Update our replication state
|
||||
updateLastAppended(s, req)
|
||||
case <-stopCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// setupAppendEntries is used to setup an append entries request.
|
||||
func (r *Raft) setupAppendEntries(s *followerReplication, req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {
|
||||
req.Term = s.currentTerm
|
||||
req.Leader = r.trans.EncodePeer(r.localAddr)
|
||||
req.LeaderCommitIndex = r.getCommitIndex()
|
||||
if err := r.setPreviousLog(req, nextIndex); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := r.setNewLogs(req, nextIndex, lastIndex); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setPreviousLog is used to setup the PrevLogEntry and PrevLogTerm for an
|
||||
// AppendEntriesRequest given the next index to replicate.
|
||||
func (r *Raft) setPreviousLog(req *AppendEntriesRequest, nextIndex uint64) error {
|
||||
// Guard for the first index, since there is no 0 log entry
|
||||
// Guard against the previous index being a snapshot as well
|
||||
lastSnapIdx, lastSnapTerm := r.getLastSnapshot()
|
||||
if nextIndex == 1 {
|
||||
req.PrevLogEntry = 0
|
||||
req.PrevLogTerm = 0
|
||||
|
||||
} else if (nextIndex - 1) == lastSnapIdx {
|
||||
req.PrevLogEntry = lastSnapIdx
|
||||
req.PrevLogTerm = lastSnapTerm
|
||||
|
||||
} else {
|
||||
var l Log
|
||||
if err := r.logs.GetLog(nextIndex-1, &l); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v",
|
||||
nextIndex-1, err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Set the previous index and term (0 if nextIndex is 1)
|
||||
req.PrevLogEntry = l.Index
|
||||
req.PrevLogTerm = l.Term
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setNewLogs is used to setup the logs which should be appended for a request.
|
||||
func (r *Raft) setNewLogs(req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {
|
||||
// Append up to MaxAppendEntries or up to the lastIndex
|
||||
req.Entries = make([]*Log, 0, r.conf.MaxAppendEntries)
|
||||
maxIndex := min(nextIndex+uint64(r.conf.MaxAppendEntries)-1, lastIndex)
|
||||
for i := nextIndex; i <= maxIndex; i++ {
|
||||
oldLog := new(Log)
|
||||
if err := r.logs.GetLog(i, oldLog); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v", i, err)
|
||||
return err
|
||||
}
|
||||
req.Entries = append(req.Entries, oldLog)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// appendStats is used to emit stats about an AppendEntries invocation.
|
||||
func appendStats(peer string, start time.Time, logs float32) {
|
||||
metrics.MeasureSince([]string{"raft", "replication", "appendEntries", "rpc", peer}, start)
|
||||
metrics.IncrCounter([]string{"raft", "replication", "appendEntries", "logs", peer}, logs)
|
||||
}
|
||||
|
||||
// handleStaleTerm is used when a follower indicates that we have a stale term.
|
||||
func (r *Raft) handleStaleTerm(s *followerReplication) {
|
||||
r.logger.Printf("[ERR] raft: peer %v has newer term, stopping replication", s.peer)
|
||||
s.notifyAll(false) // No longer leader
|
||||
asyncNotifyCh(s.stepDown)
|
||||
}
|
||||
|
||||
// updateLastAppended is used to update follower replication state after a successful
|
||||
// AppendEntries RPC.
|
||||
func updateLastAppended(s *followerReplication, req *AppendEntriesRequest) {
|
||||
// Mark any inflight logs as committed
|
||||
if logs := req.Entries; len(logs) > 0 {
|
||||
first := logs[0]
|
||||
last := logs[len(logs)-1]
|
||||
s.inflight.CommitRange(first.Index, last.Index)
|
||||
|
||||
// Update the indexes
|
||||
s.matchIndex = last.Index
|
||||
s.nextIndex = last.Index + 1
|
||||
}
|
||||
|
||||
// Notify still leader
|
||||
s.notifyAll(true)
|
||||
}
|
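The backoff helper called by replicateTo and heartbeat above (backoff(failureWait, failures, maxFailureScale)) lives in util.go and does not appear in this hunk. Below is a plausible sketch consistent with those call sites, doubling the wait once per failure round up to a capped number of doublings; backoffSketch is an illustrative name, not the library's actual implementation, and it assumes the "time" import that replication.go already has.

// backoffSketch doubles the base duration once per failure round beyond the
// first couple, capped at limit rounds. Illustrative only; assumes "time"
// is imported, as it is in replication.go.
func backoffSketch(base time.Duration, round, limit uint64) time.Duration {
	power := round
	if power > limit {
		power = limit
	}
	for power > 2 {
		base *= 2
		power--
	}
	return base
}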
@ -1,40 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"io"
|
||||
)
|
||||
|
||||
// SnapshotMeta is for metadata of a snapshot.
|
||||
type SnapshotMeta struct {
|
||||
ID string // ID is opaque to the store, and is used for opening
|
||||
Index uint64
|
||||
Term uint64
|
||||
Peers []byte
|
||||
Size int64
|
||||
}
|
||||
|
||||
// SnapshotStore interface is used to allow for flexible implementations
|
||||
// of snapshot storage and retrieval. For example, a client could implement
|
||||
// a shared state store such as S3, allowing new nodes to restore snapshots
|
||||
// without streaming from the leader.
|
||||
type SnapshotStore interface {
|
||||
// Create is used to begin a snapshot at a given index and term,
|
||||
// with the current peer set already encoded.
|
||||
Create(index, term uint64, peers []byte) (SnapshotSink, error)
|
||||
|
||||
// List is used to list the available snapshots in the store.
|
||||
// It should return them in descending order, with the highest index first.
|
||||
List() ([]*SnapshotMeta, error)
|
||||
|
||||
// Open takes a snapshot ID and provides a ReadCloser. Once close is
|
||||
// called it is assumed the snapshot is no longer needed.
|
||||
Open(id string) (*SnapshotMeta, io.ReadCloser, error)
|
||||
}
|
||||
|
||||
// SnapshotSink is returned by StartSnapshot. The FSM will Write state
|
||||
// to the sink and call Close on completion. On error, Cancel will be invoked.
|
||||
type SnapshotSink interface {
|
||||
io.WriteCloser
|
||||
ID() string
|
||||
Cancel() error
|
||||
}
|
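The SnapshotSink contract above is write-then-Close, with Cancel on error. A minimal in-package sketch of how an FSM's persist step would typically drive it; persistSnapshot is an illustrative name, not part of this package's API.

// persistSnapshot writes opaque FSM state to a sink, cancelling the snapshot
// if the write fails. Illustrative only.
func persistSnapshot(sink SnapshotSink, state []byte) error {
	if _, err := sink.Write(state); err != nil {
		sink.Cancel()
		return err
	}
	return sink.Close()
}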
@ -1,15 +0,0 @@
|
||||
package raft
|
||||
|
||||
// StableStore is used to provide stable storage
|
||||
// of key configurations to ensure safety.
|
||||
type StableStore interface {
|
||||
Set(key []byte, val []byte) error
|
||||
|
||||
// Get returns the value for key, or an empty byte slice if key was not found.
|
||||
Get(key []byte) ([]byte, error)
|
||||
|
||||
SetUint64(key []byte, val uint64) error
|
||||
|
||||
// GetUint64 returns the uint64 value for key, or 0 if key was not found.
|
||||
GetUint64(key []byte) (uint64, error)
|
||||
}
|
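The StableStore interface above can be satisfied by something as small as a pair of maps. The following non-durable, in-package sketch is for illustration only (inmemStable is a made-up name); real deployments use a durable store such as raft-boltdb.

// inmemStable is an illustrative StableStore backed by maps. It is not
// durable and not safe for concurrent use; it only demonstrates the contract.
type inmemStable struct {
	kv  map[string][]byte
	u64 map[string]uint64
}

func newInmemStable() *inmemStable {
	return &inmemStable{kv: map[string][]byte{}, u64: map[string]uint64{}}
}

func (s *inmemStable) Set(key, val []byte) error {
	s.kv[string(key)] = val
	return nil
}

func (s *inmemStable) Get(key []byte) ([]byte, error) {
	if v, ok := s.kv[string(key)]; ok {
		return v, nil
	}
	return []byte{}, nil // empty slice when the key is not found
}

func (s *inmemStable) SetUint64(key []byte, val uint64) error {
	s.u64[string(key)] = val
	return nil
}

func (s *inmemStable) GetUint64(key []byte) (uint64, error) {
	return s.u64[string(key)], nil // zero when the key is not found
}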
@ -1,171 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// RaftState captures the state of a Raft node: Follower, Candidate, Leader,
|
||||
// or Shutdown.
|
||||
type RaftState uint32
|
||||
|
||||
const (
|
||||
// Follower is the initial state of a Raft node.
|
||||
Follower RaftState = iota
|
||||
|
||||
// Candidate is one of the valid states of a Raft node.
|
||||
Candidate
|
||||
|
||||
// Leader is one of the valid states of a Raft node.
|
||||
Leader
|
||||
|
||||
// Shutdown is the terminal state of a Raft node.
|
||||
Shutdown
|
||||
)
|
||||
|
||||
func (s RaftState) String() string {
|
||||
switch s {
|
||||
case Follower:
|
||||
return "Follower"
|
||||
case Candidate:
|
||||
return "Candidate"
|
||||
case Leader:
|
||||
return "Leader"
|
||||
case Shutdown:
|
||||
return "Shutdown"
|
||||
default:
|
||||
return "Unknown"
|
||||
}
|
||||
}
|
||||
|
||||
// raftState is used to maintain various state variables
|
||||
// and provides an interface to set/get the variables in a
|
||||
// thread safe manner.
|
||||
type raftState struct {
|
||||
// currentTerm, commitIndex, and lastApplied must be kept at the top of
|
||||
// the struct so they're 64 bit aligned which is a requirement for
|
||||
// atomic ops on 32 bit platforms.
|
||||
|
||||
// The current term, cache of StableStore
|
||||
currentTerm uint64
|
||||
|
||||
// Highest committed log entry
|
||||
commitIndex uint64
|
||||
|
||||
// Last applied log to the FSM
|
||||
lastApplied uint64
|
||||
|
||||
// lastLock protects the next four fields
|
||||
lastLock sync.Mutex
|
||||
|
||||
// Cache the latest snapshot index/term
|
||||
lastSnapshotIndex uint64
|
||||
lastSnapshotTerm uint64
|
||||
|
||||
// Cache the latest log from LogStore
|
||||
lastLogIndex uint64
|
||||
lastLogTerm uint64
|
||||
|
||||
// Tracks running goroutines
|
||||
routinesGroup sync.WaitGroup
|
||||
|
||||
// The current state
|
||||
state RaftState
|
||||
}
|
||||
|
||||
func (r *raftState) getState() RaftState {
|
||||
stateAddr := (*uint32)(&r.state)
|
||||
return RaftState(atomic.LoadUint32(stateAddr))
|
||||
}
|
||||
|
||||
func (r *raftState) setState(s RaftState) {
|
||||
stateAddr := (*uint32)(&r.state)
|
||||
atomic.StoreUint32(stateAddr, uint32(s))
|
||||
}
|
||||
|
||||
func (r *raftState) getCurrentTerm() uint64 {
|
||||
return atomic.LoadUint64(&r.currentTerm)
|
||||
}
|
||||
|
||||
func (r *raftState) setCurrentTerm(term uint64) {
|
||||
atomic.StoreUint64(&r.currentTerm, term)
|
||||
}
|
||||
|
||||
func (r *raftState) getLastLog() (index, term uint64) {
|
||||
r.lastLock.Lock()
|
||||
index = r.lastLogIndex
|
||||
term = r.lastLogTerm
|
||||
r.lastLock.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
func (r *raftState) setLastLog(index, term uint64) {
|
||||
r.lastLock.Lock()
|
||||
r.lastLogIndex = index
|
||||
r.lastLogTerm = term
|
||||
r.lastLock.Unlock()
|
||||
}
|
||||
|
||||
func (r *raftState) getLastSnapshot() (index, term uint64) {
|
||||
r.lastLock.Lock()
|
||||
index = r.lastSnapshotIndex
|
||||
term = r.lastSnapshotTerm
|
||||
r.lastLock.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
func (r *raftState) setLastSnapshot(index, term uint64) {
|
||||
r.lastLock.Lock()
|
||||
r.lastSnapshotIndex = index
|
||||
r.lastSnapshotTerm = term
|
||||
r.lastLock.Unlock()
|
||||
}
|
||||
|
||||
func (r *raftState) getCommitIndex() uint64 {
|
||||
return atomic.LoadUint64(&r.commitIndex)
|
||||
}
|
||||
|
||||
func (r *raftState) setCommitIndex(index uint64) {
|
||||
atomic.StoreUint64(&r.commitIndex, index)
|
||||
}
|
||||
|
||||
func (r *raftState) getLastApplied() uint64 {
|
||||
return atomic.LoadUint64(&r.lastApplied)
|
||||
}
|
||||
|
||||
func (r *raftState) setLastApplied(index uint64) {
|
||||
atomic.StoreUint64(&r.lastApplied, index)
|
||||
}
|
||||
|
||||
// Start a goroutine and properly handle the race between a routine
|
||||
// starting and incrementing, and exiting and decrementing.
|
||||
func (r *raftState) goFunc(f func()) {
|
||||
r.routinesGroup.Add(1)
|
||||
go func() {
|
||||
defer r.routinesGroup.Done()
|
||||
f()
|
||||
}()
|
||||
}
|
||||
|
||||
func (r *raftState) waitShutdown() {
|
||||
r.routinesGroup.Wait()
|
||||
}
|
||||
|
||||
// getLastIndex returns the last index in stable storage.
|
||||
// Either from the last log or from the last snapshot.
|
||||
func (r *raftState) getLastIndex() uint64 {
|
||||
r.lastLock.Lock()
|
||||
defer r.lastLock.Unlock()
|
||||
return max(r.lastLogIndex, r.lastSnapshotIndex)
|
||||
}
|
||||
|
||||
// getLastEntry returns the last index and term in stable storage.
|
||||
// Either from the last log or from the last snapshot.
|
||||
func (r *raftState) getLastEntry() (uint64, uint64) {
|
||||
r.lastLock.Lock()
|
||||
defer r.lastLock.Unlock()
|
||||
if r.lastLogIndex >= r.lastSnapshotIndex {
|
||||
return r.lastLogIndex, r.lastLogTerm
|
||||
}
|
||||
return r.lastSnapshotIndex, r.lastSnapshotTerm
|
||||
}
|
@ -1,16 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
# The version must be supplied from the environment. Do not include the
|
||||
# leading "v".
|
||||
if [ -z "$VERSION" ]; then
|
||||
echo "Please specify a version."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Generate the tag.
|
||||
echo "==> Tagging version $VERSION..."
|
||||
git commit --allow-empty -a --gpg-sign=348FFC4C -m "Release v$VERSION"
|
||||
git tag -a -m "Version $VERSION" -s -u 348FFC4C "v${VERSION}" master
|
||||
|
||||
exit 0
|
@ -1,105 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"log"
|
||||
"net"
|
||||
"time"
|
||||
)
|
||||
|
||||
var (
|
||||
errNotAdvertisable = errors.New("local bind address is not advertisable")
|
||||
errNotTCP = errors.New("local address is not a TCP address")
|
||||
)
|
||||
|
||||
// TCPStreamLayer implements StreamLayer interface for plain TCP.
|
||||
type TCPStreamLayer struct {
|
||||
advertise net.Addr
|
||||
listener *net.TCPListener
|
||||
}
|
||||
|
||||
// NewTCPTransport returns a NetworkTransport that is built on top of
|
||||
// a TCP streaming transport layer.
|
||||
func NewTCPTransport(
|
||||
bindAddr string,
|
||||
advertise net.Addr,
|
||||
maxPool int,
|
||||
timeout time.Duration,
|
||||
logOutput io.Writer,
|
||||
) (*NetworkTransport, error) {
|
||||
return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport {
|
||||
return NewNetworkTransport(stream, maxPool, timeout, logOutput)
|
||||
})
|
||||
}
|
||||
|
||||
// NewTCPTransportWithLogger returns a NetworkTransport that is built on top of
|
||||
// a TCP streaming transport layer, with log output going to the supplied Logger
|
||||
func NewTCPTransportWithLogger(
|
||||
bindAddr string,
|
||||
advertise net.Addr,
|
||||
maxPool int,
|
||||
timeout time.Duration,
|
||||
logger *log.Logger,
|
||||
) (*NetworkTransport, error) {
|
||||
return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport {
|
||||
return NewNetworkTransportWithLogger(stream, maxPool, timeout, logger)
|
||||
})
|
||||
}
|
||||
|
||||
func newTCPTransport(bindAddr string,
|
||||
advertise net.Addr,
|
||||
maxPool int,
|
||||
timeout time.Duration,
|
||||
transportCreator func(stream StreamLayer) *NetworkTransport) (*NetworkTransport, error) {
|
||||
// Try to bind
|
||||
list, err := net.Listen("tcp", bindAddr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create stream
|
||||
stream := &TCPStreamLayer{
|
||||
advertise: advertise,
|
||||
listener: list.(*net.TCPListener),
|
||||
}
|
||||
|
||||
// Verify that we have a usable advertise address
|
||||
addr, ok := stream.Addr().(*net.TCPAddr)
|
||||
if !ok {
|
||||
list.Close()
|
||||
return nil, errNotTCP
|
||||
}
|
||||
if addr.IP.IsUnspecified() {
|
||||
list.Close()
|
||||
return nil, errNotAdvertisable
|
||||
}
|
||||
|
||||
// Create the network transport
|
||||
trans := transportCreator(stream)
|
||||
return trans, nil
|
||||
}
|
||||
|
||||
// Dial implements the StreamLayer interface.
|
||||
func (t *TCPStreamLayer) Dial(address string, timeout time.Duration) (net.Conn, error) {
|
||||
return net.DialTimeout("tcp", address, timeout)
|
||||
}
|
||||
|
||||
// Accept implements the net.Listener interface.
|
||||
func (t *TCPStreamLayer) Accept() (c net.Conn, err error) {
|
||||
return t.listener.Accept()
|
||||
}
|
||||
|
||||
// Close implements the net.Listener interface.
|
||||
func (t *TCPStreamLayer) Close() (err error) {
|
||||
return t.listener.Close()
|
||||
}
|
||||
|
||||
// Addr implements the net.Listener interface.
|
||||
func (t *TCPStreamLayer) Addr() net.Addr {
|
||||
// Use an advertise addr if provided
|
||||
if t.advertise != nil {
|
||||
return t.advertise
|
||||
}
|
||||
return t.listener.Addr()
|
||||
}
|
@ -1,24 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"net"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTCPTransport_BadAddr(t *testing.T) {
|
||||
_, err := NewTCPTransportWithLogger("0.0.0.0:0", nil, 1, 0, newTestLogger(t))
|
||||
if err != errNotAdvertisable {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTCPTransport_WithAdvertise(t *testing.T) {
|
||||
addr := &net.TCPAddr{IP: []byte{127, 0, 0, 1}, Port: 12345}
|
||||
trans, err := NewTCPTransportWithLogger("0.0.0.0:0", addr, 1, 0, newTestLogger(t))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if trans.LocalAddr() != "127.0.0.1:12345" {
|
||||
t.Fatalf("bad: %v", trans.LocalAddr())
|
||||
}
|
||||
}
|
@ -1,124 +0,0 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"io"
|
||||
"time"
|
||||
)
|
||||
|
||||
// RPCResponse captures both a response and a potential error.
|
||||
type RPCResponse struct {
|
||||
Response interface{}
|
||||
Error error
|
||||
}
|
||||
|
||||
// RPC has a command, and provides a response mechanism.
|
||||
type RPC struct {
|
||||
Command interface{}
|
||||
Reader io.Reader // Set only for InstallSnapshot
|
||||
RespChan chan<- RPCResponse
|
||||
}
|
||||
|
||||
// Respond is used to respond with a response, error or both
|
||||
func (r *RPC) Respond(resp interface{}, err error) {
|
||||
r.RespChan <- RPCResponse{resp, err}
|
||||
}
|
||||
|
||||
// Transport provides an interface for network transports
|
||||
// to allow Raft to communicate with other nodes.
|
||||
type Transport interface {
|
||||
// Consumer returns a channel that can be used to
|
||||
// consume and respond to RPC requests.
|
||||
Consumer() <-chan RPC
|
||||
|
||||
// LocalAddr is used to return our local address to distinguish from our peers.
|
||||
LocalAddr() string
|
||||
|
||||
// AppendEntriesPipeline returns an interface that can be used to pipeline
|
||||
// AppendEntries requests.
|
||||
AppendEntriesPipeline(target string) (AppendPipeline, error)
|
||||
|
||||
// AppendEntries sends the appropriate RPC to the target node.
|
||||
AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error
|
||||
|
||||
// RequestVote sends the appropriate RPC to the target node.
|
||||
RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error
|
||||
|
||||
// InstallSnapshot is used to push a snapshot down to a follower. The data is read from
|
||||
// the ReadCloser and streamed to the client.
|
||||
InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error
|
||||
|
||||
// EncodePeer is used to serialize a peer name.
|
||||
EncodePeer(string) []byte
|
||||
|
||||
// DecodePeer is used to deserialize a peer name.
|
||||
DecodePeer([]byte) string
|
||||
|
||||
// SetHeartbeatHandler is used to setup a heartbeat handler
|
||||
// as a fast-pass. This is to avoid head-of-line blocking from
|
||||
// disk IO. If a Transport does not support this, it can simply
|
||||
// ignore the call, and push the heartbeat onto the Consumer channel.
|
||||
SetHeartbeatHandler(cb func(rpc RPC))
|
||||
}
|
||||
|
||||
// WithClose is an interface that a transport may provide which
|
||||
// allows a transport to be shut down cleanly when a Raft instance
|
||||
// shuts down.
|
||||
//
|
||||
// It is defined separately from Transport as unfortunately it wasn't in the
|
||||
// original interface specification.
|
||||
type WithClose interface {
|
||||
// Close permanently closes a transport, stopping
|
||||
// any associated goroutines and freeing other resources.
|
||||
Close() error
|
||||
}
|
||||
|
||||
// LoopbackTransport is an interface that provides a loopback transport suitable for testing
|
||||
// e.g. InmemTransport. It's there so we don't have to rewrite tests.
|
||||
type LoopbackTransport interface {
|
||||
Transport // Embedded transport reference
|
||||
WithPeers // Embedded peer management
|
||||
WithClose // with a close routine
|
||||
}
|
||||
|
||||
// WithPeers is an interface that a transport may provide which allows for connection and
|
||||
// disconnection. Unless the transport is a loopback transport, the transport specified to
|
||||
// "Connect" is likely to be nil.
|
||||
type WithPeers interface {
|
||||
Connect(peer string, t Transport) // Connect a peer
|
||||
Disconnect(peer string) // Disconnect a given peer
|
||||
DisconnectAll() // Disconnect all peers, possibly to reconnect them later
|
||||
}
|
||||
|
||||
// AppendPipeline is used for pipelining AppendEntries requests. It is used
|
||||
// to increase the replication throughput by masking latency and better
|
||||
// utilizing bandwidth.
|
||||
type AppendPipeline interface {
|
||||
// AppendEntries is used to add another request to the pipeline.
|
||||
// The send may block which is an effective form of back-pressure.
|
||||
AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error)
|
||||
|
||||
// Consumer returns a channel that can be used to consume
|
||||
// response futures when they are ready.
|
||||
Consumer() <-chan AppendFuture
|
||||
|
||||
// Close closes the pipeline and cancels all inflight RPCs
|
||||
Close() error
|
||||
}
|
||||
|
||||
// AppendFuture is used to return information about a pipelined AppendEntries request.
|
||||
type AppendFuture interface {
|
||||
Future
|
||||
|
||||
// Start returns the time that the append request was started.
|
||||
// It is always OK to call this method.
|
||||
Start() time.Time
|
||||
|
||||
// Request holds the parameters of the AppendEntries call.
|
||||
// It is always OK to call this method.
|
||||
Request() *AppendEntriesRequest
|
||||
|
||||
// Response holds the results of the AppendEntries call.
|
||||
// This method must only be called after the Error
|
||||
// method returns, and will only be valid on success.
|
||||
Response() *AppendEntriesResponse
|
||||
}
|
@ -1,313 +0,0 @@
package raft

import (
    "bytes"
    "reflect"
    "testing"
    "time"
)

const (
    TT_Inmem = iota

    // NOTE: must be last
    numTestTransports
)

func NewTestTransport(ttype int, addr string) (string, LoopbackTransport) {
    switch ttype {
    case TT_Inmem:
        addr, lt := NewInmemTransport(addr)
        return addr, lt
    default:
        panic("Unknown transport type")
    }
}

func TestTransport_StartStop(t *testing.T) {
    for ttype := 0; ttype < numTestTransports; ttype++ {
        _, trans := NewTestTransport(ttype, "")
        if err := trans.Close(); err != nil {
            t.Fatalf("err: %v", err)
        }
    }
}

func TestTransport_AppendEntries(t *testing.T) {
    for ttype := 0; ttype < numTestTransports; ttype++ {
        addr1, trans1 := NewTestTransport(ttype, "")
        defer trans1.Close()
        rpcCh := trans1.Consumer()

        // Make the RPC request
        args := AppendEntriesRequest{
            Term:         10,
            Leader:       []byte("cartman"),
            PrevLogEntry: 100,
            PrevLogTerm:  4,
            Entries: []*Log{
                &Log{
                    Index: 101,
                    Term:  4,
                    Type:  LogNoop,
                },
            },
            LeaderCommitIndex: 90,
        }
        resp := AppendEntriesResponse{
            Term:    4,
            LastLog: 90,
            Success: true,
        }

        // Listen for a request
        go func() {
            select {
            case rpc := <-rpcCh:
                // Verify the command
                req := rpc.Command.(*AppendEntriesRequest)
                if !reflect.DeepEqual(req, &args) {
                    t.Fatalf("command mismatch: %#v %#v", *req, args)
                }
                rpc.Respond(&resp, nil)

            case <-time.After(200 * time.Millisecond):
                t.Fatalf("timeout")
            }
        }()

        // Transport 2 makes outbound request
        addr2, trans2 := NewTestTransport(ttype, "")
        defer trans2.Close()

        trans1.Connect(addr2, trans2)
        trans2.Connect(addr1, trans1)

        var out AppendEntriesResponse
        if err := trans2.AppendEntries(trans1.LocalAddr(), &args, &out); err != nil {
            t.Fatalf("err: %v", err)
        }

        // Verify the response
        if !reflect.DeepEqual(resp, out) {
            t.Fatalf("command mismatch: %#v %#v", resp, out)
        }
    }
}

func TestTransport_AppendEntriesPipeline(t *testing.T) {
    for ttype := 0; ttype < numTestTransports; ttype++ {
        addr1, trans1 := NewTestTransport(ttype, "")
        defer trans1.Close()
        rpcCh := trans1.Consumer()

        // Make the RPC request
        args := AppendEntriesRequest{
            Term:         10,
            Leader:       []byte("cartman"),
            PrevLogEntry: 100,
            PrevLogTerm:  4,
            Entries: []*Log{
                &Log{
                    Index: 101,
                    Term:  4,
                    Type:  LogNoop,
                },
            },
            LeaderCommitIndex: 90,
        }
        resp := AppendEntriesResponse{
            Term:    4,
            LastLog: 90,
            Success: true,
        }

        // Listen for a request
        go func() {
            for i := 0; i < 10; i++ {
                select {
                case rpc := <-rpcCh:
                    // Verify the command
                    req := rpc.Command.(*AppendEntriesRequest)
                    if !reflect.DeepEqual(req, &args) {
                        t.Fatalf("command mismatch: %#v %#v", *req, args)
                    }
                    rpc.Respond(&resp, nil)

                case <-time.After(200 * time.Millisecond):
                    t.Fatalf("timeout")
                }
            }
        }()

        // Transport 2 makes outbound request
        addr2, trans2 := NewTestTransport(ttype, "")
        defer trans2.Close()

        trans1.Connect(addr2, trans2)
        trans2.Connect(addr1, trans1)

        pipeline, err := trans2.AppendEntriesPipeline(trans1.LocalAddr())
        if err != nil {
            t.Fatalf("err: %v", err)
        }
        defer pipeline.Close()
        for i := 0; i < 10; i++ {
            out := new(AppendEntriesResponse)
            if _, err := pipeline.AppendEntries(&args, out); err != nil {
                t.Fatalf("err: %v", err)
            }
        }

        respCh := pipeline.Consumer()
        for i := 0; i < 10; i++ {
            select {
            case ready := <-respCh:
                // Verify the response
                if !reflect.DeepEqual(&resp, ready.Response()) {
                    t.Fatalf("command mismatch: %#v %#v", &resp, ready.Response())
                }
            case <-time.After(200 * time.Millisecond):
                t.Fatalf("timeout")
            }
        }
    }
}

func TestTransport_RequestVote(t *testing.T) {
    for ttype := 0; ttype < numTestTransports; ttype++ {
        addr1, trans1 := NewTestTransport(ttype, "")
        defer trans1.Close()
        rpcCh := trans1.Consumer()

        // Make the RPC request
        args := RequestVoteRequest{
            Term:         20,
            Candidate:    []byte("butters"),
            LastLogIndex: 100,
            LastLogTerm:  19,
        }
        resp := RequestVoteResponse{
            Term:    100,
            Peers:   []byte("blah"),
            Granted: false,
        }

        // Listen for a request
        go func() {
            select {
            case rpc := <-rpcCh:
                // Verify the command
                req := rpc.Command.(*RequestVoteRequest)
                if !reflect.DeepEqual(req, &args) {
                    t.Fatalf("command mismatch: %#v %#v", *req, args)
                }

                rpc.Respond(&resp, nil)

            case <-time.After(200 * time.Millisecond):
                t.Fatalf("timeout")
            }
        }()

        // Transport 2 makes outbound request
        addr2, trans2 := NewTestTransport(ttype, "")
        defer trans2.Close()

        trans1.Connect(addr2, trans2)
        trans2.Connect(addr1, trans1)

        var out RequestVoteResponse
        if err := trans2.RequestVote(trans1.LocalAddr(), &args, &out); err != nil {
            t.Fatalf("err: %v", err)
        }

        // Verify the response
        if !reflect.DeepEqual(resp, out) {
            t.Fatalf("command mismatch: %#v %#v", resp, out)
        }
    }
}

func TestTransport_InstallSnapshot(t *testing.T) {
    for ttype := 0; ttype < numTestTransports; ttype++ {
        addr1, trans1 := NewTestTransport(ttype, "")
        defer trans1.Close()
        rpcCh := trans1.Consumer()

        // Make the RPC request
        args := InstallSnapshotRequest{
            Term:         10,
            Leader:       []byte("kyle"),
            LastLogIndex: 100,
            LastLogTerm:  9,
            Peers:        []byte("blah blah"),
            Size:         10,
        }
        resp := InstallSnapshotResponse{
            Term:    10,
            Success: true,
        }

        // Listen for a request
        go func() {
            select {
            case rpc := <-rpcCh:
                // Verify the command
                req := rpc.Command.(*InstallSnapshotRequest)
                if !reflect.DeepEqual(req, &args) {
                    t.Fatalf("command mismatch: %#v %#v", *req, args)
                }

                // Try to read the bytes
                buf := make([]byte, 10)
                rpc.Reader.Read(buf)

                // Compare
                if bytes.Compare(buf, []byte("0123456789")) != 0 {
                    t.Fatalf("bad buf %v", buf)
                }

                rpc.Respond(&resp, nil)

            case <-time.After(200 * time.Millisecond):
                t.Fatalf("timeout")
            }
        }()

        // Transport 2 makes outbound request
        addr2, trans2 := NewTestTransport(ttype, "")
        defer trans2.Close()

        trans1.Connect(addr2, trans2)
        trans2.Connect(addr1, trans1)

        // Create a buffer
        buf := bytes.NewBuffer([]byte("0123456789"))

        var out InstallSnapshotResponse
        if err := trans2.InstallSnapshot(trans1.LocalAddr(), &args, &out, buf); err != nil {
            t.Fatalf("err: %v", err)
        }

        // Verify the response
        if !reflect.DeepEqual(resp, out) {
            t.Fatalf("command mismatch: %#v %#v", resp, out)
        }
    }
}

func TestTransport_EncodeDecode(t *testing.T) {
    for ttype := 0; ttype < numTestTransports; ttype++ {
        _, trans1 := NewTestTransport(ttype, "")
        defer trans1.Close()

        local := trans1.LocalAddr()
        enc := trans1.EncodePeer(local)
        dec := trans1.DecodePeer(enc)

        if dec != local {
            t.Fatalf("enc/dec fail: %v %v", dec, local)
        }
    }
}
@ -1,179 +0,0 @@
package raft

import (
    "bytes"
    crand "crypto/rand"
    "fmt"
    "math"
    "math/big"
    "math/rand"
    "time"

    "github.com/hashicorp/go-msgpack/codec"
)

func init() {
    // Ensure we use a high-entropy seed for the pseudo-random generator
    rand.Seed(newSeed())
}

// newSeed returns an int64 from a crypto random source;
// it can be used to seed a source for math/rand.
func newSeed() int64 {
    r, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64))
    if err != nil {
        panic(fmt.Errorf("failed to read random bytes: %v", err))
    }
    return r.Int64()
}

// randomTimeout returns a value that is between minVal and 2x minVal.
func randomTimeout(minVal time.Duration) <-chan time.Time {
    if minVal == 0 {
        return nil
    }
    extra := (time.Duration(rand.Int63()) % minVal)
    return time.After(minVal + extra)
}
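Randomized timeouts like this are what keep Raft followers from all starting elections at the same instant. For illustration only (waitForElectionTimeout is a hypothetical helper and the 150ms base is an assumed value), a caller might use the returned channel like this:

// waitForElectionTimeout is a hypothetical sketch: wait for a randomized
// election timeout or a heartbeat, whichever comes first.
func waitForElectionTimeout(heartbeatCh <-chan struct{}) bool {
    timeout := randomTimeout(150 * time.Millisecond) // fires between 150ms and 300ms
    select {
    case <-heartbeatCh:
        return false // leader is alive; no election needed
    case <-timeout:
        return true // timed out; the caller should start an election
    }
}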
// min returns the minimum.
func min(a, b uint64) uint64 {
    if a <= b {
        return a
    }
    return b
}

// max returns the maximum.
func max(a, b uint64) uint64 {
    if a >= b {
        return a
    }
    return b
}

// generateUUID is used to generate a random UUID.
func generateUUID() string {
    buf := make([]byte, 16)
    if _, err := crand.Read(buf); err != nil {
        panic(fmt.Errorf("failed to read random bytes: %v", err))
    }

    return fmt.Sprintf("%08x-%04x-%04x-%04x-%12x",
        buf[0:4],
        buf[4:6],
        buf[6:8],
        buf[8:10],
        buf[10:16])
}

// asyncNotifyCh is used to do an async channel send
// to a single channel without blocking.
func asyncNotifyCh(ch chan struct{}) {
    select {
    case ch <- struct{}{}:
    default:
    }
}

// asyncNotifyBool is used to do an async notification
// on a bool channel.
func asyncNotifyBool(ch chan bool, v bool) {
    select {
    case ch <- v:
    default:
    }
}

// ExcludePeer is used to exclude a single peer from a list of peers.
func ExcludePeer(peers []string, peer string) []string {
    otherPeers := make([]string, 0, len(peers))
    for _, p := range peers {
        if p != peer {
            otherPeers = append(otherPeers, p)
        }
    }
    return otherPeers
}

// PeerContained checks if a given peer is contained in a list.
func PeerContained(peers []string, peer string) bool {
    for _, p := range peers {
        if p == peer {
            return true
        }
    }
    return false
}

// AddUniquePeer is used to add a peer to a list of existing
// peers only if it is not already contained.
func AddUniquePeer(peers []string, peer string) []string {
    if PeerContained(peers, peer) {
        return peers
    }
    return append(peers, peer)
}

// encodePeers is used to serialize a list of peers.
func encodePeers(peers []string, trans Transport) []byte {
    // Encode each peer
    var encPeers [][]byte
    for _, p := range peers {
        encPeers = append(encPeers, trans.EncodePeer(p))
    }

    // Encode the entire array
    buf, err := encodeMsgPack(encPeers)
    if err != nil {
        panic(fmt.Errorf("failed to encode peers: %v", err))
    }

    return buf.Bytes()
}

// decodePeers is used to deserialize a list of peers.
func decodePeers(buf []byte, trans Transport) []string {
    // Decode the buffer first
    var encPeers [][]byte
    if err := decodeMsgPack(buf, &encPeers); err != nil {
        panic(fmt.Errorf("failed to decode peers: %v", err))
    }

    // Deserialize each peer
    var peers []string
    for _, enc := range encPeers {
        peers = append(peers, trans.DecodePeer(enc))
    }

    return peers
}
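A minimal round-trip sketch (peersRoundTrip is a hypothetical helper), using the in-memory transport as the encoder/decoder; it mirrors TestEncodeDecodePeers further down:

// peersRoundTrip is a hypothetical sketch: serialize a peer list with one
// transport and restore it with the same transport; the decoded slice
// should equal the original.
func peersRoundTrip() []string {
    _, trans := NewInmemTransport("")
    peers := []string{NewInmemAddr(), NewInmemAddr()}

    encoded := encodePeers(peers, trans) // msgpack-encoded list of encoded peers
    return decodePeers(encoded, trans)   // should equal peers
}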
// decodeMsgPack reverses the encode operation on a byte slice input.
func decodeMsgPack(buf []byte, out interface{}) error {
    r := bytes.NewBuffer(buf)
    hd := codec.MsgpackHandle{}
    dec := codec.NewDecoder(r, &hd)
    return dec.Decode(out)
}

// encodeMsgPack writes an encoded object to a new bytes buffer.
func encodeMsgPack(in interface{}) (*bytes.Buffer, error) {
    buf := bytes.NewBuffer(nil)
    hd := codec.MsgpackHandle{}
    enc := codec.NewEncoder(buf, &hd)
    err := enc.Encode(in)
    return buf, err
}

// backoff is used to compute an exponential backoff
// duration. Base time is scaled by the current round,
// up to some maximum scale factor.
func backoff(base time.Duration, round, limit uint64) time.Duration {
    power := min(round, limit)
    for power > 2 {
        base *= 2
        power--
    }
    return base
}
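To make the scaling concrete: the doubling only starts once the round exceeds 2, and the limit caps the exponent, so the result is base * 2^(min(round, limit) - 2) for rounds above 2. A short sketch of the values this produces (backoffExamples is a hypothetical helper; the first and last cases match TestBackoff below):

// backoffExamples is a hypothetical sketch of backoff's growth curve.
func backoffExamples() {
    fmt.Println(backoff(10*time.Millisecond, 1, 8)) // 10ms  (round <= 2: no scaling)
    fmt.Println(backoff(10*time.Millisecond, 4, 8)) // 40ms  (doubled twice)
    fmt.Println(backoff(10*time.Millisecond, 8, 8)) // 640ms (doubled six times)
    fmt.Println(backoff(10*time.Millisecond, 9, 8)) // 640ms (capped by limit)
}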
@ -1,152 +0,0 @@
package raft

import (
    "reflect"
    "regexp"
    "testing"
    "time"
)

func TestRandomTimeout(t *testing.T) {
    start := time.Now()
    timeout := randomTimeout(time.Millisecond)

    select {
    case <-timeout:
        diff := time.Now().Sub(start)
        if diff < time.Millisecond {
            t.Fatalf("fired early")
        }
    case <-time.After(3 * time.Millisecond):
        t.Fatalf("timeout")
    }
}

func TestNewSeed(t *testing.T) {
    vals := make(map[int64]bool)
    for i := 0; i < 1000; i++ {
        seed := newSeed()
        if _, exists := vals[seed]; exists {
            t.Fatal("newSeed() returned a value it had previously returned")
        }
        vals[seed] = true
    }
}

func TestRandomTimeout_NoTime(t *testing.T) {
    timeout := randomTimeout(0)
    if timeout != nil {
        t.Fatalf("expected nil channel")
    }
}

func TestMin(t *testing.T) {
    if min(1, 1) != 1 {
        t.Fatalf("bad min")
    }
    if min(2, 1) != 1 {
        t.Fatalf("bad min")
    }
    if min(1, 2) != 1 {
        t.Fatalf("bad min")
    }
}

func TestMax(t *testing.T) {
    if max(1, 1) != 1 {
        t.Fatalf("bad max")
    }
    if max(2, 1) != 2 {
        t.Fatalf("bad max")
    }
    if max(1, 2) != 2 {
        t.Fatalf("bad max")
    }
}

func TestGenerateUUID(t *testing.T) {
    prev := generateUUID()
    for i := 0; i < 100; i++ {
        id := generateUUID()
        if prev == id {
            t.Fatalf("Should get a new ID!")
        }

        matched, err := regexp.MatchString(
            `[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}`, id)
        if !matched || err != nil {
            t.Fatalf("expected match %s %v %s", id, matched, err)
        }
    }
}

func TestExcludePeer(t *testing.T) {
    peers := []string{NewInmemAddr(), NewInmemAddr(), NewInmemAddr()}
    peer := peers[2]

    after := ExcludePeer(peers, peer)
    if len(after) != 2 {
        t.Fatalf("Bad length")
    }
    if after[0] == peer || after[1] == peer {
        t.Fatalf("should not contain peer")
    }
}

func TestPeerContained(t *testing.T) {
    peers := []string{NewInmemAddr(), NewInmemAddr(), NewInmemAddr()}

    if !PeerContained(peers, peers[2]) {
        t.Fatalf("Expect contained")
    }
    if PeerContained(peers, NewInmemAddr()) {
        t.Fatalf("unexpected contained")
    }
}

func TestAddUniquePeer(t *testing.T) {
    peers := []string{NewInmemAddr(), NewInmemAddr(), NewInmemAddr()}
    after := AddUniquePeer(peers, peers[2])
    if !reflect.DeepEqual(after, peers) {
        t.Fatalf("unexpected append")
    }
    after = AddUniquePeer(peers, NewInmemAddr())
    if len(after) != 4 {
        t.Fatalf("expected append")
    }
}

func TestEncodeDecodePeers(t *testing.T) {
    peers := []string{NewInmemAddr(), NewInmemAddr(), NewInmemAddr()}
    _, trans := NewInmemTransport("")

    // Try to encode/decode
    buf := encodePeers(peers, trans)
    decoded := decodePeers(buf, trans)

    if !reflect.DeepEqual(peers, decoded) {
        t.Fatalf("mismatch %v %v", peers, decoded)
    }
}

func TestBackoff(t *testing.T) {
    b := backoff(10*time.Millisecond, 1, 8)
    if b != 10*time.Millisecond {
        t.Fatalf("bad: %v", b)
    }

    b = backoff(20*time.Millisecond, 2, 8)
    if b != 20*time.Millisecond {
        t.Fatalf("bad: %v", b)
    }

    b = backoff(10*time.Millisecond, 8, 8)
    if b != 640*time.Millisecond {
        t.Fatalf("bad: %v", b)
    }

    b = backoff(10*time.Millisecond, 9, 8)
    if b != 640*time.Millisecond {
        t.Fatalf("bad: %v", b)
    }
}