diff --git a/CHANGELOG.md b/CHANGELOG.md
index f4131f61..05fb03e7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,13 @@
-## 4.7.0 (Unreleased)
+## 5.0.0 (Unreleased)
+This release moves rqlite to Hashicorp Raft v1, a new version of the underlying Raft consensus system. As a result **the Raft system in 5.0 is not compatible with the 4.0 series**. To upgrade from an earlier version to this release you should back up your 4.0 leader node, and restore the database dump into a new 5.0 cluster. The HTTP API, however, remains unchanged.
+
 ### New features
 - [PR #595](https://github.com/rqlite/rqlite/pull/595): rqlite CLI prints Welcome message on startup.
 
 ### Implementation changes and bug fixes
 - [PR #597](https://github.com/rqlite/rqlite/pull/597): Don't ignore any Join error, instead return it.
 - [PR #598](https://github.com/rqlite/rqlite/pull/598): Ensure backup is correctly closed.
+- [PR #600](https://github.com/rqlite/rqlite/pull/600): Move to Hashicorp Raft v1.
 
 ## 4.6.0 (November 29th 2019)
 _This release adds significant new functionality to the command-line tool, including much more control over backup and restore of the database. [Visit the Releases page](https://github.com/rqlite/rqlite/releases/tag/v4.6.0) to download this release._
diff --git a/DOC/CLUSTER_MGMT.md b/DOC/CLUSTER_MGMT.md
index ba19ed4d..baee18b2 100644
--- a/DOC/CLUSTER_MGMT.md
+++ b/DOC/CLUSTER_MGMT.md
@@ -16,24 +16,32 @@
 To create a cluster you must first launch a node that can act as the initial leader. Do this as follows on _host1_:
 ```bash
-host1:$ rqlited -http-addr host1:4001 -raft-addr host1:4002 ~/node
+host1:$ rqlited -node-id 1 -http-addr host1:4001 -raft-addr host1:4002 ~/node
 ```
 With this command a single node is started, listening for API requests on port 4001 and listening on port 4002 for intra-cluster communication and cluster-join requests from other nodes. This node stores its state at `~/node`.
 
 To join a second node to this leader, execute the following command on _host2_:
 ```bash
-host2:$ rqlited -http-addr host2:4001 -raft-addr host2:4002 -join http://host1:4001 ~/node
+host2:$ rqlited -node-id 2 -http-addr host2:4001 -raft-addr host2:4002 -join http://host1:4001 ~/node
 ```
 _If a node receives a join request, and that node is not actually the leader of the cluster, the receiving node will automatically redirect the requesting node to the leader node. As a result a node can actually join a cluster by contacting any node in the cluster. You can also specify multiple join addresses, and the node will try each address until joining is successful._
 
 Once executed you now have a cluster of two nodes. Of course, for fault-tolerance you need a 3-node cluster, so launch a third node like so on _host3_:
 ```bash
-host3:$ rqlited -http-addr host3:4001 -raft-addr host3:4002 -join http://host1:4001 ~/node
+host3:$ rqlited -node-id 3 -http-addr host3:4001 -raft-addr host3:4002 -join http://host1:4001 ~/node
 ```
 _When restarting a node, there is no further need to pass `-join`. It will be ignored if a node is already a member of a cluster._
 
 You've now got a fault-tolerant, distributed, relational database. It can tolerate the failure of any node, even the leader, and remain operational.
 
+## Node IDs
+You can set the Node ID (`-node-id`) to anything you wish, as long as it's unique for each node.
+
 ## Listening on all interfaces
 You can pass `0.0.0.0` to both `-http-addr` and `-raft-addr` if you wish a node to listen on all interfaces.
 You must still pass an explicit network address to `-join` however. In this case you'll also want to set `-http-adv-addr` to the actual interface address, so other nodes learn the correct network address to use to reach the node listening on `0.0.0.0`.
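+
+For example, the first node from the walkthrough above could accept API requests on all interfaces while advertising `host1` to other nodes (a sketch; substitute your own addresses):
+```bash
+host1:$ rqlited -node-id 1 -http-addr 0.0.0.0:4001 -http-adv-addr host1:4001 -raft-addr host1:4002 ~/node
+```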
diff --git a/appveyor.yml b/appveyor.yml
index 8aa86646..ab036c79 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,4 +1,4 @@
-version: 4.{build}
+version: 5.{build}
 
 platform: x64
 
diff --git a/cluster/join.go b/cluster/join.go
index 3f3f56c7..5458cc72 100644
--- a/cluster/join.go
+++ b/cluster/join.go
@@ -19,10 +19,10 @@ import (
 const numAttempts int = 3
 const attemptInterval time.Duration = 5 * time.Second
 
-// Join attempts to join the cluster at one of the addresses given in joinAddr.
-// It walks through joinAddr in order, and sets the Raft address of the joining
-// node as advAddr. It returns the endpoint successfully used to join the cluster.
-func Join(joinAddr []string, advAddr string, tlsConfig *tls.Config) (string, error) {
+// Join attempts to join the cluster at one of the addresses given in joinAddr,
+// setting the node ID and Raft address of the joining node to nodeID and
+// advAddr respectively. It returns the endpoint successfully used to join the cluster.
+func Join(joinAddr []string, nodeID, advAddr string, tlsConfig *tls.Config) (string, error) {
 	var err error
 	var j string
 	logger := log.New(os.Stderr, "[cluster-join] ", log.LstdFlags)
@@ -32,7 +32,7 @@ func Join(joinAddr []string, advAddr string, tlsConfig *tls.Config) (string, err
 
 	for i := 0; i < numAttempts; i++ {
 		for _, a := range joinAddr {
-			j, err = join(a, advAddr, tlsConfig, logger)
+			j, err = join(a, nodeID, advAddr, tlsConfig, logger)
 			if err == nil {
 				// Success!
 				return j, nil
@@ -45,7 +45,11 @@ func Join(joinAddr []string, advAddr string, tlsConfig *tls.Config) (string, err
 	return "", err
 }
 
-func join(joinAddr string, advAddr string, tlsConfig *tls.Config, logger *log.Logger) (string, error) {
+func join(joinAddr, nodeID, advAddr string, tlsConfig *tls.Config, logger *log.Logger) (string, error) {
+	if nodeID == "" {
+		return "", fmt.Errorf("node ID not set")
+	}
+
 	// Join using IP address, as that is what Hashicorp Raft works in.
 	resv, err := net.ResolveTCPAddr("tcp", advAddr)
 	if err != nil {
@@ -65,10 +69,13 @@ func join(joinAddr string, advAddr string, tlsConfig *tls.Config, logger *log.Lo
 	}
 
 	for {
-		b, err := json.Marshal(map[string]string{"addr": resv.String()})
-		if err != nil {
-			return "", err
-		}
+		b, err := json.Marshal(map[string]string{
+			"id":   nodeID,
+			"addr": resv.String(),
+		})
+		if err != nil {
+			return "", err
+		}
 
 		// Attempt to join.
resp, err := client.Post(fullAddr, "application-type/json", bytes.NewReader(b)) diff --git a/cluster/join_test.go b/cluster/join_test.go index 2545fc64..c521800f 100644 --- a/cluster/join_test.go +++ b/cluster/join_test.go @@ -16,7 +16,7 @@ func Test_SingleJoinOK(t *testing.T) { })) defer ts.Close() - j, err := Join([]string{ts.URL}, "127.0.0.1:9090", nil) + j, err := Join([]string{ts.URL}, "id0", "127.0.0.1:9090", nil) if err != nil { t.Fatalf("failed to join a single node: %s", err.Error()) } @@ -31,7 +31,7 @@ func Test_SingleJoinFail(t *testing.T) { })) defer ts.Close() - _, err := Join([]string{ts.URL}, "127.0.0.1:9090", nil) + _, err := Join([]string{ts.URL}, "id0", "127.0.0.1:9090", nil) if err == nil { t.Fatalf("expected error when joining bad node") } @@ -45,7 +45,7 @@ func Test_DoubleJoinOK(t *testing.T) { })) defer ts2.Close() - j, err := Join([]string{ts1.URL, ts2.URL}, "127.0.0.1:9090", nil) + j, err := Join([]string{ts1.URL, ts2.URL}, "id0", "127.0.0.1:9090", nil) if err != nil { t.Fatalf("failed to join a single node: %s", err.Error()) } @@ -63,7 +63,7 @@ func Test_DoubleJoinOKSecondNode(t *testing.T) { })) defer ts2.Close() - j, err := Join([]string{ts1.URL, ts2.URL}, "127.0.0.1:9090", nil) + j, err := Join([]string{ts1.URL, ts2.URL}, "id0", "127.0.0.1:9090", nil) if err != nil { t.Fatalf("failed to join a single node: %s", err.Error()) } @@ -83,7 +83,7 @@ func Test_DoubleJoinOKSecondNodeRedirect(t *testing.T) { })) defer ts2.Close() - j, err := Join([]string{ts2.URL}, "127.0.0.1:9090", nil) + j, err := Join([]string{ts2.URL}, "id0", "127.0.0.1:9090", nil) if err != nil { t.Fatalf("failed to join a single node: %s", err.Error()) } diff --git a/cmd/rqlited/main.go b/cmd/rqlited/main.go index 5ab88ad4..da3f6b2b 100644 --- a/cmd/rqlited/main.go +++ b/cmd/rqlited/main.go @@ -40,7 +40,7 @@ const logo = ` // These variables are populated via the Go linker. var ( - version = "4" + version = "5" commit = "unknown" branch = "unknown" buildtime = "unknown" @@ -66,6 +66,7 @@ var nodeEncrypt bool var nodeX509CACert string var nodeX509Cert string var nodeX509Key string +var nodeID string var raftAddr string var raftAdv string var joinAddr string @@ -92,6 +93,7 @@ const desc = `rqlite is a lightweight, distributed relational database, which us storage engine. It provides an easy-to-use, fault-tolerant store for relational data.` func init() { + flag.StringVar(&nodeID, "node-id", "", "Unique name for node. If not set, set to hostname") flag.StringVar(&httpAddr, "http-addr", "localhost:4001", "HTTP server bind address. For HTTPS, set X.509 cert and key") flag.StringVar(&httpAdv, "http-adv-addr", "", "Advertised HTTP address. If not set, same as HTTP server") flag.StringVar(&x509CACert, "http-ca-cert", "", "Path to root X.509 certificate for HTTP endpoint") @@ -196,10 +198,15 @@ func main() { } dbConf := store.NewDBConfig(dsn, !onDisk) + nid, err := idOrHostname() + if err != nil { + log.Fatalf("failed to determine node ID: %s", err.Error()) + } str := store.New(&store.StoreConfig{ DBConf: dbConf, Dir: dataPath, Tn: raftTn, + ID: nid, }) // Set optional parameters on store. 
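For reference, with the `cluster/join.go` and `http` changes in this diff, a join request now carries the joining node's ID alongside its Raft address. A minimal sketch of the equivalent HTTP call — the `/join` path and hostnames are assumptions here, for illustration only:

```bash
curl -XPOST 'http://host1:4001/join' \
     -H 'Content-Type: application/json' \
     -d '{"id": "2", "addr": "host2:4002"}'
```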
@@ -271,7 +278,7 @@ func main() { } } - if j, err := cluster.Join(joins, advAddr, &tlsConfig); err != nil { + if j, err := cluster.Join(joins, nid, advAddr, &tlsConfig); err != nil { log.Fatalf("failed to join cluster at %s: %s", joins, err.Error()) } else { log.Println("successfully joined cluster at", j) @@ -408,6 +415,13 @@ func credentialStore() (*auth.CredentialsStore, error) { return cs, nil } +func idOrHostname() (string, error) { + if nodeID != "" { + return nodeID, nil + } + return os.Hostname() +} + // prof stores the file locations of active profiles. var prof struct { cpu *os.File diff --git a/go.mod b/go.mod new file mode 100644 index 00000000..061cbd7e --- /dev/null +++ b/go.mod @@ -0,0 +1,16 @@ +module github.com/rqlite/rqlite + +go 1.13 + +require ( + github.com/Bowery/prompt v0.0.0-20190916142128-fa8279994f75 + github.com/hashicorp/raft v1.1.1 + github.com/hashicorp/raft-boltdb v0.0.0-20191021154308-4207f1bf0617 + github.com/labstack/gommon v0.3.0 // indirect + github.com/mattn/go-colorable v0.1.4 // indirect + github.com/mattn/go-isatty v0.0.11 // indirect + github.com/mattn/go-sqlite3 v2.0.2+incompatible + github.com/mkideal/cli v0.0.3 + github.com/mkideal/pkg v0.0.0-20170503154153-3e188c9e7ecc + golang.org/x/crypto v0.0.0-20191219195013-becbf705a915 +) diff --git a/go.sum b/go.sum new file mode 100644 index 00000000..10e1c90b --- /dev/null +++ b/go.sum @@ -0,0 +1,78 @@ +github.com/Bowery/prompt v0.0.0-20190916142128-fa8279994f75 h1:xGHheKK44eC6K0u5X+DZW/fRaR1LnDdqPHMZMWx5fv8= +github.com/Bowery/prompt v0.0.0-20190916142128-fa8279994f75/go.mod h1:4/6eNcqZ09BZ9wLK3tZOjBA1nDj+B0728nlX5YRlSmQ= +github.com/DataDog/datadog-go v2.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= +github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878 h1:EFSB7Zo9Eg91v7MJPVsifUysc/wPdN+NOnVe6bWbdBM= +github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878/go.mod h1:3AMJUQhVx52RsWOnlkpikZr01T/yAVN2gn0861vByNg= +github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= +github.com/boltdb/bolt v1.3.1 h1:JQmyP4ZBrce+ZQu0dY660FMfatumYDLun9hBCUVIkF4= +github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= +github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= +github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= +github.com/hashicorp/go-hclog v0.9.1 h1:9PZfAcVEvez4yhLH2TBU64/h/z4xlFI80cWXRrxuKuM= +github.com/hashicorp/go-hclog v0.9.1/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= +github.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0= +github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= +github.com/hashicorp/go-msgpack v0.5.5 h1:i9R9JSrqIz0QVLz3sz+i3YJdT7TTSLcfLLzJi9aZTuI= +github.com/hashicorp/go-msgpack v0.5.5/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= +github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= +github.com/hashicorp/go-uuid v1.0.0/go.mod 
h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo= +github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/raft v1.1.0/go.mod h1:4Ak7FSPnuvmb0GV6vgIAJ4vYT4bek9bb6Q+7HVbyzqM= +github.com/hashicorp/raft v1.1.1 h1:HJr7UE1x/JrJSc9Oy6aDBHtNHUUBHjcQjTgvUVihoZs= +github.com/hashicorp/raft v1.1.1/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8= +github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea/go.mod h1:pNv7Wc3ycL6F5oOWn+tPGo2gWD4a5X+yp/ntwdKLjRk= +github.com/hashicorp/raft-boltdb v0.0.0-20191021154308-4207f1bf0617 h1:CJDRE/2tBNFOrcoexD2nvTRbQEox3FDxl4NxIezp1b8= +github.com/hashicorp/raft-boltdb v0.0.0-20191021154308-4207f1bf0617/go.mod h1:aUF6HQr8+t3FC/ZHAC+pZreUBhTaxumuu3L+d37uRxk= +github.com/labstack/gommon v0.3.0 h1:JEeO0bvc78PKdyHxloTKiF8BD5iGrH8T6MSeGvSgob0= +github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= +github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-colorable v0.1.4 h1:snbPLB8fVfU9iwbbo30TPtbLRzwWu6aJS6Xh4eaaviA= +github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= +github.com/mattn/go-isatty v0.0.11 h1:FxPOTFNqGkuDUGi3H/qkUbQO4ZiBa2brKq5r0l8TGeM= +github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= +github.com/mattn/go-sqlite3 v2.0.2+incompatible h1:qzw9c2GNT8UFrgWNDhCTqRqYUSmu/Dav/9Z58LGpk7U= +github.com/mattn/go-sqlite3 v2.0.2+incompatible/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= +github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/mkideal/cli v0.0.3 h1:Y1OXyfTVI9eQ9RTiXq12h7q88y22Q9ZU4VI09ifz6lE= +github.com/mkideal/cli v0.0.3/go.mod h1:HLuSls75T7LFlTgByGeuLwcvdUmmx/aUQxnnEKxoZzY= +github.com/mkideal/pkg v0.0.0-20170503154153-3e188c9e7ecc h1:eyN9UWVX+CeeCQZPudCUAPc84xQYTjEu9MWNa2HuJrs= +github.com/mkideal/pkg v0.0.0-20170503154153-3e188c9e7ecc/go.mod h1:DECgB56amjU/mmmsKuooNPQ1856HASOMC3D4ntSVU70= +github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v0.9.2/go.mod h1:OsXs2jCmiKlQ1lTBmv21f2mNfw4xf/QclQDMrYNZzcM= +github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= +github.com/prometheus/common v0.0.0-20181126121408-4724e9255275/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= +github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod 
h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191219195013-becbf705a915 h1:aJ0ex187qoXrJHPo8ZasVTASQB7llQP6YeNzgDALPRk= +golang.org/x/crypto v0.0.0-20191219195013-becbf705a915/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190523142557-0e01d883c5c5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190602015325-4c4f7f33c9ed h1:uPxWBzB3+mlnjy9W58qY1j/cjyFjutgw/Vhan2zLy/A= +golang.org/x/sys v0.0.0-20190602015325-4c4f7f33c9ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037 h1:YyJpGZS1sBuBCzLAR1VEpK193GlqGZbnPFnPV/5Rsb4= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/http/service.go b/http/service.go index 70c0d8d4..f77bc7b7 100644 --- a/http/service.go +++ b/http/service.go @@ -43,8 +43,8 @@ type Store interface { // is held on the database. Query(qr *store.QueryRequest) ([]*sql.Rows, error) - // Join joins the node, reachable at addr, to this node. - Join(addr string) error + // Join joins the node with the given ID, reachable at addr, to this node. + Join(id, addr string) error // Remove removes the node, specified by addr, from the cluster. 
Remove(addr string) error @@ -295,7 +295,8 @@ func (s *Service) handleJoin(w http.ResponseWriter, r *http.Request) { return } - if len(m) != 1 { + remoteID, ok := m["id"] + if !ok { w.WriteHeader(http.StatusBadRequest) return } @@ -306,7 +307,7 @@ func (s *Service) handleJoin(w http.ResponseWriter, r *http.Request) { return } - if err := s.store.Join(remoteAddr); err != nil { + if err := s.store.Join(remoteID, remoteAddr); err != nil { if err == store.ErrNotLeader { leader := s.store.Peer(s.store.Leader()) if leader == "" { diff --git a/http/service_test.go b/http/service_test.go index b59c048c..2339aabc 100644 --- a/http/service_test.go +++ b/http/service_test.go @@ -491,7 +491,7 @@ func (m *MockStore) Query(qr *store.QueryRequest) ([]*sql.Rows, error) { return nil, nil } -func (m *MockStore) Join(addr string) error { +func (m *MockStore) Join(id, addr string) error { return nil } diff --git a/store/command.go b/store/command.go new file mode 100644 index 00000000..7414b7be --- /dev/null +++ b/store/command.go @@ -0,0 +1,41 @@ +package store + +import ( + "encoding/json" +) + +// commandType are commands that affect the state of the cluster, and must go through Raft. +type commandType int + +const ( + execute commandType = iota // Commands which modify the database. + query // Commands which query the database. + peer // Commands that modify peers map. +) + +type command struct { + Typ commandType `json:"typ,omitempty"` + Sub json.RawMessage `json:"sub,omitempty"` +} + +func newCommand(t commandType, d interface{}) (*command, error) { + b, err := json.Marshal(d) + if err != nil { + return nil, err + } + return &command{ + Typ: t, + Sub: b, + }, nil + +} + +// databaseSub is a command sub which involves interaction with the database. +type databaseSub struct { + Tx bool `json:"tx,omitempty"` + Queries []string `json:"queries,omitempty"` + Timings bool `json:"timings,omitempty"` +} + +// peersSub is a command which sets the API address for a Raft address. +type peersSub map[string]string diff --git a/store/peers_test.go b/store/peers_test.go deleted file mode 100644 index 288be1d4..00000000 --- a/store/peers_test.go +++ /dev/null @@ -1,99 +0,0 @@ -package store - -import ( - "os" - "sort" - "testing" - "time" -) - -// Test_NumPeersEnableSingle tests that a single node reports -// itself as capable of joining a cluster. -func Test_NumPeersEnableSingle(t *testing.T) { - s0 := mustNewStore(true) - defer os.RemoveAll(s0.Path()) - if err := s0.Open(true); err != nil { - t.Fatalf("failed to open node for num peers test: %s", err.Error()) - } - s0.WaitForLeader(5 * time.Second) - s0.Close(true) - - j, err := JoinAllowed(s0.Path()) - if err != nil { - t.Fatalf("failed to check join status of %s: %s", s0.Path(), err.Error()) - } - if !j { - t.Fatalf("config files at %s indicate joining is not allowed", s0.Path()) - } -} - -// Test_NumPeersDisableSingle tests that a single node reports -// itself as capable of joining a cluster, when explicitly configured -// as not capable of self-electing. 
-func Test_NumPeersDisableSingle(t *testing.T) { - s0 := mustNewStore(true) - defer os.RemoveAll(s0.Path()) - if err := s0.Open(false); err != nil { - t.Fatalf("failed to open node for num peers test: %s", err.Error()) - } - s0.Close(true) - - j, err := JoinAllowed(s0.Path()) - if err != nil { - t.Fatalf("failed to check join status of %s: %s", s0.Path(), err.Error()) - } - if !j { - t.Fatalf("config files at %s indicate joining is not allowed", s0.Path()) - } -} - -// Test_NumPeersJoin tests that the correct number of nodes are recorded by -// nodes in a cluster. -func Test_NumPeersJoin(t *testing.T) { - s0 := mustNewStore(true) - defer os.RemoveAll(s0.Path()) - if err := s0.Open(true); err != nil { - t.Fatalf("failed to open node for num peers test: %s", err.Error()) - } - s0.WaitForLeader(5 * time.Second) - - s1 := mustNewStore(true) - defer os.RemoveAll(s1.Path()) - if err := s1.Open(false); err != nil { - t.Fatalf("failed to open node for num peers test: %s", err.Error()) - } - - // Get sorted list of cluster nodes. - storeNodes := []string{s0.Addr().String(), s1.Addr().String()} - sort.StringSlice(storeNodes).Sort() - - // Join the second node to the first. - if err := s0.Join(s1.Addr().String()); err != nil { - t.Fatalf("failed to join to node at %s: %s", s0.Addr().String(), err.Error()) - } - s1.WaitForLeader(5 * time.Second) - s1.Close(true) - s0.Close(true) - - // Check that peers are set as expected. - m, _ := NumPeers(s0.Path()) - if m != 2 { - t.Fatalf("got wrong value for number of peers, exp %d, got %d", 2, m) - } - - j, err := JoinAllowed(s0.Path()) - if err != nil { - t.Fatalf("failed to check join status of %s: %s", s0.Path(), err.Error()) - } - if j { - t.Fatalf("config files at %s indicate joining is allowed", s0.Path()) - } - - k, err := JoinAllowed(s1.Path()) - if err != nil { - t.Fatalf("failed to check join status of %s: %s", s1.Path(), err.Error()) - } - if k { - t.Fatalf("config files at %s indicate joining is allowed", s1.Path()) - } -} diff --git a/store/store.go b/store/store.go index f802d961..18ca0909 100644 --- a/store/store.go +++ b/store/store.go @@ -16,6 +16,7 @@ import ( "net" "os" "path/filepath" + "sort" "sync" "time" @@ -91,50 +92,6 @@ type ExecuteRequest struct { Tx bool } -// Transport is the interface the network service must provide. -type Transport interface { - net.Listener - - // Dial is used to create a new outgoing connection - Dial(address string, timeout time.Duration) (net.Conn, error) -} - -// commandType are commands that affect the state of the cluster, and must go through Raft. -type commandType int - -const ( - execute commandType = iota // Commands which modify the database. - query // Commands which query the database. - peer // Commands that modify peers map. -) - -type command struct { - Typ commandType `json:"typ,omitempty"` - Sub json.RawMessage `json:"sub,omitempty"` -} - -func newCommand(t commandType, d interface{}) (*command, error) { - b, err := json.Marshal(d) - if err != nil { - return nil, err - } - return &command{ - Typ: t, - Sub: b, - }, nil - -} - -// databaseSub is a command sub which involves interaction with the database. -type databaseSub struct { - Tx bool `json:"tx,omitempty"` - Queries []string `json:"queries,omitempty"` - Timings bool `json:"timings,omitempty"` -} - -// peersSub is a command which sets the API address for a Raft address. -type peersSub map[string]string - // ConsistencyLevel represents the available read consistency levels. 
type ConsistencyLevel int @@ -199,19 +156,30 @@ func NewDBConfig(dsn string, memory bool) *DBConfig { return &DBConfig{DSN: dsn, Memory: memory} } +// Server represents another node in the cluster. +type Server struct { + ID string `json:"id,omitempty"` + Addr string `json:"addr,omitempty"` +} + +type Servers []*Server + +func (s Servers) Less(i, j int) bool { return s[i].ID < s[j].ID } +func (s Servers) Len() int { return len(s) } +func (s Servers) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + // Store is a SQLite database, where all changes are made via Raft consensus. type Store struct { raftDir string mu sync.RWMutex // Sync access between queries and snapshots. - raft *raft.Raft // The consensus mechanism. - raftTransport Transport - peerStore raft.PeerStore - dbConf *DBConfig // SQLite database config. - dbPath string // Path to underlying SQLite file, if not in-memory. - db *sql.DB // The underlying SQLite store. - joinRequired bool // Whether an explicit join is required. + raft *raft.Raft // The consensus mechanism. + raftTn *raftTransport + raftID string // Node ID. + dbConf *DBConfig // SQLite database config. + dbPath string // Path to underlying SQLite file, if not in-memory. + db *sql.DB // The underlying SQLite store. metaMu sync.RWMutex meta *clusterMeta @@ -229,11 +197,11 @@ type Store struct { // StoreConfig represents the configuration of the underlying Store. type StoreConfig struct { - DBConf *DBConfig // The DBConfig object for this Store. - Dir string // The working directory for raft. - Tn Transport // The underlying Transport for raft. - Logger *log.Logger // The logger to use to log stuff. - PeerStore raft.PeerStore // The PeerStore to use for raft. + DBConf *DBConfig // The DBConfig object for this Store. + Dir string // The working directory for raft. + Tn Transport // The underlying Transport for raft. + ID string // Node ID. + Logger *log.Logger // The logger to use to log stuff. } // New returns a new Store. @@ -244,22 +212,24 @@ func New(c *StoreConfig) *Store { } return &Store{ - raftDir: c.Dir, - raftTransport: c.Tn, - dbConf: c.DBConf, - dbPath: filepath.Join(c.Dir, sqliteFile), - meta: newClusterMeta(), - logger: logger, - peerStore: c.PeerStore, - ApplyTimeout: applyTimeout, - OpenTimeout: openTimeout, + raftDir: c.Dir, + raftTn: &raftTransport{c.Tn}, + raftID: c.ID, + dbConf: c.DBConf, + dbPath: filepath.Join(c.Dir, sqliteFile), + meta: newClusterMeta(), + logger: logger, + ApplyTimeout: applyTimeout, + OpenTimeout: openTimeout, } } // Open opens the store. If enableSingle is set, and there are no existing peers, // then this node becomes the first node, and therefore leader, of the cluster. func (s *Store) Open(enableSingle bool) error { - s.logger.Printf("ensuring %s exists", s.raftDir) + s.logger.Printf("opening store with node ID %s", s.raftID) + + s.logger.Printf("ensuring directory at %s exists", s.raftDir) if err := os.MkdirAll(s.raftDir, 0755); err != nil { return err } @@ -270,31 +240,17 @@ func (s *Store) Open(enableSingle bool) error { } s.db = db - // Setup Raft communication. - transport := raft.NewNetworkTransport(s.raftTransport, 3, 10*time.Second, os.Stderr) + // Is this a brand new node? + newNode := !pathExists(filepath.Join(s.raftDir, "raft.db")) - // Create peer storage if necesssary. - if s.peerStore == nil { - s.peerStore = raft.NewJSONPeers(s.raftDir, transport) - } + // Setup Raft communication. + transport := raft.NewNetworkTransport(s.raftTn, 3, 10*time.Second, os.Stderr) // Get the Raft configuration for this store. 
config := s.raftConfig() - // Check for any existing peers. - peers, err := s.peerStore.Peers() - if err != nil { - return err - } - s.joinRequired = len(peers) <= 1 - - // Allow the node to entry single-mode, potentially electing itself, if - // explicitly enabled and there is only 1 node in the cluster already. - if enableSingle && len(peers) <= 1 { - s.logger.Println("enabling single-node mode") - config.EnableSingleNode = true - config.DisableBootstrapAfterElect = false - } + config.LocalID = raft.ServerID(s.raftID) + // XXXconfig.Logger = log.New(os.Stderr, "[raft] ", log.LstdFlags) // Create the snapshot store. This allows Raft to truncate the log. snapshots, err := raft.NewFileSnapshotStore(s.raftDir, retainSnapshotCount, os.Stderr) @@ -309,10 +265,26 @@ func (s *Store) Open(enableSingle bool) error { } // Instantiate the Raft system. - ra, err := raft.NewRaft(config, s, logStore, logStore, snapshots, s.peerStore, transport) + ra, err := raft.NewRaft(config, s, logStore, logStore, snapshots, transport) if err != nil { return fmt.Errorf("new raft: %s", err) } + + if enableSingle && newNode { + s.logger.Printf("bootstrap needed") + configuration := raft.Configuration{ + Servers: []raft.Server{ + raft.Server{ + ID: config.LocalID, + Address: transport.LocalAddr(), + }, + }, + } + ra.BootstrapCluster(configuration) + } else { + s.logger.Printf("no bootstrap needed") + } + s.raft = ra if s.OpenTimeout != 0 { @@ -364,11 +336,6 @@ func (s *Store) State() ClusterState { } } -// JoinRequired returns whether the node needs to join a cluster after being opened. -func (s *Store) JoinRequired() bool { - return s.joinRequired -} - // Path returns the path to the store's storage directory. func (s *Store) Path() string { return s.raftDir @@ -376,13 +343,18 @@ func (s *Store) Path() string { // Addr returns the address of the store. func (s *Store) Addr() net.Addr { - return s.raftTransport.Addr() + return s.raftTn.Addr() +} + +// ID returns the Raft ID of the store. +func (s *Store) ID() string { + return s.raftID } // Leader returns the current leader. Returns a blank string if there is // no leader. func (s *Store) Leader() string { - return s.raft.Leader() + return string(s.raft.Leader()) } // Peer returns the API address for the given addr. If there is no peer @@ -403,9 +375,24 @@ func (s *Store) APIPeers() (map[string]string, error) { return peers, nil } -// Nodes returns the list of current peers. -func (s *Store) Nodes() ([]string, error) { - return s.peerStore.Peers() +// Nodes returns the slice of nodes in the cluster, sorted by ID ascending. +func (s *Store) Nodes() ([]*Server, error) { + f := s.raft.GetConfiguration() + if f.Error() != nil { + return nil, f.Error() + } + + rs := f.Configuration().Servers + servers := make([]*Server, len(rs)) + for i := range rs { + servers[i] = &Server{ + ID: string(rs[i].ID), + Addr: string(rs[i].Address), + } + } + + sort.Sort(Servers(servers)) + return servers, nil } // WaitForLeader blocks until a leader is detected, or the timeout expires. 
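As an aside, the rewritten `Nodes()` above now derives cluster membership from the Raft configuration itself, rather than a separate peer store, and returns it in a stable order. A self-contained sketch of that ordering contract, mirroring the `Server`/`Servers` types added in this diff (IDs and addresses are hypothetical):

```go
package main

import (
	"fmt"
	"sort"
)

// Server and Servers mirror the types added to store/store.go.
type Server struct {
	ID   string `json:"id,omitempty"`
	Addr string `json:"addr,omitempty"`
}

type Servers []*Server

func (s Servers) Less(i, j int) bool { return s[i].ID < s[j].ID }
func (s Servers) Len() int           { return len(s) }
func (s Servers) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }

func main() {
	// Raft may hand back servers in any order; sorting by ID makes the
	// output deterministic for callers such as Stats().
	servers := Servers{
		{ID: "node2", Addr: "host2:4002"},
		{ID: "node1", Addr: "host1:4002"},
	}
	sort.Sort(servers)
	for _, n := range servers {
		fmt.Println(n.ID, n.Addr) // node1 first, then node2
	}
}
```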
@@ -471,13 +458,12 @@ func (s *Store) Stats() (map[string]interface{}, error) { dbStatus["path"] = ":memory:" } - s.metaMu.RLock() - defer s.metaMu.RUnlock() - peers, err := s.peerStore.Peers() + nodes, err := s.Nodes() if err != nil { return nil, err } status := map[string]interface{}{ + "node_id": s.raftID, "raft": s.raft.Stats(), "addr": s.Addr().String(), "leader": s.Leader(), @@ -487,7 +473,7 @@ func (s *Store) Stats() (map[string]interface{}, error) { "election_timeout": s.ElectionTimeout.String(), "snapshot_threshold": s.SnapshotThreshold, "meta": s.meta, - "peers": peers, + "peers": nodes, "dir": s.raftDir, "sqlite3": dbStatus, "db_conf": s.dbConf, @@ -647,15 +633,15 @@ func (s *Store) UpdateAPIPeers(peers map[string]string) error { return f.Error() } -// Join joins a node, located at addr, to this store. The node must be ready to -// respond to Raft communications at that address. -func (s *Store) Join(addr string) error { +// Join joins a node, identified by id and located at addr, to this store. +// The node must be ready to respond to Raft communications at that address. +func (s *Store) Join(id, addr string) error { s.logger.Printf("received request to join node at %s", addr) if s.raft.State() != raft.Leader { return ErrNotLeader } - f := s.raft.AddPeer(addr) + f := s.raft.AddVoter(raft.ServerID(id), raft.ServerAddress(addr), 0, 0) if e := f.(raft.Future); e.Error() != nil { if e.Error() == raft.ErrNotLeader { return ErrNotLeader @@ -666,21 +652,21 @@ func (s *Store) Join(addr string) error { return nil } -// Remove removes a node from the store, specified by addr. -func (s *Store) Remove(addr string) error { - s.logger.Printf("received request to remove node %s", addr) +// Remove removes a node from the store, specified by ID. +func (s *Store) Remove(id string) error { + s.logger.Printf("received request to remove node %s", id) if s.raft.State() != raft.Leader { return ErrNotLeader } - f := s.raft.RemovePeer(addr) + f := s.raft.RemoveServer(raft.ServerID(id), 0, 0) if f.Error() != nil { if f.Error() == raft.ErrNotLeader { return ErrNotLeader } return f.Error() } - s.logger.Printf("node %s removed successfully", addr) + s.logger.Printf("node %s removed successfully", id) return nil } @@ -965,3 +951,11 @@ func enabledFromBool(b bool) string { } return "disabled" } + +// pathExists returns true if the given path exists. +func pathExists(p string) bool { + if _, err := os.Lstat(p); err != nil && os.IsNotExist(err) { + return false + } + return true +} diff --git a/store/store_test.go b/store/store_test.go index 54a27c25..46b01f18 100644 --- a/store/store_test.go +++ b/store/store_test.go @@ -117,74 +117,6 @@ func Test_SingleNodeInMemExecuteQueryFail(t *testing.T) { } } -func Test_StoreLogTruncationMultinode(t *testing.T) { - s0 := mustNewStore(true) - defer os.RemoveAll(s0.Path()) - s0.SnapshotThreshold = 4 - s0.SnapshotInterval = 100 * time.Millisecond - - if err := s0.Open(true); err != nil { - t.Fatalf("failed to open single-node store: %s", err.Error()) - } - defer s0.Close(true) - s0.WaitForLeader(10 * time.Second) - nSnaps := stats.Get(numSnaphots).String() - - // Write more than s.SnapshotThreshold statements. 
- queries := []string{ - `CREATE TABLE foo (id INTEGER NOT NULL PRIMARY KEY, name TEXT)`, - `INSERT INTO foo(id, name) VALUES(1, "fiona")`, - `INSERT INTO foo(id, name) VALUES(2, "fiona")`, - `INSERT INTO foo(id, name) VALUES(3, "fiona")`, - `INSERT INTO foo(id, name) VALUES(4, "fiona")`, - `INSERT INTO foo(id, name) VALUES(5, "fiona")`, - } - for i := range queries { - _, err := s0.Execute(&ExecuteRequest{[]string{queries[i]}, false, false}) - if err != nil { - t.Fatalf("failed to execute on single node: %s", err.Error()) - } - } - - // Wait for the snapshot to happen and log to be truncated. - for { - time.Sleep(1000 * time.Millisecond) - if stats.Get(numSnaphots).String() != nSnaps { - // It's changed, so a snap and truncate has happened. - break - } - } - - // Fire up new node and ensure it picks up all changes. This will - // involve getting a snapshot and truncated log. - s1 := mustNewStore(true) - if err := s1.Open(true); err != nil { - t.Fatalf("failed to open single-node store: %s", err.Error()) - } - defer s1.Close(true) - - // Join the second node to the first. - if err := s0.Join(s1.Addr().String()); err != nil { - t.Fatalf("failed to join to node at %s: %s", s0.Addr(), err.Error()) - } - s1.WaitForLeader(10 * time.Second) - // Wait until the log entries have been applied to the follower, - // and then query. - if err := s1.WaitForAppliedIndex(8, 5*time.Second); err != nil { - t.Fatalf("error waiting for follower to apply index: %s:", err.Error()) - } - r, err := s1.Query(&QueryRequest{[]string{`SELECT count(*) FROM foo`}, false, true, None}) - if err != nil { - t.Fatalf("failed to query single node: %s", err.Error()) - } - if exp, got := `["count(*)"]`, asJSON(r[0].Columns); exp != got { - t.Fatalf("unexpected results for query\nexp: %s\ngot: %s", exp, got) - } - if exp, got := `[[5]]`, asJSON(r[0].Values); exp != got { - t.Fatalf("unexpected results for query\nexp: %s\ngot: %s", exp, got) - } -} - func Test_SingleNodeFileExecuteQuery(t *testing.T) { s := mustNewStore(false) defer os.RemoveAll(s.Path()) @@ -502,11 +434,11 @@ func Test_MultiNodeJoinRemove(t *testing.T) { defer s1.Close(true) // Get sorted list of cluster nodes. - storeNodes := []string{s0.Addr().String(), s1.Addr().String()} + storeNodes := []string{s0.ID(), s1.ID()} sort.StringSlice(storeNodes).Sort() // Join the second node to the first. - if err := s0.Join(s1.Addr().String()); err != nil { + if err := s0.Join(s1.ID(), s1.Addr().String()); err != nil { t.Fatalf("failed to join to node at %s: %s", s0.Addr().String(), err.Error()) } @@ -514,18 +446,17 @@ func Test_MultiNodeJoinRemove(t *testing.T) { if err != nil { t.Fatalf("failed to get nodes: %s", err.Error()) } - sort.StringSlice(nodes).Sort() if len(nodes) != len(storeNodes) { t.Fatalf("size of cluster is not correct") } - if storeNodes[0] != nodes[0] && storeNodes[1] != nodes[1] { + if storeNodes[0] != nodes[0].ID || storeNodes[1] != nodes[1].ID { t.Fatalf("cluster does not have correct nodes") } // Remove a node. 
- if err := s0.Remove(s1.Addr().String()); err != nil { - t.Fatalf("failed to remove %s from cluster: %s", s1.Addr().String(), err.Error()) + if err := s0.Remove(s1.ID()); err != nil { + t.Fatalf("failed to remove %s from cluster: %s", s1.ID(), err.Error()) } nodes, err = s0.Nodes() @@ -535,7 +466,7 @@ func Test_MultiNodeJoinRemove(t *testing.T) { if len(nodes) != 1 { t.Fatalf("size of cluster is not correct post remove") } - if s0.Addr().String() != nodes[0] { + if s0.ID() != nodes[0].ID { t.Fatalf("cluster does not have correct nodes post remove") } } @@ -557,7 +488,7 @@ func Test_MultiNodeExecuteQuery(t *testing.T) { defer s1.Close(true) // Join the second node to the first. - if err := s0.Join(s1.Addr().String()); err != nil { + if err := s0.Join(s1.ID(), s1.Addr().String()); err != nil { t.Fatalf("failed to join to node at %s: %s", s0.Addr().String(), err.Error()) } @@ -605,6 +536,74 @@ func Test_MultiNodeExecuteQuery(t *testing.T) { } } +func Test_StoreLogTruncationMultinode(t *testing.T) { + s0 := mustNewStore(true) + defer os.RemoveAll(s0.Path()) + s0.SnapshotThreshold = 4 + s0.SnapshotInterval = 100 * time.Millisecond + + if err := s0.Open(true); err != nil { + t.Fatalf("failed to open single-node store: %s", err.Error()) + } + defer s0.Close(true) + s0.WaitForLeader(10 * time.Second) + nSnaps := stats.Get(numSnaphots).String() + + // Write more than s.SnapshotThreshold statements. + queries := []string{ + `CREATE TABLE foo (id INTEGER NOT NULL PRIMARY KEY, name TEXT)`, + `INSERT INTO foo(id, name) VALUES(1, "fiona")`, + `INSERT INTO foo(id, name) VALUES(2, "fiona")`, + `INSERT INTO foo(id, name) VALUES(3, "fiona")`, + `INSERT INTO foo(id, name) VALUES(4, "fiona")`, + `INSERT INTO foo(id, name) VALUES(5, "fiona")`, + } + for i := range queries { + _, err := s0.Execute(&ExecuteRequest{[]string{queries[i]}, false, false}) + if err != nil { + t.Fatalf("failed to execute on single node: %s", err.Error()) + } + } + + // Wait for the snapshot to happen and log to be truncated. + for { + time.Sleep(1000 * time.Millisecond) + if stats.Get(numSnaphots).String() != nSnaps { + // It's changed, so a snap and truncate has happened. + break + } + } + + // Fire up new node and ensure it picks up all changes. This will + // involve getting a snapshot and truncated log. + s1 := mustNewStore(true) + if err := s1.Open(true); err != nil { + t.Fatalf("failed to open single-node store: %s", err.Error()) + } + defer s1.Close(true) + + // Join the second node to the first. + if err := s0.Join(s1.ID(), s1.Addr().String()); err != nil { + t.Fatalf("failed to join to node at %s: %s", s0.Addr(), err.Error()) + } + s1.WaitForLeader(10 * time.Second) + // Wait until the log entries have been applied to the follower, + // and then query. 
+ if err := s1.WaitForAppliedIndex(8, 5*time.Second); err != nil { + t.Fatalf("error waiting for follower to apply index: %s:", err.Error()) + } + r, err := s1.Query(&QueryRequest{[]string{`SELECT count(*) FROM foo`}, false, true, None}) + if err != nil { + t.Fatalf("failed to query single node: %s", err.Error()) + } + if exp, got := `["count(*)"]`, asJSON(r[0].Columns); exp != got { + t.Fatalf("unexpected results for query\nexp: %s\ngot: %s", exp, got) + } + if exp, got := `[[5]]`, asJSON(r[0].Values); exp != got { + t.Fatalf("unexpected results for query\nexp: %s\ngot: %s", exp, got) + } +} + func Test_SingleNodeSnapshotOnDisk(t *testing.T) { s := mustNewStore(false) defer os.RemoveAll(s.Path()) @@ -799,11 +798,13 @@ func mustNewStore(inmem bool) *Store { path := mustTempDir() defer os.RemoveAll(path) + tn := mustMockTransport("localhost:0") cfg := NewDBConfig("", inmem) s := New(&StoreConfig{ DBConf: cfg, Dir: path, - Tn: mustMockTransport("localhost:0"), + Tn: tn, + ID: tn.Addr().String(), // Could be any unique string. }) if s == nil { panic("failed to create new store") diff --git a/store/transport.go b/store/transport.go new file mode 100644 index 00000000..1edd1782 --- /dev/null +++ b/store/transport.go @@ -0,0 +1,38 @@ +package store + +import ( + "net" + "time" + + "github.com/hashicorp/raft" +) + +// Transport is the interface the network service must provide. +type Transport interface { + net.Listener + + // Dial is used to create a new outgoing connection + Dial(address string, timeout time.Duration) (net.Conn, error) +} + +// raftTransport takes a Transport and makes it suitable for use by the Raft +// networking system. +type raftTransport struct { + tn Transport +} + +func (r *raftTransport) Dial(address raft.ServerAddress, timeout time.Duration) (net.Conn, error) { + return r.tn.Dial(string(address), timeout) +} + +func (r *raftTransport) Accept() (net.Conn, error) { + return r.tn.Accept() +} + +func (r *raftTransport) Addr() net.Addr { + return r.tn.Addr() +} + +func (r *raftTransport) Close() error { + return r.tn.Close() +} diff --git a/system_test/end_to_end.sh b/system_test/end_to_end.sh index 0f04f780..3742a5d7 100755 --- a/system_test/end_to_end.sh +++ b/system_test/end_to_end.sh @@ -5,11 +5,11 @@ TMP_DATA=`mktemp` rm $GOPATH/bin/* go install ./... -$GOPATH/bin/rqlited -http-addr localhost:4001 -raft-addr localhost:4002 ${TMP_DATA}_1 & +$GOPATH/bin/rqlited -node-id node0 -http-addr localhost:4001 -raft-addr localhost:4002 ${TMP_DATA}_1 & sleep 5 -$GOPATH/bin/rqlited -http-addr localhost:4003 -raft-addr localhost:4004 -join http://localhost:4001 ${TMP_DATA}_2 & +$GOPATH/bin/rqlited -node-id node1 -http-addr localhost:4003 -raft-addr localhost:4004 -join http://localhost:4001 ${TMP_DATA}_2 & sleep 5 -$GOPATH/bin/rqlited -http-addr localhost:4005 -raft-addr localhost:4006 -join http://localhost:4001 ${TMP_DATA}_3 & +$GOPATH/bin/rqlited -node-id node2 -http-addr localhost:4005 -raft-addr localhost:4006 -join http://localhost:4001 ${TMP_DATA}_3 & sleep 5 wait diff --git a/system_test/end_to_end_enc.sh b/system_test/end_to_end_enc.sh index 7b11adc8..4fe7c6cc 100755 --- a/system_test/end_to_end_enc.sh +++ b/system_test/end_to_end_enc.sh @@ -7,11 +7,11 @@ go install ./... 
openssl req -x509 -nodes -newkey rsa:4096 -keyout ${TMP_DATA}_key.pem -out ${TMP_DATA}_cert.pem -days 365 -$GOPATH/bin/rqlited -http-addr localhost:4001 -raft-addr localhost:4002 -node-cert ${TMP_DATA}_cert.pem -node-key ${TMP_DATA}_key.pem -node-no-verify -node-encrypt ${TMP_DATA}_1 & +$GOPATH/bin/rqlited -node-id node0 -http-addr localhost:4001 -raft-addr localhost:4002 -node-cert ${TMP_DATA}_cert.pem -node-key ${TMP_DATA}_key.pem -node-no-verify -node-encrypt ${TMP_DATA}_1 & sleep 5 -$GOPATH/bin/rqlited -http-addr localhost:4003 -raft-addr localhost:4004 -join http://localhost:4001 -node-cert ${TMP_DATA}_cert.pem -node-key ${TMP_DATA}_key.pem -node-no-verify -node-encrypt ${TMP_DATA}_2 & +$GOPATH/bin/rqlited -node-id node1 -http-addr localhost:4003 -raft-addr localhost:4004 -join http://localhost:4001 -node-cert ${TMP_DATA}_cert.pem -node-key ${TMP_DATA}_key.pem -node-no-verify -node-encrypt ${TMP_DATA}_2 & sleep 5 -$GOPATH/bin/rqlited -http-addr localhost:4005 -raft-addr localhost:4006 -join http://localhost:4001 -node-cert ${TMP_DATA}_cert.pem -node-key ${TMP_DATA}_key.pem -node-no-verify -node-encrypt ${TMP_DATA}_3 & +$GOPATH/bin/rqlited -node-id node2 -http-addr localhost:4005 -raft-addr localhost:4006 -join http://localhost:4001 -node-cert ${TMP_DATA}_cert.pem -node-key ${TMP_DATA}_key.pem -node-no-verify -node-encrypt ${TMP_DATA}_3 & sleep 5 wait diff --git a/system_test/full_system_test.py b/system_test/full_system_test.py index dc2c3434..33c69495 100755 --- a/system_test/full_system_test.py +++ b/system_test/full_system_test.py @@ -19,7 +19,7 @@ RQLITED_PATH = os.environ['RQLITED_PATH'] TIMEOUT=10 class Node(object): - def __init__(self, path, + def __init__(self, path, node_id, api_addr=None, api_adv=None, raft_addr=None, raft_adv=None, dir=None): @@ -33,6 +33,7 @@ class Node(object): api_adv = api_addr self.path = path + self.node_id = node_id self.api_addr = api_addr self.api_adv = api_adv self.raft_addr = raft_addr @@ -67,6 +68,7 @@ class Node(object): return command = [self.path, + '-node-id', self.node_id, '-http-addr', self.api_addr, '-raft-addr', self.raft_addr] if self.api_adv is not None: @@ -199,9 +201,9 @@ class Node(object): def _load_url(self): return 'http://' + self.APIAddr() + '/db/load' def __eq__(self, other): - return self.raft_addr == other.raft_addr + return self.node_id == other.node_id def __str__(self): - return '%s:[%s]:[%s]' % (self.APIAddr(), self.raft_addr, self.dir) + return '%s:[%s]:[%s]:[%s]' % (self.node_id, self.APIAddr(), self.raft_addr, self.dir) def __del__(self): self.stdout_fd.close() self.stderr_fd.close() @@ -239,15 +241,15 @@ class Cluster(object): class TestEndToEnd(unittest.TestCase): def setUp(self): - n0 = Node(RQLITED_PATH) + n0 = Node(RQLITED_PATH, '0') n0.start() n0.wait_for_leader() - n1 = Node(RQLITED_PATH) + n1 = Node(RQLITED_PATH, '1') n1.start(join=n0.APIAddr()) n1.wait_for_leader() - n2 = Node(RQLITED_PATH) + n2 = Node(RQLITED_PATH, '2') n2.start(join=n0.APIAddr()) n2.wait_for_leader() @@ -307,17 +309,17 @@ class TestEndToEnd(unittest.TestCase): class TestEndToEndAdvAddr(TestEndToEnd): def setUp(self): - n0 = Node(RQLITED_PATH, + n0 = Node(RQLITED_PATH, '0', api_addr="0.0.0.0:4001", api_adv="localhost:4001", raft_addr="0.0.0.0:4002", raft_adv="localhost:4002") n0.start() n0.wait_for_leader() - n1 = Node(RQLITED_PATH) + n1 = Node(RQLITED_PATH, '1') n1.start(join=n0.APIAddr()) n1.wait_for_leader() - n2 = Node(RQLITED_PATH) + n2 = Node(RQLITED_PATH, '2') n2.start(join=n0.APIAddr()) n2.wait_for_leader() @@ -328,7 +330,7 @@ class 
TestEndToEndBackupRestore(unittest.TestCase): fd, self.db_file = tempfile.mkstemp() os.close(fd) - self.node0 = Node(RQLITED_PATH) + self.node0 = Node(RQLITED_PATH, '0') self.node0.start() self.node0.wait_for_leader() self.node0.execute('CREATE TABLE foo (id INTEGER NOT NULL PRIMARY KEY, name TEXT)') @@ -342,7 +344,7 @@ class TestEndToEndBackupRestore(unittest.TestCase): self.assertEqual(rows[0], (1, u'fiona')) conn.close() - self.node1 = Node(RQLITED_PATH) + self.node1 = Node(RQLITED_PATH, '1') self.node1.start() self.node1.wait_for_leader() j = self.node1.restore(self.db_file) diff --git a/system_test/helpers.go b/system_test/helpers.go index 40f4cad7..8c112de5 100644 --- a/system_test/helpers.go +++ b/system_test/helpers.go @@ -84,7 +84,7 @@ func (n *Node) QueryMulti(stmts []string) (string, error) { // Join instructs this node to join the leader. func (n *Node) Join(leader *Node) error { - resp, err := DoJoinRequest(leader.APIAddr, n.RaftAddr) + resp, err := DoJoinRequest(leader.APIAddr, n.Store.ID(), n.RaftAddr) if err != nil { return err } @@ -281,9 +281,9 @@ func Remove(n *Node, addr string) error { return nil } -// DoJoinRequest sends a join request to nodeAddr, for raftAddr. -func DoJoinRequest(nodeAddr, raftAddr string) (*http.Response, error) { - b, err := json.Marshal(map[string]string{"addr": raftAddr}) +// DoJoinRequest sends a join request to nodeAddr, for raftID, reachable at raftAddr. +func DoJoinRequest(nodeAddr, raftID, raftAddr string) (*http.Response, error) { + b, err := json.Marshal(map[string]string{"id": raftID, "addr": raftAddr}) if err != nil { return nil, err } @@ -302,10 +302,12 @@ func mustNewNode(enableSingle bool) *Node { } dbConf := store.NewDBConfig("", false) + tn := mustMockTransport("localhost:0") node.Store = store.New(&store.StoreConfig{ DBConf: dbConf, Dir: node.Dir, - Tn: mustMockTransport("localhost:0"), + Tn: tn, + ID: tn.Addr().String(), }) if err := node.Store.Open(enableSingle); err != nil { node.Deprovision() diff --git a/vendor/github.com/hashicorp/raft-boltdb/.travis.yml b/vendor/github.com/hashicorp/raft-boltdb/.travis.yml deleted file mode 100644 index 58357418..00000000 --- a/vendor/github.com/hashicorp/raft-boltdb/.travis.yml +++ /dev/null @@ -1,10 +0,0 @@ -language: go - -go: - - 1.6 - - 1.7 - - tip - -install: make deps -script: - - make test diff --git a/vendor/github.com/hashicorp/raft-boltdb/LICENSE b/vendor/github.com/hashicorp/raft-boltdb/LICENSE deleted file mode 100644 index f0e5c79e..00000000 --- a/vendor/github.com/hashicorp/raft-boltdb/LICENSE +++ /dev/null @@ -1,362 +0,0 @@ -Mozilla Public License, version 2.0 - -1. Definitions - -1.1. "Contributor" - - means each individual or legal entity that creates, contributes to the - creation of, or owns Covered Software. - -1.2. "Contributor Version" - - means the combination of the Contributions of others (if any) used by a - Contributor and that particular Contributor's Contribution. - -1.3. "Contribution" - - means Covered Software of a particular Contributor. - -1.4. "Covered Software" - - means Source Code Form to which the initial Contributor has attached the - notice in Exhibit A, the Executable Form of such Source Code Form, and - Modifications of such Source Code Form, in each case including portions - thereof. - -1.5. "Incompatible With Secondary Licenses" - means - - a. that the initial Contributor has attached the notice described in - Exhibit B to the Covered Software; or - - b. 
that the Covered Software was made available under the terms of - version 1.1 or earlier of the License, but not also under the terms of - a Secondary License. - -1.6. "Executable Form" - - means any form of the work other than Source Code Form. - -1.7. "Larger Work" - - means a work that combines Covered Software with other material, in a - separate file or files, that is not Covered Software. - -1.8. "License" - - means this document. - -1.9. "Licensable" - - means having the right to grant, to the maximum extent possible, whether - at the time of the initial grant or subsequently, any and all of the - rights conveyed by this License. - -1.10. "Modifications" - - means any of the following: - - a. any file in Source Code Form that results from an addition to, - deletion from, or modification of the contents of Covered Software; or - - b. any new file in Source Code Form that contains any Covered Software. - -1.11. "Patent Claims" of a Contributor - - means any patent claim(s), including without limitation, method, - process, and apparatus claims, in any patent Licensable by such - Contributor that would be infringed, but for the grant of the License, - by the making, using, selling, offering for sale, having made, import, - or transfer of either its Contributions or its Contributor Version. - -1.12. "Secondary License" - - means either the GNU General Public License, Version 2.0, the GNU Lesser - General Public License, Version 2.1, the GNU Affero General Public - License, Version 3.0, or any later versions of those licenses. - -1.13. "Source Code Form" - - means the form of the work preferred for making modifications. - -1.14. "You" (or "Your") - - means an individual or a legal entity exercising rights under this - License. For legal entities, "You" includes any entity that controls, is - controlled by, or is under common control with You. For purposes of this - definition, "control" means (a) the power, direct or indirect, to cause - the direction or management of such entity, whether by contract or - otherwise, or (b) ownership of more than fifty percent (50%) of the - outstanding shares or beneficial ownership of such entity. - - -2. License Grants and Conditions - -2.1. Grants - - Each Contributor hereby grants You a world-wide, royalty-free, - non-exclusive license: - - a. under intellectual property rights (other than patent or trademark) - Licensable by such Contributor to use, reproduce, make available, - modify, display, perform, distribute, and otherwise exploit its - Contributions, either on an unmodified basis, with Modifications, or - as part of a Larger Work; and - - b. under Patent Claims of such Contributor to make, use, sell, offer for - sale, have made, import, and otherwise transfer either its - Contributions or its Contributor Version. - -2.2. Effective Date - - The licenses granted in Section 2.1 with respect to any Contribution - become effective for each Contribution on the date the Contributor first - distributes such Contribution. - -2.3. Limitations on Grant Scope - - The licenses granted in this Section 2 are the only rights granted under - this License. No additional rights or licenses will be implied from the - distribution or licensing of Covered Software under this License. - Notwithstanding Section 2.1(b) above, no patent license is granted by a - Contributor: - - a. for any code that a Contributor has removed from Covered Software; or - - b. 
for infringements caused by: (i) Your and any other third party's - modifications of Covered Software, or (ii) the combination of its - Contributions with other software (except as part of its Contributor - Version); or - - c. under Patent Claims infringed by Covered Software in the absence of - its Contributions. - - This License does not grant any rights in the trademarks, service marks, - or logos of any Contributor (except as may be necessary to comply with - the notice requirements in Section 3.4). - -2.4. Subsequent Licenses - - No Contributor makes additional grants as a result of Your choice to - distribute the Covered Software under a subsequent version of this - License (see Section 10.2) or under the terms of a Secondary License (if - permitted under the terms of Section 3.3). - -2.5. Representation - - Each Contributor represents that the Contributor believes its - Contributions are its original creation(s) or it has sufficient rights to - grant the rights to its Contributions conveyed by this License. - -2.6. Fair Use - - This License is not intended to limit any rights You have under - applicable copyright doctrines of fair use, fair dealing, or other - equivalents. - -2.7. Conditions - - Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in - Section 2.1. - - -3. Responsibilities - -3.1. Distribution of Source Form - - All distribution of Covered Software in Source Code Form, including any - Modifications that You create or to which You contribute, must be under - the terms of this License. You must inform recipients that the Source - Code Form of the Covered Software is governed by the terms of this - License, and how they can obtain a copy of this License. You may not - attempt to alter or restrict the recipients' rights in the Source Code - Form. - -3.2. Distribution of Executable Form - - If You distribute Covered Software in Executable Form then: - - a. such Covered Software must also be made available in Source Code Form, - as described in Section 3.1, and You must inform recipients of the - Executable Form how they can obtain a copy of such Source Code Form by - reasonable means in a timely manner, at a charge no more than the cost - of distribution to the recipient; and - - b. You may distribute such Executable Form under the terms of this - License, or sublicense it under different terms, provided that the - license for the Executable Form does not attempt to limit or alter the - recipients' rights in the Source Code Form under this License. - -3.3. Distribution of a Larger Work - - You may create and distribute a Larger Work under terms of Your choice, - provided that You also comply with the requirements of this License for - the Covered Software. If the Larger Work is a combination of Covered - Software with a work governed by one or more Secondary Licenses, and the - Covered Software is not Incompatible With Secondary Licenses, this - License permits You to additionally distribute such Covered Software - under the terms of such Secondary License(s), so that the recipient of - the Larger Work may, at their option, further distribute the Covered - Software under the terms of either this License or such Secondary - License(s). - -3.4. 
Notices - - You may not remove or alter the substance of any license notices - (including copyright notices, patent notices, disclaimers of warranty, or - limitations of liability) contained within the Source Code Form of the - Covered Software, except that You may alter any license notices to the - extent required to remedy known factual inaccuracies. - -3.5. Application of Additional Terms - - You may choose to offer, and to charge a fee for, warranty, support, - indemnity or liability obligations to one or more recipients of Covered - Software. However, You may do so only on Your own behalf, and not on - behalf of any Contributor. You must make it absolutely clear that any - such warranty, support, indemnity, or liability obligation is offered by - You alone, and You hereby agree to indemnify every Contributor for any - liability incurred by such Contributor as a result of warranty, support, - indemnity or liability terms You offer. You may include additional - disclaimers of warranty and limitations of liability specific to any - jurisdiction. - -4. Inability to Comply Due to Statute or Regulation - - If it is impossible for You to comply with any of the terms of this License - with respect to some or all of the Covered Software due to statute, - judicial order, or regulation then You must: (a) comply with the terms of - this License to the maximum extent possible; and (b) describe the - limitations and the code they affect. Such description must be placed in a - text file included with all distributions of the Covered Software under - this License. Except to the extent prohibited by statute or regulation, - such description must be sufficiently detailed for a recipient of ordinary - skill to be able to understand it. - -5. Termination - -5.1. The rights granted under this License will terminate automatically if You - fail to comply with any of its terms. However, if You become compliant, - then the rights granted under this License from a particular Contributor - are reinstated (a) provisionally, unless and until such Contributor - explicitly and finally terminates Your grants, and (b) on an ongoing - basis, if such Contributor fails to notify You of the non-compliance by - some reasonable means prior to 60 days after You have come back into - compliance. Moreover, Your grants from a particular Contributor are - reinstated on an ongoing basis if such Contributor notifies You of the - non-compliance by some reasonable means, this is the first time You have - received notice of non-compliance with this License from such - Contributor, and You become compliant prior to 30 days after Your receipt - of the notice. - -5.2. If You initiate litigation against any entity by asserting a patent - infringement claim (excluding declaratory judgment actions, - counter-claims, and cross-claims) alleging that a Contributor Version - directly or indirectly infringes any patent, then the rights granted to - You by any and all Contributors for the Covered Software under Section - 2.1 of this License shall terminate. - -5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user - license agreements (excluding distributors and resellers) which have been - validly granted by You or Your distributors under this License prior to - termination shall survive termination. - -6. 
Disclaimer of Warranty - - Covered Software is provided under this License on an "as is" basis, - without warranty of any kind, either expressed, implied, or statutory, - including, without limitation, warranties that the Covered Software is free - of defects, merchantable, fit for a particular purpose or non-infringing. - The entire risk as to the quality and performance of the Covered Software - is with You. Should any Covered Software prove defective in any respect, - You (not any Contributor) assume the cost of any necessary servicing, - repair, or correction. This disclaimer of warranty constitutes an essential - part of this License. No use of any Covered Software is authorized under - this License except under this disclaimer. - -7. Limitation of Liability - - Under no circumstances and under no legal theory, whether tort (including - negligence), contract, or otherwise, shall any Contributor, or anyone who - distributes Covered Software as permitted above, be liable to You for any - direct, indirect, special, incidental, or consequential damages of any - character including, without limitation, damages for lost profits, loss of - goodwill, work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses, even if such party shall have been - informed of the possibility of such damages. This limitation of liability - shall not apply to liability for death or personal injury resulting from - such party's negligence to the extent applicable law prohibits such - limitation. Some jurisdictions do not allow the exclusion or limitation of - incidental or consequential damages, so this exclusion and limitation may - not apply to You. - -8. Litigation - - Any litigation relating to this License may be brought only in the courts - of a jurisdiction where the defendant maintains its principal place of - business and such litigation shall be governed by laws of that - jurisdiction, without reference to its conflict-of-law provisions. Nothing - in this Section shall prevent a party's ability to bring cross-claims or - counter-claims. - -9. Miscellaneous - - This License represents the complete agreement concerning the subject - matter hereof. If any provision of this License is held to be - unenforceable, such provision shall be reformed only to the extent - necessary to make it enforceable. Any law or regulation which provides that - the language of a contract shall be construed against the drafter shall not - be used to construe this License against a Contributor. - - -10. Versions of the License - -10.1. New Versions - - Mozilla Foundation is the license steward. Except as provided in Section - 10.3, no one other than the license steward has the right to modify or - publish new versions of this License. Each version will be given a - distinguishing version number. - -10.2. Effect of New Versions - - You may distribute the Covered Software under the terms of the version - of the License under which You originally received the Covered Software, - or under the terms of any subsequent version published by the license - steward. - -10.3. Modified Versions - - If you create software not governed by this License, and you want to - create a new license for such software, you may create and use a - modified version of this License if you rename the license and remove - any references to the name of the license steward (except to note that - such modified license differs from this License). - -10.4. 
Distributing Source Code Form that is Incompatible With Secondary - Licenses If You choose to distribute Source Code Form that is - Incompatible With Secondary Licenses under the terms of this version of - the License, the notice described in Exhibit B of this License must be - attached. - -Exhibit A - Source Code Form License Notice - - This Source Code Form is subject to the - terms of the Mozilla Public License, v. - 2.0. If a copy of the MPL was not - distributed with this file, You can - obtain one at - http://mozilla.org/MPL/2.0/. - -If it is not possible or desirable to put the notice in a particular file, -then You may include the notice in a location (such as a LICENSE file in a -relevant directory) where a recipient would be likely to look for such a -notice. - -You may add additional accurate notices of copyright ownership. - -Exhibit B - "Incompatible With Secondary Licenses" Notice - - This Source Code Form is "Incompatible - With Secondary Licenses", as defined by - the Mozilla Public License, v. 2.0. \ No newline at end of file diff --git a/vendor/github.com/hashicorp/raft-boltdb/Makefile b/vendor/github.com/hashicorp/raft-boltdb/Makefile deleted file mode 100644 index bc5c6cc0..00000000 --- a/vendor/github.com/hashicorp/raft-boltdb/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -DEPS = $(go list -f '{{range .TestImports}}{{.}} {{end}}' ./...) - -.PHONY: test deps - -test: - go test -timeout=30s ./... - -deps: - go get -d -v ./... - echo $(DEPS) | xargs -n1 go get -d - diff --git a/vendor/github.com/hashicorp/raft-boltdb/README.md b/vendor/github.com/hashicorp/raft-boltdb/README.md deleted file mode 100644 index 5d7180ab..00000000 --- a/vendor/github.com/hashicorp/raft-boltdb/README.md +++ /dev/null @@ -1,11 +0,0 @@ -raft-boltdb -=========== - -This repository provides the `raftboltdb` package. The package exports the -`BoltStore` which is an implementation of both a `LogStore` and `StableStore`. - -It is meant to be used as a backend for the `raft` [package -here](https://github.com/hashicorp/raft). - -This implementation uses [BoltDB](https://github.com/boltdb/bolt). BoltDB is -a simple key/value store implemented in pure Go, and inspired by LMDB. 
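To make the removed package's role concrete: `BoltStore` is handed to the `raft` package as both its log store and its stable store. Below is a minimal sketch using only the `NewBoltStore`, `StoreLog`, and `GetLog` calls defined in the files removed in this diff; the database path is illustrative, and the same calls exist in the Raft v1 version of the package that replaces this vendored copy.

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/raft"
	raftboltdb "github.com/hashicorp/raft-boltdb"
)

func main() {
	// Open (or create) the BoltDB file that will back the Raft log.
	// The path is illustrative only.
	store, err := raftboltdb.NewBoltStore("/tmp/raft.db")
	if err != nil {
		log.Fatalf("failed to open bolt store: %s", err)
	}
	defer store.Close()

	// BoltStore satisfies both storage interfaces the raft package needs.
	var _ raft.LogStore = store
	var _ raft.StableStore = store

	// Append a log entry and read it back, as raft itself would.
	if err := store.StoreLog(&raft.Log{Index: 1, Data: []byte("data")}); err != nil {
		log.Fatalf("store log: %s", err)
	}
	var entry raft.Log
	if err := store.GetLog(1, &entry); err != nil {
		log.Fatalf("get log: %s", err)
	}
	fmt.Printf("log %d: %s\n", entry.Index, entry.Data)
}
```

The compile-time assertions (`var _ raft.LogStore = store`) are the same check the package's own `TestBoltStore_Implements` test performs, and are a cheap way to catch interface drift when upgrading the vendored dependency.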
diff --git a/vendor/github.com/hashicorp/raft-boltdb/bench_test.go b/vendor/github.com/hashicorp/raft-boltdb/bench_test.go deleted file mode 100644 index b860706f..00000000 --- a/vendor/github.com/hashicorp/raft-boltdb/bench_test.go +++ /dev/null @@ -1,88 +0,0 @@ -package raftboltdb - -import ( - "os" - "testing" - - "github.com/hashicorp/raft/bench" -) - -func BenchmarkBoltStore_FirstIndex(b *testing.B) { - store := testBoltStore(b) - defer store.Close() - defer os.Remove(store.path) - - raftbench.FirstIndex(b, store) -} - -func BenchmarkBoltStore_LastIndex(b *testing.B) { - store := testBoltStore(b) - defer store.Close() - defer os.Remove(store.path) - - raftbench.LastIndex(b, store) -} - -func BenchmarkBoltStore_GetLog(b *testing.B) { - store := testBoltStore(b) - defer store.Close() - defer os.Remove(store.path) - - raftbench.GetLog(b, store) -} - -func BenchmarkBoltStore_StoreLog(b *testing.B) { - store := testBoltStore(b) - defer store.Close() - defer os.Remove(store.path) - - raftbench.StoreLog(b, store) -} - -func BenchmarkBoltStore_StoreLogs(b *testing.B) { - store := testBoltStore(b) - defer store.Close() - defer os.Remove(store.path) - - raftbench.StoreLogs(b, store) -} - -func BenchmarkBoltStore_DeleteRange(b *testing.B) { - store := testBoltStore(b) - defer store.Close() - defer os.Remove(store.path) - - raftbench.DeleteRange(b, store) -} - -func BenchmarkBoltStore_Set(b *testing.B) { - store := testBoltStore(b) - defer store.Close() - defer os.Remove(store.path) - - raftbench.Set(b, store) -} - -func BenchmarkBoltStore_Get(b *testing.B) { - store := testBoltStore(b) - defer store.Close() - defer os.Remove(store.path) - - raftbench.Get(b, store) -} - -func BenchmarkBoltStore_SetUint64(b *testing.B) { - store := testBoltStore(b) - defer store.Close() - defer os.Remove(store.path) - - raftbench.SetUint64(b, store) -} - -func BenchmarkBoltStore_GetUint64(b *testing.B) { - store := testBoltStore(b) - defer store.Close() - defer os.Remove(store.path) - - raftbench.GetUint64(b, store) -} diff --git a/vendor/github.com/hashicorp/raft-boltdb/bolt_store.go b/vendor/github.com/hashicorp/raft-boltdb/bolt_store.go deleted file mode 100644 index 109a7989..00000000 --- a/vendor/github.com/hashicorp/raft-boltdb/bolt_store.go +++ /dev/null @@ -1,255 +0,0 @@ -package raftboltdb - -import ( - "errors" - - "github.com/boltdb/bolt" - "github.com/hashicorp/raft" -) - -const ( - // Permissions to use on the db file. This is only used if the - // database file does not exist and needs to be created. - dbFileMode = 0600 -) - -var ( - // Bucket names we perform transactions in - dbLogs = []byte("logs") - dbConf = []byte("conf") - - // An error indicating a given key does not exist - ErrKeyNotFound = errors.New("not found") -) - -// BoltStore provides access to BoltDB for Raft to store and retrieve -// log entries. It also provides key/value storage, and can be used as -// a LogStore and StableStore. -type BoltStore struct { - // conn is the underlying handle to the db. - conn *bolt.DB - - // The path to the Bolt database file - path string -} - -// Options contains all the configuraiton used to open the BoltDB -type Options struct { - // Path is the file path to the BoltDB to use - Path string - - // BoltOptions contains any specific BoltDB options you might - // want to specify [e.g. 
open timeout] - BoltOptions *bolt.Options -} - -// readOnly returns true if the contained bolt options say to open -// the DB in readOnly mode [this can be useful to tools that want -// to examine the log] -func (o *Options) readOnly() bool { - return o != nil && o.BoltOptions != nil && o.BoltOptions.ReadOnly -} - -// NewBoltStore takes a file path and returns a connected Raft backend. -func NewBoltStore(path string) (*BoltStore, error) { - return New(Options{Path: path}) -} - -// New uses the supplied options to open the BoltDB and prepare it for use as a raft backend. -func New(options Options) (*BoltStore, error) { - // Try to connect - handle, err := bolt.Open(options.Path, dbFileMode, options.BoltOptions) - if err != nil { - return nil, err - } - - // Create the new store - store := &BoltStore{ - conn: handle, - path: options.Path, - } - - // If the store was opened read-only, don't try and create buckets - if !options.readOnly() { - // Set up our buckets - if err := store.initialize(); err != nil { - store.Close() - return nil, err - } - } - return store, nil -} - -// initialize is used to set up all of the buckets. -func (b *BoltStore) initialize() error { - tx, err := b.conn.Begin(true) - if err != nil { - return err - } - defer tx.Rollback() - - // Create all the buckets - if _, err := tx.CreateBucketIfNotExists(dbLogs); err != nil { - return err - } - if _, err := tx.CreateBucketIfNotExists(dbConf); err != nil { - return err - } - - return tx.Commit() -} - -// Close is used to gracefully close the DB connection. -func (b *BoltStore) Close() error { - return b.conn.Close() -} - -// FirstIndex returns the first known index from the Raft log. -func (b *BoltStore) FirstIndex() (uint64, error) { - tx, err := b.conn.Begin(false) - if err != nil { - return 0, err - } - defer tx.Rollback() - - curs := tx.Bucket(dbLogs).Cursor() - if first, _ := curs.First(); first == nil { - return 0, nil - } else { - return bytesToUint64(first), nil - } -} - -// LastIndex returns the last known index from the Raft log. -func (b *BoltStore) LastIndex() (uint64, error) { - tx, err := b.conn.Begin(false) - if err != nil { - return 0, err - } - defer tx.Rollback() - - curs := tx.Bucket(dbLogs).Cursor() - if last, _ := curs.Last(); last == nil { - return 0, nil - } else { - return bytesToUint64(last), nil - } -} - -// GetLog is used to retrieve a log from BoltDB at a given index. -func (b *BoltStore) GetLog(idx uint64, log *raft.Log) error { - tx, err := b.conn.Begin(false) - if err != nil { - return err - } - defer tx.Rollback() - - bucket := tx.Bucket(dbLogs) - val := bucket.Get(uint64ToBytes(idx)) - - if val == nil { - return raft.ErrLogNotFound - } - return decodeMsgPack(val, log) -} - -// StoreLog is used to store a single raft log -func (b *BoltStore) StoreLog(log *raft.Log) error { - return b.StoreLogs([]*raft.Log{log}) -} - -// StoreLogs is used to store a set of raft logs -func (b *BoltStore) StoreLogs(logs []*raft.Log) error { - tx, err := b.conn.Begin(true) - if err != nil { - return err - } - defer tx.Rollback() - - for _, log := range logs { - key := uint64ToBytes(log.Index) - val, err := encodeMsgPack(log) - if err != nil { - return err - } - bucket := tx.Bucket(dbLogs) - if err := bucket.Put(key, val.Bytes()); err != nil { - return err - } - } - - return tx.Commit() -} - -// DeleteRange is used to delete logs within a given range inclusively. 
-func (b *BoltStore) DeleteRange(min, max uint64) error { - minKey := uint64ToBytes(min) - - tx, err := b.conn.Begin(true) - if err != nil { - return err - } - defer tx.Rollback() - - curs := tx.Bucket(dbLogs).Cursor() - for k, _ := curs.Seek(minKey); k != nil; k, _ = curs.Next() { - // Handle out-of-range log index - if bytesToUint64(k) > max { - break - } - - // Delete in-range log index - if err := curs.Delete(); err != nil { - return err - } - } - - return tx.Commit() -} - -// Set is used to set a key/value set outside of the raft log -func (b *BoltStore) Set(k, v []byte) error { - tx, err := b.conn.Begin(true) - if err != nil { - return err - } - defer tx.Rollback() - - bucket := tx.Bucket(dbConf) - if err := bucket.Put(k, v); err != nil { - return err - } - - return tx.Commit() -} - -// Get is used to retrieve a value from the k/v store by key -func (b *BoltStore) Get(k []byte) ([]byte, error) { - tx, err := b.conn.Begin(false) - if err != nil { - return nil, err - } - defer tx.Rollback() - - bucket := tx.Bucket(dbConf) - val := bucket.Get(k) - - if val == nil { - return nil, ErrKeyNotFound - } - return append([]byte(nil), val...), nil -} - -// SetUint64 is like Set, but handles uint64 values -func (b *BoltStore) SetUint64(key []byte, val uint64) error { - return b.Set(key, uint64ToBytes(val)) -} - -// GetUint64 is like Get, but handles uint64 values -func (b *BoltStore) GetUint64(key []byte) (uint64, error) { - val, err := b.Get(key) - if err != nil { - return 0, err - } - return bytesToUint64(val), nil -} diff --git a/vendor/github.com/hashicorp/raft-boltdb/bolt_store_test.go b/vendor/github.com/hashicorp/raft-boltdb/bolt_store_test.go deleted file mode 100644 index 12b09b21..00000000 --- a/vendor/github.com/hashicorp/raft-boltdb/bolt_store_test.go +++ /dev/null @@ -1,416 +0,0 @@ -package raftboltdb - -import ( - "bytes" - "io/ioutil" - "os" - "reflect" - "testing" - "time" - - "github.com/boltdb/bolt" - "github.com/hashicorp/raft" -) - -func testBoltStore(t testing.TB) *BoltStore { - fh, err := ioutil.TempFile("", "bolt") - if err != nil { - t.Fatalf("err: %s", err) - } - os.Remove(fh.Name()) - - // Successfully creates and returns a store - store, err := NewBoltStore(fh.Name()) - if err != nil { - t.Fatalf("err: %s", err) - } - - return store -} - -func testRaftLog(idx uint64, data string) *raft.Log { - return &raft.Log{ - Data: []byte(data), - Index: idx, - } -} - -func TestBoltStore_Implements(t *testing.T) { - var store interface{} = &BoltStore{} - if _, ok := store.(raft.StableStore); !ok { - t.Fatalf("BoltStore does not implement raft.StableStore") - } - if _, ok := store.(raft.LogStore); !ok { - t.Fatalf("BoltStore does not implement raft.LogStore") - } -} - -func TestBoltOptionsTimeout(t *testing.T) { - fh, err := ioutil.TempFile("", "bolt") - if err != nil { - t.Fatalf("err: %s", err) - } - os.Remove(fh.Name()) - defer os.Remove(fh.Name()) - options := Options{ - Path: fh.Name(), - BoltOptions: &bolt.Options{ - Timeout: time.Second / 10, - }, - } - store, err := New(options) - if err != nil { - t.Fatalf("err: %v", err) - } - defer store.Close() - // trying to open it again should timeout - doneCh := make(chan error, 1) - go func() { - _, err := New(options) - doneCh <- err - }() - select { - case err := <-doneCh: - if err == nil || err.Error() != "timeout" { - t.Errorf("Expected timeout error but got %v", err) - } - case <-time.After(5 * time.Second): - t.Errorf("Gave up waiting for timeout response") - } -} - -func TestBoltOptionsReadOnly(t *testing.T) { - fh, err := 
ioutil.TempFile("", "bolt") - if err != nil { - t.Fatalf("err: %s", err) - } - defer os.Remove(fh.Name()) - store, err := NewBoltStore(fh.Name()) - if err != nil { - t.Fatalf("err: %s", err) - } - // Create the log - log := &raft.Log{ - Data: []byte("log1"), - Index: 1, - } - // Attempt to store the log - if err := store.StoreLog(log); err != nil { - t.Fatalf("err: %s", err) - } - - store.Close() - options := Options{ - Path: fh.Name(), - BoltOptions: &bolt.Options{ - Timeout: time.Second / 10, - ReadOnly: true, - }, - } - roStore, err := New(options) - if err != nil { - t.Fatalf("err: %s", err) - } - defer roStore.Close() - result := new(raft.Log) - if err := roStore.GetLog(1, result); err != nil { - t.Fatalf("err: %s", err) - } - - // Ensure the log comes back the same - if !reflect.DeepEqual(log, result) { - t.Errorf("bad: %v", result) - } - // Attempt to store the log, should fail on a read-only store - err = roStore.StoreLog(log) - if err != bolt.ErrDatabaseReadOnly { - t.Errorf("expecting error %v, but got %v", bolt.ErrDatabaseReadOnly, err) - } -} - -func TestNewBoltStore(t *testing.T) { - fh, err := ioutil.TempFile("", "bolt") - if err != nil { - t.Fatalf("err: %s", err) - } - os.Remove(fh.Name()) - defer os.Remove(fh.Name()) - - // Successfully creates and returns a store - store, err := NewBoltStore(fh.Name()) - if err != nil { - t.Fatalf("err: %s", err) - } - - // Ensure the file was created - if store.path != fh.Name() { - t.Fatalf("unexpected file path %q", store.path) - } - if _, err := os.Stat(fh.Name()); err != nil { - t.Fatalf("err: %s", err) - } - - // Close the store so we can open again - if err := store.Close(); err != nil { - t.Fatalf("err: %s", err) - } - - // Ensure our tables were created - db, err := bolt.Open(fh.Name(), dbFileMode, nil) - if err != nil { - t.Fatalf("err: %s", err) - } - tx, err := db.Begin(true) - if err != nil { - t.Fatalf("err: %s", err) - } - if _, err := tx.CreateBucket([]byte(dbLogs)); err != bolt.ErrBucketExists { - t.Fatalf("bad: %v", err) - } - if _, err := tx.CreateBucket([]byte(dbConf)); err != bolt.ErrBucketExists { - t.Fatalf("bad: %v", err) - } -} - -func TestBoltStore_FirstIndex(t *testing.T) { - store := testBoltStore(t) - defer store.Close() - defer os.Remove(store.path) - - // Should get 0 index on empty log - idx, err := store.FirstIndex() - if err != nil { - t.Fatalf("err: %s", err) - } - if idx != 0 { - t.Fatalf("bad: %v", idx) - } - - // Set a mock raft log - logs := []*raft.Log{ - testRaftLog(1, "log1"), - testRaftLog(2, "log2"), - testRaftLog(3, "log3"), - } - if err := store.StoreLogs(logs); err != nil { - t.Fatalf("bad: %s", err) - } - - // Fetch the first Raft index - idx, err = store.FirstIndex() - if err != nil { - t.Fatalf("err: %s", err) - } - if idx != 1 { - t.Fatalf("bad: %d", idx) - } -} - -func TestBoltStore_LastIndex(t *testing.T) { - store := testBoltStore(t) - defer store.Close() - defer os.Remove(store.path) - - // Should get 0 index on empty log - idx, err := store.LastIndex() - if err != nil { - t.Fatalf("err: %s", err) - } - if idx != 0 { - t.Fatalf("bad: %v", idx) - } - - // Set a mock raft log - logs := []*raft.Log{ - testRaftLog(1, "log1"), - testRaftLog(2, "log2"), - testRaftLog(3, "log3"), - } - if err := store.StoreLogs(logs); err != nil { - t.Fatalf("bad: %s", err) - } - - // Fetch the last Raft index - idx, err = store.LastIndex() - if err != nil { - t.Fatalf("err: %s", err) - } - if idx != 3 { - t.Fatalf("bad: %d", idx) - } -} - -func TestBoltStore_GetLog(t *testing.T) { - store := 
testBoltStore(t) - defer store.Close() - defer os.Remove(store.path) - - log := new(raft.Log) - - // Should return an error on non-existent log - if err := store.GetLog(1, log); err != raft.ErrLogNotFound { - t.Fatalf("expected raft log not found error, got: %v", err) - } - - // Set a mock raft log - logs := []*raft.Log{ - testRaftLog(1, "log1"), - testRaftLog(2, "log2"), - testRaftLog(3, "log3"), - } - if err := store.StoreLogs(logs); err != nil { - t.Fatalf("bad: %s", err) - } - - // Should return the proper log - if err := store.GetLog(2, log); err != nil { - t.Fatalf("err: %s", err) - } - if !reflect.DeepEqual(log, logs[1]) { - t.Fatalf("bad: %#v", log) - } -} - -func TestBoltStore_SetLog(t *testing.T) { - store := testBoltStore(t) - defer store.Close() - defer os.Remove(store.path) - - // Create the log - log := &raft.Log{ - Data: []byte("log1"), - Index: 1, - } - - // Attempt to store the log - if err := store.StoreLog(log); err != nil { - t.Fatalf("err: %s", err) - } - - // Retrieve the log again - result := new(raft.Log) - if err := store.GetLog(1, result); err != nil { - t.Fatalf("err: %s", err) - } - - // Ensure the log comes back the same - if !reflect.DeepEqual(log, result) { - t.Fatalf("bad: %v", result) - } -} - -func TestBoltStore_SetLogs(t *testing.T) { - store := testBoltStore(t) - defer store.Close() - defer os.Remove(store.path) - - // Create a set of logs - logs := []*raft.Log{ - testRaftLog(1, "log1"), - testRaftLog(2, "log2"), - } - - // Attempt to store the logs - if err := store.StoreLogs(logs); err != nil { - t.Fatalf("err: %s", err) - } - - // Ensure we stored them all - result1, result2 := new(raft.Log), new(raft.Log) - if err := store.GetLog(1, result1); err != nil { - t.Fatalf("err: %s", err) - } - if !reflect.DeepEqual(logs[0], result1) { - t.Fatalf("bad: %#v", result1) - } - if err := store.GetLog(2, result2); err != nil { - t.Fatalf("err: %s", err) - } - if !reflect.DeepEqual(logs[1], result2) { - t.Fatalf("bad: %#v", result2) - } -} - -func TestBoltStore_DeleteRange(t *testing.T) { - store := testBoltStore(t) - defer store.Close() - defer os.Remove(store.path) - - // Create a set of logs - log1 := testRaftLog(1, "log1") - log2 := testRaftLog(2, "log2") - log3 := testRaftLog(3, "log3") - logs := []*raft.Log{log1, log2, log3} - - // Attempt to store the logs - if err := store.StoreLogs(logs); err != nil { - t.Fatalf("err: %s", err) - } - - // Attempt to delete a range of logs - if err := store.DeleteRange(1, 2); err != nil { - t.Fatalf("err: %s", err) - } - - // Ensure the logs were deleted - if err := store.GetLog(1, new(raft.Log)); err != raft.ErrLogNotFound { - t.Fatalf("should have deleted log1") - } - if err := store.GetLog(2, new(raft.Log)); err != raft.ErrLogNotFound { - t.Fatalf("should have deleted log2") - } -} - -func TestBoltStore_Set_Get(t *testing.T) { - store := testBoltStore(t) - defer store.Close() - defer os.Remove(store.path) - - // Returns error on non-existent key - if _, err := store.Get([]byte("bad")); err != ErrKeyNotFound { - t.Fatalf("expected not found error, got: %q", err) - } - - k, v := []byte("hello"), []byte("world") - - // Try to set a k/v pair - if err := store.Set(k, v); err != nil { - t.Fatalf("err: %s", err) - } - - // Try to read it back - val, err := store.Get(k) - if err != nil { - t.Fatalf("err: %s", err) - } - if !bytes.Equal(val, v) { - t.Fatalf("bad: %v", val) - } -} - -func TestBoltStore_SetUint64_GetUint64(t *testing.T) { - store := testBoltStore(t) - defer store.Close() - defer os.Remove(store.path) - - // 
Returns error on non-existent key - if _, err := store.GetUint64([]byte("bad")); err != ErrKeyNotFound { - t.Fatalf("expected not found error, got: %q", err) - } - - k, v := []byte("abc"), uint64(123) - - // Attempt to set the k/v pair - if err := store.SetUint64(k, v); err != nil { - t.Fatalf("err: %s", err) - } - - // Read back the value - val, err := store.GetUint64(k) - if err != nil { - t.Fatalf("err: %s", err) - } - if val != v { - t.Fatalf("bad: %v", val) - } -} diff --git a/vendor/github.com/hashicorp/raft-boltdb/util.go b/vendor/github.com/hashicorp/raft-boltdb/util.go deleted file mode 100644 index 68dd786b..00000000 --- a/vendor/github.com/hashicorp/raft-boltdb/util.go +++ /dev/null @@ -1,37 +0,0 @@ -package raftboltdb - -import ( - "bytes" - "encoding/binary" - - "github.com/hashicorp/go-msgpack/codec" -) - -// Decode reverses the encode operation on a byte slice input -func decodeMsgPack(buf []byte, out interface{}) error { - r := bytes.NewBuffer(buf) - hd := codec.MsgpackHandle{} - dec := codec.NewDecoder(r, &hd) - return dec.Decode(out) -} - -// Encode writes an encoded object to a new bytes buffer -func encodeMsgPack(in interface{}) (*bytes.Buffer, error) { - buf := bytes.NewBuffer(nil) - hd := codec.MsgpackHandle{} - enc := codec.NewEncoder(buf, &hd) - err := enc.Encode(in) - return buf, err -} - -// Converts bytes to an integer -func bytesToUint64(b []byte) uint64 { - return binary.BigEndian.Uint64(b) -} - -// Converts a uint to a byte slice -func uint64ToBytes(u uint64) []byte { - buf := make([]byte, 8) - binary.BigEndian.PutUint64(buf, u) - return buf -} diff --git a/vendor/github.com/hashicorp/raft/.gitignore b/vendor/github.com/hashicorp/raft/.gitignore deleted file mode 100644 index 83656241..00000000 --- a/vendor/github.com/hashicorp/raft/.gitignore +++ /dev/null @@ -1,23 +0,0 @@ -# Compiled Object files, Static and Dynamic libs (Shared Objects) -*.o -*.a -*.so - -# Folders -_obj -_test - -# Architecture specific extensions/prefixes -*.[568vq] -[568vq].out - -*.cgo1.go -*.cgo2.c -_cgo_defun.c -_cgo_gotypes.go -_cgo_export.* - -_testmain.go - -*.exe -*.test diff --git a/vendor/github.com/hashicorp/raft/.travis.yml b/vendor/github.com/hashicorp/raft/.travis.yml deleted file mode 100644 index 94eb8668..00000000 --- a/vendor/github.com/hashicorp/raft/.travis.yml +++ /dev/null @@ -1,16 +0,0 @@ -language: go - -go: - - 1.4 - - 1.5 - - 1.6 - - tip - -install: make deps -script: - - make integ - -notifications: - flowdock: - secure: fZrcf9rlh2IrQrlch1sHkn3YI7SKvjGnAl/zyV5D6NROe1Bbr6d3QRMuCXWWdhJHzjKmXk5rIzbqJhUc0PNF7YjxGNKSzqWMQ56KcvN1k8DzlqxpqkcA3Jbs6fXCWo2fssRtZ7hj/wOP1f5n6cc7kzHDt9dgaYJ6nO2fqNPJiTc= - diff --git a/vendor/github.com/hashicorp/raft/LICENSE b/vendor/github.com/hashicorp/raft/LICENSE deleted file mode 100644 index c33dcc7c..00000000 --- a/vendor/github.com/hashicorp/raft/LICENSE +++ /dev/null @@ -1,354 +0,0 @@ -Mozilla Public License, version 2.0 - -1. Definitions - -1.1. “Contributor” - - means each individual or legal entity that creates, contributes to the - creation of, or owns Covered Software. - -1.2. “Contributor Version” - - means the combination of the Contributions of others (if any) used by a - Contributor and that particular Contributor’s Contribution. - -1.3. “Contribution” - - means Covered Software of a particular Contributor. - -1.4. 
“Covered Software” - - means Source Code Form to which the initial Contributor has attached the - notice in Exhibit A, the Executable Form of such Source Code Form, and - Modifications of such Source Code Form, in each case including portions - thereof. - -1.5. “Incompatible With Secondary Licenses” - means - - a. that the initial Contributor has attached the notice described in - Exhibit B to the Covered Software; or - - b. that the Covered Software was made available under the terms of version - 1.1 or earlier of the License, but not also under the terms of a - Secondary License. - -1.6. “Executable Form” - - means any form of the work other than Source Code Form. - -1.7. “Larger Work” - - means a work that combines Covered Software with other material, in a separate - file or files, that is not Covered Software. - -1.8. “License” - - means this document. - -1.9. “Licensable” - - means having the right to grant, to the maximum extent possible, whether at the - time of the initial grant or subsequently, any and all of the rights conveyed by - this License. - -1.10. “Modifications” - - means any of the following: - - a. any file in Source Code Form that results from an addition to, deletion - from, or modification of the contents of Covered Software; or - - b. any new file in Source Code Form that contains any Covered Software. - -1.11. “Patent Claims” of a Contributor - - means any patent claim(s), including without limitation, method, process, - and apparatus claims, in any patent Licensable by such Contributor that - would be infringed, but for the grant of the License, by the making, - using, selling, offering for sale, having made, import, or transfer of - either its Contributions or its Contributor Version. - -1.12. “Secondary License” - - means either the GNU General Public License, Version 2.0, the GNU Lesser - General Public License, Version 2.1, the GNU Affero General Public - License, Version 3.0, or any later versions of those licenses. - -1.13. “Source Code Form” - - means the form of the work preferred for making modifications. - -1.14. “You” (or “Your”) - - means an individual or a legal entity exercising rights under this - License. For legal entities, “You” includes any entity that controls, is - controlled by, or is under common control with You. For purposes of this - definition, “control” means (a) the power, direct or indirect, to cause - the direction or management of such entity, whether by contract or - otherwise, or (b) ownership of more than fifty percent (50%) of the - outstanding shares or beneficial ownership of such entity. - - -2. License Grants and Conditions - -2.1. Grants - - Each Contributor hereby grants You a world-wide, royalty-free, - non-exclusive license: - - a. under intellectual property rights (other than patent or trademark) - Licensable by such Contributor to use, reproduce, make available, - modify, display, perform, distribute, and otherwise exploit its - Contributions, either on an unmodified basis, with Modifications, or as - part of a Larger Work; and - - b. under Patent Claims of such Contributor to make, use, sell, offer for - sale, have made, import, and otherwise transfer either its Contributions - or its Contributor Version. - -2.2. Effective Date - - The licenses granted in Section 2.1 with respect to any Contribution become - effective for each Contribution on the date the Contributor first distributes - such Contribution. - -2.3. 
Limitations on Grant Scope - - The licenses granted in this Section 2 are the only rights granted under this - License. No additional rights or licenses will be implied from the distribution - or licensing of Covered Software under this License. Notwithstanding Section - 2.1(b) above, no patent license is granted by a Contributor: - - a. for any code that a Contributor has removed from Covered Software; or - - b. for infringements caused by: (i) Your and any other third party’s - modifications of Covered Software, or (ii) the combination of its - Contributions with other software (except as part of its Contributor - Version); or - - c. under Patent Claims infringed by Covered Software in the absence of its - Contributions. - - This License does not grant any rights in the trademarks, service marks, or - logos of any Contributor (except as may be necessary to comply with the - notice requirements in Section 3.4). - -2.4. Subsequent Licenses - - No Contributor makes additional grants as a result of Your choice to - distribute the Covered Software under a subsequent version of this License - (see Section 10.2) or under the terms of a Secondary License (if permitted - under the terms of Section 3.3). - -2.5. Representation - - Each Contributor represents that the Contributor believes its Contributions - are its original creation(s) or it has sufficient rights to grant the - rights to its Contributions conveyed by this License. - -2.6. Fair Use - - This License is not intended to limit any rights You have under applicable - copyright doctrines of fair use, fair dealing, or other equivalents. - -2.7. Conditions - - Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in - Section 2.1. - - -3. Responsibilities - -3.1. Distribution of Source Form - - All distribution of Covered Software in Source Code Form, including any - Modifications that You create or to which You contribute, must be under the - terms of this License. You must inform recipients that the Source Code Form - of the Covered Software is governed by the terms of this License, and how - they can obtain a copy of this License. You may not attempt to alter or - restrict the recipients’ rights in the Source Code Form. - -3.2. Distribution of Executable Form - - If You distribute Covered Software in Executable Form then: - - a. such Covered Software must also be made available in Source Code Form, - as described in Section 3.1, and You must inform recipients of the - Executable Form how they can obtain a copy of such Source Code Form by - reasonable means in a timely manner, at a charge no more than the cost - of distribution to the recipient; and - - b. You may distribute such Executable Form under the terms of this License, - or sublicense it under different terms, provided that the license for - the Executable Form does not attempt to limit or alter the recipients’ - rights in the Source Code Form under this License. - -3.3. Distribution of a Larger Work - - You may create and distribute a Larger Work under terms of Your choice, - provided that You also comply with the requirements of this License for the - Covered Software. 
If the Larger Work is a combination of Covered Software - with a work governed by one or more Secondary Licenses, and the Covered - Software is not Incompatible With Secondary Licenses, this License permits - You to additionally distribute such Covered Software under the terms of - such Secondary License(s), so that the recipient of the Larger Work may, at - their option, further distribute the Covered Software under the terms of - either this License or such Secondary License(s). - -3.4. Notices - - You may not remove or alter the substance of any license notices (including - copyright notices, patent notices, disclaimers of warranty, or limitations - of liability) contained within the Source Code Form of the Covered - Software, except that You may alter any license notices to the extent - required to remedy known factual inaccuracies. - -3.5. Application of Additional Terms - - You may choose to offer, and to charge a fee for, warranty, support, - indemnity or liability obligations to one or more recipients of Covered - Software. However, You may do so only on Your own behalf, and not on behalf - of any Contributor. You must make it absolutely clear that any such - warranty, support, indemnity, or liability obligation is offered by You - alone, and You hereby agree to indemnify every Contributor for any - liability incurred by such Contributor as a result of warranty, support, - indemnity or liability terms You offer. You may include additional - disclaimers of warranty and limitations of liability specific to any - jurisdiction. - -4. Inability to Comply Due to Statute or Regulation - - If it is impossible for You to comply with any of the terms of this License - with respect to some or all of the Covered Software due to statute, judicial - order, or regulation then You must: (a) comply with the terms of this License - to the maximum extent possible; and (b) describe the limitations and the code - they affect. Such description must be placed in a text file included with all - distributions of the Covered Software under this License. Except to the - extent prohibited by statute or regulation, such description must be - sufficiently detailed for a recipient of ordinary skill to be able to - understand it. - -5. Termination - -5.1. The rights granted under this License will terminate automatically if You - fail to comply with any of its terms. However, if You become compliant, - then the rights granted under this License from a particular Contributor - are reinstated (a) provisionally, unless and until such Contributor - explicitly and finally terminates Your grants, and (b) on an ongoing basis, - if such Contributor fails to notify You of the non-compliance by some - reasonable means prior to 60 days after You have come back into compliance. - Moreover, Your grants from a particular Contributor are reinstated on an - ongoing basis if such Contributor notifies You of the non-compliance by - some reasonable means, this is the first time You have received notice of - non-compliance with this License from such Contributor, and You become - compliant prior to 30 days after Your receipt of the notice. - -5.2. If You initiate litigation against any entity by asserting a patent - infringement claim (excluding declaratory judgment actions, counter-claims, - and cross-claims) alleging that a Contributor Version directly or - indirectly infringes any patent, then the rights granted to You by any and - all Contributors for the Covered Software under Section 2.1 of this License - shall terminate. 
- -5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user - license agreements (excluding distributors and resellers) which have been - validly granted by You or Your distributors under this License prior to - termination shall survive termination. - -6. Disclaimer of Warranty - - Covered Software is provided under this License on an “as is” basis, without - warranty of any kind, either expressed, implied, or statutory, including, - without limitation, warranties that the Covered Software is free of defects, - merchantable, fit for a particular purpose or non-infringing. The entire - risk as to the quality and performance of the Covered Software is with You. - Should any Covered Software prove defective in any respect, You (not any - Contributor) assume the cost of any necessary servicing, repair, or - correction. This disclaimer of warranty constitutes an essential part of this - License. No use of any Covered Software is authorized under this License - except under this disclaimer. - -7. Limitation of Liability - - Under no circumstances and under no legal theory, whether tort (including - negligence), contract, or otherwise, shall any Contributor, or anyone who - distributes Covered Software as permitted above, be liable to You for any - direct, indirect, special, incidental, or consequential damages of any - character including, without limitation, damages for lost profits, loss of - goodwill, work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses, even if such party shall have been - informed of the possibility of such damages. This limitation of liability - shall not apply to liability for death or personal injury resulting from such - party’s negligence to the extent applicable law prohibits such limitation. - Some jurisdictions do not allow the exclusion or limitation of incidental or - consequential damages, so this exclusion and limitation may not apply to You. - -8. Litigation - - Any litigation relating to this License may be brought only in the courts of - a jurisdiction where the defendant maintains its principal place of business - and such litigation shall be governed by laws of that jurisdiction, without - reference to its conflict-of-law provisions. Nothing in this Section shall - prevent a party’s ability to bring cross-claims or counter-claims. - -9. Miscellaneous - - This License represents the complete agreement concerning the subject matter - hereof. If any provision of this License is held to be unenforceable, such - provision shall be reformed only to the extent necessary to make it - enforceable. Any law or regulation which provides that the language of a - contract shall be construed against the drafter shall not be used to construe - this License against a Contributor. - - -10. Versions of the License - -10.1. New Versions - - Mozilla Foundation is the license steward. Except as provided in Section - 10.3, no one other than the license steward has the right to modify or - publish new versions of this License. Each version will be given a - distinguishing version number. - -10.2. Effect of New Versions - - You may distribute the Covered Software under the terms of the version of - the License under which You originally received the Covered Software, or - under the terms of any subsequent version published by the license - steward. - -10.3. 
Modified Versions - - If you create software not governed by this License, and you want to - create a new license for such software, you may create and use a modified - version of this License if you rename the license and remove any - references to the name of the license steward (except to note that such - modified license differs from this License). - -10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses - If You choose to distribute Source Code Form that is Incompatible With - Secondary Licenses under the terms of this version of the License, the - notice described in Exhibit B of this License must be attached. - -Exhibit A - Source Code Form License Notice - - This Source Code Form is subject to the - terms of the Mozilla Public License, v. - 2.0. If a copy of the MPL was not - distributed with this file, You can - obtain one at - http://mozilla.org/MPL/2.0/. - -If it is not possible or desirable to put the notice in a particular file, then -You may include the notice in a location (such as a LICENSE file in a relevant -directory) where a recipient would be likely to look for such a notice. - -You may add additional accurate notices of copyright ownership. - -Exhibit B - “Incompatible With Secondary Licenses” Notice - - This Source Code Form is “Incompatible - With Secondary Licenses”, as defined by - the Mozilla Public License, v. 2.0. - diff --git a/vendor/github.com/hashicorp/raft/Makefile b/vendor/github.com/hashicorp/raft/Makefile deleted file mode 100644 index 92a0c0b4..00000000 --- a/vendor/github.com/hashicorp/raft/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -DEPS = $(go list -f '{{range .TestImports}}{{.}} {{end}}' ./...) - -test: - go test -timeout=30s ./... - -integ: test - INTEG_TESTS=yes go test -timeout=23s -run=Integ ./... - -deps: - go get -d -v ./... - echo $(DEPS) | xargs -n1 go get -d - -cov: - INTEG_TESTS=yes gocov test github.com/hashicorp/raft | gocov-html > /tmp/coverage.html - open /tmp/coverage.html - -.PHONY: test cov integ deps diff --git a/vendor/github.com/hashicorp/raft/README.md b/vendor/github.com/hashicorp/raft/README.md deleted file mode 100644 index a70ec8a0..00000000 --- a/vendor/github.com/hashicorp/raft/README.md +++ /dev/null @@ -1,107 +0,0 @@ -raft [![Build Status](https://travis-ci.org/hashicorp/raft.png)](https://travis-ci.org/hashicorp/raft) -==== - -raft is a [Go](http://www.golang.org) library that manages a replicated -log and can be used with an FSM to manage replicated state machines. It -is a library for providing [consensus](http://en.wikipedia.org/wiki/Consensus_(computer_science)). - -The use cases for such a library are far-reaching as replicated state -machines are a key component of many distributed systems. They enable -building Consistent, Partition Tolerant (CP) systems, with limited -fault tolerance as well. - -## Building - -If you wish to build raft you'll need Go version 1.2+ installed. - -Please check your installation with: - -``` -go version -``` - -## Documentation - -For complete documentation, see the associated [Godoc](http://godoc.org/github.com/hashicorp/raft). - -To prevent complications with cgo, the primary backend `MDBStore` is in a separate repository, -called [raft-mdb](http://github.com/hashicorp/raft-mdb). That is the recommended implementation -for the `LogStore` and `StableStore`. - -A pure Go backend using [BoltDB](https://github.com/boltdb/bolt) is also available called -[raft-boltdb](https://github.com/hashicorp/raft-boltdb). 
It can also be used as a `LogStore` -and `StableStore`. - -## Tagged Releases - -As of September 2017, Hashicorp will start using tags for this library to clearly indicate -major version updates. We recommend you vendor your application's dependency on this library. - -* v0.1.0 is the original stable version of the library that was in master and has been maintained -with no breaking API changes. This was in use by Consul prior to version 0.7.0. - -* v1.0.0 takes the changes that were staged in the library-v2-stage-one branch. This version -manages server identities using a UUID, so introduces some breaking API changes. It also versions -the Raft protocol, and requires some special steps when interoperating with Raft servers running -older versions of the library (see the detailed comment in config.go about version compatibility). -You can reference https://github.com/hashicorp/consul/pull/2222 for an idea of what was required -to port Consul to these new interfaces. - - This version includes some new features as well, including non voting servers, a new address - provider abstraction in the transport layer, and more resilient snapshots. - -## Protocol - -raft is based on ["Raft: In Search of an Understandable Consensus Algorithm"](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf) - -A high level overview of the Raft protocol is described below, but for details please read the full -[Raft paper](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf) -followed by the raft source. Any questions about the raft protocol should be sent to the -[raft-dev mailing list](https://groups.google.com/forum/#!forum/raft-dev). - -### Protocol Description - -Raft nodes are always in one of three states: follower, candidate or leader. All -nodes initially start out as a follower. In this state, nodes can accept log entries -from a leader and cast votes. If no entries are received for some time, nodes -self-promote to the candidate state. In the candidate state nodes request votes from -their peers. If a candidate receives a quorum of votes, then it is promoted to a leader. -The leader must accept new log entries and replicate to all the other followers. -In addition, if stale reads are not acceptable, all queries must also be performed on -the leader. - -Once a cluster has a leader, it is able to accept new log entries. A client can -request that a leader append a new log entry, which is an opaque binary blob to -Raft. The leader then writes the entry to durable storage and attempts to replicate -to a quorum of followers. Once the log entry is considered *committed*, it can be -*applied* to a finite state machine. The finite state machine is application specific, -and is implemented using an interface. - -An obvious question relates to the unbounded nature of a replicated log. Raft provides -a mechanism by which the current state is snapshotted, and the log is compacted. Because -of the FSM abstraction, restoring the state of the FSM must result in the same state -as a replay of old logs. This allows Raft to capture the FSM state at a point in time, -and then remove all the logs that were used to reach that state. This is performed automatically -without user intervention, and prevents unbounded disk usage as well as minimizing -time spent replaying logs. - -Lastly, there is the issue of updating the peer set when new servers are joining -or existing servers are leaving. 
As long as a quorum of nodes is available, this -is not an issue as Raft provides mechanisms to dynamically update the peer set. -If a quorum of nodes is unavailable, then this becomes a very challenging issue. -For example, suppose there are only 2 peers, A and B. The quorum size is also -2, meaning both nodes must agree to commit a log entry. If either A or B fails, -it is now impossible to reach quorum. This means the cluster is unable to add, -or remove a node, or commit any additional log entries. This results in *unavailability*. -At this point, manual intervention would be required to remove either A or B, -and to restart the remaining node in bootstrap mode. - -A Raft cluster of 3 nodes can tolerate a single node failure, while a cluster -of 5 can tolerate 2 node failures. The recommended configuration is to either -run 3 or 5 raft servers. This maximizes availability without -greatly sacrificing performance. - -In terms of performance, Raft is comparable to Paxos. Assuming stable leadership, -committing a log entry requires a single round trip to half of the cluster. -Thus performance is bound by disk I/O and network latency. - diff --git a/vendor/github.com/hashicorp/raft/bench/bench.go b/vendor/github.com/hashicorp/raft/bench/bench.go deleted file mode 100644 index d7a58f45..00000000 --- a/vendor/github.com/hashicorp/raft/bench/bench.go +++ /dev/null @@ -1,171 +0,0 @@ -package raftbench - -// raftbench provides common benchmarking functions which can be used by -// anything which implements the raft.LogStore and raft.StableStore interfaces. -// All functions accept these interfaces and perform benchmarking. This -// makes comparing backend performance easier by sharing the tests. - -import ( - "github.com/hashicorp/raft" - "testing" -) - -func FirstIndex(b *testing.B, store raft.LogStore) { - // Create some fake data - var logs []*raft.Log - for i := 1; i < 10; i++ { - logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")}) - } - if err := store.StoreLogs(logs); err != nil { - b.Fatalf("err: %s", err) - } - b.ResetTimer() - - // Run FirstIndex a number of times - for n := 0; n < b.N; n++ { - store.FirstIndex() - } -} - -func LastIndex(b *testing.B, store raft.LogStore) { - // Create some fake data - var logs []*raft.Log - for i := 1; i < 10; i++ { - logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")}) - } - if err := store.StoreLogs(logs); err != nil { - b.Fatalf("err: %s", err) - } - b.ResetTimer() - - // Run LastIndex a number of times - for n := 0; n < b.N; n++ { - store.LastIndex() - } -} - -func GetLog(b *testing.B, store raft.LogStore) { - // Create some fake data - var logs []*raft.Log - for i := 1; i < 10; i++ { - logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")}) - } - if err := store.StoreLogs(logs); err != nil { - b.Fatalf("err: %s", err) - } - b.ResetTimer() - - // Run GetLog a number of times - for n := 0; n < b.N; n++ { - if err := store.GetLog(5, new(raft.Log)); err != nil { - b.Fatalf("err: %s", err) - } - } -} - -func StoreLog(b *testing.B, store raft.LogStore) { - // Run StoreLog a number of times - for n := 0; n < b.N; n++ { - log := &raft.Log{Index: uint64(n), Data: []byte("data")} - if err := store.StoreLog(log); err != nil { - b.Fatalf("err: %s", err) - } - } -} - -func StoreLogs(b *testing.B, store raft.LogStore) { - // Run StoreLogs a number of times. We want to set multiple logs each - // run, so we create 3 logs with incrementing indexes for each iteration. 
- for n := 0; n < b.N; n++ { - b.StopTimer() - offset := 3 * (n + 1) - logs := []*raft.Log{ - &raft.Log{Index: uint64(offset - 2), Data: []byte("data")}, - &raft.Log{Index: uint64(offset - 1), Data: []byte("data")}, - &raft.Log{Index: uint64(offset), Data: []byte("data")}, - } - b.StartTimer() - - if err := store.StoreLogs(logs); err != nil { - b.Fatalf("err: %s", err) - } - } -} - -func DeleteRange(b *testing.B, store raft.LogStore) { - // Create some fake data. In this case, we create 3 new log entries for each - // test case, and separate them by index in multiples of 10. This allows - // some room so that we can test deleting ranges with "extra" logs to - // to ensure we stop going to the database once our max index is hit. - var logs []*raft.Log - for n := 0; n < b.N; n++ { - offset := 10 * n - for i := offset; i < offset+3; i++ { - logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")}) - } - } - if err := store.StoreLogs(logs); err != nil { - b.Fatalf("err: %s", err) - } - b.ResetTimer() - - // Delete a range of the data - for n := 0; n < b.N; n++ { - offset := 10 * n - if err := store.DeleteRange(uint64(offset), uint64(offset+9)); err != nil { - b.Fatalf("err: %s", err) - } - } -} - -func Set(b *testing.B, store raft.StableStore) { - // Run Set a number of times - for n := 0; n < b.N; n++ { - if err := store.Set([]byte{byte(n)}, []byte("val")); err != nil { - b.Fatalf("err: %s", err) - } - } -} - -func Get(b *testing.B, store raft.StableStore) { - // Create some fake data - for i := 1; i < 10; i++ { - if err := store.Set([]byte{byte(i)}, []byte("val")); err != nil { - b.Fatalf("err: %s", err) - } - } - b.ResetTimer() - - // Run Get a number of times - for n := 0; n < b.N; n++ { - if _, err := store.Get([]byte{0x05}); err != nil { - b.Fatalf("err: %s", err) - } - } -} - -func SetUint64(b *testing.B, store raft.StableStore) { - // Run SetUint64 a number of times - for n := 0; n < b.N; n++ { - if err := store.SetUint64([]byte{byte(n)}, uint64(n)); err != nil { - b.Fatalf("err: %s", err) - } - } -} - -func GetUint64(b *testing.B, store raft.StableStore) { - // Create some fake data - for i := 0; i < 10; i++ { - if err := store.SetUint64([]byte{byte(i)}, uint64(i)); err != nil { - b.Fatalf("err: %s", err) - } - } - b.ResetTimer() - - // Run GetUint64 a number of times - for n := 0; n < b.N; n++ { - if _, err := store.Get([]byte{0x05}); err != nil { - b.Fatalf("err: %s", err) - } - } -} diff --git a/vendor/github.com/hashicorp/raft/commands.go b/vendor/github.com/hashicorp/raft/commands.go deleted file mode 100644 index 739775b3..00000000 --- a/vendor/github.com/hashicorp/raft/commands.go +++ /dev/null @@ -1,84 +0,0 @@ -package raft - -// AppendEntriesRequest is the command used to append entries to the -// replicated log. -type AppendEntriesRequest struct { - // Provide the current term and leader - Term uint64 - Leader []byte - - // Provide the previous entries for integrity checking - PrevLogEntry uint64 - PrevLogTerm uint64 - - // New entries to commit - Entries []*Log - - // Commit index on the leader - LeaderCommitIndex uint64 -} - -// AppendEntriesResponse is the response returned from an -// AppendEntriesRequest. 
-type AppendEntriesResponse struct {
-	// Newer term if leader is out of date
-	Term uint64
-
-	// Last Log is a hint to help accelerate rebuilding slow nodes
-	LastLog uint64
-
-	// We may not succeed if we have a conflicting entry
-	Success bool
-
-	// There are scenarios where this request didn't succeed
-	// but there's no need to wait/back-off the next attempt.
-	NoRetryBackoff bool
-}
-
-// RequestVoteRequest is the command used by a candidate to ask a Raft peer
-// for a vote in an election.
-type RequestVoteRequest struct {
-	// Provide the term and our id
-	Term      uint64
-	Candidate []byte
-
-	// Used to ensure safety
-	LastLogIndex uint64
-	LastLogTerm  uint64
-}
-
-// RequestVoteResponse is the response returned from a RequestVoteRequest.
-type RequestVoteResponse struct {
-	// Newer term if leader is out of date
-	Term uint64
-
-	// Return the peers, so that a node can shutdown on removal
-	Peers []byte
-
-	// Is the vote granted
-	Granted bool
-}
-
-// InstallSnapshotRequest is the command sent to a Raft peer to bootstrap its
-// log (and state machine) from a snapshot on another peer.
-type InstallSnapshotRequest struct {
-	Term   uint64
-	Leader []byte
-
-	// These are the last index/term included in the snapshot
-	LastLogIndex uint64
-	LastLogTerm  uint64
-
-	// Peer Set in the snapshot
-	Peers []byte
-
-	// Size of the snapshot
-	Size int64
-}
-
-// InstallSnapshotResponse is the response returned from an
-// InstallSnapshotRequest.
-type InstallSnapshotResponse struct {
-	Term    uint64
-	Success bool
-}
diff --git a/vendor/github.com/hashicorp/raft/config.go b/vendor/github.com/hashicorp/raft/config.go
deleted file mode 100644
index 2dbd5e60..00000000
--- a/vendor/github.com/hashicorp/raft/config.go
+++ /dev/null
@@ -1,136 +0,0 @@
-package raft
-
-import (
-	"fmt"
-	"io"
-	"log"
-	"time"
-)
-
-// Config provides any necessary configuration to
-// the Raft server.
-type Config struct {
-	// HeartbeatTimeout specifies the time in follower state without
-	// a leader before we attempt an election.
-	HeartbeatTimeout time.Duration
-
-	// ElectionTimeout specifies the time in candidate state without
-	// a leader before we attempt an election.
-	ElectionTimeout time.Duration
-
-	// CommitTimeout controls the time without an Apply() operation
-	// before we heartbeat to ensure a timely commit. Due to random
-	// staggering, may be delayed as much as 2x this value.
-	CommitTimeout time.Duration
-
-	// MaxAppendEntries controls the maximum number of append entries
-	// to send at once. We want to strike a balance between efficiency
-	// and avoiding waste if the follower is going to reject because of
-	// an inconsistent log.
-	MaxAppendEntries int
-
-	// If we are a member of a cluster, and RemovePeer is invoked for the
-	// local node, then we forget all peers and transition into the follower state.
-	// If ShutdownOnRemove is set, we additionally shut down Raft. Otherwise,
-	// we can become a leader of a cluster containing only this node.
-	ShutdownOnRemove bool
-
-	// DisableBootstrapAfterElect is used to turn off EnableSingleNode
-	// after the node is elected. This is used to prevent self-election
-	// if the node is removed from the Raft cluster via RemovePeer. Setting
-	// it to false will keep the bootstrap mode, allowing the node to self-elect
-	// and potentially bootstrap a separate cluster.
-	DisableBootstrapAfterElect bool
-
-	// TrailingLogs controls how many logs we leave after a snapshot. This is
-	// used so that we can quickly replay logs on a follower instead of being
-	// forced to send an entire snapshot.
-	TrailingLogs uint64
-
-	// SnapshotInterval controls how often we check if we should perform a snapshot.
-	// We randomly stagger between this value and 2x this value to prevent the entire
-	// cluster from performing a snapshot at once.
-	SnapshotInterval time.Duration
-
-	// SnapshotThreshold controls how many outstanding logs there must be before
-	// we perform a snapshot. This is to prevent excessive snapshots when we can
-	// just replay a small set of logs.
-	SnapshotThreshold uint64
-
-	// EnableSingleNode allows for a single node mode of operation. This
-	// is false by default, which prevents a lone node from electing itself
-	// leader.
-	EnableSingleNode bool
-
-	// LeaderLeaseTimeout is used to control how long the "lease" lasts
-	// for being the leader without being able to contact a quorum
-	// of nodes. If we reach this interval without contact, we will
-	// step down as leader.
-	LeaderLeaseTimeout time.Duration
-
-	// StartAsLeader forces Raft to start in the leader state. This should
-	// never be used except for testing purposes, as it can cause a split-brain.
-	StartAsLeader bool
-
-	// NotifyCh is used to provide a channel that will be notified of leadership
-	// changes. Raft will block writing to this channel, so it should either be
-	// buffered or aggressively consumed.
-	NotifyCh chan<- bool
-
-	// LogOutput is used as a sink for logs, unless Logger is specified.
-	// Defaults to os.Stderr.
-	LogOutput io.Writer
-
-	// Logger is a user-provided logger. If nil, a logger writing to LogOutput
-	// is used.
-	Logger *log.Logger
-}
-
-// DefaultConfig returns a Config with usable defaults.
-func DefaultConfig() *Config {
-	return &Config{
-		HeartbeatTimeout:           1000 * time.Millisecond,
-		ElectionTimeout:            1000 * time.Millisecond,
-		CommitTimeout:              50 * time.Millisecond,
-		MaxAppendEntries:           64,
-		ShutdownOnRemove:           true,
-		DisableBootstrapAfterElect: true,
-		TrailingLogs:               10240,
-		SnapshotInterval:           120 * time.Second,
-		SnapshotThreshold:          8192,
-		EnableSingleNode:           false,
-		LeaderLeaseTimeout:         500 * time.Millisecond,
-	}
-}
-
-// ValidateConfig is used to validate a sane configuration.
-func ValidateConfig(config *Config) error {
-	if config.HeartbeatTimeout < 5*time.Millisecond {
-		return fmt.Errorf("Heartbeat timeout is too low")
-	}
-	if config.ElectionTimeout < 5*time.Millisecond {
-		return fmt.Errorf("Election timeout is too low")
-	}
-	if config.CommitTimeout < time.Millisecond {
-		return fmt.Errorf("Commit timeout is too low")
-	}
-	if config.MaxAppendEntries <= 0 {
-		return fmt.Errorf("MaxAppendEntries must be positive")
-	}
-	if config.MaxAppendEntries > 1024 {
-		return fmt.Errorf("MaxAppendEntries is too large")
-	}
-	if config.SnapshotInterval < 5*time.Millisecond {
-		return fmt.Errorf("Snapshot interval is too low")
-	}
-	if config.LeaderLeaseTimeout < 5*time.Millisecond {
-		return fmt.Errorf("Leader lease timeout is too low")
-	}
-	if config.LeaderLeaseTimeout > config.HeartbeatTimeout {
-		return fmt.Errorf("Leader lease timeout cannot be larger than heartbeat timeout")
-	}
-	if config.ElectionTimeout < config.HeartbeatTimeout {
-		return fmt.Errorf("Election timeout must be equal to or greater than heartbeat timeout")
-	}
-	return nil
-}
diff --git a/vendor/github.com/hashicorp/raft/discard_snapshot.go b/vendor/github.com/hashicorp/raft/discard_snapshot.go
deleted file mode 100644
index 1b4611d5..00000000
---
a/vendor/github.com/hashicorp/raft/discard_snapshot.go +++ /dev/null @@ -1,48 +0,0 @@ -package raft - -import ( - "fmt" - "io" -) - -// DiscardSnapshotStore is used to successfully snapshot while -// always discarding the snapshot. This is useful for when the -// log should be truncated but no snapshot should be retained. -// This should never be used for production use, and is only -// suitable for testing. -type DiscardSnapshotStore struct{} - -type DiscardSnapshotSink struct{} - -// NewDiscardSnapshotStore is used to create a new DiscardSnapshotStore. -func NewDiscardSnapshotStore() *DiscardSnapshotStore { - return &DiscardSnapshotStore{} -} - -func (d *DiscardSnapshotStore) Create(index, term uint64, peers []byte) (SnapshotSink, error) { - return &DiscardSnapshotSink{}, nil -} - -func (d *DiscardSnapshotStore) List() ([]*SnapshotMeta, error) { - return nil, nil -} - -func (d *DiscardSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) { - return nil, nil, fmt.Errorf("open is not supported") -} - -func (d *DiscardSnapshotSink) Write(b []byte) (int, error) { - return len(b), nil -} - -func (d *DiscardSnapshotSink) Close() error { - return nil -} - -func (d *DiscardSnapshotSink) ID() string { - return "discard" -} - -func (d *DiscardSnapshotSink) Cancel() error { - return nil -} diff --git a/vendor/github.com/hashicorp/raft/discard_snapshot_test.go b/vendor/github.com/hashicorp/raft/discard_snapshot_test.go deleted file mode 100644 index 5abedfe2..00000000 --- a/vendor/github.com/hashicorp/raft/discard_snapshot_test.go +++ /dev/null @@ -1,17 +0,0 @@ -package raft - -import "testing" - -func TestDiscardSnapshotStoreImpl(t *testing.T) { - var impl interface{} = &DiscardSnapshotStore{} - if _, ok := impl.(SnapshotStore); !ok { - t.Fatalf("DiscardSnapshotStore not a SnapshotStore") - } -} - -func TestDiscardSnapshotSinkImpl(t *testing.T) { - var impl interface{} = &DiscardSnapshotSink{} - if _, ok := impl.(SnapshotSink); !ok { - t.Fatalf("DiscardSnapshotSink not a SnapshotSink") - } -} diff --git a/vendor/github.com/hashicorp/raft/file_snapshot.go b/vendor/github.com/hashicorp/raft/file_snapshot.go deleted file mode 100644 index 5b6ccc4e..00000000 --- a/vendor/github.com/hashicorp/raft/file_snapshot.go +++ /dev/null @@ -1,513 +0,0 @@ -package raft - -import ( - "bufio" - "bytes" - "encoding/json" - "fmt" - "hash" - "hash/crc64" - "io" - "io/ioutil" - "log" - "os" - "path/filepath" - "runtime" - "sort" - "strings" - "time" -) - -const ( - testPath = "permTest" - snapPath = "snapshots" - metaFilePath = "meta.json" - stateFilePath = "state.bin" - tmpSuffix = ".tmp" -) - -// FileSnapshotStore implements the SnapshotStore interface and allows -// snapshots to be made on the local disk. -type FileSnapshotStore struct { - path string - retain int - logger *log.Logger -} - -type snapMetaSlice []*fileSnapshotMeta - -// FileSnapshotSink implements SnapshotSink with a file. -type FileSnapshotSink struct { - store *FileSnapshotStore - logger *log.Logger - dir string - parentDir string - meta fileSnapshotMeta - - stateFile *os.File - stateHash hash.Hash64 - buffered *bufio.Writer - - closed bool -} - -// fileSnapshotMeta is stored on disk. We also put a CRC -// on disk so that we can verify the snapshot. -type fileSnapshotMeta struct { - SnapshotMeta - CRC []byte -} - -// bufferedFile is returned when we open a snapshot. This way -// reads are buffered and the file still gets closed. 
-type bufferedFile struct { - bh *bufio.Reader - fh *os.File -} - -func (b *bufferedFile) Read(p []byte) (n int, err error) { - return b.bh.Read(p) -} - -func (b *bufferedFile) Close() error { - return b.fh.Close() -} - -// NewFileSnapshotStoreWithLogger creates a new FileSnapshotStore based -// on a base directory. The `retain` parameter controls how many -// snapshots are retained. Must be at least 1. -func NewFileSnapshotStoreWithLogger(base string, retain int, logger *log.Logger) (*FileSnapshotStore, error) { - if retain < 1 { - return nil, fmt.Errorf("must retain at least one snapshot") - } - if logger == nil { - logger = log.New(os.Stderr, "", log.LstdFlags) - } - - // Ensure our path exists - path := filepath.Join(base, snapPath) - if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { - return nil, fmt.Errorf("snapshot path not accessible: %v", err) - } - - // Setup the store - store := &FileSnapshotStore{ - path: path, - retain: retain, - logger: logger, - } - - // Do a permissions test - if err := store.testPermissions(); err != nil { - return nil, fmt.Errorf("permissions test failed: %v", err) - } - return store, nil -} - -// NewFileSnapshotStore creates a new FileSnapshotStore based -// on a base directory. The `retain` parameter controls how many -// snapshots are retained. Must be at least 1. -func NewFileSnapshotStore(base string, retain int, logOutput io.Writer) (*FileSnapshotStore, error) { - if logOutput == nil { - logOutput = os.Stderr - } - return NewFileSnapshotStoreWithLogger(base, retain, log.New(logOutput, "", log.LstdFlags)) -} - -// testPermissions tries to touch a file in our path to see if it works. -func (f *FileSnapshotStore) testPermissions() error { - path := filepath.Join(f.path, testPath) - fh, err := os.Create(path) - if err != nil { - return err - } - - if err = fh.Close(); err != nil { - return err - } - - if err = os.Remove(path); err != nil { - return err - } - return nil -} - -// snapshotName generates a name for the snapshot. 
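-// (The format is "term-index-msec"; for instance, a snapshot at term 3 and
-// index 10 taken at Unix millisecond 1575000000000 would be named
-// "3-10-1575000000000". The timestamp value here is illustrative only.)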
-func snapshotName(term, index uint64) string { - now := time.Now() - msec := now.UnixNano() / int64(time.Millisecond) - return fmt.Sprintf("%d-%d-%d", term, index, msec) -} - -// Create is used to start a new snapshot -func (f *FileSnapshotStore) Create(index, term uint64, peers []byte) (SnapshotSink, error) { - // Create a new path - name := snapshotName(term, index) - path := filepath.Join(f.path, name+tmpSuffix) - f.logger.Printf("[INFO] snapshot: Creating new snapshot at %s", path) - - // Make the directory - if err := os.MkdirAll(path, 0755); err != nil { - f.logger.Printf("[ERR] snapshot: Failed to make snapshot directory: %v", err) - return nil, err - } - - // Create the sink - sink := &FileSnapshotSink{ - store: f, - logger: f.logger, - dir: path, - parentDir: f.path, - meta: fileSnapshotMeta{ - SnapshotMeta: SnapshotMeta{ - ID: name, - Index: index, - Term: term, - Peers: peers, - }, - CRC: nil, - }, - } - - // Write out the meta data - if err := sink.writeMeta(); err != nil { - f.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err) - return nil, err - } - - // Open the state file - statePath := filepath.Join(path, stateFilePath) - fh, err := os.Create(statePath) - if err != nil { - f.logger.Printf("[ERR] snapshot: Failed to create state file: %v", err) - return nil, err - } - sink.stateFile = fh - - // Create a CRC64 hash - sink.stateHash = crc64.New(crc64.MakeTable(crc64.ECMA)) - - // Wrap both the hash and file in a MultiWriter with buffering - multi := io.MultiWriter(sink.stateFile, sink.stateHash) - sink.buffered = bufio.NewWriter(multi) - - // Done - return sink, nil -} - -// List returns available snapshots in the store. -func (f *FileSnapshotStore) List() ([]*SnapshotMeta, error) { - // Get the eligible snapshots - snapshots, err := f.getSnapshots() - if err != nil { - f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err) - return nil, err - } - - var snapMeta []*SnapshotMeta - for _, meta := range snapshots { - snapMeta = append(snapMeta, &meta.SnapshotMeta) - if len(snapMeta) == f.retain { - break - } - } - return snapMeta, nil -} - -// getSnapshots returns all the known snapshots. 
-func (f *FileSnapshotStore) getSnapshots() ([]*fileSnapshotMeta, error) { - // Get the eligible snapshots - snapshots, err := ioutil.ReadDir(f.path) - if err != nil { - f.logger.Printf("[ERR] snapshot: Failed to scan snapshot dir: %v", err) - return nil, err - } - - // Populate the metadata - var snapMeta []*fileSnapshotMeta - for _, snap := range snapshots { - // Ignore any files - if !snap.IsDir() { - continue - } - - // Ignore any temporary snapshots - dirName := snap.Name() - if strings.HasSuffix(dirName, tmpSuffix) { - f.logger.Printf("[WARN] snapshot: Found temporary snapshot: %v", dirName) - continue - } - - // Try to read the meta data - meta, err := f.readMeta(dirName) - if err != nil { - f.logger.Printf("[WARN] snapshot: Failed to read metadata for %v: %v", dirName, err) - continue - } - - // Append, but only return up to the retain count - snapMeta = append(snapMeta, meta) - } - - // Sort the snapshot, reverse so we get new -> old - sort.Sort(sort.Reverse(snapMetaSlice(snapMeta))) - - return snapMeta, nil -} - -// readMeta is used to read the meta data for a given named backup -func (f *FileSnapshotStore) readMeta(name string) (*fileSnapshotMeta, error) { - // Open the meta file - metaPath := filepath.Join(f.path, name, metaFilePath) - fh, err := os.Open(metaPath) - if err != nil { - return nil, err - } - defer fh.Close() - - // Buffer the file IO - buffered := bufio.NewReader(fh) - - // Read in the JSON - meta := &fileSnapshotMeta{} - dec := json.NewDecoder(buffered) - if err := dec.Decode(meta); err != nil { - return nil, err - } - return meta, nil -} - -// Open takes a snapshot ID and returns a ReadCloser for that snapshot. -func (f *FileSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) { - // Get the metadata - meta, err := f.readMeta(id) - if err != nil { - f.logger.Printf("[ERR] snapshot: Failed to get meta data to open snapshot: %v", err) - return nil, nil, err - } - - // Open the state file - statePath := filepath.Join(f.path, id, stateFilePath) - fh, err := os.Open(statePath) - if err != nil { - f.logger.Printf("[ERR] snapshot: Failed to open state file: %v", err) - return nil, nil, err - } - - // Create a CRC64 hash - stateHash := crc64.New(crc64.MakeTable(crc64.ECMA)) - - // Compute the hash - _, err = io.Copy(stateHash, fh) - if err != nil { - f.logger.Printf("[ERR] snapshot: Failed to read state file: %v", err) - fh.Close() - return nil, nil, err - } - - // Verify the hash - computed := stateHash.Sum(nil) - if bytes.Compare(meta.CRC, computed) != 0 { - f.logger.Printf("[ERR] snapshot: CRC checksum failed (stored: %v computed: %v)", - meta.CRC, computed) - fh.Close() - return nil, nil, fmt.Errorf("CRC mismatch") - } - - // Seek to the start - if _, err := fh.Seek(0, 0); err != nil { - f.logger.Printf("[ERR] snapshot: State file seek failed: %v", err) - fh.Close() - return nil, nil, err - } - - // Return a buffered file - buffered := &bufferedFile{ - bh: bufio.NewReader(fh), - fh: fh, - } - - return &meta.SnapshotMeta, buffered, nil -} - -// ReapSnapshots reaps any snapshots beyond the retain count. 
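-// (For example, with retain=2 and five snapshots on disk, the three oldest,
-// as ordered by term then index, are removed.)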
-func (f *FileSnapshotStore) ReapSnapshots() error {
-	snapshots, err := f.getSnapshots()
-	if err != nil {
-		f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
-		return err
-	}
-
-	for i := f.retain; i < len(snapshots); i++ {
-		path := filepath.Join(f.path, snapshots[i].ID)
-		f.logger.Printf("[INFO] snapshot: reaping snapshot %v", path)
-		if err := os.RemoveAll(path); err != nil {
-			f.logger.Printf("[ERR] snapshot: Failed to reap snapshot %v: %v", path, err)
-			return err
-		}
-	}
-	return nil
-}
-
-// ID returns the ID of the snapshot, can be used with Open()
-// after the snapshot is finalized.
-func (s *FileSnapshotSink) ID() string {
-	return s.meta.ID
-}
-
-// Write is used to append to the state file. We write to the
-// buffered IO object to reduce the amount of context switches.
-func (s *FileSnapshotSink) Write(b []byte) (int, error) {
-	return s.buffered.Write(b)
-}
-
-// Close is used to indicate a successful end.
-func (s *FileSnapshotSink) Close() error {
-	// Make sure close is idempotent
-	if s.closed {
-		return nil
-	}
-	s.closed = true
-
-	// Close the open handles
-	if err := s.finalize(); err != nil {
-		s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
-		if delErr := os.RemoveAll(s.dir); delErr != nil {
-			s.logger.Printf("[ERR] snapshot: Failed to delete temporary snapshot at path %v: %v", s.dir, delErr)
-			return delErr
-		}
-		return err
-	}
-
-	// Write out the meta data
-	if err := s.writeMeta(); err != nil {
-		s.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
-		return err
-	}
-
-	// Move the directory into place
-	newPath := strings.TrimSuffix(s.dir, tmpSuffix)
-	if err := os.Rename(s.dir, newPath); err != nil {
-		s.logger.Printf("[ERR] snapshot: Failed to move snapshot into place: %v", err)
-		return err
-	}
-
-	if runtime.GOOS != "windows" { // Skip fsync for directory entry edits on Windows; only needed for *nix style file systems
-		parentFH, err := os.Open(s.parentDir)
-		if err != nil {
-			s.logger.Printf("[ERR] snapshot: Failed to open snapshot parent directory %v, error: %v", s.parentDir, err)
-			return err
-		}
-		defer parentFH.Close()
-
-		if err = parentFH.Sync(); err != nil {
-			s.logger.Printf("[ERR] snapshot: Failed syncing parent directory %v, error: %v", s.parentDir, err)
-			return err
-		}
-	}
-
-	// Reap any old snapshots
-	if err := s.store.ReapSnapshots(); err != nil {
-		return err
-	}
-
-	return nil
-}
-
-// Cancel is used to indicate an unsuccessful end.
-func (s *FileSnapshotSink) Cancel() error {
-	// Make sure close is idempotent
-	if s.closed {
-		return nil
-	}
-	s.closed = true
-
-	// Close the open handles
-	if err := s.finalize(); err != nil {
-		s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
-		return err
-	}
-
-	// Attempt to remove all artifacts
-	return os.RemoveAll(s.dir)
-}
-
-// finalize is used to close all of our resources.
-func (s *FileSnapshotSink) finalize() error {
-	// Flush any remaining data
-	if err := s.buffered.Flush(); err != nil {
-		return err
-	}
-
-	// Sync to force fsync to disk
-	if err := s.stateFile.Sync(); err != nil {
-		return err
-	}
-
-	// Get the file size
-	stat, statErr := s.stateFile.Stat()
-
-	// Close the file
-	if err := s.stateFile.Close(); err != nil {
-		return err
-	}
-
-	// Set the file size, check after we close
-	if statErr != nil {
-		return statErr
-	}
-	s.meta.Size = stat.Size()
-
-	// Set the CRC
-	s.meta.CRC = s.stateHash.Sum(nil)
-	return nil
-}
-
-// writeMeta is used to write out the metadata we have.
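-// (It is written as JSON to meta.json alongside state.bin; roughly of the
-// form {"ID":"3-10-...","Index":10,"Term":3,"Peers":...,"Size":13,"CRC":...},
-// with values here being illustrative only.)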
-func (s *FileSnapshotSink) writeMeta() error { - // Open the meta file - metaPath := filepath.Join(s.dir, metaFilePath) - fh, err := os.Create(metaPath) - if err != nil { - return err - } - defer fh.Close() - - // Buffer the file IO - buffered := bufio.NewWriter(fh) - - // Write out as JSON - enc := json.NewEncoder(buffered) - if err := enc.Encode(&s.meta); err != nil { - return err - } - - if err = buffered.Flush(); err != nil { - return err - } - - if err = fh.Sync(); err != nil { - return err - } - - return nil -} - -// Implement the sort interface for []*fileSnapshotMeta. -func (s snapMetaSlice) Len() int { - return len(s) -} - -func (s snapMetaSlice) Less(i, j int) bool { - if s[i].Term != s[j].Term { - return s[i].Term < s[j].Term - } - if s[i].Index != s[j].Index { - return s[i].Index < s[j].Index - } - return s[i].ID < s[j].ID -} - -func (s snapMetaSlice) Swap(i, j int) { - s[i], s[j] = s[j], s[i] -} diff --git a/vendor/github.com/hashicorp/raft/file_snapshot_test.go b/vendor/github.com/hashicorp/raft/file_snapshot_test.go deleted file mode 100644 index fcd2ef4b..00000000 --- a/vendor/github.com/hashicorp/raft/file_snapshot_test.go +++ /dev/null @@ -1,343 +0,0 @@ -package raft - -import ( - "bytes" - "io" - "io/ioutil" - "os" - "runtime" - "testing" -) - -func FileSnapTest(t *testing.T) (string, *FileSnapshotStore) { - // Create a test dir - dir, err := ioutil.TempDir("", "raft") - if err != nil { - t.Fatalf("err: %v ", err) - } - - snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - return dir, snap -} - -func TestFileSnapshotStoreImpl(t *testing.T) { - var impl interface{} = &FileSnapshotStore{} - if _, ok := impl.(SnapshotStore); !ok { - t.Fatalf("FileSnapshotStore not a SnapshotStore") - } -} - -func TestFileSnapshotSinkImpl(t *testing.T) { - var impl interface{} = &FileSnapshotSink{} - if _, ok := impl.(SnapshotSink); !ok { - t.Fatalf("FileSnapshotSink not a SnapshotSink") - } -} - -func TestFileSS_CreateSnapshotMissingParentDir(t *testing.T) { - parent, err := ioutil.TempDir("", "raft") - if err != nil { - t.Fatalf("err: %v ", err) - } - defer os.RemoveAll(parent) - - dir, err := ioutil.TempDir(parent, "raft") - if err != nil { - t.Fatalf("err: %v ", err) - } - - snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - - os.RemoveAll(parent) - peers := []byte("all my lovely friends") - _, err = snap.Create(10, 3, peers) - if err != nil { - t.Fatalf("should not fail when using non existing parent") - } - -} -func TestFileSS_CreateSnapshot(t *testing.T) { - // Create a test dir - dir, err := ioutil.TempDir("", "raft") - if err != nil { - t.Fatalf("err: %v ", err) - } - defer os.RemoveAll(dir) - - snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - - // Check no snapshots - snaps, err := snap.List() - if err != nil { - t.Fatalf("err: %v", err) - } - if len(snaps) != 0 { - t.Fatalf("did not expect any snapshots: %v", snaps) - } - - // Create a new sink - peers := []byte("all my lovely friends") - sink, err := snap.Create(10, 3, peers) - if err != nil { - t.Fatalf("err: %v", err) - } - - // The sink is not done, should not be in a list! 
-	snaps, err = snap.List()
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if len(snaps) != 0 {
-		t.Fatalf("did not expect any snapshots: %v", snaps)
-	}
-
-	// Write to the sink
-	_, err = sink.Write([]byte("first\n"))
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	_, err = sink.Write([]byte("second\n"))
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-
-	// Done!
-	err = sink.Close()
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-
-	// Should have a snapshot!
-	snaps, err = snap.List()
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if len(snaps) != 1 {
-		t.Fatalf("expected a snapshot: %v", snaps)
-	}
-
-	// Check the latest
-	latest := snaps[0]
-	if latest.Index != 10 {
-		t.Fatalf("bad snapshot: %v", *latest)
-	}
-	if latest.Term != 3 {
-		t.Fatalf("bad snapshot: %v", *latest)
-	}
-	if bytes.Compare(latest.Peers, peers) != 0 {
-		t.Fatalf("bad snapshot: %v", *latest)
-	}
-	if latest.Size != 13 {
-		t.Fatalf("bad snapshot: %v", *latest)
-	}
-
-	// Read the snapshot
-	_, r, err := snap.Open(latest.ID)
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-
-	// Read out everything
-	var buf bytes.Buffer
-	if _, err := io.Copy(&buf, r); err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if err := r.Close(); err != nil {
-		t.Fatalf("err: %v", err)
-	}
-
-	// Ensure a match
-	if bytes.Compare(buf.Bytes(), []byte("first\nsecond\n")) != 0 {
-		t.Fatalf("content mismatch")
-	}
-}
-
-func TestFileSS_CancelSnapshot(t *testing.T) {
-	// Create a test dir
-	dir, err := ioutil.TempDir("", "raft")
-	if err != nil {
-		t.Fatalf("err: %v ", err)
-	}
-	defer os.RemoveAll(dir)
-
-	snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-
-	// Create a new sink
-	peers := []byte("all my lovely friends")
-	sink, err := snap.Create(10, 3, peers)
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-
-	// Cancel the snapshot! Should delete
-	err = sink.Cancel()
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-
-	// The sink is canceled, should not be in a list!
-	snaps, err := snap.List()
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if len(snaps) != 0 {
-		t.Fatalf("did not expect any snapshots: %v", snaps)
-	}
-}
-
-func TestFileSS_Retention(t *testing.T) {
-	// Create a test dir
-	dir, err := ioutil.TempDir("", "raft")
-	if err != nil {
-		t.Fatalf("err: %v ", err)
-	}
-	defer os.RemoveAll(dir)
-
-	snap, err := NewFileSnapshotStoreWithLogger(dir, 2, newTestLogger(t))
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-
-	// Create a new sink
-	peers := []byte("all my lovely friends")
-
-	// Create a few snapshots
-	for i := 10; i < 15; i++ {
-		sink, err := snap.Create(uint64(i), 3, peers)
-		if err != nil {
-			t.Fatalf("err: %v", err)
-		}
-		err = sink.Close()
-		if err != nil {
-			t.Fatalf("err: %v", err)
-		}
-	}
-
-	// Should only have 2 listed!
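-	// (List caps its results at the store's retain count and getSnapshots
-	// sorts new to old, so only the two most recent snapshots are returned.)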
-	snaps, err := snap.List()
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if len(snaps) != 2 {
-		t.Fatalf("expect 2 snapshots: %v", snaps)
-	}
-
-	// Check they are the latest
-	if snaps[0].Index != 14 {
-		t.Fatalf("bad snap: %#v", *snaps[0])
-	}
-	if snaps[1].Index != 13 {
-		t.Fatalf("bad snap: %#v", *snaps[1])
-	}
-}
-
-func TestFileSS_BadPerm(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("skipping file permission test on windows")
-	}
-
-	// Create a temp dir
-	dir1, err := ioutil.TempDir("", "raft")
-	if err != nil {
-		t.Fatalf("err: %s", err)
-	}
-	defer os.RemoveAll(dir1)
-
-	// Create a sub dir and remove all permissions
-	dir2, err := ioutil.TempDir(dir1, "badperm")
-	if err != nil {
-		t.Fatalf("err: %s", err)
-	}
-	if err := os.Chmod(dir2, 0000); err != nil {
-		t.Fatalf("err: %s", err)
-	}
-	defer os.Chmod(dir2, 0777) // Set perms back for delete
-
-	// Should fail
-	if _, err := NewFileSnapshotStore(dir2, 3, nil); err == nil {
-		t.Fatalf("should fail to use dir with bad perms")
-	}
-}
-
-func TestFileSS_MissingParentDir(t *testing.T) {
-	parent, err := ioutil.TempDir("", "raft")
-	if err != nil {
-		t.Fatalf("err: %v ", err)
-	}
-	defer os.RemoveAll(parent)
-
-	dir, err := ioutil.TempDir(parent, "raft")
-	if err != nil {
-		t.Fatalf("err: %v ", err)
-	}
-
-	os.RemoveAll(parent)
-	_, err = NewFileSnapshotStore(dir, 3, nil)
-	if err != nil {
-		t.Fatalf("should not fail when using non-existent parent")
-	}
-}
-
-func TestFileSS_Ordering(t *testing.T) {
-	// Create a test dir
-	dir, err := ioutil.TempDir("", "raft")
-	if err != nil {
-		t.Fatalf("err: %v ", err)
-	}
-	defer os.RemoveAll(dir)
-
-	snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-
-	// Create a new sink
-	peers := []byte("all my lovely friends")
-
-	sink, err := snap.Create(130350, 5, peers)
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	err = sink.Close()
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-
-	sink, err = snap.Create(204917, 36, peers)
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	err = sink.Close()
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-
-	// Should only have 2 listed!
-	snaps, err := snap.List()
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if len(snaps) != 2 {
-		t.Fatalf("expect 2 snapshots: %v", snaps)
-	}
-
-	// Check they are ordered
-	if snaps[0].Term != 36 {
-		t.Fatalf("bad snap: %#v", *snaps[0])
-	}
-	if snaps[1].Term != 5 {
-		t.Fatalf("bad snap: %#v", *snaps[1])
-	}
-}
diff --git a/vendor/github.com/hashicorp/raft/fsm.go b/vendor/github.com/hashicorp/raft/fsm.go
deleted file mode 100644
index ae52e9a7..00000000
--- a/vendor/github.com/hashicorp/raft/fsm.go
+++ /dev/null
@@ -1,40 +0,0 @@
-package raft
-
-import (
-	"io"
-)
-
-// FSM provides an interface that can be implemented by
-// clients to make use of the replicated log.
-type FSM interface {
-	// Apply is invoked once a log entry is committed.
-	// It returns a value which will be made available in the
-	// ApplyFuture returned by the Raft.Apply method if that
-	// method was called on the same Raft node as the FSM.
-	Apply(*Log) interface{}
-
-	// Snapshot is used to support log compaction. This call should
-	// return an FSMSnapshot which can be used to save a point-in-time
-	// snapshot of the FSM. Apply and Snapshot are not called in multiple
-	// threads, but Apply will be called concurrently with Persist.
This means - // the FSM should be implemented in a fashion that allows for concurrent - // updates while a snapshot is happening. - Snapshot() (FSMSnapshot, error) - - // Restore is used to restore an FSM from a snapshot. It is not called - // concurrently with any other command. The FSM must discard all previous - // state. - Restore(io.ReadCloser) error -} - -// FSMSnapshot is returned by an FSM in response to a Snapshot -// It must be safe to invoke FSMSnapshot methods with concurrent -// calls to Apply. -type FSMSnapshot interface { - // Persist should dump all necessary state to the WriteCloser 'sink', - // and call sink.Close() when finished or call sink.Cancel() on error. - Persist(sink SnapshotSink) error - - // Release is invoked when we are finished with the snapshot. - Release() -} diff --git a/vendor/github.com/hashicorp/raft/future.go b/vendor/github.com/hashicorp/raft/future.go deleted file mode 100644 index 177ef834..00000000 --- a/vendor/github.com/hashicorp/raft/future.go +++ /dev/null @@ -1,203 +0,0 @@ -package raft - -import ( - "sync" - "time" -) - -// Future is used to represent an action that may occur in the future. -type Future interface { - // Error blocks until the future arrives and then - // returns the error status of the future. - // This may be called any number of times - all - // calls will return the same value. - // Note that it is not OK to call this method - // twice concurrently on the same Future instance. - Error() error -} - -// ApplyFuture is used for Apply() and may return the FSM response. -type ApplyFuture interface { - Future - - // Response returns the FSM response as returned - // by the FSM.Apply method. This must not be called - // until after the Error method has returned. - Response() interface{} - - // Index holds the index of the newly applied log entry. - // This must not be called - // until after the Error method has returned. - Index() uint64 -} - -// errorFuture is used to return a static error. -type errorFuture struct { - err error -} - -func (e errorFuture) Error() error { - return e.err -} - -func (e errorFuture) Response() interface{} { - return nil -} - -func (e errorFuture) Index() uint64 { - return 0 -} - -// deferError can be embedded to allow a future -// to provide an error in the future. -type deferError struct { - err error - errCh chan error - responded bool -} - -func (d *deferError) init() { - d.errCh = make(chan error, 1) -} - -func (d *deferError) Error() error { - if d.err != nil { - // Note that when we've received a nil error, this - // won't trigger, but the channel is closed after - // send so we'll still return nil below. - return d.err - } - if d.errCh == nil { - panic("waiting for response on nil channel") - } - d.err = <-d.errCh - return d.err -} - -func (d *deferError) respond(err error) { - if d.errCh == nil { - return - } - if d.responded { - return - } - d.errCh <- err - close(d.errCh) - d.responded = true -} - -// logFuture is used to apply a log entry and waits until -// the log is considered committed. 
-type logFuture struct { - deferError - log Log - policy quorumPolicy - response interface{} - dispatch time.Time -} - -func (l *logFuture) Response() interface{} { - return l.response -} - -func (l *logFuture) Index() uint64 { - return l.log.Index -} - -type peerFuture struct { - deferError - peers []string -} - -type shutdownFuture struct { - raft *Raft -} - -func (s *shutdownFuture) Error() error { - if s.raft == nil { - return nil - } - s.raft.waitShutdown() - if closeable, ok := s.raft.trans.(WithClose); ok { - closeable.Close() - } - return nil -} - -// snapshotFuture is used for waiting on a snapshot to complete. -type snapshotFuture struct { - deferError -} - -// reqSnapshotFuture is used for requesting a snapshot start. -// It is only used internally. -type reqSnapshotFuture struct { - deferError - - // snapshot details provided by the FSM runner before responding - index uint64 - term uint64 - peers []string - snapshot FSMSnapshot -} - -// restoreFuture is used for requesting an FSM to perform a -// snapshot restore. Used internally only. -type restoreFuture struct { - deferError - ID string -} - -// verifyFuture is used to verify the current node is still -// the leader. This is to prevent a stale read. -type verifyFuture struct { - deferError - notifyCh chan *verifyFuture - quorumSize int - votes int - voteLock sync.Mutex -} - -// vote is used to respond to a verifyFuture. -// This may block when responding on the notifyCh. -func (v *verifyFuture) vote(leader bool) { - v.voteLock.Lock() - defer v.voteLock.Unlock() - - // Guard against having notified already - if v.notifyCh == nil { - return - } - - if leader { - v.votes++ - if v.votes >= v.quorumSize { - v.notifyCh <- v - v.notifyCh = nil - } - } else { - v.notifyCh <- v - v.notifyCh = nil - } -} - -// appendFuture is used for waiting on a pipelined append -// entries RPC. -type appendFuture struct { - deferError - start time.Time - args *AppendEntriesRequest - resp *AppendEntriesResponse -} - -func (a *appendFuture) Start() time.Time { - return a.start -} - -func (a *appendFuture) Request() *AppendEntriesRequest { - return a.args -} - -func (a *appendFuture) Response() *AppendEntriesResponse { - return a.resp -} diff --git a/vendor/github.com/hashicorp/raft/future_test.go b/vendor/github.com/hashicorp/raft/future_test.go deleted file mode 100644 index 8bb95832..00000000 --- a/vendor/github.com/hashicorp/raft/future_test.go +++ /dev/null @@ -1,42 +0,0 @@ -package raft - -import ( - "errors" - "testing" -) - -func TestDeferFutureSuccess(t *testing.T) { - var f deferError - f.init() - f.respond(nil) - if err := f.Error(); err != nil { - t.Fatalf("unexpected error result; got %#v want nil", err) - } - if err := f.Error(); err != nil { - t.Fatalf("unexpected error result; got %#v want nil", err) - } -} - -func TestDeferFutureError(t *testing.T) { - want := errors.New("x") - var f deferError - f.init() - f.respond(want) - if got := f.Error(); got != want { - t.Fatalf("unexpected error result; got %#v want %#v", got, want) - } - if got := f.Error(); got != want { - t.Fatalf("unexpected error result; got %#v want %#v", got, want) - } -} - -func TestDeferFutureConcurrent(t *testing.T) { - // Food for the race detector. 
- want := errors.New("x") - var f deferError - f.init() - go f.respond(want) - if got := f.Error(); got != want { - t.Errorf("unexpected error result; got %#v want %#v", got, want) - } -} diff --git a/vendor/github.com/hashicorp/raft/inflight.go b/vendor/github.com/hashicorp/raft/inflight.go deleted file mode 100644 index 7014ff50..00000000 --- a/vendor/github.com/hashicorp/raft/inflight.go +++ /dev/null @@ -1,213 +0,0 @@ -package raft - -import ( - "container/list" - "sync" -) - -// QuorumPolicy allows individual logFutures to have different -// commitment rules while still using the inflight mechanism. -type quorumPolicy interface { - // Checks if a commit from a given peer is enough to - // satisfy the commitment rules - Commit() bool - - // Checks if a commit is committed - IsCommitted() bool -} - -// MajorityQuorum is used by Apply transactions and requires -// a simple majority of nodes. -type majorityQuorum struct { - count int - votesNeeded int -} - -func newMajorityQuorum(clusterSize int) *majorityQuorum { - votesNeeded := (clusterSize / 2) + 1 - return &majorityQuorum{count: 0, votesNeeded: votesNeeded} -} - -func (m *majorityQuorum) Commit() bool { - m.count++ - return m.count >= m.votesNeeded -} - -func (m *majorityQuorum) IsCommitted() bool { - return m.count >= m.votesNeeded -} - -// Inflight is used to track operations that are still in-flight. -type inflight struct { - sync.Mutex - committed *list.List - commitCh chan struct{} - minCommit uint64 - maxCommit uint64 - operations map[uint64]*logFuture - stopCh chan struct{} -} - -// NewInflight returns an inflight struct that notifies -// the provided channel when logs are finished committing. -func newInflight(commitCh chan struct{}) *inflight { - return &inflight{ - committed: list.New(), - commitCh: commitCh, - minCommit: 0, - maxCommit: 0, - operations: make(map[uint64]*logFuture), - stopCh: make(chan struct{}), - } -} - -// Start is used to mark a logFuture as being inflight. It -// also commits the entry, as it is assumed the leader is -// starting. -func (i *inflight) Start(l *logFuture) { - i.Lock() - defer i.Unlock() - i.start(l) -} - -// StartAll is used to mark a list of logFuture's as being -// inflight. It also commits each entry as the leader is -// assumed to be starting. -func (i *inflight) StartAll(logs []*logFuture) { - i.Lock() - defer i.Unlock() - for _, l := range logs { - i.start(l) - } -} - -// start is used to mark a single entry as inflight, -// must be invoked with the lock held. -func (i *inflight) start(l *logFuture) { - idx := l.log.Index - i.operations[idx] = l - - if idx > i.maxCommit { - i.maxCommit = idx - } - if i.minCommit == 0 { - i.minCommit = idx - } - i.commit(idx) -} - -// Cancel is used to cancel all in-flight operations. -// This is done when the leader steps down, and all futures -// are sent the given error. 
-func (i *inflight) Cancel(err error) {
-	// Close the channel first to unblock any pending commits
-	close(i.stopCh)
-
-	// Lock after close to avoid deadlock
-	i.Lock()
-	defer i.Unlock()
-
-	// Respond to all inflight operations
-	for _, op := range i.operations {
-		op.respond(err)
-	}
-
-	// Clear all the committed but not yet processed futures
-	for e := i.committed.Front(); e != nil; e = e.Next() {
-		e.Value.(*logFuture).respond(err)
-	}
-
-	// Clear the map
-	i.operations = make(map[uint64]*logFuture)
-
-	// Clear the list of committed
-	i.committed = list.New()
-
-	// Close the commitCh
-	close(i.commitCh)
-
-	// Reset indexes
-	i.minCommit = 0
-	i.maxCommit = 0
-}
-
-// Committed returns all the committed operations in order.
-func (i *inflight) Committed() (l *list.List) {
-	i.Lock()
-	l, i.committed = i.committed, list.New()
-	i.Unlock()
-	return l
-}
-
-// Commit is used by leader replication routines to indicate that
-// a follower was finished committing a log to disk.
-func (i *inflight) Commit(index uint64) {
-	i.Lock()
-	defer i.Unlock()
-	i.commit(index)
-}
-
-// CommitRange is used to commit a range of indexes inclusively.
-// It is optimized to avoid commits for indexes that are not tracked.
-func (i *inflight) CommitRange(minIndex, maxIndex uint64) {
-	i.Lock()
-	defer i.Unlock()
-
-	// Update the minimum index
-	minIndex = max(i.minCommit, minIndex)
-
-	// Commit each index
-	for idx := minIndex; idx <= maxIndex; idx++ {
-		i.commit(idx)
-	}
-}
-
-// commit is used to commit a single index. Must be called with the lock held.
-func (i *inflight) commit(index uint64) {
-	op, ok := i.operations[index]
-	if !ok {
-		// Ignore if not in the map, as it may be committed already
-		return
-	}
-
-	// Check if we've satisfied the commit
-	if !op.policy.Commit() {
-		return
-	}
-
-	// Cannot commit if this is not the minimum inflight. This can happen
-	// if the quorum size changes, meaning a previous commit requires a larger
-	// quorum than this commit. We MUST block until the previous log is committed,
-	// otherwise logs will be applied out of order.
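-	// (For example: if index 6 entered under quorum size 3 and index 7 under
-	// quorum size 2, index 7 may gather enough acks first; committing it
-	// before index 6 would apply the log out of order, so we wait on minCommit.)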
-	if index != i.minCommit {
-		return
-	}
-
-NOTIFY:
-	// Add the operation to the committed list
-	i.committed.PushBack(op)
-
-	// Stop tracking since it is committed
-	delete(i.operations, index)
-
-	// Update the indexes
-	if index == i.maxCommit {
-		i.minCommit = 0
-		i.maxCommit = 0
-
-	} else {
-		i.minCommit++
-	}
-
-	// Check if the next in-flight operation is ready
-	if i.minCommit != 0 {
-		op = i.operations[i.minCommit]
-		if op.policy.IsCommitted() {
-			index = i.minCommit
-			goto NOTIFY
-		}
-	}
-
-	// Async notify of ready operations
-	asyncNotifyCh(i.commitCh)
-}
diff --git a/vendor/github.com/hashicorp/raft/inflight_test.go b/vendor/github.com/hashicorp/raft/inflight_test.go
deleted file mode 100644
index a9f57d6e..00000000
--- a/vendor/github.com/hashicorp/raft/inflight_test.go
+++ /dev/null
@@ -1,150 +0,0 @@
-package raft
-
-import (
-	"fmt"
-	"testing"
-)
-
-func TestInflight_StartCommit(t *testing.T) {
-	commitCh := make(chan struct{}, 1)
-	in := newInflight(commitCh)
-
-	// Commit a transaction as being in flight
-	l := &logFuture{log: Log{Index: 1}}
-	l.policy = newMajorityQuorum(5)
-	in.Start(l)
-
-	// Commit 3 times
-	in.Commit(1)
-	if in.Committed().Len() != 0 {
-		t.Fatalf("should not be committed")
-	}
-
-	in.Commit(1)
-	if in.Committed().Len() != 1 {
-		t.Fatalf("should be committed")
-	}
-
-	// Already committed but should work anyways
-	in.Commit(1)
-}
-
-func TestInflight_Cancel(t *testing.T) {
-	commitCh := make(chan struct{}, 1)
-	in := newInflight(commitCh)
-
-	// Commit a transaction as being in flight
-	l := &logFuture{
-		log: Log{Index: 1},
-	}
-	l.init()
-	l.policy = newMajorityQuorum(3)
-	in.Start(l)
-
-	// Cancel with an error
-	err := fmt.Errorf("error 1")
-	in.Cancel(err)
-
-	// Should get an error return
-	if l.Error() != err {
-		t.Fatalf("expected error")
-	}
-}
-
-func TestInflight_StartAll(t *testing.T) {
-	commitCh := make(chan struct{}, 1)
-	in := newInflight(commitCh)
-
-	// Commit a few transactions as being in flight
-	l1 := &logFuture{log: Log{Index: 2}}
-	l1.policy = newMajorityQuorum(5)
-	l2 := &logFuture{log: Log{Index: 3}}
-	l2.policy = newMajorityQuorum(5)
-	l3 := &logFuture{log: Log{Index: 4}}
-	l3.policy = newMajorityQuorum(5)
-
-	// Start all the entries
-	in.StartAll([]*logFuture{l1, l2, l3})
-
-	// Commit ranges
-	in.CommitRange(1, 5)
-	in.CommitRange(1, 4)
-	in.CommitRange(1, 10)
-
-	// Should get 3 back
-	if in.Committed().Len() != 3 {
-		t.Fatalf("expected all 3 to commit")
-	}
-}
-
-func TestInflight_CommitRange(t *testing.T) {
-	commitCh := make(chan struct{}, 1)
-	in := newInflight(commitCh)
-
-	// Commit a few transactions as being in flight
-	l1 := &logFuture{log: Log{Index: 2}}
-	l1.policy = newMajorityQuorum(5)
-	in.Start(l1)
-
-	l2 := &logFuture{log: Log{Index: 3}}
-	l2.policy = newMajorityQuorum(5)
-	in.Start(l2)
-
-	l3 := &logFuture{log: Log{Index: 4}}
-	l3.policy = newMajorityQuorum(5)
-	in.Start(l3)
-
-	// Commit ranges
-	in.CommitRange(1, 5)
-	in.CommitRange(1, 4)
-	in.CommitRange(1, 10)
-
-	// Should get 3 back
-	if in.Committed().Len() != 3 {
-		t.Fatalf("expected all 3 to commit")
-	}
-}
-
-// Should panic if we commit non-contiguously!
-func TestInflight_NonContiguous(t *testing.T) {
-	commitCh := make(chan struct{}, 1)
-	in := newInflight(commitCh)
-
-	// Commit a few transactions as being in flight
-	l1 := &logFuture{log: Log{Index: 2}}
-	l1.policy = newMajorityQuorum(5)
-	in.Start(l1)
-
-	l2 := &logFuture{log: Log{Index: 3}}
-	l2.policy = newMajorityQuorum(5)
-	in.Start(l2)
-
-	in.Commit(3)
-	in.Commit(3)
-	in.Commit(3) // panic!
-
-	if in.Committed().Len() != 0 {
-		t.Fatalf("should not commit")
-	}
-
-	in.Commit(2)
-	in.Commit(2)
-	in.Commit(2) // panic!
-
-	committed := in.Committed()
-	if committed.Len() != 2 {
-		t.Fatalf("should commit both")
-	}
-
-	current := committed.Front()
-	l := current.Value.(*logFuture)
-	if l.log.Index != 2 {
-		t.Fatalf("bad: %v", *l)
-	}
-
-	current = current.Next()
-	l = current.Value.(*logFuture)
-	if l.log.Index != 3 {
-		t.Fatalf("bad: %v", *l)
-	}
-}
diff --git a/vendor/github.com/hashicorp/raft/inmem_store.go b/vendor/github.com/hashicorp/raft/inmem_store.go
deleted file mode 100644
index 6e4dfd02..00000000
--- a/vendor/github.com/hashicorp/raft/inmem_store.go
+++ /dev/null
@@ -1,116 +0,0 @@
-package raft
-
-import (
-	"sync"
-)
-
-// InmemStore implements the LogStore and StableStore interfaces.
-// It should NOT EVER be used for production. It is used only for
-// unit tests. Use the MDBStore implementation instead.
-type InmemStore struct {
-	l         sync.RWMutex
-	lowIndex  uint64
-	highIndex uint64
-	logs      map[uint64]*Log
-	kv        map[string][]byte
-	kvInt     map[string]uint64
-}
-
-// NewInmemStore returns a new in-memory backend. Do not ever
-// use for production. Only for testing.
-func NewInmemStore() *InmemStore {
-	i := &InmemStore{
-		logs:  make(map[uint64]*Log),
-		kv:    make(map[string][]byte),
-		kvInt: make(map[string]uint64),
-	}
-	return i
-}
-
-// FirstIndex implements the LogStore interface.
-func (i *InmemStore) FirstIndex() (uint64, error) {
-	i.l.RLock()
-	defer i.l.RUnlock()
-	return i.lowIndex, nil
-}
-
-// LastIndex implements the LogStore interface.
-func (i *InmemStore) LastIndex() (uint64, error) {
-	i.l.RLock()
-	defer i.l.RUnlock()
-	return i.highIndex, nil
-}
-
-// GetLog implements the LogStore interface.
-func (i *InmemStore) GetLog(index uint64, log *Log) error {
-	i.l.RLock()
-	defer i.l.RUnlock()
-	l, ok := i.logs[index]
-	if !ok {
-		return ErrLogNotFound
-	}
-	*log = *l
-	return nil
-}
-
-// StoreLog implements the LogStore interface.
-func (i *InmemStore) StoreLog(log *Log) error {
-	return i.StoreLogs([]*Log{log})
-}
-
-// StoreLogs implements the LogStore interface.
-func (i *InmemStore) StoreLogs(logs []*Log) error {
-	i.l.Lock()
-	defer i.l.Unlock()
-	for _, l := range logs {
-		i.logs[l.Index] = l
-		if i.lowIndex == 0 {
-			i.lowIndex = l.Index
-		}
-		if l.Index > i.highIndex {
-			i.highIndex = l.Index
-		}
-	}
-	return nil
-}
-
-// DeleteRange implements the LogStore interface.
-func (i *InmemStore) DeleteRange(min, max uint64) error {
-	i.l.Lock()
-	defer i.l.Unlock()
-	for j := min; j <= max; j++ {
-		delete(i.logs, j)
-	}
-	i.lowIndex = max + 1
-	return nil
-}
-
-// Set implements the StableStore interface.
-func (i *InmemStore) Set(key []byte, val []byte) error {
-	i.l.Lock()
-	defer i.l.Unlock()
-	i.kv[string(key)] = val
-	return nil
-}
-
-// Get implements the StableStore interface.
-func (i *InmemStore) Get(key []byte) ([]byte, error) {
-	i.l.RLock()
-	defer i.l.RUnlock()
-	return i.kv[string(key)], nil
-}
-
-// SetUint64 implements the StableStore interface.
-func (i *InmemStore) SetUint64(key []byte, val uint64) error {
-	i.l.Lock()
-	defer i.l.Unlock()
-	i.kvInt[string(key)] = val
-	return nil
-}
-
-// GetUint64 implements the StableStore interface.
-func (i *InmemStore) GetUint64(key []byte) (uint64, error) {
-	i.l.RLock()
-	defer i.l.RUnlock()
-	return i.kvInt[string(key)], nil
-}
diff --git a/vendor/github.com/hashicorp/raft/inmem_transport.go b/vendor/github.com/hashicorp/raft/inmem_transport.go
deleted file mode 100644
index 2d5f3190..00000000
--- a/vendor/github.com/hashicorp/raft/inmem_transport.go
+++ /dev/null
@@ -1,324 +0,0 @@
-package raft
-
-import (
-	"fmt"
-	"io"
-	"sync"
-	"time"
-)
-
-// NewInmemAddr returns a new in-memory addr with
-// a randomly generated UUID as the ID.
-func NewInmemAddr() string {
-	return generateUUID()
-}
-
-// inmemPipeline is used to pipeline requests for the in-mem transport.
-type inmemPipeline struct {
-	trans    *InmemTransport
-	peer     *InmemTransport
-	peerAddr string
-
-	doneCh       chan AppendFuture
-	inprogressCh chan *inmemPipelineInflight
-
-	shutdown     bool
-	shutdownCh   chan struct{}
-	shutdownLock sync.Mutex
-}
-
-type inmemPipelineInflight struct {
-	future *appendFuture
-	respCh <-chan RPCResponse
-}
-
-// InmemTransport implements the Transport interface, to allow Raft to be
-// tested in-memory without going over a network.
-type InmemTransport struct {
-	sync.RWMutex
-	consumerCh chan RPC
-	localAddr  string
-	peers      map[string]*InmemTransport
-	pipelines  []*inmemPipeline
-	timeout    time.Duration
-}
-
-// NewInmemTransport is used to initialize a new transport
-// and generates a random local address if none is specified.
-func NewInmemTransport(addr string) (string, *InmemTransport) {
-	if addr == "" {
-		addr = NewInmemAddr()
-	}
-	trans := &InmemTransport{
-		consumerCh: make(chan RPC, 16),
-		localAddr:  addr,
-		peers:      make(map[string]*InmemTransport),
-		timeout:    50 * time.Millisecond,
-	}
-	return addr, trans
-}
-
-// SetHeartbeatHandler is used to set an optional fast-path for
-// heartbeats; not supported for this transport.
-func (i *InmemTransport) SetHeartbeatHandler(cb func(RPC)) {
-}
-
-// Consumer implements the Transport interface.
-func (i *InmemTransport) Consumer() <-chan RPC {
-	return i.consumerCh
-}
-
-// LocalAddr implements the Transport interface.
-func (i *InmemTransport) LocalAddr() string {
-	return i.localAddr
-}
-
-// AppendEntriesPipeline returns an interface that can be used to pipeline
-// AppendEntries requests.
-func (i *InmemTransport) AppendEntriesPipeline(target string) (AppendPipeline, error) {
-	i.RLock()
-	peer, ok := i.peers[target]
-	i.RUnlock()
-	if !ok {
-		return nil, fmt.Errorf("failed to connect to peer: %v", target)
-	}
-	pipeline := newInmemPipeline(i, peer, target)
-	i.Lock()
-	i.pipelines = append(i.pipelines, pipeline)
-	i.Unlock()
-	return pipeline, nil
-}
-
-// AppendEntries implements the Transport interface.
-func (i *InmemTransport) AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
-	rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
-	if err != nil {
-		return err
-	}
-
-	// Copy the result back
-	out := rpcResp.Response.(*AppendEntriesResponse)
-	*resp = *out
-	return nil
-}
-
-// RequestVote implements the Transport interface.
-func (i *InmemTransport) RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error { - rpcResp, err := i.makeRPC(target, args, nil, i.timeout) - if err != nil { - return err - } - - // Copy the result back - out := rpcResp.Response.(*RequestVoteResponse) - *resp = *out - return nil -} - -// InstallSnapshot implements the Transport interface. -func (i *InmemTransport) InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error { - rpcResp, err := i.makeRPC(target, args, data, 10*i.timeout) - if err != nil { - return err - } - - // Copy the result back - out := rpcResp.Response.(*InstallSnapshotResponse) - *resp = *out - return nil -} - -func (i *InmemTransport) makeRPC(target string, args interface{}, r io.Reader, timeout time.Duration) (rpcResp RPCResponse, err error) { - i.RLock() - peer, ok := i.peers[target] - i.RUnlock() - - if !ok { - err = fmt.Errorf("failed to connect to peer: %v", target) - return - } - - // Send the RPC over - respCh := make(chan RPCResponse) - peer.consumerCh <- RPC{ - Command: args, - Reader: r, - RespChan: respCh, - } - - // Wait for a response - select { - case rpcResp = <-respCh: - if rpcResp.Error != nil { - err = rpcResp.Error - } - case <-time.After(timeout): - err = fmt.Errorf("command timed out") - } - return -} - -// EncodePeer implements the Transport interface. It uses the UUID as the -// address directly. -func (i *InmemTransport) EncodePeer(p string) []byte { - return []byte(p) -} - -// DecodePeer implements the Transport interface. It wraps the UUID in an -// InmemAddr. -func (i *InmemTransport) DecodePeer(buf []byte) string { - return string(buf) -} - -// Connect is used to connect this transport to another transport for -// a given peer name. This allows for local routing. -func (i *InmemTransport) Connect(peer string, t Transport) { - trans := t.(*InmemTransport) - i.Lock() - defer i.Unlock() - i.peers[peer] = trans -} - -// Disconnect is used to remove the ability to route to a given peer. -func (i *InmemTransport) Disconnect(peer string) { - i.Lock() - defer i.Unlock() - delete(i.peers, peer) - - // Disconnect any pipelines - n := len(i.pipelines) - for idx := 0; idx < n; idx++ { - if i.pipelines[idx].peerAddr == peer { - i.pipelines[idx].Close() - i.pipelines[idx], i.pipelines[n-1] = i.pipelines[n-1], nil - idx-- - n-- - } - } - i.pipelines = i.pipelines[:n] -} - -// DisconnectAll is used to remove all routes to peers. 
-func (i *InmemTransport) DisconnectAll() { - i.Lock() - defer i.Unlock() - i.peers = make(map[string]*InmemTransport) - - // Handle pipelines - for _, pipeline := range i.pipelines { - pipeline.Close() - } - i.pipelines = nil -} - -// Close is used to permanently disable the transport -func (i *InmemTransport) Close() error { - i.DisconnectAll() - return nil -} - -func newInmemPipeline(trans *InmemTransport, peer *InmemTransport, addr string) *inmemPipeline { - i := &inmemPipeline{ - trans: trans, - peer: peer, - peerAddr: addr, - doneCh: make(chan AppendFuture, 16), - inprogressCh: make(chan *inmemPipelineInflight, 16), - shutdownCh: make(chan struct{}), - } - go i.decodeResponses() - return i -} - -func (i *inmemPipeline) decodeResponses() { - timeout := i.trans.timeout - for { - select { - case inp := <-i.inprogressCh: - var timeoutCh <-chan time.Time - if timeout > 0 { - timeoutCh = time.After(timeout) - } - - select { - case rpcResp := <-inp.respCh: - // Copy the result back - *inp.future.resp = *rpcResp.Response.(*AppendEntriesResponse) - inp.future.respond(rpcResp.Error) - - select { - case i.doneCh <- inp.future: - case <-i.shutdownCh: - return - } - - case <-timeoutCh: - inp.future.respond(fmt.Errorf("command timed out")) - select { - case i.doneCh <- inp.future: - case <-i.shutdownCh: - return - } - - case <-i.shutdownCh: - return - } - case <-i.shutdownCh: - return - } - } -} - -func (i *inmemPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) { - // Create a new future - future := &appendFuture{ - start: time.Now(), - args: args, - resp: resp, - } - future.init() - - // Handle a timeout - var timeout <-chan time.Time - if i.trans.timeout > 0 { - timeout = time.After(i.trans.timeout) - } - - // Send the RPC over - respCh := make(chan RPCResponse, 1) - rpc := RPC{ - Command: args, - RespChan: respCh, - } - select { - case i.peer.consumerCh <- rpc: - case <-timeout: - return nil, fmt.Errorf("command enqueue timeout") - case <-i.shutdownCh: - return nil, ErrPipelineShutdown - } - - // Send to be decoded - select { - case i.inprogressCh <- &inmemPipelineInflight{future, respCh}: - return future, nil - case <-i.shutdownCh: - return nil, ErrPipelineShutdown - } -} - -func (i *inmemPipeline) Consumer() <-chan AppendFuture { - return i.doneCh -} - -func (i *inmemPipeline) Close() error { - i.shutdownLock.Lock() - defer i.shutdownLock.Unlock() - if i.shutdown { - return nil - } - - i.shutdown = true - close(i.shutdownCh) - return nil -} diff --git a/vendor/github.com/hashicorp/raft/inmem_transport_test.go b/vendor/github.com/hashicorp/raft/inmem_transport_test.go deleted file mode 100644 index 82c95348..00000000 --- a/vendor/github.com/hashicorp/raft/inmem_transport_test.go +++ /dev/null @@ -1,18 +0,0 @@ -package raft - -import ( - "testing" -) - -func TestInmemTransportImpl(t *testing.T) { - var inm interface{} = &InmemTransport{} - if _, ok := inm.(Transport); !ok { - t.Fatalf("InmemTransport is not a Transport") - } - if _, ok := inm.(LoopbackTransport); !ok { - t.Fatalf("InmemTransport is not a Loopback Transport") - } - if _, ok := inm.(WithPeers); !ok { - t.Fatalf("InmemTransport is not a WithPeers Transport") - } -} diff --git a/vendor/github.com/hashicorp/raft/integ_test.go b/vendor/github.com/hashicorp/raft/integ_test.go deleted file mode 100644 index 66654be4..00000000 --- a/vendor/github.com/hashicorp/raft/integ_test.go +++ /dev/null @@ -1,336 +0,0 @@ -package raft - -import ( - "bytes" - "fmt" - "io/ioutil" - "log" - "os" - 
"testing" - "time" -) - -// CheckInteg will skip a test if integration testing is not enabled. -func CheckInteg(t *testing.T) { - if !IsInteg() { - t.SkipNow() - } -} - -// IsInteg returns a boolean telling you if we're in integ testing mode. -func IsInteg() bool { - return os.Getenv("INTEG_TESTS") != "" -} - -type RaftEnv struct { - dir string - conf *Config - fsm *MockFSM - store *InmemStore - snapshot *FileSnapshotStore - peers *JSONPeers - trans *NetworkTransport - raft *Raft - logger *log.Logger -} - -// Release shuts down and cleans up any stored data, its not restartable after this -func (r *RaftEnv) Release() { - r.Shutdown() - os.RemoveAll(r.dir) -} - -// Shutdown shuts down raft & transport, but keeps track of its data, its restartable -// after a Shutdown() by calling Start() -func (r *RaftEnv) Shutdown() { - r.logger.Printf("[WARN] Shutdown node at %v", r.raft.localAddr) - f := r.raft.Shutdown() - if err := f.Error(); err != nil { - panic(err) - } - r.trans.Close() -} - -// Restart will start a raft node that was previously Shutdown() -func (r *RaftEnv) Restart(t *testing.T) { - trans, err := NewTCPTransport(r.raft.localAddr, nil, 2, time.Second, nil) - if err != nil { - t.Fatalf("err: %v", err) - } - r.trans = trans - r.logger.Printf("[INFO] Starting node at %v", trans.LocalAddr()) - raft, err := NewRaft(r.conf, r.fsm, r.store, r.store, r.snapshot, r.peers, r.trans) - if err != nil { - t.Fatalf("err: %v", err) - } - r.raft = raft -} - -func MakeRaft(t *testing.T, conf *Config) *RaftEnv { - // Set the config - if conf == nil { - conf = inmemConfig(t) - } - - dir, err := ioutil.TempDir("", "raft") - if err != nil { - t.Fatalf("err: %v ", err) - } - - stable := NewInmemStore() - - snap, err := NewFileSnapshotStore(dir, 3, nil) - if err != nil { - t.Fatalf("err: %v", err) - } - - env := &RaftEnv{ - conf: conf, - dir: dir, - store: stable, - snapshot: snap, - fsm: &MockFSM{}, - } - - trans, err := NewTCPTransport("127.0.0.1:0", nil, 2, time.Second, nil) - if err != nil { - t.Fatalf("err: %v", err) - } - env.logger = log.New(os.Stdout, trans.LocalAddr()+" :", log.Lmicroseconds) - env.trans = trans - - env.peers = NewJSONPeers(dir, trans) - - env.logger.Printf("[INFO] Starting node at %v", trans.LocalAddr()) - conf.Logger = env.logger - raft, err := NewRaft(conf, env.fsm, stable, stable, snap, env.peers, trans) - if err != nil { - t.Fatalf("err: %v", err) - } - env.raft = raft - return env -} - -func WaitFor(env *RaftEnv, state RaftState) error { - limit := time.Now().Add(200 * time.Millisecond) - for env.raft.State() != state { - if time.Now().Before(limit) { - time.Sleep(10 * time.Millisecond) - } else { - return fmt.Errorf("failed to transition to state %v", state) - } - } - return nil -} - -func WaitForAny(state RaftState, envs []*RaftEnv) (*RaftEnv, error) { - limit := time.Now().Add(200 * time.Millisecond) -CHECK: - for _, env := range envs { - if env.raft.State() == state { - return env, nil - } - } - if time.Now().Before(limit) { - goto WAIT - } - return nil, fmt.Errorf("failed to find node in %v state", state) -WAIT: - time.Sleep(10 * time.Millisecond) - goto CHECK -} - -func WaitFuture(f Future, t *testing.T) error { - timer := time.AfterFunc(200*time.Millisecond, func() { - panic(fmt.Errorf("timeout waiting for future %v", f)) - }) - defer timer.Stop() - return f.Error() -} - -func NoErr(err error, t *testing.T) { - if err != nil { - t.Fatalf("err: %v", err) - } -} - -func CheckConsistent(envs []*RaftEnv, t *testing.T) { - limit := time.Now().Add(400 * time.Millisecond) - 
first := envs[0] - first.fsm.Lock() - defer first.fsm.Unlock() - var err error -CHECK: - l1 := len(first.fsm.logs) - for i := 1; i < len(envs); i++ { - env := envs[i] - env.fsm.Lock() - l2 := len(env.fsm.logs) - if l1 != l2 { - err = fmt.Errorf("log length mismatch %d %d", l1, l2) - env.fsm.Unlock() - goto ERR - } - for idx, log := range first.fsm.logs { - other := env.fsm.logs[idx] - if bytes.Compare(log, other) != 0 { - err = fmt.Errorf("log entry %d mismatch between %s/%s : '%s' / '%s'", idx, first.raft.localAddr, env.raft.localAddr, log, other) - env.fsm.Unlock() - goto ERR - } - } - env.fsm.Unlock() - } - return -ERR: - if time.Now().After(limit) { - t.Fatalf("%v", err) - } - first.fsm.Unlock() - time.Sleep(20 * time.Millisecond) - first.fsm.Lock() - goto CHECK -} - -// return a log entry that's at least sz long that has the prefix 'test i ' -func logBytes(i, sz int) []byte { - var logBuffer bytes.Buffer - fmt.Fprintf(&logBuffer, "test %d ", i) - for logBuffer.Len() < sz { - logBuffer.WriteByte('x') - } - return logBuffer.Bytes() - -} - -// Tests Raft by creating a cluster, growing it to 5 nodes while -// causing various stressful conditions -func TestRaft_Integ(t *testing.T) { - CheckInteg(t) - conf := DefaultConfig() - conf.HeartbeatTimeout = 50 * time.Millisecond - conf.ElectionTimeout = 50 * time.Millisecond - conf.LeaderLeaseTimeout = 50 * time.Millisecond - conf.CommitTimeout = 5 * time.Millisecond - conf.SnapshotThreshold = 100 - conf.TrailingLogs = 10 - conf.EnableSingleNode = true - - // Create a single node - env1 := MakeRaft(t, conf) - NoErr(WaitFor(env1, Leader), t) - - totalApplied := 0 - applyAndWait := func(leader *RaftEnv, n int, sz int) { - // Do some commits - var futures []ApplyFuture - for i := 0; i < n; i++ { - futures = append(futures, leader.raft.Apply(logBytes(i, sz), 0)) - } - for _, f := range futures { - NoErr(WaitFuture(f, t), t) - leader.logger.Printf("[DEBUG] Applied at %d, size %d", f.Index(), sz) - } - totalApplied += n - } - // Do some commits - applyAndWait(env1, 100, 10) - - // Do a snapshot - NoErr(WaitFuture(env1.raft.Snapshot(), t), t) - - // Join a few nodes! 
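// The loop below grows the cluster with the pre-v1 membership API, where a
// peer is identified by its transport address alone (raft.AddPeer). Under
// Hashicorp Raft v1, which this changeset migrates to, a membership change
// also carries an explicit server ID. A hedged sketch from a caller's
// perspective, with an illustrative ID value:
//
//	f := leader.raft.AddVoter(raft.ServerID("node2"), raft.ServerAddress(addr), 0, 0)
//	if err := f.Error(); err != nil {
//		// configuration change failed
//	}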
- var envs []*RaftEnv - for i := 0; i < 4; i++ { - env := MakeRaft(t, conf) - addr := env.trans.LocalAddr() - NoErr(WaitFuture(env1.raft.AddPeer(addr), t), t) - envs = append(envs, env) - } - - // Wait for a leader - leader, err := WaitForAny(Leader, append([]*RaftEnv{env1}, envs...)) - NoErr(err, t) - - // Do some more commits - applyAndWait(leader, 100, 10) - - // snapshot the leader - NoErr(WaitFuture(leader.raft.Snapshot(), t), t) - - CheckConsistent(append([]*RaftEnv{env1}, envs...), t) - - // shutdown a follower - disconnected := envs[len(envs)-1] - disconnected.Shutdown() - - // Do some more commits [make sure the resulting snapshot will be a reasonable size] - applyAndWait(leader, 100, 10000) - - // snapshot the leader [leaders log should be compacted past the disconnected follower log now] - NoErr(WaitFuture(leader.raft.Snapshot(), t), t) - - // Unfortuantly we need to wait for the leader to start backing off RPCs to the down follower - // such that when the follower comes back up it'll run an election before it gets an rpc from - // the leader - time.Sleep(time.Second * 5) - - // start the now out of date follower back up - disconnected.Restart(t) - - // wait for it to get caught up - timeout := time.Now().Add(time.Second * 10) - for disconnected.raft.getLastApplied() < leader.raft.getLastApplied() { - time.Sleep(time.Millisecond) - if time.Now().After(timeout) { - t.Fatalf("Gave up waiting for follower to get caught up to leader") - } - } - - CheckConsistent(append([]*RaftEnv{env1}, envs...), t) - - // Shoot two nodes in the head! - rm1, rm2 := envs[0], envs[1] - rm1.Release() - rm2.Release() - envs = envs[2:] - time.Sleep(10 * time.Millisecond) - - // Wait for a leader - leader, err = WaitForAny(Leader, append([]*RaftEnv{env1}, envs...)) - NoErr(err, t) - - // Do some more commits - applyAndWait(leader, 100, 10) - - // Join a few new nodes! - for i := 0; i < 2; i++ { - env := MakeRaft(t, conf) - addr := env.trans.LocalAddr() - NoErr(WaitFuture(leader.raft.AddPeer(addr), t), t) - envs = append(envs, env) - } - - // Remove the old nodes - NoErr(WaitFuture(leader.raft.RemovePeer(rm1.raft.localAddr), t), t) - NoErr(WaitFuture(leader.raft.RemovePeer(rm2.raft.localAddr), t), t) - - // Shoot the leader - env1.Release() - time.Sleep(3 * conf.HeartbeatTimeout) - - // Wait for a leader - leader, err = WaitForAny(Leader, envs) - NoErr(err, t) - - allEnvs := append([]*RaftEnv{env1}, envs...) - CheckConsistent(allEnvs, t) - - if len(env1.fsm.logs) != totalApplied { - t.Fatalf("should apply %d logs! %d", totalApplied, len(env1.fsm.logs)) - } - - for _, e := range envs { - e.Release() - } -} diff --git a/vendor/github.com/hashicorp/raft/log.go b/vendor/github.com/hashicorp/raft/log.go deleted file mode 100644 index 9399154a..00000000 --- a/vendor/github.com/hashicorp/raft/log.go +++ /dev/null @@ -1,67 +0,0 @@ -package raft - -// LogType describes various types of log entries. -type LogType uint8 - -const ( - // LogCommand is applied to a user FSM. - LogCommand LogType = iota - - // LogNoop is used to assert leadership. - LogNoop - - // LogAddPeer is used to add a new peer. - LogAddPeer - - // LogRemovePeer is used to remove an existing peer. - LogRemovePeer - - // LogBarrier is used to ensure all preceding operations have been - // applied to the FSM. It is similar to LogNoop, but instead of returning - // once committed, it only returns once the FSM manager acks it. Otherwise - // it is possible there are operations committed but not yet applied to - // the FSM. 
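// The client-side counterpart of LogBarrier is Raft.Barrier, defined in
// raft.go later in this changeset; a hedged usage sketch:
//
//	if err := r.Barrier(5 * time.Second).Error(); err != nil {
//		// barrier was not committed in time
//	}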
- LogBarrier -) - -// Log entries are replicated to all members of the Raft cluster -// and form the heart of the replicated state machine. -type Log struct { - // Index holds the index of the log entry. - Index uint64 - - // Term holds the election term of the log entry. - Term uint64 - - // Type holds the type of the log entry. - Type LogType - - // Data holds the log entry's type-specific data. - Data []byte - - // peer is not exported since it is not transmitted, only used - // internally to construct the Data field. - peer string -} - -// LogStore is used to provide an interface for storing -// and retrieving logs in a durable fashion. -type LogStore interface { - // FirstIndex returns the first index written. 0 for no entries. - FirstIndex() (uint64, error) - - // LastIndex returns the last index written. 0 for no entries. - LastIndex() (uint64, error) - - // GetLog gets a log entry at a given index. - GetLog(index uint64, log *Log) error - - // StoreLog stores a log entry. - StoreLog(log *Log) error - - // StoreLogs stores multiple log entries. - StoreLogs(logs []*Log) error - - // DeleteRange deletes a range of log entries. The range is inclusive. - DeleteRange(min, max uint64) error -} diff --git a/vendor/github.com/hashicorp/raft/log_cache.go b/vendor/github.com/hashicorp/raft/log_cache.go deleted file mode 100644 index 952e98c2..00000000 --- a/vendor/github.com/hashicorp/raft/log_cache.go +++ /dev/null @@ -1,79 +0,0 @@ -package raft - -import ( - "fmt" - "sync" -) - -// LogCache wraps any LogStore implementation to provide an -// in-memory ring buffer. This is used to cache access to -// the recently written entries. For implementations that do not -// cache themselves, this can provide a substantial boost by -// avoiding disk I/O on recent entries. -type LogCache struct { - store LogStore - - cache []*Log - l sync.RWMutex -} - -// NewLogCache is used to create a new LogCache with the -// given capacity and backend store. 
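// A minimal usage sketch, wrapping the package's own InmemStore; any
// LogStore can be cached the same way, and the capacity (512 here) is
// illustrative:
//
//	store := raft.NewInmemStore()
//	cached, err := raft.NewLogCache(512, store)
//	if err != nil {
//		// capacity must be positive
//	}
//
// Reads of recently written indexes are then served from the ring buffer;
// anything older falls through to the wrapped store (see GetLog below).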
-func NewLogCache(capacity int, store LogStore) (*LogCache, error) { - if capacity <= 0 { - return nil, fmt.Errorf("capacity must be positive") - } - c := &LogCache{ - store: store, - cache: make([]*Log, capacity), - } - return c, nil -} - -func (c *LogCache) GetLog(idx uint64, log *Log) error { - // Check the buffer for an entry - c.l.RLock() - cached := c.cache[idx%uint64(len(c.cache))] - c.l.RUnlock() - - // Check if entry is valid - if cached != nil && cached.Index == idx { - *log = *cached - return nil - } - - // Forward request on cache miss - return c.store.GetLog(idx, log) -} - -func (c *LogCache) StoreLog(log *Log) error { - return c.StoreLogs([]*Log{log}) -} - -func (c *LogCache) StoreLogs(logs []*Log) error { - // Insert the logs into the ring buffer - c.l.Lock() - for _, l := range logs { - c.cache[l.Index%uint64(len(c.cache))] = l - } - c.l.Unlock() - - return c.store.StoreLogs(logs) -} - -func (c *LogCache) FirstIndex() (uint64, error) { - return c.store.FirstIndex() -} - -func (c *LogCache) LastIndex() (uint64, error) { - return c.store.LastIndex() -} - -func (c *LogCache) DeleteRange(min, max uint64) error { - // Invalidate the cache on deletes - c.l.Lock() - c.cache = make([]*Log, len(c.cache)) - c.l.Unlock() - - return c.store.DeleteRange(min, max) -} diff --git a/vendor/github.com/hashicorp/raft/log_cache_test.go b/vendor/github.com/hashicorp/raft/log_cache_test.go deleted file mode 100644 index 7569e78e..00000000 --- a/vendor/github.com/hashicorp/raft/log_cache_test.go +++ /dev/null @@ -1,88 +0,0 @@ -package raft - -import ( - "testing" -) - -func TestLogCache(t *testing.T) { - store := NewInmemStore() - c, _ := NewLogCache(16, store) - - // Insert into the in-mem store - for i := 0; i < 32; i++ { - log := &Log{Index: uint64(i) + 1} - store.StoreLog(log) - } - - // Check the indexes - if idx, _ := c.FirstIndex(); idx != 1 { - t.Fatalf("bad: %d", idx) - } - if idx, _ := c.LastIndex(); idx != 32 { - t.Fatalf("bad: %d", idx) - } - - // Try get log with a miss - var out Log - err := c.GetLog(1, &out) - if err != nil { - t.Fatalf("err: %v", err) - } - if out.Index != 1 { - t.Fatalf("bad: %#v", out) - } - - // Store logs - l1 := &Log{Index: 33} - l2 := &Log{Index: 34} - err = c.StoreLogs([]*Log{l1, l2}) - if err != nil { - t.Fatalf("err: %v", err) - } - - if idx, _ := c.LastIndex(); idx != 34 { - t.Fatalf("bad: %d", idx) - } - - // Check that it wrote-through - err = store.GetLog(33, &out) - if err != nil { - t.Fatalf("err: %v", err) - } - err = store.GetLog(34, &out) - if err != nil { - t.Fatalf("err: %v", err) - } - - // Delete in the backend - err = store.DeleteRange(33, 34) - if err != nil { - t.Fatalf("err: %v", err) - } - - // Should be in the ring buffer - err = c.GetLog(33, &out) - if err != nil { - t.Fatalf("err: %v", err) - } - err = c.GetLog(34, &out) - if err != nil { - t.Fatalf("err: %v", err) - } - - // Purge the ring buffer - err = c.DeleteRange(33, 34) - if err != nil { - t.Fatalf("err: %v", err) - } - - // Should not be in the ring buffer - err = c.GetLog(33, &out) - if err != ErrLogNotFound { - t.Fatalf("err: %v", err) - } - err = c.GetLog(34, &out) - if err != ErrLogNotFound { - t.Fatalf("err: %v", err) - } -} diff --git a/vendor/github.com/hashicorp/raft/net_transport.go b/vendor/github.com/hashicorp/raft/net_transport.go deleted file mode 100644 index 3de2a694..00000000 --- a/vendor/github.com/hashicorp/raft/net_transport.go +++ /dev/null @@ -1,622 +0,0 @@ -package raft - -import ( - "bufio" - "errors" - "fmt" - "io" - "log" - "net" - "os" - "sync" - 
"time" - - "github.com/hashicorp/go-msgpack/codec" -) - -const ( - rpcAppendEntries uint8 = iota - rpcRequestVote - rpcInstallSnapshot - - // DefaultTimeoutScale is the default TimeoutScale in a NetworkTransport. - DefaultTimeoutScale = 256 * 1024 // 256KB - - // rpcMaxPipeline controls the maximum number of outstanding - // AppendEntries RPC calls. - rpcMaxPipeline = 128 -) - -var ( - // ErrTransportShutdown is returned when operations on a transport are - // invoked after it's been terminated. - ErrTransportShutdown = errors.New("transport shutdown") - - // ErrPipelineShutdown is returned when the pipeline is closed. - ErrPipelineShutdown = errors.New("append pipeline closed") -) - -/* - -NetworkTransport provides a network based transport that can be -used to communicate with Raft on remote machines. It requires -an underlying stream layer to provide a stream abstraction, which can -be simple TCP, TLS, etc. - -This transport is very simple and lightweight. Each RPC request is -framed by sending a byte that indicates the message type, followed -by the MsgPack encoded request. - -The response is an error string followed by the response object, -both are encoded using MsgPack. - -InstallSnapshot is special, in that after the RPC request we stream -the entire state. That socket is not re-used as the connection state -is not known if there is an error. - -*/ -type NetworkTransport struct { - connPool map[string][]*netConn - connPoolLock sync.Mutex - - consumeCh chan RPC - - heartbeatFn func(RPC) - heartbeatFnLock sync.Mutex - - logger *log.Logger - - maxPool int - - shutdown bool - shutdownCh chan struct{} - shutdownLock sync.Mutex - - stream StreamLayer - - timeout time.Duration - TimeoutScale int -} - -// StreamLayer is used with the NetworkTransport to provide -// the low level stream abstraction. -type StreamLayer interface { - net.Listener - - // Dial is used to create a new outgoing connection - Dial(address string, timeout time.Duration) (net.Conn, error) -} - -type netConn struct { - target string - conn net.Conn - r *bufio.Reader - w *bufio.Writer - dec *codec.Decoder - enc *codec.Encoder -} - -func (n *netConn) Release() error { - return n.conn.Close() -} - -type netPipeline struct { - conn *netConn - trans *NetworkTransport - - doneCh chan AppendFuture - inprogressCh chan *appendFuture - - shutdown bool - shutdownCh chan struct{} - shutdownLock sync.Mutex -} - -// NewNetworkTransport creates a new network transport with the given dialer -// and listener. The maxPool controls how many connections we will pool. The -// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply -// the timeout by (SnapshotSize / TimeoutScale). -func NewNetworkTransport( - stream StreamLayer, - maxPool int, - timeout time.Duration, - logOutput io.Writer, -) *NetworkTransport { - if logOutput == nil { - logOutput = os.Stderr - } - return NewNetworkTransportWithLogger(stream, maxPool, timeout, log.New(logOutput, "", log.LstdFlags)) -} - -// NewNetworkTransportWithLogger creates a new network transport with the given dialer -// and listener. The maxPool controls how many connections we will pool. The -// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply -// the timeout by (SnapshotSize / TimeoutScale). 
-func NewNetworkTransportWithLogger( - stream StreamLayer, - maxPool int, - timeout time.Duration, - logger *log.Logger, -) *NetworkTransport { - if logger == nil { - logger = log.New(os.Stderr, "", log.LstdFlags) - } - trans := &NetworkTransport{ - connPool: make(map[string][]*netConn), - consumeCh: make(chan RPC), - logger: logger, - maxPool: maxPool, - shutdownCh: make(chan struct{}), - stream: stream, - timeout: timeout, - TimeoutScale: DefaultTimeoutScale, - } - go trans.listen() - return trans -} - -// SetHeartbeatHandler is used to setup a heartbeat handler -// as a fast-pass. This is to avoid head-of-line blocking from -// disk IO. -func (n *NetworkTransport) SetHeartbeatHandler(cb func(rpc RPC)) { - n.heartbeatFnLock.Lock() - defer n.heartbeatFnLock.Unlock() - n.heartbeatFn = cb -} - -// Close is used to stop the network transport. -func (n *NetworkTransport) Close() error { - n.shutdownLock.Lock() - defer n.shutdownLock.Unlock() - - if !n.shutdown { - close(n.shutdownCh) - n.stream.Close() - n.shutdown = true - } - return nil -} - -// Consumer implements the Transport interface. -func (n *NetworkTransport) Consumer() <-chan RPC { - return n.consumeCh -} - -// LocalAddr implements the Transport interface. -func (n *NetworkTransport) LocalAddr() string { - return n.stream.Addr().String() -} - -// IsShutdown is used to check if the transport is shutdown. -func (n *NetworkTransport) IsShutdown() bool { - select { - case <-n.shutdownCh: - return true - default: - return false - } -} - -// getExistingConn is used to grab a pooled connection. -func (n *NetworkTransport) getPooledConn(target string) *netConn { - n.connPoolLock.Lock() - defer n.connPoolLock.Unlock() - - conns, ok := n.connPool[target] - if !ok || len(conns) == 0 { - return nil - } - - var conn *netConn - num := len(conns) - conn, conns[num-1] = conns[num-1], nil - n.connPool[target] = conns[:num-1] - return conn -} - -// getConn is used to get a connection from the pool. -func (n *NetworkTransport) getConn(target string) (*netConn, error) { - // Check for a pooled conn - if conn := n.getPooledConn(target); conn != nil { - return conn, nil - } - - // Dial a new connection - conn, err := n.stream.Dial(target, n.timeout) - if err != nil { - return nil, err - } - - // Wrap the conn - netConn := &netConn{ - target: target, - conn: conn, - r: bufio.NewReader(conn), - w: bufio.NewWriter(conn), - } - - // Setup encoder/decoders - netConn.dec = codec.NewDecoder(netConn.r, &codec.MsgpackHandle{}) - netConn.enc = codec.NewEncoder(netConn.w, &codec.MsgpackHandle{}) - - // Done - return netConn, nil -} - -// returnConn returns a connection back to the pool. -func (n *NetworkTransport) returnConn(conn *netConn) { - n.connPoolLock.Lock() - defer n.connPoolLock.Unlock() - - key := conn.target - conns, _ := n.connPool[key] - - if !n.IsShutdown() && len(conns) < n.maxPool { - n.connPool[key] = append(conns, conn) - } else { - conn.Release() - } -} - -// AppendEntriesPipeline returns an interface that can be used to pipeline -// AppendEntries requests. -func (n *NetworkTransport) AppendEntriesPipeline(target string) (AppendPipeline, error) { - // Get a connection - conn, err := n.getConn(target) - if err != nil { - return nil, err - } - - // Create the pipeline - return newNetPipeline(n, conn), nil -} - -// AppendEntries implements the Transport interface. 
-func (n *NetworkTransport) AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error { - return n.genericRPC(target, rpcAppendEntries, args, resp) -} - -// RequestVote implements the Transport interface. -func (n *NetworkTransport) RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error { - return n.genericRPC(target, rpcRequestVote, args, resp) -} - -// genericRPC handles a simple request/response RPC. -func (n *NetworkTransport) genericRPC(target string, rpcType uint8, args interface{}, resp interface{}) error { - // Get a conn - conn, err := n.getConn(target) - if err != nil { - return err - } - - // Set a deadline - if n.timeout > 0 { - conn.conn.SetDeadline(time.Now().Add(n.timeout)) - } - - // Send the RPC - if err = sendRPC(conn, rpcType, args); err != nil { - return err - } - - // Decode the response - canReturn, err := decodeResponse(conn, resp) - if canReturn { - n.returnConn(conn) - } - return err -} - -// InstallSnapshot implements the Transport interface. -func (n *NetworkTransport) InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error { - // Get a conn, always close for InstallSnapshot - conn, err := n.getConn(target) - if err != nil { - return err - } - defer conn.Release() - - // Set a deadline, scaled by request size - if n.timeout > 0 { - timeout := n.timeout * time.Duration(args.Size/int64(n.TimeoutScale)) - if timeout < n.timeout { - timeout = n.timeout - } - conn.conn.SetDeadline(time.Now().Add(timeout)) - } - - // Send the RPC - if err = sendRPC(conn, rpcInstallSnapshot, args); err != nil { - return err - } - - // Stream the state - if _, err = io.Copy(conn.w, data); err != nil { - return err - } - - // Flush - if err = conn.w.Flush(); err != nil { - return err - } - - // Decode the response, do not return conn - _, err = decodeResponse(conn, resp) - return err -} - -// EncodePeer implements the Transport interface. -func (n *NetworkTransport) EncodePeer(p string) []byte { - return []byte(p) -} - -// DecodePeer implements the Transport interface. -func (n *NetworkTransport) DecodePeer(buf []byte) string { - return string(buf) -} - -// listen is used to handling incoming connections. -func (n *NetworkTransport) listen() { - for { - // Accept incoming connections - conn, err := n.stream.Accept() - if err != nil { - if n.IsShutdown() { - return - } - n.logger.Printf("[ERR] raft-net: Failed to accept connection: %v", err) - continue - } - n.logger.Printf("[DEBUG] raft-net: %v accepted connection from: %v", n.LocalAddr(), conn.RemoteAddr()) - - // Handle the connection in dedicated routine - go n.handleConn(conn) - } -} - -// handleConn is used to handle an inbound connection for its lifespan. -func (n *NetworkTransport) handleConn(conn net.Conn) { - defer conn.Close() - r := bufio.NewReader(conn) - w := bufio.NewWriter(conn) - dec := codec.NewDecoder(r, &codec.MsgpackHandle{}) - enc := codec.NewEncoder(w, &codec.MsgpackHandle{}) - - for { - if err := n.handleCommand(r, dec, enc); err != nil { - if err != io.EOF { - n.logger.Printf("[ERR] raft-net: Failed to decode incoming command: %v", err) - } - return - } - if err := w.Flush(); err != nil { - n.logger.Printf("[ERR] raft-net: Failed to flush response: %v", err) - return - } - } -} - -// handleCommand is used to decode and dispatch a single command. 
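// The framing decoded here matches the description at the top of this
// file: one request-type byte, then MsgPack bodies in both directions.
// Roughly:
//
//	request:  [rpcType byte][msgpack(request struct)]
//	response: [msgpack(error string)][msgpack(response struct)]
//
// An empty error string signals success; decodeResponse further down reads
// the same two fields on the client side.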
-func (n *NetworkTransport) handleCommand(r *bufio.Reader, dec *codec.Decoder, enc *codec.Encoder) error { - // Get the rpc type - rpcType, err := r.ReadByte() - if err != nil { - return err - } - - // Create the RPC object - respCh := make(chan RPCResponse, 1) - rpc := RPC{ - RespChan: respCh, - } - - // Decode the command - isHeartbeat := false - switch rpcType { - case rpcAppendEntries: - var req AppendEntriesRequest - if err := dec.Decode(&req); err != nil { - return err - } - rpc.Command = &req - - // Check if this is a heartbeat - if req.Term != 0 && req.Leader != nil && - req.PrevLogEntry == 0 && req.PrevLogTerm == 0 && - len(req.Entries) == 0 && req.LeaderCommitIndex == 0 { - isHeartbeat = true - } - - case rpcRequestVote: - var req RequestVoteRequest - if err := dec.Decode(&req); err != nil { - return err - } - rpc.Command = &req - - case rpcInstallSnapshot: - var req InstallSnapshotRequest - if err := dec.Decode(&req); err != nil { - return err - } - rpc.Command = &req - rpc.Reader = io.LimitReader(r, req.Size) - - default: - return fmt.Errorf("unknown rpc type %d", rpcType) - } - - // Check for heartbeat fast-path - if isHeartbeat { - n.heartbeatFnLock.Lock() - fn := n.heartbeatFn - n.heartbeatFnLock.Unlock() - if fn != nil { - fn(rpc) - goto RESP - } - } - - // Dispatch the RPC - select { - case n.consumeCh <- rpc: - case <-n.shutdownCh: - return ErrTransportShutdown - } - - // Wait for response -RESP: - select { - case resp := <-respCh: - // Send the error first - respErr := "" - if resp.Error != nil { - respErr = resp.Error.Error() - } - if err := enc.Encode(respErr); err != nil { - return err - } - - // Send the response - if err := enc.Encode(resp.Response); err != nil { - return err - } - case <-n.shutdownCh: - return ErrTransportShutdown - } - return nil -} - -// decodeResponse is used to decode an RPC response and reports whether -// the connection can be reused. -func decodeResponse(conn *netConn, resp interface{}) (bool, error) { - // Decode the error if any - var rpcError string - if err := conn.dec.Decode(&rpcError); err != nil { - conn.Release() - return false, err - } - - // Decode the response - if err := conn.dec.Decode(resp); err != nil { - conn.Release() - return false, err - } - - // Format an error if any - if rpcError != "" { - return true, fmt.Errorf(rpcError) - } - return true, nil -} - -// sendRPC is used to encode and send the RPC. -func sendRPC(conn *netConn, rpcType uint8, args interface{}) error { - // Write the request type - if err := conn.w.WriteByte(rpcType); err != nil { - conn.Release() - return err - } - - // Send the request - if err := conn.enc.Encode(args); err != nil { - conn.Release() - return err - } - - // Flush - if err := conn.w.Flush(); err != nil { - conn.Release() - return err - } - return nil -} - -// newNetPipeline is used to construct a netPipeline from a given -// transport and connection. -func newNetPipeline(trans *NetworkTransport, conn *netConn) *netPipeline { - n := &netPipeline{ - conn: conn, - trans: trans, - doneCh: make(chan AppendFuture, rpcMaxPipeline), - inprogressCh: make(chan *appendFuture, rpcMaxPipeline), - shutdownCh: make(chan struct{}), - } - go n.decodeResponses() - return n -} - -// decodeResponses is a long running routine that decodes the responses -// sent on the connection. 
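// Back-pressure note: inprogressCh is buffered at rpcMaxPipeline (128), so
// at most 128 AppendEntries RPCs can be awaiting decode at once. The send
// in AppendEntries below blocks once the buffer fills, which is what keeps
// a fast leader from overrunning a slow follower connection.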
-func (n *netPipeline) decodeResponses() { - timeout := n.trans.timeout - for { - select { - case future := <-n.inprogressCh: - if timeout > 0 { - n.conn.conn.SetReadDeadline(time.Now().Add(timeout)) - } - - _, err := decodeResponse(n.conn, future.resp) - future.respond(err) - select { - case n.doneCh <- future: - case <-n.shutdownCh: - return - } - case <-n.shutdownCh: - return - } - } -} - -// AppendEntries is used to pipeline a new append entries request. -func (n *netPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) { - // Create a new future - future := &appendFuture{ - start: time.Now(), - args: args, - resp: resp, - } - future.init() - - // Add a send timeout - if timeout := n.trans.timeout; timeout > 0 { - n.conn.conn.SetWriteDeadline(time.Now().Add(timeout)) - } - - // Send the RPC - if err := sendRPC(n.conn, rpcAppendEntries, future.args); err != nil { - return nil, err - } - - // Hand-off for decoding, this can also cause back-pressure - // to prevent too many inflight requests - select { - case n.inprogressCh <- future: - return future, nil - case <-n.shutdownCh: - return nil, ErrPipelineShutdown - } -} - -// Consumer returns a channel that can be used to consume complete futures. -func (n *netPipeline) Consumer() <-chan AppendFuture { - return n.doneCh -} - -// Closed is used to shutdown the pipeline connection. -func (n *netPipeline) Close() error { - n.shutdownLock.Lock() - defer n.shutdownLock.Unlock() - if n.shutdown { - return nil - } - - // Release the connection - n.conn.Release() - - n.shutdown = true - close(n.shutdownCh) - return nil -} diff --git a/vendor/github.com/hashicorp/raft/net_transport_test.go b/vendor/github.com/hashicorp/raft/net_transport_test.go deleted file mode 100644 index ca92c897..00000000 --- a/vendor/github.com/hashicorp/raft/net_transport_test.go +++ /dev/null @@ -1,449 +0,0 @@ -package raft - -import ( - "bytes" - "reflect" - "sync" - "testing" - "time" -) - -func TestNetworkTransport_StartStop(t *testing.T) { - trans, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - trans.Close() -} - -func TestNetworkTransport_Heartbeat_FastPath(t *testing.T) { - // Transport 1 is consumer - trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - defer trans1.Close() - - // Make the RPC request - args := AppendEntriesRequest{ - Term: 10, - Leader: []byte("cartman"), - } - resp := AppendEntriesResponse{ - Term: 4, - LastLog: 90, - Success: true, - } - - invoked := false - fastpath := func(rpc RPC) { - // Verify the command - req := rpc.Command.(*AppendEntriesRequest) - if !reflect.DeepEqual(req, &args) { - t.Fatalf("command mismatch: %#v %#v", *req, args) - } - - rpc.Respond(&resp, nil) - invoked = true - } - trans1.SetHeartbeatHandler(fastpath) - - // Transport 2 makes outbound request - trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - defer trans2.Close() - - var out AppendEntriesResponse - if err := trans2.AppendEntries(trans1.LocalAddr(), &args, &out); err != nil { - t.Fatalf("err: %v", err) - } - - // Verify the response - if !reflect.DeepEqual(resp, out) { - t.Fatalf("command mismatch: %#v %#v", resp, out) - } - - // Ensure fast-path is used - if !invoked { - t.Fatalf("fast-path not used") - } -} - -func 
TestNetworkTransport_AppendEntries(t *testing.T) { - // Transport 1 is consumer - trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - defer trans1.Close() - rpcCh := trans1.Consumer() - - // Make the RPC request - args := AppendEntriesRequest{ - Term: 10, - Leader: []byte("cartman"), - PrevLogEntry: 100, - PrevLogTerm: 4, - Entries: []*Log{ - &Log{ - Index: 101, - Term: 4, - Type: LogNoop, - }, - }, - LeaderCommitIndex: 90, - } - resp := AppendEntriesResponse{ - Term: 4, - LastLog: 90, - Success: true, - } - - // Listen for a request - go func() { - select { - case rpc := <-rpcCh: - // Verify the command - req := rpc.Command.(*AppendEntriesRequest) - if !reflect.DeepEqual(req, &args) { - t.Fatalf("command mismatch: %#v %#v", *req, args) - } - - rpc.Respond(&resp, nil) - - case <-time.After(200 * time.Millisecond): - t.Fatalf("timeout") - } - }() - - // Transport 2 makes outbound request - trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - defer trans2.Close() - - var out AppendEntriesResponse - if err := trans2.AppendEntries(trans1.LocalAddr(), &args, &out); err != nil { - t.Fatalf("err: %v", err) - } - - // Verify the response - if !reflect.DeepEqual(resp, out) { - t.Fatalf("command mismatch: %#v %#v", resp, out) - } -} - -func TestNetworkTransport_AppendEntriesPipeline(t *testing.T) { - // Transport 1 is consumer - trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - defer trans1.Close() - rpcCh := trans1.Consumer() - - // Make the RPC request - args := AppendEntriesRequest{ - Term: 10, - Leader: []byte("cartman"), - PrevLogEntry: 100, - PrevLogTerm: 4, - Entries: []*Log{ - &Log{ - Index: 101, - Term: 4, - Type: LogNoop, - }, - }, - LeaderCommitIndex: 90, - } - resp := AppendEntriesResponse{ - Term: 4, - LastLog: 90, - Success: true, - } - - // Listen for a request - go func() { - for i := 0; i < 10; i++ { - select { - case rpc := <-rpcCh: - // Verify the command - req := rpc.Command.(*AppendEntriesRequest) - if !reflect.DeepEqual(req, &args) { - t.Fatalf("command mismatch: %#v %#v", *req, args) - } - rpc.Respond(&resp, nil) - - case <-time.After(200 * time.Millisecond): - t.Fatalf("timeout") - } - } - }() - - // Transport 2 makes outbound request - trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - defer trans2.Close() - - pipeline, err := trans2.AppendEntriesPipeline(trans1.LocalAddr()) - if err != nil { - t.Fatalf("err: %v", err) - } - defer pipeline.Close() - for i := 0; i < 10; i++ { - out := new(AppendEntriesResponse) - if _, err := pipeline.AppendEntries(&args, out); err != nil { - t.Fatalf("err: %v", err) - } - } - - respCh := pipeline.Consumer() - for i := 0; i < 10; i++ { - select { - case ready := <-respCh: - // Verify the response - if !reflect.DeepEqual(&resp, ready.Response()) { - t.Fatalf("command mismatch: %#v %#v", &resp, ready.Response()) - } - case <-time.After(200 * time.Millisecond): - t.Fatalf("timeout") - } - } -} - -func TestNetworkTransport_RequestVote(t *testing.T) { - // Transport 1 is consumer - trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - defer trans1.Close() - rpcCh := trans1.Consumer() - 
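// Consumer is the transport's generic server-side pattern: every inbound
// RPC surfaces on the channel and must be answered via Respond. A hedged
// sketch of the minimal handler shape:
//
//	rpc := <-rpcCh
//	req := rpc.Command.(*RequestVoteRequest)
//	rpc.Respond(&RequestVoteResponse{Term: req.Term}, nil)
//
// The goroutine below follows the same shape, with a timeout guard.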
- // Make the RPC request - args := RequestVoteRequest{ - Term: 20, - Candidate: []byte("butters"), - LastLogIndex: 100, - LastLogTerm: 19, - } - resp := RequestVoteResponse{ - Term: 100, - Peers: []byte("blah"), - Granted: false, - } - - // Listen for a request - go func() { - select { - case rpc := <-rpcCh: - // Verify the command - req := rpc.Command.(*RequestVoteRequest) - if !reflect.DeepEqual(req, &args) { - t.Fatalf("command mismatch: %#v %#v", *req, args) - } - - rpc.Respond(&resp, nil) - - case <-time.After(200 * time.Millisecond): - t.Fatalf("timeout") - } - }() - - // Transport 2 makes outbound request - trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - defer trans2.Close() - - var out RequestVoteResponse - if err := trans2.RequestVote(trans1.LocalAddr(), &args, &out); err != nil { - t.Fatalf("err: %v", err) - } - - // Verify the response - if !reflect.DeepEqual(resp, out) { - t.Fatalf("command mismatch: %#v %#v", resp, out) - } -} - -func TestNetworkTransport_InstallSnapshot(t *testing.T) { - // Transport 1 is consumer - trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - defer trans1.Close() - rpcCh := trans1.Consumer() - - // Make the RPC request - args := InstallSnapshotRequest{ - Term: 10, - Leader: []byte("kyle"), - LastLogIndex: 100, - LastLogTerm: 9, - Peers: []byte("blah blah"), - Size: 10, - } - resp := InstallSnapshotResponse{ - Term: 10, - Success: true, - } - - // Listen for a request - go func() { - select { - case rpc := <-rpcCh: - // Verify the command - req := rpc.Command.(*InstallSnapshotRequest) - if !reflect.DeepEqual(req, &args) { - t.Fatalf("command mismatch: %#v %#v", *req, args) - } - - // Try to read the bytes - buf := make([]byte, 10) - rpc.Reader.Read(buf) - - // Compare - if bytes.Compare(buf, []byte("0123456789")) != 0 { - t.Fatalf("bad buf %v", buf) - } - - rpc.Respond(&resp, nil) - - case <-time.After(200 * time.Millisecond): - t.Fatalf("timeout") - } - }() - - // Transport 2 makes outbound request - trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - defer trans2.Close() - - // Create a buffer - buf := bytes.NewBuffer([]byte("0123456789")) - - var out InstallSnapshotResponse - if err := trans2.InstallSnapshot(trans1.LocalAddr(), &args, &out, buf); err != nil { - t.Fatalf("err: %v", err) - } - - // Verify the response - if !reflect.DeepEqual(resp, out) { - t.Fatalf("command mismatch: %#v %#v", resp, out) - } -} - -func TestNetworkTransport_EncodeDecode(t *testing.T) { - // Transport 1 is consumer - trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - defer trans1.Close() - - local := trans1.LocalAddr() - enc := trans1.EncodePeer(local) - dec := trans1.DecodePeer(enc) - - if dec != local { - t.Fatalf("enc/dec fail: %v %v", dec, local) - } -} - -func TestNetworkTransport_PooledConn(t *testing.T) { - // Transport 1 is consumer - trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - defer trans1.Close() - rpcCh := trans1.Consumer() - - // Make the RPC request - args := AppendEntriesRequest{ - Term: 10, - Leader: []byte("cartman"), - PrevLogEntry: 100, - PrevLogTerm: 4, - Entries: []*Log{ - 
&Log{ - Index: 101, - Term: 4, - Type: LogNoop, - }, - }, - LeaderCommitIndex: 90, - } - resp := AppendEntriesResponse{ - Term: 4, - LastLog: 90, - Success: true, - } - - // Listen for a request - go func() { - for { - select { - case rpc := <-rpcCh: - // Verify the command - req := rpc.Command.(*AppendEntriesRequest) - if !reflect.DeepEqual(req, &args) { - t.Fatalf("command mismatch: %#v %#v", *req, args) - } - rpc.Respond(&resp, nil) - - case <-time.After(200 * time.Millisecond): - return - } - } - }() - - // Transport 2 makes outbound request, 3 conn pool - trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 3, time.Second, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - defer trans2.Close() - - // Create wait group - wg := &sync.WaitGroup{} - wg.Add(5) - - appendFunc := func() { - defer wg.Done() - var out AppendEntriesResponse - if err := trans2.AppendEntries(trans1.LocalAddr(), &args, &out); err != nil { - t.Fatalf("err: %v", err) - } - - // Verify the response - if !reflect.DeepEqual(resp, out) { - t.Fatalf("command mismatch: %#v %#v", resp, out) - } - } - - // Try to do parallel appends, should stress the conn pool - for i := 0; i < 5; i++ { - go appendFunc() - } - - // Wait for the routines to finish - wg.Wait() - - // Check the conn pool size - addr := trans1.LocalAddr() - if len(trans2.connPool[addr]) != 3 { - t.Fatalf("Expected 2 pooled conns!") - } -} diff --git a/vendor/github.com/hashicorp/raft/observer.go b/vendor/github.com/hashicorp/raft/observer.go deleted file mode 100644 index dbd0cc64..00000000 --- a/vendor/github.com/hashicorp/raft/observer.go +++ /dev/null @@ -1,122 +0,0 @@ -package raft - -import ( - "sync/atomic" -) - -// Observation is sent along the given channel to observers when an event occurs. -type Observation struct { - // Raft holds the Raft instance generating the observation. - Raft *Raft - // Data holds observation-specific data. Possible types are - // *RequestVoteRequest, RaftState and LeaderObservation. - Data interface{} -} - -// LeaderObservation is used in Observation.Data when leadership changes. -type LeaderObservation struct { - Leader string -} - -// nextObserverId is used to provide a unique ID for each observer to aid in -// deregistration. -var nextObserverID uint64 - -// FilterFn is a function that can be registered in order to filter observations. -// The function reports whether the observation should be included - if -// it returns false, the observation will be filtered out. -type FilterFn func(o *Observation) bool - -// Observer describes what to do with a given observation. -type Observer struct { - // numObserved and numDropped are performance counters for this observer. - // 64 bit types must be 64 bit aligned to use with atomic operations on - // 32 bit platforms, so keep them at the top of the struct. - numObserved uint64 - numDropped uint64 - - // channel receives observations. - channel chan Observation - - // blocking, if true, will cause Raft to block when sending an observation - // to this observer. This should generally be set to false. - blocking bool - - // filter will be called to determine if an observation should be sent to - // the channel. - filter FilterFn - - // id is the ID of this observer in the Raft map. - id uint64 -} - -// NewObserver creates a new observer that can be registered -// to make observations on a Raft instance. Observations -// will be sent on the given channel if they satisfy the -// given filter. 
-// -// If blocking is true, the observer will block when it can't -// send on the channel, otherwise it may discard events. -func NewObserver(channel chan Observation, blocking bool, filter FilterFn) *Observer { - return &Observer{ - channel: channel, - blocking: blocking, - filter: filter, - id: atomic.AddUint64(&nextObserverID, 1), - } -} - -// GetNumObserved returns the number of observations. -func (or *Observer) GetNumObserved() uint64 { - return atomic.LoadUint64(&or.numObserved) -} - -// GetNumDropped returns the number of dropped observations due to blocking. -func (or *Observer) GetNumDropped() uint64 { - return atomic.LoadUint64(&or.numDropped) -} - -// RegisterObserver registers a new observer. -func (r *Raft) RegisterObserver(or *Observer) { - r.observersLock.Lock() - defer r.observersLock.Unlock() - r.observers[or.id] = or -} - -// DeregisterObserver deregisters an observer. -func (r *Raft) DeregisterObserver(or *Observer) { - r.observersLock.Lock() - defer r.observersLock.Unlock() - delete(r.observers, or.id) -} - -// observe sends an observation to every observer. -func (r *Raft) observe(o interface{}) { - // In general observers should not block. But in any case this isn't - // disastrous as we only hold a read lock, which merely prevents - // registration / deregistration of observers. - r.observersLock.RLock() - defer r.observersLock.RUnlock() - for _, or := range r.observers { - // It's wasteful to do this in the loop, but for the common case - // where there are no observers we won't create any objects. - ob := Observation{Raft: r, Data: o} - if or.filter != nil && !or.filter(&ob) { - continue - } - if or.channel == nil { - continue - } - if or.blocking { - or.channel <- ob - atomic.AddUint64(&or.numObserved, 1) - } else { - select { - case or.channel <- ob: - atomic.AddUint64(&or.numObserved, 1) - default: - atomic.AddUint64(&or.numDropped, 1) - } - } - } -} diff --git a/vendor/github.com/hashicorp/raft/peer.go b/vendor/github.com/hashicorp/raft/peer.go deleted file mode 100644 index 6f3bcf85..00000000 --- a/vendor/github.com/hashicorp/raft/peer.go +++ /dev/null @@ -1,122 +0,0 @@ -package raft - -import ( - "bytes" - "encoding/json" - "io/ioutil" - "os" - "path/filepath" - "sync" -) - -const ( - jsonPeerPath = "peers.json" -) - -// PeerStore provides an interface for persistent storage and -// retrieval of peers. We use a separate interface than StableStore -// since the peers may need to be edited by a human operator. For example, -// in a two node cluster, the failure of either node requires human intervention -// since consensus is impossible. -type PeerStore interface { - // Peers returns the list of known peers. - Peers() ([]string, error) - - // SetPeers sets the list of known peers. This is invoked when a peer is - // added or removed. - SetPeers([]string) error -} - -// StaticPeers is used to provide a static list of peers. -type StaticPeers struct { - StaticPeers []string - l sync.Mutex -} - -// Peers implements the PeerStore interface. -func (s *StaticPeers) Peers() ([]string, error) { - s.l.Lock() - peers := s.StaticPeers - s.l.Unlock() - return peers, nil -} - -// SetPeers implements the PeerStore interface. -func (s *StaticPeers) SetPeers(p []string) error { - s.l.Lock() - s.StaticPeers = p - s.l.Unlock() - return nil -} - -// JSONPeers is used to provide peer persistence on disk in the form -// of a JSON file. This allows human operators to manipulate the file. 
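// On disk the file is a plain JSON array of transport-encoded addresses,
// so a three-node cluster's peers.json looks roughly like:
//
//	["10.0.0.1:4002","10.0.0.2:4002","10.0.0.3:4002"]
//
// Relevant to this changeset: Hashicorp Raft v1 drops the PeerStore
// interface in favor of configuration entries stored in the Raft log
// itself, which is why this file is deleted rather than ported.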
-type JSONPeers struct { - l sync.Mutex - path string - trans Transport -} - -// NewJSONPeers creates a new JSONPeers store. Requires a transport -// to handle the serialization of network addresses. -func NewJSONPeers(base string, trans Transport) *JSONPeers { - path := filepath.Join(base, jsonPeerPath) - store := &JSONPeers{ - path: path, - trans: trans, - } - return store -} - -// Peers implements the PeerStore interface. -func (j *JSONPeers) Peers() ([]string, error) { - j.l.Lock() - defer j.l.Unlock() - - // Read the file - buf, err := ioutil.ReadFile(j.path) - if err != nil && !os.IsNotExist(err) { - return nil, err - } - - // Check for no peers - if len(buf) == 0 { - return nil, nil - } - - // Decode the peers - var peerSet []string - dec := json.NewDecoder(bytes.NewReader(buf)) - if err := dec.Decode(&peerSet); err != nil { - return nil, err - } - - // Deserialize each peer - var peers []string - for _, p := range peerSet { - peers = append(peers, j.trans.DecodePeer([]byte(p))) - } - return peers, nil -} - -// SetPeers implements the PeerStore interface. -func (j *JSONPeers) SetPeers(peers []string) error { - j.l.Lock() - defer j.l.Unlock() - - // Encode each peer - var peerSet []string - for _, p := range peers { - peerSet = append(peerSet, string(j.trans.EncodePeer(p))) - } - - // Convert to JSON - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - if err := enc.Encode(peerSet); err != nil { - return err - } - - // Write out as JSON - return ioutil.WriteFile(j.path, buf.Bytes(), 0755) -} diff --git a/vendor/github.com/hashicorp/raft/peer_test.go b/vendor/github.com/hashicorp/raft/peer_test.go deleted file mode 100644 index ff835e02..00000000 --- a/vendor/github.com/hashicorp/raft/peer_test.go +++ /dev/null @@ -1,44 +0,0 @@ -package raft - -import ( - "io/ioutil" - "os" - "testing" -) - -func TestJSONPeers(t *testing.T) { - // Create a test dir - dir, err := ioutil.TempDir("", "raft") - if err != nil { - t.Fatalf("err: %v ", err) - } - defer os.RemoveAll(dir) - - // Create the store - _, trans := NewInmemTransport("") - store := NewJSONPeers(dir, trans) - - // Try a read, should get nothing - peers, err := store.Peers() - if err != nil { - t.Fatalf("err: %v", err) - } - if len(peers) != 0 { - t.Fatalf("peers: %v", peers) - } - - // Initialize some peers - newPeers := []string{NewInmemAddr(), NewInmemAddr(), NewInmemAddr()} - if err := store.SetPeers(newPeers); err != nil { - t.Fatalf("err: %v", err) - } - - // Try a read, should peers - peers, err = store.Peers() - if err != nil { - t.Fatalf("err: %v", err) - } - if len(peers) != 3 { - t.Fatalf("peers: %v", peers) - } -} diff --git a/vendor/github.com/hashicorp/raft/raft.go b/vendor/github.com/hashicorp/raft/raft.go deleted file mode 100644 index c5dac733..00000000 --- a/vendor/github.com/hashicorp/raft/raft.go +++ /dev/null @@ -1,1925 +0,0 @@ -package raft - -import ( - "bytes" - "errors" - "fmt" - "io" - "io/ioutil" - "log" - "os" - "strconv" - "sync" - "time" - - "github.com/armon/go-metrics" -) - -const ( - minCheckInterval = 10 * time.Millisecond -) - -var ( - keyCurrentTerm = []byte("CurrentTerm") - keyLastVoteTerm = []byte("LastVoteTerm") - keyLastVoteCand = []byte("LastVoteCand") - - // ErrLeader is returned when an operation can't be completed on a - // leader node. - ErrLeader = errors.New("node is the leader") - - // ErrNotLeader is returned when an operation can't be completed on a - // follower or candidate node. 
- ErrNotLeader = errors.New("node is not the leader") - - // ErrLeadershipLost is returned when a leader fails to commit a log entry - // because it's been deposed in the process. - ErrLeadershipLost = errors.New("leadership lost while committing log") - - // ErrRaftShutdown is returned when operations are requested against an - // inactive Raft. - ErrRaftShutdown = errors.New("raft is already shutdown") - - // ErrEnqueueTimeout is returned when a command fails due to a timeout. - ErrEnqueueTimeout = errors.New("timed out enqueuing operation") - - // ErrKnownPeer is returned when trying to add a peer to the configuration - // that already exists. - ErrKnownPeer = errors.New("peer already known") - - // ErrUnknownPeer is returned when trying to remove a peer from the - // configuration that doesn't exist. - ErrUnknownPeer = errors.New("peer is unknown") - - // ErrNothingNewToSnapshot is returned when trying to create a snapshot - // but there's nothing new commited to the FSM since we started. - ErrNothingNewToSnapshot = errors.New("Nothing new to snapshot") -) - -// commitTuple is used to send an index that was committed, -// with an optional associated future that should be invoked. -type commitTuple struct { - log *Log - future *logFuture -} - -// leaderState is state that is used while we are a leader. -type leaderState struct { - commitCh chan struct{} - inflight *inflight - replState map[string]*followerReplication - notify map[*verifyFuture]struct{} - stepDown chan struct{} -} - -// Raft implements a Raft node. -type Raft struct { - raftState - - // applyCh is used to async send logs to the main thread to - // be committed and applied to the FSM. - applyCh chan *logFuture - - // Configuration provided at Raft initialization - conf *Config - - // FSM is the client state machine to apply commands to - fsm FSM - - // fsmCommitCh is used to trigger async application of logs to the fsm - fsmCommitCh chan commitTuple - - // fsmRestoreCh is used to trigger a restore from snapshot - fsmRestoreCh chan *restoreFuture - - // fsmSnapshotCh is used to trigger a new snapshot being taken - fsmSnapshotCh chan *reqSnapshotFuture - - // lastContact is the last time we had contact from the - // leader node. This can be used to gauge staleness. 
- lastContact time.Time - lastContactLock sync.RWMutex - - // Leader is the current cluster leader - leader string - leaderLock sync.RWMutex - - // leaderCh is used to notify of leadership changes - leaderCh chan bool - - // leaderState used only while state is leader - leaderState leaderState - - // Stores our local addr - localAddr string - - // Used for our logging - logger *log.Logger - - // LogStore provides durable storage for logs - logs LogStore - - // Track our known peers - peerCh chan *peerFuture - peers []string - peerStore PeerStore - - // RPC chan comes from the transport layer - rpcCh <-chan RPC - - // Shutdown channel to exit, protected to prevent concurrent exits - shutdown bool - shutdownCh chan struct{} - shutdownLock sync.Mutex - - // snapshots is used to store and retrieve snapshots - snapshots SnapshotStore - - // snapshotCh is used for user triggered snapshots - snapshotCh chan *snapshotFuture - - // stable is a StableStore implementation for durable state - // It provides stable storage for many fields in raftState - stable StableStore - - // The transport layer we use - trans Transport - - // verifyCh is used to async send verify futures to the main thread - // to verify we are still the leader - verifyCh chan *verifyFuture - - // List of observers and the mutex that protects them. The observers list - // is indexed by an artificial ID which is used for deregistration. - observersLock sync.RWMutex - observers map[uint64]*Observer -} - -// NewRaft is used to construct a new Raft node. It takes a configuration, as well -// as implementations of various interfaces that are required. If we have any old state, -// such as snapshots, logs, peers, etc, all those will be restored when creating the -// Raft node. -func NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps SnapshotStore, - peerStore PeerStore, trans Transport) (*Raft, error) { - // Validate the configuration - if err := ValidateConfig(conf); err != nil { - return nil, err - } - - // Ensure we have a LogOutput - var logger *log.Logger - if conf.Logger != nil { - logger = conf.Logger - } else { - if conf.LogOutput == nil { - conf.LogOutput = os.Stderr - } - logger = log.New(conf.LogOutput, "", log.LstdFlags) - } - - // Try to restore the current term - currentTerm, err := stable.GetUint64(keyCurrentTerm) - if err != nil && err.Error() != "not found" { - return nil, fmt.Errorf("failed to load current term: %v", err) - } - - // Read the last log value - lastIdx, err := logs.LastIndex() - if err != nil { - return nil, fmt.Errorf("failed to find last log: %v", err) - } - - // Get the log - var lastLog Log - if lastIdx > 0 { - if err = logs.GetLog(lastIdx, &lastLog); err != nil { - return nil, fmt.Errorf("failed to get last log: %v", err) - } - } - - // Construct the list of peers that excludes us - localAddr := trans.LocalAddr() - peers, err := peerStore.Peers() - if err != nil { - return nil, fmt.Errorf("failed to get list of peers: %v", err) - } - peers = ExcludePeer(peers, localAddr) - - // Create Raft struct - r := &Raft{ - applyCh: make(chan *logFuture), - conf: conf, - fsm: fsm, - fsmCommitCh: make(chan commitTuple, 128), - fsmRestoreCh: make(chan *restoreFuture), - fsmSnapshotCh: make(chan *reqSnapshotFuture), - leaderCh: make(chan bool), - localAddr: localAddr, - logger: logger, - logs: logs, - peerCh: make(chan *peerFuture), - peers: peers, - peerStore: peerStore, - rpcCh: trans.Consumer(), - snapshots: snaps, - snapshotCh: make(chan *snapshotFuture), - shutdownCh: make(chan struct{}), - 
stable: stable, - trans: trans, - verifyCh: make(chan *verifyFuture, 64), - observers: make(map[uint64]*Observer), - } - - // Initialize as a follower - r.setState(Follower) - - // Start as leader if specified. This should only be used - // for testing purposes. - if conf.StartAsLeader { - r.setState(Leader) - r.setLeader(r.localAddr) - } - - // Restore the current term and the last log - r.setCurrentTerm(currentTerm) - r.setLastLog(lastLog.Index, lastLog.Term) - - // Attempt to restore a snapshot if there are any - if err := r.restoreSnapshot(); err != nil { - return nil, err - } - - // Setup a heartbeat fast-path to avoid head-of-line - // blocking where possible. It MUST be safe for this - // to be called concurrently with a blocking RPC. - trans.SetHeartbeatHandler(r.processHeartbeat) - - // Start the background work - r.goFunc(r.run) - r.goFunc(r.runFSM) - r.goFunc(r.runSnapshots) - return r, nil -} - -// Leader is used to return the current leader of the cluster. -// It may return empty string if there is no current leader -// or the leader is unknown. -func (r *Raft) Leader() string { - r.leaderLock.RLock() - leader := r.leader - r.leaderLock.RUnlock() - return leader -} - -// setLeader is used to modify the current leader of the cluster -func (r *Raft) setLeader(leader string) { - r.leaderLock.Lock() - oldLeader := r.leader - r.leader = leader - r.leaderLock.Unlock() - if oldLeader != leader { - r.observe(LeaderObservation{Leader: leader}) - } -} - -// Apply is used to apply a command to the FSM in a highly consistent -// manner. This returns a future that can be used to wait on the application. -// An optional timeout can be provided to limit the amount of time we wait -// for the command to be started. This must be run on the leader or it -// will fail. -func (r *Raft) Apply(cmd []byte, timeout time.Duration) ApplyFuture { - metrics.IncrCounter([]string{"raft", "apply"}, 1) - var timer <-chan time.Time - if timeout > 0 { - timer = time.After(timeout) - } - - // Create a log future, no index or term yet - logFuture := &logFuture{ - log: Log{ - Type: LogCommand, - Data: cmd, - }, - } - logFuture.init() - - select { - case <-timer: - return errorFuture{ErrEnqueueTimeout} - case <-r.shutdownCh: - return errorFuture{ErrRaftShutdown} - case r.applyCh <- logFuture: - return logFuture - } -} - -// Barrier is used to issue a command that blocks until all preceeding -// operations have been applied to the FSM. It can be used to ensure the -// FSM reflects all queued writes. An optional timeout can be provided to -// limit the amount of time we wait for the command to be started. This -// must be run on the leader or it will fail. -func (r *Raft) Barrier(timeout time.Duration) Future { - metrics.IncrCounter([]string{"raft", "barrier"}, 1) - var timer <-chan time.Time - if timeout > 0 { - timer = time.After(timeout) - } - - // Create a log future, no index or term yet - logFuture := &logFuture{ - log: Log{ - Type: LogBarrier, - }, - } - logFuture.init() - - select { - case <-timer: - return errorFuture{ErrEnqueueTimeout} - case <-r.shutdownCh: - return errorFuture{ErrRaftShutdown} - case r.applyCh <- logFuture: - return logFuture - } -} - -// VerifyLeader is used to ensure the current node is still -// the leader. This can be done to prevent stale reads when a -// new leader has potentially been elected. 
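// The usual call pattern, sketched: a failed future means leadership could
// not be confirmed and the read should be retried against the new leader.
//
//	if err := r.VerifyLeader().Error(); err != nil {
//		// no longer (verifiably) the leader; redirect the client
//	}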
-func (r *Raft) VerifyLeader() Future { - metrics.IncrCounter([]string{"raft", "verify_leader"}, 1) - verifyFuture := &verifyFuture{} - verifyFuture.init() - select { - case <-r.shutdownCh: - return errorFuture{ErrRaftShutdown} - case r.verifyCh <- verifyFuture: - return verifyFuture - } -} - -// AddPeer is used to add a new peer into the cluster. This must be -// run on the leader or it will fail. -func (r *Raft) AddPeer(peer string) Future { - logFuture := &logFuture{ - log: Log{ - Type: LogAddPeer, - peer: peer, - }, - } - logFuture.init() - select { - case r.applyCh <- logFuture: - return logFuture - case <-r.shutdownCh: - return errorFuture{ErrRaftShutdown} - } -} - -// RemovePeer is used to remove a peer from the cluster. If the -// current leader is being removed, it will cause a new election -// to occur. This must be run on the leader or it will fail. -func (r *Raft) RemovePeer(peer string) Future { - logFuture := &logFuture{ - log: Log{ - Type: LogRemovePeer, - peer: peer, - }, - } - logFuture.init() - select { - case r.applyCh <- logFuture: - return logFuture - case <-r.shutdownCh: - return errorFuture{ErrRaftShutdown} - } -} - -// SetPeers is used to forcibly replace the set of internal peers and -// the peerstore with the ones specified. This can be considered unsafe. -func (r *Raft) SetPeers(p []string) Future { - peerFuture := &peerFuture{ - peers: p, - } - peerFuture.init() - - select { - case r.peerCh <- peerFuture: - return peerFuture - case <-r.shutdownCh: - return errorFuture{ErrRaftShutdown} - } -} - -// Shutdown is used to stop the Raft background routines. -// This is not a graceful operation. Provides a future that -// can be used to block until all background routines have exited. -func (r *Raft) Shutdown() Future { - r.shutdownLock.Lock() - defer r.shutdownLock.Unlock() - - if !r.shutdown { - close(r.shutdownCh) - r.shutdown = true - r.setState(Shutdown) - return &shutdownFuture{r} - } - - // avoid closing transport twice - return &shutdownFuture{nil} -} - -// Snapshot is used to manually force Raft to take a snapshot. -// Returns a future that can be used to block until complete. -func (r *Raft) Snapshot() Future { - snapFuture := &snapshotFuture{} - snapFuture.init() - select { - case r.snapshotCh <- snapFuture: - return snapFuture - case <-r.shutdownCh: - return errorFuture{ErrRaftShutdown} - } - -} - -// State is used to return the current raft state. -func (r *Raft) State() RaftState { - return r.getState() -} - -// LeaderCh is used to get a channel which delivers signals on -// acquiring or losing leadership. It sends true if we become -// the leader, and false if we lose it. The channel is not buffered, -// and does not block on writes. -func (r *Raft) LeaderCh() <-chan bool { - return r.leaderCh -} - -func (r *Raft) String() string { - return fmt.Sprintf("Node at %s [%v]", r.localAddr, r.getState()) -} - -// LastContact returns the time of last contact by a leader. -// This only makes sense if we are currently a follower. -func (r *Raft) LastContact() time.Time { - r.lastContactLock.RLock() - last := r.lastContact - r.lastContactLock.RUnlock() - return last -} - -// Stats is used to return a map of various internal stats. This -// should only be used for informative purposes or debugging. -// -// Keys are: "state", "term", "last_log_index", "last_log_term", -// "commit_index", "applied_index", "fsm_pending", -// "last_snapshot_index", "last_snapshot_term", "num_peers" and -// "last_contact". 
-// -// The value of "state" is a numerical value representing a -// RaftState const. -// -// The value of "last_contact" is either "never" if there -// has been no contact with a leader, "0" if the node is in the -// leader state, or the time since last contact with a leader -// formatted as a string. -// -// All other values are uint64s, formatted as strings. -func (r *Raft) Stats() map[string]string { - toString := func(v uint64) string { - return strconv.FormatUint(v, 10) - } - lastLogIndex, lastLogTerm := r.getLastLog() - lastSnapIndex, lastSnapTerm := r.getLastSnapshot() - s := map[string]string{ - "state": r.getState().String(), - "term": toString(r.getCurrentTerm()), - "last_log_index": toString(lastLogIndex), - "last_log_term": toString(lastLogTerm), - "commit_index": toString(r.getCommitIndex()), - "applied_index": toString(r.getLastApplied()), - "fsm_pending": toString(uint64(len(r.fsmCommitCh))), - "last_snapshot_index": toString(lastSnapIndex), - "last_snapshot_term": toString(lastSnapTerm), - "num_peers": toString(uint64(len(r.peers))), - } - last := r.LastContact() - if last.IsZero() { - s["last_contact"] = "never" - } else if r.getState() == Leader { - s["last_contact"] = "0" - } else { - s["last_contact"] = fmt.Sprintf("%v", time.Now().Sub(last)) - } - return s -} - -// LastIndex returns the last index in stable storage, -// either from the last log or from the last snapshot. -func (r *Raft) LastIndex() uint64 { - return r.getLastIndex() -} - -// AppliedIndex returns the last index applied to the FSM. This is generally -// lagging behind the last index, especially for indexes that are persisted but -// have not yet been considered committed by the leader. NOTE - this reflects -// the last index that was sent to the application's FSM over the apply channel -// but DOES NOT mean that the application's FSM has yet consumed it and applied -// it to its internal state. Thus, the application's state may lag behind this -// index. -func (r *Raft) AppliedIndex() uint64 { - return r.getLastApplied() -} - -// runFSM is a long running goroutine responsible for applying logs -// to the FSM. This is done async of other logs since we don't want -// the FSM to block our internal operations. -func (r *Raft) runFSM() { - var lastIndex, lastTerm uint64 - for { - select { - case req := <-r.fsmRestoreCh: - // Open the snapshot - meta, source, err := r.snapshots.Open(req.ID) - if err != nil { - req.respond(fmt.Errorf("failed to open snapshot %v: %v", req.ID, err)) - continue - } - - // Attempt to restore - start := time.Now() - if err := r.fsm.Restore(source); err != nil { - req.respond(fmt.Errorf("failed to restore snapshot %v: %v", req.ID, err)) - source.Close() - continue - } - source.Close() - metrics.MeasureSince([]string{"raft", "fsm", "restore"}, start) - - // Update the last index and term - lastIndex = meta.Index - lastTerm = meta.Term - req.respond(nil) - - case req := <-r.fsmSnapshotCh: - // Is there something to snapshot? 
- if lastIndex == 0 { - req.respond(ErrNothingNewToSnapshot) - continue - } - - // Get our peers - peers, err := r.peerStore.Peers() - if err != nil { - req.respond(err) - continue - } - - // Start a snapshot - start := time.Now() - snap, err := r.fsm.Snapshot() - metrics.MeasureSince([]string{"raft", "fsm", "snapshot"}, start) - - // Respond to the request - req.index = lastIndex - req.term = lastTerm - req.peers = peers - req.snapshot = snap - req.respond(err) - - case commitEntry := <-r.fsmCommitCh: - // Apply the log if a command - var resp interface{} - if commitEntry.log.Type == LogCommand { - start := time.Now() - resp = r.fsm.Apply(commitEntry.log) - metrics.MeasureSince([]string{"raft", "fsm", "apply"}, start) - } - - // Update the indexes - lastIndex = commitEntry.log.Index - lastTerm = commitEntry.log.Term - - // Invoke the future if given - if commitEntry.future != nil { - commitEntry.future.response = resp - commitEntry.future.respond(nil) - } - case <-r.shutdownCh: - return - } - } -} - -// run is a long running goroutine that runs the Raft FSM. -func (r *Raft) run() { - for { - // Check if we are doing a shutdown - select { - case <-r.shutdownCh: - // Clear the leader to prevent forwarding - r.setLeader("") - return - default: - } - - // Enter into a sub-FSM - switch r.getState() { - case Follower: - r.runFollower() - case Candidate: - r.runCandidate() - case Leader: - r.runLeader() - } - } -} - -// runFollower runs the FSM for a follower. -func (r *Raft) runFollower() { - didWarn := false - r.logger.Printf("[INFO] raft: %v entering Follower state (Leader: %q)", r, r.Leader()) - metrics.IncrCounter([]string{"raft", "state", "follower"}, 1) - heartbeatTimer := randomTimeout(r.conf.HeartbeatTimeout) - for { - select { - case rpc := <-r.rpcCh: - r.processRPC(rpc) - - case a := <-r.applyCh: - // Reject any operations since we are not the leader - a.respond(ErrNotLeader) - - case v := <-r.verifyCh: - // Reject any operations since we are not the leader - v.respond(ErrNotLeader) - - case p := <-r.peerCh: - // Set the peers - r.peers = ExcludePeer(p.peers, r.localAddr) - p.respond(r.peerStore.SetPeers(p.peers)) - - case <-heartbeatTimer: - // Restart the heartbeat timer - heartbeatTimer = randomTimeout(r.conf.HeartbeatTimeout) - - // Check if we have had a successful contact - lastContact := r.LastContact() - if time.Now().Sub(lastContact) < r.conf.HeartbeatTimeout { - continue - } - - // Heartbeat failed! Transition to the candidate state - lastLeader := r.Leader() - r.setLeader("") - if len(r.peers) == 0 && !r.conf.EnableSingleNode { - if !didWarn { - r.logger.Printf("[WARN] raft: EnableSingleNode disabled, and no known peers. Aborting election.") - didWarn = true - } - } else { - r.logger.Printf(`[WARN] raft: Heartbeat timeout from %q reached, starting election`, lastLeader) - - metrics.IncrCounter([]string{"raft", "transition", "heartbeat_timeout"}, 1) - r.setState(Candidate) - return - } - - case <-r.shutdownCh: - return - } - } -} - -// runCandidate runs the FSM for a candidate. 
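The follower loop above re-arms a randomized heartbeat timer on every pass; the randomization is what keeps all followers from timing out and starting elections at the same instant. A standalone sketch of that idea — the package's real randomTimeout helper lives elsewhere in the deleted sources, so this is an illustrative reimplementation, not the original:

```go
package main

import (
	"fmt"
	"math/rand"
	"time"
)

// randomTimeout returns a channel that fires between d and 2*d from now,
// mirroring the 1x..2x interval the follower and election loops rely on.
func randomTimeout(d time.Duration) <-chan time.Time {
	extra := time.Duration(rand.Int63()) % d
	return time.After(d + extra)
}

func main() {
	start := time.Now()
	<-randomTimeout(50 * time.Millisecond)
	// Fires somewhere in the 50ms..100ms window.
	fmt.Println("timer fired after", time.Since(start))
}
```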
-func (r *Raft) runCandidate() { - r.logger.Printf("[INFO] raft: %v entering Candidate state", r) - metrics.IncrCounter([]string{"raft", "state", "candidate"}, 1) - - // Start vote for us, and set a timeout - voteCh := r.electSelf() - electionTimer := randomTimeout(r.conf.ElectionTimeout) - - // Tally the votes, need a simple majority - grantedVotes := 0 - votesNeeded := r.quorumSize() - r.logger.Printf("[DEBUG] raft: Votes needed: %d", votesNeeded) - - for r.getState() == Candidate { - select { - case rpc := <-r.rpcCh: - r.processRPC(rpc) - - case vote := <-voteCh: - // Check if the term is greater than ours, bail - if vote.Term > r.getCurrentTerm() { - r.logger.Printf("[DEBUG] raft: Newer term discovered, fallback to follower") - r.setState(Follower) - r.setCurrentTerm(vote.Term) - return - } - - // Check if the vote is granted - if vote.Granted { - grantedVotes++ - r.logger.Printf("[DEBUG] raft: Vote granted from %s. Tally: %d", vote.voter, grantedVotes) - } - - // Check if we've become the leader - if grantedVotes >= votesNeeded { - r.logger.Printf("[INFO] raft: Election won. Tally: %d", grantedVotes) - r.setState(Leader) - r.setLeader(r.localAddr) - return - } - - case a := <-r.applyCh: - // Reject any operations since we are not the leader - a.respond(ErrNotLeader) - - case v := <-r.verifyCh: - // Reject any operations since we are not the leader - v.respond(ErrNotLeader) - - case p := <-r.peerCh: - // Set the peers - r.peers = ExcludePeer(p.peers, r.localAddr) - p.respond(r.peerStore.SetPeers(p.peers)) - // Become a follower again - r.setState(Follower) - return - - case <-electionTimer: - // Election failed! Restart the election. We simply return, - // which will kick us back into runCandidate - r.logger.Printf("[WARN] raft: Election timeout reached, restarting election") - return - - case <-r.shutdownCh: - return - } - } -} - -// runLeader runs the FSM for a leader. Do the setup here and drop into -// the leaderLoop for the hot loop. -func (r *Raft) runLeader() { - r.logger.Printf("[INFO] raft: %v entering Leader state", r) - metrics.IncrCounter([]string{"raft", "state", "leader"}, 1) - - // Notify that we are the leader - asyncNotifyBool(r.leaderCh, true) - - // Push to the notify channel if given - if notify := r.conf.NotifyCh; notify != nil { - select { - case notify <- true: - case <-r.shutdownCh: - } - } - - // Setup leader state - r.leaderState.commitCh = make(chan struct{}, 1) - r.leaderState.inflight = newInflight(r.leaderState.commitCh) - r.leaderState.replState = make(map[string]*followerReplication) - r.leaderState.notify = make(map[*verifyFuture]struct{}) - r.leaderState.stepDown = make(chan struct{}, 1) - - // Cleanup state on step down - defer func() { - // Since we were the leader previously, we update our - // last contact time when we step down, so that we are not - // reporting a last contact time from before we were the - // leader. Otherwise, to a client it would seem our data - // is extremely stale. 
- r.setLastContact() - - // Stop replication - for _, p := range r.leaderState.replState { - close(p.stopCh) - } - - // Cancel inflight requests - r.leaderState.inflight.Cancel(ErrLeadershipLost) - - // Respond to any pending verify requests - for future := range r.leaderState.notify { - future.respond(ErrLeadershipLost) - } - - // Clear all the state - r.leaderState.commitCh = nil - r.leaderState.inflight = nil - r.leaderState.replState = nil - r.leaderState.notify = nil - r.leaderState.stepDown = nil - - // If we are stepping down for some reason, no known leader. - // We may have stepped down due to an RPC call, which would - // provide the leader, so we cannot always blank this out. - r.leaderLock.Lock() - if r.leader == r.localAddr { - r.leader = "" - } - r.leaderLock.Unlock() - - // Notify that we are not the leader - asyncNotifyBool(r.leaderCh, false) - - // Push to the notify channel if given - if notify := r.conf.NotifyCh; notify != nil { - select { - case notify <- false: - case <-r.shutdownCh: - // On shutdown, make a best effort but do not block - select { - case notify <- false: - default: - } - } - } - }() - - // Start a replication routine for each peer - for _, peer := range r.peers { - r.startReplication(peer) - } - - // Dispatch a no-op log first. Instead of LogNoop, - // we use a LogAddPeer with our peerset. This acts like - // a no-op as well, but when doing an initial bootstrap, ensures - // that all nodes share a common peerset. - peerSet := append([]string{r.localAddr}, r.peers...) - noop := &logFuture{ - log: Log{ - Type: LogAddPeer, - Data: encodePeers(peerSet, r.trans), - }, - } - r.dispatchLogs([]*logFuture{noop}) - - // Disable EnableSingleNode after we've been elected leader. - // This is to prevent a split brain in the future, if we are removed - // from the cluster and then elect ourself as leader. - if r.conf.DisableBootstrapAfterElect && r.conf.EnableSingleNode { - r.logger.Printf("[INFO] raft: Disabling EnableSingleNode (bootstrap)") - r.conf.EnableSingleNode = false - } - - // Sit in the leader loop until we step down - r.leaderLoop() -} - -// startReplication is a helper to setup state and start async replication to a peer. -func (r *Raft) startReplication(peer string) { - lastIdx := r.getLastIndex() - s := &followerReplication{ - peer: peer, - inflight: r.leaderState.inflight, - stopCh: make(chan uint64, 1), - triggerCh: make(chan struct{}, 1), - currentTerm: r.getCurrentTerm(), - matchIndex: 0, - nextIndex: lastIdx + 1, - lastContact: time.Now(), - notifyCh: make(chan struct{}, 1), - stepDown: r.leaderState.stepDown, - } - r.leaderState.replState[peer] = s - r.goFunc(func() { r.replicate(s) }) - asyncNotifyCh(s.triggerCh) -} - -// leaderLoop is the hot loop for a leader. It is invoked -// after all the various leader setup is done. -func (r *Raft) leaderLoop() { - // stepDown is used to track if there is an inflight log that - // would cause us to lose leadership (specifically a RemovePeer of - // ourselves). If this is the case, we must not allow any logs to - // be processed in parallel, otherwise we are basing commit on - // only a single peer (ourself) and replicating to an undefined set - // of peers. 
- stepDown := false - - lease := time.After(r.conf.LeaderLeaseTimeout) - for r.getState() == Leader { - select { - case rpc := <-r.rpcCh: - r.processRPC(rpc) - - case <-r.leaderState.stepDown: - r.setState(Follower) - - case <-r.leaderState.commitCh: - // Get the committed messages - committed := r.leaderState.inflight.Committed() - for e := committed.Front(); e != nil; e = e.Next() { - // Measure the commit time - commitLog := e.Value.(*logFuture) - metrics.MeasureSince([]string{"raft", "commitTime"}, commitLog.dispatch) - - // Increment the commit index - idx := commitLog.log.Index - r.setCommitIndex(idx) - r.processLogs(idx, commitLog) - } - - case v := <-r.verifyCh: - if v.quorumSize == 0 { - // Just dispatched, start the verification - r.verifyLeader(v) - - } else if v.votes < v.quorumSize { - // Early return, means there must be a new leader - r.logger.Printf("[WARN] raft: New leader elected, stepping down") - r.setState(Follower) - delete(r.leaderState.notify, v) - v.respond(ErrNotLeader) - - } else { - // Quorum of members agree, we are still leader - delete(r.leaderState.notify, v) - v.respond(nil) - } - - case p := <-r.peerCh: - p.respond(ErrLeader) - - case newLog := <-r.applyCh: - // Group commit, gather all the ready commits - ready := []*logFuture{newLog} - for i := 0; i < r.conf.MaxAppendEntries; i++ { - select { - case newLog := <-r.applyCh: - ready = append(ready, newLog) - default: - break - } - } - - // Handle any peer set changes - n := len(ready) - for i := 0; i < n; i++ { - // Fail all future transactions once stepDown is on - if stepDown { - ready[i].respond(ErrNotLeader) - ready[i], ready[n-1] = ready[n-1], nil - n-- - i-- - continue - } - - // Special case AddPeer and RemovePeer - log := ready[i] - if log.log.Type != LogAddPeer && log.log.Type != LogRemovePeer { - continue - } - - // Check if this log should be ignored. The logs can be - // reordered here since we have not yet assigned an index - // and are not violating any promises. - if !r.preparePeerChange(log) { - ready[i], ready[n-1] = ready[n-1], nil - n-- - i-- - continue - } - - // Apply peer set changes early and check if we will step - // down after the commit of this log. If so, we must not - // allow any future entries to make progress to avoid undefined - // behavior. - if ok := r.processLog(&log.log, nil, true); ok { - stepDown = true - } - } - - // Nothing to do if all logs are invalid - if n == 0 { - continue - } - - // Dispatch the logs - ready = ready[:n] - r.dispatchLogs(ready) - - case <-lease: - // Check if we've exceeded the lease, potentially stepping down - maxDiff := r.checkLeaderLease() - - // Next check interval should adjust for the last node we've - // contacted, without going negative - checkInterval := r.conf.LeaderLeaseTimeout - maxDiff - if checkInterval < minCheckInterval { - checkInterval = minCheckInterval - } - - // Renew the lease timer - lease = time.After(checkInterval) - - case <-r.shutdownCh: - return - } - } -} - -// verifyLeader must be called from the main thread for safety. -// Causes the followers to attempt an immediate heartbeat. 
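One Go subtlety in the group-commit loop above: a bare break inside a select exits only the select, not the enclosing for, so the batching loop keeps iterating (harmlessly, via the default case) until it reaches MaxAppendEntries even once applyCh is drained. A standalone sketch of the usual labeled-break way to stop draining early — illustrative only, not the deleted code:

```go
package main

import "fmt"

func main() {
	applyCh := make(chan string, 8)
	applyCh <- "a"
	applyCh <- "b"

	var ready []string
BATCH:
	for i := 0; i < 64; i++ { // 64 stands in for MaxAppendEntries
		select {
		case v := <-applyCh:
			ready = append(ready, v) // gather everything already queued
		default:
			break BATCH // exits the for loop, not just the select
		}
	}
	fmt.Println(ready) // [a b]
}
```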
-func (r *Raft) verifyLeader(v *verifyFuture) {
- // Current leader always votes for self
- v.votes = 1
-
- // Set the quorum size, hot-path for single node
- v.quorumSize = r.quorumSize()
- if v.quorumSize == 1 {
- v.respond(nil)
- return
- }
-
- // Track this request
- v.notifyCh = r.verifyCh
- r.leaderState.notify[v] = struct{}{}
-
- // Trigger immediate heartbeats
- for _, repl := range r.leaderState.replState {
- repl.notifyLock.Lock()
- repl.notify = append(repl.notify, v)
- repl.notifyLock.Unlock()
- asyncNotifyCh(repl.notifyCh)
- }
-}
-
-// checkLeaderLease is used to check if we can contact a quorum of nodes
-// within the last leader lease interval. If not, we need to step down,
-// as we may have lost connectivity. Returns the maximum duration without
-// contact.
-func (r *Raft) checkLeaderLease() time.Duration {
- // Track contacted nodes, we can always contact ourself
- contacted := 1
-
- // Check each follower
- var maxDiff time.Duration
- now := time.Now()
- for peer, f := range r.leaderState.replState {
- diff := now.Sub(f.LastContact())
- if diff <= r.conf.LeaderLeaseTimeout {
- contacted++
- if diff > maxDiff {
- maxDiff = diff
- }
- } else {
- // Log at least once at high value, then debug. Otherwise it gets very verbose.
- if diff <= 3*r.conf.LeaderLeaseTimeout {
- r.logger.Printf("[WARN] raft: Failed to contact %v in %v", peer, diff)
- } else {
- r.logger.Printf("[DEBUG] raft: Failed to contact %v in %v", peer, diff)
- }
- }
- metrics.AddSample([]string{"raft", "leader", "lastContact"}, float32(diff/time.Millisecond))
- }
-
- // Verify we can contact a quorum
- quorum := r.quorumSize()
- if contacted < quorum {
- r.logger.Printf("[WARN] raft: Failed to contact quorum of nodes, stepping down")
- r.setState(Follower)
- metrics.IncrCounter([]string{"raft", "transition", "leader_lease_timeout"}, 1)
- }
- return maxDiff
-}
-
-// quorumSize is used to return the quorum size
-func (r *Raft) quorumSize() int {
- return ((len(r.peers) + 1) / 2) + 1
-}
-
-// preparePeerChange checks if a LogAddPeer or LogRemovePeer should be performed,
-// and properly formats the data field on the log before dispatching it.
-func (r *Raft) preparePeerChange(l *logFuture) bool {
- // Check if this is a known peer
- p := l.log.peer
- knownPeer := PeerContained(r.peers, p) || r.localAddr == p
-
- // Ignore known peers on add
- if l.log.Type == LogAddPeer && knownPeer {
- l.respond(ErrKnownPeer)
- return false
- }
-
- // Ignore unknown peers on remove
- if l.log.Type == LogRemovePeer && !knownPeer {
- l.respond(ErrUnknownPeer)
- return false
- }
-
- // Construct the peer set
- var peerSet []string
- if l.log.Type == LogAddPeer {
- peerSet = append([]string{p, r.localAddr}, r.peers...)
- } else {
- peerSet = ExcludePeer(append([]string{r.localAddr}, r.peers...), p)
- }
-
- // Setup the log
- l.log.Data = encodePeers(peerSet, r.trans)
- return true
-}
-
-// dispatchLogs is called to push a log to disk, mark it
-// as inflight and begin replication of it.
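quorumSize above counts the local node plus its peers and takes a strict majority, which is why a 3-node cluster tolerates one failure and a 5-node cluster two. A worked sketch of the same arithmetic, as a standalone, hypothetical helper:

```go
package main

import "fmt"

// quorum mirrors ((len(peers)+1)/2)+1 from the deleted code: peers
// excludes the local node, hence the +1 before taking the majority.
func quorum(numPeers int) int {
	return ((numPeers + 1) / 2) + 1
}

func main() {
	for _, peers := range []int{0, 2, 4} { // 1-, 3-, and 5-node clusters
		nodes := peers + 1
		fmt.Printf("%d node(s) -> quorum %d, tolerates %d failure(s)\n",
			nodes, quorum(peers), nodes-quorum(peers))
	}
	// 1 node(s) -> quorum 1, tolerates 0 failure(s)
	// 3 node(s) -> quorum 2, tolerates 1 failure(s)
	// 5 node(s) -> quorum 3, tolerates 2 failure(s)
}
```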
-func (r *Raft) dispatchLogs(applyLogs []*logFuture) { - now := time.Now() - defer metrics.MeasureSince([]string{"raft", "leader", "dispatchLog"}, now) - - term := r.getCurrentTerm() - lastIndex := r.getLastIndex() - logs := make([]*Log, len(applyLogs)) - - for idx, applyLog := range applyLogs { - applyLog.dispatch = now - applyLog.log.Index = lastIndex + uint64(idx) + 1 - applyLog.log.Term = term - applyLog.policy = newMajorityQuorum(len(r.peers) + 1) - logs[idx] = &applyLog.log - } - - // Write the log entry locally - if err := r.logs.StoreLogs(logs); err != nil { - r.logger.Printf("[ERR] raft: Failed to commit logs: %v", err) - for _, applyLog := range applyLogs { - applyLog.respond(err) - } - r.setState(Follower) - return - } - - // Add this to the inflight logs, commit - r.leaderState.inflight.StartAll(applyLogs) - - // Update the last log since it's on disk now - r.setLastLog(lastIndex+uint64(len(applyLogs)), term) - - // Notify the replicators of the new log - for _, f := range r.leaderState.replState { - asyncNotifyCh(f.triggerCh) - } -} - -// processLogs is used to process all the logs from the lastApplied -// up to the given index. -func (r *Raft) processLogs(index uint64, future *logFuture) { - // Reject logs we've applied already - lastApplied := r.getLastApplied() - if index <= lastApplied { - r.logger.Printf("[WARN] raft: Skipping application of old log: %d", index) - return - } - - // Apply all the preceding logs - for idx := r.getLastApplied() + 1; idx <= index; idx++ { - // Get the log, either from the future or from our log store - if future != nil && future.log.Index == idx { - r.processLog(&future.log, future, false) - - } else { - l := new(Log) - if err := r.logs.GetLog(idx, l); err != nil { - r.logger.Printf("[ERR] raft: Failed to get log at %d: %v", idx, err) - panic(err) - } - r.processLog(l, nil, false) - } - - // Update the lastApplied index and term - r.setLastApplied(idx) - } -} - -// processLog is invoked to process the application of a single committed log. -// Returns if this log entry would cause us to stepDown after it commits. -func (r *Raft) processLog(l *Log, future *logFuture, precommit bool) (stepDown bool) { - switch l.Type { - case LogBarrier: - // Barrier is handled by the FSM - fallthrough - - case LogCommand: - // Forward to the fsm handler - select { - case r.fsmCommitCh <- commitTuple{l, future}: - case <-r.shutdownCh: - if future != nil { - future.respond(ErrRaftShutdown) - } - } - - // Return so that the future is only responded to - // by the FSM handler when the application is done - return - - case LogAddPeer: - fallthrough - case LogRemovePeer: - peers := decodePeers(l.Data, r.trans) - r.logger.Printf("[DEBUG] raft: Node %v updated peer set (%v): %v", r.localAddr, l.Type, peers) - - // If the peer set does not include us, remove all other peers - removeSelf := !PeerContained(peers, r.localAddr) && l.Type == LogRemovePeer - if removeSelf { - // Mark that this operation will cause us to step down as - // leader. This prevents the future logs from being Applied - // from this leader. - stepDown = true - - // We only modify the peers after the commit, otherwise we - // would be using a quorum size of 1 for the RemovePeer operation. - // This is used with the stepDown guard to prevent any other logs. 
- if !precommit { - r.peers = nil - r.peerStore.SetPeers([]string{r.localAddr}) - } - } else { - r.peers = ExcludePeer(peers, r.localAddr) - r.peerStore.SetPeers(peers) - } - - // Handle replication if we are the leader - if r.getState() == Leader { - for _, p := range r.peers { - if _, ok := r.leaderState.replState[p]; !ok { - r.logger.Printf("[INFO] raft: Added peer %v, starting replication", p) - r.startReplication(p) - } - } - } - - // Stop replication for old nodes - if r.getState() == Leader && !precommit { - var toDelete []string - for _, repl := range r.leaderState.replState { - if !PeerContained(r.peers, repl.peer) { - r.logger.Printf("[INFO] raft: Removed peer %v, stopping replication (Index: %d)", repl.peer, l.Index) - - // Replicate up to this index and stop - repl.stopCh <- l.Index - close(repl.stopCh) - toDelete = append(toDelete, repl.peer) - } - } - for _, name := range toDelete { - delete(r.leaderState.replState, name) - } - } - - // Handle removing ourself - if removeSelf && !precommit { - if r.conf.ShutdownOnRemove { - r.logger.Printf("[INFO] raft: Removed ourself, shutting down") - r.Shutdown() - } else { - r.logger.Printf("[INFO] raft: Removed ourself, transitioning to follower") - r.setState(Follower) - } - } - - case LogNoop: - // Ignore the no-op - default: - r.logger.Printf("[ERR] raft: Got unrecognized log type: %#v", l) - } - - // Invoke the future if given - if future != nil && !precommit { - future.respond(nil) - } - return -} - -// processRPC is called to handle an incoming RPC request. -func (r *Raft) processRPC(rpc RPC) { - switch cmd := rpc.Command.(type) { - case *AppendEntriesRequest: - r.appendEntries(rpc, cmd) - case *RequestVoteRequest: - r.requestVote(rpc, cmd) - case *InstallSnapshotRequest: - r.installSnapshot(rpc, cmd) - default: - r.logger.Printf("[ERR] raft: Got unexpected command: %#v", rpc.Command) - rpc.Respond(nil, fmt.Errorf("unexpected command")) - } -} - -// processHeartbeat is a special handler used just for heartbeat requests -// so that they can be fast-pathed if a transport supports it. -func (r *Raft) processHeartbeat(rpc RPC) { - defer metrics.MeasureSince([]string{"raft", "rpc", "processHeartbeat"}, time.Now()) - - // Check if we are shutdown, just ignore the RPC - select { - case <-r.shutdownCh: - return - default: - } - - // Ensure we are only handling a heartbeat - switch cmd := rpc.Command.(type) { - case *AppendEntriesRequest: - r.appendEntries(rpc, cmd) - default: - r.logger.Printf("[ERR] raft: Expected heartbeat, got command: %#v", rpc.Command) - rpc.Respond(nil, fmt.Errorf("unexpected command")) - } -} - -// appendEntries is invoked when we get an append entries RPC call. 
-func (r *Raft) appendEntries(rpc RPC, a *AppendEntriesRequest) {
- defer metrics.MeasureSince([]string{"raft", "rpc", "appendEntries"}, time.Now())
- // Setup a response
- resp := &AppendEntriesResponse{
- Term: r.getCurrentTerm(),
- LastLog: r.getLastIndex(),
- Success: false,
- NoRetryBackoff: false,
- }
- var rpcErr error
- defer func() {
- rpc.Respond(resp, rpcErr)
- }()
-
- // Ignore an older term
- if a.Term < r.getCurrentTerm() {
- return
- }
-
- // Increase the term if we see a newer one, also transition to follower
- // if we ever get an appendEntries call
- if a.Term > r.getCurrentTerm() || r.getState() != Follower {
- // Ensure transition to follower
- r.setState(Follower)
- r.setCurrentTerm(a.Term)
- resp.Term = a.Term
- }
-
- // Save the current leader
- r.setLeader(r.trans.DecodePeer(a.Leader))
-
- // Verify the last log entry
- if a.PrevLogEntry > 0 {
- lastIdx, lastTerm := r.getLastEntry()
-
- var prevLogTerm uint64
- if a.PrevLogEntry == lastIdx {
- prevLogTerm = lastTerm
-
- } else {
- var prevLog Log
- if err := r.logs.GetLog(a.PrevLogEntry, &prevLog); err != nil {
- r.logger.Printf("[WARN] raft: Failed to get previous log: %d %v (last: %d)",
- a.PrevLogEntry, err, lastIdx)
- resp.NoRetryBackoff = true
- return
- }
- prevLogTerm = prevLog.Term
- }
-
- if a.PrevLogTerm != prevLogTerm {
- r.logger.Printf("[WARN] raft: Previous log term mis-match: ours: %d remote: %d",
- prevLogTerm, a.PrevLogTerm)
- resp.NoRetryBackoff = true
- return
- }
- }
-
- // Process any new entries
- if n := len(a.Entries); n > 0 {
- start := time.Now()
- first := a.Entries[0]
- last := a.Entries[n-1]
-
- // Delete any conflicting entries
- lastLogIdx, _ := r.getLastLog()
- if first.Index <= lastLogIdx {
- r.logger.Printf("[WARN] raft: Clearing log suffix from %d to %d", first.Index, lastLogIdx)
- if err := r.logs.DeleteRange(first.Index, lastLogIdx); err != nil {
- r.logger.Printf("[ERR] raft: Failed to clear log suffix: %v", err)
- return
- }
- }
-
- // Append the entry
- if err := r.logs.StoreLogs(a.Entries); err != nil {
- r.logger.Printf("[ERR] raft: Failed to append to logs: %v", err)
- return
- }
-
- // Update the lastLog
- r.setLastLog(last.Index, last.Term)
- metrics.MeasureSince([]string{"raft", "rpc", "appendEntries", "storeLogs"}, start)
- }
-
- // Update the commit index
- if a.LeaderCommitIndex > 0 && a.LeaderCommitIndex > r.getCommitIndex() {
- start := time.Now()
- idx := min(a.LeaderCommitIndex, r.getLastIndex())
- r.setCommitIndex(idx)
- r.processLogs(idx, nil)
- metrics.MeasureSince([]string{"raft", "rpc", "appendEntries", "processLogs"}, start)
- }
-
- // Everything went well, set success
- resp.Success = true
- r.setLastContact()
- return
-}
-
-// requestVote is invoked when we get a request vote RPC call.
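The PrevLogEntry/PrevLogTerm check above is Raft's log-matching rule: a follower only accepts new entries if its log already contains the leader's previous entry with the same term; otherwise conflicting suffixes are truncated on a later attempt. A reduced sketch of that consistency check, with hypothetical types rather than the deleted code's:

```go
package main

import "fmt"

type entry struct{ index, term uint64 }

// consistentAt reports whether a follower log contains (prevIndex, prevTerm),
// the precondition appendEntries enforces before storing new entries.
func consistentAt(log []entry, prevIndex, prevTerm uint64) bool {
	if prevIndex == 0 {
		return true // appending from the very start is always consistent
	}
	for _, e := range log {
		if e.index == prevIndex {
			return e.term == prevTerm
		}
	}
	return false // we don't have the previous entry at all
}

func main() {
	log := []entry{{1, 1}, {2, 1}, {3, 2}}
	fmt.Println(consistentAt(log, 3, 2)) // true: terms match
	fmt.Println(consistentAt(log, 3, 1)) // false: conflicting term
	fmt.Println(consistentAt(log, 5, 2)) // false: entry missing entirely
}
```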
-func (r *Raft) requestVote(rpc RPC, req *RequestVoteRequest) {
- defer metrics.MeasureSince([]string{"raft", "rpc", "requestVote"}, time.Now())
- r.observe(*req)
-
- // Setup a response
- resp := &RequestVoteResponse{
- Term: r.getCurrentTerm(),
- Peers: encodePeers(r.peers, r.trans),
- Granted: false,
- }
- var rpcErr error
- defer func() {
- rpc.Respond(resp, rpcErr)
- }()
-
- // Check if we have an existing leader [who's not the candidate]
- candidate := r.trans.DecodePeer(req.Candidate)
- if leader := r.Leader(); leader != "" && leader != candidate {
- r.logger.Printf("[WARN] raft: Rejecting vote request from %v since we have a leader: %v",
- candidate, leader)
- return
- }
-
- // Ignore an older term
- if req.Term < r.getCurrentTerm() {
- return
- }
-
- // Increase the term if we see a newer one
- if req.Term > r.getCurrentTerm() {
- // Ensure transition to follower
- r.setState(Follower)
- r.setCurrentTerm(req.Term)
- resp.Term = req.Term
- }
-
- // Check if we have voted yet
- lastVoteTerm, err := r.stable.GetUint64(keyLastVoteTerm)
- if err != nil && err.Error() != "not found" {
- r.logger.Printf("[ERR] raft: Failed to get last vote term: %v", err)
- return
- }
- lastVoteCandBytes, err := r.stable.Get(keyLastVoteCand)
- if err != nil && err.Error() != "not found" {
- r.logger.Printf("[ERR] raft: Failed to get last vote candidate: %v", err)
- return
- }
-
- // Check if we've voted in this election before
- if lastVoteTerm == req.Term && lastVoteCandBytes != nil {
- r.logger.Printf("[INFO] raft: Duplicate RequestVote for same term: %d", req.Term)
- if bytes.Compare(lastVoteCandBytes, req.Candidate) == 0 {
- r.logger.Printf("[WARN] raft: Duplicate RequestVote from candidate: %s", req.Candidate)
- resp.Granted = true
- }
- return
- }
-
- // Reject if their term is older
- lastIdx, lastTerm := r.getLastEntry()
- if lastTerm > req.LastLogTerm {
- r.logger.Printf("[WARN] raft: Rejecting vote request from %v since our last term is greater (%d, %d)",
- candidate, lastTerm, req.LastLogTerm)
- return
- }
-
- if lastTerm == req.LastLogTerm && lastIdx > req.LastLogIndex {
- r.logger.Printf("[WARN] raft: Rejecting vote request from %v since our last index is greater (%d, %d)",
- candidate, lastIdx, req.LastLogIndex)
- return
- }
-
- // Persist a vote for safety
- if err := r.persistVote(req.Term, req.Candidate); err != nil {
- r.logger.Printf("[ERR] raft: Failed to persist vote: %v", err)
- return
- }
-
- resp.Granted = true
- r.setLastContact()
- return
-}
-
-// installSnapshot is invoked when we get an InstallSnapshot RPC call.
-// We must be in the follower state for this, since it means we are
-// too far behind a leader for log replay.
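The two rejections above implement Raft's election restriction: a vote is granted only if the candidate's log is at least as up-to-date as the voter's, comparing last log terms first and last indexes on a tie. The same rule as a pure function, with illustrative names rather than the deleted code's:

```go
package main

import "fmt"

// candidateUpToDate reports whether a candidate's (lastTerm, lastIndex) is
// at least as recent as the voter's, per the checks in requestVote above.
func candidateUpToDate(ourTerm, ourIdx, candTerm, candIdx uint64) bool {
	if candTerm != ourTerm {
		return candTerm > ourTerm // newer last term wins outright
	}
	return candIdx >= ourIdx // same term: longer (or equal) log wins
}

func main() {
	fmt.Println(candidateUpToDate(3, 10, 4, 1))  // true: newer term wins
	fmt.Println(candidateUpToDate(3, 10, 3, 9))  // false: same term, shorter log
	fmt.Println(candidateUpToDate(3, 10, 3, 10)) // true: identical is enough
}
```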
-func (r *Raft) installSnapshot(rpc RPC, req *InstallSnapshotRequest) {
- defer metrics.MeasureSince([]string{"raft", "rpc", "installSnapshot"}, time.Now())
- // Setup a response
- resp := &InstallSnapshotResponse{
- Term: r.getCurrentTerm(),
- Success: false,
- }
- var rpcErr error
- defer func() {
- io.Copy(ioutil.Discard, rpc.Reader) // ensure we always consume all the snapshot data from the stream [see issue #212]
- rpc.Respond(resp, rpcErr)
- }()
-
- // Ignore an older term
- if req.Term < r.getCurrentTerm() {
- r.logger.Printf("[INFO] raft: Ignoring installSnapshot request with older term of %d vs currentTerm %d", req.Term, r.getCurrentTerm())
- return
- }
-
- // Increase the term if we see a newer one
- if req.Term > r.getCurrentTerm() {
- // Ensure transition to follower
- r.setState(Follower)
- r.setCurrentTerm(req.Term)
- resp.Term = req.Term
- }
-
- // Save the current leader
- r.setLeader(r.trans.DecodePeer(req.Leader))
-
- // Create a new snapshot
- sink, err := r.snapshots.Create(req.LastLogIndex, req.LastLogTerm, req.Peers)
- if err != nil {
- r.logger.Printf("[ERR] raft: Failed to create snapshot to install: %v", err)
- rpcErr = fmt.Errorf("failed to create snapshot: %v", err)
- return
- }
-
- // Spill the remote snapshot to disk
- n, err := io.Copy(sink, rpc.Reader)
- if err != nil {
- sink.Cancel()
- r.logger.Printf("[ERR] raft: Failed to copy snapshot: %v", err)
- rpcErr = err
- return
- }
-
- // Check that we received it all
- if n != req.Size {
- sink.Cancel()
- r.logger.Printf("[ERR] raft: Failed to receive whole snapshot: %d / %d", n, req.Size)
- rpcErr = fmt.Errorf("short read")
- return
- }
-
- // Finalize the snapshot
- if err := sink.Close(); err != nil {
- r.logger.Printf("[ERR] raft: Failed to finalize snapshot: %v", err)
- rpcErr = err
- return
- }
- r.logger.Printf("[INFO] raft: Copied %d bytes to local snapshot", n)
-
- // Restore snapshot
- future := &restoreFuture{ID: sink.ID()}
- future.init()
- select {
- case r.fsmRestoreCh <- future:
- case <-r.shutdownCh:
- future.respond(ErrRaftShutdown)
- return
- }
-
- // Wait for the restore to happen
- if err := future.Error(); err != nil {
- r.logger.Printf("[ERR] raft: Failed to restore snapshot: %v", err)
- rpcErr = err
- return
- }
-
- // Update the lastApplied so we don't replay old logs
- r.setLastApplied(req.LastLogIndex)
-
- // Update the last stable snapshot info
- r.setLastSnapshot(req.LastLogIndex, req.LastLogTerm)
-
- // Restore the peer set
- peers := decodePeers(req.Peers, r.trans)
- r.peers = ExcludePeer(peers, r.localAddr)
- r.peerStore.SetPeers(peers)
-
- // Compact logs, continue even if this fails
- if err := r.compactLogs(req.LastLogIndex); err != nil {
- r.logger.Printf("[ERR] raft: Failed to compact logs: %v", err)
- }
-
- r.logger.Printf("[INFO] raft: Installed remote snapshot")
- resp.Success = true
- r.setLastContact()
- return
-}
-
-// setLastContact is used to set the last contact time to now
-func (r *Raft) setLastContact() {
- r.lastContactLock.Lock()
- r.lastContact = time.Now()
- r.lastContactLock.Unlock()
-}
-
-type voteResult struct {
- RequestVoteResponse
- voter string
-}
-
-// electSelf is used to send a RequestVote RPC to all peers,
-// and vote for ourself. This has the side effect of incrementing
-// the current term. The response channel returned is used to wait
-// for all the responses (including a vote for ourself).
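installSnapshot above spills the stream into a sink with io.Copy and rejects a short read by comparing the bytes copied against the size the leader advertised. The same guard in isolation — a sketch with in-memory stand-ins for the sink and stream, not the deleted handler:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"
)

// spill copies a snapshot stream into sink and fails on a short read,
// mirroring the n != req.Size check in the handler above.
func spill(sink io.Writer, src io.Reader, want int64) error {
	n, err := io.Copy(sink, src)
	if err != nil {
		return err
	}
	if n != want {
		return fmt.Errorf("short read: %d / %d bytes", n, want)
	}
	return nil
}

func main() {
	var sink bytes.Buffer
	fmt.Println(spill(&sink, strings.NewReader("snapshot"), 8)) // <nil>
	fmt.Println(spill(&sink, strings.NewReader("snap"), 8))     // short read: 4 / 8 bytes
}
```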
-func (r *Raft) electSelf() <-chan *voteResult { - // Create a response channel - respCh := make(chan *voteResult, len(r.peers)+1) - - // Increment the term - r.setCurrentTerm(r.getCurrentTerm() + 1) - - // Construct the request - lastIdx, lastTerm := r.getLastEntry() - req := &RequestVoteRequest{ - Term: r.getCurrentTerm(), - Candidate: r.trans.EncodePeer(r.localAddr), - LastLogIndex: lastIdx, - LastLogTerm: lastTerm, - } - - // Construct a function to ask for a vote - askPeer := func(peer string) { - r.goFunc(func() { - defer metrics.MeasureSince([]string{"raft", "candidate", "electSelf"}, time.Now()) - resp := &voteResult{voter: peer} - err := r.trans.RequestVote(peer, req, &resp.RequestVoteResponse) - if err != nil { - r.logger.Printf("[ERR] raft: Failed to make RequestVote RPC to %v: %v", peer, err) - resp.Term = req.Term - resp.Granted = false - } - - // If we are not a peer, we could have been removed but failed - // to receive the log message. OR it could mean an improperly configured - // cluster. Either way, we should warn - if err == nil { - peerSet := decodePeers(resp.Peers, r.trans) - if !PeerContained(peerSet, r.localAddr) { - r.logger.Printf("[WARN] raft: Remote peer %v does not have local node %v as a peer", - peer, r.localAddr) - } - } - - respCh <- resp - }) - } - - // For each peer, request a vote - for _, peer := range r.peers { - askPeer(peer) - } - - // Persist a vote for ourselves - if err := r.persistVote(req.Term, req.Candidate); err != nil { - r.logger.Printf("[ERR] raft: Failed to persist vote : %v", err) - return nil - } - - // Include our own vote - respCh <- &voteResult{ - RequestVoteResponse: RequestVoteResponse{ - Term: req.Term, - Granted: true, - }, - voter: r.localAddr, - } - return respCh -} - -// persistVote is used to persist our vote for safety. -func (r *Raft) persistVote(term uint64, candidate []byte) error { - if err := r.stable.SetUint64(keyLastVoteTerm, term); err != nil { - return err - } - if err := r.stable.Set(keyLastVoteCand, candidate); err != nil { - return err - } - return nil -} - -// setCurrentTerm is used to set the current term in a durable manner. -func (r *Raft) setCurrentTerm(t uint64) { - // Persist to disk first - if err := r.stable.SetUint64(keyCurrentTerm, t); err != nil { - panic(fmt.Errorf("failed to save current term: %v", err)) - } - r.raftState.setCurrentTerm(t) -} - -// setState is used to update the current state. Any state -// transition causes the known leader to be cleared. This means -// that leader should be set only after updating the state. -func (r *Raft) setState(state RaftState) { - r.setLeader("") - oldState := r.raftState.getState() - r.raftState.setState(state) - if oldState != state { - r.observe(state) - } -} - -// runSnapshots is a long running goroutine used to manage taking -// new snapshots of the FSM. It runs in parallel to the FSM and -// main goroutines, so that snapshots do not block normal operation. 
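electSelf above fans RequestVote RPCs out to every peer in their own goroutines and funnels results through a channel buffered for len(peers)+1, so no responder ever blocks, then the candidate loop tallies grants until it reaches a majority. A self-contained tally sketch in the same shape; the voteResult type and vote outcomes here are hypothetical:

```go
package main

import "fmt"

type voteResult struct {
	voter   string
	granted bool
}

func main() {
	peers := []string{"n2", "n3", "n4", "n5"}
	respCh := make(chan voteResult, len(peers)+1) // +1 for our own vote

	respCh <- voteResult{voter: "self", granted: true} // always vote for ourself
	for _, p := range peers {
		p := p
		go func() { respCh <- voteResult{voter: p, granted: p != "n5"} }()
	}

	needed := (len(peers)+1)/2 + 1 // simple majority of 5 nodes = 3
	granted := 0
	for i := 0; i < len(peers)+1; i++ {
		if v := <-respCh; v.granted {
			granted++
		}
		if granted >= needed {
			fmt.Println("election won with", granted, "votes")
			return
		}
	}
	fmt.Println("election lost")
}
```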
-func (r *Raft) runSnapshots() {
- for {
- select {
- case <-randomTimeout(r.conf.SnapshotInterval):
- // Check if we should snapshot
- if !r.shouldSnapshot() {
- continue
- }
-
- // Trigger a snapshot
- if err := r.takeSnapshot(); err != nil {
- r.logger.Printf("[ERR] raft: Failed to take snapshot: %v", err)
- }
-
- case future := <-r.snapshotCh:
- // User-triggered, run immediately
- err := r.takeSnapshot()
- if err != nil {
- r.logger.Printf("[ERR] raft: Failed to take snapshot: %v", err)
- }
- future.respond(err)
-
- case <-r.shutdownCh:
- return
- }
- }
-}
-
-// shouldSnapshot checks if we meet the conditions to take
-// a new snapshot.
-func (r *Raft) shouldSnapshot() bool {
- // Check the last snapshot index
- lastSnap, _ := r.getLastSnapshot()
-
- // Check the last log index
- lastIdx, err := r.logs.LastIndex()
- if err != nil {
- r.logger.Printf("[ERR] raft: Failed to get last log index: %v", err)
- return false
- }
-
- // Compare the delta to the threshold
- delta := lastIdx - lastSnap
- return delta >= r.conf.SnapshotThreshold
-}
-
-// takeSnapshot is used to take a new snapshot.
-func (r *Raft) takeSnapshot() error {
- defer metrics.MeasureSince([]string{"raft", "snapshot", "takeSnapshot"}, time.Now())
- // Create a snapshot request
- req := &reqSnapshotFuture{}
- req.init()
-
- // Wait for dispatch or shutdown
- select {
- case r.fsmSnapshotCh <- req:
- case <-r.shutdownCh:
- return ErrRaftShutdown
- }
-
- // Wait until we get a response
- if err := req.Error(); err != nil {
- if err != ErrNothingNewToSnapshot {
- err = fmt.Errorf("failed to start snapshot: %v", err)
- }
- return err
- }
- defer req.snapshot.Release()
-
- // Log that we are starting the snapshot
- r.logger.Printf("[INFO] raft: Starting snapshot up to %d", req.index)
-
- // Encode the peerset
- peerSet := encodePeers(req.peers, r.trans)
-
- // Create a new snapshot
- start := time.Now()
- sink, err := r.snapshots.Create(req.index, req.term, peerSet)
- if err != nil {
- return fmt.Errorf("failed to create snapshot: %v", err)
- }
- metrics.MeasureSince([]string{"raft", "snapshot", "create"}, start)
-
- // Try to persist the snapshot
- start = time.Now()
- if err := req.snapshot.Persist(sink); err != nil {
- sink.Cancel()
- return fmt.Errorf("failed to persist snapshot: %v", err)
- }
- metrics.MeasureSince([]string{"raft", "snapshot", "persist"}, start)
-
- // Close and check for error
- if err := sink.Close(); err != nil {
- return fmt.Errorf("failed to close snapshot: %v", err)
- }
-
- // Update the last stable snapshot info
- r.setLastSnapshot(req.index, req.term)
-
- // Compact the logs
- if err := r.compactLogs(req.index); err != nil {
- return err
- }
-
- // Log completion
- r.logger.Printf("[INFO] raft: Snapshot to %d complete", req.index)
- return nil
-}
-
-// compactLogs takes the last inclusive index of a snapshot
-// and trims the logs that are no longer needed.
-func (r *Raft) compactLogs(snapIdx uint64) error {
- defer metrics.MeasureSince([]string{"raft", "compactLogs"}, time.Now())
- // Determine log ranges to compact
- minLog, err := r.logs.FirstIndex()
- if err != nil {
- return fmt.Errorf("failed to get first log index: %v", err)
- }
-
- // Check if we have enough logs to truncate
- lastLogIdx, _ := r.getLastLog()
- if lastLogIdx <= r.conf.TrailingLogs {
- return nil
- }
-
- // Truncate up to the end of the snapshot, or `TrailingLogs`
- // back from the head, whichever is further back. This ensures
-// at least `TrailingLogs` entries, but does not allow logs
-// after the snapshot to be removed.
- maxLog := min(snapIdx, lastLogIdx-r.conf.TrailingLogs)
-
- // Log this
- r.logger.Printf("[INFO] raft: Compacting logs from %d to %d", minLog, maxLog)
-
- // Compact the logs
- if err := r.logs.DeleteRange(minLog, maxLog); err != nil {
- return fmt.Errorf("log compaction failed: %v", err)
- }
- return nil
-}
-
-// restoreSnapshot attempts to restore the latest snapshots, and fails
-// if none of them can be restored. This is called at initialization time,
-// and is completely unsafe to call at any other time.
-func (r *Raft) restoreSnapshot() error {
- snapshots, err := r.snapshots.List()
- if err != nil {
- r.logger.Printf("[ERR] raft: Failed to list snapshots: %v", err)
- return err
- }
-
- // Try to load in order of newest to oldest
- for _, snapshot := range snapshots {
- _, source, err := r.snapshots.Open(snapshot.ID)
- if err != nil {
- r.logger.Printf("[ERR] raft: Failed to open snapshot %v: %v", snapshot.ID, err)
- continue
- }
- defer source.Close()
-
- if err := r.fsm.Restore(source); err != nil {
- r.logger.Printf("[ERR] raft: Failed to restore snapshot %v: %v", snapshot.ID, err)
- continue
- }
-
- // Log success
- r.logger.Printf("[INFO] raft: Restored from snapshot %v", snapshot.ID)
-
- // Update the lastApplied so we don't replay old logs
- r.setLastApplied(snapshot.Index)
-
- // Update the last stable snapshot info
- r.setLastSnapshot(snapshot.Index, snapshot.Term)
-
- // Success!
- return nil
- }
-
- // If we had snapshots and failed to load them, it's an error
- if len(snapshots) > 0 {
- return fmt.Errorf("failed to load any existing snapshots")
- }
- return nil
-}
diff --git a/vendor/github.com/hashicorp/raft/raft_test.go b/vendor/github.com/hashicorp/raft/raft_test.go
deleted file mode 100644
index 5eb660ae..00000000
--- a/vendor/github.com/hashicorp/raft/raft_test.go
+++ /dev/null
@@ -1,1845 +0,0 @@
-package raft
-
-import (
- "bytes"
- "fmt"
- "io"
- "io/ioutil"
- "log"
- "os"
- "reflect"
- "sync"
- "sync/atomic"
- "testing"
- "time"
-
- "github.com/hashicorp/go-msgpack/codec"
-)
-
-// MockFSM is an implementation of the FSM interface, and just stores
-// the logs sequentially.
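MockFSM below exercises the three-method FSM contract the Raft core drives: Apply consumes committed entries, Snapshot captures current state, and Restore replaces it from a stream. A minimal counter FSM in the same shape — illustrative only, with the Log and snapshot types simplified from the real interface:

```go
package main

import (
	"encoding/binary"
	"fmt"
	"io"
	"io/ioutil"
	"sync"
)

// counterFSM is a toy state machine: every applied entry bumps a counter.
type counterFSM struct {
	mu sync.Mutex
	n  uint64
}

// Apply handles one committed entry (the real interface passes a *Log).
func (c *counterFSM) Apply(entry []byte) interface{} {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.n++
	return c.n
}

// Snapshot captures the state as 8 big-endian bytes.
func (c *counterFSM) Snapshot() []byte {
	c.mu.Lock()
	defer c.mu.Unlock()
	buf := make([]byte, 8)
	binary.BigEndian.PutUint64(buf, c.n)
	return buf
}

// Restore discards current state and loads the snapshot stream.
func (c *counterFSM) Restore(r io.ReadCloser) error {
	defer r.Close()
	buf, err := ioutil.ReadAll(r)
	if err != nil {
		return err
	}
	if len(buf) != 8 {
		return fmt.Errorf("bad snapshot: %d bytes", len(buf))
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	c.n = binary.BigEndian.Uint64(buf)
	return nil
}

func main() {
	fsm := &counterFSM{}
	fsm.Apply([]byte("x"))
	fsm.Apply([]byte("y"))
	fmt.Println(binary.BigEndian.Uint64(fsm.Snapshot())) // 2
}
```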
-type MockFSM struct { - sync.Mutex - logs [][]byte -} - -type MockSnapshot struct { - logs [][]byte - maxIndex int -} - -func (m *MockFSM) Apply(log *Log) interface{} { - m.Lock() - defer m.Unlock() - m.logs = append(m.logs, log.Data) - return len(m.logs) -} - -func (m *MockFSM) Snapshot() (FSMSnapshot, error) { - m.Lock() - defer m.Unlock() - return &MockSnapshot{m.logs, len(m.logs)}, nil -} - -func (m *MockFSM) Restore(inp io.ReadCloser) error { - m.Lock() - defer m.Unlock() - defer inp.Close() - hd := codec.MsgpackHandle{} - dec := codec.NewDecoder(inp, &hd) - - m.logs = nil - return dec.Decode(&m.logs) -} - -func (m *MockSnapshot) Persist(sink SnapshotSink) error { - hd := codec.MsgpackHandle{} - enc := codec.NewEncoder(sink, &hd) - if err := enc.Encode(m.logs[:m.maxIndex]); err != nil { - sink.Cancel() - return err - } - sink.Close() - return nil -} - -func (m *MockSnapshot) Release() { -} - -// Return configurations optimized for in-memory -func inmemConfig(t *testing.T) *Config { - conf := DefaultConfig() - conf.HeartbeatTimeout = 50 * time.Millisecond - conf.ElectionTimeout = 50 * time.Millisecond - conf.LeaderLeaseTimeout = 50 * time.Millisecond - conf.CommitTimeout = 5 * time.Millisecond - conf.Logger = newTestLogger(t) - return conf -} - -// This can be used as the destination for a logger and it'll -// map them into calls to testing.T.Log, so that you only see -// the logging for failed tests. -type testLoggerAdapter struct { - t *testing.T - prefix string -} - -func (a *testLoggerAdapter) Write(d []byte) (int, error) { - if d[len(d)-1] == '\n' { - d = d[:len(d)-1] - } - if a.prefix != "" { - l := a.prefix + ": " + string(d) - a.t.Log(l) - return len(l), nil - } - - a.t.Log(string(d)) - return len(d), nil -} - -func newTestLogger(t *testing.T) *log.Logger { - return log.New(&testLoggerAdapter{t: t}, "", log.Lmicroseconds) -} - -func newTestLoggerWithPrefix(t *testing.T, prefix string) *log.Logger { - return log.New(&testLoggerAdapter{t: t, prefix: prefix}, "", log.Lmicroseconds) -} - -type cluster struct { - dirs []string - stores []*InmemStore - fsms []*MockFSM - snaps []*FileSnapshotStore - trans []LoopbackTransport - rafts []*Raft - t *testing.T - observationCh chan Observation - conf *Config - propagateTimeout time.Duration - longstopTimeout time.Duration - logger *log.Logger - startTime time.Time - - failedLock sync.Mutex - failedCh chan struct{} - failed bool -} - -func (c *cluster) Merge(other *cluster) { - c.dirs = append(c.dirs, other.dirs...) - c.stores = append(c.stores, other.stores...) - c.fsms = append(c.fsms, other.fsms...) - c.snaps = append(c.snaps, other.snaps...) - c.trans = append(c.trans, other.trans...) - c.rafts = append(c.rafts, other.rafts...) -} - -// notifyFailed will close the failed channel which can signal the goroutine -// running the test that another goroutine has detected a failure in order to -// terminate the test. -func (c *cluster) notifyFailed() { - c.failedLock.Lock() - defer c.failedLock.Unlock() - if !c.failed { - c.failed = true - close(c.failedCh) - } -} - -// Failf provides a logging function that fails the tests, prints the output -// with microseconds, and does not mysteriously eat the string. This can be -// safely called from goroutines but won't immediately halt the test. The -// failedCh will be closed to allow blocking functions in the main thread to -// detect the failure and react. 
Note that you should arrange for the main -// thread to block until all goroutines have completed in order to reliably -// fail tests using this function. -func (c *cluster) Failf(format string, args ...interface{}) { - c.logger.Printf(format, args...) - c.t.Fail() - c.notifyFailed() -} - -// FailNowf provides a logging function that fails the tests, prints the output -// with microseconds, and does not mysteriously eat the string. FailNowf must be -// called from the goroutine running the test or benchmark function, not from -// other goroutines created during the test. Calling FailNowf does not stop -// those other goroutines. -func (c *cluster) FailNowf(format string, args ...interface{}) { - c.logger.Printf(format, args...) - c.t.FailNow() -} - -// Close shuts down the cluster and cleans up. -func (c *cluster) Close() { - var futures []Future - for _, r := range c.rafts { - futures = append(futures, r.Shutdown()) - } - - // Wait for shutdown - limit := time.AfterFunc(c.longstopTimeout, func() { - // We can't FailNowf here, and c.Failf won't do anything if we - // hang, so panic. - panic("timed out waiting for shutdown") - }) - defer limit.Stop() - - for _, f := range futures { - if err := f.Error(); err != nil { - c.FailNowf("[ERR] shutdown future err: %v", err) - } - } - - for _, d := range c.dirs { - os.RemoveAll(d) - } -} - -// WaitEventChan returns a channel which will signal if an observation is made -// or a timeout occurs. It is possible to set a filter to look for specific -// observations. Setting timeout to 0 means that it will wait forever until a -// non-filtered observation is made. -func (c *cluster) WaitEventChan(filter FilterFn, timeout time.Duration) <-chan struct{} { - ch := make(chan struct{}) - go func() { - defer close(ch) - var timeoutCh <-chan time.Time - if timeout > 0 { - timeoutCh = time.After(timeout) - } - for { - select { - case <-timeoutCh: - return - - case o, ok := <-c.observationCh: - if !ok || filter == nil || filter(&o) { - return - } - } - } - }() - return ch -} - -// WaitEvent waits until an observation is made, a timeout occurs, or a test -// failure is signaled. It is possible to set a filter to look for specific -// observations. Setting timeout to 0 means that it will wait forever until a -// non-filtered observation is made or a test failure is signaled. -func (c *cluster) WaitEvent(filter FilterFn, timeout time.Duration) { - select { - case <-c.failedCh: - c.t.FailNow() - - case <-c.WaitEventChan(filter, timeout): - } -} - -// WaitForReplication blocks until every FSM in the cluster has the given -// length, or the long sanity check timeout expires. -func (c *cluster) WaitForReplication(fsmLength int) { - limitCh := time.After(c.longstopTimeout) - -CHECK: - for { - ch := c.WaitEventChan(nil, c.conf.CommitTimeout) - select { - case <-c.failedCh: - c.t.FailNow() - - case <-limitCh: - c.FailNowf("[ERR] Timeout waiting for replication") - - case <-ch: - for _, fsm := range c.fsms { - fsm.Lock() - num := len(fsm.logs) - fsm.Unlock() - if num != fsmLength { - continue CHECK - } - } - return - } - } -} - -// pollState takes a snapshot of the state of the cluster. This might not be -// stable, so use GetInState() to apply some additional checks when waiting -// for the cluster to achieve a particular state. 
-func (c *cluster) pollState(s RaftState) ([]*Raft, uint64) { - var highestTerm uint64 - in := make([]*Raft, 0, 1) - for _, r := range c.rafts { - if r.State() == s { - in = append(in, r) - } - term := r.getCurrentTerm() - if term > highestTerm { - highestTerm = term - } - } - return in, highestTerm -} - -// GetInState polls the state of the cluster and attempts to identify when it has -// settled into the given state. -func (c *cluster) GetInState(s RaftState) []*Raft { - c.logger.Printf("[INFO] Starting stability test for raft state: %+v", s) - limitCh := time.After(c.longstopTimeout) - - // An election should complete after 2 * max(HeartbeatTimeout, ElectionTimeout) - // because of the randomised timer expiring in 1 x interval ... 2 x interval. - // We add a bit for propagation delay. If the election fails (e.g. because - // two elections start at once), we will have got something through our - // observer channel indicating a different state (i.e. one of the nodes - // will have moved to candidate state) which will reset the timer. - // - // Because of an implementation peculiarity, it can actually be 3 x timeout. - timeout := c.conf.HeartbeatTimeout - if timeout < c.conf.ElectionTimeout { - timeout = c.conf.ElectionTimeout - } - timeout = 2*timeout + c.conf.CommitTimeout - timer := time.NewTimer(timeout) - defer timer.Stop() - - // Wait until we have a stable instate slice. Each time we see an - // observation a state has changed, recheck it and if it has changed, - // restart the timer. - var pollStartTime = time.Now() - for { - inState, highestTerm := c.pollState(s) - inStateTime := time.Now() - - // Sometimes this routine is called very early on before the - // rafts have started up. We then timeout even though no one has - // even started an election. So if the highest term in use is - // zero, we know there are no raft processes that have yet issued - // a RequestVote, and we set a long time out. This is fixed when - // we hear the first RequestVote, at which point we reset the - // timer. - if highestTerm == 0 { - timer.Reset(c.longstopTimeout) - } else { - timer.Reset(timeout) - } - - // Filter will wake up whenever we observe a RequestVote. - filter := func(ob *Observation) bool { - switch ob.Data.(type) { - case RaftState: - return true - case RequestVoteRequest: - return true - default: - return false - } - } - - select { - case <-c.failedCh: - c.t.FailNow() - - case <-limitCh: - c.FailNowf("[ERR] Timeout waiting for stable %s state", s) - - case <-c.WaitEventChan(filter, 0): - c.logger.Printf("[DEBUG] Resetting stability timeout") - - case t, ok := <-timer.C: - if !ok { - c.FailNowf("[ERR] Timer channel errored") - } - c.logger.Printf("[INFO] Stable state for %s reached at %s (%d nodes), %s from start of poll, %s from cluster start. Timeout at %s, %s after stability", - s, inStateTime, len(inState), inStateTime.Sub(pollStartTime), inStateTime.Sub(c.startTime), t, t.Sub(inStateTime)) - return inState - } - } -} - -// Leader waits for the cluster to elect a leader and stay in a stable state. -func (c *cluster) Leader() *Raft { - leaders := c.GetInState(Leader) - if len(leaders) != 1 { - c.FailNowf("[ERR] expected one leader: %v", leaders) - } - return leaders[0] -} - -// Followers waits for the cluster to have N-1 followers and stay in a stable -// state. 
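GetInState above resets its stability timer whenever the observer channel reports a state change or a RequestVote; the FilterFn is just a predicate over Observation payloads. A sketch of such a filter, mirroring the in-line one above — the types are redeclared here only so it runs standalone, they are not the package's:

```go
package main

import "fmt"

// Observation and FilterFn mirror the shapes the test harness above uses.
type Observation struct{ Data interface{} }
type FilterFn func(*Observation) bool

type RaftState int
type RequestVoteRequest struct{ Term uint64 }

// electionActivity wakes the stability loop only for the two event
// kinds that can invalidate a settled cluster state.
var electionActivity FilterFn = func(ob *Observation) bool {
	switch ob.Data.(type) {
	case RaftState, RequestVoteRequest:
		return true
	default:
		return false
	}
}

func main() {
	fmt.Println(electionActivity(&Observation{Data: RaftState(1)}))           // true
	fmt.Println(electionActivity(&Observation{Data: RequestVoteRequest{2}})) // true
	fmt.Println(electionActivity(&Observation{Data: "heartbeat"}))           // false
}
```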
-func (c *cluster) Followers() []*Raft { - expFollowers := len(c.rafts) - 1 - followers := c.GetInState(Follower) - if len(followers) != expFollowers { - c.FailNowf("[ERR] timeout waiting for %d followers (followers are %v)", expFollowers, followers) - } - return followers -} - -// FullyConnect connects all the transports together. -func (c *cluster) FullyConnect() { - c.logger.Printf("[DEBUG] Fully Connecting") - for i, t1 := range c.trans { - for j, t2 := range c.trans { - if i != j { - t1.Connect(t2.LocalAddr(), t2) - t2.Connect(t1.LocalAddr(), t1) - } - } - } -} - -// Disconnect disconnects all transports from the given address. -func (c *cluster) Disconnect(a string) { - c.logger.Printf("[DEBUG] Disconnecting %v", a) - for _, t := range c.trans { - if t.LocalAddr() == a { - t.DisconnectAll() - } else { - t.Disconnect(a) - } - } -} - -// IndexOf returns the index of the given raft instance. -func (c *cluster) IndexOf(r *Raft) int { - for i, n := range c.rafts { - if n == r { - return i - } - } - return -1 -} - -// EnsureLeader checks that ALL the nodes think the leader is the given expected -// leader. -func (c *cluster) EnsureLeader(t *testing.T, expect string) { - // We assume c.Leader() has been called already; now check all the rafts - // think the leader is correct - fail := false - for _, r := range c.rafts { - leader := r.Leader() - if leader != expect { - if leader == "" { - leader = "[none]" - } - if expect == "" { - c.logger.Printf("[ERR] Peer %s sees leader %v expected [none]", r, leader) - } else { - c.logger.Printf("[ERR] Peer %s sees leader %v expected %v", r, leader, expect) - } - fail = true - } - } - if fail { - c.FailNowf("[ERR] At least one peer has the wrong notion of leader") - } -} - -// EnsureSame makes sure all the FSMs have the same contents. -func (c *cluster) EnsureSame(t *testing.T) { - limit := time.Now().Add(c.longstopTimeout) - first := c.fsms[0] - -CHECK: - first.Lock() - for i, fsm := range c.fsms { - if i == 0 { - continue - } - fsm.Lock() - - if len(first.logs) != len(fsm.logs) { - fsm.Unlock() - if time.Now().After(limit) { - c.FailNowf("[ERR] FSM log length mismatch: %d %d", - len(first.logs), len(fsm.logs)) - } else { - goto WAIT - } - } - - for idx := 0; idx < len(first.logs); idx++ { - if bytes.Compare(first.logs[idx], fsm.logs[idx]) != 0 { - fsm.Unlock() - if time.Now().After(limit) { - c.FailNowf("[ERR] FSM log mismatch at index %d", idx) - } else { - goto WAIT - } - } - } - fsm.Unlock() - } - - first.Unlock() - return - -WAIT: - first.Unlock() - c.WaitEvent(nil, c.conf.CommitTimeout) - goto CHECK -} - -// raftToPeerSet returns the set of peers as a map. -func raftToPeerSet(r *Raft) map[string]struct{} { - peers := make(map[string]struct{}) - peers[r.localAddr] = struct{}{} - - raftPeers, _ := r.peerStore.Peers() - for _, p := range raftPeers { - peers[p] = struct{}{} - } - return peers -} - -// EnsureSamePeers makes sure all the rafts have the same set of peers. -func (c *cluster) EnsureSamePeers(t *testing.T) { - limit := time.Now().Add(c.longstopTimeout) - peerSet := raftToPeerSet(c.rafts[0]) - -CHECK: - for i, raft := range c.rafts { - if i == 0 { - continue - } - - otherSet := raftToPeerSet(raft) - if !reflect.DeepEqual(peerSet, otherSet) { - if time.Now().After(limit) { - c.FailNowf("[ERR] peer mismatch: %v %v", peerSet, otherSet) - } else { - goto WAIT - } - } - } - return - -WAIT: - c.WaitEvent(nil, c.conf.CommitTimeout) - goto CHECK -} - -// makeCluster will return a cluster with the given config and number of peers. 
-// If addPeers is true, they will be added into the peer store before starting;
-// otherwise their transports will be wired up but they won't yet have configured
-// each other.
-func makeCluster(n int, addPeers bool, t *testing.T, conf *Config) *cluster {
- if conf == nil {
-  conf = inmemConfig(t)
- }
-
- c := &cluster{
-  observationCh: make(chan Observation, 1024),
-  conf: conf,
-  // Propagation takes a maximum of 2 heartbeat timeouts (time to
-  // get a new heartbeat that would cause a commit) plus a bit.
-  propagateTimeout: conf.HeartbeatTimeout*2 + conf.CommitTimeout,
-  longstopTimeout: 5 * time.Second,
-  logger: newTestLoggerWithPrefix(t, "cluster"),
-  failedCh: make(chan struct{}),
- }
- c.t = t
- peers := make([]string, 0, n)
-
- // Setup the stores and transports
- for i := 0; i < n; i++ {
-  dir, err := ioutil.TempDir("", "raft")
-  if err != nil {
-   c.FailNowf("[ERR] err: %v ", err)
-  }
-
-  store := NewInmemStore()
-  c.dirs = append(c.dirs, dir)
-  c.stores = append(c.stores, store)
-  c.fsms = append(c.fsms, &MockFSM{})
-
-  dir2, snap := FileSnapTest(t)
-  c.dirs = append(c.dirs, dir2)
-  c.snaps = append(c.snaps, snap)
-
-  addr, trans := NewInmemTransport("")
-  c.trans = append(c.trans, trans)
-  peers = append(peers, addr)
- }
-
- // Wire the transports together
- c.FullyConnect()
-
- // Create all the rafts
- c.startTime = time.Now()
- for i := 0; i < n; i++ {
-  if n == 1 {
-   conf.EnableSingleNode = true
-  }
-
-  logs := c.stores[i]
-  store := c.stores[i]
-  snap := c.snaps[i]
-  trans := c.trans[i]
-
-  peerStore := &StaticPeers{}
-  if addPeers {
-   peerStore.StaticPeers = peers
-  }
-  peerConf := conf
-  peerConf.Logger = newTestLoggerWithPrefix(t, peers[i])
-
-  raft, err := NewRaft(peerConf, c.fsms[i], logs, store, snap, peerStore, trans)
-  if err != nil {
-   c.FailNowf("[ERR] NewRaft failed: %v", err)
-  }
-
-  // RegisterObserver returns nothing, so there is no error to check here.
-  raft.RegisterObserver(NewObserver(c.observationCh, false, nil))
-  c.rafts = append(c.rafts, raft)
- }
-
- return c
-}
-
-// See makeCluster. This adds the peers initially to the peer store.
-func MakeCluster(n int, t *testing.T, conf *Config) *cluster {
- return makeCluster(n, true, t, conf)
-}
-
-// See makeCluster. This doesn't add the peers initially to the peer store. 
-func MakeClusterNoPeers(n int, t *testing.T, conf *Config) *cluster { - return makeCluster(n, false, t, conf) -} - -func TestRaft_StartStop(t *testing.T) { - c := MakeCluster(1, t, nil) - c.Close() -} - -func TestRaft_AfterShutdown(t *testing.T) { - c := MakeCluster(1, t, nil) - c.Close() - raft := c.rafts[0] - - // Everything should fail now - if f := raft.Apply(nil, 0); f.Error() != ErrRaftShutdown { - c.FailNowf("[ERR] should be shutdown: %v", f.Error()) - } - if f := raft.AddPeer(NewInmemAddr()); f.Error() != ErrRaftShutdown { - c.FailNowf("[ERR] should be shutdown: %v", f.Error()) - } - if f := raft.RemovePeer(NewInmemAddr()); f.Error() != ErrRaftShutdown { - c.FailNowf("[ERR] should be shutdown: %v", f.Error()) - } - if f := raft.Snapshot(); f.Error() != ErrRaftShutdown { - c.FailNowf("[ERR] should be shutdown: %v", f.Error()) - } - - // Should be idempotent - if f := raft.Shutdown(); f.Error() != nil { - c.FailNowf("[ERR] shutdown should be idempotent") - } - -} - -func TestRaft_SingleNode(t *testing.T) { - conf := inmemConfig(t) - c := MakeCluster(1, t, conf) - defer c.Close() - raft := c.rafts[0] - - // Watch leaderCh for change - select { - case v := <-raft.LeaderCh(): - if !v { - c.FailNowf("[ERR] should become leader") - } - case <-time.After(conf.HeartbeatTimeout * 3): - c.FailNowf("[ERR] timeout becoming leader") - } - - // Should be leader - if s := raft.State(); s != Leader { - c.FailNowf("[ERR] expected leader: %v", s) - } - - // Should be able to apply - future := raft.Apply([]byte("test"), c.conf.HeartbeatTimeout) - if err := future.Error(); err != nil { - c.FailNowf("[ERR] err: %v", err) - } - - // Check the response - if future.Response().(int) != 1 { - c.FailNowf("[ERR] bad response: %v", future.Response()) - } - - // Check the index - if idx := future.Index(); idx == 0 { - c.FailNowf("[ERR] bad index: %d", idx) - } - - // Check that it is applied to the FSM - if len(c.fsms[0].logs) != 1 { - c.FailNowf("[ERR] did not apply to FSM!") - } -} - -func TestRaft_TripleNode(t *testing.T) { - // Make the cluster - c := MakeCluster(3, t, nil) - defer c.Close() - - // Should be one leader - c.Followers() - leader := c.Leader() - c.EnsureLeader(t, leader.localAddr) - - // Should be able to apply - future := leader.Apply([]byte("test"), c.conf.CommitTimeout) - if err := future.Error(); err != nil { - c.FailNowf("[ERR] err: %v", err) - } - c.WaitForReplication(1) -} - -func TestRaft_LeaderFail(t *testing.T) { - // Make the cluster - c := MakeCluster(3, t, nil) - defer c.Close() - - // Should be one leader - c.Followers() - leader := c.Leader() - - // Should be able to apply - future := leader.Apply([]byte("test"), c.conf.CommitTimeout) - if err := future.Error(); err != nil { - c.FailNowf("[ERR] err: %v", err) - } - c.WaitForReplication(1) - - // Disconnect the leader now - t.Logf("[INFO] Disconnecting %v", leader) - leaderTerm := leader.getCurrentTerm() - c.Disconnect(leader.localAddr) - - // Wait for new leader - limit := time.Now().Add(c.longstopTimeout) - var newLead *Raft - for time.Now().Before(limit) && newLead == nil { - c.WaitEvent(nil, c.conf.CommitTimeout) - leaders := c.GetInState(Leader) - if len(leaders) == 1 && leaders[0] != leader { - newLead = leaders[0] - } - } - if newLead == nil { - c.FailNowf("[ERR] expected new leader") - } - - // Ensure the term is greater - if newLead.getCurrentTerm() <= leaderTerm { - c.FailNowf("[ERR] expected newer term! 
%d %d (%v, %v)", newLead.getCurrentTerm(), leaderTerm, newLead, leader)
- }
-
- // Apply should not work on the old leader
- future1 := leader.Apply([]byte("fail"), c.conf.CommitTimeout)
-
- // Apply should work on newer leader
- future2 := newLead.Apply([]byte("apply"), c.conf.CommitTimeout)
-
- // Future2 should work
- if err := future2.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- }
-
- // Reconnect the networks
- t.Logf("[INFO] Reconnecting %v", leader)
- c.FullyConnect()
-
- // Future1 should fail
- if err := future1.Error(); err != ErrLeadershipLost && err != ErrNotLeader {
-  c.FailNowf("[ERR] err: %v", err)
- }
-
- // Wait for log replication
- c.EnsureSame(t)
-
- // Check two entries are applied to the FSM
- for _, fsm := range c.fsms {
-  fsm.Lock()
-  if len(fsm.logs) != 2 {
-   c.FailNowf("[ERR] did not apply both to FSM! %v", fsm.logs)
-  }
-  if bytes.Compare(fsm.logs[0], []byte("test")) != 0 {
-   c.FailNowf("[ERR] first entry should be 'test'")
-  }
-  if bytes.Compare(fsm.logs[1], []byte("apply")) != 0 {
-   c.FailNowf("[ERR] second entry should be 'apply'")
-  }
-  fsm.Unlock()
- }
-}
-
-func TestRaft_BehindFollower(t *testing.T) {
- // Make the cluster
- c := MakeCluster(3, t, nil)
- defer c.Close()
-
- // Disconnect one follower
- leader := c.Leader()
- followers := c.Followers()
- behind := followers[0]
- c.Disconnect(behind.localAddr)
-
- // Commit a lot of things
- var future Future
- for i := 0; i < 100; i++ {
-  future = leader.Apply([]byte(fmt.Sprintf("test%d", i)), 0)
- }
-
- // Wait for the last future to apply
- if err := future.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- } else {
-  t.Logf("[INFO] Finished apply without behind follower")
- }
-
- // Check that we have a non-zero last contact
- if behind.LastContact().IsZero() {
-  c.FailNowf("[ERR] expected previous contact")
- }
-
- // Reconnect the behind node
- c.FullyConnect()
-
- // Ensure all the logs are the same
- c.EnsureSame(t)
-
- // Ensure one leader
- leader = c.Leader()
- c.EnsureLeader(t, leader.localAddr)
-}
-
-func TestRaft_ApplyNonLeader(t *testing.T) {
- // Make the cluster
- c := MakeCluster(3, t, nil)
- defer c.Close()
-
- // Wait for a leader
- c.Leader()
-
- // Try to apply to a follower
- followers := c.GetInState(Follower)
- if len(followers) != 2 {
-  c.FailNowf("[ERR] Expected 2 followers")
- }
- follower := followers[0]
-
- // Try to apply
- future := follower.Apply([]byte("test"), c.conf.CommitTimeout)
- if future.Error() != ErrNotLeader {
-  c.FailNowf("[ERR] should not apply on follower")
- }
-
- // Should be cached
- if future.Error() != ErrNotLeader {
-  c.FailNowf("[ERR] should not apply on follower")
- }
-}
-
-func TestRaft_ApplyConcurrent(t *testing.T) {
- // Make the cluster
- conf := inmemConfig(t)
- conf.HeartbeatTimeout = 2 * conf.HeartbeatTimeout
- conf.ElectionTimeout = 2 * conf.ElectionTimeout
- c := MakeCluster(3, t, conf)
- defer c.Close()
-
- // Wait for a leader
- leader := c.Leader()
-
- // Create a wait group
- const sz = 100
- var group sync.WaitGroup
- group.Add(sz)
-
- applyF := func(i int) {
-  defer group.Done()
-  future := leader.Apply([]byte(fmt.Sprintf("test%d", i)), 0)
-  if err := future.Error(); err != nil {
-   c.Failf("[ERR] err: %v", err)
-  }
- }
-
- // Concurrently apply
- for i := 0; i < sz; i++ {
-  go applyF(i)
- }
-
- // Wait to finish
- doneCh := make(chan struct{})
- go func() {
-  group.Wait()
-  close(doneCh)
- }()
- select {
- case <-doneCh:
- case <-time.After(c.longstopTimeout):
-  c.FailNowf("[ERR] timeout")
- }
-
- // If anything failed up 
to this point then bail now, rather than do a
- // confusing compare.
- if t.Failed() {
-  c.FailNowf("[ERR] One or more of the apply operations failed")
- }
-
- // Check the FSMs
- c.EnsureSame(t)
-}
-
-func TestRaft_ApplyConcurrent_Timeout(t *testing.T) {
- // Make the cluster
- conf := inmemConfig(t)
- conf.CommitTimeout = 1 * time.Millisecond
- conf.HeartbeatTimeout = 2 * conf.HeartbeatTimeout
- conf.ElectionTimeout = 2 * conf.ElectionTimeout
- c := MakeCluster(1, t, conf)
- defer c.Close()
-
- // Wait for a leader
- leader := c.Leader()
-
- // Enough enqueues should cause at least one timeout...
- var didTimeout int32
- for i := 0; (i < 5000) && (atomic.LoadInt32(&didTimeout) == 0); i++ {
-  go func(i int) {
-   future := leader.Apply([]byte(fmt.Sprintf("test%d", i)), time.Microsecond)
-   if future.Error() == ErrEnqueueTimeout {
-    atomic.StoreInt32(&didTimeout, 1)
-   }
-  }(i)
-
-  // Give the leader loop some other things to do in order to
-  // increase the odds of a timeout.
-  if i%5 == 0 {
-   leader.VerifyLeader()
-  }
- }
-
- // Loop until we see a timeout, or give up.
- limit := time.Now().Add(c.longstopTimeout)
- for time.Now().Before(limit) {
-  if atomic.LoadInt32(&didTimeout) != 0 {
-   return
-  }
-  c.WaitEvent(nil, c.propagateTimeout)
- }
- c.FailNowf("[ERR] Timeout waiting to detect apply timeouts")
-}
-
-func TestRaft_JoinNode(t *testing.T) {
- // Make a cluster
- c := MakeCluster(2, t, nil)
- defer c.Close()
-
- // Apply a log to this cluster to ensure it is 'newer'
- var future Future
- leader := c.Leader()
- future = leader.Apply([]byte("first"), 0)
- if err := future.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- } else {
-  t.Logf("[INFO] Applied log")
- }
-
- // Make a new cluster of 1
- c1 := MakeCluster(1, t, nil)
-
- // Merge clusters
- c.Merge(c1)
- c.FullyConnect()
-
- // Wait until we have 2 leaders
- limit := time.Now().Add(c.longstopTimeout)
- var leaders []*Raft
- for time.Now().Before(limit) && len(leaders) != 2 {
-  c.WaitEvent(nil, c.conf.CommitTimeout)
-  leaders = c.GetInState(Leader)
- }
- if len(leaders) != 2 {
-  c.FailNowf("[ERR] expected two leaders: %v", leaders)
- }
-
- // Join the new node in
- future = leader.AddPeer(c1.rafts[0].localAddr)
- if err := future.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- }
-
- // Wait until we have 2 followers
- limit = time.Now().Add(c.longstopTimeout)
- var followers []*Raft
- for time.Now().Before(limit) && len(followers) != 2 {
-  c.WaitEvent(nil, c.conf.CommitTimeout)
-  followers = c.GetInState(Follower)
- }
- if len(followers) != 2 {
-  c.FailNowf("[ERR] expected two followers: %v", followers)
- }
-
- // Check the FSMs
- c.EnsureSame(t)
-
- // Check the peers
- c.EnsureSamePeers(t)
-
- // Ensure one leader
- leader = c.Leader()
- c.EnsureLeader(t, leader.localAddr)
-}
-
-func TestRaft_RemoveFollower(t *testing.T) {
- // Make a cluster
- c := MakeCluster(3, t, nil)
- defer c.Close()
-
- // Get the leader
- leader := c.Leader()
-
- // Wait until we have 2 followers
- limit := time.Now().Add(c.longstopTimeout)
- var followers []*Raft
- for time.Now().Before(limit) && len(followers) != 2 {
-  c.WaitEvent(nil, c.conf.CommitTimeout)
-  followers = c.GetInState(Follower)
- }
- if len(followers) != 2 {
-  c.FailNowf("[ERR] expected two followers: %v", followers)
- }
-
- // Remove a follower
- follower := followers[0]
- future := leader.RemovePeer(follower.localAddr)
- if err := future.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- }
-
- // Wait a while
- time.Sleep(c.propagateTimeout)
-
- // Other 
nodes should have fewer peers
- if peers, _ := leader.peerStore.Peers(); len(peers) != 2 {
-  c.FailNowf("[ERR] too many peers")
- }
- if peers, _ := followers[1].peerStore.Peers(); len(peers) != 2 {
-  c.FailNowf("[ERR] too many peers")
- }
-}
-
-func TestRaft_RemoveLeader(t *testing.T) {
- // Make a cluster
- c := MakeCluster(3, t, nil)
- defer c.Close()
-
- // Get the leader
- leader := c.Leader()
-
- // Wait until we have 2 followers
- limit := time.Now().Add(c.longstopTimeout)
- var followers []*Raft
- for time.Now().Before(limit) && len(followers) != 2 {
-  c.WaitEvent(nil, c.conf.CommitTimeout)
-  followers = c.GetInState(Follower)
- }
- if len(followers) != 2 {
-  c.FailNowf("[ERR] expected two followers: %v", followers)
- }
-
- // Remove the leader
- leader.RemovePeer(leader.localAddr)
-
- // Wait a while
- time.Sleep(c.propagateTimeout)
-
- // Should have a new leader
- newLeader := c.Leader()
-
- // Wait a bit for log application
- time.Sleep(c.propagateTimeout)
-
- // Other nodes should have fewer peers
- if peers, _ := newLeader.peerStore.Peers(); len(peers) != 2 {
-  c.FailNowf("[ERR] too many peers")
- }
-
- // Old leader should be shutdown
- if leader.State() != Shutdown {
-  c.FailNowf("[ERR] leader should be shutdown")
- }
-
- // Old leader should have no peers other than itself
- if peers, _ := leader.peerStore.Peers(); len(peers) != 1 {
-  c.FailNowf("[ERR] leader should have no other peers")
- }
-}
-
-func TestRaft_RemoveLeader_NoShutdown(t *testing.T) {
- // Make a cluster
- conf := inmemConfig(t)
- conf.ShutdownOnRemove = false
- c := MakeCluster(3, t, conf)
- defer c.Close()
-
- // Get the leader
- c.Followers()
- leader := c.Leader()
-
- // Remove the leader
- var removeFuture Future
- for i := byte(0); i < 100; i++ {
-  future := leader.Apply([]byte{i}, 0)
-  if i == 80 {
-   removeFuture = leader.RemovePeer(leader.localAddr)
-  }
-  if i > 80 {
-   if err := future.Error(); err == nil || err != ErrNotLeader {
-    c.FailNowf("[ERR] err: %v, future entries should fail", err)
-   }
-  }
- }
-
- if err := removeFuture.Error(); err != nil {
-  c.FailNowf("[ERR] RemovePeer failed with error %v", err)
- }
-
- // Wait a while
- time.Sleep(c.propagateTimeout)
-
- // Should have a new leader
- newLeader := c.Leader()
-
- // Wait a bit for log application
- time.Sleep(c.propagateTimeout)
-
- // Other nodes should have fewer peers
- if peers, _ := newLeader.peerStore.Peers(); len(peers) != 2 {
-  c.FailNowf("[ERR] too many peers")
- }
-
- // Old leader should be a follower
- if leader.State() != Follower {
-  c.FailNowf("[ERR] leader should be a follower")
- }
-
- // Old leader should have no peers other than itself
- if peers, _ := leader.peerStore.Peers(); len(peers) != 1 {
-  c.FailNowf("[ERR] leader should have no other peers")
- }
-
- // Other nodes should have the same state
- c.EnsureSame(t)
-}
-
-func TestRaft_RemoveLeader_SplitCluster(t *testing.T) {
- // Enable operation after a remove
- conf := inmemConfig(t)
- conf.EnableSingleNode = true
- conf.ShutdownOnRemove = false
- conf.DisableBootstrapAfterElect = false
-
- // Make a cluster
- c := MakeCluster(3, t, conf)
- defer c.Close()
-
- // Get the leader
- c.Followers()
- leader := c.Leader()
-
- // Remove the leader
- leader.RemovePeer(leader.localAddr)
-
- // Wait until we have 2 leaders
- limit := time.Now().Add(c.longstopTimeout)
- var leaders []*Raft
- for time.Now().Before(limit) && len(leaders) != 2 {
-  c.WaitEvent(nil, c.conf.CommitTimeout)
-  leaders = c.GetInState(Leader)
- }
- if len(leaders) != 2 {
-  c.FailNowf("[ERR] expected two leaders: %v", leaders)
- }
-
- // Old leader 
should have no peers
- if len(leader.peers) != 0 {
-  c.FailNowf("[ERR] leader should have no peers")
- }
-}
-
-func TestRaft_AddKnownPeer(t *testing.T) {
- // Make a cluster
- c := MakeCluster(3, t, nil)
- defer c.Close()
-
- // Get the leader
- leader := c.Leader()
- followers := c.GetInState(Follower)
-
- // Add a follower
- future := leader.AddPeer(followers[0].localAddr)
-
- // Should be already added
- if err := future.Error(); err != ErrKnownPeer {
-  c.FailNowf("[ERR] err: %v", err)
- }
-}
-
-func TestRaft_RemoveUnknownPeer(t *testing.T) {
- // Make a cluster
- c := MakeCluster(3, t, nil)
- defer c.Close()
-
- // Get the leader
- leader := c.Leader()
-
- // Remove unknown
- future := leader.RemovePeer(NewInmemAddr())
-
- // Should be an unknown peer
- if err := future.Error(); err != ErrUnknownPeer {
-  c.FailNowf("[ERR] err: %v", err)
- }
-}
-
-func TestRaft_SnapshotRestore(t *testing.T) {
- // Make the cluster
- conf := inmemConfig(t)
- conf.TrailingLogs = 10
- c := MakeCluster(1, t, conf)
- defer c.Close()
-
- // Commit a lot of things
- leader := c.Leader()
- var future Future
- for i := 0; i < 100; i++ {
-  future = leader.Apply([]byte(fmt.Sprintf("test%d", i)), 0)
- }
-
- // Wait for the last future to apply
- if err := future.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- }
-
- // Take a snapshot
- snapFuture := leader.Snapshot()
- if err := snapFuture.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- }
-
- // Check for snapshot
- if snaps, _ := leader.snapshots.List(); len(snaps) != 1 {
-  c.FailNowf("[ERR] should have a snapshot")
- }
-
- // Logs should be trimmed
- if idx, _ := leader.logs.FirstIndex(); idx != 92 {
-  c.FailNowf("[ERR] should trim logs to 92: %d", idx)
- }
-
- // Shutdown
- shutdown := leader.Shutdown()
- if err := shutdown.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- }
-
- // Restart the Raft
- r := leader
- // Can't just reuse the old transport as it will be closed
- _, trans2 := NewInmemTransport(r.trans.LocalAddr())
- r, err := NewRaft(r.conf, r.fsm, r.logs, r.stable,
-  r.snapshots, r.peerStore, trans2)
- if err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- }
- c.rafts[0] = r
-
- // We should have restored from the snapshot! 
- if last := r.getLastApplied(); last != 101 { - c.FailNowf("[ERR] bad last: %v", last) - } -} - -func TestRaft_SnapshotRestore_PeerChange(t *testing.T) { - // Make the cluster - conf := inmemConfig(t) - conf.TrailingLogs = 10 - c := MakeCluster(3, t, conf) - defer c.Close() - - // Commit a lot of things - leader := c.Leader() - var future Future - for i := 0; i < 100; i++ { - future = leader.Apply([]byte(fmt.Sprintf("test%d", i)), 0) - } - - // Wait for the last future to apply - if err := future.Error(); err != nil { - c.FailNowf("[ERR] err: %v", err) - } - - // Take a snapshot - snapFuture := leader.Snapshot() - if err := snapFuture.Error(); err != nil { - c.FailNowf("[ERR] err: %v", err) - } - - // Shutdown - shutdown := leader.Shutdown() - if err := shutdown.Error(); err != nil { - c.FailNowf("[ERR] err: %v", err) - } - - // Make a separate cluster - c2 := MakeClusterNoPeers(2, t, conf) - defer c2.Close() - - // Kill the old cluster - for _, sec := range c.rafts { - if sec != leader { - sec.Shutdown() - } - } - - // Change the peer addresses - peers := []string{leader.trans.LocalAddr()} - for _, sec := range c2.rafts { - peers = append(peers, sec.trans.LocalAddr()) - } - - // Restart the Raft with new peers - r := leader - peerStore := &StaticPeers{StaticPeers: peers} - // Can't just reuse the old transport as it will be closed - _, trans2 := NewInmemTransport(r.trans.LocalAddr()) - r, err := NewRaft(r.conf, r.fsm, r.logs, r.stable, - r.snapshots, peerStore, trans2) - if err != nil { - c.FailNowf("[ERR] err: %v", err) - } - c.rafts[0] = r - c2.rafts = append(c2.rafts, r) - c2.trans = append(c2.trans, r.trans.(*InmemTransport)) - c2.fsms = append(c2.fsms, r.fsm.(*MockFSM)) - c2.FullyConnect() - - // Wait a while - time.Sleep(c.propagateTimeout) - - // Ensure we elect a leader, and that we replicate - // to our new followers - c2.EnsureSame(t) - - // We should have restored from the snapshot! 
- if last := r.getLastApplied(); last != 102 {
-  c.FailNowf("[ERR] bad last: %v", last)
- }
-}
-
-func TestRaft_AutoSnapshot(t *testing.T) {
- // Make the cluster
- conf := inmemConfig(t)
- conf.SnapshotInterval = conf.CommitTimeout * 2
- conf.SnapshotThreshold = 50
- conf.TrailingLogs = 10
- c := MakeCluster(1, t, conf)
- defer c.Close()
-
- // Commit a lot of things
- leader := c.Leader()
- var future Future
- for i := 0; i < 100; i++ {
-  future = leader.Apply([]byte(fmt.Sprintf("test%d", i)), 0)
- }
-
- // Wait for the last future to apply
- if err := future.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- }
-
- // Wait for a snapshot to happen
- time.Sleep(c.propagateTimeout)
-
- // Check for snapshot
- if snaps, _ := leader.snapshots.List(); len(snaps) == 0 {
-  c.FailNowf("[ERR] should have a snapshot")
- }
-}
-
-func TestRaft_ManualSnapshot(t *testing.T) {
- // Make the cluster
- conf := inmemConfig(t)
- conf.SnapshotThreshold = 50
- conf.TrailingLogs = 10
- c := MakeCluster(1, t, conf)
- defer c.Close()
-
- leader := c.Leader()
- // With nothing committed, asking for a snapshot should return an error
- ssErr := leader.Snapshot().Error()
- if ssErr != ErrNothingNewToSnapshot {
-  t.Errorf("Attempt to manually create snapshot should have errored because there's nothing to do: %v", ssErr)
- }
- // Commit some things
- var future Future
- for i := 0; i < 10; i++ {
-  future = leader.Apply([]byte(fmt.Sprintf("test %d", i)), 0)
- }
- if err := future.Error(); err != nil {
-  c.FailNowf("[ERR] Error applying new log entries: %v", err)
- }
- // Now we should be able to ask for a snapshot without getting an error
- ssErr = leader.Snapshot().Error()
- if ssErr != nil {
-  t.Errorf("Request for Snapshot failed: %v", ssErr)
- }
-}
-
-func TestRaft_SendSnapshotFollower(t *testing.T) {
- // Make the cluster
- conf := inmemConfig(t)
- conf.TrailingLogs = 10
- c := MakeCluster(3, t, conf)
- defer c.Close()
-
- // Disconnect one follower
- followers := c.Followers()
- leader := c.Leader()
- behind := followers[0]
- c.Disconnect(behind.localAddr)
-
- // Commit a lot of things
- var future Future
- for i := 0; i < 100; i++ {
-  future = leader.Apply([]byte(fmt.Sprintf("test%d", i)), 0)
- }
-
- // Wait for the last future to apply
- if err := future.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- } else {
-  t.Logf("[INFO] Finished apply without behind follower")
- }
-
- // Snapshot, this will truncate logs! 
- for _, r := range c.rafts {
-  future = r.Snapshot()
-  // the disconnected node will have nothing to snapshot, so that's expected
-  if err := future.Error(); err != nil && err != ErrNothingNewToSnapshot {
-   c.FailNowf("[ERR] err: %v", err)
-  }
- }
-
- // Reconnect the behind node
- c.FullyConnect()
-
- // Ensure all the logs are the same
- c.EnsureSame(t)
-}
-
-func TestRaft_ReJoinFollower(t *testing.T) {
- // Enable operation after a remove
- conf := inmemConfig(t)
- conf.ShutdownOnRemove = false
-
- // Make a cluster
- c := MakeCluster(3, t, conf)
- defer c.Close()
-
- // Get the leader
- leader := c.Leader()
-
- // Wait until we have 2 followers
- limit := time.Now().Add(c.longstopTimeout)
- var followers []*Raft
- for time.Now().Before(limit) && len(followers) != 2 {
-  c.WaitEvent(nil, c.conf.CommitTimeout)
-  followers = c.GetInState(Follower)
- }
- if len(followers) != 2 {
-  c.FailNowf("[ERR] expected two followers: %v", followers)
- }
-
- // Remove a follower
- follower := followers[0]
- future := leader.RemovePeer(follower.localAddr)
- if err := future.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- }
-
- // Wait a while
- time.Sleep(c.propagateTimeout)
-
- // Other nodes should have fewer peers
- if peers, _ := leader.peerStore.Peers(); len(peers) != 2 {
-  c.FailNowf("[ERR] too many peers: %v", peers)
- }
- if peers, _ := followers[1].peerStore.Peers(); len(peers) != 2 {
-  c.FailNowf("[ERR] too many peers: %v", peers)
- }
-
- // Get the leader
- time.Sleep(c.propagateTimeout)
- leader = c.Leader()
-
- // Rejoin. The follower will have a higher term than the leader;
- // this will cause the leader to step down, and a new round of elections
- // to take place. We should eventually re-stabilize.
- future = leader.AddPeer(follower.localAddr)
- if err := future.Error(); err != nil && err != ErrLeadershipLost {
-  c.FailNowf("[ERR] err: %v", err)
- }
-
- // Wait a while
- time.Sleep(c.propagateTimeout)
-
- // Other nodes should have all peers again
- if peers, _ := leader.peerStore.Peers(); len(peers) != 3 {
-  c.FailNowf("[ERR] missing peers: %v", peers)
- }
- if peers, _ := followers[1].peerStore.Peers(); len(peers) != 3 {
-  c.FailNowf("[ERR] missing peers: %v", peers)
- }
-
- // Should be a follower now
- if follower.State() != Follower {
-  c.FailNowf("[ERR] bad state: %v", follower.State())
- }
-}
-
-func TestRaft_LeaderLeaseExpire(t *testing.T) {
- // Make a cluster
- conf := inmemConfig(t)
- c := MakeCluster(2, t, conf)
- defer c.Close()
-
- // Get the leader
- leader := c.Leader()
-
- // Wait until we have a follower
- limit := time.Now().Add(c.longstopTimeout)
- var followers []*Raft
- for time.Now().Before(limit) && len(followers) != 1 {
-  c.WaitEvent(nil, c.conf.CommitTimeout)
-  followers = c.GetInState(Follower)
- }
- if len(followers) != 1 {
-  c.FailNowf("[ERR] expected a follower: %v", followers)
- }
-
- // Disconnect the follower now
- follower := followers[0]
- t.Logf("[INFO] Disconnecting %v", follower)
- c.Disconnect(follower.localAddr)
-
- // Watch the leaderCh
- select {
- case v := <-leader.LeaderCh():
-  if v {
-   c.FailNowf("[ERR] should step down as leader")
-  }
- case <-time.After(conf.LeaderLeaseTimeout * 2):
-  c.FailNowf("[ERR] timeout stepping down as leader")
- }
-
- // Ensure the last contact of the leader is non-zero
- if leader.LastContact().IsZero() {
-  c.FailNowf("[ERR] expected non-zero contact time")
- }
-
- // Should be no leaders
- if len(c.GetInState(Leader)) != 0 {
-  c.FailNowf("[ERR] expected step down")
- }
-
- // Verify no further contact
- 
last := follower.LastContact()
- time.Sleep(c.propagateTimeout)
-
- // Check that last contact has not changed
- if last != follower.LastContact() {
-  c.FailNowf("[ERR] unexpected further contact")
- }
-
- // Ensure both have cleared their leader
- if l := leader.Leader(); l != "" {
-  c.FailNowf("[ERR] bad: %v", l)
- }
- if l := follower.Leader(); l != "" {
-  c.FailNowf("[ERR] bad: %v", l)
- }
-}
-
-func TestRaft_Barrier(t *testing.T) {
- // Make the cluster
- c := MakeCluster(3, t, nil)
- defer c.Close()
-
- // Get the leader
- leader := c.Leader()
-
- // Commit a lot of things
- for i := 0; i < 100; i++ {
-  leader.Apply([]byte(fmt.Sprintf("test%d", i)), 0)
- }
-
- // Issue a barrier
- barrier := leader.Barrier(0)
-
- // Wait for the barrier future to apply
- if err := barrier.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- }
-
- // Ensure all the logs are the same
- c.EnsureSame(t)
- if len(c.fsms[0].logs) != 100 {
-  c.FailNowf("[ERR] Bad log length")
- }
-}
-
-func TestRaft_VerifyLeader(t *testing.T) {
- // Make the cluster
- c := MakeCluster(3, t, nil)
- defer c.Close()
-
- // Get the leader
- leader := c.Leader()
-
- // Verify we are leader
- verify := leader.VerifyLeader()
-
- // Wait for the verify to apply
- if err := verify.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- }
-}
-
-func TestRaft_VerifyLeader_Single(t *testing.T) {
- // Make the cluster
- c := MakeCluster(1, t, nil)
- defer c.Close()
-
- // Get the leader
- leader := c.Leader()
-
- // Verify we are leader
- verify := leader.VerifyLeader()
-
- // Wait for the verify to apply
- if err := verify.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- }
-}
-
-func TestRaft_VerifyLeader_Fail(t *testing.T) {
- // Make a cluster
- conf := inmemConfig(t)
- c := MakeCluster(2, t, conf)
- defer c.Close()
-
- // Get the leader
- leader := c.Leader()
-
- // Wait until we have a follower
- followers := c.Followers()
-
- // Force follower to different term
- follower := followers[0]
- follower.setCurrentTerm(follower.getCurrentTerm() + 1)
-
- // Verify we are leader
- verify := leader.VerifyLeader()
-
- // Wait for the leader to step down
- if err := verify.Error(); err != ErrNotLeader && err != ErrLeadershipLost {
-  c.FailNowf("[ERR] err: %v", err)
- }
-
- // Ensure the known leader is cleared
- if l := leader.Leader(); l != "" {
-  c.FailNowf("[ERR] bad: %v", l)
- }
-}
-
-func TestRaft_VerifyLeader_ParitalConnect(t *testing.T) {
- // Make a cluster
- conf := inmemConfig(t)
- c := MakeCluster(3, t, conf)
- defer c.Close()
-
- // Get the leader
- leader := c.Leader()
-
- // Wait until we have two followers
- limit := time.Now().Add(c.longstopTimeout)
- var followers []*Raft
- for time.Now().Before(limit) && len(followers) != 2 {
-  c.WaitEvent(nil, c.conf.CommitTimeout)
-  followers = c.GetInState(Follower)
- }
- if len(followers) != 2 {
-  c.FailNowf("[ERR] expected two followers but got: %v", followers)
- }
-
- // Force partial disconnect
- follower := followers[0]
- t.Logf("[INFO] Disconnecting %v", follower)
- c.Disconnect(follower.localAddr)
-
- // Verify we are leader
- verify := leader.VerifyLeader()
-
- // The verify should succeed, since the leader still has a quorum
- if err := verify.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- }
-}
-
-func TestRaft_SettingPeers(t *testing.T) {
- // Make the cluster
- c := MakeClusterNoPeers(3, t, nil)
- defer c.Close()
-
- peers := make([]string, 0, len(c.rafts))
- for _, v := range c.rafts {
-  peers = append(peers, v.localAddr)
- }
-
- for _, v := range c.rafts {
- 
future := v.SetPeers(peers)
-  if err := future.Error(); err != nil {
-   c.FailNowf("[ERR] error setting peers: %v", err)
-  }
- }
-
- // Wait a while
- time.Sleep(c.propagateTimeout)
-
- // Should have a new leader
- if leader := c.Leader(); leader == nil {
-  c.FailNowf("[ERR] no leader?")
- }
-}
-
-func TestRaft_StartAsLeader(t *testing.T) {
- conf := inmemConfig(t)
- conf.StartAsLeader = true
- c := MakeCluster(1, t, conf)
- defer c.Close()
- raft := c.rafts[0]
-
- // Watch leaderCh for change
- select {
- case v := <-raft.LeaderCh():
-  if !v {
-   c.FailNowf("[ERR] should become leader")
-  }
- case <-time.After(c.conf.HeartbeatTimeout * 4):
-  // Longer than you might expect, as multiple elections are possible
-  c.FailNowf("[ERR] timeout becoming leader")
- }
-
- // Should be leader
- if s := raft.State(); s != Leader {
-  c.FailNowf("[ERR] expected leader: %v", s)
- }
-
- // Should be able to apply
- future := raft.Apply([]byte("test"), c.conf.CommitTimeout)
- if err := future.Error(); err != nil {
-  c.FailNowf("[ERR] err: %v", err)
- }
-
- // Check the response
- if future.Response().(int) != 1 {
-  c.FailNowf("[ERR] bad response: %v", future.Response())
- }
-
- // Check the index
- if idx := future.Index(); idx == 0 {
-  c.FailNowf("[ERR] bad index: %d", idx)
- }
-
- // Check that it is applied to the FSM
- if len(c.fsms[0].logs) != 1 {
-  c.FailNowf("[ERR] did not apply to FSM!")
- }
-}
-
-func TestRaft_NotifyCh(t *testing.T) {
- ch := make(chan bool, 1)
- conf := inmemConfig(t)
- conf.NotifyCh = ch
- c := MakeCluster(1, t, conf)
- defer c.Close()
-
- // Watch the notify channel for a change
- select {
- case v := <-ch:
-  if !v {
-   c.FailNowf("[ERR] should become leader")
-  }
- case <-time.After(conf.HeartbeatTimeout * 8):
-  c.FailNowf("[ERR] timeout becoming leader")
- }
-
- // Close the cluster
- c.Close()
-
- // Watch the notify channel for a change
- select {
- case v := <-ch:
-  if v {
-   c.FailNowf("[ERR] should step down as leader")
-  }
- case <-time.After(conf.HeartbeatTimeout * 6):
-  c.FailNowf("[ERR] timeout on step down as leader")
- }
-}
-
-func TestRaft_Voting(t *testing.T) {
- c := MakeCluster(3, t, nil)
- defer c.Close()
- followers := c.Followers()
- ldr := c.Leader()
- ldrT := c.trans[c.IndexOf(ldr)]
-
- reqVote := RequestVoteRequest{
-  Term: 42,
-  Candidate: ldrT.EncodePeer(ldr.localAddr),
-  LastLogIndex: ldr.LastIndex(),
-  LastLogTerm: 1,
- }
- // a follower that thinks there's a leader should vote for that leader. 
- var resp RequestVoteResponse
- if err := ldrT.RequestVote(followers[0].localAddr, &reqVote, &resp); err != nil {
-  c.FailNowf("[ERR] RequestVote RPC failed %v", err)
- }
- if !resp.Granted {
-  c.FailNowf("[ERR] expected vote to be granted, but wasn't %+v", resp)
- }
- // a follower that thinks there's a leader shouldn't vote for a different candidate
- reqVote.Candidate = ldrT.EncodePeer(followers[0].localAddr)
- if err := ldrT.RequestVote(followers[1].localAddr, &reqVote, &resp); err != nil {
-  c.FailNowf("[ERR] RequestVote RPC failed %v", err)
- }
- if resp.Granted {
-  c.FailNowf("[ERR] expected vote not to be granted, but was %+v", resp)
- }
-}
diff --git a/vendor/github.com/hashicorp/raft/replication.go b/vendor/github.com/hashicorp/raft/replication.go
deleted file mode 100644
index 1f8b923c..00000000
--- a/vendor/github.com/hashicorp/raft/replication.go
+++ /dev/null
@@ -1,522 +0,0 @@
-package raft
-
-import (
- "errors"
- "fmt"
- "sync"
- "time"
-
- "github.com/armon/go-metrics"
-)
-
-const (
- maxFailureScale = 12
- failureWait = 10 * time.Millisecond
-)
-
-var (
- // ErrLogNotFound indicates a given log entry is not available.
- ErrLogNotFound = errors.New("log not found")
-
- // ErrPipelineReplicationNotSupported can be returned by the transport to
- // signal that pipeline replication is not supported in general, and that
- // no error message should be produced.
- ErrPipelineReplicationNotSupported = errors.New("pipeline replication not supported")
-)
-
-type followerReplication struct {
- peer string
- inflight *inflight
-
- stopCh chan uint64
- triggerCh chan struct{}
-
- currentTerm uint64
- matchIndex uint64
- nextIndex uint64
-
- lastContact time.Time
- lastContactLock sync.RWMutex
-
- failures uint64
-
- notifyCh chan struct{}
- notify []*verifyFuture
- notifyLock sync.Mutex
-
- // stepDown is used to indicate to the leader that we
- // should step down based on information from a follower.
- stepDown chan struct{}
-
- // allowPipeline is used to control whether
- // pipeline replication should be enabled.
- allowPipeline bool
-}
-
-// notifyAll is used to notify all the waiting verify futures
-// if the follower believes we are still the leader.
-func (s *followerReplication) notifyAll(leader bool) {
- // Clear the waiting notifies minimizing lock time
- s.notifyLock.Lock()
- n := s.notify
- s.notify = nil
- s.notifyLock.Unlock()
-
- // Submit our votes
- for _, v := range n {
-  v.vote(leader)
- }
-}
-
-// LastContact returns the time of last contact.
-func (s *followerReplication) LastContact() time.Time {
- s.lastContactLock.RLock()
- last := s.lastContact
- s.lastContactLock.RUnlock()
- return last
-}
-
-// setLastContact sets the last contact to the current time.
-func (s *followerReplication) setLastContact() {
- s.lastContactLock.Lock()
- s.lastContact = time.Now()
- s.lastContactLock.Unlock()
-}
-
-// replicate is a long-running routine that is used to manage
-// the process of replicating logs to our followers. 
-func (r *Raft) replicate(s *followerReplication) {
- // Start an async heartbeating routine
- stopHeartbeat := make(chan struct{})
- defer close(stopHeartbeat)
- r.goFunc(func() { r.heartbeat(s, stopHeartbeat) })
-
-RPC:
- shouldStop := false
- for !shouldStop {
-  select {
-  case maxIndex := <-s.stopCh:
-   // Make a best effort to replicate up to this index
-   if maxIndex > 0 {
-    r.replicateTo(s, maxIndex)
-   }
-   return
-  case <-s.triggerCh:
-   lastLogIdx, _ := r.getLastLog()
-   shouldStop = r.replicateTo(s, lastLogIdx)
-  case <-randomTimeout(r.conf.CommitTimeout):
-   lastLogIdx, _ := r.getLastLog()
-   shouldStop = r.replicateTo(s, lastLogIdx)
-  }
-
-  // If things look healthy, switch to pipeline mode
-  if !shouldStop && s.allowPipeline {
-   goto PIPELINE
-  }
- }
- return
-
-PIPELINE:
- // Disable until re-enabled
- s.allowPipeline = false
-
- // Replicates using a pipeline for high performance. This method
- // is not able to gracefully recover from errors, and so we fall back
- // to standard mode on failure.
- if err := r.pipelineReplicate(s); err != nil {
-  if err != ErrPipelineReplicationNotSupported {
-   r.logger.Printf("[ERR] raft: Failed to start pipeline replication to %s: %s", s.peer, err)
-  }
- }
- goto RPC
-}
-
-// replicateTo is used to replicate the logs up to a given last index.
-// If the follower log is behind, we take care to bring them up to date.
-func (r *Raft) replicateTo(s *followerReplication, lastIndex uint64) (shouldStop bool) {
- // Create the base request
- var req AppendEntriesRequest
- var resp AppendEntriesResponse
- var start time.Time
-START:
- // Prevent an excessive retry rate on errors
- if s.failures > 0 {
-  select {
-  case <-time.After(backoff(failureWait, s.failures, maxFailureScale)):
-  case <-r.shutdownCh:
-  }
- }
-
- // Setup the request
- if err := r.setupAppendEntries(s, &req, s.nextIndex, lastIndex); err == ErrLogNotFound {
-  goto SEND_SNAP
- } else if err != nil {
-  return
- }
-
- // Make the RPC call
- start = time.Now()
- if err := r.trans.AppendEntries(s.peer, &req, &resp); err != nil {
-  r.logger.Printf("[ERR] raft: Failed to AppendEntries to %v: %v", s.peer, err)
-  s.failures++
-  return
- }
- appendStats(s.peer, start, float32(len(req.Entries)))
-
- // Check for a newer term, stop running
- if resp.Term > req.Term {
-  r.handleStaleTerm(s)
-  return true
- }
-
- // Update the last contact
- s.setLastContact()
-
- // Update s based on success
- if resp.Success {
-  // Update our replication state
-  updateLastAppended(s, &req)
-
-  // Clear any failures, allow pipelining
-  s.failures = 0
-  s.allowPipeline = true
- } else {
-  s.nextIndex = max(min(s.nextIndex-1, resp.LastLog+1), 1)
-  s.matchIndex = s.nextIndex - 1
-  if resp.NoRetryBackoff {
-   s.failures = 0
-  } else {
-   s.failures++
-  }
-  r.logger.Printf("[WARN] raft: AppendEntries to %v rejected, sending older logs (next: %d)", s.peer, s.nextIndex)
- }
-
-CHECK_MORE:
- // Check if there are more logs to replicate
- if s.nextIndex <= lastIndex {
-  goto START
- }
- return
-
- // SEND_SNAP is used when we fail to get a log, usually because the follower
- // is too far behind, and we must ship a snapshot down instead
-SEND_SNAP:
- if stop, err := r.sendLatestSnapshot(s); stop {
-  return true
- } else if err != nil {
-  r.logger.Printf("[ERR] raft: Failed to send snapshot to %v: %v", s.peer, err)
-  return
- }
-
- // Check if there is more to replicate
- goto CHECK_MORE
-}
-
-// sendLatestSnapshot is used to send the latest snapshot we have
-// down to our follower. 
-func (r *Raft) sendLatestSnapshot(s *followerReplication) (bool, error) { - // Get the snapshots - snapshots, err := r.snapshots.List() - if err != nil { - r.logger.Printf("[ERR] raft: Failed to list snapshots: %v", err) - return false, err - } - - // Check we have at least a single snapshot - if len(snapshots) == 0 { - return false, fmt.Errorf("no snapshots found") - } - - // Open the most recent snapshot - snapID := snapshots[0].ID - meta, snapshot, err := r.snapshots.Open(snapID) - if err != nil { - r.logger.Printf("[ERR] raft: Failed to open snapshot %v: %v", snapID, err) - return false, err - } - defer snapshot.Close() - - // Setup the request - req := InstallSnapshotRequest{ - Term: s.currentTerm, - Leader: r.trans.EncodePeer(r.localAddr), - LastLogIndex: meta.Index, - LastLogTerm: meta.Term, - Peers: meta.Peers, - Size: meta.Size, - } - - // Make the call - start := time.Now() - var resp InstallSnapshotResponse - if err := r.trans.InstallSnapshot(s.peer, &req, &resp, snapshot); err != nil { - r.logger.Printf("[ERR] raft: Failed to install snapshot %v: %v", snapID, err) - s.failures++ - return false, err - } - metrics.MeasureSince([]string{"raft", "replication", "installSnapshot", s.peer}, start) - - // Check for a newer term, stop running - if resp.Term > req.Term { - r.handleStaleTerm(s) - return true, nil - } - - // Update the last contact - s.setLastContact() - - // Check for success - if resp.Success { - // Mark any inflight logs as committed - s.inflight.CommitRange(s.matchIndex+1, meta.Index) - - // Update the indexes - s.matchIndex = meta.Index - s.nextIndex = s.matchIndex + 1 - - // Clear any failures - s.failures = 0 - - // Notify we are still leader - s.notifyAll(true) - } else { - s.failures++ - r.logger.Printf("[WARN] raft: InstallSnapshot to %v rejected", s.peer) - } - return false, nil -} - -// heartbeat is used to periodically invoke AppendEntries on a peer -// to ensure they don't time out. This is done async of replicate(), -// since that routine could potentially be blocked on disk IO. -func (r *Raft) heartbeat(s *followerReplication, stopCh chan struct{}) { - var failures uint64 - req := AppendEntriesRequest{ - Term: s.currentTerm, - Leader: r.trans.EncodePeer(r.localAddr), - } - var resp AppendEntriesResponse - for { - // Wait for the next heartbeat interval or forced notify - select { - case <-s.notifyCh: - case <-randomTimeout(r.conf.HeartbeatTimeout / 10): - case <-stopCh: - return - } - - start := time.Now() - if err := r.trans.AppendEntries(s.peer, &req, &resp); err != nil { - r.logger.Printf("[ERR] raft: Failed to heartbeat to %v: %v", s.peer, err) - failures++ - select { - case <-time.After(backoff(failureWait, failures, maxFailureScale)): - case <-stopCh: - } - } else { - s.setLastContact() - failures = 0 - metrics.MeasureSince([]string{"raft", "replication", "heartbeat", s.peer}, start) - s.notifyAll(resp.Success) - } - } -} - -// pipelineReplicate is used when we have synchronized our state with the follower, -// and want to switch to a higher performance pipeline mode of replication. -// We only pipeline AppendEntries commands, and if we ever hit an error, we fall -// back to the standard replication which can handle more complex situations. 
-func (r *Raft) pipelineReplicate(s *followerReplication) error { - // Create a new pipeline - pipeline, err := r.trans.AppendEntriesPipeline(s.peer) - if err != nil { - return err - } - defer pipeline.Close() - - // Log start and stop of pipeline - r.logger.Printf("[INFO] raft: pipelining replication to peer %v", s.peer) - defer r.logger.Printf("[INFO] raft: aborting pipeline replication to peer %v", s.peer) - - // Create a shutdown and finish channel - stopCh := make(chan struct{}) - finishCh := make(chan struct{}) - - // Start a dedicated decoder - r.goFunc(func() { r.pipelineDecode(s, pipeline, stopCh, finishCh) }) - - // Start pipeline sends at the last good nextIndex - nextIndex := s.nextIndex - - shouldStop := false -SEND: - for !shouldStop { - select { - case <-finishCh: - break SEND - case maxIndex := <-s.stopCh: - if maxIndex > 0 { - r.pipelineSend(s, pipeline, &nextIndex, maxIndex) - } - break SEND - case <-s.triggerCh: - lastLogIdx, _ := r.getLastLog() - shouldStop = r.pipelineSend(s, pipeline, &nextIndex, lastLogIdx) - case <-randomTimeout(r.conf.CommitTimeout): - lastLogIdx, _ := r.getLastLog() - shouldStop = r.pipelineSend(s, pipeline, &nextIndex, lastLogIdx) - } - } - - // Stop our decoder, and wait for it to finish - close(stopCh) - select { - case <-finishCh: - case <-r.shutdownCh: - } - return nil -} - -// pipelineSend is used to send data over a pipeline. -func (r *Raft) pipelineSend(s *followerReplication, p AppendPipeline, nextIdx *uint64, lastIndex uint64) (shouldStop bool) { - // Create a new append request - req := new(AppendEntriesRequest) - if err := r.setupAppendEntries(s, req, *nextIdx, lastIndex); err != nil { - return true - } - - // Pipeline the append entries - if _, err := p.AppendEntries(req, new(AppendEntriesResponse)); err != nil { - r.logger.Printf("[ERR] raft: Failed to pipeline AppendEntries to %v: %v", s.peer, err) - return true - } - - // Increase the next send log to avoid re-sending old logs - if n := len(req.Entries); n > 0 { - last := req.Entries[n-1] - *nextIdx = last.Index + 1 - } - return false -} - -// pipelineDecode is used to decode the responses of pipelined requests. -func (r *Raft) pipelineDecode(s *followerReplication, p AppendPipeline, stopCh, finishCh chan struct{}) { - defer close(finishCh) - respCh := p.Consumer() - for { - select { - case ready := <-respCh: - req, resp := ready.Request(), ready.Response() - appendStats(s.peer, ready.Start(), float32(len(req.Entries))) - - // Check for a newer term, stop running - if resp.Term > req.Term { - r.handleStaleTerm(s) - return - } - - // Update the last contact - s.setLastContact() - - // Abort pipeline if not successful - if !resp.Success { - return - } - - // Update our replication state - updateLastAppended(s, req) - case <-stopCh: - return - } - } -} - -// setupAppendEntries is used to setup an append entries request. -func (r *Raft) setupAppendEntries(s *followerReplication, req *AppendEntriesRequest, nextIndex, lastIndex uint64) error { - req.Term = s.currentTerm - req.Leader = r.trans.EncodePeer(r.localAddr) - req.LeaderCommitIndex = r.getCommitIndex() - if err := r.setPreviousLog(req, nextIndex); err != nil { - return err - } - if err := r.setNewLogs(req, nextIndex, lastIndex); err != nil { - return err - } - return nil -} - -// setPreviousLog is used to setup the PrevLogEntry and PrevLogTerm for an -// AppendEntriesRequest given the next index to replicate. 
-func (r *Raft) setPreviousLog(req *AppendEntriesRequest, nextIndex uint64) error {
- // Guard for the first index, since there is no 0 log entry
- // Guard against the previous index being a snapshot as well
- lastSnapIdx, lastSnapTerm := r.getLastSnapshot()
- if nextIndex == 1 {
-  req.PrevLogEntry = 0
-  req.PrevLogTerm = 0
-
- } else if (nextIndex - 1) == lastSnapIdx {
-  req.PrevLogEntry = lastSnapIdx
-  req.PrevLogTerm = lastSnapTerm
-
- } else {
-  var l Log
-  if err := r.logs.GetLog(nextIndex-1, &l); err != nil {
-   r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v",
-    nextIndex-1, err)
-   return err
-  }
-
-  // Set the previous index and term from the log entry
-  req.PrevLogEntry = l.Index
-  req.PrevLogTerm = l.Term
- }
- return nil
-}
-
-// setNewLogs is used to set up the logs which should be appended for a request.
-func (r *Raft) setNewLogs(req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {
- // Append up to MaxAppendEntries or up to the lastIndex
- req.Entries = make([]*Log, 0, r.conf.MaxAppendEntries)
- maxIndex := min(nextIndex+uint64(r.conf.MaxAppendEntries)-1, lastIndex)
- for i := nextIndex; i <= maxIndex; i++ {
-  oldLog := new(Log)
-  if err := r.logs.GetLog(i, oldLog); err != nil {
-   r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v", i, err)
-   return err
-  }
-  req.Entries = append(req.Entries, oldLog)
- }
- return nil
-}
-
-// appendStats is used to emit stats about an AppendEntries invocation.
-func appendStats(peer string, start time.Time, logs float32) {
- metrics.MeasureSince([]string{"raft", "replication", "appendEntries", "rpc", peer}, start)
- metrics.IncrCounter([]string{"raft", "replication", "appendEntries", "logs", peer}, logs)
-}
-
-// handleStaleTerm is used when a follower indicates that we have a stale term.
-func (r *Raft) handleStaleTerm(s *followerReplication) {
- r.logger.Printf("[ERR] raft: peer %v has newer term, stopping replication", s.peer)
- s.notifyAll(false) // No longer leader
- asyncNotifyCh(s.stepDown)
-}
-
-// updateLastAppended is used to update follower replication state after a successful
-// AppendEntries RPC.
-func updateLastAppended(s *followerReplication, req *AppendEntriesRequest) {
- // Mark any inflight logs as committed
- if logs := req.Entries; len(logs) > 0 {
-  first := logs[0]
-  last := logs[len(logs)-1]
-  s.inflight.CommitRange(first.Index, last.Index)
-
-  // Update the indexes
-  s.matchIndex = last.Index
-  s.nextIndex = last.Index + 1
- }
-
- // Notify still leader
- s.notifyAll(true)
-}
diff --git a/vendor/github.com/hashicorp/raft/snapshot.go b/vendor/github.com/hashicorp/raft/snapshot.go
deleted file mode 100644
index a4a17f1c..00000000
--- a/vendor/github.com/hashicorp/raft/snapshot.go
+++ /dev/null
@@ -1,40 +0,0 @@
-package raft
-
-import (
- "io"
-)
-
-// SnapshotMeta is for metadata of a snapshot.
-type SnapshotMeta struct {
- ID string // ID is opaque to the store, and is used for opening
- Index uint64
- Term uint64
- Peers []byte
- Size int64
-}
-
-// SnapshotStore interface is used to allow for flexible implementations
-// of snapshot storage and retrieval. For example, a client could implement
-// a shared state store such as S3, allowing new nodes to restore snapshots
-// without streaming from the leader.
-type SnapshotStore interface {
- // Create is used to begin a snapshot at a given index and term,
- // with the current peer set already encoded. 
- Create(index, term uint64, peers []byte) (SnapshotSink, error)
-
- // List is used to list the available snapshots in the store.
- // It should return them in descending order, with the highest index first.
- List() ([]*SnapshotMeta, error)
-
- // Open takes a snapshot ID and provides a ReadCloser. Once Close is
- // called it is assumed the snapshot is no longer needed.
- Open(id string) (*SnapshotMeta, io.ReadCloser, error)
-}
-
-// SnapshotSink is returned by Create. The FSM will Write state
-// to the sink and call Close on completion. On error, Cancel will be invoked.
-type SnapshotSink interface {
- io.WriteCloser
- ID() string
- Cancel() error
-}
diff --git a/vendor/github.com/hashicorp/raft/stable.go b/vendor/github.com/hashicorp/raft/stable.go
deleted file mode 100644
index ff59a8c5..00000000
--- a/vendor/github.com/hashicorp/raft/stable.go
+++ /dev/null
@@ -1,15 +0,0 @@
-package raft
-
-// StableStore is used to provide stable storage
-// of key configurations to ensure safety.
-type StableStore interface {
- Set(key []byte, val []byte) error
-
- // Get returns the value for key, or an empty byte slice if key was not found.
- Get(key []byte) ([]byte, error)
-
- SetUint64(key []byte, val uint64) error
-
- // GetUint64 returns the uint64 value for key, or 0 if key was not found.
- GetUint64(key []byte) (uint64, error)
-}
diff --git a/vendor/github.com/hashicorp/raft/state.go b/vendor/github.com/hashicorp/raft/state.go
deleted file mode 100644
index a58cd0d1..00000000
--- a/vendor/github.com/hashicorp/raft/state.go
+++ /dev/null
@@ -1,171 +0,0 @@
-package raft
-
-import (
- "sync"
- "sync/atomic"
-)
-
-// RaftState captures the state of a Raft node: Follower, Candidate, Leader,
-// or Shutdown.
-type RaftState uint32
-
-const (
- // Follower is the initial state of a Raft node.
- Follower RaftState = iota
-
- // Candidate is one of the valid states of a Raft node.
- Candidate
-
- // Leader is one of the valid states of a Raft node.
- Leader
-
- // Shutdown is the terminal state of a Raft node.
- Shutdown
-)
-
-func (s RaftState) String() string {
- switch s {
- case Follower:
-  return "Follower"
- case Candidate:
-  return "Candidate"
- case Leader:
-  return "Leader"
- case Shutdown:
-  return "Shutdown"
- default:
-  return "Unknown"
- }
-}
-
-// raftState is used to maintain various state variables
-// and provides an interface to set/get the variables in a
-// thread-safe manner.
-type raftState struct {
- // currentTerm, commitIndex and lastApplied must be kept at the top of
- // the struct so they're 64-bit aligned, which is a requirement for
- // atomic ops on 32-bit platforms. 
- - // The current term, cache of StableStore - currentTerm uint64 - - // Highest committed log entry - commitIndex uint64 - - // Last applied log to the FSM - lastApplied uint64 - - // protects 4 next fields - lastLock sync.Mutex - - // Cache the latest snapshot index/term - lastSnapshotIndex uint64 - lastSnapshotTerm uint64 - - // Cache the latest log from LogStore - lastLogIndex uint64 - lastLogTerm uint64 - - // Tracks running goroutines - routinesGroup sync.WaitGroup - - // The current state - state RaftState -} - -func (r *raftState) getState() RaftState { - stateAddr := (*uint32)(&r.state) - return RaftState(atomic.LoadUint32(stateAddr)) -} - -func (r *raftState) setState(s RaftState) { - stateAddr := (*uint32)(&r.state) - atomic.StoreUint32(stateAddr, uint32(s)) -} - -func (r *raftState) getCurrentTerm() uint64 { - return atomic.LoadUint64(&r.currentTerm) -} - -func (r *raftState) setCurrentTerm(term uint64) { - atomic.StoreUint64(&r.currentTerm, term) -} - -func (r *raftState) getLastLog() (index, term uint64) { - r.lastLock.Lock() - index = r.lastLogIndex - term = r.lastLogTerm - r.lastLock.Unlock() - return -} - -func (r *raftState) setLastLog(index, term uint64) { - r.lastLock.Lock() - r.lastLogIndex = index - r.lastLogTerm = term - r.lastLock.Unlock() -} - -func (r *raftState) getLastSnapshot() (index, term uint64) { - r.lastLock.Lock() - index = r.lastSnapshotIndex - term = r.lastSnapshotTerm - r.lastLock.Unlock() - return -} - -func (r *raftState) setLastSnapshot(index, term uint64) { - r.lastLock.Lock() - r.lastSnapshotIndex = index - r.lastSnapshotTerm = term - r.lastLock.Unlock() -} - -func (r *raftState) getCommitIndex() uint64 { - return atomic.LoadUint64(&r.commitIndex) -} - -func (r *raftState) setCommitIndex(index uint64) { - atomic.StoreUint64(&r.commitIndex, index) -} - -func (r *raftState) getLastApplied() uint64 { - return atomic.LoadUint64(&r.lastApplied) -} - -func (r *raftState) setLastApplied(index uint64) { - atomic.StoreUint64(&r.lastApplied, index) -} - -// Start a goroutine and properly handle the race between a routine -// starting and incrementing, and exiting and decrementing. -func (r *raftState) goFunc(f func()) { - r.routinesGroup.Add(1) - go func() { - defer r.routinesGroup.Done() - f() - }() -} - -func (r *raftState) waitShutdown() { - r.routinesGroup.Wait() -} - -// getLastIndex returns the last index in stable storage. -// Either from the last log or from the last snapshot. -func (r *raftState) getLastIndex() uint64 { - r.lastLock.Lock() - defer r.lastLock.Unlock() - return max(r.lastLogIndex, r.lastSnapshotIndex) -} - -// getLastEntry returns the last index and term in stable storage. -// Either from the last log or from the last snapshot. -func (r *raftState) getLastEntry() (uint64, uint64) { - r.lastLock.Lock() - defer r.lastLock.Unlock() - if r.lastLogIndex >= r.lastSnapshotIndex { - return r.lastLogIndex, r.lastLogTerm - } - return r.lastSnapshotIndex, r.lastSnapshotTerm -} diff --git a/vendor/github.com/hashicorp/raft/tag.sh b/vendor/github.com/hashicorp/raft/tag.sh deleted file mode 100755 index cd16623a..00000000 --- a/vendor/github.com/hashicorp/raft/tag.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash -set -e - -# The version must be supplied from the environment. Do not include the -# leading "v". -if [ -z $VERSION ]; then - echo "Please specify a version." - exit 1 -fi - -# Generate the tag. -echo "==> Tagging version $VERSION..." 
-git commit --allow-empty -a --gpg-sign=348FFC4C -m "Release v$VERSION" -git tag -a -m "Version $VERSION" -s -u 348FFC4C "v${VERSION}" master - -exit 0 diff --git a/vendor/github.com/hashicorp/raft/tcp_transport.go b/vendor/github.com/hashicorp/raft/tcp_transport.go deleted file mode 100644 index 50c6d15d..00000000 --- a/vendor/github.com/hashicorp/raft/tcp_transport.go +++ /dev/null @@ -1,105 +0,0 @@ -package raft - -import ( - "errors" - "io" - "log" - "net" - "time" -) - -var ( - errNotAdvertisable = errors.New("local bind address is not advertisable") - errNotTCP = errors.New("local address is not a TCP address") -) - -// TCPStreamLayer implements StreamLayer interface for plain TCP. -type TCPStreamLayer struct { - advertise net.Addr - listener *net.TCPListener -} - -// NewTCPTransport returns a NetworkTransport that is built on top of -// a TCP streaming transport layer. -func NewTCPTransport( - bindAddr string, - advertise net.Addr, - maxPool int, - timeout time.Duration, - logOutput io.Writer, -) (*NetworkTransport, error) { - return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport { - return NewNetworkTransport(stream, maxPool, timeout, logOutput) - }) -} - -// NewTCPTransportWithLogger returns a NetworkTransport that is built on top of -// a TCP streaming transport layer, with log output going to the supplied Logger -func NewTCPTransportWithLogger( - bindAddr string, - advertise net.Addr, - maxPool int, - timeout time.Duration, - logger *log.Logger, -) (*NetworkTransport, error) { - return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport { - return NewNetworkTransportWithLogger(stream, maxPool, timeout, logger) - }) -} - -func newTCPTransport(bindAddr string, - advertise net.Addr, - maxPool int, - timeout time.Duration, - transportCreator func(stream StreamLayer) *NetworkTransport) (*NetworkTransport, error) { - // Try to bind - list, err := net.Listen("tcp", bindAddr) - if err != nil { - return nil, err - } - - // Create stream - stream := &TCPStreamLayer{ - advertise: advertise, - listener: list.(*net.TCPListener), - } - - // Verify that we have a usable advertise address - addr, ok := stream.Addr().(*net.TCPAddr) - if !ok { - list.Close() - return nil, errNotTCP - } - if addr.IP.IsUnspecified() { - list.Close() - return nil, errNotAdvertisable - } - - // Create the network transport - trans := transportCreator(stream) - return trans, nil -} - -// Dial implements the StreamLayer interface. -func (t *TCPStreamLayer) Dial(address string, timeout time.Duration) (net.Conn, error) { - return net.DialTimeout("tcp", address, timeout) -} - -// Accept implements the net.Listener interface. -func (t *TCPStreamLayer) Accept() (c net.Conn, err error) { - return t.listener.Accept() -} - -// Close implements the net.Listener interface. -func (t *TCPStreamLayer) Close() (err error) { - return t.listener.Close() -} - -// Addr implements the net.Listener interface. 
-func (t *TCPStreamLayer) Addr() net.Addr { - // Use an advertise addr if provided - if t.advertise != nil { - return t.advertise - } - return t.listener.Addr() -} diff --git a/vendor/github.com/hashicorp/raft/tcp_transport_test.go b/vendor/github.com/hashicorp/raft/tcp_transport_test.go deleted file mode 100644 index 6020a546..00000000 --- a/vendor/github.com/hashicorp/raft/tcp_transport_test.go +++ /dev/null @@ -1,24 +0,0 @@ -package raft - -import ( - "net" - "testing" -) - -func TestTCPTransport_BadAddr(t *testing.T) { - _, err := NewTCPTransportWithLogger("0.0.0.0:0", nil, 1, 0, newTestLogger(t)) - if err != errNotAdvertisable { - t.Fatalf("err: %v", err) - } -} - -func TestTCPTransport_WithAdvertise(t *testing.T) { - addr := &net.TCPAddr{IP: []byte{127, 0, 0, 1}, Port: 12345} - trans, err := NewTCPTransportWithLogger("0.0.0.0:0", addr, 1, 0, newTestLogger(t)) - if err != nil { - t.Fatalf("err: %v", err) - } - if trans.LocalAddr() != "127.0.0.1:12345" { - t.Fatalf("bad: %v", trans.LocalAddr()) - } -} diff --git a/vendor/github.com/hashicorp/raft/transport.go b/vendor/github.com/hashicorp/raft/transport.go deleted file mode 100644 index 2b8b422f..00000000 --- a/vendor/github.com/hashicorp/raft/transport.go +++ /dev/null @@ -1,124 +0,0 @@ -package raft - -import ( - "io" - "time" -) - -// RPCResponse captures both a response and a potential error. -type RPCResponse struct { - Response interface{} - Error error -} - -// RPC has a command, and provides a response mechanism. -type RPC struct { - Command interface{} - Reader io.Reader // Set only for InstallSnapshot - RespChan chan<- RPCResponse -} - -// Respond is used to respond with a response, error or both -func (r *RPC) Respond(resp interface{}, err error) { - r.RespChan <- RPCResponse{resp, err} -} - -// Transport provides an interface for network transports -// to allow Raft to communicate with other nodes. -type Transport interface { - // Consumer returns a channel that can be used to - // consume and respond to RPC requests. - Consumer() <-chan RPC - - // LocalAddr is used to return our local address to distinguish from our peers. - LocalAddr() string - - // AppendEntriesPipeline returns an interface that can be used to pipeline - // AppendEntries requests. - AppendEntriesPipeline(target string) (AppendPipeline, error) - - // AppendEntries sends the appropriate RPC to the target node. - AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error - - // RequestVote sends the appropriate RPC to the target node. - RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error - - // InstallSnapshot is used to push a snapshot down to a follower. The data is read from - // the ReadCloser and streamed to the client. - InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error - - // EncodePeer is used to serialize a peer name. - EncodePeer(string) []byte - - // DecodePeer is used to deserialize a peer name. - DecodePeer([]byte) string - - // SetHeartbeatHandler is used to setup a heartbeat handler - // as a fast-pass. This is to avoid head-of-line blocking from - // disk IO. If a Transport does not support this, it can simply - // ignore the call, and push the heartbeat onto the Consumer channel. - SetHeartbeatHandler(cb func(rpc RPC)) -} - -// WithClose is an interface that a transport may provide which -// allows a transport to be shut down cleanly when a Raft instance -// shuts down. 
-// -// It is defined separately from Transport as unfortunately it wasn't in the -// original interface specification. -type WithClose interface { - // Close permanently closes a transport, stopping - // any associated goroutines and freeing other resources. - Close() error -} - -// LoopbackTransport is an interface that provides a loopback transport suitable for testing -// e.g. InmemTransport. It's there so we don't have to rewrite tests. -type LoopbackTransport interface { - Transport // Embedded transport reference - WithPeers // Embedded peer management - WithClose // with a close routine -} - -// WithPeers is an interface that a transport may provide which allows for connection and -// disconnection. Unless the transport is a loopback transport, the transport specified to -// "Connect" is likely to be nil. -type WithPeers interface { - Connect(peer string, t Transport) // Connect a peer - Disconnect(peer string) // Disconnect a given peer - DisconnectAll() // Disconnect all peers, possibly to reconnect them later -} - -// AppendPipeline is used for pipelining AppendEntries requests. It is used -// to increase the replication throughput by masking latency and better -// utilizing bandwidth. -type AppendPipeline interface { - // AppendEntries is used to add another request to the pipeline. - // The send may block which is an effective form of back-pressure. - AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) - - // Consumer returns a channel that can be used to consume - // response futures when they are ready. - Consumer() <-chan AppendFuture - - // Close closes the pipeline and cancels all inflight RPCs - Close() error -} - -// AppendFuture is used to return information about a pipelined AppendEntries request. -type AppendFuture interface { - Future - - // Start returns the time that the append request was started. - // It is always OK to call this method. - Start() time.Time - - // Request holds the parameters of the AppendEntries call. - // It is always OK to call this method. - Request() *AppendEntriesRequest - - // Response holds the results of the AppendEntries call. - // This method must only be called after the Error - // method returns, and will only be valid on success. 
- Response() *AppendEntriesResponse -} diff --git a/vendor/github.com/hashicorp/raft/transport_test.go b/vendor/github.com/hashicorp/raft/transport_test.go deleted file mode 100644 index e3cbd525..00000000 --- a/vendor/github.com/hashicorp/raft/transport_test.go +++ /dev/null @@ -1,313 +0,0 @@ -package raft - -import ( - "bytes" - "reflect" - "testing" - "time" -) - -const ( - TT_Inmem = iota - - // NOTE: must be last - numTestTransports -) - -func NewTestTransport(ttype int, addr string) (string, LoopbackTransport) { - switch ttype { - case TT_Inmem: - addr, lt := NewInmemTransport(addr) - return addr, lt - default: - panic("Unknown transport type") - } -} - -func TestTransport_StartStop(t *testing.T) { - for ttype := 0; ttype < numTestTransports; ttype++ { - _, trans := NewTestTransport(ttype, "") - if err := trans.Close(); err != nil { - t.Fatalf("err: %v", err) - } - } -} - -func TestTransport_AppendEntries(t *testing.T) { - for ttype := 0; ttype < numTestTransports; ttype++ { - addr1, trans1 := NewTestTransport(ttype, "") - defer trans1.Close() - rpcCh := trans1.Consumer() - - // Make the RPC request - args := AppendEntriesRequest{ - Term: 10, - Leader: []byte("cartman"), - PrevLogEntry: 100, - PrevLogTerm: 4, - Entries: []*Log{ - &Log{ - Index: 101, - Term: 4, - Type: LogNoop, - }, - }, - LeaderCommitIndex: 90, - } - resp := AppendEntriesResponse{ - Term: 4, - LastLog: 90, - Success: true, - } - - // Listen for a request - go func() { - select { - case rpc := <-rpcCh: - // Verify the command - req := rpc.Command.(*AppendEntriesRequest) - if !reflect.DeepEqual(req, &args) { - t.Fatalf("command mismatch: %#v %#v", *req, args) - } - rpc.Respond(&resp, nil) - - case <-time.After(200 * time.Millisecond): - t.Fatalf("timeout") - } - }() - - // Transport 2 makes outbound request - addr2, trans2 := NewTestTransport(ttype, "") - defer trans2.Close() - - trans1.Connect(addr2, trans2) - trans2.Connect(addr1, trans1) - - var out AppendEntriesResponse - if err := trans2.AppendEntries(trans1.LocalAddr(), &args, &out); err != nil { - t.Fatalf("err: %v", err) - } - - // Verify the response - if !reflect.DeepEqual(resp, out) { - t.Fatalf("command mismatch: %#v %#v", resp, out) - } - } -} - -func TestTransport_AppendEntriesPipeline(t *testing.T) { - for ttype := 0; ttype < numTestTransports; ttype++ { - addr1, trans1 := NewTestTransport(ttype, "") - defer trans1.Close() - rpcCh := trans1.Consumer() - - // Make the RPC request - args := AppendEntriesRequest{ - Term: 10, - Leader: []byte("cartman"), - PrevLogEntry: 100, - PrevLogTerm: 4, - Entries: []*Log{ - &Log{ - Index: 101, - Term: 4, - Type: LogNoop, - }, - }, - LeaderCommitIndex: 90, - } - resp := AppendEntriesResponse{ - Term: 4, - LastLog: 90, - Success: true, - } - - // Listen for a request - go func() { - for i := 0; i < 10; i++ { - select { - case rpc := <-rpcCh: - // Verify the command - req := rpc.Command.(*AppendEntriesRequest) - if !reflect.DeepEqual(req, &args) { - t.Fatalf("command mismatch: %#v %#v", *req, args) - } - rpc.Respond(&resp, nil) - - case <-time.After(200 * time.Millisecond): - t.Fatalf("timeout") - } - } - }() - - // Transport 2 makes outbound request - addr2, trans2 := NewTestTransport(ttype, "") - defer trans2.Close() - - trans1.Connect(addr2, trans2) - trans2.Connect(addr1, trans1) - - pipeline, err := trans2.AppendEntriesPipeline(trans1.LocalAddr()) - if err != nil { - t.Fatalf("err: %v", err) - } - defer pipeline.Close() - for i := 0; i < 10; i++ { - out := new(AppendEntriesResponse) - if _, err := 
pipeline.AppendEntries(&args, out); err != nil { - t.Fatalf("err: %v", err) - } - } - - respCh := pipeline.Consumer() - for i := 0; i < 10; i++ { - select { - case ready := <-respCh: - // Verify the response - if !reflect.DeepEqual(&resp, ready.Response()) { - t.Fatalf("command mismatch: %#v %#v", &resp, ready.Response()) - } - case <-time.After(200 * time.Millisecond): - t.Fatalf("timeout") - } - } - } -} - -func TestTransport_RequestVote(t *testing.T) { - for ttype := 0; ttype < numTestTransports; ttype++ { - addr1, trans1 := NewTestTransport(ttype, "") - defer trans1.Close() - rpcCh := trans1.Consumer() - - // Make the RPC request - args := RequestVoteRequest{ - Term: 20, - Candidate: []byte("butters"), - LastLogIndex: 100, - LastLogTerm: 19, - } - resp := RequestVoteResponse{ - Term: 100, - Peers: []byte("blah"), - Granted: false, - } - - // Listen for a request - go func() { - select { - case rpc := <-rpcCh: - // Verify the command - req := rpc.Command.(*RequestVoteRequest) - if !reflect.DeepEqual(req, &args) { - t.Fatalf("command mismatch: %#v %#v", *req, args) - } - - rpc.Respond(&resp, nil) - - case <-time.After(200 * time.Millisecond): - t.Fatalf("timeout") - } - }() - - // Transport 2 makes outbound request - addr2, trans2 := NewTestTransport(ttype, "") - defer trans2.Close() - - trans1.Connect(addr2, trans2) - trans2.Connect(addr1, trans1) - - var out RequestVoteResponse - if err := trans2.RequestVote(trans1.LocalAddr(), &args, &out); err != nil { - t.Fatalf("err: %v", err) - } - - // Verify the response - if !reflect.DeepEqual(resp, out) { - t.Fatalf("command mismatch: %#v %#v", resp, out) - } - } -} - -func TestTransport_InstallSnapshot(t *testing.T) { - for ttype := 0; ttype < numTestTransports; ttype++ { - addr1, trans1 := NewTestTransport(ttype, "") - defer trans1.Close() - rpcCh := trans1.Consumer() - - // Make the RPC request - args := InstallSnapshotRequest{ - Term: 10, - Leader: []byte("kyle"), - LastLogIndex: 100, - LastLogTerm: 9, - Peers: []byte("blah blah"), - Size: 10, - } - resp := InstallSnapshotResponse{ - Term: 10, - Success: true, - } - - // Listen for a request - go func() { - select { - case rpc := <-rpcCh: - // Verify the command - req := rpc.Command.(*InstallSnapshotRequest) - if !reflect.DeepEqual(req, &args) { - t.Fatalf("command mismatch: %#v %#v", *req, args) - } - - // Try to read the bytes - buf := make([]byte, 10) - rpc.Reader.Read(buf) - - // Compare - if bytes.Compare(buf, []byte("0123456789")) != 0 { - t.Fatalf("bad buf %v", buf) - } - - rpc.Respond(&resp, nil) - - case <-time.After(200 * time.Millisecond): - t.Fatalf("timeout") - } - }() - - // Transport 2 makes outbound request - addr2, trans2 := NewTestTransport(ttype, "") - defer trans2.Close() - - trans1.Connect(addr2, trans2) - trans2.Connect(addr1, trans1) - - // Create a buffer - buf := bytes.NewBuffer([]byte("0123456789")) - - var out InstallSnapshotResponse - if err := trans2.InstallSnapshot(trans1.LocalAddr(), &args, &out, buf); err != nil { - t.Fatalf("err: %v", err) - } - - // Verify the response - if !reflect.DeepEqual(resp, out) { - t.Fatalf("command mismatch: %#v %#v", resp, out) - } - } -} - -func TestTransport_EncodeDecode(t *testing.T) { - for ttype := 0; ttype < numTestTransports; ttype++ { - _, trans1 := NewTestTransport(ttype, "") - defer trans1.Close() - - local := trans1.LocalAddr() - enc := trans1.EncodePeer(local) - dec := trans1.DecodePeer(enc) - - if dec != local { - t.Fatalf("enc/dec fail: %v %v", dec, local) - } - } -} diff --git 
a/vendor/github.com/hashicorp/raft/util.go b/vendor/github.com/hashicorp/raft/util.go
deleted file mode 100644
index 944968a2..00000000
--- a/vendor/github.com/hashicorp/raft/util.go
+++ /dev/null
@@ -1,179 +0,0 @@
-package raft
-
-import (
-	"bytes"
-	crand "crypto/rand"
-	"fmt"
-	"math"
-	"math/big"
-	"math/rand"
-	"time"
-
-	"github.com/hashicorp/go-msgpack/codec"
-)
-
-func init() {
-	// Ensure we use a high-entropy seed for the pseudo-random generator
-	rand.Seed(newSeed())
-}
-
-// returns an int64 from a crypto random source
-// can be used to seed a source for a math/rand.
-func newSeed() int64 {
-	r, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64))
-	if err != nil {
-		panic(fmt.Errorf("failed to read random bytes: %v", err))
-	}
-	return r.Int64()
-}
-
-// randomTimeout returns a value that is between minVal and 2x minVal.
-func randomTimeout(minVal time.Duration) <-chan time.Time {
-	if minVal == 0 {
-		return nil
-	}
-	extra := (time.Duration(rand.Int63()) % minVal)
-	return time.After(minVal + extra)
-}
-
-// min returns the minimum.
-func min(a, b uint64) uint64 {
-	if a <= b {
-		return a
-	}
-	return b
-}
-
-// max returns the maximum.
-func max(a, b uint64) uint64 {
-	if a >= b {
-		return a
-	}
-	return b
-}
-
-// generateUUID is used to generate a random UUID.
-func generateUUID() string {
-	buf := make([]byte, 16)
-	if _, err := crand.Read(buf); err != nil {
-		panic(fmt.Errorf("failed to read random bytes: %v", err))
-	}
-
-	return fmt.Sprintf("%08x-%04x-%04x-%04x-%12x",
-		buf[0:4],
-		buf[4:6],
-		buf[6:8],
-		buf[8:10],
-		buf[10:16])
-}
-
-// asyncNotifyCh is used to do an async channel send
-// to a single channel without blocking.
-func asyncNotifyCh(ch chan struct{}) {
-	select {
-	case ch <- struct{}{}:
-	default:
-	}
-}
-
-// asyncNotifyBool is used to do an async notification
-// on a bool channel.
-func asyncNotifyBool(ch chan bool, v bool) {
-	select {
-	case ch <- v:
-	default:
-	}
-}
-
-// ExcludePeer is used to exclude a single peer from a list of peers.
-func ExcludePeer(peers []string, peer string) []string {
-	otherPeers := make([]string, 0, len(peers))
-	for _, p := range peers {
-		if p != peer {
-			otherPeers = append(otherPeers, p)
-		}
-	}
-	return otherPeers
-}
-
-// PeerContained checks if a given peer is contained in a list.
-func PeerContained(peers []string, peer string) bool {
-	for _, p := range peers {
-		if p == peer {
-			return true
-		}
-	}
-	return false
-}
-
-// AddUniquePeer is used to add a peer to a list of existing
-// peers only if it is not already contained.
-func AddUniquePeer(peers []string, peer string) []string {
-	if PeerContained(peers, peer) {
-		return peers
-	}
-	return append(peers, peer)
-}
-
-// encodePeers is used to serialize a list of peers.
-func encodePeers(peers []string, trans Transport) []byte {
-	// Encode each peer
-	var encPeers [][]byte
-	for _, p := range peers {
-		encPeers = append(encPeers, trans.EncodePeer(p))
-	}
-
-	// Encode the entire array
-	buf, err := encodeMsgPack(encPeers)
-	if err != nil {
-		panic(fmt.Errorf("failed to encode peers: %v", err))
-	}
-
-	return buf.Bytes()
-}
-
-// decodePeers is used to deserialize a list of peers.
-func decodePeers(buf []byte, trans Transport) []string {
-	// Decode the buffer first
-	var encPeers [][]byte
-	if err := decodeMsgPack(buf, &encPeers); err != nil {
-		panic(fmt.Errorf("failed to decode peers: %v", err))
-	}
-
-	// Deserialize each peer
-	var peers []string
-	for _, enc := range encPeers {
-		peers = append(peers, trans.DecodePeer(enc))
-	}
-
-	return peers
-}
-
-// Decode reverses the encode operation on a byte slice input.
-func decodeMsgPack(buf []byte, out interface{}) error {
-	r := bytes.NewBuffer(buf)
-	hd := codec.MsgpackHandle{}
-	dec := codec.NewDecoder(r, &hd)
-	return dec.Decode(out)
-}
-
-// Encode writes an encoded object to a new bytes buffer.
-func encodeMsgPack(in interface{}) (*bytes.Buffer, error) {
-	buf := bytes.NewBuffer(nil)
-	hd := codec.MsgpackHandle{}
-	enc := codec.NewEncoder(buf, &hd)
-	err := enc.Encode(in)
-	return buf, err
-}
-
-// backoff is used to compute an exponential backoff
-// duration. Base time is scaled by the current round,
-// up to some maximum scale factor.
-func backoff(base time.Duration, round, limit uint64) time.Duration {
-	power := min(round, limit)
-	for power > 2 {
-		base *= 2
-		power--
-	}
-	return base
-}
diff --git a/vendor/github.com/hashicorp/raft/util_test.go b/vendor/github.com/hashicorp/raft/util_test.go
deleted file mode 100644
index 88b93211..00000000
--- a/vendor/github.com/hashicorp/raft/util_test.go
+++ /dev/null
@@ -1,152 +0,0 @@
-package raft
-
-import (
-	"reflect"
-	"regexp"
-	"testing"
-	"time"
-)
-
-func TestRandomTimeout(t *testing.T) {
-	start := time.Now()
-	timeout := randomTimeout(time.Millisecond)
-
-	select {
-	case <-timeout:
-		diff := time.Now().Sub(start)
-		if diff < time.Millisecond {
-			t.Fatalf("fired early")
-		}
-	case <-time.After(3 * time.Millisecond):
-		t.Fatalf("timeout")
-	}
-}
-
-func TestNewSeed(t *testing.T) {
-	vals := make(map[int64]bool)
-	for i := 0; i < 1000; i++ {
-		seed := newSeed()
-		if _, exists := vals[seed]; exists {
-			t.Fatal("newSeed() returned a value it'd previously returned")
-		}
-		vals[seed] = true
-	}
-}
-
-func TestRandomTimeout_NoTime(t *testing.T) {
-	timeout := randomTimeout(0)
-	if timeout != nil {
-		t.Fatalf("expected nil channel")
-	}
-}
-
-func TestMin(t *testing.T) {
-	if min(1, 1) != 1 {
-		t.Fatalf("bad min")
-	}
-	if min(2, 1) != 1 {
-		t.Fatalf("bad min")
-	}
-	if min(1, 2) != 1 {
-		t.Fatalf("bad min")
-	}
-}
-
-func TestMax(t *testing.T) {
-	if max(1, 1) != 1 {
-		t.Fatalf("bad max")
-	}
-	if max(2, 1) != 2 {
-		t.Fatalf("bad max")
-	}
-	if max(1, 2) != 2 {
-		t.Fatalf("bad max")
-	}
-}
-
-func TestGenerateUUID(t *testing.T) {
-	prev := generateUUID()
-	for i := 0; i < 100; i++ {
-		id := generateUUID()
-		if prev == id {
-			t.Fatalf("Should get a new ID!")
-		}
-
-		matched, err := regexp.MatchString(
-			`[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}`, id)
-		if !matched || err != nil {
-			t.Fatalf("expected match %s %v %s", id, matched, err)
-		}
-	}
-}
-
-func TestExcludePeer(t *testing.T) {
-	peers := []string{NewInmemAddr(), NewInmemAddr(), NewInmemAddr()}
-	peer := peers[2]
-
-	after := ExcludePeer(peers, peer)
-	if len(after) != 2 {
-		t.Fatalf("Bad length")
-	}
-	if after[0] == peer || after[1] == peer {
-		t.Fatalf("should not contain peer")
-	}
-}
-
-func TestPeerContained(t *testing.T) {
-	peers := []string{NewInmemAddr(), NewInmemAddr(), NewInmemAddr()}
-
-	if !PeerContained(peers, peers[2]) {
-		t.Fatalf("Expect contained")
-	}
-	if PeerContained(peers, NewInmemAddr()) {
-		t.Fatalf("unexpected contained")
-	}
-}
-
-func TestAddUniquePeer(t *testing.T) {
-	peers := []string{NewInmemAddr(), NewInmemAddr(), NewInmemAddr()}
-	after := AddUniquePeer(peers, peers[2])
-	if !reflect.DeepEqual(after, peers) {
-		t.Fatalf("unexpected append")
-	}
-	after = AddUniquePeer(peers, NewInmemAddr())
-	if len(after) != 4 {
-		t.Fatalf("expected append")
-	}
-}
-
-func TestEncodeDecodePeers(t *testing.T) {
-	peers := []string{NewInmemAddr(), NewInmemAddr(), NewInmemAddr()}
-	_, trans := NewInmemTransport("")
-
-	// Try to encode/decode
-	buf := encodePeers(peers, trans)
-	decoded := decodePeers(buf, trans)
-
-	if !reflect.DeepEqual(peers, decoded) {
-		t.Fatalf("mismatch %v %v", peers, decoded)
-	}
-}
-
-func TestBackoff(t *testing.T) {
-	b := backoff(10*time.Millisecond, 1, 8)
-	if b != 10*time.Millisecond {
-		t.Fatalf("bad: %v", b)
-	}
-
-	b = backoff(20*time.Millisecond, 2, 8)
-	if b != 20*time.Millisecond {
-		t.Fatalf("bad: %v", b)
-	}
-
-	b = backoff(10*time.Millisecond, 8, 8)
-	if b != 640*time.Millisecond {
-		t.Fatalf("bad: %v", b)
-	}
-
-	b = backoff(10*time.Millisecond, 9, 8)
-	if b != 640*time.Millisecond {
-		t.Fatalf("bad: %v", b)
-	}
-}
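
For context on the removed helpers: the `backoff` function deleted from `util.go` above computes a capped exponential backoff, doubling the base duration once per round beyond the second, with the number of doublings limited by `limit`. A minimal standalone sketch follows; the function body and the expected values are taken directly from the deleted code and its `TestBackoff` assertions, while the `main` package wrapper is only illustrative:

```go
package main

import (
	"fmt"
	"time"
)

// min returns the smaller of two uint64 values, as in the deleted util.go.
func min(a, b uint64) uint64 {
	if a <= b {
		return a
	}
	return b
}

// backoff scales base by the current round: one doubling per round beyond
// the second, capped at limit doublings. Body copied from the deleted helper.
func backoff(base time.Duration, round, limit uint64) time.Duration {
	power := min(round, limit)
	for power > 2 {
		base *= 2
		power--
	}
	return base
}

func main() {
	// These values match the assertions in the deleted TestBackoff.
	fmt.Println(backoff(10*time.Millisecond, 1, 8)) // 10ms
	fmt.Println(backoff(20*time.Millisecond, 2, 8)) // 20ms
	fmt.Println(backoff(10*time.Millisecond, 8, 8)) // 640ms
	fmt.Println(backoff(10*time.Millisecond, 9, 8)) // 640ms (capped by limit)
}
```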