Initial commit
This commit is contained in:
51
lib/rep/functions.go
Normal file
51
lib/rep/functions.go
Normal file
@@ -0,0 +1,51 @@
|
||||
package rep
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"git.crumpington.com/public/jldb/lib/errs"
|
||||
"net"
|
||||
"path/filepath"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
func lockFilePath(rootDir string) string {
|
||||
return filepath.Join(rootDir, "lock")
|
||||
}
|
||||
|
||||
func walRootDir(rootDir string) string {
|
||||
return filepath.Join(rootDir, "wal")
|
||||
}
|
||||
|
||||
func stateFilePath(rootDir string) string {
|
||||
return filepath.Join(rootDir, "state")
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
func sendJSON(
|
||||
item any,
|
||||
conn net.Conn,
|
||||
timeout time.Duration,
|
||||
) error {
|
||||
|
||||
buf := bufPoolGet()
|
||||
defer bufPoolPut(buf)
|
||||
|
||||
if err := json.NewEncoder(buf).Encode(item); err != nil {
|
||||
return errs.Unexpected.WithErr(err)
|
||||
}
|
||||
|
||||
sizeBuf := make([]byte, 2)
|
||||
binary.LittleEndian.PutUint16(sizeBuf, uint16(buf.Len()))
|
||||
|
||||
conn.SetWriteDeadline(time.Now().Add(timeout))
|
||||
buffers := net.Buffers{sizeBuf, buf.Bytes()}
|
||||
if _, err := buffers.WriteTo(conn); err != nil {
|
||||
return errs.IO.WithErr(err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
178
lib/rep/http-client.go
Normal file
178
lib/rep/http-client.go
Normal file
@@ -0,0 +1,178 @@
|
||||
package rep
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"git.crumpington.com/public/jldb/lib/errs"
|
||||
"git.crumpington.com/public/jldb/lib/httpconn"
|
||||
"git.crumpington.com/public/jldb/lib/wal"
|
||||
"net"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
type client struct {
|
||||
// Mutex-protected variables.
|
||||
lock sync.Mutex
|
||||
closed bool
|
||||
conn net.Conn
|
||||
|
||||
// The following are constant.
|
||||
endpoint string
|
||||
psk []byte
|
||||
timeout time.Duration
|
||||
|
||||
buf []byte
|
||||
}
|
||||
|
||||
func newClient(endpoint, psk string, timeout time.Duration) *client {
|
||||
b := make([]byte, 256)
|
||||
copy(b, []byte(psk))
|
||||
|
||||
return &client{
|
||||
endpoint: endpoint,
|
||||
psk: b,
|
||||
timeout: timeout,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *client) GetInfo() (info Info, err error) {
|
||||
err = c.withConn(cmdGetInfo, func(conn net.Conn) error {
|
||||
return c.recvJSON(&info, conn, c.timeout)
|
||||
})
|
||||
return info, err
|
||||
}
|
||||
|
||||
func (c *client) RecvState(recv func(net.Conn) error) error {
|
||||
return c.withConn(cmdSendState, recv)
|
||||
}
|
||||
|
||||
func (c *client) StreamWAL(w *wal.WAL) error {
|
||||
return c.withConn(cmdStreamWAL, func(conn net.Conn) error {
|
||||
return w.Recv(conn, c.timeout)
|
||||
})
|
||||
}
|
||||
|
||||
func (c *client) Close() {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
c.closed = true
|
||||
|
||||
if c.conn != nil {
|
||||
c.conn.Close()
|
||||
c.conn = nil
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
func (c *client) writeCmd(cmd byte) error {
|
||||
c.conn.SetWriteDeadline(time.Now().Add(c.timeout))
|
||||
if _, err := c.conn.Write([]byte{cmd}); err != nil {
|
||||
return errs.IO.WithErr(err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *client) dial() error {
|
||||
c.conn = nil
|
||||
|
||||
conn, err := httpconn.Dial(c.endpoint)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
conn.SetWriteDeadline(time.Now().Add(c.timeout))
|
||||
if _, err := conn.Write(c.psk); err != nil {
|
||||
conn.Close()
|
||||
return errs.IO.WithErr(err)
|
||||
}
|
||||
|
||||
c.conn = conn
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *client) withConn(cmd byte, fn func(net.Conn) error) error {
|
||||
conn, err := c.getConn(cmd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := fn(conn); err != nil {
|
||||
conn.Close()
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *client) getConn(cmd byte) (net.Conn, error) {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
|
||||
if c.closed {
|
||||
return nil, errs.IO.WithErr(io.EOF)
|
||||
}
|
||||
|
||||
dialed := false
|
||||
|
||||
if c.conn == nil {
|
||||
if err := c.dial(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dialed = true
|
||||
}
|
||||
|
||||
if err := c.writeCmd(cmd); err != nil {
|
||||
if dialed {
|
||||
c.conn = nil
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := c.dial(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := c.writeCmd(cmd); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return c.conn, nil
|
||||
}
|
||||
|
||||
func (c *client) recvJSON(
|
||||
item any,
|
||||
conn net.Conn,
|
||||
timeout time.Duration,
|
||||
) error {
|
||||
|
||||
if cap(c.buf) < 2 {
|
||||
c.buf = make([]byte, 0, 1024)
|
||||
}
|
||||
buf := c.buf[:2]
|
||||
|
||||
conn.SetReadDeadline(time.Now().Add(timeout))
|
||||
|
||||
if _, err := io.ReadFull(conn, buf); err != nil {
|
||||
return errs.IO.WithErr(err)
|
||||
}
|
||||
|
||||
size := binary.LittleEndian.Uint16(buf)
|
||||
|
||||
if cap(buf) < int(size) {
|
||||
buf = make([]byte, size)
|
||||
c.buf = buf
|
||||
}
|
||||
buf = buf[:size]
|
||||
|
||||
if _, err := io.ReadFull(conn, buf); err != nil {
|
||||
return errs.IO.WithErr(err)
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(buf, item); err != nil {
|
||||
return errs.Unexpected.WithErr(err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
79
lib/rep/http-handler.go
Normal file
79
lib/rep/http-handler.go
Normal file
@@ -0,0 +1,79 @@
|
||||
package rep
|
||||
|
||||
import (
|
||||
"crypto/subtle"
|
||||
"git.crumpington.com/public/jldb/lib/httpconn"
|
||||
"log"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
|
||||
cmdGetInfo = 10
|
||||
cmdSendState = 20
|
||||
cmdStreamWAL = 30
|
||||
)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func (rep *Replicator) Handle(w http.ResponseWriter, r *http.Request) {
|
||||
logf := func(pattern string, args ...any) {
|
||||
log.Printf("[HTTP-HANDLER] "+pattern, args...)
|
||||
}
|
||||
|
||||
conn, err := httpconn.Accept(w, r)
|
||||
if err != nil {
|
||||
logf("Failed to accept connection: %s", err)
|
||||
return
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
psk := make([]byte, 256)
|
||||
|
||||
conn.SetReadDeadline(time.Now().Add(rep.conf.NetTimeout))
|
||||
if _, err := conn.Read(psk); err != nil {
|
||||
logf("Failed to read PSK: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
expected := rep.pskBytes
|
||||
if subtle.ConstantTimeCompare(expected, psk) != 1 {
|
||||
logf("PSK mismatch.")
|
||||
return
|
||||
}
|
||||
|
||||
cmd := make([]byte, 1)
|
||||
|
||||
for {
|
||||
conn.SetReadDeadline(time.Now().Add(rep.conf.NetTimeout))
|
||||
if _, err := conn.Read(cmd); err != nil {
|
||||
logf("Read failed: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
switch cmd[0] {
|
||||
|
||||
case cmdGetInfo:
|
||||
if err := sendJSON(rep.Info(), conn, rep.conf.NetTimeout); err != nil {
|
||||
logf("Failed to send info: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
case cmdSendState:
|
||||
|
||||
if err := rep.sendState(conn); err != nil {
|
||||
if !rep.stopped() {
|
||||
logf("Failed to send state: %s", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
case cmdStreamWAL:
|
||||
err := rep.wal.Send(conn, rep.conf.NetTimeout)
|
||||
if !rep.stopped() {
|
||||
logf("Failed when sending WAL: %s", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
9
lib/rep/info.go
Normal file
9
lib/rep/info.go
Normal file
@@ -0,0 +1,9 @@
|
||||
package rep
|
||||
|
||||
type Info struct {
|
||||
AppSeqNum int64 // Page file sequence number.
|
||||
AppTimestampMS int64 // Page file timestamp.
|
||||
WALFirstSeqNum int64 // WAL min sequence number.
|
||||
WALLastSeqNum int64 // WAL max sequence number.
|
||||
WALLastTimestampMS int64 // WAL timestamp.
|
||||
}
|
||||
20
lib/rep/localstate.go
Normal file
20
lib/rep/localstate.go
Normal file
@@ -0,0 +1,20 @@
|
||||
package rep
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
type localState struct {
|
||||
SeqNum int64
|
||||
TimestampMS int64
|
||||
}
|
||||
|
||||
func (h localState) writeTo(b []byte) {
|
||||
binary.LittleEndian.PutUint64(b[0:8], uint64(h.SeqNum))
|
||||
binary.LittleEndian.PutUint64(b[8:16], uint64(h.TimestampMS))
|
||||
}
|
||||
|
||||
func (h *localState) readFrom(b []byte) {
|
||||
h.SeqNum = int64(binary.LittleEndian.Uint64(b[0:8]))
|
||||
h.TimestampMS = int64(binary.LittleEndian.Uint64(b[8:16]))
|
||||
}
|
||||
21
lib/rep/pools.go
Normal file
21
lib/rep/pools.go
Normal file
@@ -0,0 +1,21 @@
|
||||
package rep
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"sync"
|
||||
)
|
||||
|
||||
var bufPool = sync.Pool{
|
||||
New: func() any {
|
||||
return &bytes.Buffer{}
|
||||
},
|
||||
}
|
||||
|
||||
func bufPoolGet() *bytes.Buffer {
|
||||
return bufPool.Get().(*bytes.Buffer)
|
||||
}
|
||||
|
||||
func bufPoolPut(b *bytes.Buffer) {
|
||||
b.Reset()
|
||||
bufPool.Put(b)
|
||||
}
|
||||
41
lib/rep/rep-sendrecv.go
Normal file
41
lib/rep/rep-sendrecv.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package rep
|
||||
|
||||
import (
|
||||
"io"
|
||||
"git.crumpington.com/public/jldb/lib/errs"
|
||||
"net"
|
||||
"time"
|
||||
)
|
||||
|
||||
func (rep *Replicator) sendState(conn net.Conn) error {
|
||||
state := rep.getState()
|
||||
|
||||
buf := make([]byte, 512)
|
||||
state.writeTo(buf)
|
||||
|
||||
conn.SetWriteDeadline(time.Now().Add(rep.conf.NetTimeout))
|
||||
if _, err := conn.Write(buf); err != nil {
|
||||
return errs.IO.WithErr(err)
|
||||
}
|
||||
conn.SetWriteDeadline(time.Time{})
|
||||
|
||||
return rep.app.SendState(conn)
|
||||
}
|
||||
|
||||
func (rep *Replicator) recvState(conn net.Conn) error {
|
||||
buf := make([]byte, 512)
|
||||
conn.SetReadDeadline(time.Now().Add(rep.conf.NetTimeout))
|
||||
if _, err := io.ReadFull(conn, buf); err != nil {
|
||||
return errs.IO.WithErr(err)
|
||||
}
|
||||
conn.SetReadDeadline(time.Time{})
|
||||
|
||||
if err := rep.app.RecvState(conn); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
state := localState{}
|
||||
state.readFrom(buf)
|
||||
|
||||
return rep.setState(state)
|
||||
}
|
||||
184
lib/rep/replicator-open.go
Normal file
184
lib/rep/replicator-open.go
Normal file
@@ -0,0 +1,184 @@
|
||||
package rep
|
||||
|
||||
import (
|
||||
"git.crumpington.com/public/jldb/lib/atomicheader"
|
||||
"git.crumpington.com/public/jldb/lib/errs"
|
||||
"git.crumpington.com/public/jldb/lib/flock"
|
||||
"git.crumpington.com/public/jldb/lib/wal"
|
||||
"os"
|
||||
"time"
|
||||
)
|
||||
|
||||
func (rep *Replicator) loadConfigDefaults() {
|
||||
conf := rep.conf
|
||||
|
||||
if conf.NetTimeout <= 0 {
|
||||
conf.NetTimeout = time.Minute
|
||||
}
|
||||
if conf.WALSegMinCount <= 0 {
|
||||
conf.WALSegMinCount = 1024
|
||||
}
|
||||
if conf.WALSegMaxAgeSec <= 0 {
|
||||
conf.WALSegMaxAgeSec = 3600
|
||||
}
|
||||
if conf.WALSegGCAgeSec <= 0 {
|
||||
conf.WALSegGCAgeSec = 7 * 86400
|
||||
}
|
||||
|
||||
rep.conf = conf
|
||||
|
||||
rep.pskBytes = make([]byte, 256)
|
||||
copy(rep.pskBytes, []byte(conf.ReplicationPSK))
|
||||
}
|
||||
|
||||
func (rep *Replicator) initDirectories() error {
|
||||
if err := os.MkdirAll(walRootDir(rep.conf.RootDir), 0700); err != nil {
|
||||
return errs.IO.WithErr(err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rep *Replicator) acquireLock() error {
|
||||
lockFile, err := flock.TryLock(lockFilePath(rep.conf.RootDir))
|
||||
if err != nil {
|
||||
return errs.IO.WithMsg("locked: %s", lockFilePath(rep.conf.RootDir))
|
||||
}
|
||||
if lockFile == nil {
|
||||
return errs.Locked
|
||||
}
|
||||
rep.lockFile = lockFile
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rep *Replicator) loadLocalState() error {
|
||||
f, err := os.OpenFile(stateFilePath(rep.conf.RootDir), os.O_RDWR|os.O_CREATE, 0600)
|
||||
if err != nil {
|
||||
return errs.IO.WithErr(err)
|
||||
}
|
||||
|
||||
info, err := f.Stat()
|
||||
if err != nil {
|
||||
f.Close()
|
||||
return errs.IO.WithErr(err)
|
||||
}
|
||||
|
||||
if info.Size() < atomicheader.ReservedBytes {
|
||||
if err := atomicheader.Init(f); err != nil {
|
||||
f.Close()
|
||||
return errs.IO.WithErr(err)
|
||||
}
|
||||
}
|
||||
|
||||
rep.stateHandler, err = atomicheader.Open(f)
|
||||
if err != nil {
|
||||
f.Close()
|
||||
return err
|
||||
}
|
||||
|
||||
rep.stateFile = f
|
||||
var state localState
|
||||
|
||||
err = rep.stateHandler.Read(func(page []byte) error {
|
||||
state.readFrom(page)
|
||||
return nil
|
||||
})
|
||||
if err == nil {
|
||||
rep.state.Store(&state)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Write a clean state.
|
||||
state = localState{}
|
||||
rep.state.Store(&state)
|
||||
return rep.stateHandler.Write(func(page []byte) error {
|
||||
state.writeTo(page)
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func (rep *Replicator) walConfig() wal.Config {
|
||||
return wal.Config{
|
||||
SegMinCount: rep.conf.WALSegMinCount,
|
||||
SegMaxAgeSec: rep.conf.WALSegMaxAgeSec,
|
||||
}
|
||||
}
|
||||
|
||||
func (rep *Replicator) openWAL() (err error) {
|
||||
rep.wal, err = wal.Open(walRootDir(rep.conf.RootDir), rep.walConfig())
|
||||
if err != nil {
|
||||
rep.wal, err = wal.Create(walRootDir(rep.conf.RootDir), 1, rep.walConfig())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rep *Replicator) recvStateIfNecessary() error {
|
||||
if rep.conf.Primary {
|
||||
return nil
|
||||
}
|
||||
|
||||
sInfo := rep.Info()
|
||||
pInfo, err := rep.client.GetInfo()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if pInfo.WALFirstSeqNum <= sInfo.WALLastSeqNum {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Make a new WAL.
|
||||
rep.wal.Close()
|
||||
|
||||
if err = rep.client.RecvState(rep.recvState); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
state := rep.getState()
|
||||
|
||||
rep.wal, err = wal.Create(walRootDir(rep.conf.RootDir), state.SeqNum+1, rep.walConfig())
|
||||
return err
|
||||
}
|
||||
|
||||
// Replays un-acked entries in the WAL. Acks after all records are replayed.
|
||||
func (rep *Replicator) replay() error {
|
||||
state := rep.getState()
|
||||
it, err := rep.wal.Iterator(state.SeqNum + 1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer it.Close()
|
||||
|
||||
for it.Next(0) {
|
||||
rec := it.Record()
|
||||
if err := rep.app.Replay(rec); err != nil {
|
||||
return err
|
||||
}
|
||||
state.SeqNum = rec.SeqNum
|
||||
state.TimestampMS = rec.TimestampMS
|
||||
}
|
||||
|
||||
if it.Error() != nil {
|
||||
return it.Error()
|
||||
}
|
||||
|
||||
return rep.ack(state.SeqNum, state.TimestampMS)
|
||||
}
|
||||
|
||||
func (rep *Replicator) startWALGC() {
|
||||
rep.done.Add(1)
|
||||
go rep.runWALGC()
|
||||
}
|
||||
|
||||
func (rep *Replicator) startWALFollower() {
|
||||
rep.done.Add(1)
|
||||
go rep.runWALFollower()
|
||||
}
|
||||
|
||||
func (rep *Replicator) startWALRecvr() {
|
||||
rep.done.Add(1)
|
||||
go rep.runWALRecvr()
|
||||
}
|
||||
66
lib/rep/replicator-walfollower.go
Normal file
66
lib/rep/replicator-walfollower.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package rep
|
||||
|
||||
import (
|
||||
"log"
|
||||
"time"
|
||||
)
|
||||
|
||||
func (rep *Replicator) runWALFollower() {
|
||||
defer rep.done.Done()
|
||||
|
||||
for {
|
||||
rep.followOnce()
|
||||
|
||||
select {
|
||||
case <-rep.stop:
|
||||
return
|
||||
default:
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (rep *Replicator) followOnce() {
|
||||
logf := func(pattern string, args ...any) {
|
||||
log.Printf("[WAL-FOLLOWER] "+pattern, args...)
|
||||
}
|
||||
|
||||
state := rep.getState()
|
||||
it, err := rep.wal.Iterator(state.SeqNum + 1)
|
||||
if err != nil {
|
||||
logf("Failed to create WAL iterator: %v", err)
|
||||
return
|
||||
}
|
||||
defer it.Close()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-rep.stop:
|
||||
logf("Stopped")
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
if it.Next(time.Second) {
|
||||
rec := it.Record()
|
||||
|
||||
if err := rep.app.Apply(rec); err != nil {
|
||||
logf("App failed to apply change: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if err := rep.ack(rec.SeqNum, rec.TimestampMS); err != nil {
|
||||
logf("App failed to update local state: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
select {
|
||||
case rep.appendNotify <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
} else if it.Error() != nil {
|
||||
logf("Iteration error: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
28
lib/rep/replicator-walgc.go
Normal file
28
lib/rep/replicator-walgc.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package rep
|
||||
|
||||
import (
|
||||
"log"
|
||||
"time"
|
||||
)
|
||||
|
||||
func (rep *Replicator) runWALGC() {
|
||||
defer rep.done.Done()
|
||||
|
||||
ticker := time.NewTicker(time.Minute)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
state := rep.getState()
|
||||
before := time.Now().Unix() - rep.conf.WALSegMaxAgeSec
|
||||
if err := rep.wal.DeleteBefore(before, state.SeqNum); err != nil {
|
||||
log.Printf("[WAL-GC] failed to delete wal segments: %v", err)
|
||||
}
|
||||
// OK
|
||||
case <-rep.stop:
|
||||
log.Print("[WAL-GC] Stopped")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
38
lib/rep/replicator-walrecvr.go
Normal file
38
lib/rep/replicator-walrecvr.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package rep
|
||||
|
||||
import (
|
||||
"log"
|
||||
"time"
|
||||
)
|
||||
|
||||
func (rep *Replicator) runWALRecvr() {
|
||||
go func() {
|
||||
<-rep.stop
|
||||
rep.client.Close()
|
||||
}()
|
||||
|
||||
defer rep.done.Done()
|
||||
|
||||
for {
|
||||
rep.runWALRecvrOnce()
|
||||
select {
|
||||
case <-rep.stop:
|
||||
log.Print("[WAL-RECVR] Stopped")
|
||||
return
|
||||
default:
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (rep *Replicator) runWALRecvrOnce() {
|
||||
logf := func(pattern string, args ...any) {
|
||||
log.Printf("[WAL-RECVR] "+pattern, args...)
|
||||
}
|
||||
|
||||
if err := rep.client.StreamWAL(rep.wal); err != nil {
|
||||
if !rep.stopped() {
|
||||
logf("Recv failed: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
235
lib/rep/replicator.go
Normal file
235
lib/rep/replicator.go
Normal file
@@ -0,0 +1,235 @@
|
||||
package rep
|
||||
|
||||
import (
|
||||
"io"
|
||||
"git.crumpington.com/public/jldb/lib/atomicheader"
|
||||
"git.crumpington.com/public/jldb/lib/errs"
|
||||
"git.crumpington.com/public/jldb/lib/wal"
|
||||
"net"
|
||||
"os"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
RootDir string
|
||||
Primary bool
|
||||
ReplicationPSK string
|
||||
NetTimeout time.Duration // Default is 1 minute.
|
||||
|
||||
// WAL settings.
|
||||
WALSegMinCount int64 // Minimum Change sets in a segment. Default is 1024.
|
||||
WALSegMaxAgeSec int64 // Maximum age of a segment. Default is 1 hour.
|
||||
WALSegGCAgeSec int64 // Segment age for garbage collection. Default is 7 days.
|
||||
|
||||
// If true, Append won't return until a successful App.Apply.
|
||||
SynchronousAppend bool
|
||||
|
||||
// Necessary for secondary.
|
||||
PrimaryEndpoint string
|
||||
}
|
||||
|
||||
type App struct {
|
||||
// SendState: The primary may need to send storage state to a secondary node.
|
||||
SendState func(conn net.Conn) error
|
||||
|
||||
// (1) RecvState: Secondary nodes may need to load state from the primary if the
|
||||
// WAL is too far behind.
|
||||
RecvState func(conn net.Conn) error
|
||||
|
||||
// (2) InitStorage: Prepare application storage for possible calls to
|
||||
// Replay.
|
||||
InitStorage func() error
|
||||
|
||||
// (3) Replay: write the change to storage. Replay must be idempotent.
|
||||
Replay func(rec wal.Record) error
|
||||
|
||||
// (4) LoadFromStorage: load the application's state from it's persistent
|
||||
// storage.
|
||||
LoadFromStorage func() error
|
||||
|
||||
// (5) Apply: write the change to persistent storage. Apply must be
|
||||
// idempotent. In normal operation each change is applied exactly once.
|
||||
Apply func(rec wal.Record) error
|
||||
}
|
||||
|
||||
type Replicator struct {
|
||||
app App
|
||||
conf Config
|
||||
|
||||
lockFile *os.File
|
||||
pskBytes []byte
|
||||
wal *wal.WAL
|
||||
|
||||
appendNotify chan struct{}
|
||||
|
||||
// lock protects state. The lock is held when replaying (R), following (R),
|
||||
// and sending state (W).
|
||||
stateFile *os.File
|
||||
state *atomic.Pointer[localState]
|
||||
stateHandler *atomicheader.Handler
|
||||
|
||||
stop chan struct{}
|
||||
done *sync.WaitGroup
|
||||
|
||||
client *client // For secondary connection to primary.
|
||||
}
|
||||
|
||||
func Open(app App, conf Config) (*Replicator, error) {
|
||||
rep := &Replicator{
|
||||
app: app,
|
||||
conf: conf,
|
||||
state: &atomic.Pointer[localState]{},
|
||||
stop: make(chan struct{}),
|
||||
done: &sync.WaitGroup{},
|
||||
appendNotify: make(chan struct{}, 1),
|
||||
}
|
||||
|
||||
rep.loadConfigDefaults()
|
||||
|
||||
rep.state.Store(&localState{})
|
||||
rep.client = newClient(rep.conf.PrimaryEndpoint, rep.conf.ReplicationPSK, rep.conf.NetTimeout)
|
||||
|
||||
if err := rep.initDirectories(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := rep.acquireLock(); err != nil {
|
||||
rep.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := rep.loadLocalState(); err != nil {
|
||||
rep.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := rep.openWAL(); err != nil {
|
||||
rep.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := rep.recvStateIfNecessary(); err != nil {
|
||||
rep.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := rep.app.InitStorage(); err != nil {
|
||||
rep.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := rep.replay(); err != nil {
|
||||
rep.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := rep.app.LoadFromStorage(); err != nil {
|
||||
rep.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rep.startWALGC()
|
||||
rep.startWALFollower()
|
||||
|
||||
if !rep.conf.Primary {
|
||||
rep.startWALRecvr()
|
||||
}
|
||||
|
||||
return rep, nil
|
||||
}
|
||||
|
||||
func (rep *Replicator) Append(size int64, r io.Reader) (int64, int64, error) {
|
||||
if !rep.conf.Primary {
|
||||
return 0, 0, errs.NotAllowed.WithMsg("cannot write to secondary")
|
||||
}
|
||||
|
||||
seqNum, timestampMS, err := rep.wal.Append(size, r)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
if !rep.conf.SynchronousAppend {
|
||||
return seqNum, timestampMS, nil
|
||||
}
|
||||
|
||||
<-rep.appendNotify
|
||||
return seqNum, timestampMS, nil
|
||||
}
|
||||
|
||||
func (rep *Replicator) Primary() bool {
|
||||
return rep.conf.Primary
|
||||
}
|
||||
|
||||
// TODO: Probably remove this.
|
||||
// The caller may call Ack after Apply to acknowledge that the change has also
|
||||
// been applied to the caller's application. Alternatively, the caller may use
|
||||
// follow to apply changes to their application state.
|
||||
func (rep *Replicator) ack(seqNum, timestampMS int64) error {
|
||||
state := rep.getState()
|
||||
state.SeqNum = seqNum
|
||||
state.TimestampMS = timestampMS
|
||||
return rep.setState(state)
|
||||
}
|
||||
|
||||
func (rep *Replicator) getState() localState {
|
||||
return *rep.state.Load()
|
||||
}
|
||||
|
||||
func (rep *Replicator) setState(state localState) error {
|
||||
err := rep.stateHandler.Write(func(page []byte) error {
|
||||
state.writeTo(page)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rep.state.Store(&state)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rep *Replicator) Info() Info {
|
||||
state := rep.getState()
|
||||
walInfo := rep.wal.Info()
|
||||
|
||||
return Info{
|
||||
AppSeqNum: state.SeqNum,
|
||||
AppTimestampMS: state.TimestampMS,
|
||||
WALFirstSeqNum: walInfo.FirstSeqNum,
|
||||
WALLastSeqNum: walInfo.LastSeqNum,
|
||||
WALLastTimestampMS: walInfo.LastTimestampMS,
|
||||
}
|
||||
}
|
||||
|
||||
func (rep *Replicator) Close() error {
|
||||
if rep.stopped() {
|
||||
return nil
|
||||
}
|
||||
|
||||
close(rep.stop)
|
||||
rep.done.Wait()
|
||||
|
||||
if rep.lockFile != nil {
|
||||
rep.lockFile.Close()
|
||||
}
|
||||
|
||||
if rep.wal != nil {
|
||||
rep.wal.Close()
|
||||
}
|
||||
|
||||
if rep.client != nil {
|
||||
rep.client.Close()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rep *Replicator) stopped() bool {
|
||||
select {
|
||||
case <-rep.stop:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
128
lib/rep/testapp-harness_test.go
Normal file
128
lib/rep/testapp-harness_test.go
Normal file
@@ -0,0 +1,128 @@
|
||||
package rep
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestAppHarnessRun(t *testing.T) {
|
||||
TestAppHarness{}.Run(t)
|
||||
}
|
||||
|
||||
type TestAppHarness struct {
|
||||
}
|
||||
|
||||
func (h TestAppHarness) Run(t *testing.T) {
|
||||
val := reflect.ValueOf(h)
|
||||
typ := val.Type()
|
||||
for i := 0; i < typ.NumMethod(); i++ {
|
||||
method := typ.Method(i)
|
||||
|
||||
if !strings.HasPrefix(method.Name, "Test") {
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run(method.Name, func(t *testing.T) {
|
||||
//t.Parallel()
|
||||
rootDir := t.TempDir()
|
||||
|
||||
app1 := newApp(t, rand.Int63(), Config{
|
||||
Primary: true,
|
||||
RootDir: filepath.Join(rootDir, "app1"),
|
||||
ReplicationPSK: "123",
|
||||
WALSegMinCount: 1,
|
||||
WALSegMaxAgeSec: 1,
|
||||
WALSegGCAgeSec: 1,
|
||||
})
|
||||
defer app1.Close()
|
||||
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/rep/", app1.rep.Handle)
|
||||
testServer := httptest.NewServer(mux)
|
||||
defer testServer.Close()
|
||||
|
||||
app2 := newApp(t, rand.Int63(), Config{
|
||||
Primary: false,
|
||||
RootDir: filepath.Join(rootDir, "app2"),
|
||||
ReplicationPSK: "123",
|
||||
PrimaryEndpoint: testServer.URL + "/rep/",
|
||||
WALSegMinCount: 1,
|
||||
WALSegMaxAgeSec: 1,
|
||||
WALSegGCAgeSec: 1,
|
||||
})
|
||||
|
||||
val.MethodByName(method.Name).Call([]reflect.Value{
|
||||
reflect.ValueOf(t),
|
||||
reflect.ValueOf(app1),
|
||||
reflect.ValueOf(app2),
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func (TestAppHarness) TestRandomUpdates(t *testing.T, app1, app2 *TestApp) {
|
||||
go app1.UpdateRandomFor(4 * time.Second)
|
||||
app2.WaitForEOF()
|
||||
app1.AssertEqual(t, app2)
|
||||
}
|
||||
|
||||
/*
|
||||
func (TestAppHarness) TestRandomUpdatesReplay(t *testing.T, app1, app2 *TestApp) {
|
||||
app1.UpdateRandomFor(4 * time.Second)
|
||||
app2.WaitForEOF()
|
||||
|
||||
app1.Close()
|
||||
app1 = newApp(t, app1.ID, app1.rep.conf)
|
||||
|
||||
app1.AssertEqual(t, app2)
|
||||
info := app1.rep.Info()
|
||||
if info.AppSeqNum != 0 {
|
||||
t.Fatal(info)
|
||||
}
|
||||
}
|
||||
|
||||
func (TestAppHarness) TestRandomUpdatesAck(t *testing.T, app1, app2 *TestApp) {
|
||||
go app1.UpdateRandomFor(4 * time.Second)
|
||||
app2.WaitForEOF()
|
||||
app1.AssertEqual(t, app2)
|
||||
info := app1.rep.Info()
|
||||
if info.AppSeqNum == 0 || info.AppSeqNum != info.WALLastSeqNum {
|
||||
t.Fatal(info)
|
||||
}
|
||||
}
|
||||
|
||||
func (TestAppHarness) TestWriteThenOpenFollower(t *testing.T, app1, app2 *TestApp) {
|
||||
app2.Close()
|
||||
app1.UpdateRandomFor(4 * time.Second)
|
||||
|
||||
app2 = newApp(t, app2.ID, app2.rep.conf)
|
||||
app2.WaitForEOF()
|
||||
app1.AssertEqual(t, app2)
|
||||
}
|
||||
|
||||
func (TestAppHarness) TestUpdateOpenFollowerConcurrently(t *testing.T, app1, app2 *TestApp) {
|
||||
app2.Close()
|
||||
go app1.UpdateRandomFor(4 * time.Second)
|
||||
time.Sleep(2 * time.Second)
|
||||
app2 = newApp(t, app2.ID, app2.rep.conf)
|
||||
app2.WaitForEOF()
|
||||
app1.AssertEqual(t, app2)
|
||||
}
|
||||
|
||||
func (TestAppHarness) TestUpdateCloseOpenFollowerConcurrently(t *testing.T, app1, app2 *TestApp) {
|
||||
go app1.UpdateRandomFor(4 * time.Second)
|
||||
|
||||
time.Sleep(time.Second)
|
||||
app2.Close()
|
||||
time.Sleep(time.Second)
|
||||
app2 = newApp(t, app2.ID, app2.rep.conf)
|
||||
app2.WaitForEOF()
|
||||
app1.AssertEqual(t, app2)
|
||||
}
|
||||
*/
|
||||
239
lib/rep/testapp_test.go
Normal file
239
lib/rep/testapp_test.go
Normal file
@@ -0,0 +1,239 @@
|
||||
package rep
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"git.crumpington.com/public/jldb/lib/wal"
|
||||
"math/rand"
|
||||
"net"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
type TestCmd struct {
|
||||
Set int64 // 1 for set, 0 for delete
|
||||
Key int64
|
||||
Val int64
|
||||
}
|
||||
|
||||
func (c TestCmd) marshal() []byte {
|
||||
b := make([]byte, 24)
|
||||
binary.LittleEndian.PutUint64(b, uint64(c.Set))
|
||||
binary.LittleEndian.PutUint64(b[8:], uint64(c.Key))
|
||||
binary.LittleEndian.PutUint64(b[16:], uint64(c.Val))
|
||||
return b
|
||||
}
|
||||
|
||||
func (c *TestCmd) unmarshal(b []byte) {
|
||||
c.Set = int64(binary.LittleEndian.Uint64(b))
|
||||
c.Key = int64(binary.LittleEndian.Uint64(b[8:]))
|
||||
c.Val = int64(binary.LittleEndian.Uint64(b[16:]))
|
||||
}
|
||||
|
||||
func CmdFromRec(rec wal.Record) TestCmd {
|
||||
cmd := TestCmd{}
|
||||
|
||||
buf, err := io.ReadAll(rec.Reader)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if len(buf) != 24 {
|
||||
panic(len(buf))
|
||||
}
|
||||
cmd.unmarshal(buf)
|
||||
return cmd
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
var storage = map[int64]map[int64]int64{}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
type TestApp struct {
|
||||
ID int64
|
||||
storage map[int64]int64
|
||||
|
||||
rep *Replicator
|
||||
|
||||
lock sync.Mutex
|
||||
m map[int64]int64
|
||||
}
|
||||
|
||||
func newApp(t *testing.T, id int64, conf Config) *TestApp {
|
||||
t.Helper()
|
||||
a := &TestApp{
|
||||
ID: id,
|
||||
m: map[int64]int64{},
|
||||
}
|
||||
|
||||
var err error
|
||||
a.rep, err = Open(App{
|
||||
SendState: a.sendState,
|
||||
RecvState: a.recvState,
|
||||
InitStorage: a.initStorage,
|
||||
Replay: a.replay,
|
||||
LoadFromStorage: a.loadFromStorage,
|
||||
Apply: a.apply,
|
||||
}, conf)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
return a
|
||||
}
|
||||
|
||||
func (a *TestApp) _set(k, v int64) {
|
||||
a.lock.Lock()
|
||||
defer a.lock.Unlock()
|
||||
a.m[k] = v
|
||||
}
|
||||
|
||||
func (a *TestApp) _del(k int64) {
|
||||
a.lock.Lock()
|
||||
defer a.lock.Unlock()
|
||||
delete(a.m, k)
|
||||
}
|
||||
|
||||
func (a *TestApp) Get(k int64) int64 {
|
||||
a.lock.Lock()
|
||||
defer a.lock.Unlock()
|
||||
return a.m[k]
|
||||
}
|
||||
|
||||
func (app *TestApp) Close() {
|
||||
app.rep.Close()
|
||||
}
|
||||
|
||||
func (app *TestApp) Set(k, v int64) error {
|
||||
cmd := TestCmd{Set: 1, Key: k, Val: v}
|
||||
if _, _, err := app.rep.Append(24, bytes.NewBuffer(cmd.marshal())); err != nil {
|
||||
return err
|
||||
}
|
||||
app._set(k, v)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (app *TestApp) Del(k int64) error {
|
||||
cmd := TestCmd{Set: 0, Key: k, Val: 0}
|
||||
if _, _, err := app.rep.Append(24, bytes.NewBuffer(cmd.marshal())); err != nil {
|
||||
return err
|
||||
}
|
||||
app._del(k)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (app *TestApp) UpdateRandomFor(dt time.Duration) {
|
||||
tStart := time.Now()
|
||||
for time.Since(tStart) < dt {
|
||||
if rand.Float32() < 0.5 {
|
||||
if err := app.Set(1+rand.Int63n(10), 1+rand.Int63n(10)); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
} else {
|
||||
if err := app.Del(1 + rand.Int63n(10)); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
|
||||
app.Set(999, 999)
|
||||
}
|
||||
|
||||
func (app *TestApp) WaitForEOF() {
|
||||
for app.Get(999) != 999 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
func (app *TestApp) AssertEqual(t *testing.T, rhs *TestApp) {
|
||||
app.lock.Lock()
|
||||
defer app.lock.Unlock()
|
||||
rhs.lock.Lock()
|
||||
defer rhs.lock.Unlock()
|
||||
|
||||
if len(app.m) != len(rhs.m) {
|
||||
t.Fatal(len(app.m), len(rhs.m))
|
||||
}
|
||||
|
||||
for k := range app.m {
|
||||
if app.m[k] != rhs.m[k] {
|
||||
t.Fatal(k, app.m[k], rhs.m[k])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
func (app *TestApp) sendState(conn net.Conn) error {
|
||||
app.lock.Lock()
|
||||
b, _ := json.Marshal(app.m)
|
||||
app.lock.Unlock()
|
||||
|
||||
_, err := conn.Write(b)
|
||||
return err
|
||||
}
|
||||
|
||||
func (app *TestApp) recvState(conn net.Conn) error {
|
||||
m := map[int64]int64{}
|
||||
if err := json.NewDecoder(conn).Decode(&m); err != nil {
|
||||
return err
|
||||
}
|
||||
storage[app.ID] = m
|
||||
return nil
|
||||
}
|
||||
|
||||
func (app *TestApp) initStorage() error {
|
||||
if _, ok := storage[app.ID]; !ok {
|
||||
storage[app.ID] = map[int64]int64{}
|
||||
}
|
||||
app.storage = storage[app.ID]
|
||||
return nil
|
||||
}
|
||||
|
||||
func (app *TestApp) replay(rec wal.Record) error {
|
||||
cmd := CmdFromRec(rec)
|
||||
if cmd.Set != 0 {
|
||||
app.storage[cmd.Key] = cmd.Val
|
||||
} else {
|
||||
delete(app.storage, cmd.Key)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (app *TestApp) loadFromStorage() error {
|
||||
app.m = map[int64]int64{}
|
||||
for k, v := range app.storage {
|
||||
app.m[k] = v
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (app *TestApp) apply(rec wal.Record) error {
|
||||
cmd := CmdFromRec(rec)
|
||||
if cmd.Set != 0 {
|
||||
app.storage[cmd.Key] = cmd.Val
|
||||
} else {
|
||||
delete(app.storage, cmd.Key)
|
||||
}
|
||||
|
||||
// For primary, only update storage.
|
||||
if app.rep.Primary() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// For secondary, update the map.
|
||||
if cmd.Set != 0 {
|
||||
app._set(cmd.Key, cmd.Val)
|
||||
} else {
|
||||
app._del(cmd.Key)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user