-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
18 changed files
with
598 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
package cluster_id | ||
|
||
import "github.com/google/uuid" | ||
|
||
var ( | ||
instanceId = uuid.New().String() | ||
) | ||
|
||
func GetInstanceID() string { | ||
return instanceId | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package cluster | ||
|
||
type ip struct { | ||
Address string `json:"address"` | ||
Votes []vote `json:"vote"` | ||
} | ||
|
||
type vote struct { | ||
NodeID string `json:"node_id"` | ||
VotedAt int64 `json:"voted_at"` | ||
Failed bool `json:"failed"` | ||
} | ||
|
||
type node struct { | ||
Ips []ip `json:"ips"` | ||
LastPingAt int64 `json:"last_ping_at"` | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
package cluster | ||
|
||
import ( | ||
"errors" | ||
"time" | ||
|
||
"github.com/langgenius/dify-plugin-daemon/internal/utils/cache" | ||
) | ||
|
||
// gc the nodes has already deactivated | ||
func (c *Cluster) gcNodes() error { | ||
var total_errors error | ||
add_error := func(err error) { | ||
if err != nil { | ||
if total_errors == nil { | ||
total_errors = err | ||
} else { | ||
total_errors = errors.Join(total_errors, err) | ||
} | ||
} | ||
} | ||
|
||
// get all nodes status | ||
nodes, err := cache.GetMap[node](CLUSTER_STATUS_HASH_MAP_KEY) | ||
if err == cache.ErrNotFound { | ||
return nil | ||
} | ||
|
||
for node_id, node_status := range nodes { | ||
// delete the node if it is disconnected | ||
if time.Since(time.Unix(node_status.LastPingAt, 0)) > NODE_DISCONNECTED_TIMEOUT { | ||
// gc the node | ||
if err := c.gcNode(node_id); err != nil { | ||
add_error(err) | ||
continue | ||
} | ||
|
||
// delete the node status | ||
if err := cache.DelMapField(CLUSTER_STATUS_HASH_MAP_KEY, node_id); err != nil { | ||
add_error(err) | ||
} | ||
} | ||
} | ||
|
||
return total_errors | ||
} | ||
|
||
// remove the resource associated with the node | ||
func (c *Cluster) gcNode(node_id string) error { | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package cluster | ||
|
||
import "github.com/langgenius/dify-plugin-daemon/internal/types/app" | ||
|
||
type Cluster struct { | ||
port uint16 | ||
} | ||
|
||
var ( | ||
cluster *Cluster | ||
) | ||
|
||
func Launch(config *app.Config) { | ||
cluster = &Cluster{ | ||
port: uint16(config.ServerPort), | ||
} | ||
|
||
go func() { | ||
cluster.clusterLifetime() | ||
}() | ||
} | ||
|
||
func GetCluster() *Cluster { | ||
return cluster | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
package cluster | ||
|
||
import ( | ||
"strings" | ||
"time" | ||
|
||
"github.com/langgenius/dify-plugin-daemon/internal/utils/cache" | ||
) | ||
|
||
const ( | ||
CLUSTER_STATE_TENANT_LOCK_PREFIX = "cluster_state_tenant_lock" | ||
CLUSTER_STATE_PLUGIN_LOCK_PREFIX = "cluster_state_plugin_lock" | ||
CLUSTER_UPDATE_NODE_STATUS_LOCK_PREFIX = "cluster_update_node_status_lock" | ||
) | ||
|
||
func (c *Cluster) LockTenant(tenant_id string) error { | ||
key := strings.Join([]string{CLUSTER_STATE_TENANT_LOCK_PREFIX, tenant_id}, ":") | ||
return cache.Lock(key, time.Second*5, time.Second) | ||
} | ||
|
||
func (c *Cluster) UnlockTenant(tenant_id string) error { | ||
key := strings.Join([]string{CLUSTER_STATE_TENANT_LOCK_PREFIX, tenant_id}, ":") | ||
return cache.Unlock(key) | ||
} | ||
|
||
func (c *Cluster) LockPlugin(plugin_id string) error { | ||
key := strings.Join([]string{CLUSTER_STATE_PLUGIN_LOCK_PREFIX, plugin_id}, ":") | ||
return cache.Lock(key, time.Second*5, time.Second) | ||
} | ||
|
||
func (c *Cluster) UnlockPlugin(plugin_id string) error { | ||
key := strings.Join([]string{CLUSTER_STATE_PLUGIN_LOCK_PREFIX, plugin_id}, ":") | ||
return cache.Unlock(key) | ||
} | ||
|
||
func (c *Cluster) LockNodeStatus(node_id string) error { | ||
key := strings.Join([]string{CLUSTER_UPDATE_NODE_STATUS_LOCK_PREFIX, node_id}, ":") | ||
return cache.Lock(key, time.Second*5, time.Second) | ||
} | ||
|
||
func (c *Cluster) UnlockNodeStatus(node_id string) error { | ||
key := strings.Join([]string{CLUSTER_UPDATE_NODE_STATUS_LOCK_PREFIX, node_id}, ":") | ||
return cache.Unlock(key) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,216 @@ | ||
package cluster | ||
|
||
import ( | ||
"errors" | ||
"net" | ||
"time" | ||
|
||
"github.com/langgenius/dify-plugin-daemon/internal/cluster/cluster_id" | ||
"github.com/langgenius/dify-plugin-daemon/internal/utils/cache" | ||
"github.com/langgenius/dify-plugin-daemon/internal/utils/log" | ||
"github.com/langgenius/dify-plugin-daemon/internal/utils/network" | ||
"github.com/langgenius/dify-plugin-daemon/internal/utils/parser" | ||
) | ||
|
||
// Plugin daemon will preemptively try to lock the slot to be the master of the cluster | ||
// and keep update current status of the whole cluster | ||
// once the master is no longer active, one of the slave will try to lock the slot again | ||
// and become the new master | ||
// | ||
// Once a node becomes master, It will take responsibility to gc the nodes has already deactivated | ||
// and all nodes should to maintenance their own status | ||
// | ||
// State: | ||
// - hashmap[cluster-status] | ||
// - node-id: | ||
// - list[ip]: | ||
// - address: string | ||
// - vote: int | ||
// - last_ping_at: int64 | ||
// - preemption-lock: node-id | ||
// - node-status-upgrade-status | ||
// | ||
// A node will be removed from the cluster if it is no longer active | ||
|
||
var ( | ||
i_am_master = false | ||
) | ||
|
||
const ( | ||
CLUSTER_STATUS_HASH_MAP_KEY = "cluster-status-hash-map" | ||
PREEMPTION_LOCK_KEY = "cluster-master-preemption-lock" | ||
) | ||
|
||
const ( | ||
MASTER_LOCKING_INTERVAL = time.Millisecond * 500 // interval to try to lock the slot to be the master | ||
MASTER_LOCK_EXPIRED_TIME = time.Second * 5 // expired time of master key | ||
MASTER_GC_INTERVAL = time.Second * 10 // interval to do garbage collection of nodes has already deactivated | ||
NODE_VOTE_INTERVAL = time.Second * 30 // interval to vote the ips of the nodes | ||
UPDATE_NODE_STATUS_INTERVAL = time.Second * 5 // interval to update the status of the node | ||
NODE_DISCONNECTED_TIMEOUT = time.Second * 10 // once a node is no longer active, it will be removed from the cluster | ||
) | ||
|
||
// lifetime of the cluster | ||
func (c *Cluster) clusterLifetime() { | ||
ticker_lock_master := time.NewTicker(MASTER_LOCKING_INTERVAL) | ||
defer ticker_lock_master.Stop() | ||
|
||
ticker_update_node_status := time.NewTicker(UPDATE_NODE_STATUS_INTERVAL) | ||
defer ticker_update_node_status.Stop() | ||
|
||
master_gc_ticker := time.NewTicker(MASTER_GC_INTERVAL) | ||
defer master_gc_ticker.Stop() | ||
|
||
node_vote_ticker := time.NewTicker(NODE_VOTE_INTERVAL) | ||
defer node_vote_ticker.Stop() | ||
|
||
if err := c.voteIps(); err != nil { | ||
log.Error("failed to vote the ips of the nodes: %s", err.Error()) | ||
} | ||
|
||
for { | ||
select { | ||
case <-ticker_lock_master.C: | ||
if !i_am_master { | ||
// try lock the slot | ||
if success, err := c.lockMaster(); err != nil { | ||
log.Error("failed to lock the slot to be the master of the cluster: %s", err.Error()) | ||
} else if success { | ||
i_am_master = true | ||
log.Info("current node has become the master of the cluster") | ||
} else { | ||
i_am_master = false | ||
log.Info("current node lost the master slot") | ||
} | ||
} else { | ||
// update the master | ||
if err := c.updateMaster(); err != nil { | ||
log.Error("failed to update the master: %s", err.Error()) | ||
} | ||
} | ||
case <-ticker_update_node_status.C: | ||
if err := c.updateNodeStatus(); err != nil { | ||
log.Error("failed to update the status of the node: %s", err.Error()) | ||
} | ||
case <-master_gc_ticker.C: | ||
if i_am_master { | ||
if err := c.gcNodes(); err != nil { | ||
log.Error("failed to gc the nodes has already deactivated: %s", err.Error()) | ||
} | ||
} | ||
case <-node_vote_ticker.C: | ||
if err := c.voteIps(); err != nil { | ||
log.Error("failed to vote the ips of the nodes: %s", err.Error()) | ||
} | ||
} | ||
} | ||
} | ||
|
||
// try lock the slot to be the master of the cluster | ||
// returns: | ||
// - bool: true if the slot is locked by the node | ||
// - error: error if any | ||
func (c *Cluster) lockMaster() (bool, error) { | ||
var final_error error | ||
|
||
for i := 0; i < 3; i++ { | ||
if success, err := cache.SetNX(PREEMPTION_LOCK_KEY, cluster_id.GetInstanceID(), MASTER_LOCK_EXPIRED_TIME); err != nil { | ||
// try again | ||
if final_error == nil { | ||
final_error = err | ||
} else { | ||
final_error = errors.Join(final_error, err) | ||
} | ||
} else if !success { | ||
return false, nil | ||
} else { | ||
return true, nil | ||
} | ||
} | ||
|
||
return false, final_error | ||
} | ||
|
||
// update master | ||
func (c *Cluster) updateMaster() error { | ||
// update expired time of master key | ||
if _, err := cache.Expire(PREEMPTION_LOCK_KEY, MASTER_LOCK_EXPIRED_TIME); err != nil { | ||
return err | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// update the status of the node | ||
func (c *Cluster) updateNodeStatus() error { | ||
if err := c.LockNodeStatus(cluster_id.GetInstanceID()); err != nil { | ||
return err | ||
} | ||
defer c.UnlockNodeStatus(cluster_id.GetInstanceID()) | ||
|
||
// update the status of the node | ||
node_status, err := cache.GetMapField[node](CLUSTER_STATUS_HASH_MAP_KEY, cluster_id.GetInstanceID()) | ||
if err != nil { | ||
if err == cache.ErrNotFound { | ||
// try to get ips configs | ||
ips, err := network.FetchCurrentIps() | ||
if err != nil { | ||
return err | ||
} | ||
node_status = &node{ | ||
Ips: parser.Map(func(from net.IP) ip { | ||
return ip{ | ||
Address: from.String(), | ||
Votes: []vote{}, | ||
} | ||
}, ips), | ||
} | ||
} else { | ||
return err | ||
} | ||
} else { | ||
ips, err := network.FetchCurrentIps() | ||
if err != nil { | ||
return err | ||
} | ||
// add new ip if not exist | ||
for _, _ip := range ips { | ||
found := false | ||
for _, node_ip := range node_status.Ips { | ||
if node_ip.Address == _ip.String() { | ||
found = true | ||
break | ||
} | ||
} | ||
if !found { | ||
node_status.Ips = append(node_status.Ips, ip{ | ||
Address: _ip.String(), | ||
Votes: []vote{}, | ||
}) | ||
} | ||
} | ||
} | ||
|
||
// refresh the last ping time | ||
node_status.LastPingAt = time.Now().Unix() | ||
|
||
// update the status of the node | ||
if err := cache.SetMapOneField(CLUSTER_STATUS_HASH_MAP_KEY, cluster_id.GetInstanceID(), node_status); err != nil { | ||
return err | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (c *Cluster) IsMaster() bool { | ||
return i_am_master | ||
} | ||
|
||
func (c *Cluster) IsNodeAlive(node_id string) bool { | ||
node_status, err := cache.GetMapField[node](CLUSTER_STATUS_HASH_MAP_KEY, node_id) | ||
if err != nil { | ||
return false | ||
} | ||
|
||
return time.Since(time.Unix(node_status.LastPingAt, 0)) < NODE_DISCONNECTED_TIMEOUT | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
package cluster |
Oops, something went wrong.