mirror of
https://github.com/TeaOSLab/EdgeAPI.git
synced 2025-11-03 23:20:26 +08:00
优化节点离线通知
This commit is contained in:
@@ -8,6 +8,8 @@ import (
|
||||
"github.com/TeaOSLab/EdgeCommon/pkg/systemconfigs"
|
||||
"github.com/iwind/TeaGo/dbs"
|
||||
"github.com/iwind/TeaGo/logs"
|
||||
"github.com/iwind/TeaGo/types"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
@@ -30,13 +32,15 @@ func init() {
|
||||
type NodeMonitorTask struct {
|
||||
intervalSeconds int
|
||||
|
||||
inactiveMap map[int64]int // nodeId => count
|
||||
inactiveMap map[string]int // cluster@nodeId => count
|
||||
notifiedMap map[int64]int64 // nodeId => timestamp
|
||||
}
|
||||
|
||||
func NewNodeMonitorTask(intervalSeconds int) *NodeMonitorTask {
|
||||
return &NodeMonitorTask{
|
||||
intervalSeconds: intervalSeconds,
|
||||
inactiveMap: map[int64]int{},
|
||||
inactiveMap: map[string]int{},
|
||||
notifiedMap: map[int64]int64{},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -89,18 +93,25 @@ func (this *NodeMonitorTask) monitorCluster(cluster *models.NodeCluster) error {
|
||||
for _, node := range inactiveNodes {
|
||||
var nodeId = int64(node.Id)
|
||||
nodeMap[nodeId] = node
|
||||
this.inactiveMap[nodeId]++
|
||||
this.inactiveMap[types.String(clusterId)+"@"+types.String(nodeId)]++
|
||||
}
|
||||
|
||||
const maxInactiveTries = 5
|
||||
|
||||
// 处理现有的离线状态
|
||||
for nodeId, count := range this.inactiveMap {
|
||||
for key, count := range this.inactiveMap {
|
||||
var pieces = strings.Split(key, "@")
|
||||
if pieces[0] != types.String(clusterId) {
|
||||
continue
|
||||
}
|
||||
var nodeId = types.Int64(pieces[1])
|
||||
node, ok := nodeMap[nodeId]
|
||||
if ok {
|
||||
// 连续两次
|
||||
if count >= maxInactiveTries {
|
||||
this.inactiveMap[nodeId] = 0
|
||||
// 连续 N 次离线发送通知
|
||||
// 同时也要确保两次发送通知的时间不会过近
|
||||
if count >= maxInactiveTries && time.Now().Unix()-this.notifiedMap[nodeId] > 3600 {
|
||||
this.inactiveMap[key] = 0
|
||||
this.notifiedMap[nodeId] = time.Now().Unix()
|
||||
|
||||
subject := "节点\"" + node.Name + "\"已处于离线状态"
|
||||
msg := "集群'" + cluster.Name + "'节点\"" + node.Name + "\"已处于离线状态,请检查节点是否异常"
|
||||
@@ -110,7 +121,7 @@ func (this *NodeMonitorTask) monitorCluster(cluster *models.NodeCluster) error {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
this.inactiveMap[nodeId] = 0
|
||||
delete(this.inactiveMap, key)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user