优化节点离线通知

This commit is contained in:
GoEdgeLab
2022-01-18 10:35:30 +08:00
parent 863be083a1
commit 933edea4e0

View File

@@ -8,6 +8,8 @@ import (
"github.com/TeaOSLab/EdgeCommon/pkg/systemconfigs"
"github.com/iwind/TeaGo/dbs"
"github.com/iwind/TeaGo/logs"
"github.com/iwind/TeaGo/types"
"strings"
"time"
)
@@ -30,13 +32,15 @@ func init() {
type NodeMonitorTask struct {
intervalSeconds int
inactiveMap map[int64]int // nodeId => count
inactiveMap map[string]int // cluster@nodeId => count
notifiedMap map[int64]int64 // nodeId => timestamp
}
func NewNodeMonitorTask(intervalSeconds int) *NodeMonitorTask {
return &NodeMonitorTask{
intervalSeconds: intervalSeconds,
inactiveMap: map[int64]int{},
inactiveMap: map[string]int{},
notifiedMap: map[int64]int64{},
}
}
@@ -89,18 +93,25 @@ func (this *NodeMonitorTask) monitorCluster(cluster *models.NodeCluster) error {
for _, node := range inactiveNodes {
var nodeId = int64(node.Id)
nodeMap[nodeId] = node
this.inactiveMap[nodeId]++
this.inactiveMap[types.String(clusterId)+"@"+types.String(nodeId)]++
}
const maxInactiveTries = 5
// 处理现有的离线状态
for nodeId, count := range this.inactiveMap {
for key, count := range this.inactiveMap {
var pieces = strings.Split(key, "@")
if pieces[0] != types.String(clusterId) {
continue
}
var nodeId = types.Int64(pieces[1])
node, ok := nodeMap[nodeId]
if ok {
// 连续两次
if count >= maxInactiveTries {
this.inactiveMap[nodeId] = 0
// 连续 N 次离线发送通知
// 同时也要确保两次发送通知的时间不会过近
if count >= maxInactiveTries && time.Now().Unix()-this.notifiedMap[nodeId] > 3600 {
this.inactiveMap[key] = 0
this.notifiedMap[nodeId] = time.Now().Unix()
subject := "节点\"" + node.Name + "\"已处于离线状态"
msg := "集群'" + cluster.Name + "'节点\"" + node.Name + "\"已处于离线状态,请检查节点是否异常"
@@ -110,7 +121,7 @@ func (this *NodeMonitorTask) monitorCluster(cluster *models.NodeCluster) error {
}
}
} else {
this.inactiveMap[nodeId] = 0
delete(this.inactiveMap, key)
}
}