优化节点离线通知

This commit is contained in:
GoEdgeLab
2022-01-18 10:35:30 +08:00
parent 863be083a1
commit 933edea4e0

View File

@@ -8,6 +8,8 @@ import (
"github.com/TeaOSLab/EdgeCommon/pkg/systemconfigs" "github.com/TeaOSLab/EdgeCommon/pkg/systemconfigs"
"github.com/iwind/TeaGo/dbs" "github.com/iwind/TeaGo/dbs"
"github.com/iwind/TeaGo/logs" "github.com/iwind/TeaGo/logs"
"github.com/iwind/TeaGo/types"
"strings"
"time" "time"
) )
@@ -30,13 +32,15 @@ func init() {
type NodeMonitorTask struct { type NodeMonitorTask struct {
intervalSeconds int intervalSeconds int
inactiveMap map[int64]int // nodeId => count inactiveMap map[string]int // cluster@nodeId => count
notifiedMap map[int64]int64 // nodeId => timestamp
} }
func NewNodeMonitorTask(intervalSeconds int) *NodeMonitorTask { func NewNodeMonitorTask(intervalSeconds int) *NodeMonitorTask {
return &NodeMonitorTask{ return &NodeMonitorTask{
intervalSeconds: intervalSeconds, intervalSeconds: intervalSeconds,
inactiveMap: map[int64]int{}, inactiveMap: map[string]int{},
notifiedMap: map[int64]int64{},
} }
} }
@@ -89,18 +93,25 @@ func (this *NodeMonitorTask) monitorCluster(cluster *models.NodeCluster) error {
for _, node := range inactiveNodes { for _, node := range inactiveNodes {
var nodeId = int64(node.Id) var nodeId = int64(node.Id)
nodeMap[nodeId] = node nodeMap[nodeId] = node
this.inactiveMap[nodeId]++ this.inactiveMap[types.String(clusterId)+"@"+types.String(nodeId)]++
} }
const maxInactiveTries = 5 const maxInactiveTries = 5
// 处理现有的离线状态 // 处理现有的离线状态
for nodeId, count := range this.inactiveMap { for key, count := range this.inactiveMap {
var pieces = strings.Split(key, "@")
if pieces[0] != types.String(clusterId) {
continue
}
var nodeId = types.Int64(pieces[1])
node, ok := nodeMap[nodeId] node, ok := nodeMap[nodeId]
if ok { if ok {
// 连续两次 // 连续 N 次离线发送通知
if count >= maxInactiveTries { // 同时也要确保两次发送通知的时间不会过近
this.inactiveMap[nodeId] = 0 if count >= maxInactiveTries && time.Now().Unix()-this.notifiedMap[nodeId] > 3600 {
this.inactiveMap[key] = 0
this.notifiedMap[nodeId] = time.Now().Unix()
subject := "节点\"" + node.Name + "\"已处于离线状态" subject := "节点\"" + node.Name + "\"已处于离线状态"
msg := "集群'" + cluster.Name + "'节点\"" + node.Name + "\"已处于离线状态,请检查节点是否异常" msg := "集群'" + cluster.Name + "'节点\"" + node.Name + "\"已处于离线状态,请检查节点是否异常"
@@ -110,7 +121,7 @@ func (this *NodeMonitorTask) monitorCluster(cluster *models.NodeCluster) error {
} }
} }
} else { } else {
this.inactiveMap[nodeId] = 0 delete(this.inactiveMap, key)
} }
} }