mirror of
https://github.com/TeaOSLab/EdgeAPI.git
synced 2025-11-06 18:10:25 +08:00
优化节点离线通知
This commit is contained in:
@@ -8,6 +8,8 @@ import (
|
|||||||
"github.com/TeaOSLab/EdgeCommon/pkg/systemconfigs"
|
"github.com/TeaOSLab/EdgeCommon/pkg/systemconfigs"
|
||||||
"github.com/iwind/TeaGo/dbs"
|
"github.com/iwind/TeaGo/dbs"
|
||||||
"github.com/iwind/TeaGo/logs"
|
"github.com/iwind/TeaGo/logs"
|
||||||
|
"github.com/iwind/TeaGo/types"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -30,13 +32,15 @@ func init() {
|
|||||||
type NodeMonitorTask struct {
|
type NodeMonitorTask struct {
|
||||||
intervalSeconds int
|
intervalSeconds int
|
||||||
|
|
||||||
inactiveMap map[int64]int // nodeId => count
|
inactiveMap map[string]int // cluster@nodeId => count
|
||||||
|
notifiedMap map[int64]int64 // nodeId => timestamp
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewNodeMonitorTask(intervalSeconds int) *NodeMonitorTask {
|
func NewNodeMonitorTask(intervalSeconds int) *NodeMonitorTask {
|
||||||
return &NodeMonitorTask{
|
return &NodeMonitorTask{
|
||||||
intervalSeconds: intervalSeconds,
|
intervalSeconds: intervalSeconds,
|
||||||
inactiveMap: map[int64]int{},
|
inactiveMap: map[string]int{},
|
||||||
|
notifiedMap: map[int64]int64{},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -89,18 +93,25 @@ func (this *NodeMonitorTask) monitorCluster(cluster *models.NodeCluster) error {
|
|||||||
for _, node := range inactiveNodes {
|
for _, node := range inactiveNodes {
|
||||||
var nodeId = int64(node.Id)
|
var nodeId = int64(node.Id)
|
||||||
nodeMap[nodeId] = node
|
nodeMap[nodeId] = node
|
||||||
this.inactiveMap[nodeId]++
|
this.inactiveMap[types.String(clusterId)+"@"+types.String(nodeId)]++
|
||||||
}
|
}
|
||||||
|
|
||||||
const maxInactiveTries = 5
|
const maxInactiveTries = 5
|
||||||
|
|
||||||
// 处理现有的离线状态
|
// 处理现有的离线状态
|
||||||
for nodeId, count := range this.inactiveMap {
|
for key, count := range this.inactiveMap {
|
||||||
|
var pieces = strings.Split(key, "@")
|
||||||
|
if pieces[0] != types.String(clusterId) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var nodeId = types.Int64(pieces[1])
|
||||||
node, ok := nodeMap[nodeId]
|
node, ok := nodeMap[nodeId]
|
||||||
if ok {
|
if ok {
|
||||||
// 连续两次
|
// 连续 N 次离线发送通知
|
||||||
if count >= maxInactiveTries {
|
// 同时也要确保两次发送通知的时间不会过近
|
||||||
this.inactiveMap[nodeId] = 0
|
if count >= maxInactiveTries && time.Now().Unix()-this.notifiedMap[nodeId] > 3600 {
|
||||||
|
this.inactiveMap[key] = 0
|
||||||
|
this.notifiedMap[nodeId] = time.Now().Unix()
|
||||||
|
|
||||||
subject := "节点\"" + node.Name + "\"已处于离线状态"
|
subject := "节点\"" + node.Name + "\"已处于离线状态"
|
||||||
msg := "集群'" + cluster.Name + "'节点\"" + node.Name + "\"已处于离线状态,请检查节点是否异常"
|
msg := "集群'" + cluster.Name + "'节点\"" + node.Name + "\"已处于离线状态,请检查节点是否异常"
|
||||||
@@ -110,7 +121,7 @@ func (this *NodeMonitorTask) monitorCluster(cluster *models.NodeCluster) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
this.inactiveMap[nodeId] = 0
|
delete(this.inactiveMap, key)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user