节点离线时尝试自动通过API节点远程启动

This commit is contained in:
GoEdgeLab
2022-09-17 10:18:00 +08:00
parent 6e984590b8
commit fb8c8ef643
8 changed files with 89 additions and 620 deletions

View File

@@ -3,6 +3,7 @@ package tasks
import (
"github.com/TeaOSLab/EdgeAPI/internal/db/models"
"github.com/TeaOSLab/EdgeAPI/internal/goman"
"github.com/TeaOSLab/EdgeAPI/internal/installers"
"github.com/TeaOSLab/EdgeCommon/pkg/nodeconfigs"
"github.com/iwind/TeaGo/dbs"
"github.com/iwind/TeaGo/types"
@@ -18,6 +19,12 @@ func init() {
})
}
// 节点启动尝试
type nodeStartingTry struct {
count int
timestamp int64
}
// NodeMonitorTask 边缘节点监控任务
type NodeMonitorTask struct {
BaseTask
@@ -26,6 +33,8 @@ type NodeMonitorTask struct {
inactiveMap map[string]int // cluster@nodeId => count
notifiedMap map[int64]int64 // nodeId => timestamp
recoverMap map[int64]*nodeStartingTry // nodeId => *nodeStartingTry
}
func NewNodeMonitorTask(duration time.Duration) *NodeMonitorTask {
@@ -33,6 +42,7 @@ func NewNodeMonitorTask(duration time.Duration) *NodeMonitorTask {
ticker: time.NewTicker(duration),
inactiveMap: map[string]int{},
notifiedMap: map[int64]int64{},
recoverMap: map[int64]*nodeStartingTry{},
}
}
@@ -74,7 +84,40 @@ func (this *NodeMonitorTask) MonitorCluster(cluster *models.NodeCluster) error {
return err
}
var nodeMap = map[int64]*models.Node{}
// 尝试自动远程启动
var nodeQueue = installers.NewNodeQueue()
for _, node := range inactiveNodes {
var nodeId = int64(node.Id)
tryInfo, ok := this.recoverMap[nodeId]
if !ok {
tryInfo = &nodeStartingTry{
count: 1,
timestamp: time.Now().Unix(),
}
this.recoverMap[nodeId] = tryInfo
} else {
if tryInfo.count >= 3 /** 3次 **/ { // N 秒内超过 M 次就暂时不再重新尝试,防止阻塞当前任务
if tryInfo.timestamp+10*60 /** 10 分钟 **/ > time.Now().Unix() {
continue
}
tryInfo.timestamp = time.Now().Unix()
tryInfo.count = 0
}
tryInfo.count++
}
// TODO 如果用户手工安装的位置不在标准位置,需要节点自身记住最近启动的位置
err = nodeQueue.StartNode(nodeId)
if err != nil {
if !installers.IsGrantError(err) {
_ = models.SharedNodeLogDAO.CreateLog(nil, nodeconfigs.NodeRoleNode, nodeId, 0, 0, models.LevelError, "NODE", "start node from remote API failed: "+err.Error(), time.Now().Unix(), "", nil)
}
} else {
_ = models.SharedNodeLogDAO.CreateLog(nil, nodeconfigs.NodeRoleNode, nodeId, 0, 0, models.LevelSuccess, "NODE", "start node from remote API successfully", time.Now().Unix(), "", nil)
}
}
var nodeMap = map[int64]*models.Node{} // nodeId => Node
for _, node := range inactiveNodes {
var nodeId = int64(node.Id)
nodeMap[nodeId] = node