mirror of
https://github.com/TeaOSLab/EdgeAPI.git
synced 2025-11-16 01:50:25 +08:00
节点离线时尝试自动通过API节点远程启动
This commit is contained in:
@@ -3,6 +3,7 @@ package tasks
|
||||
import (
|
||||
"github.com/TeaOSLab/EdgeAPI/internal/db/models"
|
||||
"github.com/TeaOSLab/EdgeAPI/internal/goman"
|
||||
"github.com/TeaOSLab/EdgeAPI/internal/installers"
|
||||
"github.com/TeaOSLab/EdgeCommon/pkg/nodeconfigs"
|
||||
"github.com/iwind/TeaGo/dbs"
|
||||
"github.com/iwind/TeaGo/types"
|
||||
@@ -18,6 +19,12 @@ func init() {
|
||||
})
|
||||
}
|
||||
|
||||
// 节点启动尝试
|
||||
type nodeStartingTry struct {
|
||||
count int
|
||||
timestamp int64
|
||||
}
|
||||
|
||||
// NodeMonitorTask 边缘节点监控任务
|
||||
type NodeMonitorTask struct {
|
||||
BaseTask
|
||||
@@ -26,6 +33,8 @@ type NodeMonitorTask struct {
|
||||
|
||||
inactiveMap map[string]int // cluster@nodeId => count
|
||||
notifiedMap map[int64]int64 // nodeId => timestamp
|
||||
|
||||
recoverMap map[int64]*nodeStartingTry // nodeId => *nodeStartingTry
|
||||
}
|
||||
|
||||
func NewNodeMonitorTask(duration time.Duration) *NodeMonitorTask {
|
||||
@@ -33,6 +42,7 @@ func NewNodeMonitorTask(duration time.Duration) *NodeMonitorTask {
|
||||
ticker: time.NewTicker(duration),
|
||||
inactiveMap: map[string]int{},
|
||||
notifiedMap: map[int64]int64{},
|
||||
recoverMap: map[int64]*nodeStartingTry{},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,7 +84,40 @@ func (this *NodeMonitorTask) MonitorCluster(cluster *models.NodeCluster) error {
|
||||
return err
|
||||
}
|
||||
|
||||
var nodeMap = map[int64]*models.Node{}
|
||||
// 尝试自动远程启动
|
||||
var nodeQueue = installers.NewNodeQueue()
|
||||
for _, node := range inactiveNodes {
|
||||
var nodeId = int64(node.Id)
|
||||
tryInfo, ok := this.recoverMap[nodeId]
|
||||
if !ok {
|
||||
tryInfo = &nodeStartingTry{
|
||||
count: 1,
|
||||
timestamp: time.Now().Unix(),
|
||||
}
|
||||
this.recoverMap[nodeId] = tryInfo
|
||||
} else {
|
||||
if tryInfo.count >= 3 /** 3次 **/ { // N 秒内超过 M 次就暂时不再重新尝试,防止阻塞当前任务
|
||||
if tryInfo.timestamp+10*60 /** 10 分钟 **/ > time.Now().Unix() {
|
||||
continue
|
||||
}
|
||||
tryInfo.timestamp = time.Now().Unix()
|
||||
tryInfo.count = 0
|
||||
}
|
||||
tryInfo.count++
|
||||
}
|
||||
|
||||
// TODO 如果用户手工安装的位置不在标准位置,需要节点自身记住最近启动的位置
|
||||
err = nodeQueue.StartNode(nodeId)
|
||||
if err != nil {
|
||||
if !installers.IsGrantError(err) {
|
||||
_ = models.SharedNodeLogDAO.CreateLog(nil, nodeconfigs.NodeRoleNode, nodeId, 0, 0, models.LevelError, "NODE", "start node from remote API failed: "+err.Error(), time.Now().Unix(), "", nil)
|
||||
}
|
||||
} else {
|
||||
_ = models.SharedNodeLogDAO.CreateLog(nil, nodeconfigs.NodeRoleNode, nodeId, 0, 0, models.LevelSuccess, "NODE", "start node from remote API successfully", time.Now().Unix(), "", nil)
|
||||
}
|
||||
}
|
||||
|
||||
var nodeMap = map[int64]*models.Node{} // nodeId => Node
|
||||
for _, node := range inactiveNodes {
|
||||
var nodeId = int64(node.Id)
|
||||
nodeMap[nodeId] = node
|
||||
|
||||
Reference in New Issue
Block a user