mirror of
				https://github.com/TeaOSLab/EdgeAPI.git
				synced 2025-11-04 07:50:25 +08:00 
			
		
		
		
	实现监控节点在线状态
This commit is contained in:
		@@ -25,6 +25,7 @@ type MessageType = string
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	MessageTypeHealthCheckFail MessageType = "HealthCheckFail"
 | 
			
		||||
	MessageTypeNodeInactive    MessageType = "NodeInactive"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type MessageDAO dbs.DAO
 | 
			
		||||
@@ -84,6 +85,12 @@ func (this *MessageDAO) CreateClusterMessage(clusterId int64, messageType Messag
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// 创建节点消息
 | 
			
		||||
func (this *MessageDAO) CreateNodeMessage(clusterId int64, nodeId int64, messageType MessageType, level string, body string, paramsJSON []byte) error {
 | 
			
		||||
	_, err := this.createMessage(clusterId, nodeId, messageType, level, body, paramsJSON)
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// 删除某天之前的消息
 | 
			
		||||
func (this *MessageDAO) DeleteMessagesBeforeDay(dayTime time.Time) error {
 | 
			
		||||
	day := timeutil.Format("Ymd", dayTime)
 | 
			
		||||
 
 | 
			
		||||
@@ -294,6 +294,18 @@ func (this *NodeDAO) FindAllEnabledNodesWithClusterId(clusterId int64) (result [
 | 
			
		||||
	return
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// 取得一个集群离线的节点
 | 
			
		||||
func (this *NodeDAO) FindAllInactiveNodesWithClusterId(clusterId int64) (result []*Node, err error) {
 | 
			
		||||
	_, err = this.Query().
 | 
			
		||||
		State(NodeStateEnabled).
 | 
			
		||||
		Attr("clusterId", clusterId).
 | 
			
		||||
		Attr("isOn", true). // 只监控启用的节点
 | 
			
		||||
		Where("(status IS NULL OR (JSON_EXTRACT(status, '$.isActive')=false AND UNIX_TIMESTAMP()-JSON_EXTRACT(status, '$.updatedAt')>10) OR  UNIX_TIMESTAMP()-JSON_EXTRACT(status, '$.updatedAt')>120)").
 | 
			
		||||
		Slice(&result).
 | 
			
		||||
		FindAll()
 | 
			
		||||
	return
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// 计算节点数量
 | 
			
		||||
func (this *NodeDAO) CountAllEnabledNodesMatch(clusterId int64, installState configutils.BoolState, activeState configutils.BoolState) (int64, error) {
 | 
			
		||||
	query := this.Query()
 | 
			
		||||
@@ -336,6 +348,20 @@ func (this *NodeDAO) UpdateNodeStatus(nodeId int64, statusJSON []byte) error {
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// 更改节点在线状态
 | 
			
		||||
func (this *NodeDAO) UpdateNodeIsActive(nodeId int64, isActive bool) error {
 | 
			
		||||
	b := "true"
 | 
			
		||||
	if !isActive {
 | 
			
		||||
		b = "false"
 | 
			
		||||
	}
 | 
			
		||||
	_, err := this.Query().
 | 
			
		||||
		Pk(nodeId).
 | 
			
		||||
		Where("status IS NOT NULL").
 | 
			
		||||
		Set("status", dbs.SQL("JSON_SET(status, '$.isActive', "+b+")")).
 | 
			
		||||
		Update()
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// 设置节点安装状态
 | 
			
		||||
func (this *NodeDAO) UpdateNodeIsInstalled(nodeId int64, isInstalled bool) error {
 | 
			
		||||
	_, err := this.Query().
 | 
			
		||||
 
 | 
			
		||||
@@ -2,6 +2,7 @@ package models
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"encoding/json"
 | 
			
		||||
	"github.com/TeaOSLab/EdgeCommon/pkg/nodeconfigs"
 | 
			
		||||
	"time"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@@ -26,3 +27,15 @@ func (this *Node) DecodeInstallStatus() (*NodeInstallStatus, error) {
 | 
			
		||||
	return status, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// 节点状态
 | 
			
		||||
func (this *Node) DecodeStatus() (*nodeconfigs.NodeStatus, error) {
 | 
			
		||||
	if len(this.Status) == 0 || this.Status == "null" {
 | 
			
		||||
		return nil, nil
 | 
			
		||||
	}
 | 
			
		||||
	status := &nodeconfigs.NodeStatus{}
 | 
			
		||||
	err := json.Unmarshal([]byte(this.Status), status)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
	return status, nil
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -5,6 +5,7 @@ import (
 | 
			
		||||
	"encoding/json"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"github.com/TeaOSLab/EdgeAPI/internal/configs"
 | 
			
		||||
	"github.com/TeaOSLab/EdgeAPI/internal/db/models"
 | 
			
		||||
	"github.com/TeaOSLab/EdgeAPI/internal/errors"
 | 
			
		||||
	rpcutils "github.com/TeaOSLab/EdgeAPI/internal/rpc/utils"
 | 
			
		||||
	"github.com/TeaOSLab/EdgeCommon/pkg/messageconfigs"
 | 
			
		||||
@@ -135,6 +136,12 @@ func (this *NodeService) NodeStream(server pb.NodeService_NodeStreamServer) erro
 | 
			
		||||
	for {
 | 
			
		||||
		req, err := server.Recv()
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			// 修改节点状态
 | 
			
		||||
			err1 := models.SharedNodeDAO.UpdateNodeIsActive(nodeId, false)
 | 
			
		||||
			if err1 != nil {
 | 
			
		||||
				logs.Println(err1.Error())
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -19,6 +19,7 @@ func init() {
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// 节点健康检查任务
 | 
			
		||||
type HealthCheckTask struct {
 | 
			
		||||
	tasksMap map[int64]*HealthCheckClusterTask // taskId => task
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										92
									
								
								internal/tasks/node_monitor_task.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										92
									
								
								internal/tasks/node_monitor_task.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,92 @@
 | 
			
		||||
package tasks
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"github.com/TeaOSLab/EdgeAPI/internal/db/models"
 | 
			
		||||
	"github.com/TeaOSLab/EdgeAPI/internal/utils/numberutils"
 | 
			
		||||
	"github.com/iwind/TeaGo/dbs"
 | 
			
		||||
	"github.com/iwind/TeaGo/logs"
 | 
			
		||||
	"time"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func init() {
 | 
			
		||||
	dbs.OnReady(func() {
 | 
			
		||||
		task := NewNodeMonitorTask(60)
 | 
			
		||||
		ticker := time.NewTicker(60 * time.Second)
 | 
			
		||||
		go func() {
 | 
			
		||||
			for range ticker.C {
 | 
			
		||||
				err := task.loop()
 | 
			
		||||
				if err != nil {
 | 
			
		||||
					logs.Println("[TASK][NODE_MONITOR]" + err.Error())
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}()
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// 健康节点任务
 | 
			
		||||
type NodeMonitorTask struct {
 | 
			
		||||
	intervalSeconds int
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func NewNodeMonitorTask(intervalSeconds int) *NodeMonitorTask {
 | 
			
		||||
	return &NodeMonitorTask{
 | 
			
		||||
		intervalSeconds: intervalSeconds,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (this *NodeMonitorTask) Run() {
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (this *NodeMonitorTask) loop() error {
 | 
			
		||||
	// 检查上次运行时间,防止重复运行
 | 
			
		||||
	settingKey := "node_monitor"
 | 
			
		||||
	timestamp := time.Now().Unix()
 | 
			
		||||
	c, err := models.SharedSysSettingDAO.CompareInt64Setting(settingKey, timestamp-int64(this.intervalSeconds))
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	if c > 0 {
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// 记录时间
 | 
			
		||||
	err = models.SharedSysSettingDAO.UpdateSetting(settingKey, []byte(numberutils.FormatInt64(timestamp)))
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	clusters, err := models.SharedNodeClusterDAO.FindAllEnableClusters()
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	for _, cluster := range clusters {
 | 
			
		||||
		err := this.monitorCluster(cluster)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (this *NodeMonitorTask) monitorCluster(cluster *models.NodeCluster) error {
 | 
			
		||||
	clusterId := int64(cluster.Id)
 | 
			
		||||
 | 
			
		||||
	// 检查离线节点
 | 
			
		||||
	inactiveNodes, err := models.SharedNodeDAO.FindAllInactiveNodesWithClusterId(clusterId)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	for _, node := range inactiveNodes {
 | 
			
		||||
		err = models.SharedMessageDAO.CreateNodeMessage(clusterId, int64(node.Id), models.MessageTypeNodeInactive, models.LevelError, "节点已处于离线状态", nil)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// 检查CPU、内存、磁盘不足节点,而且离线的节点不再重复提示
 | 
			
		||||
	// TODO 需要实现
 | 
			
		||||
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										17
									
								
								internal/tasks/node_monitor_task_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								internal/tasks/node_monitor_task_test.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,17 @@
 | 
			
		||||
package tasks
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"github.com/iwind/TeaGo/dbs"
 | 
			
		||||
	"testing"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func TestNodeMonitorTask_loop(t *testing.T) {
 | 
			
		||||
	dbs.NotifyReady()
 | 
			
		||||
 | 
			
		||||
	task := NewNodeMonitorTask(60)
 | 
			
		||||
	err := task.loop()
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		t.Fatal(err)
 | 
			
		||||
	}
 | 
			
		||||
	t.Log("ok")
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user