feature/info-monitor #1

Merged
wk233 merged 5 commits from feature/info-monitor into main 2026-01-26 12:24:55 +08:00
4 changed files with 1008 additions and 14 deletions

View File

@ -1,59 +1,108 @@
package main
import (
	"io"
	"log"
	"os"
	"os/signal"
	"syscall"
	"time"

	"github.com/wuko233/sysmonitord/internal/config"
	"github.com/wuko233/sysmonitord/internal/monitor"
	"gopkg.in/natefinch/lumberjack.v2"
)
func main() {
// 设置日志输出到文件和控制台
log.SetOutput(os.Stdout)
fileLogger := &lumberjack.Logger{
Filename: "/var/log/sysmonitord/sysmonitord.log",
MaxSize: 10, // MB
MaxBackups: 5,
MaxAge: 28, // days
Compress: true,
}
log.SetOutput(io.MultiWriter(os.Stdout, fileLogger))
log.Println("启动sysmonitord...")
cfg := &config.SSHMonitor{
// SSH监控配置
sshCfg := &config.SSHMonitor{
Enabled: true,
DisplayOnShell: true,
AlertOnRootLogin: true,
}
log.Printf("加载SSH监控配置: %+v\n", cfg)
// 信息监控配置
infoCfg := &monitor.InfoMonitorConfig{
Enabled: true,
Interval: 30 * time.Second, // 每30秒采集一次
LogFilePath: "/var/log/sysmonitord/info_monitor.log",
MaxLogSize: 100 * 1024 * 1024, // 100MB
LogRetention: 30, // 保留30天
ProcessLimit: 10, // 显示10个进程
CollectNetwork: true,
CollectProcess: true,
}
alertChan := make(chan monitor.Alert, 100)
metricsChan := make(chan monitor.ServerMetrics, 100)
log.Println("初始化SSH监控器...")
sshMonitor := monitor.NewSSHMonitor(cfg, alertChan)
log.Println("初始化监控器...")
log.Println("启用告警处理...")
// 创建SSH监控器
sshMonitor := monitor.NewSSHMonitor(sshCfg, alertChan)
// 创建信息监控器
infoMonitor := monitor.NewInfoMonitor(infoCfg, metricsChan)
// 启动告警处理
log.Println("启动告警处理...")
go handleAlerts(alertChan)
// 启动指标处理
log.Println("启动指标处理...")
go handleMetrics(metricsChan)
// 启动SSH监控器
go func() {
log.Println("启动SSH监控器...")
if err := sshMonitor.Start(); err != nil {
log.Fatalf("启动SSH监控器失败: %v", err)
}
}()
// 启动信息监控器
go func() {
log.Println("启动信息监控器...")
if err := infoMonitor.Start(); err != nil {
log.Fatalf("启动信息监控器失败: %v", err)
}
}()
time.Sleep(3 * time.Second)
log.Println("启动sysmonitord完成.")
log.Println("sysmonitord正在运行...")
log.Println("sysmonitord监控系统已启动.")
log.Println("按Ctrl+C退出...")
stopChan := make(chan os.Signal, 1)
<-stopChan
log.Println("停止SSH监控器...")
log.Println("正在停止监控器...")
// 停止信息监控器
if err := infoMonitor.Stop(); err != nil {
log.Printf("停止信息监控器失败: %v", err)
}
// 停止SSH监控器
if err := sshMonitor.Stop(); err != nil {
log.Fatalf("停止SSH监控器失败: %v", err)
log.Printf("停止SSH监控器失败: %v", err)
}
time.Sleep(1 * time.Second)
log.Println("sysmonitord已退出.")
}
func handleAlerts(alertChan <-chan monitor.Alert) {
@ -63,8 +112,45 @@ func handleAlerts(alertChan <-chan monitor.Alert) {
switch alert.Type {
case "SSH_ROOT_LOGIN":
log.Println("ROOT用户登入")
log.Println("ROOT用户登入警告!")
// Todo: 接入发信接口
}
}
}
// handleMetrics receives samples from metricsChan until the channel is closed
// and hands each one to checkMetrics. This is the place to add persistence,
// forwarding to an external monitoring system, or richer alerting.
func handleMetrics(metricsChan <-chan monitor.ServerMetrics) {
	for {
		sample, open := <-metricsChan
		if !open {
			return
		}
		checkMetrics(&sample)
	}
}
// checkMetrics logs a warning for every value in metrics that exceeds its
// threshold: CPU, memory and per-mountpoint disk usage above 90 percent, and
// a 1-minute relative load above 3.0.
func checkMetrics(metrics *monitor.ServerMetrics) {
	const usageLimit = 90 // percent

	if cpuUsage := metrics.CPU.UsagePercent; cpuUsage > usageLimit {
		log.Printf("[警告] CPU使用率过高: %.2f%%\n", cpuUsage)
	}

	if memUsage := metrics.Memory.UsedPercent; memUsage > usageLimit {
		log.Printf("[警告] 内存使用率过高: %.2f%%\n", memUsage)
	}

	for i := range metrics.Disk {
		d := &metrics.Disk[i]
		if d.UsedPercent <= usageLimit {
			continue
		}
		log.Printf("[警告] 磁盘%s使用率过高: %.2f%%\n",
			d.Mountpoint, d.UsedPercent)
	}

	// The threshold is applied to the relative load, but the message reports
	// the absolute 1-minute load average.
	if load := metrics.Load; load.RelativeLoad1 > 3.0 {
		log.Printf("[警告] 系统负载过高: %.2f\n", load.Load1)
	}
}

13
go.mod
View File

@ -2,6 +2,17 @@ module github.com/wuko233/sysmonitord
go 1.24.3
require github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf
require (
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf
github.com/ebitengine/purego v0.9.1 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
github.com/shirou/gopsutil/v4 v4.25.12
github.com/tklauser/go-sysconf v0.3.16 // indirect
github.com/tklauser/numcpus v0.11.0 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
golang.org/x/sys v0.38.0 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.2.1
)

24
go.sum
View File

@ -2,3 +2,27 @@ github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pq
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/go-systemd/v22 v22.6.0 h1:aGVa/v8B7hpb0TKl0MWoAavPDmHvobFe5R5zn0bCJWo=
github.com/coreos/go-systemd/v22 v22.6.0/go.mod h1:iG+pp635Fo7ZmV/j14KUcmEyWF+0X7Lua8rrTWzYgWU=
github.com/ebitengine/purego v0.9.1 h1:a/k2f2HQU3Pi399RPW1MOaZyhKJL9w/xFpKAg4q1s0A=
github.com/ebitengine/purego v0.9.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU=
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/shirou/gopsutil/v4 v4.25.12 h1:e7PvW/0RmJ8p8vPGJH4jvNkOyLmbkXgXW4m6ZPic6CY=
github.com/shirou/gopsutil/v4 v4.25.12/go.mod h1:EivAfP5x2EhLp2ovdpKSozecVXn1TmuG7SMzs/Wh4PU=
github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA=
github.com/tklauser/go-sysconf v0.3.16/go.mod h1:/qNL9xxDhc7tx3HSRsLWNnuzbVfh3e7gh/BmM179nYI=
github.com/tklauser/numcpus v0.11.0 h1:nSTwhKH5e1dMNsCdVBukSZrURJRoHbSEQjdEbY+9RXw=
github.com/tklauser/numcpus v0.11.0/go.mod h1:z+LwcLq54uWZTX0u/bGobaV34u6V7KNlTZejzM6/3MQ=
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc=
gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc=

View File

@ -0,0 +1,873 @@
package monitor
import (
"fmt"
"log"
"os"
"path/filepath"
"runtime"
"time"
"github.com/shirou/gopsutil/v4/cpu"
"github.com/shirou/gopsutil/v4/disk"
"github.com/shirou/gopsutil/v4/host"
"github.com/shirou/gopsutil/v4/load"
"github.com/shirou/gopsutil/v4/mem"
"github.com/shirou/gopsutil/v4/net"
"github.com/shirou/gopsutil/v4/process"
)
// InfoMonitor periodically samples server metrics, writes a summary of each
// sample to a log file and offers the sample on a metrics channel.
type InfoMonitor struct {
config *InfoMonitorConfig // settings; NewInfoMonitor fills in defaults
logFile *os.File // open metrics log; assigned by initLogFile
stopChan chan struct{} // closed by Stop to end the background loops
metricsChan chan ServerMetrics // destination for samples; may be nil
}
// InfoMonitorConfig holds the settings of an InfoMonitor.
type InfoMonitorConfig struct {
Enabled bool `yaml:"enabled"` // NOTE(review): nothing visible in this file reads this flag; confirm it is honoured
Interval time.Duration `yaml:"interval"` // sampling interval
LogFilePath string `yaml:"log_file_path"` // path of the metrics log file
MaxLogSize int64 `yaml:"max_log_size"` // maximum log size in bytes
LogRetention int `yaml:"log_retention"` // number of days to retain logs
ProcessLimit int `yaml:"process_limit"` // maximum number of processes to report
CollectNetwork bool `yaml:"collect_network"` // whether to collect network information
CollectProcess bool `yaml:"collect_process"` // whether to collect process information
}
// ServerMetrics is one complete sample of server metrics; it groups the
// per-area results together with the time the sample was taken.
type ServerMetrics struct {
Timestamp time.Time `json:"timestamp"`
CPU CPUInfo `json:"cpu"`
Memory MemoryInfo `json:"memory"`
Disk []DiskInfo `json:"disk"`
Network NetworkInfo `json:"network"`
Load LoadInfo `json:"load"`
Processes []ProcessInfo `json:"processes"`
Host HostInfo `json:"host"`
Runtime RuntimeInfo `json:"runtime"`
QuickMetrics QuickMetrics `json:"quick_metrics"`
}
// CPUInfo describes the CPU model, its core counts and its current usage,
// both overall and per core.
type CPUInfo struct {
Model string `json:"model"`
Cores int `json:"cores"`
LogicalCores int `json:"logical_cores"`
UsagePercent float64 `json:"usage_percent"`
PerCorePercent []float64 `json:"per_core_percent"`
Mhz float64 `json:"mhz"`
CacheSize int `json:"cache_size"`
}
// MemoryInfo describes physical memory and swap usage. The *GB fields are
// byte counts divided by 1024*1024*1024.
type MemoryInfo struct {
TotalGB float64 `json:"total_gb"`
UsedGB float64 `json:"used_gb"`
AvailableGB float64 `json:"available_gb"`
UsedPercent float64 `json:"used_percent"`
SwapTotalGB float64 `json:"swap_total_gb"`
SwapUsedGB float64 `json:"swap_used_gb"`
}
// DiskInfo describes the usage of one mounted partition. The *GB fields are
// byte counts divided by 1024*1024*1024.
type DiskInfo struct {
Mountpoint string `json:"mountpoint"`
Device string `json:"device"`
Fstype string `json:"fstype"`
TotalGB float64 `json:"total_gb"`
UsedGB float64 `json:"used_gb"`
FreeGB float64 `json:"free_gb"`
UsedPercent float64 `json:"used_percent"`
InodesPercent float64 `json:"inodes_percent"`
}
// NetworkInfo describes the network interfaces, the traffic totals summed
// over all interface counters (bytes divided by 1024*1024) and the TCP
// connection counts.
type NetworkInfo struct {
Interfaces []NetworkInterface `json:"interfaces"`
TotalRecvMB float64 `json:"total_recv_mb"`
TotalSentMB float64 `json:"total_sent_mb"`
TCPConnections int `json:"tcp_connections"`
EstablishedConn int `json:"established_conn"`
}
// NetworkInterface describes a single network interface: its name, hardware
// address and assigned addresses.
type NetworkInterface struct {
Name string `json:"name"`
HardwareAddr string `json:"hardware_addr"`
IPAddresses []string `json:"ip_addresses"`
}
// LoadInfo holds the 1/5/15-minute load averages, the same values divided by
// the logical CPU count (the Relative* fields), and process counters.
type LoadInfo struct {
Load1 float64 `json:"load_1"`
Load5 float64 `json:"load_5"`
Load15 float64 `json:"load_15"`
RelativeLoad1 float64 `json:"relative_load_1"`
RelativeLoad5 float64 `json:"relative_load_5"`
RelativeLoad15 float64 `json:"relative_load_15"`
ProcsRunning int `json:"procs_running"` // changed to int
ProcsTotal int `json:"procs_total"` // changed to int
}
// ProcessInfo describes one process: identity, command line, resident memory
// (RSS bytes divided by 1024*1024) and CPU usage.
type ProcessInfo struct {
PID int32 `json:"pid"`
Name string `json:"name"`
Cmdline string `json:"cmdline"`
MemoryMB float64 `json:"memory_mb"`
CPUPercent float64 `json:"cpu_percent"`
}
// HostInfo describes the host: identity, operating system, kernel, boot time
// and a human-readable uptime string.
type HostInfo struct {
Hostname string `json:"hostname"`
OS string `json:"os"`
Platform string `json:"platform"`
PlatformVersion string `json:"platform_version"`
KernelVersion string `json:"kernel_version"`
BootTime time.Time `json:"boot_time"`
Uptime string `json:"uptime"`
CPUCount uint64 `json:"cpu_count"`
Architecture string `json:"architecture"`
HostID string `json:"host_id"`
}
// RuntimeInfo describes the Go runtime of the monitoring process itself.
type RuntimeInfo struct {
GoVersion string `json:"go_version"`
GOOS string `json:"goos"`
GOARCH string `json:"goarch"`
GOROOT string `json:"goroot"`
GOMAXPROCS int `json:"gomaxprocs"`
NumCPU int `json:"num_cpu"`
NumGoroutine int `json:"num_goroutine"`
}
// QuickMetrics is a compact summary of the headline values: CPU, memory and
// root-filesystem usage plus available memory.
type QuickMetrics struct {
CPUPercent float64 `json:"cpu_percent"`
MemoryPercent float64 `json:"memory_percent"`
RootDiskPercent float64 `json:"root_disk_percent"`
AvailableMemoryGB float64 `json:"available_memory_gb"`
}
// NewInfoMonitor creates an InfoMonitor that samples according to cfg and
// offers every sample on metricsChan (which may be nil).
//
// A nil cfg is replaced by a default configuration. Unset or non-positive
// Interval, ProcessLimit and MaxLogSize values, and an empty LogFilePath, are
// replaced by defaults; the defaults are written into cfg itself, so the
// caller's struct is modified.
func NewInfoMonitor(cfg *InfoMonitorConfig, metricsChan chan ServerMetrics) *InfoMonitor {
	const (
		defaultInterval     = 30 * time.Second
		defaultProcessLimit = 10
		defaultMaxLogSize   = 100 * 1024 * 1024 // 100MB
		defaultLogFilePath  = "/var/log/sysmonitord/info_monitor.log"
	)

	if cfg == nil {
		cfg = &InfoMonitorConfig{
			Enabled:        true,
			CollectNetwork: true,
			CollectProcess: true,
		}
	}

	// A non-positive interval must not reach time.NewTicker, which panics
	// on it, so negative values are treated like "unset".
	if cfg.Interval <= 0 {
		cfg.Interval = defaultInterval
	}
	if cfg.ProcessLimit <= 0 {
		cfg.ProcessLimit = defaultProcessLimit
	}
	// A negative size limit would make every rotation check rotate the log.
	if cfg.MaxLogSize <= 0 {
		cfg.MaxLogSize = defaultMaxLogSize
	}
	if cfg.LogFilePath == "" {
		cfg.LogFilePath = defaultLogFilePath
	}

	return &InfoMonitor{
		config:      cfg,
		stopChan:    make(chan struct{}),
		metricsChan: metricsChan,
	}
}
// Start opens the metrics log file and launches the sampling loop on its own
// goroutine. It returns once the loop has been launched; the only error it
// reports is a failure to prepare the log file, wrapped so callers can still
// inspect the underlying cause with errors.Is / errors.As.
func (m *InfoMonitor) Start() error {
	log.Println("启动服务器信息监控...")

	if err := m.initLogFile(); err != nil {
		return fmt.Errorf("初始化日志文件失败: %w", err)
	}

	go m.monitorLoop()

	return nil
}
// Stop signals the background loops to exit by closing the stop channel and
// then closes the metrics log file. Calling Stop again after it has already
// run is a no-op. It returns an error only if closing the log file fails.
func (m *InfoMonitor) Stop() error {
	log.Println("停止服务器信息监控...")

	if m.stopChan != nil {
		select {
		case <-m.stopChan:
			// Already closed by an earlier Stop; closing a closed channel
			// would panic, and the log file has been handled already.
			return nil
		default:
		}
		close(m.stopChan)
	}

	if m.logFile != nil {
		if err := m.logFile.Close(); err != nil {
			return fmt.Errorf("关闭日志文件失败: %w", err)
		}
	}

	return nil
}
// initLogFile makes sure the log directory exists, opens the metrics log in
// append mode (creating it when missing), stores the handle on the monitor
// and starts the background rotation check. An empty configured path falls
// back to the default location.
func (m *InfoMonitor) initLogFile() error {
	cfg := m.config
	if cfg.LogFilePath == "" {
		cfg.LogFilePath = "/var/log/sysmonitord/info_monitor.log"
	}

	if err := os.MkdirAll(filepath.Dir(cfg.LogFilePath), 0755); err != nil {
		return err
	}

	const openFlags = os.O_APPEND | os.O_CREATE | os.O_WRONLY
	handle, err := os.OpenFile(cfg.LogFilePath, openFlags, 0644)
	if err != nil {
		return err
	}
	m.logFile = handle

	go m.logRotateCheck()

	return nil
}
// logRotateCheck wakes up once an hour and rotates the metrics log when its
// size exceeds the configured maximum. It returns when the stop channel is
// closed.
func (m *InfoMonitor) logRotateCheck() {
	hourly := time.NewTicker(1 * time.Hour)
	defer hourly.Stop()

	for {
		select {
		case <-m.stopChan:
			return
		case <-hourly.C:
		}

		if m.logFile == nil {
			continue
		}
		stat, err := m.logFile.Stat()
		if err != nil {
			// The size cannot be determined; try again on the next tick.
			continue
		}
		if stat.Size() > m.config.MaxLogSize {
			m.rotateLogFile()
		}
	}
}
// rotateLogFile closes the current metrics log, renames it to
// "<path>.<YYYYMMDD_HHMMSS>" and opens a new log at the configured path.
//
// The rename happens before the path is reopened, and the reopen uses append
// mode rather than truncation: if the rename fails, logging simply continues
// in the existing file and no data is lost. If the log cannot be reopened the
// handle is cleared so that later writes are skipped instead of hitting a
// closed file.
func (m *InfoMonitor) rotateLogFile() {
	if m.logFile == nil {
		return
	}

	logPath := m.config.LogFilePath
	backupPath := fmt.Sprintf("%s.%s", logPath, time.Now().Format("20060102_150405"))

	if err := m.logFile.Close(); err != nil {
		log.Printf("关闭日志文件失败: %v", err)
	}

	// Move the full log out of the way first so its contents are preserved.
	if err := os.Rename(logPath, backupPath); err != nil {
		log.Printf("重命名日志文件失败: %v", err)
	}

	file, err := os.OpenFile(logPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		log.Printf("重新打开日志文件失败: %v", err)
		m.logFile = nil
		return
	}
	m.logFile = file
}
// monitorLoop takes one sample immediately and then one per configured
// interval until the stop channel is closed.
func (m *InfoMonitor) monitorLoop() {
	m.collectAndLogMetrics()

	tick := time.NewTicker(m.config.Interval)
	defer tick.Stop()

	for {
		select {
		case <-tick.C:
			m.collectAndLogMetrics()
		case <-m.stopChan:
			return
		}
	}
}
// collectAndLogMetrics takes one full sample, logs how long collection took,
// appends the sample to the metrics log, offers it on the metrics channel
// without blocking (the sample is dropped when the channel is full) and
// finally prints it to the console.
func (m *InfoMonitor) collectAndLogMetrics() {
	began := time.Now()
	snapshot := m.collectAllMetrics()
	log.Printf("收集指标完成,耗时: %v", time.Since(began))

	m.logMetrics(snapshot)

	if out := m.metricsChan; out != nil {
		select {
		case out <- snapshot:
		default:
			// Never stall sampling on a slow consumer.
			log.Printf("警告: metrics通道已满丢弃数据")
		}
	}

	m.displayMetrics(snapshot)
}
// collectAllMetrics runs every collector in turn and assembles the results,
// stamped with the time collection began, into one ServerMetrics value.
func (m *InfoMonitor) collectAllMetrics() ServerMetrics {
	var sample ServerMetrics

	sample.Timestamp = time.Now()
	sample.CPU = m.getCPUInfo()
	sample.Memory = m.getMemoryInfo()
	sample.Disk = m.getDiskInfo()
	sample.Network = m.getNetworkInfo()
	sample.Load = m.getLoadInfo()
	sample.Processes = m.getProcessInfo()
	sample.Host = m.getHostInfo()
	sample.Runtime = m.getRuntimeInfo()
	sample.QuickMetrics = m.getQuickMetrics()

	return sample
}
// getHostInfo returns host identity, platform and uptime information. On
// failure the error is logged and a zero HostInfo is returned.
func (m *InfoMonitor) getHostInfo() HostInfo {
	hostInfo, err := host.Info()
	if err != nil {
		log.Printf("获取主机信息失败: %v", err)
		return HostInfo{}
	}

	bootTime := time.Unix(int64(hostInfo.BootTime), 0)
	uptime := time.Since(bootTime)

	// Hours are left unbounded (no day component); minutes and seconds are
	// the remainders within the hour and the minute.
	hours := int(uptime.Hours())
	minutes := int(uptime.Minutes()) % 60
	seconds := int(uptime.Seconds()) % 60

	// hostInfo.Procs is the number of running processes, not the number of
	// CPUs, so the CPU count is taken from the cpu package instead, with the
	// Go runtime's view as a fallback.
	cpuCount, err := cpu.Counts(true)
	if err != nil || cpuCount <= 0 {
		cpuCount = runtime.NumCPU()
	}

	return HostInfo{
		Hostname:        hostInfo.Hostname,
		OS:              hostInfo.OS,
		Platform:        hostInfo.Platform,
		PlatformVersion: hostInfo.PlatformVersion,
		KernelVersion:   hostInfo.KernelVersion,
		BootTime:        bootTime,
		Uptime:          fmt.Sprintf("%d小时%d分钟%d秒", hours, minutes, seconds),
		CPUCount:        uint64(cpuCount),
		Architecture:    hostInfo.KernelArch,
		HostID:          hostInfo.HostID,
	}
}
// getCPUInfo returns core counts, overall and per-core usage (each measured
// over a 200ms window) and the model, clock and cache size of the first CPU
// reported. Collector errors are ignored; the affected fields keep their
// zero values.
func (m *InfoMonitor) getCPUInfo() CPUInfo {
	const sampleWindow = 200 * time.Millisecond

	var info CPUInfo
	info.Cores, _ = cpu.Counts(false)
	info.LogicalCores, _ = cpu.Counts(true)

	if overall, _ := cpu.Percent(sampleWindow, false); len(overall) > 0 {
		info.UsagePercent = overall[0]
	}
	info.PerCorePercent, _ = cpu.Percent(sampleWindow, true)

	if models, _ := cpu.Info(); len(models) > 0 {
		first := models[0]
		info.Model = first.ModelName
		info.Mhz = first.Mhz
		// CacheSize is an int32 in the source data; widen it to int.
		info.CacheSize = int(first.CacheSize)
	}

	return info
}
// getMemoryInfo returns physical memory and swap usage. If virtual memory
// statistics cannot be read the error is logged and a zero MemoryInfo is
// returned; if only the swap statistics are unavailable the swap fields are
// left at zero.
func (m *InfoMonitor) getMemoryInfo() MemoryInfo {
	const bytesPerGB = 1024 * 1024 * 1024

	vMem, err := mem.VirtualMemory()
	if err != nil {
		log.Printf("获取内存信息失败: %v", err)
		return MemoryInfo{}
	}

	info := MemoryInfo{
		TotalGB:     float64(vMem.Total) / bytesPerGB,
		UsedGB:      float64(vMem.Used) / bytesPerGB,
		AvailableGB: float64(vMem.Available) / bytesPerGB,
		UsedPercent: vMem.UsedPercent,
	}

	// The swap result can be nil when the call fails, so it must be checked
	// before it is dereferenced.
	swap, err := mem.SwapMemory()
	if err != nil {
		log.Printf("获取交换分区信息失败: %v", err)
	}
	if swap != nil {
		info.SwapTotalGB = float64(swap.Total) / bytesPerGB
		info.SwapUsedGB = float64(swap.Used) / bytesPerGB
	}

	return info
}
// getDiskInfo returns usage figures for every partition reported by
// disk.Partitions(false), skipping a fixed list of special filesystem types
// and any mountpoint whose usage cannot be read. A failure to list the
// partitions is logged and yields nil.
func (m *InfoMonitor) getDiskInfo() []DiskInfo {
	partitions, err := disk.Partitions(false)
	if err != nil {
		log.Printf("获取磁盘分区失败: %v", err)
		return nil
	}

	var result []DiskInfo
	for _, part := range partitions {
		// Skip special filesystems.
		switch part.Fstype {
		case "", "tmpfs", "devtmpfs", "squashfs", "efivarfs", "debugfs",
			"securityfs", "cgroup", "cgroup2", "pstore", "autofs":
			continue
		}

		usage, err := disk.Usage(part.Mountpoint)
		if err != nil {
			continue
		}

		// Only a positive inode percentage is carried over.
		var inodes float64
		if usage.InodesUsedPercent > 0 {
			inodes = usage.InodesUsedPercent
		}

		entry := DiskInfo{
			Mountpoint:    part.Mountpoint,
			Device:        part.Device,
			Fstype:        part.Fstype,
			TotalGB:       float64(usage.Total) / (1024 * 1024 * 1024),
			UsedGB:        float64(usage.Used) / (1024 * 1024 * 1024),
			FreeGB:        float64(usage.Free) / (1024 * 1024 * 1024),
			UsedPercent:   usage.UsedPercent,
			InodesPercent: inodes,
		}
		result = append(result, entry)
	}

	return result
}
// getNetworkInfo returns the interfaces that have at least one address
// (excluding "lo"), the received/sent byte totals summed over all interface
// counters, and the number of TCP connections together with how many of them
// are ESTABLISHED. It returns a zero NetworkInfo when network collection is
// disabled or the interfaces cannot be listed.
func (m *InfoMonitor) getNetworkInfo() NetworkInfo {
	var info NetworkInfo
	if !m.config.CollectNetwork {
		return info
	}

	nics, err := net.Interfaces()
	if err != nil {
		log.Printf("获取网络接口失败: %v", err)
		return info
	}

	for _, nic := range nics {
		if nic.Name == "lo" || len(nic.Addrs) == 0 {
			continue
		}
		var addrs []string
		for _, a := range nic.Addrs {
			addrs = append(addrs, a.Addr)
		}
		info.Interfaces = append(info.Interfaces, NetworkInterface{
			Name:         nic.Name,
			HardwareAddr: nic.HardwareAddr,
			IPAddresses:  addrs,
		})
	}

	// Traffic totals; a failed lookup leaves them at zero.
	counters, _ := net.IOCounters(true)
	var recvBytes, sentBytes uint64
	for _, c := range counters {
		recvBytes += c.BytesRecv
		sentBytes += c.BytesSent
	}
	info.TotalRecvMB = float64(recvBytes) / (1024 * 1024)
	info.TotalSentMB = float64(sentBytes) / (1024 * 1024)

	// TCP connection counts; a failed lookup leaves them at zero.
	conns, _ := net.Connections("tcp")
	info.TCPConnections = len(conns)
	for _, c := range conns {
		if c.Status == "ESTABLISHED" {
			info.EstablishedConn++
		}
	}

	return info
}
// getLoadInfo returns the 1/5/15-minute load averages, the same values
// divided by the logical CPU count, and the running/total process counters.
// If the load averages cannot be read the error is logged and a zero
// LoadInfo is returned; if only the process counters are unavailable they
// are left at zero.
func (m *InfoMonitor) getLoadInfo() LoadInfo {
	avg, err := load.Avg()
	if err != nil {
		log.Printf("获取系统负载失败: %v", err)
		return LoadInfo{}
	}

	info := LoadInfo{
		Load1:  avg.Load1,
		Load5:  avg.Load5,
		Load15: avg.Load15,
	}

	// Relative load is only meaningful with a known, positive core count.
	if logicalCount, _ := cpu.Counts(true); logicalCount > 0 {
		cores := float64(logicalCount)
		info.RelativeLoad1 = avg.Load1 / cores
		info.RelativeLoad5 = avg.Load5 / cores
		info.RelativeLoad15 = avg.Load15 / cores
	}

	// The counters result can be nil when the call fails, so it must be
	// checked before it is dereferenced.
	misc, err := load.Misc()
	if err != nil {
		log.Printf("获取进程统计失败: %v", err)
	}
	if misc != nil {
		info.ProcsRunning = misc.ProcsRunning
		info.ProcsTotal = misc.ProcsTotal
	}

	return info
}
// getProcessInfo returns up to ProcessLimit processes (10 when the limit is
// not positive), ordered by resident memory, largest first. Processes whose
// name cannot be read or is blank are skipped. It returns nil when process
// collection is disabled or the process list cannot be read.
//
// The console view labels this list as sorted by memory, so the top entries
// are selected by RSS rather than taken in enumeration order. Name and memory
// are read for every process; the command line and CPU usage are read only
// for the processes that make the final list.
func (m *InfoMonitor) getProcessInfo() []ProcessInfo {
	if !m.config.CollectProcess {
		return nil
	}

	processes, err := process.Processes()
	if err != nil {
		log.Printf("获取进程列表失败: %v", err)
		return nil
	}

	limit := m.config.ProcessLimit
	if limit <= 0 {
		limit = 10 // default number of processes
	}

	// candidate pairs a process with the name and RSS sampled for ranking.
	type candidate struct {
		proc *process.Process
		name string
		rss  uint64
	}

	// top holds at most limit candidates, kept sorted by rss descending.
	var top []candidate
	for _, p := range processes {
		name, err := p.Name()
		if err != nil || name == "" || name == " " {
			continue
		}

		var rss uint64
		if memInfo, err := p.MemoryInfo(); err == nil && memInfo != nil {
			rss = memInfo.RSS
		}

		// Find the insertion point; equal values keep enumeration order.
		pos := len(top)
		for pos > 0 && top[pos-1].rss < rss {
			pos--
		}
		if pos >= limit {
			continue // smaller than everything in an already full list
		}

		top = append(top, candidate{})
		copy(top[pos+1:], top[pos:])
		top[pos] = candidate{proc: p, name: name, rss: rss}
		if len(top) > limit {
			top = top[:limit]
		}
	}

	var procList []ProcessInfo
	for _, c := range top {
		// Best effort: a process may exit between ranking and this lookup,
		// in which case these fields stay at their zero values.
		cmdline, _ := c.proc.Cmdline()
		cpuPercent, _ := c.proc.CPUPercent()

		procList = append(procList, ProcessInfo{
			PID:        c.proc.Pid,
			Name:       c.name,
			Cmdline:    cmdline,
			MemoryMB:   float64(c.rss) / (1024 * 1024),
			CPUPercent: cpuPercent,
		})
	}

	return procList
}
// getRuntimeInfo reports the Go runtime details of the monitoring process:
// version, target OS/architecture, GOROOT, GOMAXPROCS, CPU count and the
// current number of goroutines.
func (m *InfoMonitor) getRuntimeInfo() RuntimeInfo {
	var rt RuntimeInfo

	rt.GoVersion = runtime.Version()
	rt.GOOS = runtime.GOOS
	rt.GOARCH = runtime.GOARCH
	rt.GOROOT = runtime.GOROOT()
	// GOMAXPROCS(0) only queries the current setting.
	rt.GOMAXPROCS = runtime.GOMAXPROCS(0)
	rt.NumCPU = runtime.NumCPU()
	rt.NumGoroutine = runtime.NumGoroutine()

	return rt
}
// getQuickMetrics returns the headline values: overall CPU usage measured
// over a 100ms window, memory usage and available memory, and usage of the
// root filesystem. Any value that cannot be read stays at zero.
func (m *InfoMonitor) getQuickMetrics() QuickMetrics {
	var quick QuickMetrics

	if samples, _ := cpu.Percent(100*time.Millisecond, false); len(samples) > 0 {
		quick.CPUPercent = samples[0]
	}

	if vm, _ := mem.VirtualMemory(); vm != nil {
		quick.MemoryPercent = vm.UsedPercent
		quick.AvailableMemoryGB = float64(vm.Available) / (1024 * 1024 * 1024)
	}

	if root, _ := disk.Usage("/"); root != nil {
		quick.RootDiskPercent = root.UsedPercent
	}

	return quick
}
// logMetrics appends a two-line record for the sample to the metrics log (a
// summary line followed by a detail line) and then syncs the file. It does
// nothing when no log file is open; a write failure is reported through the
// standard logger.
func (m *InfoMonitor) logMetrics(metrics ServerMetrics) {
	out := m.logFile
	if out == nil {
		return
	}

	quick := metrics.QuickMetrics

	// Summary line.
	summary := fmt.Sprintf("[INFO-METRIC] %s | CPU:%.2f%% | MEM:%.2f%% | Load1:%.2f | DiskRoot:%.2f%%",
		metrics.Timestamp.Format("2006-01-02 15:04:05"),
		quick.CPUPercent,
		quick.MemoryPercent,
		metrics.Load.Load1,
		quick.RootDiskPercent,
	)

	// Detail line; its leading newline separates it from the summary.
	detail := fmt.Sprintf("\n[INFO-DETAIL] Host: %s, Uptime: %s, Cores: %d/%d, Mem: %.2f/%.2f GB",
		metrics.Host.Hostname,
		metrics.Host.Uptime,
		metrics.CPU.Cores,
		metrics.CPU.LogicalCores,
		metrics.Memory.UsedGB,
		metrics.Memory.TotalGB,
	)

	if _, err := out.WriteString(summary + detail + "\n"); err != nil {
		log.Printf("写入日志文件失败: %v", err)
	}

	// Flush to disk so the record is not lost with the process.
	out.Sync()
}
// displayMetrics prints a colourised, human-readable report of the sample to
// standard output: host, CPU (overall and per core), memory, disks, load,
// network (when enabled and interfaces are present) and the first five
// entries of the process list.
//
// Values are coloured by severity: usage percentages turn yellow above 70 and
// red above 90, relative load above 1.0 and 2.0, and per-process CPU above 10
// and 30.
func (m *InfoMonitor) displayMetrics(metrics ServerMetrics) {
	const (
		green = "\x1b[32m"
		cyan  = "\x1b[36m"
	)

	fmt.Printf("\n\x1b[36m════════════════ 服务器监控指标 [%s] ════════════════\x1b[0m\n",
		metrics.Timestamp.Format("15:04:05"))

	// Host.
	fmt.Printf("\n\x1b[33m主机信息:\x1b[0m\n")
	fmt.Printf(" \x1b[32m主机名:\x1b[0m %s\n", metrics.Host.Hostname)
	fmt.Printf(" \x1b[32m运行时间:\x1b[0m %s\n", metrics.Host.Uptime)
	fmt.Printf(" \x1b[32m系统:\x1b[0m %s %s\n", metrics.Host.Platform, metrics.Host.PlatformVersion)

	// CPU.
	fmt.Printf("\n\x1b[33mCPU信息:\x1b[0m\n")
	fmt.Printf(" \x1b[32m型号:\x1b[0m %s\n", metrics.CPU.Model)
	fmt.Printf(" \x1b[32m核心:\x1b[0m %d物理/%d逻辑\n", metrics.CPU.Cores, metrics.CPU.LogicalCores)
	fmt.Printf(" \x1b[32m使用率:\x1b[0m %s%.2f%%\x1b[0m",
		thresholdColor(metrics.CPU.UsagePercent, 70, 90, green), metrics.CPU.UsagePercent)

	if len(metrics.CPU.PerCorePercent) > 0 {
		fmt.Printf(" (")
		for i, p := range metrics.CPU.PerCorePercent {
			if i > 0 {
				fmt.Printf(" ")
			}
			fmt.Printf("%s%d:%.0f%%\x1b[0m", thresholdColor(p, 70, 90, green), i, p)
		}
		fmt.Printf(")")
	}
	fmt.Println()

	// Memory.
	fmt.Printf("\n\x1b[33m内存信息:\x1b[0m\n")
	fmt.Printf(" \x1b[32m使用率:\x1b[0m %s%.1f%%\x1b[0m %s\n",
		thresholdColor(metrics.Memory.UsedPercent, 70, 90, green),
		metrics.Memory.UsedPercent,
		getProgressBar(metrics.Memory.UsedPercent, 20))
	fmt.Printf(" \x1b[32m总量/已用/可用:\x1b[0m %.2f/%.2f/%.2f GB\n",
		metrics.Memory.TotalGB, metrics.Memory.UsedGB, metrics.Memory.AvailableGB)

	if metrics.Memory.SwapTotalGB > 0 {
		fmt.Printf(" \x1b[32m交换空间:\x1b[0m %.2f GB\n", metrics.Memory.SwapTotalGB)
	}

	// Disks.
	if len(metrics.Disk) > 0 {
		fmt.Printf("\n\x1b[33m磁盘使用情况:\x1b[0m\n")
		for _, d := range metrics.Disk {
			fmt.Printf(" \x1b[32m%s:\x1b[0m %s%.1f%%\x1b[0m %s %.2f/%.2f GB\n",
				d.Mountpoint,
				thresholdColor(d.UsedPercent, 70, 90, green),
				d.UsedPercent,
				getProgressBar(d.UsedPercent, 15),
				d.UsedGB, d.TotalGB)
		}
	}

	// Load. The colour follows the relative (per-core) 1-minute load.
	fmt.Printf("\n\x1b[33m系统负载:\x1b[0m\n")
	fmt.Printf(" \x1b[32m1/5/15分钟:\x1b[0m %s%.2f\x1b[0m/%.2f/%.2f\n",
		thresholdColor(metrics.Load.RelativeLoad1, 1.0, 2.0, green),
		metrics.Load.Load1, metrics.Load.Load5, metrics.Load.Load15)
	fmt.Printf(" \x1b[32m相对负载:\x1b[0m %.2f/%.2f/%.2f\n",
		metrics.Load.RelativeLoad1, metrics.Load.RelativeLoad5, metrics.Load.RelativeLoad15)
	fmt.Printf(" \x1b[32m进程:\x1b[0m %d运行中 / %d总计\n",
		metrics.Load.ProcsRunning, metrics.Load.ProcsTotal)

	// Network (only when enabled and at least one interface was found).
	if m.config.CollectNetwork && len(metrics.Network.Interfaces) > 0 {
		fmt.Printf("\n\x1b[33m网络信息:\x1b[0m\n")
		fmt.Printf(" \x1b[32mTCP连接:\x1b[0m %d (已建立: %d)\n",
			metrics.Network.TCPConnections, metrics.Network.EstablishedConn)
		fmt.Printf(" \x1b[32m流量:\x1b[0m 接收:%.2f MB 发送:%.2f MB\n",
			metrics.Network.TotalRecvMB, metrics.Network.TotalSentMB)
	}

	// Processes: at most the first five entries of the collected list.
	if len(metrics.Processes) > 0 {
		fmt.Printf("\n\x1b[33mTOP进程 (按内存排序):\x1b[0m\n")

		shown := metrics.Processes
		if len(shown) > 5 {
			shown = shown[:5]
		}
		for _, proc := range shown {
			procColor := thresholdColor(proc.CPUPercent, 10, 30, cyan)
			fmt.Printf(" %s%5d\x1b[0m %-20s %s%.1f%%\x1b[0m %.1fMB %s\n",
				procColor, proc.PID, proc.Name, procColor, proc.CPUPercent,
				proc.MemoryMB, truncateForDisplay(proc.Cmdline, 50))
		}
	}

	fmt.Printf("\n\x1b[36m══════════════════════════════════════════════════════\x1b[0m\n")
}

// thresholdColor returns the ANSI colour sequence for value: red when it is
// above crit, yellow when it is above warn, and normal otherwise.
func thresholdColor(value, warn, crit float64, normal string) string {
	switch {
	case value > crit:
		return "\x1b[31m"
	case value > warn:
		return "\x1b[33m"
	default:
		return normal
	}
}

// truncateForDisplay shortens s to at most limit characters, replacing the
// tail with "..." when it is cut. It counts and cuts whole runes so that a
// multi-byte character is never split in the middle.
func truncateForDisplay(s string, limit int) string {
	if len(s) <= limit {
		return s // byte length within the limit implies rune count is too
	}
	runes := []rune(s)
	if len(runes) <= limit {
		return s
	}
	return string(runes[:limit-3]) + "..."
}
// getProgressBar renders percent as a bracketed bar of width cells. Filled
// cells are '=' characters coloured by position: green in the first third of
// the bar, yellow in the second and red in the last. Unfilled cells are
// spaces. The number of filled cells is capped at width.
func getProgressBar(percent float64, width int) string {
	const (
		greenCell  = "\x1b[32m=\x1b[0m"
		yellowCell = "\x1b[33m=\x1b[0m"
		redCell    = "\x1b[31m=\x1b[0m"
	)

	filled := int((percent / 100.0) * float64(width))
	if filled > width {
		filled = width
	}
	greenEnd, yellowEnd := width/3, width*2/3

	out := []byte{'['}
	for col := 0; col < width; col++ {
		switch {
		case col >= filled:
			out = append(out, ' ')
		case col < greenEnd:
			out = append(out, greenCell...)
		case col < yellowEnd:
			out = append(out, yellowCell...)
		default:
			out = append(out, redCell...)
		}
	}
	out = append(out, ']')

	return string(out)
}