From a9ee997d72f03f67f63a4a5e11472fd3d12d628f Mon Sep 17 00:00:00 2001 From: wuko233 Date: Sun, 22 Mar 2026 12:16:48 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=A0=B9=E6=8D=AE=E6=96=87=E6=A1=A3?= =?UTF-8?q?=E8=A7=84=E8=8C=83=E9=87=8D=E5=86=99=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + cmd/sysmonitord/main.go | 116 ++++++++----- docs/ARCHITECTURE.md | 267 ++++++++++++++++++++++++++++++ docs/MessageProtocol.md | 277 -------------------------------- internal/config/config.go | 59 ++++--- internal/config/loader.go | 55 +++++++ internal/monitor/ssh_monitor.go | 4 +- internal/monitor/types.go | 79 +++++++-- internal/network/client.go | 90 +++++++++-- internal/network/types.go | 69 +++++++- internal/scanner/scanner.go | 17 +- internal/scanner/watcher.go | 25 ++- internal/whitelist/manager.go | 80 ++++++--- 13 files changed, 730 insertions(+), 409 deletions(-) create mode 100644 docs/ARCHITECTURE.md delete mode 100644 docs/MessageProtocol.md create mode 100644 internal/config/loader.go diff --git a/.gitignore b/.gitignore index 65989ac..d9fe637 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,4 @@ test/ # 编译文件 sysmonitord fixroad.md +.claude/plugins.json diff --git a/cmd/sysmonitord/main.go b/cmd/sysmonitord/main.go index eff1550..51a59c3 100644 --- a/cmd/sysmonitord/main.go +++ b/cmd/sysmonitord/main.go @@ -19,7 +19,6 @@ import ( const ( OfficialConfigURL = "http://localhost:8090/api/v1/configs/official.json" UserConfigURL = "http://localhost:8090/api/v1/configs/user.json" - CenterServerURL = "ws://localhost:8090/api/v1/ws" ) func main() { @@ -49,12 +48,12 @@ func main() { if err != nil { log.Fatalf("[启动错误]下载配置失败: %v", err) - log.Println("[启动降级] 继续使用默认空配置...") - // os.Exit(1) } + CenterServerURL := determineServerURL(officialCfg, userCfg) + log.Println("[启动流程] 2/6: 初始化白名单判定引擎...") - wlManager := whitelist.NewManager() + wlManager := whitelist.NewManager(officialCfg, userCfg) wlManager.UpdateConfig(officialCfg, userCfg) log.Println("[启动流程] 3/6: 启动中心服务器连接...") @@ -78,53 +77,66 @@ func main() { log.Println("[启动流程] 4/6: 启动文件完整性防护...") + var sysScanner *scanner.Scanner + var sysWatcher *scanner.Watcher + var sshMon *monitor.SSHMonitor + var infoMon *monitor.InfoMonitor + // 扫盘器 - sysScanner := scanner.NewScanner(wlManager, centerClient) - sysScanner.Start() + if userCfg.Modules.FileScanner { + sysScanner = scanner.NewScanner(wlManager, centerClient) + sysScanner.Start() + log.Printf("[监控信息] 文件周期扫描器已启动") + } // 监控器 - sysWatcher, err := scanner.NewWatcher(wlManager, centerClient) - if err != nil { - log.Fatalf("[启动错误] 初始化监控器失败: %v", err) - } else { - sysWatcher.Start() + if userCfg.Modules.FileWatcher { + var err error + sysWatcher, err = scanner.NewWatcher(wlManager, centerClient) + if err != nil { + log.Fatalf("[启动错误] 初始化监控器失败: %v", err) + } else { + sysWatcher.Start() + log.Printf("[监控信息] 文件监控已启动") + } } log.Println("[启动流程] 5/6: 启动系统行为监控...") // SSH监控 - sshAlertChan := make(chan monitor.Alert, 100) - sshMon := monitor.NewSSHMonitor(&config.SSHMonitor{ - Enabled: true, - AlertOnRootLogin: true, - DisplayOnShell: true, - }, sshAlertChan) + if userCfg.Modules.SSHMonitor { + sshAlertChan := make(chan monitor.Alert, 100) + sshMon = monitor.NewSSHMonitor(&userCfg.MonitorConfig.SSHMonitorConfig, sshAlertChan) + go func() { + for alert := range sshAlertChan { + packet := network.NewPacket("SSH_ALERT", alert) + auditClient.SendQueue(packet) + } + }() - go func() { - for alert := range sshAlertChan { - packet := network.NewPacket("SSH_ALERT", alert) - auditClient.SendQueue(packet) - } - }() - - go func() { - if err := sshMon.Start(); err != nil { - log.Printf("[监控错误] SSH监控遇到错误: %v", err) - } - }() + go func() { + if err := sshMon.Start(); err != nil { + log.Printf("[监控错误] SSH监控遇到错误: %v", err) + } + log.Printf("[监控信息] SSH监控已启动") + }() + } // 状态监控 - metricsChan := make(chan monitor.ServerMetrics, 100) - infoMon := monitor.NewInfoMonitor(nil, metricsChan) + if userCfg.Modules.SystemMonitor { + metricsChan := make(chan monitor.ServerMetrics, 100) + infoMon = monitor.NewInfoMonitor(nil, metricsChan) - go func() { - for metrics := range metricsChan { - packet := network.NewPacket("STATUS_UPDATE", metrics) - centerClient.SendQueue(packet) - } - }() + go func() { + for metrics := range metricsChan { + packet := network.NewPacket("STATUS_UPDATE", metrics) + centerClient.SendQueue(packet) + } + }() - go infoMon.Start() + go infoMon.Start() + log.Printf("[监控信息] 系统状态监控已启动") + } log.Println("[启动流程] 6/6: 系统监控守护进程启动完成!") @@ -136,14 +148,36 @@ func main() { if sysWatcher != nil { sysWatcher.Stop() } - sysScanner.Stop() - sshMon.Stop() - infoMon.Stop() + + if sysScanner != nil { + sysScanner.Stop() + } + if sshMon != nil { + sshMon.Stop() + } + if infoMon != nil { + infoMon.Stop() + } + if centerClient != nil { + centerClient.Stop() + } + centerClient.Stop() auditClient.Stop() + log.Println("[守护进程] 已成功停止,安全退出程序。") } +func determineServerURL(officialCfg config.OfficialConfig, userCfg config.UserConfig) string { + // 服务器地址仅从用户配置获取(符合 ARCHITECTURE.md 规范) + if userCfg.Connection.CenterServerURL != "" { + return userCfg.Connection.CenterServerURL + } + + // 默认地址 + return "ws://localhost:8090/api/v1/ws" +} + func initLogger() { log.SetOutput(os.Stdout) fileLogger := &lumberjack.Logger{ diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..8fb8c54 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,267 @@ +# SysMonitord 开发规范与架构说明书 + +> **版本**: v1.0.1 +> **日期**: 2026-03-22 + +--- + +## 1. 设计理念与架构总览 + +### 1.1 核心架构 +SysMonitord 采用 **模块化** 设计,旨在降低耦合度,提升扩展性。系统主要包含以下核心域: + +* **Config Domain (配置域)**: 负责配置的拉取、解析、合并与热更新。 +* **Monitor Domain (监控域)**: 负责SSH审计、系统基础指标采集。 +* **Security Domain (安全域)**: 负责文件完整性扫描与实时防护。 +* **Network Domain (通讯域)**: 负责与中心服务器的 WebSocket 长连接通讯。 + +### 1.2 配置分层设计 (核心规范) +为了解决“配置混乱、前后端字段对不上”以及“官方策略与用户环境耦合”的问题,我们将配置严格划分为两个独立的 JSON 文件: + +1. **官方策略**: `official.json` + * **权限**: 只读,由官方/安全团队维护,随版本更新发布。 + * **内容**: **仅包含安全基线**(核心白名单Hash、强制忽略路径、扫描范围)。 + * **注意**: **不再包含服务器地址**,因为官方策略包应适用于所有客户环境,不应硬编码服务端地址。 + * **优先级**: 基线级(定义“什么是合法的”)。 + +2. **用户配置**: `user.json` + * **权限**: 读写,由用户/运维人员维护,可通过控制台动态下发。 + * **内容**: **运行时环境配置**(服务器地址、审计开关、业务自定义白名单、性能阈值)。 + * **优先级**: 补充级(在官方基线之上进行扩充或覆盖特定参数)。 + +--- + +## 2. 配置文件数据结构定义 + +> **重要**: 前后端交互必须严格遵循以下数据结构定义。字段命名统一使用 **snake_case** (下划线命名法)。 + +### 2.1 官方策略结构 +**用途**: 定义不可篡改的安全基线,与具体部署环境无关。 + +```json +{ + "version": "1.0.20260322", + "whitelist_files": { + "/bin/ls": ["sha256:abc123def456..."], + "/bin/cat": ["sha256:789xyz..."], + "/usr/bin/top": ["sha256:example123..."] + }, + "whitelist_processes": [ + "systemd", + "sshd", + "dockerd", + "nginx", + "python3" + ], + "ignored_paths": [ + "/tmp/", + "/var/log/", + "/proc/", + "/sys/", + "/dev/" + ], + "scan_paths": [ + "/bin", + "/sbin", + "/usr/bin", + "/usr/sbin", + "/etc/init.d" + ] +} +``` + +**字段说明**: +* `whitelist_files`: `Map`。Key 为绝对路径,Value 为允许的 Hash 列表(支持多版本二进制)。 +* `ignored_paths`: `[]string`。忽略扫描的目录前缀,用于减少无效报警和性能消耗。 + +### 2.2 用户配置结构 +**用途**: 定义业务个性化需求及连接参数。 + +```json +{ + "version": "user_v1", + "connection": { + "center_server_url": "ws://localhost:8090/api/v1/ws", + "audit_server_url": "" + }, + "modules": { + "file_scanner": false, + "file_watcher": true, + "ssh_monitor": true, + "system_monitor": true + }, + "supplement_files": { + "/home/admin/app/myapp": ["sha256:user_hash_123..."] + }, + "supplement_processes": [ + "java_app", + "python_worker", + "my_custom_service" + ], + "ignored_paths": [ + "/home/admin/logs/", + "/var/cache/myapp/" + ], + "monitor_config": { + "ssh_monitor": { + "enabled": true, + "alert_on_root_login": true + }, + "system_monitor": { + "collect_interval": "10s", + "collect_network": true, + "collect_process": true, + "process_limit": 10, + "scan_cpu_threshold": 80 + } + } +} +``` + +**字段变更说明**: +* `connection`: 服务器地址**仅**在此处配置,实现环境与策略分离。 +* `supplement_processes`: 统一为数组格式 `[]string`,简化白名单管理逻辑。 +* `scan_cpu_threshold`: 新增字段,允许用户自定义扫描时的CPU上限,默认推荐 80。 + +--- + +## 3. 通讯协议规范 + +Agent 与 Server 之间通过 WebSocket 进行全双工通讯。数据格式采用统一的 JSON 信封结构。 + +### 3.1 消息信封格式 + +所有下行和上行消息均遵循以下结构: + +```go +// 位于 network/types.go +type Packet struct { + Type string `json:"type"` // 消息类型,大写下划线命名 + Timestamp int64 `json:"timestamp"` // Unix 时间戳 (秒级) + Code int `json:"code"` // 状态码:200=成功, 400=参数错误, 500=服务器错误 + Payload interface{} `json:"payload"` // 实际业务数据负载 +} +``` + +### 3.2 上行消息类型定义 + +| Type 常量 | Payload 结构 | 说明 | 触发频率 | +| :--- | :--- | :--- | :--- | +| `STATUS_UPDATE` | `SystemMetrics` | 系统状态心跳 | 30s/次 | +| `SSH_ALERT` | `SSHLoginEvent` | SSH 登录审计日志 | 事件触发 | +| `REALTIME_FILE_ALERT` | `FileEventPayload` | 实时文件篡改/新增告警 | 事件触发 | +| `SCAN_RESULT` | `FileEventPayload` | 周期性全盘扫描结果 | 周期触发 | + +**Payload 结构定义**: + +```go +// 1. 系统状态心跳 +type SystemMetrics struct { + CpuPercent float64 `json:"cpu_percent"` // 保留2位小数,例: 45.23 + MemPercent float64 `json:"mem_percent"` + DiskUsage float64 `json:"disk_usage"` // 根分区使用率 + LoadAvg1 float64 `json:"load_avg_1"` // 1分钟负载 + AgentVersion string `json:"agent_version"` // Agent 当前版本 +} + +// 2. SSH 审计日志 +type SSHLoginEvent struct { + User string `json:"user"` // 登录用户名 + IP string `json:"ip"` // 来源IP + Port int `json:"port"` // 来源端口 + Status string `json:"status"` // "SUCCESS" 或 "FAILED" + Time int64 `json:"event_time"` // 事件发生时间戳 + Method string `json:"method"` // "password" 或 "key" +} + +// 3. 文件告警负载 +type FileEventPayload struct { + FilePath string `json:"filepath"` + Operation string `json:"operation"` // CREATE, MODIFY, DELETE + Status string `json:"status"` // DETECTED, HASH_MISMATCH + Timestamp int64 `json:"event_time"` +} +``` + +### 3.3 下行消息类型定义 + +| Type 常量 | Payload 结构 | 说明 | +| :--- | :--- | :--- | +| `CONFIG_UPDATE` | `{ "url": "..." }` | 通知 Agent 配置已更新,需重新拉取 | +| `TASK_SCAN` | `{ "path": "/" }` | 下发即时扫描任务 | +| `TASK_STOP` | `null` | 停止指定模块 | +| `COMMAND_RESPONSE` | `{ "result": "ok" }` | 服务端对上行消息的确认或错误返回 | + +--- + +## 4. 核心模块实现规范 + +### 4.1 配置加载器 + +**逻辑规范**: +1. 启动时加载 `user.json` 获取服务器地址。 +2. 连接服务器,通过 HTTP GET 请求拉取最新的 `official.json`。 +3. 将两者合并为内存配置对象 `RuntimeConfig`。 + +**错误处理**: +* 若 `user.json` 缺失,尝试连接默认地址或提示启动失败(视部署策略而定)。 +* 若拉取 `official.json` 失败,使用本地缓存(如果存在)继续运行,并输出 WARN 日志,不应 Crash 进程。 + +### 4.2 白名单管理器 + +**逻辑规范**: +1. **判定优先级**: 首先检查 `IgnoredPaths` -> 其次检查 `WhitelistFiles`。 +2. **合并策略 (重要)**: + * 对于文件白名单:采用 **并集策略**。 + * 若 `official.json` 允许 Hash A,`user.json` 允许 Hash B,则该文件 Hash 为 A 或 B 均判定为合法。 + * 实现示例: + ```go + // 伪代码 + allowedHashes := append(officialConfig.WhitelistFiles[path], userConfig.SupplementFiles[path]...) + if contains(allowedHashes, currentHash) { status = SAFE } + ``` + +3. **判定结果状态**: + * `IGNORED`: 在忽略列表中 + * `SAFE`: 在白名单中且 Hash 匹配 + * `NON_WHITELISTED`: 未在白名单中 + * `HASH_MISMATCH`: 在白名单中但 Hash 不匹配 + +4. **并发安全**: 必须使用 `sync.RWMutex` 保护配置更新和查询操作。 + +### 4.3 监控模块 + +**数据规范**: +* **时间单位**: 配置文件中使用字符串 (`"30s"`),代码中解析为 `time.Duration`。传输协议中使用 **秒级 Unix 时间戳** (int64)。 +* **数据精度**: 百分比和容量数据,传输时统一保留小数点后 **2位**。 +* **性能限制**: `process_limit` 必须生效,防止采集 Top N 进程时导致 Payload 超过 WebSocket 帧大小限制。 + +--- + +## 5. 错误处理与日志规范 + +### 5.1 日志分级 +* `DEBUG`: 详细的扫描路径、心跳发送详情(生产环境默认关闭)。 +* `INFO`: 模块启动/停止、配置更新成功、检测到的安全事件。 +* `WARN`: CPU 负载避让生效(暂停扫描)、网络断线重连中、使用本地缓存配置。 +* `ERROR`: 配置下载失败、文件权限错误、JSON 解析失败。 + +### 5.2 异常处理策略 +* **网络中断**: 必须实现 **指数退避** 重连机制。 + * 初始延迟: 1s + * 最大延迟: 60s + * 因子: 2.0 +* **JSON 解析失败**: 丢弃收到的畸形消息,记录 ERROR 日志,不断开连接。 + +--- + +## 6. 前后端对接检查清单 + +在开发前后端接口时,请对照以下清单进行自测: + +- [ ] **字段命名**: 后端 JSON Tag 是否使用了 `snake_case` (如 `cpu_percent`),而非 `camelCase`? +- [ ] **空值处理**: 当列表为空时,后端返回的是 `[]` 空数组,还是 `null`? (建议统一返回 `[]`)。 +- [ ] **时间格式**: 时间戳是返回 Unix 毫秒还是秒? (本文档强制要求 **秒**)。 +- [ ] **枚举值**: 告警类型 (`REALTIME_FILE_ALERT` 等) 及状态码 (`Code: 200`) 是否有明确定义? +- [ ] **版本兼容**: 当新增字段时,旧版 Agent 是否会因为未知字段 Crash? (Go 解析 JSON 默认忽略未知字段,确认其他语言实现也遵循此原则)。 +- [ ] **服务器地址**: 确认 `official.json` 中不包含任何硬编码的服务器 IP 或域名。 \ No newline at end of file diff --git a/docs/MessageProtocol.md b/docs/MessageProtocol.md deleted file mode 100644 index 96cd1c9..0000000 --- a/docs/MessageProtocol.md +++ /dev/null @@ -1,277 +0,0 @@ -# 消息交互 - -## 数据包格式 - -### 通用数据包结构 -```json -{ - "type": "消息类型", - "timestamp": 1612345678901, - "payload": { - // 根据消息类型的具体数据结构 - } -} -``` - -### 数据类型定义 -```go -type Packet struct { - Type string `json:"type"` // 消息类型 - Timestamp int64 `json:"timestamp"` // Unix时间戳 - Payload interface{} `json:"payload"` // 消息载荷 -} -``` - -## 消息类型及数据结构 - -### 1. 系统状态更新 (`STATUS_UPDATE`) - -**描述**: 定期发送的系统性能指标 - -**推送频率**: 每30秒一次 - -**Payload 结构**: `ServerMetrics` -```json -{ - "timestamp": "2024-01-15T10:30:00Z", - "cpu": { - "model": "Intel(R) Xeon(R) CPU E5-2680 v4", - "cores": 14, - "logical_cores": 28, - "usage_percent": 45.67, - "per_core_percent": [23.4, 45.6, 12.3, ...], - "mhz": 2400.5, - "cache_size": 35840 - }, - "memory": { - "total_gb": 128.0, - "used_gb": 64.5, - "available_gb": 63.5, - "used_percent": 50.4, - "swap_total_gb": 16.0, - "swap_used_gb": 2.3 - }, - "disk": [ - { - "mountpoint": "/", - "device": "/dev/sda1", - "fstype": "ext4", - "total_gb": 500.0, - "used_gb": 250.0, - "free_gb": 250.0, - "used_percent": 50.0, - "inodes_percent": 12.3 - } - ], - "network": { - "interfaces": [ - { - "name": "eth0", - "hardware_addr": "00:11:22:33:44:55", - "ip_addresses": ["192.168.1.100", "fe80::211:22ff:fe33:4455"] - } - ], - "total_recv_mb": 1234.56, - "total_sent_mb": 987.65, - "tcp_connections": 245, - "established_conn": 128 - }, - "load": { - "load_1": 2.34, - "load_5": 2.12, - "load_15": 1.89, - "relative_load_1": 0.83, - "relative_load_5": 0.76, - "relative_load_15": 0.68, - "procs_running": 132, - "procs_total": 456 - }, - "processes": [ - { - "pid": 1234, - "name": "nginx", - "cmdline": "nginx: master process", - "memory_mb": 125.6, - "cpu_percent": 12.3 - } - ], - "host": { - "hostname": "server01", - "os": "linux", - "platform": "ubuntu", - "platform_version": "20.04", - "kernel_version": "5.4.0-42-generic", - "boot_time": "2024-01-15T08:00:00Z", - "uptime": "2小时30分钟45秒", - "cpu_count": 28, - "architecture": "x86_64", - "host_id": "abcdef12-3456-7890-abcd-ef1234567890" - }, - "runtime": { - "go_version": "go1.21.0", - "goos": "linux", - "goarch": "amd64", - "goroot": "/usr/local/go", - "gomaxprocs": 28, - "num_cpu": 28, - "num_goroutine": 42 - }, - "quick_metrics": { - "cpu_percent": 45.67, - "memory_percent": 50.4, - "root_disk_percent": 50.0, - "available_memory_gb": 63.5 - } -} -``` - -### 2. SSH登录告警 (`SSH_ALERT`) - -**描述**: SSH登录安全告警(特别是root登录) - -**触发条件**: SSH登录事件,当检测到root登录时触发HIGH级别告警 - -**Payload 结构**: `Alert` -```json -{ - "type": "SSH_ROOT_LOGIN", - "level": "HIGH", - "message": "检测到来自192.168.1.50的root登录", - "timestamp": "2024-01-15T10:31:15Z", - "data": { - "timestamp": "2024-01-15T10:31:15Z", - "hostname": "server01", - "username": "root", - "method": "publickey", - "source_ip": "192.168.1.50", - "port": "22", - "service": "sshd", - "pid": "12345", - "message": "Accepted publickey for root from 192.168.1.50 port 22" - } -} -``` - -### 3. 文件完整性告警 - -#### 3.1 非白名单文件告警 (`NON_WHITELISTED_FILE`) - -**描述**: 扫描发现不在白名单中的文件 - -**触发条件**: 定期扫描中发现未在白名单中注册的文件 - -**Payload 结构**: -```json -{ - "type": "NON_WHITELISTED_FILE", - "timestamp": 1612345678901, - "payload": { - "filepath": "/tmp/suspicious_file.bin", - "status": "detected" - } -} -``` - -#### 3.2 文件Hash不匹配告警 (`FILE_HASH_MISMATCH`) - -**描述**: 白名单文件被篡改(Hash值不匹配) - -**触发条件**: 文件hash与白名单记录不符 - -**Payload 结构**: -```json -{ - "type": "FILE_HASH_MISMATCH", - "timestamp": 1612345678901, - "payload": { - "filepath": "/usr/bin/ls", - "status": "detected" - } -} -``` - -### 4. 实时文件监控告警 - -#### 4.1 实时文件变动告警 (`REALTIME_FILE_ALERT`) - -**描述**: 监控目录中检测到非白名单文件的创建或修改 - -**触发条件**: 使用fsnotify监控到文件系统事件 - -**Payload 结构**: -```json -{ - "type": "REALTIME_FILE_ALERT", - "timestamp": 1612345678901, - "payload": { - "filepath": "/tmp/new_suspicious_file", - "operation": "CREATE", - "time": "2024-01-15T10:32:00Z" - } -} -``` - -#### 4.2 实时Hash不匹配告警 (`REALTIME_HASH_MISMATCH`) - -**描述**: 监控到白名单文件被实时篡改 - -**Payload 结构**: -```json -{ - "type": "REALTIME_HASH_MISMATCH", - "timestamp": 1612345678901, - "payload": { - "filepath": "/etc/passwd", - "operation": "WRITE", - "time": "2024-01-15T10:33:00Z" - } -} -``` - -## 配置接口 - -### 1. 配置下载接口 - -Agent 启动时会通过 HTTP 下载两份配置: - -#### 官方配置 (GET) -- **URL**: `http://localhost:8090/api/v1/configs/official.json` -- **响应格式**: 符合 `OfficialConfig` 结构 - -#### 用户配置 (GET) -- **URL**: `http://localhost:8090/api/v1/configs/user.json` -- **响应格式**: 符合 `UserConfig` 结构 - -### 2. 配置数据结构 - -#### OfficialConfig -```json -{ - "whitelist_files": { - "/usr/bin/ls": ["hash1", "hash2"], - "/bin/bash": ["hash3"] - }, - "whitelist_processes": ["sshd", "nginx", "docker"], - "ignored_paths": ["/proc", "/sys", "/dev"] -} -``` - -#### UserConfig -```json -{ - "audit_server_url": "ws://audit.example.com:8090/api/v1/ws", - "supplement_files": { - "/opt/myapp/bin/app": ["user_hash1"] - }, - "supplement_processes": { - "myapp": "/opt/myapp/bin/app start", - "custom_service": "" - }, - "ignored_paths": ["/mnt/temp"], - "check_perm_paths": ["/etc/sudoers", "/etc/shadow"], - "email_config": { - "imap_server": "imap.example.com", - "emergency_mail": ["admin@example.com", "security@example.com"] - } -} -``` diff --git a/internal/config/config.go b/internal/config/config.go index b8e5015..2098b06 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1,6 +1,24 @@ package config -type ModelSwitches struct { +type OfficialConfig struct { + Version string `json:"version"` + WhitelistFiles map[string][]string `json:"whitelist_files"` + WhitelistProcesses []string `json:"whitelist_processes"` + IgnoredPaths []string `json:"ignored_paths"` + ScanPaths []string `json:"scan_paths"` +} + +type UserConfig struct { + Version string `json:"version"` + Connection ConnectionConfig `json:"connection"` + Modules ModuleSwitches `json:"modules"` + SupplementFiles map[string][]string `json:"supplement_files"` + SupplementProcesses []string `json:"supplement_processes"` + IgnoredPaths []string `json:"ignored_paths"` + MonitorConfig MonitorConfig `json:"monitor_config"` +} + +type ModuleSwitches struct { FileScanner bool `json:"file_scanner"` FileWatcher bool `json:"file_watcher"` SSHMonitor bool `json:"ssh_monitor"` @@ -13,10 +31,11 @@ type SSHMonitorConfig struct { } type SystemMonitorConfig struct { - CollectInterval string `json:"collect_interval"` - CollectNetwork bool `json:"collect_network"` - CollectProcess bool `json:"collect_process"` - ProcessLimit int `json:"process_limit"` + CollectInterval string `json:"collect_interval"` + CollectNetwork bool `json:"collect_network"` + CollectProcess bool `json:"collect_process"` + ProcessLimit int `json:"process_limit"` + ScanCPUThreshold int `json:"scan_cpu_threshold"` } type MonitorConfig struct { @@ -29,23 +48,6 @@ type ConnectionConfig struct { AuditServerURL string `json:"audit_server_url"` } -type OfficialConfig struct { - Version string `json:"version"` - WhiteListFiles map[string]string `json:"white_list_files"` - WhiteListProcesses []string `json:"white_list_processes"` - IgnoredPaths []string `json:"ignored_paths"` - ScanPaths []string `json:"scan_paths"` -} - -type UserConfig struct { - Version string `json:"version"` - Connection ConnectionConfig `json:"connection"` - Models ModelSwitches `json:"models"` - SupplementFiles map[string]string `json:"supplement_files"` - SupplementProcesses []string `json:"supplement_processes"` - MonitorConfig MonitorConfig `json:"monitor_config"` -} - type Configuration struct { Official OfficialConfig // 官方配置 User UserConfig // 用户自定义配置 @@ -56,17 +58,22 @@ func NewDefaultUserConfig() UserConfig { Version: "BuildInDefault", Connection: ConnectionConfig{ CenterServerURL: "ws://localhost:8090/api/v1/ws", - AuditServerURL: "ws://localhost:8090/api/v1/ws", }, - Models: ModelSwitches{ + Modules: ModuleSwitches{ FileScanner: false, FileWatcher: true, SSHMonitor: true, SystemMonitor: true, }, MonitorConfig: MonitorConfig{ - SSHMonitorConfig: SSHMonitorConfig{Enabled: true}, - SystemMonitorConfig: SystemMonitorConfig{CollectInterval: "30s", CollectNetwork: true, CollectProcess: true, ProcessLimit: 10}, + SSHMonitorConfig: SSHMonitorConfig{Enabled: true}, + SystemMonitorConfig: SystemMonitorConfig{ + CollectInterval: "30s", + CollectNetwork: true, + CollectProcess: true, + ProcessLimit: 10, + ScanCPUThreshold: 80, // 默认 80% CPU 避让阈值 + }, }, } } diff --git a/internal/config/loader.go b/internal/config/loader.go new file mode 100644 index 0000000..a223495 --- /dev/null +++ b/internal/config/loader.go @@ -0,0 +1,55 @@ +package config + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "time" +) + +type RemoteConfigLoader struct { + client *http.Client +} + +func NewRemoteConfigLoader() *RemoteConfigLoader { + return &RemoteConfigLoader{ + client: &http.Client{Timeout: 10 * time.Second}, + } +} + +func (l *RemoteConfigLoader) LoadConfigs(OfficialURL, UserURL string) (*OfficialConfig, *UserConfig, error) { + var officialCfg OfficialConfig + var userCfg UserConfig + + // 加载官方配置 + if err := l.fetchJSON(OfficialURL, &officialCfg); err != nil { + return nil, nil, fmt.Errorf("[致命错误]加载官方配置失败: %v", err) + } + + // 加载用户配置 + if err := l.fetchJSON(UserURL, &userCfg); err != nil { + return nil, nil, fmt.Errorf("[致命错误]加载用户配置失败: %v", err) + } + + return &officialCfg, &userCfg, nil +} + +func (l *RemoteConfigLoader) fetchJSON(url string, target interface{}) error { + resp, err := l.client.Get(url) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("请求失败: %s", resp.Status) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + + return json.Unmarshal(body, target) +} diff --git a/internal/monitor/ssh_monitor.go b/internal/monitor/ssh_monitor.go index 7dcfd35..4430968 100644 --- a/internal/monitor/ssh_monitor.go +++ b/internal/monitor/ssh_monitor.go @@ -11,7 +11,7 @@ import ( "github.com/wuko233/sysmonitord/internal/config" ) -func NewSSHMonitor(cfg *config.SSHMonitor, alertChan chan<- Alert) *SSHMonitor { +func NewSSHMonitor(cfg *config.SSHMonitorConfig, alertChan chan<- Alert) *SSHMonitor { return &SSHMonitor{ config: cfg, alertChan: alertChan, @@ -137,7 +137,7 @@ func (m *SSHMonitor) parseSSHEvent(field map[string]string, message string) *SSH } func (m *SSHMonitor) handleSSHEvent(event *SSHLoginEvent) { - if m.config.DisplayOnShell { + if m.config.AlertOnRootLogin { // 在终端显示事件 m.displayEventOnShell(event) } diff --git a/internal/monitor/types.go b/internal/monitor/types.go index 0dc37f6..e8b7e44 100644 --- a/internal/monitor/types.go +++ b/internal/monitor/types.go @@ -1,6 +1,9 @@ package monitor import ( + "math" + "strconv" + "strings" "time" "github.com/coreos/go-systemd/v22/sdjournal" @@ -16,21 +19,75 @@ type Alert struct { } type SSHMonitor struct { - config *config.SSHMonitor + config *config.SSHMonitorConfig alertChan chan<- Alert stopChan chan struct{} journal *sdjournal.Journal } +// SSHLoginEvent 内部使用的结构体 type SSHLoginEvent struct { - Timestamp time.Time `json:"timestamp"` - - Hostname string `json:"hostname"` // 主机名 - Username string `json:"username"` // 用户名 - Method string `json:"method"` // 登录方式:password/publickey - SourceIP string `json:"source_ip"` // 来源IP - Port string `json:"port"` // 端口 - Service string `json:"service"` // 服务名 - PID string `json:"pid"` // 进程ID - Message string `json:"message"` // 原始日志消息 + Timestamp time.Time `json:"-"` + // 保留内部字段用于处理 + Hostname string + Username string + Method string + SourceIP string + Port string + Service string + PID string + Message string +} + +// SSHLoginEventProtocol 用于网络传输的结构体(符合 ARCHITECTURE.md) +type SSHLoginEventProtocol struct { + User string `json:"user"` // 登录用户名 + IP string `json:"ip"` // 来源IP + Port int `json:"port"` // 来源端口 + Status string `json:"status"` // "SUCCESS" 或 "FAILED" + Time int64 `json:"event_time"` // 事件发生时间戳 + Method string `json:"method"` // "password" 或 "key" +} + +// ToProtocol 转换为协议格式 +func (e *SSHLoginEvent) ToProtocol(status string) SSHLoginEventProtocol { + port, _ := strconv.Atoi(e.Port) + + method := "password" + if strings.Contains(e.Method, "publickey") { + method = "key" + } + + return SSHLoginEventProtocol{ + User: e.Username, + IP: e.SourceIP, + Port: port, + Status: status, + Time: e.Timestamp.Unix(), + Method: method, + } +} + +// SystemMetrics 用于心跳的精简系统指标(符合 ARCHITECTURE.md) +type SystemMetrics struct { + CpuPercent float64 `json:"cpu_percent"` // 保留2位小数 + MemPercent float64 `json:"mem_percent"` + DiskUsage float64 `json:"disk_usage"` // 根分区使用率 + LoadAvg1 float64 `json:"load_avg_1"` // 1分钟负载 + AgentVersion string `json:"agent_version"` // Agent 当前版本 +} + +// ToSystemMetrics 从 ServerMetrics 转换为心跳格式 +func ToSystemMetrics(metrics ServerMetrics, version string) SystemMetrics { + return SystemMetrics{ + CpuPercent: roundToTwoDecimal(metrics.QuickMetrics.CPUPercent), + MemPercent: roundToTwoDecimal(metrics.QuickMetrics.MemoryPercent), + DiskUsage: roundToTwoDecimal(metrics.QuickMetrics.RootDiskPercent), + LoadAvg1: roundToTwoDecimal(metrics.Load.Load1), + AgentVersion: version, + } +} + +func roundToTwoDecimal(v float64) float64 { + return math.Round(v*100) / 100 } diff --git a/internal/network/client.go b/internal/network/client.go index d7661f4..320f1b7 100644 --- a/internal/network/client.go +++ b/internal/network/client.go @@ -16,12 +16,14 @@ type ClientConfig struct { } type WSClient struct { - config ClientConfig - conn *websocket.Conn - sendChan chan Packet - mu sync.Mutex - isConnected bool - stopChan chan struct{} + config ClientConfig + conn *websocket.Conn + sendChan chan Packet + mu sync.Mutex + isConnected bool + stopChan chan struct{} + handler MessageHandler + reconnectDelay time.Duration // 指数退避延迟 } func NewWSClient(cfg ClientConfig) *WSClient { @@ -29,12 +31,18 @@ func NewWSClient(cfg ClientConfig) *WSClient { cfg.BufferSize = 100 } return &WSClient{ - config: cfg, - sendChan: make(chan Packet, cfg.BufferSize), - stopChan: make(chan struct{}), + config: cfg, + sendChan: make(chan Packet, cfg.BufferSize), + stopChan: make(chan struct{}), + reconnectDelay: 1 * time.Second, // 初始延迟 1秒 } } +// SetHandler 设置消息处理器 +func (c *WSClient) SetHandler(handler MessageHandler) { + c.handler = handler +} + func (c *WSClient) Start() { go c.connectionLoop() go c.sendLoop() @@ -84,6 +92,12 @@ func (c *WSClient) sendRaw(packet Packet) { } func (c *WSClient) connectionLoop() { + const ( + initialDelay = 1 * time.Second + maxDelay = 60 * time.Second + backoffFactor = 2.0 + ) + for { select { case <-c.stopChan: @@ -91,23 +105,73 @@ func (c *WSClient) connectionLoop() { default: if !c.isConnected { if err := c.connect(); err != nil { - log.Printf("[网络] 连接 %s 失败: %v. 5秒后重试...", c.config.ServerURL, err) - time.Sleep(5 * time.Second) + log.Printf("[网络] 连接 %s 失败: %v. %v后重试...", + c.config.ServerURL, err, c.reconnectDelay) + + // 等待退避时间 + time.Sleep(c.reconnectDelay) + + // 计算下一次退避时间(指数增长,最大60秒) + c.reconnectDelay = time.Duration(float64(c.reconnectDelay) * backoffFactor) + if c.reconnectDelay > maxDelay { + c.reconnectDelay = maxDelay + } continue } + + // 连接成功,重置退避时间 + c.reconnectDelay = initialDelay } - _, _, err := c.conn.ReadMessage() + _, message, err := c.conn.ReadMessage() if err != nil { log.Printf("[网络] 连接断开: %v", err) c.closeConn() + // 重置退避时间,准备重新连接 + c.reconnectDelay = initialDelay + continue } - // TODO: 处理服务器消息 + // 处理服务器消息 + c.handleServerMessage(message) } } } +// handleServerMessage 处理服务器消息 +func (c *WSClient) handleServerMessage(message []byte) { + var packet Packet + if err := json.Unmarshal(message, &packet); err != nil { + log.Printf("[网络] JSON解析失败: %v", err) + return // 丢弃畸形消息,不断开连接 + } + + if c.handler == nil { + log.Printf("[网络] 收到服务器消息但未设置处理器: %s", packet.Type) + return + } + + payload, ok := packet.Payload.(map[string]interface{}) + if !ok { + log.Printf("[网络] 无法解析消息载荷: %v", packet.Payload) + return + } + + switch packet.Type { + case TypeConfigUpdate: + c.handler.HandleConfigUpdate(payload) + case TypeTaskScan: + c.handler.HandleTaskScan(payload) + case TypeTaskStop: + c.handler.HandleTaskStop(payload) + case TypeCommandResponse: + // 命令响应,可用于确认 + log.Printf("[网络] 收到命令响应: code=%d", packet.Code) + default: + log.Printf("[网络] 未知消息类型: %s", packet.Type) + } +} + func (c *WSClient) closeConn() { c.mu.Lock() defer c.mu.Unlock() diff --git a/internal/network/types.go b/internal/network/types.go index dcc9bd7..14a88d7 100644 --- a/internal/network/types.go +++ b/internal/network/types.go @@ -2,21 +2,84 @@ package network import "time" +// 状态码常量定义 +const ( + CodeSuccess = 200 // 成功 + CodeBadRequest = 400 // 参数错误 + CodeServerError = 500 // 服务器错误 +) + +// 上行消息类型 +const ( + TypeStatusUpdate = "STATUS_UPDATE" + TypeSSHAlert = "SSH_ALERT" + TypeRealtimeFileAlert = "REALTIME_FILE_ALERT" + TypeScanResult = "SCAN_RESULT" +) + +// 下行消息类型 +const ( + TypeConfigUpdate = "CONFIG_UPDATE" + TypeTaskScan = "TASK_SCAN" + TypeTaskStop = "TASK_STOP" + TypeCommandResponse = "COMMAND_RESPONSE" +) + +// 文件操作类型常量 +const ( + OpCreate = "CREATE" + OpModify = "MODIFY" + OpDelete = "DELETE" +) + +// 文件状态常量 +const ( + StatusDetected = "DETECTED" + StatusHashMismatch = "HASH_MISMATCH" +) + type Packet struct { - Type string `json:"type"` - Timestamp int64 `json:"timestamp"` - Payload interface{} `json:"payload"` + Type string `json:"type"` // 消息类型,大写下划线命名 + Timestamp int64 `json:"timestamp"` // Unix 时间戳 (秒级) + Code int `json:"code"` // 状态码:200=成功, 400=参数错误, 500=服务器错误 + Payload interface{} `json:"payload"` // 实际业务数据负载 } func NewPacket(msgType string, payload interface{}) Packet { return Packet{ Type: msgType, Timestamp: time.Now().Unix(), + Code: CodeSuccess, Payload: payload, } } +// NewPacketWithCode 创建带指定状态码的 Packet +func NewPacketWithCode(msgType string, code int, payload interface{}) Packet { + return Packet{ + Type: msgType, + Timestamp: time.Now().Unix(), + Code: code, + Payload: payload, + } +} + +// FileEventPayload 文件告警负载 +type FileEventPayload struct { + FilePath string `json:"filepath"` + Operation string `json:"operation"` // CREATE, MODIFY, DELETE + Status string `json:"status"` // DETECTED, HASH_MISMATCH + Timestamp int64 `json:"event_time"` +} + type ConfigUrls struct { OfficialConfigUrl string UserConfigUrl string } + +// MessageHandler 消息处理器接口(用于处理下行消息) +type MessageHandler interface { + HandleConfigUpdate(payload map[string]interface{}) + HandleTaskScan(payload map[string]interface{}) + HandleTaskStop(payload map[string]interface{}) +} diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go index 04163d4..92f617f 100644 --- a/internal/scanner/scanner.go +++ b/internal/scanner/scanner.go @@ -87,10 +87,10 @@ func (s *Scanner) performScan() { if !isWhitelisted { log.Printf("[扫描器] 发现未在白名单文件: %s", path) - s.reportFile(path, "NON_WHITELISTED_FILE") + s.reportFile(path, network.TypeScanResult, network.StatusDetected) } else if !isHashMatch { log.Printf("[扫描器] 警告!文件Hash不匹配(可能被篡改): %s", path) - s.reportFile(path, "FILE_HASH_MISMATCH") + s.reportFile(path, network.TypeScanResult, network.StatusHashMismatch) } return nil @@ -116,14 +116,15 @@ func (s *Scanner) checkCPUAndSleep() { time.Sleep(10 * time.Millisecond) } -func (s *Scanner) reportFile(path string, alertType string) { - payload := map[string]interface{}{ - "filepath": path, - "status": "detected", +func (s *Scanner) reportFile(path string, alertType string, status string) { + payload := network.FileEventPayload{ + FilePath: path, + Operation: "", // 周期扫描没有特定操作 + Status: status, + Timestamp: time.Now().Unix(), } - packet := network.NewPacket(alertType, payload) - + packet := network.NewPacket(network.TypeScanResult, payload) s.client.SendQueue(packet) } diff --git a/internal/scanner/watcher.go b/internal/scanner/watcher.go index 1af3edd..38239b3 100644 --- a/internal/scanner/watcher.go +++ b/internal/scanner/watcher.go @@ -3,6 +3,7 @@ package scanner import ( "log" "os" + "strings" "time" "github.com/fsnotify/fsnotify" @@ -80,7 +81,6 @@ func (w *Watcher) eventLoop() { } func (w *Watcher) handleFileChange(path string, op string) { - time.Sleep(200 * time.Millisecond) // 等待文件写入完成 if _, err := os.Stat(path); os.IsNotExist(err) { @@ -94,18 +94,27 @@ func (w *Watcher) handleFileChange(path string, op string) { if !isWhitelisted { log.Printf("[监听器] 实时拦截:检测到非白名单文件变动 (%s): %s", op, path) - w.reportEvent(path, "REALTIME_FILE_ALERT", op) + w.reportEvent(path, network.TypeRealtimeFileAlert, op, network.StatusDetected) } else if !isHashMatch { log.Printf("[监听器] 实时拦截:检测到白名单文件被篡改 (%s): %s", op, path) - w.reportEvent(path, "REALTIME_HASH_MISMATCH", op) + w.reportEvent(path, network.TypeRealtimeFileAlert, op, network.StatusHashMismatch) } } -func (w *Watcher) reportEvent(path, alertType, op string) { - payload := map[string]interface{}{ - "filepath": path, - "operation": op, - "time": time.Now(), +func (w *Watcher) reportEvent(path, alertType, op string, status string) { + // 标准化操作类型 + operation := network.OpModify + if strings.Contains(strings.ToUpper(op), "CREATE") { + operation = network.OpCreate + } else if strings.Contains(strings.ToUpper(op), "DELETE") { + operation = network.OpDelete + } + + payload := network.FileEventPayload{ + FilePath: path, + Operation: operation, + Status: status, + Timestamp: time.Now().Unix(), } packet := network.NewPacket(alertType, payload) diff --git a/internal/whitelist/manager.go b/internal/whitelist/manager.go index a4af5b1..aeb37cd 100644 --- a/internal/whitelist/manager.go +++ b/internal/whitelist/manager.go @@ -13,6 +13,15 @@ import ( "github.com/wuko233/sysmonitord/internal/config" ) +type FileStatus string + +const ( + StatusIgnored FileStatus = "IGNORED" + StatusNonWhitelisted FileStatus = "NON_WHITELISTED" + StatusHashMismatch FileStatus = "HASH_MISMATCH" + StatusSafe FileStatus = "SAFE" +) + type Manager struct { mu sync.RWMutex official config.OfficialConfig @@ -20,16 +29,16 @@ type Manager struct { mergedIgnore []string } -func NewManager() *Manager { - return &Manager{ - official: config.OfficialConfig{ - WhitelistFiles: make(map[string][]string), - }, - user: config.UserConfig{ - SupplementFiles: make(map[string][]string), - SupplementProcesses: make(map[string]string), - }, +func NewManager(o config.OfficialConfig, u config.UserConfig) *Manager { + m := &Manager{ + official: o, + user: u, } + + m.mergedIgnore = append([]string{}, m.official.IgnoredPaths...) + m.mergedIgnore = append(m.mergedIgnore, m.user.IgnoredPaths...) + + return m } func (m *Manager) UpdateConfig(official config.OfficialConfig, user config.UserConfig) { @@ -63,33 +72,51 @@ func (m *Manager) IsPathIgnoredUnsafe(path string) bool { // CheckFileStatus 检查文件状态 // 返回: isWhitelisted(是否在白名单), isValid(Hash是否匹配), err func (m *Manager) CheckFileStatus(path string) (bool, bool, error) { - m.mu.RLock() defer m.mu.RUnlock() - if m.IsPathIgnoredUnsafe((path)) { + // 1. 首先检查是否在忽略列表中 + if m.IsPathIgnoredUnsafe(path) { return true, true, nil } - hashes, exists := m.official.WhitelistFiles[path] - if !exists { - hashes, exists = m.user.SupplementFiles[path] + // 2. 合并官方和用户的白名单(并集策略) + var allowedHashes []string + + // 添加官方白名单 Hash + if officialHashes, exists := m.official.WhitelistFiles[path]; exists { + allowedHashes = append(allowedHashes, officialHashes...) } - if !exists { + + // 添加用户补充白名单 Hash + if userHashes, exists := m.user.SupplementFiles[path]; exists { + allowedHashes = append(allowedHashes, userHashes...) + } + + // 如果两个白名单都没有这个文件 + if len(allowedHashes) == 0 { return false, false, nil } + // 3. 计算当前文件 Hash fileHash, err := CalculateFileHash(path) if err != nil { return true, false, fmt.Errorf("计算文件哈希失败: %v", err) } - for _, h := range hashes { - if strings.EqualFold(h, fmt.Sprintf("%v", fileHash)) { + // 4. 检查 Hash 是否在允许列表中 + for _, h := range allowedHashes { + // 支持 sha256:xxx 格式或纯 hash 格式 + normalizedHash := h + if strings.HasPrefix(h, "sha256:") { + normalizedHash = h[7:] + } + if strings.EqualFold(normalizedHash, fileHash) { return true, true, nil } } + // 在白名单中但 Hash 不匹配 return true, false, nil } @@ -114,14 +141,18 @@ func (m *Manager) IsProcessAllowed(procName string, cmdLine string) bool { m.mu.RLock() defer m.mu.RUnlock() + // 检查官方白名单 for _, p := range m.official.WhitelistProcesses { if p == procName { return true } } - if _, ok := m.user.SupplementProcesses[procName]; ok { - return true + // 检查用户补充白名单(数组形式) + for _, p := range m.user.SupplementProcesses { + if p == procName { + return true + } } return false @@ -131,5 +162,14 @@ func (m *Manager) GetAuditServerUrl() string { m.mu.RLock() defer m.mu.RUnlock() - return m.user.AuditServerUrl + return m.user.Connection.AuditServerURL +} + +func (m *Manager) rebuildIgnoreList() { + totalLen := len(m.official.IgnoredPaths) + len(m.user.IgnoredPaths) + + // 预分配容量以提高性能 + m.mergedIgnore = make([]string, 0, totalLen) + m.mergedIgnore = append(m.mergedIgnore, m.official.IgnoredPaths...) + m.mergedIgnore = append(m.mergedIgnore, m.user.IgnoredPaths...) }