[scanner] 实现哈希计算策略

[config] 实现配置转换
This commit is contained in:
wuko233 2026-03-30 17:19:42 +08:00
parent 8825080fab
commit 6cecd69758
7 changed files with 217 additions and 14 deletions

View File

@ -4,6 +4,7 @@ import (
"fmt" "fmt"
"os" "os"
"sysmonitord/internal/config" "sysmonitord/internal/config"
"sysmonitord/internal/scanner/hash"
"sysmonitord/internal/scanner/process" "sysmonitord/internal/scanner/process"
"sysmonitord/pkg/logger" "sysmonitord/pkg/logger"
@ -28,7 +29,13 @@ var StartCmd = &cobra.Command{
zap.String("审计服务器地址", fmt.Sprintf("%s:%d", cfg.Audit.Server, cfg.Audit.Port)), zap.String("审计服务器地址", fmt.Sprintf("%s:%d", cfg.Audit.Server, cfg.Audit.Port)),
) )
procs, err := process.ScanAllProcesses() hashCfg := &hash.Config{
UseFastHash: cfg.Scanner.File.FastHash,
Threshold: cfg.Scanner.File.FastHashSize,
ChunkSize: cfg.Scanner.File.FastHashChunk,
}
procs, err := process.ScanAllProcesses(hashCfg)
if err != nil { if err != nil {
logger.Log.Error("扫描进程失败", zap.Error(err)) logger.Log.Error("扫描进程失败", zap.Error(err))
os.Exit(1) os.Exit(1)
@ -46,6 +53,7 @@ var StartCmd = &cobra.Command{
zap.String("name", p.Name), zap.String("name", p.Name),
zap.String("path", p.Path), zap.String("path", p.Path),
zap.String("cmdline", p.Cmdline), zap.String("cmdline", p.Cmdline),
zap.Stringer("data", p),
) )
} }
}, },

View File

@ -10,6 +10,9 @@ audit:
scanner: scanner:
file: file:
exclude_paths: exclude_paths:
- /proc - /proc
- /sys - /sys
fast_hash: true
fast_hash_size: 100MB
fast_hash_chunk: 2MB

View File

@ -355,7 +355,9 @@ scanner:
- /sys/ - /sys/
- /dev/ - /dev/
- /tmp/ - /tmp/
max_file_size: 100MB fast_hash: true
fast_hash_size: 100MB
fast_hash_chunk: 2MB
hash_algorithm: sha256 hash_algorithm: sha256
process: process:
scan_interval: 30 scan_interval: 30
@ -552,6 +554,20 @@ LimitNOFILE=65536
WantedBy=multi-user.target WantedBy=multi-user.target
``` ```
### 5.5 分层抽样哈希策略
针对大文件(默认 >100MB,对应配置项 `fast_hash_size`)的哈希计算,为避免 I/O 阻塞,采用分层抽样算法。
**算法逻辑**
1. 读取文件头部 N 字节(N 对应配置项 `fast_hash_chunk`,默认 2MB)。
2. 读取文件尾部 N 字节(同上,默认 2MB)。
3. 获取文件总大小 Size。
4. 拼接:`Head + Tail + Size`,对拼接后的数据进行 SHA256 运算。
**优势**
- **性能**:将 GB 级文件的哈希耗时从秒级降至毫秒级。
- **安全性**:任何对文件内容的修改,极大概率会触碰到头部(文件头结构)或尾部(数据填充),且锁定文件大小,有效检测篡改行为。
--- ---
## 六、数据格式规范 ## 六、数据格式规范

View File

@ -3,7 +3,9 @@ package config
import ( import (
"fmt" "fmt"
"os" "os"
"sysmonitord/pkg/logger"
"go.uber.org/zap"
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
) )
@ -24,7 +26,12 @@ type ScannerConfig struct {
} }
type FileScannerConfig struct { type FileScannerConfig struct {
ExcludePaths []string `yaml:"exclude_paths"` ExcludePaths []string `yaml:"exclude_paths"`
FastHash bool `yaml:"fast_hash"`
FastHashSizeRaw string `yaml:"fast_hash_size"`
FastHashChunkRaw string `yaml:"fast_hash_chunk"`
FastHashSize int64
FastHashChunk int64
} }
func LoadConfig(path string) (*Config, error) { func LoadConfig(path string) (*Config, error) {
@ -37,5 +44,23 @@ func LoadConfig(path string) (*Config, error) {
if err := yaml.Unmarshal(data, &cfg); err != nil { if err := yaml.Unmarshal(data, &cfg); err != nil {
return nil, fmt.Errorf("无法解析配置文件: %w", err) return nil, fmt.Errorf("无法解析配置文件: %w", err)
} }
// 解析 FastHashSize
cfg.Scanner.File.FastHashSize, err = ParseSize(cfg.Scanner.File.FastHashSizeRaw)
if err != nil {
return nil, fmt.Errorf("解析 fast_hash_size 失败: %w", err)
}
// 解析 FastHashChunk
cfg.Scanner.File.FastHashChunk, err = ParseSize(cfg.Scanner.File.FastHashChunkRaw)
if err != nil {
return nil, fmt.Errorf("解析 fast_hash_chunk 失败: %w", err)
}
logger.Log.Debug("配置加载完成",
zap.Int64("fast_hash_size", cfg.Scanner.File.FastHashSize),
zap.Int64("fast_hash_chunk", cfg.Scanner.File.FastHashChunk),
)
return &cfg, nil return &cfg, nil
} }

46
internal/config/utils.go Normal file
View File

@ -0,0 +1,46 @@
package config
import (
"fmt"
"regexp"
"strconv"
"strings"
)
// sizePattern matches a non-negative decimal integer followed by an optional,
// case-insensitive unit (B, K/KB, M/MB, G/GB, T/TB), with optional whitespace
// between the number and the unit. It is compiled once at package scope so
// ParseSize does not recompile it on every call.
var sizePattern = regexp.MustCompile(`(?i)^(\d+)\s*([KMGT]?B?)$`)

// ParseSize converts a human-readable size such as "100MB", "2 mb" or "4096"
// into a byte count.
//
// Units are binary multiples (1KB = 1024 bytes) and case-insensitive; a bare
// number or a "B" suffix means bytes. Leading and trailing whitespace is
// ignored. An empty (or all-whitespace) string yields 0 with a nil error.
//
// An error is returned when the string does not match the expected
// "<digits>[unit]" form, when the number does not fit in an int64, or when the
// resulting byte count would overflow int64.
func ParseSize(sizeStr string) (int64, error) {
	sizeStr = strings.TrimSpace(sizeStr)
	if sizeStr == "" {
		return 0, nil
	}

	matches := sizePattern.FindStringSubmatch(sizeStr)
	if len(matches) != 3 {
		return 0, fmt.Errorf("无效的大小格式: %s", sizeStr)
	}

	value, err := strconv.ParseInt(matches[1], 10, 64)
	if err != nil {
		return 0, fmt.Errorf("无效的大小数值 %s: %w", matches[1], err)
	}

	var multiplier int64
	switch unit := strings.ToUpper(matches[2]); unit {
	case "B", "":
		multiplier = 1
	case "KB", "K":
		multiplier = 1 << 10
	case "MB", "M":
		multiplier = 1 << 20
	case "GB", "G":
		multiplier = 1 << 30
	case "TB", "T":
		multiplier = 1 << 40
	default:
		// The pattern already restricts the unit, so this is purely defensive.
		return 0, fmt.Errorf("未知的单位: %s", unit)
	}

	// Go integer multiplication wraps silently, so reject values whose
	// product would not fit in an int64 instead of returning garbage.
	const maxInt64 = 1<<63 - 1
	if value > maxInt64/multiplier {
		return 0, fmt.Errorf("大小超出范围: %s", sizeStr)
	}

	return value * multiplier, nil
}

View File

@ -0,0 +1,92 @@
package hash
import (
"crypto/sha256"
"encoding/binary"
"encoding/hex"
"io"
"os"
"sysmonitord/pkg/logger"
"go.uber.org/zap"
)
// Config controls how SHA256 chooses between hashing a file's full content
// and the sampled "fast" hash.
type Config struct {
	// UseFastHash enables the sampled hash; when false SHA256 always hashes
	// the full file content.
	UseFastHash bool
	// Threshold is a file size in bytes; only files strictly larger than this
	// are candidates for the sampled hash.
	Threshold int64
	// ChunkSize is the number of bytes the sampled hash reads from the start
	// of the file and again from the end of the file.
	ChunkSize int64
}
// SHA256 returns a hex-encoded SHA-256 digest for the file at filePath.
//
// By default the digest covers the full file content. The sampled digest from
// calculateFastHash is used instead only when all of the following hold:
// cfg is non-nil, cfg.UseFastHash is true, cfg.ChunkSize is positive, and the
// file is strictly larger than cfg.Threshold bytes.
//
// The error from os.Stat (logged at Warn level) or from the selected hashing
// routine is returned unchanged.
func SHA256(filePath string, cfg *Config) (string, error) {
	info, err := os.Stat(filePath)
	if err != nil {
		logger.Log.Warn("[hash]获取文件信息失败", zap.String("path", filePath), zap.Error(err))
		return "", err
	}

	fileSize := info.Size()

	// A non-positive ChunkSize (for example an unset config value) would make
	// the sampled hash read no file content at all, so every file of the same
	// size would get the same digest. Require a usable chunk size before
	// taking the fast path.
	useFastHash := cfg != nil &&
		cfg.UseFastHash &&
		cfg.ChunkSize > 0 &&
		fileSize > cfg.Threshold
	if !useFastHash {
		return calculateFullHash(filePath)
	}

	logger.Log.Debug("[hash] 分层哈希...",
		zap.String("path", filePath),
		zap.Int64("fileSize", fileSize),
	)
	return calculateFastHash(filePath, fileSize, cfg.ChunkSize)
}
// calculateFullHash streams the whole file at filePath through SHA-256 and
// returns the digest as a lowercase hex string.
//
// A failure to open the file is logged at Warn level and a failure while
// reading is logged at Error level; in both cases the underlying error is
// returned together with an empty string.
func calculateFullHash(filePath string) (string, error) {
	src, openErr := os.Open(filePath)
	if openErr != nil {
		logger.Log.Warn("[scanner]打开文件失败", zap.String("path", filePath), zap.Error(openErr))
		return "", openErr
	}
	defer src.Close()

	digest := sha256.New()
	_, copyErr := io.Copy(digest, src)
	if copyErr != nil {
		logger.Log.Error("[scanner]读取文件失败", zap.String("path", filePath), zap.Error(copyErr))
		return "", copyErr
	}

	return hex.EncodeToString(digest.Sum(nil)), nil
}
func calculateFastHash(filePath string, fileSize int64, chunkSize int64) (string, error) {
file, err := os.Open(filePath)
if err != nil {
logger.Log.Warn("[scanner]打开文件失败", zap.String("path", filePath), zap.Error(err))
return "", err
}
defer file.Close()
hasher := sha256.New()
if _, err := io.CopyN(hasher, file, chunkSize); err != nil {
if err != io.EOF {
return "", err
}
}
tailOffset := fileSize - chunkSize
if tailOffset < 0 {
tailOffset = 0
}
if _, err := file.Seek(tailOffset, io.SeekStart); err != nil {
return "", err
}
if _, err := io.CopyN(hasher, file, chunkSize); err != nil {
return "", err
}
if err := binary.Write(hasher, binary.BigEndian, fileSize); err != nil {
return "", err
}
return hex.EncodeToString(hasher.Sum(nil)), nil
}

View File

@ -2,6 +2,8 @@ package process
import ( import (
"fmt" "fmt"
"os"
"sysmonitord/internal/scanner/hash"
"sysmonitord/pkg/logger" "sysmonitord/pkg/logger"
"github.com/shirou/gopsutil/v3/process" "github.com/shirou/gopsutil/v3/process"
@ -9,13 +11,14 @@ import (
) )
type ProcessInfo struct { type ProcessInfo struct {
PID int32 `json:"pid"` PID int32 `json:"pid"`
Name string `json:"name"` Name string `json:"name"`
Path string `json:"path"` Path string `json:"path"`
Cmdline string `json:"cmdline"` Cmdline string `json:"cmdline"`
FileHash string `json:"file_hash"`
} }
func ScanAllProcesses() ([]ProcessInfo, error) { func ScanAllProcesses(hashCfg *hash.Config) ([]ProcessInfo, error) {
logger.Log.Info("[scan]正在扫描系统中的所有进程...") logger.Log.Info("[scan]正在扫描系统中的所有进程...")
pids, err := process.Pids() pids, err := process.Pids()
@ -52,6 +55,18 @@ func ScanAllProcesses() ([]ProcessInfo, error) {
Path: exePath, Path: exePath,
Cmdline: cmdline, Cmdline: cmdline,
} }
if exePath != "" {
if _, err := os.Stat(exePath); err == nil {
fileHash, err := hash.SHA256(exePath, hashCfg)
if err == nil {
info.FileHash = fileHash
} else {
logger.Log.Warn("[scan]计算文件哈希失败", zap.String("path", exePath), zap.Error(err))
}
}
}
processList = append(processList, info) processList = append(processList, info)
} }
@ -60,7 +75,5 @@ func ScanAllProcesses() ([]ProcessInfo, error) {
} }
func (p ProcessInfo) String() string { func (p ProcessInfo) String() string {
return fmt.Sprintf("%s:%s:%d", p.Name, p.Path, p.PID) return fmt.Sprintf("%s:%s:%s", p.Name, p.Path, p.FileHash)
// Todo: 哈希计算
} }