diff --git a/cmd/start/start.go b/cmd/start/start.go index a3fe3fc..73c820a 100644 --- a/cmd/start/start.go +++ b/cmd/start/start.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "sysmonitord/internal/config" + "sysmonitord/internal/scanner/hash" "sysmonitord/internal/scanner/process" "sysmonitord/pkg/logger" @@ -28,7 +29,13 @@ var StartCmd = &cobra.Command{ zap.String("审计服务器地址", fmt.Sprintf("%s:%d", cfg.Audit.Server, cfg.Audit.Port)), ) - procs, err := process.ScanAllProcesses() + hashCfg := &hash.Config{ + UseFastHash: cfg.Scanner.File.FastHash, + Threshold: cfg.Scanner.File.FastHashSize, + ChunkSize: cfg.Scanner.File.FastHashChunk, + } + + procs, err := process.ScanAllProcesses(hashCfg) if err != nil { logger.Log.Error("扫描进程失败", zap.Error(err)) os.Exit(1) @@ -46,6 +53,7 @@ var StartCmd = &cobra.Command{ zap.String("name", p.Name), zap.String("path", p.Path), zap.String("cmdline", p.Cmdline), + zap.Stringer("data", p), ) } }, diff --git a/config.yaml b/config.yaml index d4aaf9c..92ea87c 100644 --- a/config.yaml +++ b/config.yaml @@ -10,6 +10,9 @@ audit: scanner: file: - exclude_paths: - - /proc - - /sys \ No newline at end of file + exclude_paths: + - /proc + - /sys + fast_hash: true + fast_hash_size: 100MB + fast_hash_chunk: 2MB \ No newline at end of file diff --git a/development.md b/development.md index 0195290..e6f0a02 100644 --- a/development.md +++ b/development.md @@ -355,7 +355,9 @@ scanner: - /sys/ - /dev/ - /tmp/ - max_file_size: 100MB + fast_hash: true + fast_hash_size: 100MB + fast_hash_chunk: 2MB hash_algorithm: sha256 process: scan_interval: 30 @@ -552,6 +554,20 @@ LimitNOFILE=65536 WantedBy=multi-user.target ``` +### 5.5 分层抽样哈希策略 + +针对大文件(默认 >100MB)的哈希计算,为避免I/O阻塞,采用分层抽样算法: + +**算法逻辑**: +1. 读取文件头部 N 字节(默认 2MB,由 fast_hash_chunk 配置)。 +2. 读取文件尾部 N 字节(默认 2MB,由 fast_hash_chunk 配置)。 +3. 获取文件总大小 Size。 +4. 
// sizePattern matches a run of ASCII digits followed by optional whitespace
// and an optional unit suffix: a bare "B", or one of K/M/G/T optionally
// followed by "B". Matching is case-insensitive. It is compiled once at
// package scope so ParseSize does not recompile it on every call.
var sizePattern = regexp.MustCompile(`(?i)^(\d+)\s*([KMGT]?B?)$`)

// ParseSize converts a human-readable size string such as "100MB", "2 k" or
// "512" into a number of bytes.
//
// Units are binary multiples (K = 1024, M = 1024^2, G = 1024^3, T = 1024^4);
// a missing unit or a bare "B" means bytes. Surrounding whitespace is
// ignored, and an empty (or all-whitespace) string yields 0 with a nil error.
//
// An error is returned when the string does not match the expected
// "<digits><unit>" shape, when the digits do not fit in an int64, or when the
// value multiplied by its unit would overflow int64.
func ParseSize(sizeStr string) (int64, error) {
	sizeStr = strings.TrimSpace(sizeStr)
	if sizeStr == "" {
		return 0, nil
	}

	// Submatch 1 is the number, submatch 2 the (possibly empty) unit.
	matches := sizePattern.FindStringSubmatch(sizeStr)
	if len(matches) != 3 {
		return 0, fmt.Errorf("无效的大小格式: %s", sizeStr)
	}

	value, err := strconv.ParseInt(matches[1], 10, 64)
	if err != nil {
		// Wrap so the caller sees which input failed; %w keeps
		// strconv.ErrRange reachable through errors.Is.
		return 0, fmt.Errorf("无效的大小格式: %s: %w", sizeStr, err)
	}

	unit := strings.ToUpper(matches[2])
	var multiplier int64
	switch unit {
	case "B", "":
		multiplier = 1
	case "KB", "K":
		multiplier = 1 << 10
	case "MB", "M":
		multiplier = 1 << 20
	case "GB", "G":
		multiplier = 1 << 30
	case "TB", "T":
		multiplier = 1 << 40
	default:
		// The pattern above only admits the units handled here; this branch
		// guards against the two drifting apart.
		return 0, fmt.Errorf("未知的单位: %s", unit)
	}

	// value is non-negative (the pattern admits digits only), so one
	// division is enough to detect that value*multiplier would wrap.
	const maxInt64 = int64(^uint64(0) >> 1)
	if value > maxInt64/multiplier {
		return 0, fmt.Errorf("大小超出 int64 范围: %s", sizeStr)
	}

	return value * multiplier, nil
}
// calculateFastHash returns the hex-encoded SHA-256 digest of a sampled view
// of the file at filePath instead of its full content.
//
// When 0 < chunkSize <= fileSize the digest covers, in order: the first
// chunkSize bytes, the last chunkSize bytes (the two windows overlap when the
// file is shorter than 2*chunkSize), and fileSize encoded as an 8-byte
// big-endian integer.
//
// When chunkSize is not a usable window (zero or negative, e.g. because it
// was never configured, or larger than the file) the whole content is hashed
// once, followed by the size. Without this fallback a non-positive chunkSize
// would produce a digest of the size alone, making every file of the same
// length hash identically, and an oversized chunkSize would fail with io.EOF
// on the tail read.
//
// fileSize is the size the caller observed for the file. If the file is
// shorter than that by the time it is read, the sampled reads fail and the
// error (typically io.EOF) is returned.
//
// Errors are returned without being logged here so that the caller, which
// has the surrounding context, reports them exactly once.
func calculateFastHash(filePath string, fileSize int64, chunkSize int64) (string, error) {
	file, err := os.Open(filePath)
	if err != nil {
		// *os.PathError already carries the operation and the path.
		return "", err
	}
	defer file.Close()

	hasher := sha256.New()

	if chunkSize <= 0 || chunkSize > fileSize {
		// No valid sampling window: cover the entire content.
		if _, err := io.Copy(hasher, file); err != nil {
			return "", err
		}
	} else {
		// Head window.
		if _, err := io.CopyN(hasher, file, chunkSize); err != nil {
			return "", err
		}

		// Tail window; the offset is non-negative because chunkSize <= fileSize.
		if _, err := file.Seek(fileSize-chunkSize, io.SeekStart); err != nil {
			return "", err
		}
		if _, err := io.CopyN(hasher, file, chunkSize); err != nil {
			return "", err
		}
	}

	// Mix in the size so that growing or shrinking the unsampled middle of
	// the file still changes the digest. hash.Hash.Write never returns an
	// error, so its result is not checked.
	var sizeBuf [8]byte
	binary.BigEndian.PutUint64(sizeBuf[:], uint64(fileSize))
	hasher.Write(sizeBuf[:])

	return hex.EncodeToString(hasher.Sum(nil)), nil
}
nil { + fileHash, err := hash.SHA256(exePath, hashCfg) + if err == nil { + info.FileHash = fileHash + } else { + logger.Log.Warn("[scan]计算文件哈希失败", zap.String("path", exePath), zap.Error(err)) + } + } + } + processList = append(processList, info) } @@ -60,7 +75,5 @@ func ScanAllProcesses() ([]ProcessInfo, error) { } func (p ProcessInfo) String() string { - return fmt.Sprintf("%s:%s:%d", p.Name, p.Path, p.PID) - - // Todo: 哈希计算 + return fmt.Sprintf("%s:%s:%s", p.Name, p.Path, p.FileHash) }