sysmonitord/internal/scanner/hash/hash.go
2026-04-01 21:13:03 +08:00

195 lines
4.0 KiB
Go

package hash
import (
"crypto/md5"
"crypto/sha256"
"encoding/binary"
"encoding/hex"
"hash"
"io"
"os"
"sysmonitord/pkg/logger"
"github.com/cespare/xxhash/v2"
"go.uber.org/zap"
)
// HashAlgorithm describes a hashing scheme that can be selected through
// Config.Algorithm.
type HashAlgorithm interface {
	// Name returns the identifier of the algorithm; Calculate includes it in
	// its debug log output.
	Name() string
	// Hash returns the hash.Hash that file contents are written into.
	Hash() hash.Hash
}
// ==== SHA256 ====

// SHA256Algorithm selects SHA-256 (crypto/sha256) as the hashing scheme.
// It carries no state, so the zero value is ready to use.
type SHA256Algorithm struct{}

// Hash returns a newly created SHA-256 hasher on every call.
func (*SHA256Algorithm) Hash() hash.Hash {
	hasher := sha256.New()
	return hasher
}

// Name reports the identifier of this algorithm, "sha256".
func (*SHA256Algorithm) Name() string { return "sha256" }
// ==== MD5 ====

// MD5Algorithm selects MD5 (crypto/md5) as the hashing scheme.
// It carries no state, so the zero value is ready to use.
type MD5Algorithm struct{}

// Hash returns a newly created MD5 hasher on every call.
func (*MD5Algorithm) Hash() hash.Hash {
	hasher := md5.New()
	return hasher
}

// Name reports the identifier of this algorithm, "md5".
func (*MD5Algorithm) Name() string { return "md5" }
// ==== xxHash64 ====

// XXHash64Algorithm selects xxHash64 as the hashing scheme.
// It carries no state, so the zero value is ready to use.
type XXHash64Algorithm struct{}

// Hash returns a new xxHash64 hasher. The xxhash digest is wrapped in
// xxHash64Wrapper, which is what gets returned as the hash.Hash.
func (*XXHash64Algorithm) Hash() hash.Hash {
	wrapper := new(xxHash64Wrapper)
	wrapper.xxhash = xxhash.New()
	return wrapper
}

// Name reports the identifier of this algorithm, "xxhash64".
func (*XXHash64Algorithm) Name() string { return "xxhash64" }
// xxHash64Wrapper wraps an *xxhash.Digest and provides the hash.Hash method
// set on top of it. The 64-bit digest is emitted as 8 little-endian bytes.
type xxHash64Wrapper struct {
	xxhash *xxhash.Digest
}

// Write feeds p into the wrapped digest and returns whatever the digest's own
// Write reports.
func (x *xxHash64Wrapper) Write(p []byte) (n int, err error) {
	n, err = x.xxhash.Write(p)
	return n, err
}

// Sum appends the current hash value to b and returns the resulting slice.
// The value is the digest's 64-bit sum encoded as 8 bytes in little-endian
// order.
func (x *xxHash64Wrapper) Sum(b []byte) []byte {
	// Encode the uint64 into a fixed-size scratch array, then append it to
	// the caller-supplied prefix.
	var encoded [8]byte
	binary.LittleEndian.PutUint64(encoded[:], x.xxhash.Sum64())
	return append(b, encoded[:]...)
}

// Reset resets the state of the wrapped digest.
func (x *xxHash64Wrapper) Reset() {
	x.xxhash.Reset()
}

// Size returns the number of bytes Sum appends: 8, since xxHash64 produces a
// 64-bit value.
func (x *xxHash64Wrapper) Size() int {
	const sumBytes = 8
	return sumBytes
}

// BlockSize returns the block size reported by the wrapped digest.
func (x *xxHash64Wrapper) BlockSize() int {
	return x.xxhash.BlockSize()
}

// Sum64 is a convenience accessor that returns the current hash value
// directly as a uint64.
func (x *xxHash64Wrapper) Sum64() uint64 {
	return x.xxhash.Sum64()
}
// ==== Configuration ====

// Config holds the options read by Calculate.
type Config struct {
	// UseFastHash enables sampled hashing: when it is true and the file size
	// is greater than Threshold, Calculate hashes only part of the file.
	UseFastHash bool

	// Threshold is the file size, in bytes, that a file must exceed for the
	// sampled path to be taken.
	Threshold int64

	// ChunkSize is the number of bytes the sampled path reads from the start
	// of the file and again from its end.
	ChunkSize int64

	// Algorithm supplies the hasher. Calculate uses SHA256Algorithm when this
	// is nil.
	Algorithm HashAlgorithm
}
// ==== File hash calculation ====

// Calculate returns the hex-encoded hash of the file at filePath.
//
// fileSize is the size of the file in bytes; pass 0 when it is unknown and
// Calculate will obtain it with os.Stat. cfg may be nil, in which case the
// whole file is hashed with SHA-256. A nil cfg.Algorithm also defaults to
// SHA-256.
//
// When cfg.UseFastHash is set and the file is larger than cfg.Threshold, the
// sampled path (calculateFast) is used; otherwise the whole file is hashed
// (calculateFull).
//
// The Config pointed to by cfg is never modified, so a single Config can be
// reused across calls.
func Calculate(filePath string, fileSize int64, cfg *Config) (string, error) {
	// Resolve defaults on a private copy. Writing the default algorithm back
	// through cfg would silently alter the caller's Config and would be a
	// data race when one Config is shared by concurrent callers.
	var opts Config
	if cfg != nil {
		opts = *cfg
	}
	if opts.Algorithm == nil {
		opts.Algorithm = &SHA256Algorithm{}
	}

	if fileSize == 0 {
		info, err := os.Stat(filePath)
		if err != nil {
			logger.Log.Warn("[scanner]获取文件信息失败", zap.String("path", filePath), zap.Error(err))
			return "", err
		}
		fileSize = info.Size()
	}

	logger.Log.Debug("[scanner]计算文件哈希", zap.String("path", filePath), zap.Int64("size", fileSize), zap.String("algorithm", opts.Algorithm.Name()))

	if opts.UseFastHash && fileSize > opts.Threshold {
		return calculateFast(filePath, fileSize, &opts)
	}
	return calculateFull(filePath, &opts)
}
// calculateFull streams the entire file at filePath through the hasher
// supplied by cfg.Algorithm and returns the digest as a hex string.
// Failures to open or read the file are logged and returned to the caller.
func calculateFull(filePath string, cfg *Config) (string, error) {
	src, openErr := os.Open(filePath)
	if openErr != nil {
		logger.Log.Warn("[scanner]打开文件失败", zap.String("path", filePath), zap.Error(openErr))
		return "", openErr
	}
	defer src.Close()

	digest := cfg.Algorithm.Hash()
	_, copyErr := io.Copy(digest, src)
	if copyErr != nil {
		logger.Log.Error("[scanner]读取文件失败", zap.String("path", filePath), zap.Error(copyErr))
		return "", copyErr
	}

	return hex.EncodeToString(digest.Sum(nil)), nil
}
// calculateFast computes a sampled hash of the file at filePath instead of
// reading all of it. The hasher from cfg.Algorithm is fed, in order:
//
//  1. the first sample of the file,
//  2. the last sample of the file,
//  3. fileSize as an 8-byte big-endian integer.
//
// A sample is cfg.ChunkSize bytes long, clamped to fileSize so that files
// shorter than one chunk can still be hashed (the two samples then both cover
// the whole file). The digest is returned as a hex string.
//
// If cfg.ChunkSize is not positive, the whole file is hashed via
// calculateFull. An error is returned when the file cannot be opened, when
// seeking fails, or when fewer bytes than expected can be read (for example
// because the file is shorter than fileSize).
func calculateFast(filePath string, fileSize int64, cfg *Config) (string, error) {
	chunkSize := cfg.ChunkSize
	if chunkSize <= 0 {
		// With no bytes sampled the digest would depend on the file size
		// alone, so all files of equal size would collide. Hash everything.
		return calculateFull(filePath, cfg)
	}

	file, err := os.Open(filePath)
	if err != nil {
		logger.Log.Warn("[scanner]打开文件失败", zap.String("path", filePath), zap.Error(err))
		return "", err
	}
	defer file.Close()

	// Never ask for more bytes than the file holds; an unclamped tail read on
	// a file shorter than one chunk always ends in io.EOF.
	sampleLen := chunkSize
	if fileSize < sampleLen {
		sampleLen = fileSize
	}

	hasher := cfg.Algorithm.Hash()

	// Head sample.
	if _, err := io.CopyN(hasher, file, sampleLen); err != nil {
		logger.Log.Error("[scanner]读取文件失败", zap.String("path", filePath), zap.Error(err))
		return "", err
	}

	// Tail sample. It may overlap the head sample when the file is shorter
	// than two chunks.
	if _, err := file.Seek(fileSize-sampleLen, io.SeekStart); err != nil {
		logger.Log.Error("[scanner]读取文件失败", zap.String("path", filePath), zap.Error(err))
		return "", err
	}
	if _, err := io.CopyN(hasher, file, sampleLen); err != nil {
		logger.Log.Error("[scanner]读取文件失败", zap.String("path", filePath), zap.Error(err))
		return "", err
	}

	// Mix in the size so files with identical samples but different lengths
	// produce different digests.
	if err := binary.Write(hasher, binary.BigEndian, fileSize); err != nil {
		return "", err
	}

	return hex.EncodeToString(hasher.Sum(nil)), nil
}