优化存储逻辑:为 WebDAV、Local 和 S3 增加 Exists 方法;调整图片处理逻辑以避免重复存储变体;新增调试日志以便于排查问题

This commit is contained in:
2026-01-30 16:34:20 +08:00
parent 852a72c597
commit e48959d5ba
7 changed files with 205 additions and 70 deletions

View File

@@ -8,8 +8,8 @@ import (
type Image struct { type Image struct {
ID uint `gorm:"primaryKey" json:"id"` ID uint `gorm:"primaryKey" json:"id"`
Date string `gorm:"uniqueIndex:idx_date_mkt;type:varchar(10)" json:"date"` // YYYY-MM-DD Date string `gorm:"uniqueIndex:idx_date_mkt;index:idx_mkt_date,priority:2;type:varchar(10)" json:"date"` // YYYY-MM-DD
Mkt string `gorm:"uniqueIndex:idx_date_mkt;type:varchar(10)" json:"mkt"` // zh-CN, en-US etc. Mkt string `gorm:"uniqueIndex:idx_date_mkt;index:idx_mkt_date,priority:1;type:varchar(10)" json:"mkt"` // zh-CN, en-US etc.
Title string `json:"title"` Title string `json:"title"`
Copyright string `json:"copyright"` Copyright string `json:"copyright"`
CopyrightLink string `json:"copyrightlink"` CopyrightLink string `json:"copyrightlink"`

View File

@@ -11,6 +11,7 @@ import (
"net/http" "net/http"
"os" "os"
"path/filepath" "path/filepath"
"strings"
"time" "time"
"BingPaper/internal/config" "BingPaper/internal/config"
@@ -124,27 +125,12 @@ func (f *Fetcher) processImage(ctx context.Context, bingImg BingImage, mkt strin
// 幂等检查 // 幂等检查
var existing model.Image var existing model.Image
if err := repo.DB.Where("date = ? AND mkt = ?", dateStr, mkt).First(&existing).Error; err == nil { if err := repo.DB.Where("date = ? AND mkt = ?", dateStr, mkt).First(&existing).Error; err == nil {
util.Logger.Info("Image already exists, skipping", zap.String("date", dateStr), zap.String("mkt", mkt)) util.Logger.Debug("Image already exists in DB, skipping", zap.String("date", dateStr), zap.String("mkt", mkt))
return nil return nil
} }
util.Logger.Info("Processing new image", zap.String("date", dateStr), zap.String("mkt", mkt), zap.String("title", bingImg.Title)) imageName := f.extractImageName(bingImg.URLBase, bingImg.HSH)
util.Logger.Info("Processing image", zap.String("date", dateStr), zap.String("mkt", mkt), zap.String("imageName", imageName))
// UHD 探测
imgURL, variantName := f.probeUHD(bingImg.URLBase)
imgData, err := f.downloadImage(imgURL)
if err != nil {
util.Logger.Error("Failed to download image", zap.String("url", imgURL), zap.Error(err))
return err
}
// 解码图片用于缩放
srcImg, _, err := image.Decode(bytes.NewReader(imgData))
if err != nil {
util.Logger.Error("Failed to decode image data", zap.Error(err))
return err
}
// 创建 DB 记录 // 创建 DB 记录
dbImg := model.Image{ dbImg := model.Image{
@@ -168,7 +154,6 @@ func (f *Fetcher) processImage(ctx context.Context, bingImg BingImage, mkt strin
return err return err
} }
// 再次检查 dbImg.ID 是否被填充,如果没有被填充(说明由于冲突未插入),则需要查询出已有的 ID
if dbImg.ID == 0 { if dbImg.ID == 0 {
var existing model.Image var existing model.Image
if err := repo.DB.Where("date = ? AND mkt = ?", dateStr, mkt).First(&existing).Error; err != nil { if err := repo.DB.Where("date = ? AND mkt = ?", dateStr, mkt).First(&existing).Error; err != nil {
@@ -178,6 +163,9 @@ func (f *Fetcher) processImage(ctx context.Context, bingImg BingImage, mkt strin
dbImg = existing dbImg = existing
} }
// UHD 探测
imgURL, variantName := f.probeUHD(bingImg.URLBase)
// 保存各种分辨率 // 保存各种分辨率
targetVariants := []struct { targetVariants := []struct {
name string name string
@@ -197,40 +185,105 @@ func (f *Fetcher) processImage(ctx context.Context, bingImg BingImage, mkt strin
{"320x240", 320, 240}, {"320x240", 320, 240},
} }
// 首先保存原图 (UHD 或 1080p) // 检查是否所有变体都已存在于存储中
if err := f.saveVariant(ctx, &dbImg, variantName, "jpg", imgData); err != nil { allExist := true
util.Logger.Error("Failed to save original variant", zap.String("variant", variantName), zap.Error(err)) // 检查 UHD/原图
} uhdKey := f.generateKey(imageName, variantName, "jpg")
exists, _ := storage.GlobalStorage.Exists(ctx, uhdKey)
for _, v := range targetVariants { if !exists {
// 如果目标分辨率就是原图分辨率,则跳过(已经保存过了) allExist = false
if v.name == variantName { } else {
continue for _, v := range targetVariants {
} if v.name == variantName {
continue
resized := imaging.Fill(srcImg, v.width, v.height, imaging.Center, imaging.Lanczos) }
buf := new(bytes.Buffer) vKey := f.generateKey(imageName, v.name, "jpg")
if err := jpeg.Encode(buf, resized, &jpeg.Options{Quality: 100}); err != nil { exists, _ := storage.GlobalStorage.Exists(ctx, vKey)
util.Logger.Warn("Failed to encode jpeg", zap.String("variant", v.name), zap.Error(err)) if !exists {
continue allExist = false
} break
currentImgData := buf.Bytes() }
// 保存 JPG
if err := f.saveVariant(ctx, &dbImg, v.name, "jpg", currentImgData); err != nil {
util.Logger.Error("Failed to save variant", zap.String("variant", v.name), zap.Error(err))
} }
} }
// 保存今日额外文件 if allExist {
today := time.Now().Format("2006-01-02") util.Logger.Debug("All image variants exist in storage, linking only", zap.String("imageName", imageName))
if dateStr == today && config.GetConfig().Feature.WriteDailyFiles { // 只建立关联信息
f.saveDailyFiles(srcImg, imgData, mkt) f.saveVariant(ctx, &dbImg, imageName, variantName, "jpg", nil)
for _, v := range targetVariants {
if v.name == variantName {
continue
}
f.saveVariant(ctx, &dbImg, imageName, v.name, "jpg", nil)
}
} else {
// 需要下载并处理
util.Logger.Debug("Downloading and processing image", zap.String("url", imgURL))
imgData, err := f.downloadImage(imgURL)
if err != nil {
util.Logger.Error("Failed to download image", zap.String("url", imgURL), zap.Error(err))
return err
}
srcImg, _, err := image.Decode(bytes.NewReader(imgData))
if err != nil {
util.Logger.Error("Failed to decode image data", zap.Error(err))
return err
}
// 保存原图
if err := f.saveVariant(ctx, &dbImg, imageName, variantName, "jpg", imgData); err != nil {
util.Logger.Error("Failed to save original variant", zap.String("variant", variantName), zap.Error(err))
}
for _, v := range targetVariants {
if v.name == variantName {
continue
}
resized := imaging.Fill(srcImg, v.width, v.height, imaging.Center, imaging.Lanczos)
buf := new(bytes.Buffer)
if err := jpeg.Encode(buf, resized, &jpeg.Options{Quality: 100}); err != nil {
util.Logger.Warn("Failed to encode jpeg", zap.String("variant", v.name), zap.Error(err))
continue
}
currentImgData := buf.Bytes()
if err := f.saveVariant(ctx, &dbImg, imageName, v.name, "jpg", currentImgData); err != nil {
util.Logger.Error("Failed to save variant", zap.String("variant", v.name), zap.Error(err))
}
}
// 保存今日额外文件
today := time.Now().Format("2006-01-02")
if dateStr == today && config.GetConfig().Feature.WriteDailyFiles {
f.saveDailyFiles(srcImg, imgData, mkt)
}
} }
return nil return nil
} }
// extractImageName derives the bare image name from a Bing urlBase, falling
// back to hsh when no usable name can be extracted.
//
// Example: "/th?id=OHR.MilwaukeeHall_ROW0871854348" yields "MilwaukeeHall".
//
// The name starts after the "OHR." marker (or after "id=" when "OHR." is
// absent) and ends at the first "_" or "&". Because the result is used as a
// storage key segment, anything that is empty, is a relative path element, or
// contains path/query syntax is rejected in favour of hsh.
func (f *Fetcher) extractImageName(urlBase, hsh string) string {
	const (
		ohrMarker = "OHR."
		idMarker  = "id="
	)

	start := 0
	if idx := strings.Index(urlBase, ohrMarker); idx != -1 {
		start = idx + len(ohrMarker)
	} else if idx := strings.Index(urlBase, idMarker); idx != -1 {
		start = idx + len(idMarker)
	}

	name := urlBase[start:]
	// "_" introduces the market/id suffix; "&" introduces the next query
	// parameter when the suffix is missing.
	if end := strings.IndexAny(name, "_&"); end != -1 {
		name = name[:end]
	}

	// Never let URL or path syntax leak into a storage key: an unexpected
	// urlBase layout (no marker found) would otherwise yield names such as
	// "/az/hprichbg/rb/Foo" or "../x".
	if name == "" || name == "." || name == ".." || strings.ContainsAny(name, `/\?=`) {
		return hsh
	}
	return name
}
func (f *Fetcher) probeUHD(urlBase string) (string, string) { func (f *Fetcher) probeUHD(urlBase string) (string, string) {
uhdURL := fmt.Sprintf("https://www.bing.com%s_UHD.jpg", urlBase) uhdURL := fmt.Sprintf("https://www.bing.com%s_UHD.jpg", urlBase)
resp, err := f.httpClient.Head(uhdURL) resp, err := f.httpClient.Head(uhdURL)
@@ -249,25 +302,52 @@ func (f *Fetcher) downloadImage(url string) ([]byte, error) {
return io.ReadAll(resp.Body) return io.ReadAll(resp.Body)
} }
func (f *Fetcher) saveVariant(ctx context.Context, img *model.Image, variant, format string, data []byte) error { func (f *Fetcher) generateKey(imageName, variant, format string) string {
key := fmt.Sprintf("%s/%s/%s_%s.%s", img.Mkt, img.Date, img.Date, variant, format) return fmt.Sprintf("%s/%s_%s.%s", imageName, imageName, variant, format)
}
func (f *Fetcher) saveVariant(ctx context.Context, img *model.Image, imageName, variant, format string, data []byte) error {
key := f.generateKey(imageName, variant, format)
contentType := "image/jpeg" contentType := "image/jpeg"
if format == "webp" { if format == "webp" {
contentType = "image/webp" contentType = "image/webp"
} }
stored, err := storage.GlobalStorage.Put(ctx, key, bytes.NewReader(data), contentType) var size int64
if err != nil { var publicURL string
return err
exists, _ := storage.GlobalStorage.Exists(ctx, key)
if exists {
util.Logger.Debug("Variant already exists in storage, linking", zap.String("key", key))
// 如果存在,我们需要获取它的大小和公共 URL (如果可能)
// 但目前的 Storage 接口没有 Stat,我们可以尝试 Get 或者干脆 size 为 0
// 为了简单,我们只从存储中获取公共 URL
if pURL, ok := storage.GlobalStorage.PublicURL(key); ok {
publicURL = pURL
}
// size 暂时设为 0 或者从 data 中取 (如果有的话)
if data != nil {
size = int64(len(data))
}
} else if data != nil {
util.Logger.Debug("Saving variant to storage", zap.String("key", key))
stored, err := storage.GlobalStorage.Put(ctx, key, bytes.NewReader(data), contentType)
if err != nil {
return err
}
publicURL = stored.PublicURL
size = stored.Size
} else {
return fmt.Errorf("variant %s does not exist and no data provided", key)
} }
vRecord := model.ImageVariant{ vRecord := model.ImageVariant{
ImageID: img.ID, ImageID: img.ID,
Variant: variant, Variant: variant,
Format: format, Format: format,
StorageKey: stored.Key, StorageKey: key,
PublicURL: stored.PublicURL, PublicURL: publicURL,
Size: int64(len(data)), Size: size,
} }
return repo.DB.Clauses(clause.OnConflict{ return repo.DB.Clauses(clause.OnConflict{

View File

@@ -4,6 +4,7 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"math/rand"
"time" "time"
"BingPaper/internal/config" "BingPaper/internal/config"
@@ -56,6 +57,7 @@ func CleanupOldImages(ctx context.Context) error {
func GetTodayImage(mkt string) (*model.Image, error) { func GetTodayImage(mkt string) (*model.Image, error) {
today := time.Now().Format("2006-01-02") today := time.Now().Format("2006-01-02")
util.Logger.Debug("Getting today image", zap.String("mkt", mkt), zap.String("today", today))
var img model.Image var img model.Image
tx := repo.DB.Where("date = ?", today) tx := repo.DB.Where("date = ?", today)
if mkt != "" { if mkt != "" {
@@ -73,6 +75,7 @@ func GetTodayImage(mkt string) (*model.Image, error) {
} }
if err != nil { if err != nil {
util.Logger.Debug("Today image not found, trying latest image", zap.String("mkt", mkt))
// 如果今天还是没有,尝试获取最近的一张 // 如果今天还是没有,尝试获取最近的一张
tx = repo.DB.Order("date desc") tx = repo.DB.Order("date desc")
if mkt != "" { if mkt != "" {
@@ -84,12 +87,16 @@ func GetTodayImage(mkt string) (*model.Image, error) {
// 兜底逻辑:如果指定地区没找到,且开启了兜底开关,则尝试获取默认地区的图片 // 兜底逻辑:如果指定地区没找到,且开启了兜底开关,则尝试获取默认地区的图片
if err != nil && mkt != "" && config.GetConfig().API.EnableMktFallback { if err != nil && mkt != "" && config.GetConfig().API.EnableMktFallback {
defaultMkt := config.GetConfig().GetDefaultMkt() defaultMkt := config.GetConfig().GetDefaultMkt()
util.Logger.Debug("Image not found, trying fallback to default market", zap.String("mkt", mkt), zap.String("defaultMkt", defaultMkt))
if mkt != defaultMkt { if mkt != defaultMkt {
return GetTodayImage(defaultMkt) return GetTodayImage(defaultMkt)
} }
return GetTodayImage("") return GetTodayImage("")
} }
if err == nil {
util.Logger.Debug("Found image", zap.String("date", img.Date), zap.String("mkt", img.Mkt))
}
return &img, err return &img, err
} }
@@ -110,6 +117,7 @@ func GetAllRegionsTodayImages() ([]model.Image, error) {
} }
func GetRandomImage(mkt string) (*model.Image, error) { func GetRandomImage(mkt string) (*model.Image, error) {
util.Logger.Debug("Getting random image", zap.String("mkt", mkt))
var img model.Image var img model.Image
// SQLite 使用 RANDOM(), MySQL/Postgres 使用 RANDOM() 或 RAND() // SQLite 使用 RANDOM(), MySQL/Postgres 使用 RANDOM() 或 RAND()
// 简单起见,先查总数再 Offset // 简单起见,先查总数再 Offset
@@ -133,34 +141,35 @@ func GetRandomImage(mkt string) (*model.Image, error) {
return nil, fmt.Errorf("no images found") return nil, fmt.Errorf("no images found")
} }
// 这种方法不适合海量数据,但对于 30 天的数据没问题 // 优化随机查询:使用 Offset 代替 ORDER BY RANDOM()
tx = repo.DB.Order("RANDOM()") // 注意tx 包含了前面的 Where 条件
if mkt != "" { offset := rand.Intn(int(count))
tx = tx.Where("mkt = ?", mkt) util.Logger.Debug("Random image selection", zap.Int64("total", count), zap.Int("offset", offset))
} err := tx.Preload("Variants").Offset(offset).Limit(1).Find(&img).Error
err := tx.Preload("Variants").First(&img).Error
if err != nil {
// 适配 MySQL
tx = repo.DB.Order("RAND()")
if mkt != "" {
tx = tx.Where("mkt = ?", mkt)
}
err = tx.Preload("Variants").First(&img).Error
}
// 兜底逻辑 // 兜底逻辑
if err != nil && mkt != "" && config.GetConfig().API.EnableMktFallback { if (err != nil || img.ID == 0) && mkt != "" && config.GetConfig().API.EnableMktFallback {
defaultMkt := config.GetConfig().GetDefaultMkt() defaultMkt := config.GetConfig().GetDefaultMkt()
util.Logger.Debug("Random image not found, trying fallback", zap.String("mkt", mkt), zap.String("defaultMkt", defaultMkt))
if mkt != defaultMkt { if mkt != defaultMkt {
return GetRandomImage(defaultMkt) return GetRandomImage(defaultMkt)
} }
return GetRandomImage("") return GetRandomImage("")
} }
if err == nil && img.ID == 0 {
return nil, fmt.Errorf("no images found")
}
if err == nil {
util.Logger.Debug("Found random image", zap.String("date", img.Date), zap.String("mkt", img.Mkt))
}
return &img, err return &img, err
} }
func GetImageByDate(date string, mkt string) (*model.Image, error) { func GetImageByDate(date string, mkt string) (*model.Image, error) {
util.Logger.Debug("Getting image by date", zap.String("date", date), zap.String("mkt", mkt))
var img model.Image var img model.Image
tx := repo.DB.Where("date = ?", date) tx := repo.DB.Where("date = ?", date)
if mkt != "" { if mkt != "" {
@@ -180,16 +189,21 @@ func GetImageByDate(date string, mkt string) (*model.Image, error) {
// 兜底逻辑 // 兜底逻辑
if err != nil && mkt != "" && config.GetConfig().API.EnableMktFallback { if err != nil && mkt != "" && config.GetConfig().API.EnableMktFallback {
defaultMkt := config.GetConfig().GetDefaultMkt() defaultMkt := config.GetConfig().GetDefaultMkt()
util.Logger.Debug("Image by date not found, trying fallback", zap.String("date", date), zap.String("mkt", mkt), zap.String("defaultMkt", defaultMkt))
if mkt != defaultMkt { if mkt != defaultMkt {
return GetImageByDate(date, defaultMkt) return GetImageByDate(date, defaultMkt)
} }
return GetImageByDate(date, "") return GetImageByDate(date, "")
} }
if err == nil {
util.Logger.Debug("Found image by date", zap.String("date", img.Date), zap.String("mkt", img.Mkt))
}
return &img, err return &img, err
} }
func GetImageList(limit int, offset int, month string, mkt string) ([]model.Image, error) { func GetImageList(limit int, offset int, month string, mkt string) ([]model.Image, error) {
util.Logger.Debug("Getting image list", zap.Int("limit", limit), zap.Int("offset", offset), zap.String("month", month), zap.String("mkt", mkt))
var images []model.Image var images []model.Image
tx := repo.DB.Model(&model.Image{}) tx := repo.DB.Model(&model.Image{})

View File

@@ -63,3 +63,15 @@ func (l *LocalStorage) Delete(ctx context.Context, key string) error {
func (l *LocalStorage) PublicURL(key string) (string, bool) { func (l *LocalStorage) PublicURL(key string) (string, bool) {
return "", false return "", false
} }
// Exists reports whether key resolves to an existing entry under the
// storage root. A "does not exist" stat failure is reported as (false, nil);
// any other stat failure is returned to the caller unchanged.
func (l *LocalStorage) Exists(ctx context.Context, key string) (bool, error) {
	_, statErr := os.Stat(filepath.Join(l.root, key))
	switch {
	case statErr == nil:
		return true, nil
	case os.IsNotExist(statErr):
		// Absence is an answer, not a failure.
		return false, nil
	default:
		return false, statErr
	}
}

View File

@@ -100,3 +100,18 @@ func (s *S3Storage) PublicURL(key string) (string, bool) {
// 也可以生成签名 URL但这里简单处理 // 也可以生成签名 URL但这里简单处理
return "", false return "", false
} }
// Exists reports whether an object named key is present in the configured
// bucket by issuing a HeadObject request. A failure whose message looks like
// a 404 is reported as (false, nil); every other failure is returned as-is.
func (s *S3Storage) Exists(ctx context.Context, key string) (bool, error) {
	input := &s3.HeadObjectInput{
		Bucket: aws.String(s.bucket),
		Key:    aws.String(key),
	}
	if _, headErr := s.client.HeadObject(ctx, input); headErr != nil {
		// Not-found detection is done on the error text.
		// NOTE(review): substring matching could misclassify an unrelated
		// error whose message happens to contain "404" — consider a typed
		// check if the SDK error types are available here.
		msg := headErr.Error()
		if strings.Contains(msg, "NotFound") || strings.Contains(msg, "404") {
			return false, nil
		}
		return false, headErr
	}
	return true, nil
}

View File

@@ -17,6 +17,7 @@ type Storage interface {
Get(ctx context.Context, key string) (io.ReadCloser, string, error) Get(ctx context.Context, key string) (io.ReadCloser, string, error)
Delete(ctx context.Context, key string) error Delete(ctx context.Context, key string) error
PublicURL(key string) (string, bool) PublicURL(key string) (string, bool)
Exists(ctx context.Context, key string) (bool, error)
} }
var GlobalStorage Storage var GlobalStorage Storage

View File

@@ -72,3 +72,16 @@ func (w *WebDAVStorage) PublicURL(key string) (string, bool) {
} }
return "", false return "", false
} }
// Exists reports whether key can be stat'ed through the WebDAV client.
// A Stat failure whose message contains "404" or "not found" is reported as
// (false, nil); any other failure is returned unchanged. ctx is accepted to
// satisfy the method signature but is not passed to the client call.
func (w *WebDAVStorage) Exists(ctx context.Context, key string) (bool, error) {
	if _, statErr := w.client.Stat(key); statErr != nil {
		// The client's errors are inspected by message text: these markers
		// are treated as "resource is absent".
		msg := statErr.Error()
		for _, marker := range []string{"404", "not found"} {
			if strings.Contains(msg, marker) {
				return false, nil
			}
		}
		return false, statErr
	}
	return true, nil
}