优化存储逻辑:为 WebDAV、Local 和 S3 增加 Exists 方法;调整图片处理逻辑以避免重复存储变体;新增调试日志以便于排查问题

This commit is contained in:
2026-01-30 16:34:20 +08:00
parent 852a72c597
commit e48959d5ba
7 changed files with 205 additions and 70 deletions

View File

@@ -8,8 +8,8 @@ import (
type Image struct {
ID uint `gorm:"primaryKey" json:"id"`
Date string `gorm:"uniqueIndex:idx_date_mkt;type:varchar(10)" json:"date"` // YYYY-MM-DD
Mkt string `gorm:"uniqueIndex:idx_date_mkt;type:varchar(10)" json:"mkt"` // zh-CN, en-US etc.
Date string `gorm:"uniqueIndex:idx_date_mkt;index:idx_mkt_date,priority:2;type:varchar(10)" json:"date"` // YYYY-MM-DD
Mkt string `gorm:"uniqueIndex:idx_date_mkt;index:idx_mkt_date,priority:1;type:varchar(10)" json:"mkt"` // zh-CN, en-US etc.
Title string `json:"title"`
Copyright string `json:"copyright"`
CopyrightLink string `json:"copyrightlink"`

View File

@@ -11,6 +11,7 @@ import (
"net/http"
"os"
"path/filepath"
"strings"
"time"
"BingPaper/internal/config"
@@ -124,27 +125,12 @@ func (f *Fetcher) processImage(ctx context.Context, bingImg BingImage, mkt strin
// 幂等检查
var existing model.Image
if err := repo.DB.Where("date = ? AND mkt = ?", dateStr, mkt).First(&existing).Error; err == nil {
util.Logger.Info("Image already exists, skipping", zap.String("date", dateStr), zap.String("mkt", mkt))
util.Logger.Debug("Image already exists in DB, skipping", zap.String("date", dateStr), zap.String("mkt", mkt))
return nil
}
util.Logger.Info("Processing new image", zap.String("date", dateStr), zap.String("mkt", mkt), zap.String("title", bingImg.Title))
// UHD 探测
imgURL, variantName := f.probeUHD(bingImg.URLBase)
imgData, err := f.downloadImage(imgURL)
if err != nil {
util.Logger.Error("Failed to download image", zap.String("url", imgURL), zap.Error(err))
return err
}
// 解码图片用于缩放
srcImg, _, err := image.Decode(bytes.NewReader(imgData))
if err != nil {
util.Logger.Error("Failed to decode image data", zap.Error(err))
return err
}
imageName := f.extractImageName(bingImg.URLBase, bingImg.HSH)
util.Logger.Info("Processing image", zap.String("date", dateStr), zap.String("mkt", mkt), zap.String("imageName", imageName))
// 创建 DB 记录
dbImg := model.Image{
@@ -168,7 +154,6 @@ func (f *Fetcher) processImage(ctx context.Context, bingImg BingImage, mkt strin
return err
}
// 再次检查 dbImg.ID 是否被填充,如果没有被填充(说明由于冲突未插入),则需要查询出已有的 ID
if dbImg.ID == 0 {
var existing model.Image
if err := repo.DB.Where("date = ? AND mkt = ?", dateStr, mkt).First(&existing).Error; err != nil {
@@ -178,6 +163,9 @@ func (f *Fetcher) processImage(ctx context.Context, bingImg BingImage, mkt strin
dbImg = existing
}
// UHD 探测
imgURL, variantName := f.probeUHD(bingImg.URLBase)
// 保存各种分辨率
targetVariants := []struct {
name string
@@ -197,40 +185,105 @@ func (f *Fetcher) processImage(ctx context.Context, bingImg BingImage, mkt strin
{"320x240", 320, 240},
}
// 首先保存原图 (UHD 或 1080p)
if err := f.saveVariant(ctx, &dbImg, variantName, "jpg", imgData); err != nil {
util.Logger.Error("Failed to save original variant", zap.String("variant", variantName), zap.Error(err))
}
for _, v := range targetVariants {
// 如果目标分辨率就是原图分辨率,则跳过(已经保存过了)
if v.name == variantName {
continue
}
resized := imaging.Fill(srcImg, v.width, v.height, imaging.Center, imaging.Lanczos)
buf := new(bytes.Buffer)
if err := jpeg.Encode(buf, resized, &jpeg.Options{Quality: 100}); err != nil {
util.Logger.Warn("Failed to encode jpeg", zap.String("variant", v.name), zap.Error(err))
continue
}
currentImgData := buf.Bytes()
// 保存 JPG
if err := f.saveVariant(ctx, &dbImg, v.name, "jpg", currentImgData); err != nil {
util.Logger.Error("Failed to save variant", zap.String("variant", v.name), zap.Error(err))
// 检查是否所有变体都已存在于存储中
allExist := true
// 检查 UHD/原图
uhdKey := f.generateKey(imageName, variantName, "jpg")
exists, _ := storage.GlobalStorage.Exists(ctx, uhdKey)
if !exists {
allExist = false
} else {
for _, v := range targetVariants {
if v.name == variantName {
continue
}
vKey := f.generateKey(imageName, v.name, "jpg")
exists, _ := storage.GlobalStorage.Exists(ctx, vKey)
if !exists {
allExist = false
break
}
}
}
// 保存今日额外文件
today := time.Now().Format("2006-01-02")
if dateStr == today && config.GetConfig().Feature.WriteDailyFiles {
f.saveDailyFiles(srcImg, imgData, mkt)
if allExist {
util.Logger.Debug("All image variants exist in storage, linking only", zap.String("imageName", imageName))
// 只建立关联信息
f.saveVariant(ctx, &dbImg, imageName, variantName, "jpg", nil)
for _, v := range targetVariants {
if v.name == variantName {
continue
}
f.saveVariant(ctx, &dbImg, imageName, v.name, "jpg", nil)
}
} else {
// 需要下载并处理
util.Logger.Debug("Downloading and processing image", zap.String("url", imgURL))
imgData, err := f.downloadImage(imgURL)
if err != nil {
util.Logger.Error("Failed to download image", zap.String("url", imgURL), zap.Error(err))
return err
}
srcImg, _, err := image.Decode(bytes.NewReader(imgData))
if err != nil {
util.Logger.Error("Failed to decode image data", zap.Error(err))
return err
}
// 保存原图
if err := f.saveVariant(ctx, &dbImg, imageName, variantName, "jpg", imgData); err != nil {
util.Logger.Error("Failed to save original variant", zap.String("variant", variantName), zap.Error(err))
}
for _, v := range targetVariants {
if v.name == variantName {
continue
}
resized := imaging.Fill(srcImg, v.width, v.height, imaging.Center, imaging.Lanczos)
buf := new(bytes.Buffer)
if err := jpeg.Encode(buf, resized, &jpeg.Options{Quality: 100}); err != nil {
util.Logger.Warn("Failed to encode jpeg", zap.String("variant", v.name), zap.Error(err))
continue
}
currentImgData := buf.Bytes()
if err := f.saveVariant(ctx, &dbImg, imageName, v.name, "jpg", currentImgData); err != nil {
util.Logger.Error("Failed to save variant", zap.String("variant", v.name), zap.Error(err))
}
}
// 保存今日额外文件
today := time.Now().Format("2006-01-02")
if dateStr == today && config.GetConfig().Feature.WriteDailyFiles {
f.saveDailyFiles(srcImg, imgData, mkt)
}
}
return nil
}
// extractImageName derives a short image name from a Bing urlBase value.
//
// Example: "/th?id=OHR.MilwaukeeHall_ROW0871854348" yields "MilwaukeeHall".
//
// The name is the text that follows the first "OHR." marker (or, when that
// marker is absent, the first "id=" marker; or the whole string when neither
// is present), cut off at the first underscore. If that leaves an empty
// string, hsh is returned instead.
func (f *Fetcher) extractImageName(urlBase, hsh string) string {
	tail := urlBase
	// Markers are tried in priority order; only the first one found is used.
	for _, marker := range []string{"OHR.", "id="} {
		if _, after, found := strings.Cut(urlBase, marker); found {
			tail = after
			break
		}
	}

	// Keep everything before the first underscore (or all of it if none).
	if name, _, _ := strings.Cut(tail, "_"); name != "" {
		return name
	}
	return hsh
}
func (f *Fetcher) probeUHD(urlBase string) (string, string) {
uhdURL := fmt.Sprintf("https://www.bing.com%s_UHD.jpg", urlBase)
resp, err := f.httpClient.Head(uhdURL)
@@ -249,25 +302,52 @@ func (f *Fetcher) downloadImage(url string) ([]byte, error) {
return io.ReadAll(resp.Body)
}
func (f *Fetcher) saveVariant(ctx context.Context, img *model.Image, variant, format string, data []byte) error {
key := fmt.Sprintf("%s/%s/%s_%s.%s", img.Mkt, img.Date, img.Date, variant, format)
// generateKey builds the storage key for one image variant, laid out as
// "<imageName>/<imageName>_<variant>.<format>".
func (f *Fetcher) generateKey(imageName, variant, format string) string {
	fileName := fmt.Sprintf("%s_%s.%s", imageName, variant, format)
	return imageName + "/" + fileName
}
func (f *Fetcher) saveVariant(ctx context.Context, img *model.Image, imageName, variant, format string, data []byte) error {
key := f.generateKey(imageName, variant, format)
contentType := "image/jpeg"
if format == "webp" {
contentType = "image/webp"
}
stored, err := storage.GlobalStorage.Put(ctx, key, bytes.NewReader(data), contentType)
if err != nil {
return err
var size int64
var publicURL string
exists, _ := storage.GlobalStorage.Exists(ctx, key)
if exists {
util.Logger.Debug("Variant already exists in storage, linking", zap.String("key", key))
// 如果存在,我们需要获取它的大小和公共 URL (如果可能)
// 但目前的 Storage 接口没有 Stat,我们可以尝试 Get 或者干脆 size 为 0
// 为了简单,我们只从存储中获取公共 URL
if pURL, ok := storage.GlobalStorage.PublicURL(key); ok {
publicURL = pURL
}
// size 暂时设为 0 或者从 data 中取 (如果有的话)
if data != nil {
size = int64(len(data))
}
} else if data != nil {
util.Logger.Debug("Saving variant to storage", zap.String("key", key))
stored, err := storage.GlobalStorage.Put(ctx, key, bytes.NewReader(data), contentType)
if err != nil {
return err
}
publicURL = stored.PublicURL
size = stored.Size
} else {
return fmt.Errorf("variant %s does not exist and no data provided", key)
}
vRecord := model.ImageVariant{
ImageID: img.ID,
Variant: variant,
Format: format,
StorageKey: stored.Key,
PublicURL: stored.PublicURL,
Size: int64(len(data)),
StorageKey: key,
PublicURL: publicURL,
Size: size,
}
return repo.DB.Clauses(clause.OnConflict{

View File

@@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"math/rand"
"time"
"BingPaper/internal/config"
@@ -56,6 +57,7 @@ func CleanupOldImages(ctx context.Context) error {
func GetTodayImage(mkt string) (*model.Image, error) {
today := time.Now().Format("2006-01-02")
util.Logger.Debug("Getting today image", zap.String("mkt", mkt), zap.String("today", today))
var img model.Image
tx := repo.DB.Where("date = ?", today)
if mkt != "" {
@@ -73,6 +75,7 @@ func GetTodayImage(mkt string) (*model.Image, error) {
}
if err != nil {
util.Logger.Debug("Today image not found, trying latest image", zap.String("mkt", mkt))
// 如果今天还是没有,尝试获取最近的一张
tx = repo.DB.Order("date desc")
if mkt != "" {
@@ -84,12 +87,16 @@ func GetTodayImage(mkt string) (*model.Image, error) {
// 兜底逻辑:如果指定地区没找到,且开启了兜底开关,则尝试获取默认地区的图片
if err != nil && mkt != "" && config.GetConfig().API.EnableMktFallback {
defaultMkt := config.GetConfig().GetDefaultMkt()
util.Logger.Debug("Image not found, trying fallback to default market", zap.String("mkt", mkt), zap.String("defaultMkt", defaultMkt))
if mkt != defaultMkt {
return GetTodayImage(defaultMkt)
}
return GetTodayImage("")
}
if err == nil {
util.Logger.Debug("Found image", zap.String("date", img.Date), zap.String("mkt", img.Mkt))
}
return &img, err
}
@@ -110,6 +117,7 @@ func GetAllRegionsTodayImages() ([]model.Image, error) {
}
func GetRandomImage(mkt string) (*model.Image, error) {
util.Logger.Debug("Getting random image", zap.String("mkt", mkt))
var img model.Image
// SQLite 使用 RANDOM(), MySQL/Postgres 使用 RANDOM() 或 RAND()
// 简单起见,先查总数再 Offset
@@ -133,34 +141,35 @@ func GetRandomImage(mkt string) (*model.Image, error) {
return nil, fmt.Errorf("no images found")
}
// 这种方法不适合海量数据,但对于 30 天的数据没问题
tx = repo.DB.Order("RANDOM()")
if mkt != "" {
tx = tx.Where("mkt = ?", mkt)
}
err := tx.Preload("Variants").First(&img).Error
if err != nil {
// 适配 MySQL
tx = repo.DB.Order("RAND()")
if mkt != "" {
tx = tx.Where("mkt = ?", mkt)
}
err = tx.Preload("Variants").First(&img).Error
}
// 优化随机查询:使用 Offset 代替 ORDER BY RANDOM()
// 注意:tx 包含了前面的 Where 条件
offset := rand.Intn(int(count))
util.Logger.Debug("Random image selection", zap.Int64("total", count), zap.Int("offset", offset))
err := tx.Preload("Variants").Offset(offset).Limit(1).Find(&img).Error
// 兜底逻辑
if err != nil && mkt != "" && config.GetConfig().API.EnableMktFallback {
if (err != nil || img.ID == 0) && mkt != "" && config.GetConfig().API.EnableMktFallback {
defaultMkt := config.GetConfig().GetDefaultMkt()
util.Logger.Debug("Random image not found, trying fallback", zap.String("mkt", mkt), zap.String("defaultMkt", defaultMkt))
if mkt != defaultMkt {
return GetRandomImage(defaultMkt)
}
return GetRandomImage("")
}
if err == nil && img.ID == 0 {
return nil, fmt.Errorf("no images found")
}
if err == nil {
util.Logger.Debug("Found random image", zap.String("date", img.Date), zap.String("mkt", img.Mkt))
}
return &img, err
}
func GetImageByDate(date string, mkt string) (*model.Image, error) {
util.Logger.Debug("Getting image by date", zap.String("date", date), zap.String("mkt", mkt))
var img model.Image
tx := repo.DB.Where("date = ?", date)
if mkt != "" {
@@ -180,16 +189,21 @@ func GetImageByDate(date string, mkt string) (*model.Image, error) {
// 兜底逻辑
if err != nil && mkt != "" && config.GetConfig().API.EnableMktFallback {
defaultMkt := config.GetConfig().GetDefaultMkt()
util.Logger.Debug("Image by date not found, trying fallback", zap.String("date", date), zap.String("mkt", mkt), zap.String("defaultMkt", defaultMkt))
if mkt != defaultMkt {
return GetImageByDate(date, defaultMkt)
}
return GetImageByDate(date, "")
}
if err == nil {
util.Logger.Debug("Found image by date", zap.String("date", img.Date), zap.String("mkt", img.Mkt))
}
return &img, err
}
func GetImageList(limit int, offset int, month string, mkt string) ([]model.Image, error) {
util.Logger.Debug("Getting image list", zap.Int("limit", limit), zap.Int("offset", offset), zap.String("month", month), zap.String("mkt", mkt))
var images []model.Image
tx := repo.DB.Model(&model.Image{})

View File

@@ -63,3 +63,15 @@ func (l *LocalStorage) Delete(ctx context.Context, key string) error {
func (l *LocalStorage) PublicURL(key string) (string, bool) {
return "", false
}
// Exists reports whether something is present at key under the storage root.
//
// It returns (true, nil) when os.Stat succeeds on the joined path,
// (false, nil) when the path does not exist, and (false, err) for any other
// stat failure. ctx is accepted for the Storage interface but not used here.
func (l *LocalStorage) Exists(ctx context.Context, key string) (bool, error) {
	_, statErr := os.Stat(filepath.Join(l.root, key))
	switch {
	case statErr == nil:
		return true, nil
	case os.IsNotExist(statErr):
		// A missing path is an expected answer, not a failure.
		return false, nil
	default:
		return false, statErr
	}
}

View File

@@ -100,3 +100,18 @@ func (s *S3Storage) PublicURL(key string) (string, bool) {
// 也可以生成签名 URL,但这里简单处理
return "", false
}
// Exists reports whether an object with the given key is present in the
// configured bucket, using a HeadObject request.
//
// It returns (true, nil) when HeadObject succeeds, (false, nil) when the
// error text contains "NotFound" or "404", and (false, err) otherwise.
func (s *S3Storage) Exists(ctx context.Context, key string) (bool, error) {
	input := &s3.HeadObjectInput{
		Bucket: aws.String(s.bucket),
		Key:    aws.String(key),
	}
	if _, err := s.client.HeadObject(ctx, input); err != nil {
		// Treat a "not found" response as a normal negative answer.
		// NOTE(review): this is matched on the error text, so any error whose
		// message happens to contain "404" is also reported as absent —
		// confirm whether a typed check is available in the SDK version used.
		msg := err.Error()
		if strings.Contains(msg, "NotFound") || strings.Contains(msg, "404") {
			return false, nil
		}
		return false, err
	}
	return true, nil
}

View File

@@ -17,6 +17,7 @@ type Storage interface {
Get(ctx context.Context, key string) (io.ReadCloser, string, error)
Delete(ctx context.Context, key string) error
PublicURL(key string) (string, bool)
Exists(ctx context.Context, key string) (bool, error)
}
var GlobalStorage Storage

View File

@@ -72,3 +72,16 @@ func (w *WebDAVStorage) PublicURL(key string) (string, bool) {
}
return "", false
}
// Exists reports whether key is present on the WebDAV server, based on a
// Stat call.
//
// It returns (true, nil) when Stat succeeds, (false, nil) when the error text
// contains "404" or "not found" (case-sensitive), and (false, err) for any
// other error. ctx is accepted for the Storage interface but not used here.
func (w *WebDAVStorage) Exists(ctx context.Context, key string) (bool, error) {
	_, err := w.client.Stat(key)
	if err == nil {
		return true, nil
	}

	// The not-found case is recognised from the error message text.
	// NOTE(review): presumably gowebdav surfaces a 404 this way — confirm
	// against the library version in use.
	for _, marker := range []string{"404", "not found"} {
		if strings.Contains(err.Error(), marker) {
			return false, nil
		}
	}
	return false, err
}