mirror of
https://git.fightbot.fun/hxuanyu/BingPaper.git
synced 2026-02-15 05:59:32 +08:00
优化存储逻辑:为 WebDAV、Local 和 S3 增加 Exists 方法;调整图片处理逻辑以避免重复存储变体;新增调试日志以便于排查问题
This commit is contained in:
@@ -8,8 +8,8 @@ import (
|
||||
|
||||
type Image struct {
|
||||
ID uint `gorm:"primaryKey" json:"id"`
|
||||
Date string `gorm:"uniqueIndex:idx_date_mkt;type:varchar(10)" json:"date"` // YYYY-MM-DD
|
||||
Mkt string `gorm:"uniqueIndex:idx_date_mkt;type:varchar(10)" json:"mkt"` // zh-CN, en-US etc.
|
||||
Date string `gorm:"uniqueIndex:idx_date_mkt;index:idx_mkt_date,priority:2;type:varchar(10)" json:"date"` // YYYY-MM-DD
|
||||
Mkt string `gorm:"uniqueIndex:idx_date_mkt;index:idx_mkt_date,priority:1;type:varchar(10)" json:"mkt"` // zh-CN, en-US etc.
|
||||
Title string `json:"title"`
|
||||
Copyright string `json:"copyright"`
|
||||
CopyrightLink string `json:"copyrightlink"`
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"BingPaper/internal/config"
|
||||
@@ -124,27 +125,12 @@ func (f *Fetcher) processImage(ctx context.Context, bingImg BingImage, mkt strin
|
||||
// 幂等检查
|
||||
var existing model.Image
|
||||
if err := repo.DB.Where("date = ? AND mkt = ?", dateStr, mkt).First(&existing).Error; err == nil {
|
||||
util.Logger.Info("Image already exists, skipping", zap.String("date", dateStr), zap.String("mkt", mkt))
|
||||
util.Logger.Debug("Image already exists in DB, skipping", zap.String("date", dateStr), zap.String("mkt", mkt))
|
||||
return nil
|
||||
}
|
||||
|
||||
util.Logger.Info("Processing new image", zap.String("date", dateStr), zap.String("mkt", mkt), zap.String("title", bingImg.Title))
|
||||
|
||||
// UHD 探测
|
||||
imgURL, variantName := f.probeUHD(bingImg.URLBase)
|
||||
|
||||
imgData, err := f.downloadImage(imgURL)
|
||||
if err != nil {
|
||||
util.Logger.Error("Failed to download image", zap.String("url", imgURL), zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
// 解码图片用于缩放
|
||||
srcImg, _, err := image.Decode(bytes.NewReader(imgData))
|
||||
if err != nil {
|
||||
util.Logger.Error("Failed to decode image data", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
imageName := f.extractImageName(bingImg.URLBase, bingImg.HSH)
|
||||
util.Logger.Info("Processing image", zap.String("date", dateStr), zap.String("mkt", mkt), zap.String("imageName", imageName))
|
||||
|
||||
// 创建 DB 记录
|
||||
dbImg := model.Image{
|
||||
@@ -168,7 +154,6 @@ func (f *Fetcher) processImage(ctx context.Context, bingImg BingImage, mkt strin
|
||||
return err
|
||||
}
|
||||
|
||||
// 再次检查 dbImg.ID 是否被填充,如果没有被填充(说明由于冲突未插入),则需要查询出已有的 ID
|
||||
if dbImg.ID == 0 {
|
||||
var existing model.Image
|
||||
if err := repo.DB.Where("date = ? AND mkt = ?", dateStr, mkt).First(&existing).Error; err != nil {
|
||||
@@ -178,6 +163,9 @@ func (f *Fetcher) processImage(ctx context.Context, bingImg BingImage, mkt strin
|
||||
dbImg = existing
|
||||
}
|
||||
|
||||
// UHD 探测
|
||||
imgURL, variantName := f.probeUHD(bingImg.URLBase)
|
||||
|
||||
// 保存各种分辨率
|
||||
targetVariants := []struct {
|
||||
name string
|
||||
@@ -197,40 +185,105 @@ func (f *Fetcher) processImage(ctx context.Context, bingImg BingImage, mkt strin
|
||||
{"320x240", 320, 240},
|
||||
}
|
||||
|
||||
// 首先保存原图 (UHD 或 1080p)
|
||||
if err := f.saveVariant(ctx, &dbImg, variantName, "jpg", imgData); err != nil {
|
||||
util.Logger.Error("Failed to save original variant", zap.String("variant", variantName), zap.Error(err))
|
||||
}
|
||||
|
||||
for _, v := range targetVariants {
|
||||
// 如果目标分辨率就是原图分辨率,则跳过(已经保存过了)
|
||||
if v.name == variantName {
|
||||
continue
|
||||
}
|
||||
|
||||
resized := imaging.Fill(srcImg, v.width, v.height, imaging.Center, imaging.Lanczos)
|
||||
buf := new(bytes.Buffer)
|
||||
if err := jpeg.Encode(buf, resized, &jpeg.Options{Quality: 100}); err != nil {
|
||||
util.Logger.Warn("Failed to encode jpeg", zap.String("variant", v.name), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
currentImgData := buf.Bytes()
|
||||
|
||||
// 保存 JPG
|
||||
if err := f.saveVariant(ctx, &dbImg, v.name, "jpg", currentImgData); err != nil {
|
||||
util.Logger.Error("Failed to save variant", zap.String("variant", v.name), zap.Error(err))
|
||||
// 检查是否所有变体都已存在于存储中
|
||||
allExist := true
|
||||
// 检查 UHD/原图
|
||||
uhdKey := f.generateKey(imageName, variantName, "jpg")
|
||||
exists, _ := storage.GlobalStorage.Exists(ctx, uhdKey)
|
||||
if !exists {
|
||||
allExist = false
|
||||
} else {
|
||||
for _, v := range targetVariants {
|
||||
if v.name == variantName {
|
||||
continue
|
||||
}
|
||||
vKey := f.generateKey(imageName, v.name, "jpg")
|
||||
exists, _ := storage.GlobalStorage.Exists(ctx, vKey)
|
||||
if !exists {
|
||||
allExist = false
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 保存今日额外文件
|
||||
today := time.Now().Format("2006-01-02")
|
||||
if dateStr == today && config.GetConfig().Feature.WriteDailyFiles {
|
||||
f.saveDailyFiles(srcImg, imgData, mkt)
|
||||
if allExist {
|
||||
util.Logger.Debug("All image variants exist in storage, linking only", zap.String("imageName", imageName))
|
||||
// 只建立关联信息
|
||||
f.saveVariant(ctx, &dbImg, imageName, variantName, "jpg", nil)
|
||||
for _, v := range targetVariants {
|
||||
if v.name == variantName {
|
||||
continue
|
||||
}
|
||||
f.saveVariant(ctx, &dbImg, imageName, v.name, "jpg", nil)
|
||||
}
|
||||
} else {
|
||||
// 需要下载并处理
|
||||
util.Logger.Debug("Downloading and processing image", zap.String("url", imgURL))
|
||||
imgData, err := f.downloadImage(imgURL)
|
||||
if err != nil {
|
||||
util.Logger.Error("Failed to download image", zap.String("url", imgURL), zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
srcImg, _, err := image.Decode(bytes.NewReader(imgData))
|
||||
if err != nil {
|
||||
util.Logger.Error("Failed to decode image data", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
// 保存原图
|
||||
if err := f.saveVariant(ctx, &dbImg, imageName, variantName, "jpg", imgData); err != nil {
|
||||
util.Logger.Error("Failed to save original variant", zap.String("variant", variantName), zap.Error(err))
|
||||
}
|
||||
|
||||
for _, v := range targetVariants {
|
||||
if v.name == variantName {
|
||||
continue
|
||||
}
|
||||
resized := imaging.Fill(srcImg, v.width, v.height, imaging.Center, imaging.Lanczos)
|
||||
buf := new(bytes.Buffer)
|
||||
if err := jpeg.Encode(buf, resized, &jpeg.Options{Quality: 100}); err != nil {
|
||||
util.Logger.Warn("Failed to encode jpeg", zap.String("variant", v.name), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
currentImgData := buf.Bytes()
|
||||
if err := f.saveVariant(ctx, &dbImg, imageName, v.name, "jpg", currentImgData); err != nil {
|
||||
util.Logger.Error("Failed to save variant", zap.String("variant", v.name), zap.Error(err))
|
||||
}
|
||||
}
|
||||
|
||||
// 保存今日额外文件
|
||||
today := time.Now().Format("2006-01-02")
|
||||
if dateStr == today && config.GetConfig().Feature.WriteDailyFiles {
|
||||
f.saveDailyFiles(srcImg, imgData, mkt)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *Fetcher) extractImageName(urlBase, hsh string) string {
|
||||
// 示例: /th?id=OHR.MilwaukeeHall_ROW0871854348
|
||||
start := 0
|
||||
if idx := strings.Index(urlBase, "OHR."); idx != -1 {
|
||||
start = idx + 4
|
||||
} else if idx := strings.Index(urlBase, "id="); idx != -1 {
|
||||
start = idx + 3
|
||||
}
|
||||
|
||||
rem := urlBase[start:]
|
||||
end := strings.Index(rem, "_")
|
||||
if end == -1 {
|
||||
end = len(rem)
|
||||
}
|
||||
|
||||
name := rem[:end]
|
||||
if name == "" {
|
||||
return hsh
|
||||
}
|
||||
return name
|
||||
}
|
||||
|
||||
func (f *Fetcher) probeUHD(urlBase string) (string, string) {
|
||||
uhdURL := fmt.Sprintf("https://www.bing.com%s_UHD.jpg", urlBase)
|
||||
resp, err := f.httpClient.Head(uhdURL)
|
||||
@@ -249,25 +302,52 @@ func (f *Fetcher) downloadImage(url string) ([]byte, error) {
|
||||
return io.ReadAll(resp.Body)
|
||||
}
|
||||
|
||||
func (f *Fetcher) saveVariant(ctx context.Context, img *model.Image, variant, format string, data []byte) error {
|
||||
key := fmt.Sprintf("%s/%s/%s_%s.%s", img.Mkt, img.Date, img.Date, variant, format)
|
||||
func (f *Fetcher) generateKey(imageName, variant, format string) string {
|
||||
return fmt.Sprintf("%s/%s_%s.%s", imageName, imageName, variant, format)
|
||||
}
|
||||
|
||||
func (f *Fetcher) saveVariant(ctx context.Context, img *model.Image, imageName, variant, format string, data []byte) error {
|
||||
key := f.generateKey(imageName, variant, format)
|
||||
contentType := "image/jpeg"
|
||||
if format == "webp" {
|
||||
contentType = "image/webp"
|
||||
}
|
||||
|
||||
stored, err := storage.GlobalStorage.Put(ctx, key, bytes.NewReader(data), contentType)
|
||||
if err != nil {
|
||||
return err
|
||||
var size int64
|
||||
var publicURL string
|
||||
|
||||
exists, _ := storage.GlobalStorage.Exists(ctx, key)
|
||||
if exists {
|
||||
util.Logger.Debug("Variant already exists in storage, linking", zap.String("key", key))
|
||||
// 如果存在,我们需要获取它的大小和公共 URL (如果可能)
|
||||
// 但目前的 Storage 接口没有 Stat,我们可以尝试 Get 或者干脆 size 为 0
|
||||
// 为了简单,我们只从存储中获取公共 URL
|
||||
if pURL, ok := storage.GlobalStorage.PublicURL(key); ok {
|
||||
publicURL = pURL
|
||||
}
|
||||
// size 暂时设为 0 或者从 data 中取 (如果有的话)
|
||||
if data != nil {
|
||||
size = int64(len(data))
|
||||
}
|
||||
} else if data != nil {
|
||||
util.Logger.Debug("Saving variant to storage", zap.String("key", key))
|
||||
stored, err := storage.GlobalStorage.Put(ctx, key, bytes.NewReader(data), contentType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
publicURL = stored.PublicURL
|
||||
size = stored.Size
|
||||
} else {
|
||||
return fmt.Errorf("variant %s does not exist and no data provided", key)
|
||||
}
|
||||
|
||||
vRecord := model.ImageVariant{
|
||||
ImageID: img.ID,
|
||||
Variant: variant,
|
||||
Format: format,
|
||||
StorageKey: stored.Key,
|
||||
PublicURL: stored.PublicURL,
|
||||
Size: int64(len(data)),
|
||||
StorageKey: key,
|
||||
PublicURL: publicURL,
|
||||
Size: size,
|
||||
}
|
||||
|
||||
return repo.DB.Clauses(clause.OnConflict{
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"time"
|
||||
|
||||
"BingPaper/internal/config"
|
||||
@@ -56,6 +57,7 @@ func CleanupOldImages(ctx context.Context) error {
|
||||
|
||||
func GetTodayImage(mkt string) (*model.Image, error) {
|
||||
today := time.Now().Format("2006-01-02")
|
||||
util.Logger.Debug("Getting today image", zap.String("mkt", mkt), zap.String("today", today))
|
||||
var img model.Image
|
||||
tx := repo.DB.Where("date = ?", today)
|
||||
if mkt != "" {
|
||||
@@ -73,6 +75,7 @@ func GetTodayImage(mkt string) (*model.Image, error) {
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
util.Logger.Debug("Today image not found, trying latest image", zap.String("mkt", mkt))
|
||||
// 如果今天还是没有,尝试获取最近的一张
|
||||
tx = repo.DB.Order("date desc")
|
||||
if mkt != "" {
|
||||
@@ -84,12 +87,16 @@ func GetTodayImage(mkt string) (*model.Image, error) {
|
||||
// 兜底逻辑:如果指定地区没找到,且开启了兜底开关,则尝试获取默认地区的图片
|
||||
if err != nil && mkt != "" && config.GetConfig().API.EnableMktFallback {
|
||||
defaultMkt := config.GetConfig().GetDefaultMkt()
|
||||
util.Logger.Debug("Image not found, trying fallback to default market", zap.String("mkt", mkt), zap.String("defaultMkt", defaultMkt))
|
||||
if mkt != defaultMkt {
|
||||
return GetTodayImage(defaultMkt)
|
||||
}
|
||||
return GetTodayImage("")
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
util.Logger.Debug("Found image", zap.String("date", img.Date), zap.String("mkt", img.Mkt))
|
||||
}
|
||||
return &img, err
|
||||
}
|
||||
|
||||
@@ -110,6 +117,7 @@ func GetAllRegionsTodayImages() ([]model.Image, error) {
|
||||
}
|
||||
|
||||
func GetRandomImage(mkt string) (*model.Image, error) {
|
||||
util.Logger.Debug("Getting random image", zap.String("mkt", mkt))
|
||||
var img model.Image
|
||||
// SQLite 使用 RANDOM(), MySQL/Postgres 使用 RANDOM() 或 RAND()
|
||||
// 简单起见,先查总数再 Offset
|
||||
@@ -133,34 +141,35 @@ func GetRandomImage(mkt string) (*model.Image, error) {
|
||||
return nil, fmt.Errorf("no images found")
|
||||
}
|
||||
|
||||
// 这种方法不适合海量数据,但对于 30 天的数据没问题
|
||||
tx = repo.DB.Order("RANDOM()")
|
||||
if mkt != "" {
|
||||
tx = tx.Where("mkt = ?", mkt)
|
||||
}
|
||||
err := tx.Preload("Variants").First(&img).Error
|
||||
if err != nil {
|
||||
// 适配 MySQL
|
||||
tx = repo.DB.Order("RAND()")
|
||||
if mkt != "" {
|
||||
tx = tx.Where("mkt = ?", mkt)
|
||||
}
|
||||
err = tx.Preload("Variants").First(&img).Error
|
||||
}
|
||||
// 优化随机查询:使用 Offset 代替 ORDER BY RANDOM()
|
||||
// 注意:tx 包含了前面的 Where 条件
|
||||
offset := rand.Intn(int(count))
|
||||
util.Logger.Debug("Random image selection", zap.Int64("total", count), zap.Int("offset", offset))
|
||||
err := tx.Preload("Variants").Offset(offset).Limit(1).Find(&img).Error
|
||||
|
||||
// 兜底逻辑
|
||||
if err != nil && mkt != "" && config.GetConfig().API.EnableMktFallback {
|
||||
if (err != nil || img.ID == 0) && mkt != "" && config.GetConfig().API.EnableMktFallback {
|
||||
defaultMkt := config.GetConfig().GetDefaultMkt()
|
||||
util.Logger.Debug("Random image not found, trying fallback", zap.String("mkt", mkt), zap.String("defaultMkt", defaultMkt))
|
||||
if mkt != defaultMkt {
|
||||
return GetRandomImage(defaultMkt)
|
||||
}
|
||||
return GetRandomImage("")
|
||||
}
|
||||
|
||||
if err == nil && img.ID == 0 {
|
||||
return nil, fmt.Errorf("no images found")
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
util.Logger.Debug("Found random image", zap.String("date", img.Date), zap.String("mkt", img.Mkt))
|
||||
}
|
||||
|
||||
return &img, err
|
||||
}
|
||||
|
||||
func GetImageByDate(date string, mkt string) (*model.Image, error) {
|
||||
util.Logger.Debug("Getting image by date", zap.String("date", date), zap.String("mkt", mkt))
|
||||
var img model.Image
|
||||
tx := repo.DB.Where("date = ?", date)
|
||||
if mkt != "" {
|
||||
@@ -180,16 +189,21 @@ func GetImageByDate(date string, mkt string) (*model.Image, error) {
|
||||
// 兜底逻辑
|
||||
if err != nil && mkt != "" && config.GetConfig().API.EnableMktFallback {
|
||||
defaultMkt := config.GetConfig().GetDefaultMkt()
|
||||
util.Logger.Debug("Image by date not found, trying fallback", zap.String("date", date), zap.String("mkt", mkt), zap.String("defaultMkt", defaultMkt))
|
||||
if mkt != defaultMkt {
|
||||
return GetImageByDate(date, defaultMkt)
|
||||
}
|
||||
return GetImageByDate(date, "")
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
util.Logger.Debug("Found image by date", zap.String("date", img.Date), zap.String("mkt", img.Mkt))
|
||||
}
|
||||
return &img, err
|
||||
}
|
||||
|
||||
func GetImageList(limit int, offset int, month string, mkt string) ([]model.Image, error) {
|
||||
util.Logger.Debug("Getting image list", zap.Int("limit", limit), zap.Int("offset", offset), zap.String("month", month), zap.String("mkt", mkt))
|
||||
var images []model.Image
|
||||
tx := repo.DB.Model(&model.Image{})
|
||||
|
||||
|
||||
@@ -63,3 +63,15 @@ func (l *LocalStorage) Delete(ctx context.Context, key string) error {
|
||||
func (l *LocalStorage) PublicURL(key string) (string, bool) {
|
||||
return "", false
|
||||
}
|
||||
|
||||
func (l *LocalStorage) Exists(ctx context.Context, key string) (bool, error) {
|
||||
path := filepath.Join(l.root, key)
|
||||
_, err := os.Stat(path)
|
||||
if err == nil {
|
||||
return true, nil
|
||||
}
|
||||
if os.IsNotExist(err) {
|
||||
return false, nil
|
||||
}
|
||||
return false, err
|
||||
}
|
||||
|
||||
@@ -100,3 +100,18 @@ func (s *S3Storage) PublicURL(key string) (string, bool) {
|
||||
// 也可以生成签名 URL,但这里简单处理
|
||||
return "", false
|
||||
}
|
||||
|
||||
func (s *S3Storage) Exists(ctx context.Context, key string) (bool, error) {
|
||||
_, err := s.client.HeadObject(ctx, &s3.HeadObjectInput{
|
||||
Bucket: aws.String(s.bucket),
|
||||
Key: aws.String(key),
|
||||
})
|
||||
if err != nil {
|
||||
// 判断是否为 404
|
||||
if strings.Contains(err.Error(), "NotFound") || strings.Contains(err.Error(), "404") {
|
||||
return false, nil
|
||||
}
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ type Storage interface {
|
||||
Get(ctx context.Context, key string) (io.ReadCloser, string, error)
|
||||
Delete(ctx context.Context, key string) error
|
||||
PublicURL(key string) (string, bool)
|
||||
Exists(ctx context.Context, key string) (bool, error)
|
||||
}
|
||||
|
||||
var GlobalStorage Storage
|
||||
|
||||
@@ -72,3 +72,16 @@ func (w *WebDAVStorage) PublicURL(key string) (string, bool) {
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
func (w *WebDAVStorage) Exists(ctx context.Context, key string) (bool, error) {
|
||||
_, err := w.client.Stat(key)
|
||||
if err == nil {
|
||||
return true, nil
|
||||
}
|
||||
// gowebdav 的错误处理比较原始,通常 404 会返回错误
|
||||
// 这里假设报错就是不存在,或者可以根据错误消息判断
|
||||
if strings.Contains(err.Error(), "404") || strings.Contains(err.Error(), "not found") {
|
||||
return false, nil
|
||||
}
|
||||
return false, err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user