数据库表重新设计,精简数据结构以及存储结构

This commit is contained in:
2026-01-30 23:02:59 +08:00
parent e40677f105
commit 49c78506b2
11 changed files with 359 additions and 319 deletions

View File

@@ -58,7 +58,7 @@ func (f *Fetcher) Fetch(ctx context.Context, n int) error {
util.Logger.Info("Starting fetch task", zap.Int("n", n))
regions := config.GetConfig().Fetcher.Regions
if len(regions) == 0 {
regions = []string{config.GetConfig().GetDefaultMkt()}
regions = []string{config.GetConfig().GetDefaultRegion()}
}
for _, mkt := range regions {
@@ -122,51 +122,17 @@ func (f *Fetcher) fetchByMkt(ctx context.Context, mkt string, idx int, n int) er
func (f *Fetcher) processImage(ctx context.Context, bingImg BingImage, mkt string) error {
dateStr := fmt.Sprintf("%s-%s-%s", bingImg.Enddate[0:4], bingImg.Enddate[4:6], bingImg.Enddate[6:8])
// 幂等检查
var existing model.Image
if err := repo.DB.Where("date = ? AND mkt = ?", dateStr, mkt).First(&existing).Error; err == nil {
util.Logger.Debug("Image already exists in DB, skipping", zap.String("date", dateStr), zap.String("mkt", mkt))
// 1. 地区关联幂等检查
var existingRegion model.ImageRegion
if err := repo.DB.Where("date = ? AND mkt = ?", dateStr, mkt).First(&existingRegion).Error; err == nil {
util.Logger.Debug("ImageRegion record already exists, skipping", zap.String("date", dateStr), zap.String("mkt", mkt))
return nil
}
imageName := f.extractImageName(bingImg.URLBase, bingImg.HSH)
util.Logger.Info("Processing image", zap.String("date", dateStr), zap.String("mkt", mkt), zap.String("imageName", imageName))
// 创建 DB 记录
dbImg := model.Image{
Date: dateStr,
Mkt: mkt,
Title: bingImg.Title,
Copyright: bingImg.Copyright,
CopyrightLink: bingImg.CopyrightLink,
URLBase: bingImg.URLBase,
Quiz: bingImg.Quiz,
StartDate: bingImg.Startdate,
FullStartDate: bingImg.Fullstartdate,
HSH: bingImg.HSH,
}
if err := repo.DB.Clauses(clause.OnConflict{
Columns: []clause.Column{{Name: "date"}, {Name: "mkt"}},
DoNothing: true,
}).Create(&dbImg).Error; err != nil {
util.Logger.Error("Failed to create image record", zap.Error(err))
return err
}
if dbImg.ID == 0 {
var existing model.Image
if err := repo.DB.Where("date = ? AND mkt = ?", dateStr, mkt).First(&existing).Error; err != nil {
util.Logger.Error("Failed to query existing image record after conflict", zap.Error(err))
return err
}
dbImg = existing
}
// UHD 探测
// 2. 处理变体
imgURL, variantName := f.probeUHD(bingImg.URLBase)
// 保存各种分辨率
targetVariants := []struct {
name string
width int
@@ -185,54 +151,34 @@ func (f *Fetcher) processImage(ctx context.Context, bingImg BingImage, mkt strin
{"320x240", 320, 240},
}
// 检查是否所有变体都已存在于存储中
allExist := true
// 检查 UHD/原图
uhdKey := f.generateKey(imageName, variantName, "jpg")
exists, _ := storage.GlobalStorage.Exists(ctx, uhdKey)
if !exists {
allExist = false
} else {
for _, v := range targetVariants {
if v.name == variantName {
continue
}
vKey := f.generateKey(imageName, v.name, "jpg")
exists, _ := storage.GlobalStorage.Exists(ctx, vKey)
if !exists {
allExist = false
break
}
}
}
// 检查变体是否已存在 (通过 ImageName)
var existingVariants []model.ImageVariant
repo.DB.Where("image_name = ?", imageName).Find(&existingVariants)
if allExist {
util.Logger.Debug("All image variants exist in storage, linking only", zap.String("imageName", imageName))
// 只建立关联信息
f.saveVariant(ctx, &dbImg, imageName, variantName, "jpg", nil)
for _, v := range targetVariants {
if v.name == variantName {
continue
}
f.saveVariant(ctx, &dbImg, imageName, v.name, "jpg", nil)
}
allVariantsExist := len(existingVariants) > 0
var srcImg image.Image
var imgData []byte
if allVariantsExist {
util.Logger.Debug("Image variants already exist for name, linking only", zap.String("imageName", imageName))
} else {
// 需要下载并处理
util.Logger.Debug("Downloading and processing image", zap.String("url", imgURL))
imgData, err := f.downloadImage(imgURL)
util.Logger.Debug("Downloading and processing image", zap.String("url", imgURL), zap.String("imageName", imageName))
var err error
imgData, err = f.downloadImage(imgURL)
if err != nil {
util.Logger.Error("Failed to download image", zap.String("url", imgURL), zap.Error(err))
return err
}
srcImg, _, err := image.Decode(bytes.NewReader(imgData))
srcImg, _, err = image.Decode(bytes.NewReader(imgData))
if err != nil {
util.Logger.Error("Failed to decode image data", zap.Error(err))
return err
}
// 保存原图
if err := f.saveVariant(ctx, &dbImg, imageName, variantName, "jpg", imgData); err != nil {
// 保存原图变体
if err := f.saveVariant(ctx, imageName, variantName, "jpg", imgData); err != nil {
util.Logger.Error("Failed to save original variant", zap.String("variant", variantName), zap.Error(err))
}
@@ -247,14 +193,39 @@ func (f *Fetcher) processImage(ctx context.Context, bingImg BingImage, mkt strin
continue
}
currentImgData := buf.Bytes()
if err := f.saveVariant(ctx, &dbImg, imageName, v.name, "jpg", currentImgData); err != nil {
if err := f.saveVariant(ctx, imageName, v.name, "jpg", currentImgData); err != nil {
util.Logger.Error("Failed to save variant", zap.String("variant", v.name), zap.Error(err))
}
}
}
// 保存今日额外文件
today := time.Now().Format("2006-01-02")
if dateStr == today && config.GetConfig().Feature.WriteDailyFiles {
// 3. 创建 ImageRegion 记录
regionRecord := model.ImageRegion{
HSH: bingImg.HSH,
URLBase: bingImg.URLBase,
ImageName: imageName,
Date: dateStr,
Mkt: mkt,
Title: bingImg.Title,
Copyright: bingImg.Copyright,
CopyrightLink: bingImg.CopyrightLink,
Quiz: bingImg.Quiz,
StartDate: bingImg.Startdate,
FullStartDate: bingImg.Fullstartdate,
}
if err := repo.DB.Clauses(clause.OnConflict{
Columns: []clause.Column{{Name: "date"}, {Name: "mkt"}},
UpdateAll: true,
}).Create(&regionRecord).Error; err != nil {
util.Logger.Error("Failed to create region record", zap.Error(err))
return err
}
// 4. 保存今日额外文件
today := time.Now().Format("2006-01-02")
if dateStr == today && config.GetConfig().Feature.WriteDailyFiles {
if imgData != nil && srcImg != nil {
f.saveDailyFiles(srcImg, imgData, mkt)
}
}
@@ -306,7 +277,7 @@ func (f *Fetcher) generateKey(imageName, variant, format string) string {
return fmt.Sprintf("%s/%s_%s.%s", imageName, imageName, variant, format)
}
func (f *Fetcher) saveVariant(ctx context.Context, img *model.Image, imageName, variant, format string, data []byte) error {
func (f *Fetcher) saveVariant(ctx context.Context, imageName, variant, format string, data []byte) error {
key := f.generateKey(imageName, variant, format)
contentType := "image/jpeg"
if format == "webp" {
@@ -319,13 +290,12 @@ func (f *Fetcher) saveVariant(ctx context.Context, img *model.Image, imageName,
exists, _ := storage.GlobalStorage.Exists(ctx, key)
if exists {
util.Logger.Debug("Variant already exists in storage, linking", zap.String("key", key))
// 如果存在,我们需要获取它的大小和公共 URL (如果可能)
// 但目前的 Storage 接口没有 Stat,我们可以尝试 Get 或者干脆 size 为 0
// 为了简单,我们只从存储中获取公共 URL
// 如果存在,尝试获取公共 URL
if pURL, ok := storage.GlobalStorage.PublicURL(key); ok {
publicURL = pURL
}
// size 暂时设为 0 或者从 data 中取 (如果有的话)
// 如果传入了数据,则使用数据大小
if data != nil {
size = int64(len(data))
}
@@ -342,7 +312,7 @@ func (f *Fetcher) saveVariant(ctx context.Context, img *model.Image, imageName,
}
vRecord := model.ImageVariant{
ImageID: img.ID,
ImageName: imageName,
Variant: variant,
Format: format,
StorageKey: key,
@@ -351,7 +321,7 @@ func (f *Fetcher) saveVariant(ctx context.Context, img *model.Image, imageName,
}
return repo.DB.Clauses(clause.OnConflict{
Columns: []clause.Column{{Name: "image_id"}, {Name: "variant"}, {Name: "format"}},
Columns: []clause.Column{{Name: "image_name"}, {Name: "variant"}, {Name: "format"}},
DoNothing: true,
}).Create(&vRecord).Error
}
@@ -387,7 +357,7 @@ func (f *Fetcher) saveDailyFiles(srcImg image.Image, originalData []byte, mkt st
// 同时也保留一份在根目录下(兼容旧逻辑,且作为默认地区图片)
// 如果是默认地区或者是第一个抓取的地区,可以覆盖根目录的文件
if mkt == config.GetConfig().GetDefaultMkt() {
if mkt == config.GetConfig().GetDefaultRegion() {
jpegPathRoot := filepath.Join(localRoot, "daily.jpeg")
fJpegRoot, err := os.Create(jpegPathRoot)
if err == nil {

View File

@@ -15,6 +15,7 @@ import (
"BingPaper/internal/util"
"go.uber.org/zap"
"gorm.io/gorm"
)
var ErrFetchStarted = errors.New("on-demand fetch started")
@@ -28,42 +29,53 @@ func CleanupOldImages(ctx context.Context) error {
threshold := time.Now().AddDate(0, 0, -days).Format("2006-01-02")
util.Logger.Info("Starting cleanup task", zap.Int("retention_days", days), zap.String("threshold", threshold))
var images []model.Image
if err := repo.DB.Where("date < ?", threshold).Preload("Variants").Find(&images).Error; err != nil {
util.Logger.Error("Failed to query old images for cleanup", zap.Error(err))
var regionRecords []model.ImageRegion
if err := repo.DB.Where("date < ?", threshold).Preload("Variants").Find(&regionRecords).Error; err != nil {
util.Logger.Error("Failed to query old image regions for cleanup", zap.Error(err))
return err
}
for _, img := range images {
util.Logger.Info("Deleting old image", zap.String("date", img.Date))
for _, v := range img.Variants {
if err := storage.GlobalStorage.Delete(ctx, v.StorageKey); err != nil {
util.Logger.Warn("Failed to delete storage object", zap.String("key", v.StorageKey), zap.Error(err))
for _, m := range regionRecords {
util.Logger.Info("Deleting old image region record", zap.String("date", m.Date), zap.String("mkt", m.Mkt))
// 检查该图片名是否还有其他地区或日期在使用
var count int64
repo.DB.Model(&model.ImageRegion{}).Where("image_name = ? AND id != ?", m.ImageName, m.ID).Count(&count)
if count == 0 {
util.Logger.Info("Image content no longer referenced, deleting files and variants", zap.String("image_name", m.ImageName))
for _, v := range m.Variants {
if err := storage.GlobalStorage.Delete(ctx, v.StorageKey); err != nil {
util.Logger.Warn("Failed to delete storage object", zap.String("key", v.StorageKey), zap.Error(err))
}
}
// 删除变体记录
if err := repo.DB.Where("image_name = ?", m.ImageName).Delete(&model.ImageVariant{}).Error; err != nil {
util.Logger.Error("Failed to delete variants", zap.String("image_name", m.ImageName), zap.Error(err))
}
}
// 删除关联记录(逻辑外键控制)
if err := repo.DB.Where("image_id = ?", img.ID).Delete(&model.ImageVariant{}).Error; err != nil {
util.Logger.Error("Failed to delete variants", zap.Uint("image_id", img.ID), zap.Error(err))
}
// 删除主表记录
if err := repo.DB.Delete(&img).Error; err != nil {
util.Logger.Error("Failed to delete image", zap.Uint("id", img.ID), zap.Error(err))
// 删除地区记录
if err := repo.DB.Delete(&m).Error; err != nil {
util.Logger.Error("Failed to delete image region record", zap.Uint("id", m.ID), zap.Error(err))
}
}
util.Logger.Info("Cleanup task completed", zap.Int("deleted_count", len(images)))
util.Logger.Info("Cleanup task completed", zap.Int("deleted_count", len(regionRecords)))
return nil
}
func GetTodayImage(mkt string) (*model.Image, error) {
func GetTodayImage(mkt string) (*model.ImageRegion, error) {
today := time.Now().Format("2006-01-02")
util.Logger.Debug("Getting today image", zap.String("mkt", mkt), zap.String("today", today))
var img model.Image
var imgRegion model.ImageRegion
tx := repo.DB.Where("date = ?", today)
if mkt != "" {
tx = tx.Where("mkt = ?", mkt)
}
err := tx.Preload("Variants").First(&img).Error
err := tx.Preload("Variants", func(db *gorm.DB) *gorm.DB {
return db.Order("size asc")
}).First(&imgRegion).Error
if err != nil && mkt != "" && config.GetConfig().API.EnableOnDemandFetch && util.IsValidRegion(mkt) {
// 如果没找到,尝试异步按需抓取该地区
util.Logger.Info("Image not found in DB, starting asynchronous on-demand fetch", zap.String("mkt", mkt))
@@ -81,13 +93,15 @@ func GetTodayImage(mkt string) (*model.Image, error) {
if mkt != "" {
tx = tx.Where("mkt = ?", mkt)
}
err = tx.Preload("Variants").First(&img).Error
err = tx.Preload("Variants", func(db *gorm.DB) *gorm.DB {
return db.Order("size asc")
}).First(&imgRegion).Error
}
// 兜底逻辑:如果指定地区没找到,且开启了兜底开关,则尝试获取默认地区的图片
// 兜底逻辑
if err != nil && mkt != "" && config.GetConfig().API.EnableMktFallback {
defaultMkt := config.GetConfig().GetDefaultMkt()
util.Logger.Debug("Image not found, trying fallback to default market", zap.String("mkt", mkt), zap.String("defaultMkt", defaultMkt))
defaultMkt := config.GetConfig().GetDefaultRegion()
util.Logger.Debug("Image not found, trying fallback to default region", zap.String("mkt", mkt), zap.String("defaultMkt", defaultMkt))
if mkt != defaultMkt {
return GetTodayImage(defaultMkt)
}
@@ -95,18 +109,18 @@ func GetTodayImage(mkt string) (*model.Image, error) {
}
if err == nil {
util.Logger.Debug("Found image", zap.String("date", img.Date), zap.String("mkt", img.Mkt))
util.Logger.Debug("Found image region record", zap.String("date", imgRegion.Date), zap.String("mkt", imgRegion.Mkt))
}
return &img, err
return &imgRegion, err
}
func GetAllRegionsTodayImages() ([]model.Image, error) {
func GetAllRegionsTodayImages() ([]model.ImageRegion, error) {
regions := config.GetConfig().Fetcher.Regions
if len(regions) == 0 {
regions = []string{config.GetConfig().GetDefaultMkt()}
regions = []string{config.GetConfig().GetDefaultRegion()}
}
var images []model.Image
var images []model.ImageRegion
for _, mkt := range regions {
img, err := GetTodayImage(mkt)
if err == nil {
@@ -116,19 +130,16 @@ func GetAllRegionsTodayImages() ([]model.Image, error) {
return images, nil
}
func GetRandomImage(mkt string) (*model.Image, error) {
func GetRandomImage(mkt string) (*model.ImageRegion, error) {
util.Logger.Debug("Getting random image", zap.String("mkt", mkt))
var img model.Image
// SQLite 使用 RANDOM(), MySQL/Postgres 使用 RANDOM() 或 RAND()
// 简单起见,先查总数再 Offset
var imgRegion model.ImageRegion
var count int64
tx := repo.DB.Model(&model.Image{})
tx := repo.DB.Model(&model.ImageRegion{})
if mkt != "" {
tx = tx.Where("mkt = ?", mkt)
}
tx.Count(&count)
if count == 0 && mkt != "" && config.GetConfig().API.EnableOnDemandFetch && util.IsValidRegion(mkt) {
// 如果没找到,尝试异步按需抓取该地区
util.Logger.Info("No images found in DB for region, starting asynchronous on-demand fetch", zap.String("mkt", mkt))
f := fetcher.NewFetcher()
go func() {
@@ -141,15 +152,14 @@ func GetRandomImage(mkt string) (*model.Image, error) {
return nil, fmt.Errorf("no images found")
}
// 优化随机查询:使用 Offset 代替 ORDER BY RANDOM()
// 注意:tx 包含了前面的 Where 条件
offset := rand.Intn(int(count))
util.Logger.Debug("Random image selection", zap.Int64("total", count), zap.Int("offset", offset))
err := tx.Preload("Variants").Offset(offset).Limit(1).Find(&img).Error
err := tx.Preload("Variants", func(db *gorm.DB) *gorm.DB {
return db.Order("size asc")
}).Offset(offset).Limit(1).Find(&imgRegion).Error
// 兜底逻辑
if (err != nil || img.ID == 0) && mkt != "" && config.GetConfig().API.EnableMktFallback {
defaultMkt := config.GetConfig().GetDefaultMkt()
if (err != nil || imgRegion.ID == 0) && mkt != "" && config.GetConfig().API.EnableMktFallback {
defaultMkt := config.GetConfig().GetDefaultRegion()
util.Logger.Debug("Random image not found, trying fallback", zap.String("mkt", mkt), zap.String("defaultMkt", defaultMkt))
if mkt != defaultMkt {
return GetRandomImage(defaultMkt)
@@ -157,27 +167,24 @@ func GetRandomImage(mkt string) (*model.Image, error) {
return GetRandomImage("")
}
if err == nil && img.ID == 0 {
if err == nil && imgRegion.ID == 0 {
return nil, fmt.Errorf("no images found")
}
if err == nil {
util.Logger.Debug("Found random image", zap.String("date", img.Date), zap.String("mkt", img.Mkt))
}
return &img, err
return &imgRegion, err
}
func GetImageByDate(date string, mkt string) (*model.Image, error) {
func GetImageByDate(date string, mkt string) (*model.ImageRegion, error) {
util.Logger.Debug("Getting image by date", zap.String("date", date), zap.String("mkt", mkt))
var img model.Image
var imgRegion model.ImageRegion
tx := repo.DB.Where("date = ?", date)
if mkt != "" {
tx = tx.Where("mkt = ?", mkt)
}
err := tx.Preload("Variants").First(&img).Error
err := tx.Preload("Variants", func(db *gorm.DB) *gorm.DB {
return db.Order("size asc")
}).First(&imgRegion).Error
if err != nil && mkt != "" && config.GetConfig().API.EnableOnDemandFetch && util.IsValidRegion(mkt) {
// 如果没找到,尝试异步按需抓取该地区
util.Logger.Info("Image not found in DB for date, starting asynchronous on-demand fetch", zap.String("mkt", mkt), zap.String("date", date))
f := fetcher.NewFetcher()
go func() {
@@ -186,39 +193,31 @@ func GetImageByDate(date string, mkt string) (*model.Image, error) {
return nil, ErrFetchStarted
}
// 兜底逻辑
if err != nil && mkt != "" && config.GetConfig().API.EnableMktFallback {
defaultMkt := config.GetConfig().GetDefaultMkt()
util.Logger.Debug("Image by date not found, trying fallback", zap.String("date", date), zap.String("mkt", mkt), zap.String("defaultMkt", defaultMkt))
defaultMkt := config.GetConfig().GetDefaultRegion()
if mkt != defaultMkt {
return GetImageByDate(date, defaultMkt)
}
return GetImageByDate(date, "")
}
if err == nil {
util.Logger.Debug("Found image by date", zap.String("date", img.Date), zap.String("mkt", img.Mkt))
}
return &img, err
return &imgRegion, err
}
func GetImageList(limit int, offset int, month string, mkt string) ([]model.Image, error) {
util.Logger.Debug("Getting image list", zap.Int("limit", limit), zap.Int("offset", offset), zap.String("month", month), zap.String("mkt", mkt))
var images []model.Image
tx := repo.DB.Model(&model.Image{})
func GetImageList(limit int, offset int, month string, mkt string) ([]model.ImageRegion, error) {
var images []model.ImageRegion
tx := repo.DB.Model(&model.ImageRegion{})
if month != "" {
// 增强过滤:确保只处理 YYYY-MM 格式,防止注入或非法字符
// 这里简单处理:只要不为空就增加 LIKE 过滤
util.Logger.Debug("Filtering images by month", zap.String("month", month))
tx = tx.Where("date LIKE ?", month+"%")
}
if mkt != "" {
tx = tx.Where("mkt = ?", mkt)
}
tx = tx.Order("date desc").Preload("Variants")
tx = tx.Order("date desc").Preload("Variants", func(db *gorm.DB) *gorm.DB {
return db.Order("size asc")
})
if limit > 0 {
tx = tx.Limit(limit)
@@ -228,8 +227,5 @@ func GetImageList(limit int, offset int, month string, mkt string) ([]model.Imag
}
err := tx.Find(&images).Error
if err != nil {
util.Logger.Error("Failed to get image list", zap.Error(err), zap.String("month", month))
}
return images, err
}