2020-10-04 14:30:42 +08:00
package caches
import (
2022-03-15 21:33:44 +08:00
"bytes"
2020-10-04 14:30:42 +08:00
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"github.com/TeaOSLab/EdgeCommon/pkg/serverconfigs"
2021-03-02 19:43:05 +08:00
"github.com/TeaOSLab/EdgeCommon/pkg/serverconfigs/shared"
2022-03-14 11:47:34 +08:00
teaconst "github.com/TeaOSLab/EdgeNode/internal/const"
2020-10-28 11:19:06 +08:00
"github.com/TeaOSLab/EdgeNode/internal/events"
2021-12-08 15:17:45 +08:00
"github.com/TeaOSLab/EdgeNode/internal/goman"
2020-12-17 17:36:10 +08:00
"github.com/TeaOSLab/EdgeNode/internal/remotelogs"
2021-11-14 10:55:09 +08:00
"github.com/TeaOSLab/EdgeNode/internal/trackers"
2020-10-04 14:30:42 +08:00
"github.com/TeaOSLab/EdgeNode/internal/utils"
2023-09-15 18:14:58 +08:00
"github.com/TeaOSLab/EdgeNode/internal/utils/fasttime"
2023-07-08 18:52:57 +08:00
fsutils "github.com/TeaOSLab/EdgeNode/internal/utils/fs"
2022-03-06 17:18:06 +08:00
setutils "github.com/TeaOSLab/EdgeNode/internal/utils/sets"
"github.com/TeaOSLab/EdgeNode/internal/utils/sizes"
2021-12-09 12:07:46 +08:00
"github.com/TeaOSLab/EdgeNode/internal/zero"
2020-10-04 14:30:42 +08:00
"github.com/iwind/TeaGo/Tea"
2021-11-13 21:30:24 +08:00
"github.com/iwind/TeaGo/rands"
"github.com/iwind/TeaGo/types"
2020-10-04 14:30:42 +08:00
stringutil "github.com/iwind/TeaGo/utils/string"
2023-08-13 18:18:55 +08:00
timeutil "github.com/iwind/TeaGo/utils/time"
2023-09-28 15:02:06 +08:00
"github.com/iwind/gosock/pkg/gosock"
2023-08-08 16:10:14 +08:00
"github.com/shirou/gopsutil/v3/load"
2021-11-13 21:30:24 +08:00
"math"
2020-10-04 14:30:42 +08:00
"os"
"path/filepath"
"regexp"
2021-11-14 16:15:07 +08:00
"sort"
2020-10-04 14:30:42 +08:00
"strconv"
"strings"
"sync"
2021-01-11 23:06:50 +08:00
"syscall"
2020-10-04 14:30:42 +08:00
"time"
)
// Layout of the fixed-size meta section at the start of every cache file.
// The fields are stored back to back in this order:
// expires-at | status | url length | header length | body length.
const (
	// Width in bytes of each meta field.
	SizeExpiresAt    = 4
	SizeStatus       = 3
	SizeURLLength    = 4
	SizeHeaderLength = 4
	SizeBodyLength   = 8

	// Byte offset of each meta field; every field starts where the previous one ends.
	OffsetExpiresAt    = 0
	OffsetStatus       = SizeExpiresAt
	OffsetURLLength    = OffsetStatus + SizeStatus
	OffsetHeaderLength = OffsetURLLength + SizeURLLength
	OffsetBodyLength   = OffsetHeaderLength + SizeHeaderLength

	// SizeMeta is the total width of the meta section.
	SizeMeta = SizeExpiresAt + SizeStatus + SizeURLLength + SizeHeaderLength + SizeBodyLength
)
2021-11-14 16:15:07 +08:00
const (
2023-07-08 18:52:57 +08:00
FileStorageMaxIgnoreKeys = 32768 // 最大可忽略的键值数(尺寸过大的键值)
HotItemSize = 1024 // 热点数据数量
HotItemLifeSeconds int64 = 3600 // 热点数据生命周期
FileToMemoryMaxSize = 32 * sizes . M // 可以从文件写入到内存的最大文件尺寸
FileTmpSuffix = ".tmp"
2023-08-06 18:08:28 +08:00
DefaultMinDiskFreeSpace uint64 = 5 << 30 // 当前磁盘最小剩余空间
2023-08-27 14:49:28 +08:00
DefaultStaleCacheSeconds = 1200 // 过时缓存留存时间
2023-09-15 18:14:58 +08:00
HashKeyLength = 32
2021-11-14 16:15:07 +08:00
)
2021-12-21 08:03:09 +08:00
// Package-wide registry of the keys that currently have a file writer open.
var (
	// sharedWritingFileKeyMap holds the keys being written; only key presence matters.
	sharedWritingFileKeyMap = map[string]zero.Zero{}

	// sharedWritingFileKeyLocker guards sharedWritingFileKeyMap.
	sharedWritingFileKeyLocker sync.Mutex
)
2021-12-21 00:27:32 +08:00
2021-05-12 21:38:44 +08:00
// FileStorage 文件缓存
2022-08-14 16:28:40 +08:00
//
// 文件结构:
// [expires time] | [ status ] | [url length] | [header length] | [body length] | [url] [header data] [body data]
2020-10-04 14:30:42 +08:00
type FileStorage struct {
2021-03-02 19:43:05 +08:00
policy * serverconfigs . HTTPCachePolicy
2022-03-15 21:33:44 +08:00
options * serverconfigs . HTTPFileCacheStorage // 二级缓存
2021-03-02 19:43:05 +08:00
memoryStorage * MemoryStorage // 一级缓存
2020-10-04 14:30:42 +08:00
2021-12-21 00:27:32 +08:00
list ListInterface
locker sync . RWMutex
purgeTicker * utils . Ticker
2021-11-14 16:15:07 +08:00
hotMap map [ string ] * HotItem // key => count
hotMapLocker sync . Mutex
lastHotSize int
hotTicker * utils . Ticker
2022-01-12 21:09:00 +08:00
2022-03-06 17:18:06 +08:00
ignoreKeys * setutils . FixedSet
2022-01-12 21:09:00 +08:00
openFileCache * OpenFileCache
2022-10-25 15:14:28 +08:00
2022-11-15 20:42:25 +08:00
mainDiskIsFull bool
subDirs [ ] * FileDir
2020-10-04 14:30:42 +08:00
}
func NewFileStorage ( policy * serverconfigs . HTTPCachePolicy ) * FileStorage {
return & FileStorage {
2021-12-21 00:27:32 +08:00
policy : policy ,
hotMap : map [ string ] * HotItem { } ,
lastHotSize : - 1 ,
2022-03-20 21:15:25 +08:00
ignoreKeys : setutils . NewFixedSet ( FileStorageMaxIgnoreKeys ) ,
2020-10-04 14:30:42 +08:00
}
}
2021-05-12 21:38:44 +08:00
// Policy 获取当前的Policy
2020-10-04 14:30:42 +08:00
func ( this * FileStorage ) Policy ( ) * serverconfigs . HTTPCachePolicy {
return this . policy
}
2022-03-15 21:33:44 +08:00
// CanUpdatePolicy reports whether newPolicy can be applied to this storage
// in place. That is the case only when both the current and the new options
// decode successfully and point at the same cache directory.
func (this *FileStorage) CanUpdatePolicy(newPolicy *serverconfigs.HTTPCachePolicy) bool {
	// decodeDir round-trips raw policy options through JSON into the typed
	// file storage options and returns their Dir.
	var decodeDir = func(rawOptions interface{}) (dir string, ok bool) {
		optionsJSON, err := json.Marshal(rawOptions)
		if err != nil {
			return "", false
		}

		var options = &serverconfigs.HTTPFileCacheStorage{}
		if err = json.Unmarshal(optionsJSON, options); err != nil {
			return "", false
		}
		return options.Dir, true
	}

	oldDir, ok := decodeDir(this.policy.Options)
	if !ok {
		return false
	}

	newDir, ok := decodeDir(newPolicy.Options)
	if !ok {
		return false
	}

	// Only the directory is compared.
	return oldDir == newDir
}
// UpdatePolicy 修改策略
func ( this * FileStorage ) UpdatePolicy ( newPolicy * serverconfigs . HTTPCachePolicy ) {
var oldOpenFileCache = this . options . OpenFileCache
this . policy = newPolicy
newOptionsJSON , err := json . Marshal ( newPolicy . Options )
if err != nil {
return
}
var newOptions = & serverconfigs . HTTPFileCacheStorage { }
err = json . Unmarshal ( newOptionsJSON , newOptions )
if err != nil {
remotelogs . Error ( "CACHE" , "update policy '" + types . String ( this . policy . Id ) + "' failed: decode options failed: " + err . Error ( ) )
return
}
2022-11-15 20:42:25 +08:00
var subDirs = [ ] * FileDir { }
for _ , subDir := range newOptions . SubDirs {
subDirs = append ( subDirs , & FileDir {
Path : subDir . Path ,
Capacity : subDir . Capacity ,
IsFull : false ,
} )
}
2023-08-08 10:07:24 +08:00
this . subDirs = subDirs
2022-11-15 20:42:25 +08:00
this . checkDiskSpace ( )
2022-03-15 21:33:44 +08:00
err = newOptions . Init ( )
if err != nil {
remotelogs . Error ( "CACHE" , "update policy '" + types . String ( this . policy . Id ) + "' failed: init options failed: " + err . Error ( ) )
return
}
this . options = newOptions
var memoryStorage = this . memoryStorage
if memoryStorage != nil {
if newOptions . MemoryPolicy != nil && newOptions . MemoryPolicy . CapacityBytes ( ) > 0 {
memoryStorage . UpdatePolicy ( newOptions . MemoryPolicy )
} else {
memoryStorage . Stop ( )
this . memoryStorage = nil
}
} else if newOptions . MemoryPolicy != nil && this . options . MemoryPolicy . Capacity != nil && this . options . MemoryPolicy . Capacity . Count > 0 {
err = this . createMemoryStorage ( )
if err != nil {
remotelogs . Error ( "CACHE" , "update policy '" + types . String ( this . policy . Id ) + "' failed: create memory storage failed: " + err . Error ( ) )
}
}
// open cache
oldOpenFileCacheJSON , _ := json . Marshal ( oldOpenFileCache )
newOpenFileCacheJSON , _ := json . Marshal ( this . options . OpenFileCache )
2022-09-18 16:18:31 +08:00
if ! bytes . Equal ( oldOpenFileCacheJSON , newOpenFileCacheJSON ) {
2022-03-15 21:33:44 +08:00
this . initOpenFileCache ( )
}
// Purge Ticker
if newPolicy . PersistenceAutoPurgeInterval != this . policy . PersistenceAutoPurgeInterval {
this . initPurgeTicker ( )
}
2023-07-18 12:45:25 +08:00
// reset ignored keys
this . ignoreKeys . Reset ( )
2022-03-15 21:33:44 +08:00
}
2021-05-12 21:38:44 +08:00
// Init 初始化
2020-10-04 14:30:42 +08:00
func ( this * FileStorage ) Init ( ) error {
this . locker . Lock ( )
defer this . locker . Unlock ( )
2022-03-15 21:33:44 +08:00
var before = time . Now ( )
2020-10-04 14:30:42 +08:00
// 配置
2022-03-15 21:33:44 +08:00
var options = & serverconfigs . HTTPFileCacheStorage { }
2020-10-04 14:30:42 +08:00
optionsJSON , err := json . Marshal ( this . policy . Options )
if err != nil {
return err
}
2022-03-15 21:33:44 +08:00
err = json . Unmarshal ( optionsJSON , options )
2020-10-04 14:30:42 +08:00
if err != nil {
return err
}
2022-03-15 21:33:44 +08:00
this . options = options
2020-10-04 14:30:42 +08:00
2022-03-15 21:33:44 +08:00
if ! filepath . IsAbs ( this . options . Dir ) {
this . options . Dir = Tea . Root + Tea . DS + this . options . Dir
2020-10-04 14:30:42 +08:00
}
2022-03-15 21:33:44 +08:00
this . options . Dir = filepath . Clean ( this . options . Dir )
var dir = this . options . Dir
2020-10-04 14:30:42 +08:00
2022-11-15 20:42:25 +08:00
var subDirs = [ ] * FileDir { }
for _ , subDir := range this . options . SubDirs {
subDirs = append ( subDirs , & FileDir {
Path : subDir . Path ,
Capacity : subDir . Capacity ,
IsFull : false ,
} )
}
this . subDirs = subDirs
if len ( subDirs ) > 0 {
this . checkDiskSpace ( )
}
2020-10-04 14:30:42 +08:00
if len ( dir ) == 0 {
return errors . New ( "[CACHE]cache storage dir can not be empty" )
}
2022-03-16 16:20:53 +08:00
var list = NewFileList ( dir + "/p" + types . String ( this . policy . Id ) + "/.indexes" )
2021-05-19 12:07:35 +08:00
err = list . Init ( )
if err != nil {
return err
}
2022-03-16 16:20:53 +08:00
list . ( * FileList ) . SetOldDir ( dir + "/p" + types . String ( this . policy . Id ) )
2021-05-19 12:07:35 +08:00
this . list = list
2020-10-04 14:30:42 +08:00
// 检查目录是否存在
_ , err = os . Stat ( dir )
if err != nil {
if ! os . IsNotExist ( err ) {
return err
} else {
err = os . MkdirAll ( dir , 0777 )
if err != nil {
2023-08-11 14:38:00 +08:00
return fmt . Errorf ( "[CACHE]can not create dir: %w" , err )
2020-10-04 14:30:42 +08:00
}
}
}
2021-05-19 12:07:35 +08:00
defer func ( ) {
// 统计
2023-07-08 18:52:57 +08:00
var totalSize = this . TotalDiskSize ( )
2022-04-20 18:23:26 +08:00
var cost = time . Since ( before ) . Seconds ( ) * 1000
2023-07-08 18:52:57 +08:00
var sizeMB = types . String ( totalSize ) + " Bytes"
if totalSize > 1 * sizes . G {
sizeMB = fmt . Sprintf ( "%.3f G" , float64 ( totalSize ) / float64 ( sizes . G ) )
} else if totalSize > 1 * sizes . M {
sizeMB = fmt . Sprintf ( "%.3f M" , float64 ( totalSize ) / float64 ( sizes . M ) )
} else if totalSize > 1 * sizes . K {
sizeMB = fmt . Sprintf ( "%.3f K" , float64 ( totalSize ) / float64 ( sizes . K ) )
2021-05-19 12:07:35 +08:00
}
2023-07-08 18:52:57 +08:00
remotelogs . Println ( "CACHE" , "init policy " + types . String ( this . policy . Id ) + " from '" + this . options . Dir + "', cost: " + fmt . Sprintf ( "%.2f" , cost ) + " ms, size: " + sizeMB )
2021-05-19 12:07:35 +08:00
} ( )
2020-10-04 14:30:42 +08:00
// 初始化list
err = this . initList ( )
if err != nil {
return err
}
2021-03-02 19:43:05 +08:00
// 加载内存缓存
2022-03-15 21:33:44 +08:00
if this . options . MemoryPolicy != nil && this . options . MemoryPolicy . Capacity != nil && this . options . MemoryPolicy . Capacity . Count > 0 {
err = this . createMemoryStorage ( )
if err != nil {
return err
2021-03-02 19:43:05 +08:00
}
}
2022-01-12 21:09:00 +08:00
// open file cache
2022-03-15 21:33:44 +08:00
this . initOpenFileCache ( )
2022-01-12 21:09:00 +08:00
2022-10-25 15:14:28 +08:00
// 检查磁盘空间
this . checkDiskSpace ( )
2023-08-13 18:18:55 +08:00
// clean *.trash directories
this . cleanAllDeletedDirs ( )
2020-10-04 14:30:42 +08:00
return nil
}
2022-03-03 19:36:28 +08:00
func ( this * FileStorage ) OpenReader ( key string , useStale bool , isPartial bool ) ( Reader , error ) {
return this . openReader ( key , true , useStale , isPartial )
2021-11-14 16:15:07 +08:00
}
2022-03-03 19:36:28 +08:00
func ( this * FileStorage ) openReader ( key string , allowMemory bool , useStale bool , isPartial bool ) ( Reader , error ) {
2021-12-16 17:27:21 +08:00
// 使用陈旧缓存的时候,我们认为是短暂的,只需要从文件里检查即可
if useStale {
allowMemory = false
}
2022-03-03 19:36:28 +08:00
// 区间缓存只存在文件中
if isPartial {
allowMemory = false
}
2021-03-02 19:43:05 +08:00
// 先尝试内存缓存
2022-03-15 21:33:44 +08:00
var memoryStorage = this . memoryStorage
if allowMemory && memoryStorage != nil {
reader , err := memoryStorage . OpenReader ( key , useStale , isPartial )
2021-03-02 19:43:05 +08:00
if err == nil {
return reader , err
}
}
2022-11-15 20:42:25 +08:00
hash , path , _ := this . keyPath ( key )
2020-10-04 14:30:42 +08:00
2021-12-16 17:27:21 +08:00
// 检查文件记录是否已过期
if ! useStale {
exists , err := this . list . Exist ( hash )
if err != nil {
return nil , err
}
if ! exists {
return nil , ErrNotFound
}
}
2020-10-04 14:30:42 +08:00
// TODO 尝试使用mmap加快读取速度
2021-06-13 17:37:57 +08:00
var isOk = false
2022-01-12 21:09:00 +08:00
var openFile * OpenFile
2022-03-15 21:33:44 +08:00
var openFileCache = this . openFileCache // 因为中间可能有修改,所以先赋值再获取
if openFileCache != nil {
openFile = openFileCache . Get ( path )
2022-01-12 21:09:00 +08:00
}
var fp * os . File
var err error
2022-01-13 15:18:49 +08:00
if openFile == nil {
2022-01-12 21:09:00 +08:00
fp , err = os . OpenFile ( path , os . O_RDONLY , 0444 )
2022-01-13 15:18:49 +08:00
if err != nil {
if ! os . IsNotExist ( err ) {
return nil , err
}
return nil , ErrNotFound
2020-10-04 14:30:42 +08:00
}
2022-01-13 15:18:49 +08:00
} else {
fp = openFile . fp
2020-10-04 14:30:42 +08:00
}
2021-06-13 17:37:57 +08:00
defer func ( ) {
if ! isOk {
_ = fp . Close ( )
2022-03-05 16:47:17 +08:00
_ = this . removeCacheFile ( path )
2021-06-13 17:37:57 +08:00
}
} ( )
2022-03-03 19:36:28 +08:00
var reader Reader
if isPartial {
var partialFileReader = NewPartialFileReader ( fp )
partialFileReader . openFile = openFile
2022-03-15 21:33:44 +08:00
partialFileReader . openFileCache = openFileCache
2022-03-03 19:36:28 +08:00
reader = partialFileReader
} else {
var fileReader = NewFileReader ( fp )
fileReader . openFile = openFile
2022-03-15 21:33:44 +08:00
fileReader . openFileCache = openFileCache
2022-03-03 19:36:28 +08:00
reader = fileReader
}
2021-01-13 12:02:50 +08:00
err = reader . Init ( )
2020-10-04 14:30:42 +08:00
if err != nil {
2021-01-13 12:02:50 +08:00
return nil , err
2020-10-04 14:30:42 +08:00
}
2021-06-13 17:37:57 +08:00
2021-11-13 21:30:24 +08:00
// 增加点击量
// 1/1000采样
2022-03-06 17:18:06 +08:00
if ! isPartial && allowMemory && reader . BodySize ( ) < FileToMemoryMaxSize {
2022-03-03 19:36:28 +08:00
this . increaseHit ( key , hash , reader )
2021-11-13 21:30:24 +08:00
}
2021-06-13 17:37:57 +08:00
isOk = true
2021-01-13 12:02:50 +08:00
return reader , nil
2020-10-04 14:30:42 +08:00
}
2021-05-12 21:38:44 +08:00
// OpenWriter 打开缓存文件等待写入
2022-11-19 17:23:45 +08:00
func ( this * FileStorage ) OpenWriter ( key string , expiresAt int64 , status int , headerSize int , bodySize int64 , maxSize int64 , isPartial bool ) ( Writer , error ) {
return this . openWriter ( key , expiresAt , status , headerSize , bodySize , maxSize , isPartial , false )
2022-04-14 09:36:02 +08:00
}
// OpenFlushWriter 打开从其他媒介直接刷入的写入器
2022-11-19 17:23:45 +08:00
func ( this * FileStorage ) OpenFlushWriter ( key string , expiresAt int64 , status int , headerSize int , bodySize int64 ) ( Writer , error ) {
return this . openWriter ( key , expiresAt , status , headerSize , bodySize , - 1 , false , true )
2022-04-14 09:36:02 +08:00
}
2022-11-19 17:23:45 +08:00
// openWriter opens a writer for the cache content of key.
//
// Parameters:
//   - key: cache key; its MD5 hash names the cache file.
//   - expiredAt: expiration timestamp, written into the meta as a uint32.
//   - status: HTTP status; values outside 100..999 are stored as 200.
//   - headerSize, bodySize: written into the meta only when positive.
//   - maxSize: when positive, used for the ignored-key lookup and to cap the
//     size allowed for the memory writer; it is also passed to the file writer.
//   - isPartial: write partial (range) content; an existing partial file is reused.
//   - isFlushing: skips the memory writer and the write-readiness check.
//
// Returns ErrWritingUnavailable while quitting, ErrEntityTooLarge for ignored
// keys, ErrFileIsWriting when the key/file is already being written,
// ErrServerIsBusy when the file system is not ready for writing, a capacity
// error when the disk capacity is exceeded or the disk is full, or any error
// from the list or the file system.
func ( this * FileStorage ) openWriter ( key string , expiredAt int64 , status int , headerSize int , bodySize int64 , maxSize int64 , isPartial bool , isFlushing bool ) ( Writer , error ) {
2022-03-14 11:47:34 +08:00
// Refuse new writers while the process is quitting.
if teaconst . IsQuiting {
2022-03-15 18:32:39 +08:00
return nil , ErrWritingUnavailable
2022-03-14 11:47:34 +08:00
}
2022-03-06 17:18:06 +08:00
// Skip keys that were recorded as ignored for this maxSize (see IgnoreKey).
2023-07-18 12:45:25 +08:00
if maxSize > 0 && this . ignoreKeys . Has ( types . String ( maxSize ) + "$" + key ) {
2022-03-06 17:18:06 +08:00
return nil , ErrEntityTooLarge
}
2021-03-02 19:43:05 +08:00
// Try the memory cache first.
2022-02-15 16:44:39 +08:00
// Only small files are preferred to be stored in memory.
2022-03-06 17:18:06 +08:00
var maxMemorySize = FileToMemoryMaxSize
2023-07-18 12:45:25 +08:00
if maxSize > 0 && maxSize < maxMemorySize {
2022-03-06 17:18:06 +08:00
maxMemorySize = maxSize
}
2022-03-15 21:33:44 +08:00
var memoryStorage = this . memoryStorage
2023-08-04 16:32:15 +08:00
// The memory writer is used for bodies below maxMemorySize or of unknown (negative) size.
if ! fsutils . DiskIsExtremelyFast ( ) && ! isFlushing && ! isPartial && memoryStorage != nil && ( ( bodySize > 0 && bodySize < maxMemorySize ) || bodySize < 0 ) {
2022-11-19 17:23:45 +08:00
writer , err := memoryStorage . OpenWriter ( key , expiredAt , status , headerSize , bodySize , maxMemorySize , false )
2021-03-02 19:43:05 +08:00
if err == nil {
return writer , nil
}
2022-03-15 18:32:39 +08:00
// If the writing queue is full, report it to the caller instead of falling back to the file.
2023-10-02 15:20:19 +08:00
if errors . Is ( err , ErrWritingQueueFull ) {
2022-03-15 18:32:39 +08:00
return nil , err
}
2021-03-02 19:43:05 +08:00
}
2021-06-06 23:42:11 +08:00
// Check whether the key is already being written.
2021-12-21 00:27:32 +08:00
var isOk = false
2021-12-21 08:03:09 +08:00
sharedWritingFileKeyLocker . Lock ( )
_ , ok := sharedWritingFileKeyMap [ key ]
2021-06-06 23:42:11 +08:00
if ok {
2021-12-21 08:03:09 +08:00
sharedWritingFileKeyLocker . Unlock ( )
2021-06-06 23:42:11 +08:00
return nil , ErrFileIsWriting
}
2022-04-14 09:36:02 +08:00
2023-07-29 09:29:36 +08:00
if ! isFlushing && ! fsutils . WriteReady ( ) {
2022-04-14 09:36:02 +08:00
sharedWritingFileKeyLocker . Unlock ( )
2023-07-29 09:29:36 +08:00
return nil , ErrServerIsBusy
2022-04-14 09:36:02 +08:00
}
2021-12-21 08:03:09 +08:00
// Register the key as being written.
sharedWritingFileKeyMap [ key ] = zero . New ( )
sharedWritingFileKeyLocker . Unlock ( )
2021-06-06 23:42:11 +08:00
// On failure, unregister the key again; on success the returned writer's callback does it.
defer func ( ) {
2021-12-21 00:27:32 +08:00
if ! isOk {
2021-12-21 08:03:09 +08:00
sharedWritingFileKeyLocker . Lock ( )
delete ( sharedWritingFileKeyMap , key )
sharedWritingFileKeyLocker . Unlock ( )
2021-06-06 23:42:11 +08:00
}
} ( )
2023-07-08 18:52:57 +08:00
// Check whether the capacity is exceeded.
2022-02-21 17:33:58 +08:00
var capacityBytes = this . diskCapacityBytes ( )
2023-07-08 18:52:57 +08:00
if capacityBytes > 0 && capacityBytes <= this . TotalDiskSize ( ) {
return nil , NewCapacityError ( "write file cache failed: over disk size, current total size: " + types . String ( this . TotalDiskSize ( ) ) + " bytes, capacity: " + types . String ( capacityBytes ) )
2020-10-05 19:15:35 +08:00
}
2022-02-21 17:33:58 +08:00
var hash = stringutil . Md5 ( key )
2022-04-20 18:23:26 +08:00
2022-11-15 20:42:25 +08:00
dir , diskIsFull := this . subDir ( hash )
if diskIsFull {
return nil , NewCapacityError ( "the disk is full" )
}
2021-12-21 00:27:32 +08:00
// Check whether the cache has already been generated.
2022-03-03 19:36:28 +08:00
var cachePathName = dir + "/" + hash
var cachePath = cachePathName + ".cache"
2022-03-31 11:47:31 +08:00
// Close the entry in the OpenFileCache.
var openFileCache = this . openFileCache
if openFileCache != nil {
openFileCache . Close ( cachePath )
}
2022-06-18 20:05:09 +08:00
// Look up the existing cache file.
2021-12-21 00:27:32 +08:00
stat , err := os . Stat ( cachePath )
2022-06-18 20:05:09 +08:00
// Check whether two cache writes are too close in time; partial content is not subject to this limit.
2023-08-08 10:07:24 +08:00
if err == nil && ! isPartial && time . Since ( stat . ModTime ( ) ) <= 1 * time . Second {
2021-12-21 00:27:32 +08:00
// Prevent concurrent consecutive writes.
return nil , ErrFileIsWriting
}
2022-06-18 20:05:09 +08:00
// Build the file name.
2022-04-20 18:23:26 +08:00
var tmpPath = cachePath
var existsFile = false
if stat != nil {
existsFile = true
// If the file already exists, append the .tmp suffix to avoid read/write conflicts.
tmpPath += FileTmpSuffix
}
2022-02-21 17:33:58 +08:00
// Partial content is always written to the .cache file itself.
if isPartial {
2022-03-03 19:36:28 +08:00
tmpPath = cachePathName + ".cache"
2022-02-21 17:33:58 +08:00
}
2021-12-21 00:27:32 +08:00
2020-10-05 20:23:18 +08:00
// Remove the list item first.
2022-03-05 19:31:50 +08:00
if ! isPartial {
err = this . list . Remove ( hash )
if err != nil {
return nil , err
}
}
// Read information from the content that is already stored.
var isNewCreated = true
var partialBodyOffset int64
2022-11-19 21:20:53 +08:00
var partialRanges * PartialRanges
2022-03-05 19:31:50 +08:00
if isPartial {
2022-11-19 21:20:53 +08:00
// Whether the item exists in the list database.
existsCacheItem , _ := this . list . Exist ( hash )
if existsCacheItem {
readerFp , err := os . OpenFile ( tmpPath , os . O_RDONLY , 0444 )
if err == nil {
var partialReader = NewPartialFileReader ( readerFp )
err = partialReader . Init ( )
_ = partialReader . Close ( )
if err == nil && partialReader . bodyOffset > 0 {
partialRanges = partialReader . Ranges ( )
2022-11-20 18:07:46 +08:00
// A body size that differs from the stored one invalidates the existing partial file.
if bodySize > 0 && partialRanges != nil && partialRanges . BodySize > 0 && bodySize != partialRanges . BodySize {
_ = this . removeCacheFile ( tmpPath )
} else {
isNewCreated = false
partialBodyOffset = partialReader . bodyOffset
}
2022-11-19 21:20:53 +08:00
} else {
_ = this . removeCacheFile ( tmpPath )
}
2022-03-05 19:31:50 +08:00
}
}
2022-11-20 18:07:46 +08:00
// Starting from scratch: drop any list item for this hash.
if isNewCreated {
err = this . list . Remove ( hash )
if err != nil {
return nil , err
}
}
2022-11-19 21:20:53 +08:00
if partialRanges == nil {
partialRanges = NewPartialRanges ( expiredAt )
}
2021-05-19 12:07:35 +08:00
}
2020-10-05 20:23:18 +08:00
2022-04-15 14:23:06 +08:00
// Truncate only when a new file is written and a cache file already existed.
var flags = os . O_CREATE | os . O_WRONLY
2022-04-20 18:23:26 +08:00
if isNewCreated && existsFile {
2022-04-15 14:23:06 +08:00
flags |= os . O_TRUNC
}
2023-10-02 19:48:11 +08:00
fsutils . WriteBegin ( )
2022-04-15 14:23:06 +08:00
writer , err := os . OpenFile ( tmpPath , flags , 0666 )
2023-10-02 19:48:11 +08:00
fsutils . WriteEnd ( )
2020-10-04 14:30:42 +08:00
if err != nil {
2022-11-19 15:55:05 +08:00
// TODO check how stable this is across operating systems
2022-11-15 22:25:49 +08:00
// The directory may not exist yet: create it and retry once.
if os . IsNotExist ( err ) {
_ = os . MkdirAll ( dir , 0777 )
// open file again
writer , err = os . OpenFile ( tmpPath , flags , 0666 )
}
if err != nil {
return nil , err
}
2020-10-04 14:30:42 +08:00
}
2021-12-21 00:27:32 +08:00
var removeOnFailure = true
2020-10-04 14:30:42 +08:00
// This deferred function reads err, isOk and removeOnFailure at return time.
defer func ( ) {
if err != nil {
isOk = false
}
// On error, delete the file to avoid leaving a half-written file.
if ! isOk {
_ = writer . Close ( )
2021-01-11 23:06:50 +08:00
if removeOnFailure {
2021-12-21 00:27:32 +08:00
_ = os . Remove ( tmpPath )
2021-01-11 23:06:50 +08:00
}
2020-10-04 14:30:42 +08:00
}
} ( )
2021-01-11 23:06:50 +08:00
// Try to take an exclusive, non-blocking lock; if locking fails, return immediately
// without removing the file.
err = syscall . Flock ( int ( writer . Fd ( ) ) , syscall . LOCK_EX | syscall . LOCK_NB )
if err != nil {
removeOnFailure = false
return nil , ErrFileIsWriting
}
2022-11-19 17:23:45 +08:00
// -1 means the size was not written into the meta.
var metaBodySize int64 = - 1
2022-11-20 18:07:46 +08:00
var metaHeaderSize = - 1
2022-02-21 17:33:58 +08:00
if isNewCreated {
2022-11-19 15:55:05 +08:00
// Write the meta.
// Since v0.5.8 the key is no longer written into the meta.
var metaBytes = make ( [ ] byte , SizeMeta )
2022-04-14 09:36:02 +08:00
binary . BigEndian . PutUint32 ( metaBytes [ OffsetExpiresAt : ] , uint32 ( expiredAt ) )
2020-10-04 14:30:42 +08:00
2022-02-21 17:33:58 +08:00
// Write the status code; out-of-range values are stored as 200.
if status > 999 || status < 100 {
status = 200
}
2022-04-14 09:36:02 +08:00
copy ( metaBytes [ OffsetStatus : ] , strconv . Itoa ( status ) )
2020-10-04 14:30:42 +08:00
2022-11-19 17:23:45 +08:00
// Write the header length.
if headerSize > 0 {
binary . BigEndian . PutUint32 ( metaBytes [ OffsetHeaderLength : ] , uint32 ( headerSize ) )
metaHeaderSize = headerSize
}
// Write the body length.
if bodySize > 0 {
binary . BigEndian . PutUint64 ( metaBytes [ OffsetBodyLength : ] , uint64 ( bodySize ) )
metaBodySize = bodySize
}
2023-07-29 09:46:14 +08:00
fsutils . WriteBegin ( )
2022-04-14 09:36:02 +08:00
_ , err = writer . Write ( metaBytes )
2023-07-29 09:46:14 +08:00
fsutils . WriteEnd ( )
2021-01-13 12:02:50 +08:00
if err != nil {
return nil , err
}
2020-10-04 14:30:42 +08:00
}
isOk = true
2022-02-21 17:33:58 +08:00
// The callback passed to the writer unregisters the key from the writing map.
if isPartial {
2022-11-20 18:07:46 +08:00
return NewPartialFileWriter ( writer , key , expiredAt , metaHeaderSize , metaBodySize , isNewCreated , isPartial , partialBodyOffset , partialRanges , func ( ) {
2022-02-21 17:33:58 +08:00
sharedWritingFileKeyLocker . Lock ( )
delete ( sharedWritingFileKeyMap , key )
sharedWritingFileKeyLocker . Unlock ( )
} ) , nil
} else {
2023-07-18 12:45:25 +08:00
return NewFileWriter ( this , writer , key , expiredAt , metaHeaderSize , metaBodySize , maxSize , func ( ) {
2022-02-21 17:33:58 +08:00
sharedWritingFileKeyLocker . Lock ( )
delete ( sharedWritingFileKeyMap , key )
sharedWritingFileKeyLocker . Unlock ( )
} ) , nil
}
2020-10-04 14:30:42 +08:00
}
2021-05-12 21:38:44 +08:00
// AddToList 添加到List
2020-10-04 14:30:42 +08:00
func ( this * FileStorage ) AddToList ( item * Item ) {
2022-03-14 11:47:34 +08:00
// 是否正在退出
if teaconst . IsQuiting {
return
}
2022-03-15 21:33:44 +08:00
var memoryStorage = this . memoryStorage
if memoryStorage != nil {
2021-03-02 19:43:05 +08:00
if item . Type == ItemTypeMemory {
2022-03-15 21:33:44 +08:00
memoryStorage . AddToList ( item )
2021-03-02 19:43:05 +08:00
return
}
}
2021-11-13 21:30:24 +08:00
item . MetaSize = SizeMeta + 128
2022-11-19 15:55:05 +08:00
var hash = stringutil . Md5 ( item . Key )
2021-05-19 12:07:35 +08:00
err := this . list . Add ( hash , item )
if err != nil && ! strings . Contains ( err . Error ( ) , "UNIQUE constraint failed" ) {
remotelogs . Error ( "CACHE" , "add to list failed: " + err . Error ( ) )
}
2020-10-04 14:30:42 +08:00
}
2021-05-12 21:38:44 +08:00
// Delete 删除某个键值对应的缓存
2020-10-04 14:30:42 +08:00
func ( this * FileStorage ) Delete ( key string ) error {
2022-03-14 11:47:34 +08:00
// 是否正在退出
if teaconst . IsQuiting {
return nil
}
2021-03-02 19:43:05 +08:00
// 先尝试内存缓存
2022-03-15 21:33:44 +08:00
this . runMemoryStorageSafety ( func ( memoryStorage * MemoryStorage ) {
_ = memoryStorage . Delete ( key )
} )
2021-03-02 19:43:05 +08:00
2022-11-15 20:42:25 +08:00
hash , path , _ := this . keyPath ( key )
2021-05-19 12:07:35 +08:00
err := this . list . Remove ( hash )
if err != nil {
return err
}
2022-03-05 16:47:17 +08:00
err = this . removeCacheFile ( path )
2020-10-04 14:30:42 +08:00
if err == nil || os . IsNotExist ( err ) {
return nil
}
2022-03-04 11:51:59 +08:00
2020-10-04 14:30:42 +08:00
return err
}
2021-05-12 21:38:44 +08:00
// Stat 统计
2020-10-04 14:30:42 +08:00
func ( this * FileStorage ) Stat ( ) ( * Stat , error ) {
return this . list . Stat ( func ( hash string ) bool {
return true
2021-05-19 12:07:35 +08:00
} )
2020-10-04 14:30:42 +08:00
}
2021-05-12 21:38:44 +08:00
// CleanAll 清除所有的缓存
2020-10-04 14:30:42 +08:00
func ( this * FileStorage ) CleanAll ( ) error {
this . locker . Lock ( )
defer this . locker . Unlock ( )
2021-03-02 19:43:05 +08:00
// 先尝试内存缓存
2022-03-15 21:33:44 +08:00
this . runMemoryStorageSafety ( func ( memoryStorage * MemoryStorage ) {
_ = memoryStorage . CleanAll ( )
} )
2021-03-02 19:43:05 +08:00
2021-05-19 12:07:35 +08:00
err := this . list . CleanAll ( )
if err != nil {
return err
}
2020-10-04 14:30:42 +08:00
// 删除缓存和目录
// 不能直接删除子目录,比较危险
2022-11-15 20:42:25 +08:00
var rootDirs = [ ] string { this . options . Dir }
var subDirs = this . subDirs // copy slice
if len ( subDirs ) > 0 {
for _ , subDir := range subDirs {
rootDirs = append ( rootDirs , subDir . Path )
}
2020-10-04 14:30:42 +08:00
}
2022-11-29 15:33:12 +08:00
var dirNameReg = regexp . MustCompile ( ` ^[0-9a-f] { 2}$ ` )
2022-11-15 20:42:25 +08:00
for _ , rootDir := range rootDirs {
var dir = rootDir + "/p" + types . String ( this . policy . Id )
2022-12-05 09:57:01 +08:00
err = func ( dir string ) error {
fp , err := os . Open ( dir )
if err != nil {
return err
}
defer func ( ) {
_ = fp . Close ( )
} ( )
2020-10-04 14:30:42 +08:00
2022-12-05 09:57:01 +08:00
stat , err := fp . Stat ( )
if err != nil {
return err
}
2021-05-25 18:28:24 +08:00
2022-12-05 09:57:01 +08:00
if ! stat . IsDir ( ) {
return nil
2022-11-15 20:42:25 +08:00
}
2022-12-05 09:57:01 +08:00
// 改成待删除
subDirs , err := fp . Readdir ( - 1 )
2022-11-15 20:42:25 +08:00
if err != nil {
return err
}
2022-12-05 09:57:01 +08:00
for _ , info := range subDirs {
2023-08-13 18:18:55 +08:00
var subDir = info . Name ( )
2022-11-15 20:42:25 +08:00
2022-12-05 09:57:01 +08:00
// 检查目录名
if ! dirNameReg . MatchString ( subDir ) {
continue
}
// 修改目录名
2023-08-13 18:18:55 +08:00
var tmpDir = dir + "/" + subDir + "." + timeutil . Format ( "YmdHis" ) + ".trash"
2022-12-05 09:57:01 +08:00
err = os . Rename ( dir + "/" + subDir , tmpDir )
if err != nil {
return err
}
2022-11-15 20:42:25 +08:00
}
2022-12-05 09:57:01 +08:00
// 重新遍历待删除
goman . New ( func ( ) {
err = this . cleanDeletedDirs ( dir )
if err != nil {
2023-08-13 18:18:55 +08:00
remotelogs . Warn ( "CACHE" , "delete '*.trash' dirs failed: " + err . Error ( ) )
} else {
// try to clean again, to delete writing files when deleting
time . Sleep ( 10 * time . Minute )
_ = this . cleanDeletedDirs ( dir )
2022-12-05 09:57:01 +08:00
}
} )
return nil
} ( dir )
if err != nil {
return err
}
2022-11-15 20:42:25 +08:00
}
2020-10-04 14:30:42 +08:00
return nil
}
2021-05-12 21:38:44 +08:00
// Purge 清理过期的缓存
2020-12-23 21:28:50 +08:00
func ( this * FileStorage ) Purge ( keys [ ] string , urlType string ) error {
2022-03-14 11:47:34 +08:00
// 是否正在退出
if teaconst . IsQuiting {
return nil
}
2021-03-02 19:43:05 +08:00
// 先尝试内存缓存
2022-03-15 21:33:44 +08:00
this . runMemoryStorageSafety ( func ( memoryStorage * MemoryStorage ) {
_ = memoryStorage . Purge ( keys , urlType )
} )
2021-03-02 19:43:05 +08:00
2020-12-23 21:28:50 +08:00
// 目录
if urlType == "dir" {
for _ , key := range keys {
2022-11-26 11:05:46 +08:00
// 检查是否有通配符 http(s)://*.example.com
var schemeIndex = strings . Index ( key , "://" )
if schemeIndex > 0 {
var keyRight = key [ schemeIndex + 3 : ]
if strings . HasPrefix ( keyRight , "*." ) {
err := this . list . CleanMatchPrefix ( key )
if err != nil {
return err
}
continue
}
}
2021-06-13 17:37:57 +08:00
err := this . list . CleanPrefix ( key )
2021-05-19 12:07:35 +08:00
if err != nil {
return err
}
2020-12-23 21:28:50 +08:00
}
2022-06-05 17:15:02 +08:00
return nil
2020-12-23 21:28:50 +08:00
}
2022-06-05 17:15:02 +08:00
// URL
2020-10-04 14:30:42 +08:00
for _ , key := range keys {
2022-11-26 11:05:46 +08:00
// 检查是否有通配符 http(s)://*.example.com
var schemeIndex = strings . Index ( key , "://" )
if schemeIndex > 0 {
var keyRight = key [ schemeIndex + 3 : ]
if strings . HasPrefix ( keyRight , "*." ) {
err := this . list . CleanMatchKey ( key )
if err != nil {
return err
}
continue
}
}
// 普通的Key
2022-11-15 20:42:25 +08:00
hash , path , _ := this . keyPath ( key )
2022-03-05 16:47:17 +08:00
err := this . removeCacheFile ( path )
2020-10-04 14:30:42 +08:00
if err != nil && ! os . IsNotExist ( err ) {
return err
}
2022-03-04 11:51:59 +08:00
2021-05-19 12:07:35 +08:00
err = this . list . Remove ( hash )
if err != nil {
return err
}
2020-10-04 14:30:42 +08:00
}
return nil
}
2021-05-12 21:38:44 +08:00
// Stop 停止
2020-10-04 14:30:42 +08:00
func ( this * FileStorage ) Stop ( ) {
2022-01-12 21:09:00 +08:00
events . Remove ( this )
2020-10-04 14:30:42 +08:00
this . locker . Lock ( )
defer this . locker . Unlock ( )
2021-03-02 19:43:05 +08:00
// 先尝试内存缓存
2022-03-15 21:33:44 +08:00
this . runMemoryStorageSafety ( func ( memoryStorage * MemoryStorage ) {
memoryStorage . Stop ( )
} )
2021-03-02 19:43:05 +08:00
2023-06-07 21:49:42 +08:00
if this . list != nil {
_ = this . list . Reset ( )
}
2021-11-14 16:15:07 +08:00
if this . purgeTicker != nil {
this . purgeTicker . Stop ( )
}
if this . hotTicker != nil {
this . hotTicker . Stop ( )
2020-10-04 14:30:42 +08:00
}
2021-06-13 17:37:57 +08:00
2023-06-07 21:49:42 +08:00
if this . list != nil {
_ = this . list . Close ( )
}
2022-01-12 21:09:00 +08:00
2022-03-31 11:47:31 +08:00
var openFileCache = this . openFileCache
if openFileCache != nil {
openFileCache . CloseAll ( )
2022-01-12 21:09:00 +08:00
}
2022-03-06 17:18:06 +08:00
this . ignoreKeys . Reset ( )
2020-10-04 14:30:42 +08:00
}
2021-05-13 11:50:36 +08:00
// TotalDiskSize 消耗的磁盘尺寸
func ( this * FileStorage ) TotalDiskSize ( ) int64 {
2023-08-15 15:49:23 +08:00
stat , err := fsutils . StatDeviceCache ( this . options . Dir )
2023-07-08 18:52:57 +08:00
if err == nil {
return int64 ( stat . UsedSize ( ) )
}
return 0
2021-05-13 11:50:36 +08:00
}
// TotalMemorySize 内存尺寸
func ( this * FileStorage ) TotalMemorySize ( ) int64 {
2022-03-15 21:33:44 +08:00
var memoryStorage = this . memoryStorage
if memoryStorage == nil {
2021-05-13 11:50:36 +08:00
return 0
}
2022-03-15 21:33:44 +08:00
return memoryStorage . TotalMemorySize ( )
2021-05-13 11:50:36 +08:00
}
2022-03-06 17:18:06 +08:00
// IgnoreKey 忽略某个Key, 即不缓存某个Key
2023-07-18 12:45:25 +08:00
func ( this * FileStorage ) IgnoreKey ( key string , maxSize int64 ) {
this . ignoreKeys . Push ( types . String ( maxSize ) + "$" + key )
2022-03-06 17:18:06 +08:00
}
2022-04-04 19:45:57 +08:00
// CanSendfile tells whether sendfile is enabled in the storage options.
// It returns false when the options have not been set.
func (this *FileStorage) CanSendfile() bool {
	return this.options != nil && this.options.EnableSendfile
}
2020-10-04 14:30:42 +08:00
// 获取Key对应的文件路径
2022-11-15 20:42:25 +08:00
func ( this * FileStorage ) keyPath ( key string ) ( hash string , path string , diskIsFull bool ) {
2020-10-04 14:30:42 +08:00
hash = stringutil . Md5 ( key )
2022-11-15 20:42:25 +08:00
var dir string
dir , diskIsFull = this . subDir ( hash )
2020-10-04 14:30:42 +08:00
path = dir + "/" + hash + ".cache"
return
}
// 获取Hash对应的文件路径
2022-11-15 20:42:25 +08:00
func ( this * FileStorage ) hashPath ( hash string ) ( path string , diskIsFull bool ) {
2023-09-15 18:14:58 +08:00
if len ( hash ) != HashKeyLength {
2022-11-15 20:42:25 +08:00
return "" , false
2020-10-04 14:30:42 +08:00
}
2022-11-15 20:42:25 +08:00
var dir string
dir , diskIsFull = this . subDir ( hash )
2020-10-04 14:30:42 +08:00
path = dir + "/" + hash + ".cache"
return
}
// 初始化List
func ( this * FileStorage ) initList ( ) error {
2021-05-19 12:07:35 +08:00
err := this . list . Reset ( )
if err != nil {
return err
}
2020-10-04 14:30:42 +08:00
2021-05-23 22:59:00 +08:00
// 使用异步防止阻塞主线程
2021-12-08 15:17:45 +08:00
/ * * goman . New ( func ( ) {
2021-05-23 22:59:00 +08:00
dir := this . dir ( )
2021-01-13 12:02:50 +08:00
2021-05-23 22:59:00 +08:00
// 清除tmp
2021-11-21 16:10:07 +08:00
// TODO 需要一个更加高效的实现
2021-12-08 15:17:45 +08:00
} ) * * /
2021-01-13 12:02:50 +08:00
2020-10-04 14:30:42 +08:00
// 启动定时清理任务
2022-03-15 21:33:44 +08:00
this . initPurgeTicker ( )
// 热点处理任务
this . hotTicker = utils . NewTicker ( 1 * time . Minute )
if Tea . IsTesting ( ) {
this . hotTicker = utils . NewTicker ( 10 * time . Second )
2021-11-13 21:30:24 +08:00
}
2022-03-15 21:33:44 +08:00
goman . New ( func ( ) {
for this . hotTicker . Next ( ) {
trackers . Run ( "FILE_CACHE_STORAGE_HOT_LOOP" , func ( ) {
this . hotLoop ( )
} )
}
} )
// 退出时停止
2022-01-12 21:09:00 +08:00
events . OnKey ( events . EventQuit , this , func ( ) {
2020-12-17 17:36:10 +08:00
remotelogs . Println ( "CACHE" , "quit clean timer" )
2022-01-12 21:09:00 +08:00
{
var ticker = this . purgeTicker
if ticker != nil {
ticker . Stop ( )
}
}
{
var ticker = this . hotTicker
if ticker != nil {
ticker . Stop ( )
}
2020-10-28 11:19:06 +08:00
}
} )
2020-10-04 14:30:42 +08:00
return nil
}
// 清理任务
2022-11-15 20:42:25 +08:00
// TODO purge每个分区
2020-10-04 14:30:42 +08:00
func ( this * FileStorage ) purgeLoop ( ) {
2021-11-13 21:30:24 +08:00
// 计算是否应该开启LFU清理
2022-11-07 21:32:20 +08:00
var capacityBytes = this . diskCapacityBytes ( )
2021-11-13 21:30:24 +08:00
var startLFU = false
2023-09-29 14:52:08 +08:00
var requireFullLFU = false // 是否需要完整执行LFU
2021-11-13 21:30:24 +08:00
var lfuFreePercent = this . policy . PersistenceLFUFreePercent
if lfuFreePercent <= 0 {
lfuFreePercent = 5
2023-09-14 18:30:11 +08:00
2023-09-17 12:05:06 +08:00
// 2TB级别以上
if capacityBytes >> 30 > 2000 {
lfuFreePercent = 100 /** GB **/ / float32 ( capacityBytes >> 30 ) * 100 /** % **/
if lfuFreePercent > 3 {
lfuFreePercent = 3
}
2023-09-14 18:30:11 +08:00
}
2021-11-13 21:30:24 +08:00
}
2022-11-15 20:42:25 +08:00
2023-09-14 20:17:48 +08:00
var hasFullDisk = this . hasFullDisk ( )
2022-11-15 20:42:25 +08:00
if hasFullDisk {
2022-10-25 15:14:28 +08:00
startLFU = true
} else {
var usedPercent = float32 ( this . TotalDiskSize ( ) * 100 ) / float32 ( capacityBytes )
if capacityBytes > 0 {
if lfuFreePercent < 100 {
if usedPercent >= 100 - lfuFreePercent {
startLFU = true
}
2021-11-13 21:30:24 +08:00
}
}
}
// 清理过期
{
var times = 1
// 空闲时间多清理
2023-08-08 16:10:14 +08:00
systemLoad , _ := load . Avg ( )
if systemLoad != nil {
2023-09-28 10:56:33 +08:00
if systemLoad . Load5 < 3 {
2023-08-08 16:10:14 +08:00
times = 5
} else if systemLoad . Load5 < 5 {
2023-09-28 10:56:33 +08:00
times = 3
} else if systemLoad . Load5 < 10 {
2023-08-08 16:10:14 +08:00
times = 2
}
2021-11-13 21:30:24 +08:00
}
2023-09-28 10:56:33 +08:00
// 高速硬盘多清理
if fsutils . DiskIsExtremelyFast ( ) {
times *= 8
} else if fsutils . DiskIsFast ( ) {
times *= 4
}
2021-11-13 21:30:24 +08:00
// 处于LFU阈值时, 多清理
if startLFU {
2023-09-28 10:56:33 +08:00
times *= 5
2021-11-13 21:30:24 +08:00
}
var purgeCount = this . policy . PersistenceAutoPurgeCount
if purgeCount <= 0 {
purgeCount = 1000
2023-09-28 10:56:33 +08:00
if fsutils . DiskIsExtremelyFast ( ) {
purgeCount = 4000
} else if fsutils . DiskIsFast ( ) {
purgeCount = 2000
}
2021-11-13 21:30:24 +08:00
}
2023-09-29 14:52:08 +08:00
2021-11-13 21:30:24 +08:00
for i := 0 ; i < times ; i ++ {
countFound , err := this . list . Purge ( purgeCount , func ( hash string ) error {
2022-11-15 20:42:25 +08:00
path , _ := this . hashPath ( hash )
2022-03-05 16:47:17 +08:00
err := this . removeCacheFile ( path )
2021-11-13 21:30:24 +08:00
if err != nil && ! os . IsNotExist ( err ) {
remotelogs . Error ( "CACHE" , "purge '" + path + "' error: " + err . Error ( ) )
}
2022-03-05 16:47:17 +08:00
2021-11-13 21:30:24 +08:00
return nil
} )
if err != nil {
remotelogs . Warn ( "CACHE" , "purge file storage failed: " + err . Error ( ) )
continue
}
if countFound < purgeCount {
2023-09-29 14:52:08 +08:00
if i == 0 && startLFU {
requireFullLFU = true
}
2021-11-13 21:30:24 +08:00
break
}
time . Sleep ( 1 * time . Second )
}
}
// 磁盘空间不足时,清除老旧的缓存
if startLFU {
2023-09-15 14:46:31 +08:00
var maxCount = 2000
2023-09-14 20:17:48 +08:00
var maxLoops = 5
2023-09-15 14:46:31 +08:00
2023-09-28 10:56:33 +08:00
if fsutils . DiskIsExtremelyFast ( ) {
2023-09-15 14:46:31 +08:00
maxCount = 10000
2023-09-28 10:56:33 +08:00
} else if fsutils . DiskIsFast ( ) {
maxCount = 5000
2023-09-15 14:46:31 +08:00
}
2023-09-15 18:14:58 +08:00
var total , _ = this . list . Count ( )
if total > 0 {
for {
maxLoops --
if maxLoops <= 0 {
break
}
2023-09-14 20:17:48 +08:00
2023-09-15 18:14:58 +08:00
// 开始清理
var count = types . Int ( math . Ceil ( float64 ( total ) * float64 ( lfuFreePercent * 2 ) / 100 ) )
if count <= 0 {
break
}
2023-09-14 20:17:48 +08:00
2023-09-15 18:14:58 +08:00
// 限制单次清理的条数,防止占用太多系统资源
if count > maxCount {
count = maxCount
}
2021-11-13 21:30:24 +08:00
2023-09-29 14:52:08 +08:00
var prefix = ""
if requireFullLFU {
prefix = "fully "
}
remotelogs . Println ( "CACHE" , prefix + "LFU purge policy '" + this . policy . Name + "' id: " + types . String ( this . policy . Id ) + ", count: " + types . String ( count ) )
2023-09-15 18:14:58 +08:00
err := this . list . PurgeLFU ( count , func ( hash string ) error {
path , _ := this . hashPath ( hash )
err := this . removeCacheFile ( path )
if err != nil && ! os . IsNotExist ( err ) {
remotelogs . Error ( "CACHE" , "purge '" + path + "' error: " + err . Error ( ) )
}
2022-03-05 16:47:17 +08:00
2023-09-15 18:14:58 +08:00
return nil
} )
if err != nil {
remotelogs . Warn ( "CACHE" , "purge file storage in LFU failed: " + err . Error ( ) )
2021-11-13 21:30:24 +08:00
}
2023-09-14 20:17:48 +08:00
2023-09-15 18:14:58 +08:00
// 检查硬盘空间状态
2023-09-29 14:52:08 +08:00
if ! requireFullLFU && ! this . hasFullDisk ( ) {
2023-09-15 18:14:58 +08:00
break
}
2021-11-13 21:30:24 +08:00
}
2020-10-04 14:30:42 +08:00
}
2021-06-13 17:37:57 +08:00
}
2020-10-04 14:30:42 +08:00
}
2021-01-13 12:02:50 +08:00
2021-11-14 16:15:07 +08:00
// 热点数据任务
func ( this * FileStorage ) hotLoop ( ) {
2023-10-04 18:13:48 +08:00
var memoryStorage = this . memoryStorage // copy
2021-11-14 16:15:07 +08:00
if memoryStorage == nil {
return
}
2023-10-04 18:13:48 +08:00
// check memory space size
if ! memoryStorage . HasFreeSpaceForHotItems ( ) {
return
}
2021-11-14 16:15:07 +08:00
this . hotMapLocker . Lock ( )
if len ( this . hotMap ) == 0 {
this . hotMapLocker . Unlock ( )
this . lastHotSize = 0
return
}
this . lastHotSize = len ( this . hotMap )
var result = [ ] * HotItem { } // [ {key: ..., hits: ...}, ... ]
for _ , v := range this . hotMap {
2023-10-02 10:40:20 +08:00
if v . Hits <= 1 {
continue
}
2021-11-14 16:15:07 +08:00
result = append ( result , v )
}
this . hotMap = map [ string ] * HotItem { }
this . hotMapLocker . Unlock ( )
2022-03-20 20:58:34 +08:00
// 取Top10%写入内存
2021-11-14 16:15:07 +08:00
if len ( result ) > 0 {
sort . Slice ( result , func ( i , j int ) bool {
return result [ i ] . Hits > result [ j ] . Hits
} )
var size = 1
if len ( result ) < 10 {
size = 1
} else {
size = len ( result ) / 10
}
2021-12-19 11:32:26 +08:00
var buf = utils . BytePool16k . Get ( )
defer utils . BytePool16k . Put ( buf )
2021-11-14 16:15:07 +08:00
for _ , item := range result [ : size ] {
2022-03-03 19:36:28 +08:00
reader , err := this . openReader ( item . Key , false , false , false )
2021-11-14 16:15:07 +08:00
if err != nil {
continue
}
if reader == nil {
continue
}
2022-03-12 20:50:05 +08:00
// 如果即将过期,则忽略
var nowUnixTime = time . Now ( ) . Unix ( )
if reader . ExpiresAt ( ) <= nowUnixTime + 600 {
2021-11-14 16:15:07 +08:00
continue
}
2022-03-12 20:50:05 +08:00
// 计算合适的过期时间
var bestExpiresAt = nowUnixTime + HotItemLifeSeconds
var hotTimes = int64 ( item . Hits ) / 1000
if hotTimes > 8 {
hotTimes = 8
}
bestExpiresAt += hotTimes * HotItemLifeSeconds
var expiresAt = reader . ExpiresAt ( )
if expiresAt <= 0 || expiresAt > bestExpiresAt {
expiresAt = bestExpiresAt
}
2022-11-19 17:23:45 +08:00
writer , err := memoryStorage . openWriter ( item . Key , expiresAt , reader . Status ( ) , types . Int ( reader . HeaderSize ( ) ) , reader . BodySize ( ) , - 1 , false )
2021-11-14 16:15:07 +08:00
if err != nil {
if ! CanIgnoreErr ( err ) {
remotelogs . Error ( "CACHE" , "transfer hot item failed: " + err . Error ( ) )
}
_ = reader . Close ( )
continue
}
if writer == nil {
_ = reader . Close ( )
continue
}
err = reader . ReadHeader ( buf , func ( n int ) ( goNext bool , err error ) {
_ , err = writer . WriteHeader ( buf [ : n ] )
return
} )
if err != nil {
_ = reader . Close ( )
_ = writer . Discard ( )
continue
}
err = reader . ReadBody ( buf , func ( n int ) ( goNext bool , err error ) {
2022-12-14 15:26:18 +08:00
goNext = true
if n > 0 {
_ , err = writer . Write ( buf [ : n ] )
if err != nil {
goNext = false
}
2022-02-21 17:33:58 +08:00
}
2021-11-14 16:15:07 +08:00
return
} )
if err != nil {
_ = reader . Close ( )
_ = writer . Discard ( )
continue
}
2022-03-15 21:33:44 +08:00
memoryStorage . AddToList ( & Item {
2021-11-14 16:15:07 +08:00
Type : writer . ItemType ( ) ,
Key : item . Key ,
2022-11-26 11:05:46 +08:00
Host : ParseHost ( item . Key ) ,
2022-03-12 20:50:05 +08:00
ExpiredAt : expiresAt ,
2021-11-14 16:15:07 +08:00
HeaderSize : writer . HeaderSize ( ) ,
BodySize : writer . BodySize ( ) ,
} )
_ = reader . Close ( )
_ = writer . Close ( )
}
}
}
2021-05-12 21:38:44 +08:00
func ( this * FileStorage ) diskCapacityBytes ( ) int64 {
2022-03-15 21:33:44 +08:00
var c1 = this . policy . CapacityBytes ( )
2023-09-17 12:05:06 +08:00
var nodeCapacity = SharedManager . MaxDiskCapacity // copy
if nodeCapacity != nil {
var c2 = nodeCapacity . Bytes ( )
2021-05-12 21:38:44 +08:00
if c2 > 0 {
return c2
}
}
return c1
}
2021-06-17 21:13:21 +08:00
2023-08-13 18:18:55 +08:00
// cleanAllDeletedDirs removes all *.trash directories under the policy
// directory of the main directory and of every sub directory.
// Each policy directory is cleaned by its own background task.
func (this *FileStorage) cleanAllDeletedDirs() {
	var roots = []string{this.options.Dir}
	for _, sub := range this.subDirs {
		roots = append(roots, sub.Path)
	}

	for _, root := range roots {
		// declared inside the loop so every task captures its own directory
		var policyDir = root + "/p" + types.String(this.policy.Id)
		goman.New(func() {
			_ = this.cleanDeletedDirs(policyDir)
		})
	}
}
// 清理 *.trash 目录
2021-06-17 21:13:21 +08:00
// 由于在很多硬盘上耗时非常久,所以应该放在后台运行
func ( this * FileStorage ) cleanDeletedDirs ( dir string ) error {
fp , err := os . Open ( dir )
if err != nil {
return err
}
defer func ( ) {
_ = fp . Close ( )
} ( )
subDirs , err := fp . Readdir ( - 1 )
if err != nil {
return err
}
for _ , info := range subDirs {
2023-08-13 18:18:55 +08:00
var subDir = info . Name ( )
if ! strings . HasSuffix ( subDir , ".trash" ) {
2021-06-17 21:13:21 +08:00
continue
}
// 删除
err = os . RemoveAll ( dir + "/" + subDir )
if err != nil {
if ! os . IsNotExist ( err ) {
return err
}
}
}
return nil
}
2022-03-03 19:36:28 +08:00
// 增加某个Key的点击量
func ( this * FileStorage ) increaseHit ( key string , hash string , reader Reader ) {
var rate = this . policy . PersistenceHitSampleRate
if rate <= 0 {
rate = 1000
}
if rands . Int ( 0 , rate ) == 0 {
2022-03-15 21:33:44 +08:00
var memoryStorage = this . memoryStorage
2022-03-03 19:36:28 +08:00
// 增加到热点
// 这里不收录缓存尺寸过大的文件
2022-03-20 21:15:25 +08:00
if memoryStorage != nil && reader . BodySize ( ) > 0 && reader . BodySize ( ) < 128 * sizes . M {
2022-03-03 19:36:28 +08:00
this . hotMapLocker . Lock ( )
hotItem , ok := this . hotMap [ key ]
2022-03-12 20:50:05 +08:00
2022-03-03 19:36:28 +08:00
if ok {
hotItem . Hits ++
} else if len ( this . hotMap ) < HotItemSize { // 控制数量
this . hotMap [ key ] = & HotItem {
2022-03-12 20:50:05 +08:00
Key : key ,
Hits : 1 ,
2022-03-03 19:36:28 +08:00
}
}
this . hotMapLocker . Unlock ( )
2023-10-07 11:56:34 +08:00
// 只有重复点击的才增加点击量
if ok {
var hitErr = this . list . IncreaseHit ( hash )
if hitErr != nil {
// 此错误可以忽略
remotelogs . Error ( "CACHE" , "increase hit failed: " + hitErr . Error ( ) )
}
}
2022-03-03 19:36:28 +08:00
}
}
}
2022-03-05 16:47:17 +08:00
// 删除缓存文件
func ( this * FileStorage ) removeCacheFile ( path string ) error {
2022-03-31 11:47:31 +08:00
var openFileCache = this . openFileCache
if openFileCache != nil {
openFileCache . Close ( path )
}
2022-03-05 16:47:17 +08:00
var err = os . Remove ( path )
if err == nil || os . IsNotExist ( err ) {
err = nil
// 删除Partial相关
2023-09-15 18:14:58 +08:00
var partialPath = PartialRangesFilePath ( path )
2022-03-31 11:47:31 +08:00
if openFileCache != nil {
openFileCache . Close ( partialPath )
}
_ = os . Remove ( partialPath )
2022-03-05 16:47:17 +08:00
}
return err
}
2022-03-15 21:33:44 +08:00
// createMemoryStorage creates and initializes the memory storage that belongs
// to the current policy and attaches it to this storage.
//
// The memory policy copies most fields from the file policy, takes its
// capacity from the memory policy options and uses a fixed maximum item size
// of 128MB. It returns the first initialization error; in that case no
// memory storage is attached.
func (this *FileStorage) createMemoryStorage() error {
	var policy = &serverconfigs.HTTPCachePolicy{
		Id:                      this.policy.Id,
		IsOn:                    this.policy.IsOn,
		Name:                    this.policy.Name,
		Description:             this.policy.Description,
		Capacity:                this.options.MemoryPolicy.Capacity,
		MaxSize:                 &shared.SizeCapacity{Count: 128, Unit: shared.SizeCapacityUnitMB}, // TODO may become configurable in the future
		Type:                    serverconfigs.CachePolicyStorageMemory,
		Options:                 this.policy.Options,
		Life:                    this.policy.Life,
		MinLife:                 this.policy.MinLife,
		MaxLife:                 this.policy.MaxLife,
		MemoryAutoPurgeCount:    this.policy.MemoryAutoPurgeCount,
		MemoryAutoPurgeInterval: this.policy.MemoryAutoPurgeInterval,
		MemoryLFUFreePercent:    this.policy.MemoryLFUFreePercent,
	}
	if err := policy.Init(); err != nil {
		return err
	}

	var storage = NewMemoryStorage(policy, this)
	if err := storage.Init(); err != nil {
		return err
	}

	this.memoryStorage = storage
	return nil
}
// initPurgeTicker (re)starts the periodic purge task.
//
// The interval comes from the policy (PersistenceAutoPurgeInterval, in
// seconds) and defaults to 30 seconds, or 10 seconds in testing mode. A
// ticker that is already running is stopped before the new one is installed.
func (this *FileStorage) initPurgeTicker() {
	var autoPurgeInterval = this.policy.PersistenceAutoPurgeInterval
	if autoPurgeInterval <= 0 {
		autoPurgeInterval = 30

		if Tea.IsTesting() {
			autoPurgeInterval = 10
		}
	}
	if this.purgeTicker != nil {
		this.purgeTicker.Stop()
	}

	var ticker = utils.NewTicker(time.Duration(autoPurgeInterval) * time.Second)
	this.purgeTicker = ticker
	goman.New(func() {
		// loop on the captured ticker, not on the field: after a re-init the
		// field points to the new ticker, and a goroutine started for the old
		// one must not continue on it next to the new goroutine
		for ticker.Next() {
			trackers.Run("FILE_CACHE_STORAGE_PURGE_LOOP", func() {
				this.purgeLoop()
			})
		}
	})
}
// initOpenFileCache (re)creates the open file cache from the storage options.
//
// When the option is present, switched on and has a positive Max, a new cache
// is created and installed; a creation failure is logged. When the option is
// absent or switched off, the field is cleared so that the storage does not
// keep using the previous instance after CloseAll has been called on it.
// The previous cache, if any, is closed last.
func (this *FileStorage) initOpenFileCache() {
	var err error

	var oldOpenFileCache = this.openFileCache

	// enable the new one
	var cacheOptions = this.options.OpenFileCache
	if cacheOptions != nil && cacheOptions.IsOn && cacheOptions.Max > 0 {
		this.openFileCache, err = NewOpenFileCache(cacheOptions.Max)
		if err != nil {
			remotelogs.Error("CACHE", "open file cache failed: "+err.Error())
		}
	} else {
		// disabled: drop the reference to the instance closed below
		this.openFileCache = nil
	}

	// close the old one
	if oldOpenFileCache != nil {
		oldOpenFileCache.CloseAll()
	}
}
func ( this * FileStorage ) runMemoryStorageSafety ( f func ( memoryStorage * MemoryStorage ) ) {
2023-08-20 11:02:09 +08:00
var memoryStorage = this . memoryStorage // copy
2022-03-15 21:33:44 +08:00
if memoryStorage != nil {
f ( memoryStorage )
}
}
2022-10-25 15:14:28 +08:00
// 检查磁盘剩余空间
func ( this * FileStorage ) checkDiskSpace ( ) {
2023-08-06 18:08:28 +08:00
var minFreeSize = DefaultMinDiskFreeSpace
var options = this . options // copy
if options != nil && options . MinFreeSize != nil && options . MinFreeSize . Bytes ( ) > 0 {
minFreeSize = uint64 ( options . MinFreeSize . Bytes ( ) )
}
if options != nil && len ( options . Dir ) > 0 {
2023-08-15 15:49:23 +08:00
stat , err := fsutils . StatDevice ( options . Dir )
2022-10-25 15:14:28 +08:00
if err == nil {
2023-08-06 18:08:28 +08:00
this . mainDiskIsFull = stat . FreeSize ( ) < minFreeSize
2023-08-20 11:02:09 +08:00
2023-09-16 09:36:04 +08:00
// check capacity (only on main directory) when node capacity had not been set
if ! this . mainDiskIsFull {
var capacityBytes int64
var maxDiskCapacity = SharedManager . MaxDiskCapacity // copy
if maxDiskCapacity != nil && maxDiskCapacity . Bytes ( ) > 0 {
capacityBytes = SharedManager . MaxDiskCapacity . Bytes ( )
} else {
var policy = this . policy // copy
if policy != nil {
capacityBytes = policy . CapacityBytes ( ) // copy
}
}
2023-08-20 11:02:09 +08:00
if capacityBytes > 0 && stat . UsedSize ( ) >= uint64 ( capacityBytes ) {
this . mainDiskIsFull = true
}
}
2022-11-15 20:42:25 +08:00
}
}
var subDirs = this . subDirs // copy slice
for _ , subDir := range subDirs {
2023-08-15 15:49:23 +08:00
stat , err := fsutils . StatDevice ( subDir . Path )
2022-11-15 20:42:25 +08:00
if err == nil {
2023-08-06 18:08:28 +08:00
subDir . IsFull = stat . FreeSize ( ) < minFreeSize
2022-10-25 15:14:28 +08:00
}
}
}
2022-11-15 20:42:25 +08:00
2023-09-15 18:14:58 +08:00
// 检查是否有已满的磁盘分区
2023-09-14 20:17:48 +08:00
func ( this * FileStorage ) hasFullDisk ( ) bool {
this . checkDiskSpace ( )
var hasFullDisk = this . mainDiskIsFull
if ! hasFullDisk {
var subDirs = this . subDirs // copy slice
for _ , subDir := range subDirs {
if subDir . IsFull {
hasFullDisk = true
break
}
}
}
return hasFullDisk
}
2022-11-15 20:42:25 +08:00
// subDir returns the directory that stores the cache file of a hash, and
// whether the disk of that directory is flagged as full.
//
// The directory is "<root>/p<policyId>/<hash[0:2]>/<hash[2:4]>". The root is
// the main directory when no sub directories are configured; otherwise it is
// chosen among the main directory and the sub directories (16 candidates at
// most) by the numeric code of the first hash character.
//
// A hash shorter than 4 characters cannot be split into the two directory
// levels; it is mapped to "<main dir>/p<policyId>" instead of slicing out of
// range.
func (this *FileStorage) subDir(hash string) (dirPath string, dirIsFull bool) {
	var policyDir = "/p" + types.String(this.policy.Id)

	// the length must be checked before the hash is sliced
	if len(hash) < 4 {
		return this.options.Dir + policyDir, this.mainDiskIsFull
	}

	var suffix = policyDir + "/" + hash[:2] + "/" + hash[2:4]

	var subDirs = this.subDirs // copy slice
	var countSubDirs = len(subDirs)
	if countSubDirs == 0 {
		return this.options.Dir + suffix, this.mainDiskIsFull
	}

	countSubDirs++ // add main dir

	// at most 16 directories are supported
	if countSubDirs > 16 {
		countSubDirs = 16
	}

	// index 0 is the main directory, index i > 0 is subDirs[i-1]
	var dirIndex = this.charCode(hash[0]) % uint8(countSubDirs)
	if dirIndex == 0 {
		return this.options.Dir + suffix, this.mainDiskIsFull
	}
	var subDir = subDirs[dirIndex-1]
	return subDir.Path + suffix, subDir.IsFull
}
2023-09-15 18:14:58 +08:00
// ScanGarbageCaches 清理目录中“失联”的缓存文件
// “失联”为不在HashMap中的文件
func ( this * FileStorage ) ScanGarbageCaches ( fileCallback func ( path string ) error ) error {
2023-09-17 11:43:46 +08:00
if ! this . list . ( * FileList ) . HashMapIsLoaded ( ) {
return errors . New ( "cache list is loading" )
}
2023-09-15 18:14:58 +08:00
var mainDir = this . options . Dir
var allDirs = [ ] string { mainDir }
var subDirs = this . subDirs // copy
for _ , subDir := range subDirs {
allDirs = append ( allDirs , subDir . Path )
}
2023-09-28 15:02:06 +08:00
var countDirs = 0
// process progress
var progressSock = gosock . NewTmpSock ( teaconst . CacheGarbageSockName )
_ , sockErr := progressSock . SendTimeout ( & gosock . Command { Code : "progress" , Params : map [ string ] any { "progress" : 0 } } , 1 * time . Second )
var canReportProgress = sockErr == nil
var lastProgress float64
var countFound = 0
2023-09-15 18:14:58 +08:00
for _ , subDir := range allDirs {
var dir0 = subDir + "/p" + types . String ( this . policy . Id )
dir1Matches , err := filepath . Glob ( dir0 + "/*" )
if err != nil {
// ignore error
continue
}
for _ , dir1 := range dir1Matches {
if len ( filepath . Base ( dir1 ) ) != 2 {
continue
}
dir2Matches , err := filepath . Glob ( dir1 + "/*" )
if err != nil {
// ignore error
continue
}
for _ , dir2 := range dir2Matches {
if len ( filepath . Base ( dir2 ) ) != 2 {
continue
}
2023-09-28 15:02:06 +08:00
countDirs ++
// report progress
if canReportProgress {
var progress = float64 ( countDirs ) / 65536
if fmt . Sprintf ( "%.2f" , lastProgress ) != fmt . Sprintf ( "%.2f" , progress ) {
lastProgress = progress
_ , _ = progressSock . SendTimeout ( & gosock . Command { Code : "progress" , Params : map [ string ] any {
"progress" : progress ,
"count" : countFound ,
} } , 100 * time . Millisecond )
}
}
2023-09-15 18:14:58 +08:00
fileMatches , err := filepath . Glob ( dir2 + "/*.cache" )
if err != nil {
// ignore error
continue
}
for _ , file := range fileMatches {
var filename = filepath . Base ( file )
var hash = strings . TrimSuffix ( filename , ".cache" )
if len ( hash ) != HashKeyLength {
continue
}
isReady , found := this . list . ( * FileList ) . ExistQuick ( hash )
if ! isReady {
continue
}
if found {
continue
}
// 检查文件正在被写入
stat , err := os . Stat ( file )
if err != nil {
continue
}
if fasttime . Now ( ) . Unix ( ) - stat . ModTime ( ) . Unix ( ) < 300 /** 5 minutes **/ {
continue
}
if fileCallback != nil {
2023-09-28 15:02:06 +08:00
countFound ++
2023-09-15 18:14:58 +08:00
err = fileCallback ( file )
if err != nil {
return err
}
}
}
}
}
}
2023-09-28 15:02:06 +08:00
// 100% progress
if canReportProgress && lastProgress != 1 {
_ , _ = progressSock . SendTimeout ( & gosock . Command { Code : "progress" , Params : map [ string ] any {
"progress" : 1 ,
"count" : countFound ,
} } , 100 * time . Millisecond )
}
2023-09-15 18:14:58 +08:00
return nil
}
// 计算字节数字代号
2022-11-15 20:42:25 +08:00
// charCode converts a byte into a numeric code: '0'..'9' map to 0..9 and
// 'a'..'z' map to 10..35. Any other byte maps to 0.
func (this *FileStorage) charCode(r byte) uint8 {
	switch {
	case '0' <= r && r <= '9':
		return r - '0'
	case 'a' <= r && r <= 'z':
		return r - 'a' + 10
	default:
		return 0
	}
}