Files
srdb/compaction/manager.go
bourdon ae87c38776 Initial commit: SRDB - High-performance LSM-Tree database
- Core engine with MemTable, SST, WAL
- B+Tree indexing for SST files  
- Leveled compaction strategy
- Multi-table database management
- Schema validation and secondary indexes
- Query builder with complex conditions
- Web UI with HTMX for data visualization
- Command-line tools for diagnostics
2025-10-08 06:38:28 +08:00

445 lines
11 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package compaction
import (
"fmt"
"os"
"path/filepath"
"sync"
"time"
"code.tczkiot.com/srdb/manifest"
)
// Manager 管理 Compaction 流程
type Manager struct {
compactor *Compactor
versionSet *manifest.VersionSet
sstDir string
// 控制后台 Compaction
stopCh chan struct{}
wg sync.WaitGroup
// Compaction 并发控制
compactionMu sync.Mutex // 防止并发执行 compaction
// 统计信息
mu sync.RWMutex
totalCompactions int64
lastCompactionTime time.Time
lastFailedFile int64 // 最后失败的文件编号
consecutiveFails int // 连续失败次数
lastGCTime time.Time
totalOrphansFound int64
}
// NewManager 创建新的 Compaction Manager
func NewManager(sstDir string, versionSet *manifest.VersionSet) *Manager {
return &Manager{
compactor: NewCompactor(sstDir, versionSet),
versionSet: versionSet,
sstDir: sstDir,
stopCh: make(chan struct{}),
}
}
// GetPicker 获取 Compaction Picker
func (m *Manager) GetPicker() *Picker {
return m.compactor.GetPicker()
}
// Start 启动后台 Compaction 和垃圾回收
func (m *Manager) Start() {
m.wg.Add(2)
go m.backgroundCompaction()
go m.backgroundGarbageCollection()
}
// Stop 停止后台 Compaction
func (m *Manager) Stop() {
close(m.stopCh)
m.wg.Wait()
}
// backgroundCompaction 后台 Compaction 循环
func (m *Manager) backgroundCompaction() {
defer m.wg.Done()
ticker := time.NewTicker(10 * time.Second) // 每 10 秒检查一次
defer ticker.Stop()
for {
select {
case <-m.stopCh:
return
case <-ticker.C:
m.maybeCompact()
}
}
}
// MaybeCompact 检查是否需要 Compaction 并执行(公开方法,供外部调用)
// 非阻塞:如果已有 compaction 在执行,直接返回
func (m *Manager) MaybeCompact() {
// 尝试获取锁,如果已有 compaction 在执行,直接返回
if !m.compactionMu.TryLock() {
return
}
defer m.compactionMu.Unlock()
m.doCompact()
}
// maybeCompact 内部使用的阻塞版本(后台 goroutine 使用)
func (m *Manager) maybeCompact() {
m.compactionMu.Lock()
defer m.compactionMu.Unlock()
m.doCompact()
}
// doCompact 实际执行 compaction 的逻辑(必须在持有 compactionMu 时调用)
// 支持并发执行多个层级的 compaction
func (m *Manager) doCompact() {
// 获取当前版本
version := m.versionSet.GetCurrent()
if version == nil {
return
}
// 获取所有需要 Compaction 的任务(已按优先级排序)
picker := m.compactor.GetPicker()
tasks := picker.PickCompaction(version)
if len(tasks) == 0 {
// 输出诊断信息
m.printCompactionStats(version, picker)
return
}
fmt.Printf("[Compaction] Found %d tasks to execute\n", len(tasks))
// 并发执行所有任务
successCount := 0
for _, task := range tasks {
// 检查是否是上次失败的文件(防止无限重试)
if len(task.InputFiles) > 0 {
firstFile := task.InputFiles[0].FileNumber
m.mu.Lock()
if m.lastFailedFile == firstFile && m.consecutiveFails >= 3 {
fmt.Printf("[Compaction] Skipping L%d file %d (failed %d times)\n",
task.Level, firstFile, m.consecutiveFails)
m.consecutiveFails = 0
m.lastFailedFile = 0
m.mu.Unlock()
continue
}
m.mu.Unlock()
}
// 获取最新版本(每个任务执行前)
currentVersion := m.versionSet.GetCurrent()
if currentVersion == nil {
continue
}
// 执行 Compaction
fmt.Printf("[Compaction] Starting: L%d -> L%d, files: %d\n",
task.Level, task.OutputLevel, len(task.InputFiles))
err := m.DoCompactionWithVersion(task, currentVersion)
if err != nil {
fmt.Printf("[Compaction] Failed L%d -> L%d: %v\n", task.Level, task.OutputLevel, err)
// 记录失败信息
if len(task.InputFiles) > 0 {
firstFile := task.InputFiles[0].FileNumber
m.mu.Lock()
if m.lastFailedFile == firstFile {
m.consecutiveFails++
} else {
m.lastFailedFile = firstFile
m.consecutiveFails = 1
}
m.mu.Unlock()
}
} else {
fmt.Printf("[Compaction] Completed: L%d -> L%d\n", task.Level, task.OutputLevel)
successCount++
// 清除失败计数
m.mu.Lock()
m.consecutiveFails = 0
m.lastFailedFile = 0
m.mu.Unlock()
}
}
fmt.Printf("[Compaction] Batch completed: %d/%d tasks succeeded\n", successCount, len(tasks))
}
// printCompactionStats 输出 Compaction 统计信息(每分钟一次)
func (m *Manager) printCompactionStats(version *manifest.Version, picker *Picker) {
m.mu.Lock()
defer m.mu.Unlock()
// 限制输出频率:每 60 秒输出一次
if time.Since(m.lastCompactionTime) < 60*time.Second {
return
}
m.lastCompactionTime = time.Now()
fmt.Println("[Compaction] Status check:")
for level := 0; level < 7; level++ {
files := version.GetLevel(level)
if len(files) == 0 {
continue
}
totalSize := int64(0)
for _, f := range files {
totalSize += f.FileSize
}
score := picker.GetLevelScore(version, level)
fmt.Printf(" L%d: %d files, %.2f MB, score: %.2f\n",
level, len(files), float64(totalSize)/(1024*1024), score)
}
}
// DoCompactionWithVersion 使用指定的版本执行 Compaction
func (m *Manager) DoCompactionWithVersion(task *CompactionTask, version *manifest.Version) error {
if version == nil {
return fmt.Errorf("version is nil")
}
// 执行 Compaction使用传入的 version而不是重新获取
edit, err := m.compactor.DoCompaction(task, version)
if err != nil {
return fmt.Errorf("compaction failed: %w", err)
}
// 如果 edit 为 nil说明所有文件都已经不存在无需应用变更
if edit == nil {
fmt.Printf("[Compaction] No changes needed (files already removed)\n")
return nil
}
// 应用 VersionEdit
err = m.versionSet.LogAndApply(edit)
if err != nil {
// LogAndApply 失败,清理已写入的新 SST 文件(防止孤儿文件)
fmt.Printf("[Compaction] LogAndApply failed, cleaning up new files: %v\n", err)
m.cleanupNewFiles(edit)
return fmt.Errorf("apply version edit: %w", err)
}
// LogAndApply 成功后,删除废弃的 SST 文件
m.deleteObsoleteFiles(edit)
// 更新统计信息
m.mu.Lock()
m.totalCompactions++
m.lastCompactionTime = time.Now()
m.mu.Unlock()
return nil
}
// DoCompaction 执行一次 Compaction兼容旧接口
func (m *Manager) DoCompaction(task *CompactionTask) error {
// 获取当前版本
version := m.versionSet.GetCurrent()
if version == nil {
return fmt.Errorf("no current version")
}
return m.DoCompactionWithVersion(task, version)
}
// cleanupNewFiles 清理 LogAndApply 失败后的新文件(防止孤儿文件)
func (m *Manager) cleanupNewFiles(edit *manifest.VersionEdit) {
if edit == nil {
return
}
fmt.Printf("[Compaction] Cleaning up %d new files after LogAndApply failure\n", len(edit.AddedFiles))
// 删除新创建的文件
for _, file := range edit.AddedFiles {
sstPath := filepath.Join(m.sstDir, fmt.Sprintf("%06d.sst", file.FileNumber))
err := os.Remove(sstPath)
if err != nil {
fmt.Printf("[Compaction] Failed to cleanup new file %06d.sst: %v\n", file.FileNumber, err)
} else {
fmt.Printf("[Compaction] Cleaned up new file %06d.sst\n", file.FileNumber)
}
}
}
// deleteObsoleteFiles 删除废弃的 SST 文件
func (m *Manager) deleteObsoleteFiles(edit *manifest.VersionEdit) {
if edit == nil {
fmt.Printf("[Compaction] deleteObsoleteFiles: edit is nil\n")
return
}
fmt.Printf("[Compaction] deleteObsoleteFiles: %d files to delete\n", len(edit.DeletedFiles))
// 删除被标记为删除的文件
for _, fileNum := range edit.DeletedFiles {
sstPath := filepath.Join(m.sstDir, fmt.Sprintf("%06d.sst", fileNum))
err := os.Remove(sstPath)
if err != nil {
// 删除失败只记录日志,不影响 compaction 流程
// 后台垃圾回收器会重试
fmt.Printf("[Compaction] Failed to delete obsolete file %06d.sst: %v\n", fileNum, err)
} else {
fmt.Printf("[Compaction] Deleted obsolete file %06d.sst\n", fileNum)
}
}
}
// TriggerCompaction 手动触发一次 Compaction所有需要的层级
func (m *Manager) TriggerCompaction() error {
version := m.versionSet.GetCurrent()
if version == nil {
return fmt.Errorf("no current version")
}
picker := m.compactor.GetPicker()
tasks := picker.PickCompaction(version)
if len(tasks) == 0 {
return nil // 不需要 Compaction
}
// 依次执行所有任务
for _, task := range tasks {
currentVersion := m.versionSet.GetCurrent()
if err := m.DoCompactionWithVersion(task, currentVersion); err != nil {
return err
}
}
return nil
}
// GetStats 获取 Compaction 统计信息
func (m *Manager) GetStats() map[string]interface{} {
m.mu.RLock()
defer m.mu.RUnlock()
return map[string]interface{}{
"total_compactions": m.totalCompactions,
"last_compaction_time": m.lastCompactionTime,
}
}
// GetLevelStats 获取每层的统计信息
func (m *Manager) GetLevelStats() []map[string]interface{} {
version := m.versionSet.GetCurrent()
if version == nil {
return nil
}
picker := m.compactor.GetPicker()
stats := make([]map[string]interface{}, manifest.NumLevels)
for level := 0; level < manifest.NumLevels; level++ {
files := version.GetLevel(level)
totalSize := int64(0)
for _, file := range files {
totalSize += file.FileSize
}
stats[level] = map[string]interface{}{
"level": level,
"file_count": len(files),
"total_size": totalSize,
"score": picker.GetLevelScore(version, level),
}
}
return stats
}
// backgroundGarbageCollection 后台垃圾回收循环
func (m *Manager) backgroundGarbageCollection() {
defer m.wg.Done()
ticker := time.NewTicker(5 * time.Minute) // 每 5 分钟检查一次
defer ticker.Stop()
for {
select {
case <-m.stopCh:
return
case <-ticker.C:
m.collectOrphanFiles()
}
}
}
// collectOrphanFiles 收集并删除孤儿 SST 文件
func (m *Manager) collectOrphanFiles() {
// 1. 获取当前版本中的所有活跃文件
version := m.versionSet.GetCurrent()
if version == nil {
return
}
activeFiles := make(map[int64]bool)
for level := 0; level < manifest.NumLevels; level++ {
files := version.GetLevel(level)
for _, file := range files {
activeFiles[file.FileNumber] = true
}
}
// 2. 扫描 SST 目录中的所有文件
pattern := filepath.Join(m.sstDir, "*.sst")
sstFiles, err := filepath.Glob(pattern)
if err != nil {
fmt.Printf("[GC] Failed to scan SST directory: %v\n", err)
return
}
// 3. 找出孤儿文件并删除
orphanCount := 0
for _, sstPath := range sstFiles {
// 提取文件编号
var fileNum int64
_, err := fmt.Sscanf(filepath.Base(sstPath), "%d.sst", &fileNum)
if err != nil {
continue
}
// 检查是否是活跃文件
if !activeFiles[fileNum] {
// 这是孤儿文件,删除它
err := os.Remove(sstPath)
if err != nil {
fmt.Printf("[GC] Failed to delete orphan file %06d.sst: %v\n", fileNum, err)
} else {
fmt.Printf("[GC] Deleted orphan file %06d.sst\n", fileNum)
orphanCount++
}
}
}
// 4. 更新统计信息
m.mu.Lock()
m.lastGCTime = time.Now()
m.totalOrphansFound += int64(orphanCount)
m.mu.Unlock()
if orphanCount > 0 {
fmt.Printf("[GC] Completed: cleaned up %d orphan files (total: %d)\n", orphanCount, m.totalOrphansFound)
}
}
// CleanupOrphanFiles 手动触发孤儿文件清理(可在启动时调用)
func (m *Manager) CleanupOrphanFiles() {
fmt.Println("[GC] Manual cleanup triggered")
m.collectOrphanFiles()
}