package compaction import ( "fmt" "os" "path/filepath" "sync" "time" "code.tczkiot.com/srdb/manifest" ) // Manager 管理 Compaction 流程 type Manager struct { compactor *Compactor versionSet *manifest.VersionSet sstDir string // 控制后台 Compaction stopCh chan struct{} wg sync.WaitGroup // Compaction 并发控制 compactionMu sync.Mutex // 防止并发执行 compaction // 统计信息 mu sync.RWMutex totalCompactions int64 lastCompactionTime time.Time lastFailedFile int64 // 最后失败的文件编号 consecutiveFails int // 连续失败次数 lastGCTime time.Time totalOrphansFound int64 } // NewManager 创建新的 Compaction Manager func NewManager(sstDir string, versionSet *manifest.VersionSet) *Manager { return &Manager{ compactor: NewCompactor(sstDir, versionSet), versionSet: versionSet, sstDir: sstDir, stopCh: make(chan struct{}), } } // GetPicker 获取 Compaction Picker func (m *Manager) GetPicker() *Picker { return m.compactor.GetPicker() } // Start 启动后台 Compaction 和垃圾回收 func (m *Manager) Start() { m.wg.Add(2) go m.backgroundCompaction() go m.backgroundGarbageCollection() } // Stop 停止后台 Compaction func (m *Manager) Stop() { close(m.stopCh) m.wg.Wait() } // backgroundCompaction 后台 Compaction 循环 func (m *Manager) backgroundCompaction() { defer m.wg.Done() ticker := time.NewTicker(10 * time.Second) // 每 10 秒检查一次 defer ticker.Stop() for { select { case <-m.stopCh: return case <-ticker.C: m.maybeCompact() } } } // MaybeCompact 检查是否需要 Compaction 并执行(公开方法,供外部调用) // 非阻塞:如果已有 compaction 在执行,直接返回 func (m *Manager) MaybeCompact() { // 尝试获取锁,如果已有 compaction 在执行,直接返回 if !m.compactionMu.TryLock() { return } defer m.compactionMu.Unlock() m.doCompact() } // maybeCompact 内部使用的阻塞版本(后台 goroutine 使用) func (m *Manager) maybeCompact() { m.compactionMu.Lock() defer m.compactionMu.Unlock() m.doCompact() } // doCompact 实际执行 compaction 的逻辑(必须在持有 compactionMu 时调用) // 支持并发执行多个层级的 compaction func (m *Manager) doCompact() { // 获取当前版本 version := m.versionSet.GetCurrent() if version == nil { return } // 获取所有需要 Compaction 的任务(已按优先级排序) picker := m.compactor.GetPicker() tasks := picker.PickCompaction(version) if len(tasks) == 0 { // 输出诊断信息 m.printCompactionStats(version, picker) return } fmt.Printf("[Compaction] Found %d tasks to execute\n", len(tasks)) // 并发执行所有任务 successCount := 0 for _, task := range tasks { // 检查是否是上次失败的文件(防止无限重试) if len(task.InputFiles) > 0 { firstFile := task.InputFiles[0].FileNumber m.mu.Lock() if m.lastFailedFile == firstFile && m.consecutiveFails >= 3 { fmt.Printf("[Compaction] Skipping L%d file %d (failed %d times)\n", task.Level, firstFile, m.consecutiveFails) m.consecutiveFails = 0 m.lastFailedFile = 0 m.mu.Unlock() continue } m.mu.Unlock() } // 获取最新版本(每个任务执行前) currentVersion := m.versionSet.GetCurrent() if currentVersion == nil { continue } // 执行 Compaction fmt.Printf("[Compaction] Starting: L%d -> L%d, files: %d\n", task.Level, task.OutputLevel, len(task.InputFiles)) err := m.DoCompactionWithVersion(task, currentVersion) if err != nil { fmt.Printf("[Compaction] Failed L%d -> L%d: %v\n", task.Level, task.OutputLevel, err) // 记录失败信息 if len(task.InputFiles) > 0 { firstFile := task.InputFiles[0].FileNumber m.mu.Lock() if m.lastFailedFile == firstFile { m.consecutiveFails++ } else { m.lastFailedFile = firstFile m.consecutiveFails = 1 } m.mu.Unlock() } } else { fmt.Printf("[Compaction] Completed: L%d -> L%d\n", task.Level, task.OutputLevel) successCount++ // 清除失败计数 m.mu.Lock() m.consecutiveFails = 0 m.lastFailedFile = 0 m.mu.Unlock() } } fmt.Printf("[Compaction] Batch completed: %d/%d tasks succeeded\n", successCount, len(tasks)) } // printCompactionStats 输出 Compaction 统计信息(每分钟一次) func (m *Manager) printCompactionStats(version *manifest.Version, picker *Picker) { m.mu.Lock() defer m.mu.Unlock() // 限制输出频率:每 60 秒输出一次 if time.Since(m.lastCompactionTime) < 60*time.Second { return } m.lastCompactionTime = time.Now() fmt.Println("[Compaction] Status check:") for level := 0; level < 7; level++ { files := version.GetLevel(level) if len(files) == 0 { continue } totalSize := int64(0) for _, f := range files { totalSize += f.FileSize } score := picker.GetLevelScore(version, level) fmt.Printf(" L%d: %d files, %.2f MB, score: %.2f\n", level, len(files), float64(totalSize)/(1024*1024), score) } } // DoCompactionWithVersion 使用指定的版本执行 Compaction func (m *Manager) DoCompactionWithVersion(task *CompactionTask, version *manifest.Version) error { if version == nil { return fmt.Errorf("version is nil") } // 执行 Compaction(使用传入的 version,而不是重新获取) edit, err := m.compactor.DoCompaction(task, version) if err != nil { return fmt.Errorf("compaction failed: %w", err) } // 如果 edit 为 nil,说明所有文件都已经不存在,无需应用变更 if edit == nil { fmt.Printf("[Compaction] No changes needed (files already removed)\n") return nil } // 应用 VersionEdit err = m.versionSet.LogAndApply(edit) if err != nil { // LogAndApply 失败,清理已写入的新 SST 文件(防止孤儿文件) fmt.Printf("[Compaction] LogAndApply failed, cleaning up new files: %v\n", err) m.cleanupNewFiles(edit) return fmt.Errorf("apply version edit: %w", err) } // LogAndApply 成功后,删除废弃的 SST 文件 m.deleteObsoleteFiles(edit) // 更新统计信息 m.mu.Lock() m.totalCompactions++ m.lastCompactionTime = time.Now() m.mu.Unlock() return nil } // DoCompaction 执行一次 Compaction(兼容旧接口) func (m *Manager) DoCompaction(task *CompactionTask) error { // 获取当前版本 version := m.versionSet.GetCurrent() if version == nil { return fmt.Errorf("no current version") } return m.DoCompactionWithVersion(task, version) } // cleanupNewFiles 清理 LogAndApply 失败后的新文件(防止孤儿文件) func (m *Manager) cleanupNewFiles(edit *manifest.VersionEdit) { if edit == nil { return } fmt.Printf("[Compaction] Cleaning up %d new files after LogAndApply failure\n", len(edit.AddedFiles)) // 删除新创建的文件 for _, file := range edit.AddedFiles { sstPath := filepath.Join(m.sstDir, fmt.Sprintf("%06d.sst", file.FileNumber)) err := os.Remove(sstPath) if err != nil { fmt.Printf("[Compaction] Failed to cleanup new file %06d.sst: %v\n", file.FileNumber, err) } else { fmt.Printf("[Compaction] Cleaned up new file %06d.sst\n", file.FileNumber) } } } // deleteObsoleteFiles 删除废弃的 SST 文件 func (m *Manager) deleteObsoleteFiles(edit *manifest.VersionEdit) { if edit == nil { fmt.Printf("[Compaction] deleteObsoleteFiles: edit is nil\n") return } fmt.Printf("[Compaction] deleteObsoleteFiles: %d files to delete\n", len(edit.DeletedFiles)) // 删除被标记为删除的文件 for _, fileNum := range edit.DeletedFiles { sstPath := filepath.Join(m.sstDir, fmt.Sprintf("%06d.sst", fileNum)) err := os.Remove(sstPath) if err != nil { // 删除失败只记录日志,不影响 compaction 流程 // 后台垃圾回收器会重试 fmt.Printf("[Compaction] Failed to delete obsolete file %06d.sst: %v\n", fileNum, err) } else { fmt.Printf("[Compaction] Deleted obsolete file %06d.sst\n", fileNum) } } } // TriggerCompaction 手动触发一次 Compaction(所有需要的层级) func (m *Manager) TriggerCompaction() error { version := m.versionSet.GetCurrent() if version == nil { return fmt.Errorf("no current version") } picker := m.compactor.GetPicker() tasks := picker.PickCompaction(version) if len(tasks) == 0 { return nil // 不需要 Compaction } // 依次执行所有任务 for _, task := range tasks { currentVersion := m.versionSet.GetCurrent() if err := m.DoCompactionWithVersion(task, currentVersion); err != nil { return err } } return nil } // GetStats 获取 Compaction 统计信息 func (m *Manager) GetStats() map[string]interface{} { m.mu.RLock() defer m.mu.RUnlock() return map[string]interface{}{ "total_compactions": m.totalCompactions, "last_compaction_time": m.lastCompactionTime, } } // GetLevelStats 获取每层的统计信息 func (m *Manager) GetLevelStats() []map[string]interface{} { version := m.versionSet.GetCurrent() if version == nil { return nil } picker := m.compactor.GetPicker() stats := make([]map[string]interface{}, manifest.NumLevels) for level := 0; level < manifest.NumLevels; level++ { files := version.GetLevel(level) totalSize := int64(0) for _, file := range files { totalSize += file.FileSize } stats[level] = map[string]interface{}{ "level": level, "file_count": len(files), "total_size": totalSize, "score": picker.GetLevelScore(version, level), } } return stats } // backgroundGarbageCollection 后台垃圾回收循环 func (m *Manager) backgroundGarbageCollection() { defer m.wg.Done() ticker := time.NewTicker(5 * time.Minute) // 每 5 分钟检查一次 defer ticker.Stop() for { select { case <-m.stopCh: return case <-ticker.C: m.collectOrphanFiles() } } } // collectOrphanFiles 收集并删除孤儿 SST 文件 func (m *Manager) collectOrphanFiles() { // 1. 获取当前版本中的所有活跃文件 version := m.versionSet.GetCurrent() if version == nil { return } activeFiles := make(map[int64]bool) for level := 0; level < manifest.NumLevels; level++ { files := version.GetLevel(level) for _, file := range files { activeFiles[file.FileNumber] = true } } // 2. 扫描 SST 目录中的所有文件 pattern := filepath.Join(m.sstDir, "*.sst") sstFiles, err := filepath.Glob(pattern) if err != nil { fmt.Printf("[GC] Failed to scan SST directory: %v\n", err) return } // 3. 找出孤儿文件并删除 orphanCount := 0 for _, sstPath := range sstFiles { // 提取文件编号 var fileNum int64 _, err := fmt.Sscanf(filepath.Base(sstPath), "%d.sst", &fileNum) if err != nil { continue } // 检查是否是活跃文件 if !activeFiles[fileNum] { // 这是孤儿文件,删除它 err := os.Remove(sstPath) if err != nil { fmt.Printf("[GC] Failed to delete orphan file %06d.sst: %v\n", fileNum, err) } else { fmt.Printf("[GC] Deleted orphan file %06d.sst\n", fileNum) orphanCount++ } } } // 4. 更新统计信息 m.mu.Lock() m.lastGCTime = time.Now() m.totalOrphansFound += int64(orphanCount) m.mu.Unlock() if orphanCount > 0 { fmt.Printf("[GC] Completed: cleaned up %d orphan files (total: %d)\n", orphanCount, m.totalOrphansFound) } } // CleanupOrphanFiles 手动触发孤儿文件清理(可在启动时调用) func (m *Manager) CleanupOrphanFiles() { fmt.Println("[GC] Manual cleanup triggered") m.collectOrphanFiles() }