Initial commit: SRDB - High-performance LSM-Tree database

- Core engine with MemTable, SST, WAL
- B+Tree indexing for SST files
- Leveled compaction strategy
- Multi-table database management
- Schema validation and secondary indexes
- Query builder with complex conditions
- Web UI with HTMX for data visualization
- Command-line tools for diagnostics
2025-10-08 06:38:12 +08:00
commit ae87c38776
61 changed files with 15475 additions and 0 deletions
--- a/memtable/manager.go
+++ b/memtable/manager.go
@@ -0,0 +1,216 @@
+package memtable
+
+import (
+	"sync"
+)
+
+// ImmutableMemTable pairs a MemTable that has been retired from the active
+// slot (see Manager.Switch) with the WAL number it was using at that time.
+type ImmutableMemTable struct {
+	MemTable  *MemTable
+	WALNumber int64 // number of the WAL associated with this MemTable
+}
+
+// Manager owns one active MemTable that receives writes plus a list of
+// retired (immutable) MemTables, and tracks the WAL number of the active one.
+type Manager struct {
+	active     *MemTable            // active MemTable (receives writes)
+	immutables []*ImmutableMemTable // immutable MemTables, oldest first (only read by the manager)
+	activeWAL  int64                // WAL number associated with the active MemTable
+	maxSize    int64                // size threshold compared against the active MemTable in ShouldSwitch
+	mu         sync.RWMutex         // read/write lock taken by every Manager method
+}
+
+// NewManager returns a Manager with an empty active MemTable, an empty
+// immutable list, and maxSize as the threshold used by ShouldSwitch.
+func NewManager(maxSize int64) *Manager {
+	mgr := &Manager{maxSize: maxSize}
+	mgr.active = New()
+	mgr.immutables = make([]*ImmutableMemTable, 0)
+	return mgr
+}
+
+// SetActiveWAL records walNumber as the WAL number of the active MemTable.
+func (m *Manager) SetActiveWAL(walNumber int64) {
+	m.mu.Lock()
+	m.activeWAL = walNumber
+	m.mu.Unlock()
+}
+
+// Put stores value under key in the active MemTable while holding the
+// manager's write lock.
+func (m *Manager) Put(key int64, value []byte) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	table := m.active
+	table.Put(key, value)
+}
+
+// Get looks key up in the active MemTable first and then in the immutable
+// MemTables from newest to oldest, returning the first hit. The boolean
+// result reports whether the key was found.
+func (m *Manager) Get(key int64) ([]byte, bool) {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	value, found := m.active.Get(key)
+	if found {
+		return value, true
+	}
+
+	// Newer immutables sit at the end of the slice, so walk it backwards.
+	for idx := len(m.immutables); idx > 0; idx-- {
+		value, found = m.immutables[idx-1].MemTable.Get(key)
+		if found {
+			return value, true
+		}
+	}
+
+	return nil, false
+}
+
+// GetActiveSize returns the Size of the active MemTable.
+func (m *Manager) GetActiveSize() int64 {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	size := m.active.Size()
+	return size
+}
+
+// GetActiveCount returns the number of entries in the active MemTable.
+func (m *Manager) GetActiveCount() int {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	entries := m.active.Count()
+	return entries
+}
+
+// ShouldSwitch reports whether the active MemTable's size has reached the
+// manager's maxSize.
+func (m *Manager) ShouldSwitch() bool {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	reached := m.active.Size() >= m.maxSize
+	return reached
+}
+
+// Switch retires the current active MemTable and installs a fresh one.
+// The old table is wrapped together with the WAL number it was using and
+// appended to the immutable list; newWALNumber becomes the active WAL number.
+// It returns the previous active WAL number and the newly created immutable
+// entry.
+func (m *Manager) Switch(newWALNumber int64) (oldWALNumber int64, immutable *ImmutableMemTable) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	oldWALNumber = m.activeWAL
+
+	// Freeze the current active table together with its WAL number.
+	immutable = &ImmutableMemTable{MemTable: m.active, WALNumber: oldWALNumber}
+	m.immutables = append(m.immutables, immutable)
+
+	// Start over with an empty active table bound to the new WAL.
+	m.active, m.activeWAL = New(), newWALNumber
+
+	return oldWALNumber, immutable
+}
+
+// RemoveImmutable removes target from the immutable list. Entries are
+// compared by pointer identity; only the first match is removed, and the call
+// is a no-op when target is not in the list.
+func (m *Manager) RemoveImmutable(target *ImmutableMemTable) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	for i, imm := range m.immutables {
+		if imm != target {
+			continue
+		}
+
+		last := len(m.immutables) - 1
+		// Shift the tail left over the removed entry.
+		copy(m.immutables[i:], m.immutables[i+1:])
+		// Clear the vacated tail slot so the backing array does not keep a
+		// stale pointer (and the MemTable behind it) reachable after removal.
+		m.immutables[last] = nil
+		m.immutables = m.immutables[:last]
+		return
+	}
+}
+
+// GetImmutableCount returns how many immutable MemTables the manager holds.
+func (m *Manager) GetImmutableCount() int {
+	m.mu.RLock()
+	n := len(m.immutables)
+	m.mu.RUnlock()
+	return n
+}
+
+// GetImmutables returns a snapshot of the immutable list. The returned slice
+// is a fresh copy, but its elements point at the same ImmutableMemTable
+// values the manager holds.
+func (m *Manager) GetImmutables() []*ImmutableMemTable {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	snapshot := make([]*ImmutableMemTable, 0, len(m.immutables))
+	snapshot = append(snapshot, m.immutables...)
+	return snapshot
+}
+
+// GetActive returns the current active MemTable (intended for reading it
+// during a flush).
+func (m *Manager) GetActive() *MemTable {
+	m.mu.RLock()
+	current := m.active
+	m.mu.RUnlock()
+	return current
+}
+
+// TotalCount returns the number of entries in the active MemTable plus the
+// entries of every immutable MemTable.
+func (m *Manager) TotalCount() int {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	sum := m.active.Count()
+	for idx := 0; idx < len(m.immutables); idx++ {
+		sum += m.immutables[idx].MemTable.Count()
+	}
+	return sum
+}
+
+// TotalSize returns the Size of the active MemTable plus the Size of every
+// immutable MemTable.
+func (m *Manager) TotalSize() int64 {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	sum := m.active.Size()
+	for idx := 0; idx < len(m.immutables); idx++ {
+		sum += m.immutables[idx].MemTable.Size()
+	}
+	return sum
+}
+
+// NewIterator returns an iterator over the active MemTable only; immutable
+// MemTables are not included.
+func (m *Manager) NewIterator() *Iterator {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	iter := m.active.NewIterator()
+	return iter
+}
+
+// Stats is a snapshot of the Manager's counters as filled in by GetStats.
+type Stats struct {
+	ActiveSize      int64 // Size of the active MemTable
+	ActiveCount     int   // entry count of the active MemTable
+	ImmutableCount  int   // number of immutable MemTables
+	ImmutablesSize  int64 // summed Size of all immutable MemTables
+	ImmutablesTotal int   // summed entry count of all immutable MemTables
+	TotalSize       int64 // ActiveSize + ImmutablesSize
+	TotalCount      int   // ActiveCount + ImmutablesTotal
+}
+
+// GetStats 获取统计信息
+func (m *Manager) GetStats() *Stats {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	stats := &Stats{
+		ActiveSize:     m.active.Size(),
+		ActiveCount:    m.active.Count(),
+		ImmutableCount: len(m.immutables),
+	}
+
+	for _, imm := range m.immutables {
+		stats.ImmutablesSize += imm.MemTable.Size()
+		stats.ImmutablesTotal += imm.MemTable.Count()
+	}
+
+	stats.TotalSize = stats.ActiveSize + stats.ImmutablesSize
+	stats.TotalCount = stats.ActiveCount + stats.ImmutablesTotal
+
+	return stats
+}
+
+// Clear replaces the active MemTable with an empty one and drops all
+// immutable MemTables (intended for tests). The active WAL number is left
+// untouched.
+func (m *Manager) Clear() {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	m.immutables = make([]*ImmutableMemTable, 0)
+	m.active = New()
+}
--- a/memtable/manager_test.go
+++ b/memtable/manager_test.go
@@ -0,0 +1,192 @@
+package memtable
+
+import (
+	"testing"
+)
+
+// TestManagerBasic covers a simple Put/Get round trip and the active entry
+// count reported by GetStats.
+func TestManagerBasic(t *testing.T) {
+	mgr := NewManager(1024) // 1KB
+
+	// Write two entries.
+	mgr.Put(1, []byte("value1"))
+	mgr.Put(2, []byte("value2"))
+
+	// Read one of them back.
+	if got, ok := mgr.Get(1); !ok || string(got) != "value1" {
+		t.Error("Get failed")
+	}
+
+	// Both entries should be counted in the active table.
+	if n := mgr.GetStats().ActiveCount; n != 2 {
+		t.Errorf("Expected 2 entries, got %d", n)
+	}
+
+	t.Log("Manager basic test passed!")
+}
+
+func TestManagerSwitch(t *testing.T) {
+	mgr := NewManager(50) // 50 bytes
+	mgr.SetActiveWAL(1)
+
+	// 写入数据
+	mgr.Put(1, []byte("value1_very_long_to_trigger_switch"))
+	mgr.Put(2, []byte("value2_very_long_to_trigger_switch"))
+
+	// 检查是否需要切换
+	if !mgr.ShouldSwitch() {
+		t.Logf("Size: %d, MaxSize: 50", mgr.GetActiveSize())
+		// 不强制要求切换，因为大小计算可能不同
+	}
+
+	// 执行切换
+	oldWAL, immutable := mgr.Switch(2)
+	if oldWAL != 1 {
+		t.Errorf("Expected old WAL 1, got %d", oldWAL)
+	}
+
+	if immutable == nil {
+		t.Error("Immutable should not be nil")
+	}
+
+	// 检查 Immutable 数量
+	if mgr.GetImmutableCount() != 1 {
+		t.Errorf("Expected 1 immutable, got %d", mgr.GetImmutableCount())
+	}
+
+	// 新的 Active 应该是空的
+	if mgr.GetActiveCount() != 0 {
+		t.Errorf("New active should be empty, got %d", mgr.GetActiveCount())
+	}
+
+	// 应该还能查到旧数据（在 Immutable 中）
+	value, found := mgr.Get(1)
+	if !found || string(value) != "value1_very_long_to_trigger_switch" {
+		t.Error("Should find value in immutable")
+	}
+
+	t.Log("Manager switch test passed!")
+}
+
+// TestManagerMultipleImmutables switches three times and verifies that all
+// three immutable tables are kept and every key remains readable.
+func TestManagerMultipleImmutables(t *testing.T) {
+	mgr := NewManager(50)
+	mgr.SetActiveWAL(1)
+
+	// Each batch writes one entry and then switches to the next WAL.
+	batches := []struct {
+		key     int64
+		value   string
+		nextWAL int64
+	}{
+		{key: 1, value: "value1_long_enough", nextWAL: 2},
+		{key: 2, value: "value2_long_enough", nextWAL: 3},
+		{key: 3, value: "value3_long_enough", nextWAL: 4},
+	}
+	for _, batch := range batches {
+		mgr.Put(batch.key, []byte(batch.value))
+		mgr.Switch(batch.nextWAL)
+	}
+
+	// Three switches should have produced three immutables.
+	if n := mgr.GetImmutableCount(); n != 3 {
+		t.Errorf("Expected 3 immutables, got %d", n)
+	}
+
+	// Every key must still be found.
+	for key := int64(1); key <= 3; key++ {
+		_, ok := mgr.Get(key)
+		if !ok {
+			t.Errorf("Should find key %d", key)
+		}
+	}
+
+	t.Log("Manager multiple immutables test passed!")
+}
+
+// TestManagerRemoveImmutable verifies that removing an immutable table drops
+// it from the manager and makes its data unreachable through Get.
+func TestManagerRemoveImmutable(t *testing.T) {
+	mgr := NewManager(50)
+	mgr.SetActiveWAL(1)
+
+	// Produce one immutable table.
+	mgr.Put(1, []byte("value1_long_enough"))
+	_, frozen := mgr.Switch(2)
+
+	// Remove it again.
+	mgr.RemoveImmutable(frozen)
+
+	// No immutables should be left.
+	if n := mgr.GetImmutableCount(); n != 0 {
+		t.Errorf("Expected 0 immutables, got %d", n)
+	}
+
+	// The data that lived in the removed table must be gone.
+	_, ok := mgr.Get(1)
+	if ok {
+		t.Error("Should not find removed data")
+	}
+
+	t.Log("Manager remove immutable test passed!")
+}
+
+// TestManagerStats checks the counters reported by GetStats after three
+// entries were moved into an immutable table and one more was written to the
+// new active table.
+func TestManagerStats(t *testing.T) {
+	mgr := NewManager(100)
+	mgr.SetActiveWAL(1)
+
+	// These three entries end up in the immutable table after the switch.
+	mgr.Put(1, []byte("active1"))
+	mgr.Put(2, []byte("active2"))
+	mgr.Put(3, []byte("immutable1_long"))
+	mgr.Switch(2)
+
+	// This entry goes into the new active table.
+	mgr.Put(4, []byte("active3"))
+
+	snapshot := mgr.GetStats()
+
+	if got := snapshot.ActiveCount; got != 1 {
+		t.Errorf("Expected 1 active entry, got %d", got)
+	}
+	if got := snapshot.ImmutableCount; got != 1 {
+		t.Errorf("Expected 1 immutable, got %d", got)
+	}
+	if got := snapshot.ImmutablesTotal; got != 3 {
+		t.Errorf("Expected 3 entries in immutables, got %d", got)
+	}
+	if got := snapshot.TotalCount; got != 4 {
+		t.Errorf("Expected 4 total entries, got %d", got)
+	}
+
+	t.Logf("Stats: %+v", snapshot)
+	t.Log("Manager stats test passed!")
+}
+
+func TestManagerConcurrent(t *testing.T) {
+	mgr := NewManager(1024)
+	mgr.SetActiveWAL(1)
+
+	// 并发写入
+	done := make(chan bool)
+	for i := 0; i < 10; i++ {
+		go func(id int) {
+			for j := 0; j < 100; j++ {
+				key := int64(id*100 + j)
+				mgr.Put(key, []byte("value"))
+			}
+			done <- true
+		}(i)
+	}
+
+	// 等待完成
+	for i := 0; i < 10; i++ {
+		<-done
+	}
+
+	// 检查总数
+	stats := mgr.GetStats()
+	if stats.TotalCount != 1000 {
+		t.Errorf("Expected 1000 entries, got %d", stats.TotalCount)
+	}
+
+	t.Log("Manager concurrent test passed!")
+}
--- a/memtable/memtable.go
+++ b/memtable/memtable.go
@@ -0,0 +1,141 @@
+package memtable
+
+import (
+	"sort"
+	"sync"
+)
+
+// MemTable is an in-memory table mapping int64 keys to byte-slice values.
+type MemTable struct {
+	data map[int64][]byte // key -> value
+	keys []int64          // keys of data, kept sorted in ascending order
+	size int64            // data size in bytes, maintained by Put and reset by Clear
+	mu   sync.RWMutex
+}
+
+// New returns an empty MemTable with its map and key slice initialised.
+func New() *MemTable {
+	mt := new(MemTable)
+	mt.data = make(map[int64][]byte)
+	mt.keys = make([]int64, 0)
+	return mt
+}
+
+// Put stores value under key. A new key is inserted into the sorted key
+// list; an existing key has its value replaced.
+//
+// The value slice is stored as-is (not copied), so later modifications made
+// by the caller are visible through Get. Size accounting reflects the values
+// currently stored: when a key is overwritten, the length of the replaced
+// value is subtracted before the new length is added.
+func (m *MemTable) Put(key int64, value []byte) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	old, exists := m.data[key]
+	if exists {
+		// Overwrite: drop the replaced value from the size so repeated
+		// updates of one key do not inflate Size().
+		m.size -= int64(len(old))
+	} else {
+		// New key: binary-search the insertion point and shift the tail by
+		// one, rather than re-sorting the whole slice on every insert.
+		pos := sort.Search(len(m.keys), func(i int) bool {
+			return m.keys[i] >= key
+		})
+		m.keys = append(m.keys, 0)
+		copy(m.keys[pos+1:], m.keys[pos:])
+		m.keys[pos] = key
+	}
+
+	m.data[key] = value
+	m.size += int64(len(value))
+}
+
+// Get returns the value stored under key and whether the key exists.
+func (m *MemTable) Get(key int64) ([]byte, bool) {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	if stored, ok := m.data[key]; ok {
+		return stored, true
+	}
+	return nil, false
+}
+
+// Size returns the table's current size counter.
+func (m *MemTable) Size() int64 {
+	m.mu.RLock()
+	current := m.size
+	m.mu.RUnlock()
+	return current
+}
+
+// Count returns the number of entries stored in the table.
+func (m *MemTable) Count() int {
+	m.mu.RLock()
+	entries := len(m.data)
+	m.mu.RUnlock()
+	return entries
+}
+
+// Keys returns a copy of the table's sorted key list, so the caller can use
+// it without affecting the table's own slice.
+func (m *MemTable) Keys() []int64 {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	snapshot := make([]int64, 0, len(m.keys))
+	snapshot = append(snapshot, m.keys...)
+	return snapshot
+}
+
+// Iterator walks a MemTable's entries in the order of its key slice.
+type Iterator struct {
+	mt    *MemTable // table being iterated
+	index int       // position in mt.keys; -1 before the first call to Next
+}
+
+// NewIterator returns an iterator positioned before the first key of m.
+func (m *MemTable) NewIterator() *Iterator {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	iter := &Iterator{mt: m}
+	iter.index = -1
+	return iter
+}
+
+// Next advances the iterator by one position and reports whether it now
+// points at a valid key.
+func (it *Iterator) Next() bool {
+	table := it.mt
+	table.mu.RLock()
+	defer table.mu.RUnlock()
+
+	it.index++
+	valid := it.index < len(table.keys)
+	return valid
+}
+
+// Key returns the key at the current position, or 0 when the iterator is
+// not positioned on a valid entry.
+func (it *Iterator) Key() int64 {
+	table := it.mt
+	table.mu.RLock()
+	defer table.mu.RUnlock()
+
+	if pos := it.index; pos >= 0 && pos < len(table.keys) {
+		return table.keys[pos]
+	}
+	return 0
+}
+
+// Value returns the value stored under the key at the current position, or
+// nil when the iterator is not positioned on a valid entry.
+func (it *Iterator) Value() []byte {
+	table := it.mt
+	table.mu.RLock()
+	defer table.mu.RUnlock()
+
+	if pos := it.index; pos >= 0 && pos < len(table.keys) {
+		return table.data[table.keys[pos]]
+	}
+	return nil
+}
+
+// Reset 重置迭代器
+func (it *Iterator) Reset() {
+	it.index = -1
+}
+
+// Clear empties the MemTable by replacing its map and key slice with fresh
+// ones and resetting the size counter to zero.
+func (m *MemTable) Clear() {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	m.size = 0
+	m.keys = make([]int64, 0)
+	m.data = make(map[int64][]byte)
+}
--- a/memtable/memtable_test.go
+++ b/memtable/memtable_test.go
@@ -0,0 +1,121 @@
+package memtable
+
+import (
+	"testing"
+)
+
+func TestMemTable(t *testing.T) {
+	mt := New()
+
+	// 1. 插入数据
+	for i := int64(1); i <= 100; i++ {
+		mt.Put(i, []byte("value_"+string(rune(i))))
+	}
+
+	if mt.Count() != 100 {
+		t.Errorf("Expected 100 entries, got %d", mt.Count())
+	}
+
+	t.Logf("Inserted 100 entries, size: %d bytes", mt.Size())
+
+	// 2. 查询数据
+	for i := int64(1); i <= 100; i++ {
+		value, exists := mt.Get(i)
+		if !exists {
+			t.Errorf("Key %d not found", i)
+		}
+		if value == nil {
+			t.Errorf("Key %d: value is nil", i)
+		}
+	}
+
+	// 3. 查询不存在的 key
+	_, exists := mt.Get(101)
+	if exists {
+		t.Error("Key 101 should not exist")
+	}
+
+	t.Log("All tests passed!")
+}
+
+// TestMemTableIterator inserts keys out of order and verifies that the
+// iterator yields all of them in strictly ascending order.
+func TestMemTableIterator(t *testing.T) {
+	mt := New()
+
+	// Insert in shuffled order.
+	for _, key := range []int64{5, 2, 8, 1, 9, 3, 7, 4, 6, 10} {
+		mt.Put(key, []byte("value"))
+	}
+
+	// Collect everything the iterator returns.
+	var seen []int64
+	for it := mt.NewIterator(); it.Next(); {
+		seen = append(seen, it.Key())
+	}
+
+	// Each key must be strictly greater than its predecessor.
+	for i := 1; i < len(seen); i++ {
+		if seen[i-1] >= seen[i] {
+			t.Errorf("Keys not in order: %v", seen)
+			break
+		}
+	}
+
+	if n := len(seen); n != 10 {
+		t.Errorf("Expected 10 keys, got %d", n)
+	}
+
+	t.Logf("Iterator returned keys in order: %v", seen)
+}
+
+// TestMemTableClear verifies that Clear leaves the table with zero entries
+// and zero size.
+func TestMemTableClear(t *testing.T) {
+	mt := New()
+
+	// Populate the table.
+	for key := int64(1); key <= 10; key++ {
+		mt.Put(key, []byte("value"))
+	}
+
+	if n := mt.Count(); n != 10 {
+		t.Errorf("Expected 10 entries, got %d", n)
+	}
+
+	// Empty it again.
+	mt.Clear()
+
+	if n := mt.Count(); n != 0 {
+		t.Errorf("Expected 0 entries after clear, got %d", n)
+	}
+	if sz := mt.Size(); sz != 0 {
+		t.Errorf("Expected size 0 after clear, got %d", sz)
+	}
+
+	t.Log("Clear test passed!")
+}
+
+// BenchmarkMemTablePut measures Put with a fresh key on every iteration and
+// a shared 100-byte value.
+func BenchmarkMemTablePut(b *testing.B) {
+	payload := make([]byte, 100)
+	mt := New()
+
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		key := int64(n)
+		mt.Put(key, payload)
+	}
+}
+
+// BenchmarkMemTableGet measures Get against a table pre-filled with 10000
+// keys, cycling through the keys in order.
+func BenchmarkMemTableGet(b *testing.B) {
+	payload := make([]byte, 100)
+	mt := New()
+
+	// Fill the table before the timer starts.
+	for key := int64(0); key < 10000; key++ {
+		mt.Put(key, payload)
+	}
+
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		key := int64(n % 10000)
+		mt.Get(key)
+	}
+}