Initial commit: SRDB - High-performance LSM-Tree database

- Core engine with MemTable, SST, WAL
- B+Tree indexing for SST files  
- Leveled compaction strategy
- Multi-table database management
- Schema validation and secondary indexes
- Query builder with complex conditions
- Web UI with HTMX for data visualization
- Command-line tools for diagnostics
This commit is contained in:
2025-10-08 06:38:12 +08:00
commit ae87c38776
61 changed files with 15475 additions and 0 deletions

216
memtable/manager.go Normal file
View File

@@ -0,0 +1,216 @@
package memtable
import (
"sync"
)
// ImmutableMemTable pairs a MemTable that has been switched out of the active
// slot with the WAL number that was recorded for it at switch time.
type ImmutableMemTable struct {
	MemTable  *MemTable // the switched-out table
	WALNumber int64     // WAL number that was active while this table took writes
}
// Manager owns one writable MemTable together with the list of tables that
// have been switched out, and guards its own fields with a single RWMutex.
type Manager struct {
	active     *MemTable            // table that receives Put calls
	immutables []*ImmutableMemTable // switched-out tables, oldest first (Switch appends)
	activeWAL  int64                // WAL number associated with the active table
	maxSize    int64                // threshold compared against active.Size() by ShouldSwitch
	mu         sync.RWMutex         // protects all fields above
}
// NewManager builds a Manager with a fresh, empty active MemTable and an empty
// list of immutable tables. maxSize is the threshold later used by
// ShouldSwitch. The active WAL number is left at its zero value.
func NewManager(maxSize int64) *Manager {
	mgr := &Manager{maxSize: maxSize}
	mgr.active = New()
	mgr.immutables = make([]*ImmutableMemTable, 0)
	return mgr
}
// SetActiveWAL records walNumber as the WAL number that belongs to the current
// active MemTable. It takes the write lock.
func (m *Manager) SetActiveWAL(walNumber int64) {
	m.mu.Lock()
	m.activeWAL = walNumber
	m.mu.Unlock()
}
// Put writes the key/value pair into the active MemTable while holding the
// Manager's write lock, so it cannot interleave with Switch or Clear.
func (m *Manager) Put(key int64, value []byte) {
	m.mu.Lock()
	defer m.mu.Unlock()

	tbl := m.active
	tbl.Put(key, value)
}
// Get looks key up in the active MemTable first and then in the immutable
// tables, newest to oldest. It returns the first value found and true, or
// (nil, false) when no table contains the key.
func (m *Manager) Get(key int64) ([]byte, bool) {
	m.mu.RLock()
	defer m.mu.RUnlock()

	// The active table holds the most recent writes, so it is consulted first.
	value, found := m.active.Get(key)
	if found {
		return value, true
	}

	// Immutables are appended on Switch, so the highest index is the newest.
	for idx := len(m.immutables); idx > 0; idx-- {
		if value, found = m.immutables[idx-1].MemTable.Get(key); found {
			return value, true
		}
	}

	return nil, false
}
// GetActiveSize returns the value reported by Size on the active MemTable.
func (m *Manager) GetActiveSize() int64 {
	m.mu.RLock()
	defer m.mu.RUnlock()

	size := m.active.Size()
	return size
}
// GetActiveCount returns the number of entries in the active MemTable.
func (m *Manager) GetActiveCount() int {
	m.mu.RLock()
	defer m.mu.RUnlock()

	count := m.active.Count()
	return count
}
// ShouldSwitch reports whether the active MemTable's size has reached or
// exceeded the maxSize the Manager was created with. It only reports; the
// caller decides whether to call Switch.
func (m *Manager) ShouldSwitch() bool {
	m.mu.RLock()
	defer m.mu.RUnlock()

	current := m.active.Size()
	return current >= m.maxSize
}
// Switch freezes the current active MemTable and installs a new empty one.
//
// The old active table is wrapped in an ImmutableMemTable (tagged with the WAL
// number that was active until now) and appended to the immutable list.
// newWALNumber becomes the WAL number of the new active table.
//
// It returns the previous WAL number and the ImmutableMemTable that was just
// created (the frozen table, not the new active one).
func (m *Manager) Switch(newWALNumber int64) (oldWALNumber int64, immutable *ImmutableMemTable) {
	m.mu.Lock()
	defer m.mu.Unlock()

	// Capture the outgoing WAL number once and use it for both results.
	oldWALNumber = m.activeWAL
	immutable = &ImmutableMemTable{MemTable: m.active, WALNumber: oldWALNumber}
	m.immutables = append(m.immutables, immutable)

	// Start a fresh active table bound to the new WAL.
	m.active = New()
	m.activeWAL = newWALNumber

	return oldWALNumber, immutable
}
// RemoveImmutable drops target from the list of immutable MemTables. Matching
// is by pointer identity; only the first match is removed, and the call is a
// no-op when target is not in the list. The relative order of the remaining
// tables is preserved.
func (m *Manager) RemoveImmutable(target *ImmutableMemTable) {
	m.mu.Lock()
	defer m.mu.Unlock()

	for i, imm := range m.immutables {
		if imm != target {
			continue
		}
		last := len(m.immutables) - 1
		// Shift the tail left over the removed element.
		copy(m.immutables[i:], m.immutables[i+1:])
		// Clear the vacated slot: otherwise the backing array keeps a stale
		// pointer there and the removed table cannot be garbage collected
		// until that slot happens to be overwritten by a later append.
		m.immutables[last] = nil
		m.immutables = m.immutables[:last]
		return
	}
}
// GetImmutableCount returns how many immutable MemTables the Manager currently
// holds.
func (m *Manager) GetImmutableCount() int {
	m.mu.RLock()
	defer m.mu.RUnlock()

	n := len(m.immutables)
	return n
}
// GetImmutables returns a copy of the immutable list, oldest first. The slice
// is new, so the caller may reorder or truncate it freely, but its elements
// are the same *ImmutableMemTable pointers the Manager holds.
func (m *Manager) GetImmutables() []*ImmutableMemTable {
	m.mu.RLock()
	defer m.mu.RUnlock()

	snapshot := make([]*ImmutableMemTable, 0, len(m.immutables))
	snapshot = append(snapshot, m.immutables...)
	return snapshot
}
// GetActive returns the pointer to the current active MemTable (the original
// comment notes it is meant for reading during a flush). The table itself is
// returned, not a copy, and it stops being the active table after the next
// Switch or Clear.
func (m *Manager) GetActive() *MemTable {
	m.mu.RLock()
	defer m.mu.RUnlock()

	current := m.active
	return current
}
// TotalCount returns the sum of the entry counts of the active MemTable and
// every immutable MemTable. A key present in several tables is counted once
// per table.
func (m *Manager) TotalCount() int {
	m.mu.RLock()
	defer m.mu.RUnlock()

	sum := m.active.Count()
	for idx := range m.immutables {
		sum += m.immutables[idx].MemTable.Count()
	}
	return sum
}
// TotalSize returns the sum of Size() over the active MemTable and every
// immutable MemTable.
func (m *Manager) TotalSize() int64 {
	m.mu.RLock()
	defer m.mu.RUnlock()

	sum := m.active.Size()
	for idx := range m.immutables {
		sum += m.immutables[idx].MemTable.Size()
	}
	return sum
}
// NewIterator returns an iterator over the active MemTable only; immutable
// tables are not included. The iterator is bound to the table that was active
// at the time of the call.
func (m *Manager) NewIterator() *Iterator {
	m.mu.RLock()
	defer m.mu.RUnlock()

	it := m.active.NewIterator()
	return it
}
// Stats is a point-in-time summary of a Manager's tables, as filled in by
// GetStats.
type Stats struct {
	ActiveSize      int64 // Size() of the active MemTable
	ActiveCount     int   // entry count of the active MemTable
	ImmutableCount  int   // number of immutable MemTables
	ImmutablesSize  int64 // sum of Size() over all immutable MemTables
	ImmutablesTotal int   // sum of entry counts over all immutable MemTables
	TotalSize       int64 // ActiveSize + ImmutablesSize
	TotalCount      int   // ActiveCount + ImmutablesTotal
}
// GetStats collects size and entry-count figures for the active table and all
// immutable tables under a single read lock and returns them in a newly
// allocated Stats.
func (m *Manager) GetStats() *Stats {
	m.mu.RLock()
	defer m.mu.RUnlock()

	// Active table first, then accumulate over the immutables.
	result := new(Stats)
	result.ActiveSize = m.active.Size()
	result.ActiveCount = m.active.Count()
	result.ImmutableCount = len(m.immutables)

	for idx := range m.immutables {
		tbl := m.immutables[idx].MemTable
		result.ImmutablesSize += tbl.Size()
		result.ImmutablesTotal += tbl.Count()
	}

	// Totals are derived from the figures gathered above.
	result.TotalSize = result.ActiveSize + result.ImmutablesSize
	result.TotalCount = result.ActiveCount + result.ImmutablesTotal
	return result
}
// Clear discards every table by installing a new empty active MemTable and an
// empty immutable list (the original comment notes it is intended for tests).
// The active WAL number and maxSize are left unchanged.
func (m *Manager) Clear() {
	m.mu.Lock()
	defer m.mu.Unlock()

	m.active, m.immutables = New(), make([]*ImmutableMemTable, 0)
}

192
memtable/manager_test.go Normal file
View File

@@ -0,0 +1,192 @@
package memtable
import (
"testing"
)
// TestManagerBasic covers the simplest path: two writes, one read back, and
// the active entry count reported by GetStats.
func TestManagerBasic(t *testing.T) {
	mgr := NewManager(1024) // 1KB threshold

	// Write two entries.
	mgr.Put(1, []byte("value1"))
	mgr.Put(2, []byte("value2"))

	// Read the first one back.
	got, ok := mgr.Get(1)
	if !(ok && string(got) == "value1") {
		t.Error("Get failed")
	}

	// Both entries must be counted in the active table.
	if n := mgr.GetStats().ActiveCount; n != 2 {
		t.Errorf("Expected 2 entries, got %d", n)
	}

	t.Log("Manager basic test passed!")
}
// TestManagerSwitch checks that Switch returns the previous WAL number and the
// frozen table, leaves an empty active table behind, and keeps the old data
// readable through Get.
func TestManagerSwitch(t *testing.T) {
	mgr := NewManager(50) // 50-byte threshold
	mgr.SetActiveWAL(1)

	// Two values that together exceed the threshold.
	mgr.Put(1, []byte("value1_very_long_to_trigger_switch"))
	mgr.Put(2, []byte("value2_very_long_to_trigger_switch"))

	// Only log when the threshold is not reached; the size accounting is not
	// what this test pins down, so this is deliberately not a failure.
	if !mgr.ShouldSwitch() {
		t.Logf("Size: %d, MaxSize: 50", mgr.GetActiveSize())
	}

	// Perform the switch and check both results.
	prevWAL, frozen := mgr.Switch(2)
	if prevWAL != 1 {
		t.Errorf("Expected old WAL 1, got %d", prevWAL)
	}
	if frozen == nil {
		t.Error("Immutable should not be nil")
	}

	// Exactly one immutable table must exist now.
	if n := mgr.GetImmutableCount(); n != 1 {
		t.Errorf("Expected 1 immutable, got %d", n)
	}

	// The new active table starts out empty.
	if n := mgr.GetActiveCount(); n != 0 {
		t.Errorf("New active should be empty, got %d", n)
	}

	// Data written before the switch is still reachable via the immutable.
	got, ok := mgr.Get(1)
	if !(ok && string(got) == "value1_very_long_to_trigger_switch") {
		t.Error("Should find value in immutable")
	}

	t.Log("Manager switch test passed!")
}
// TestManagerMultipleImmutables writes three batches, switching after each,
// and checks that three immutables exist and every key is still found.
func TestManagerMultipleImmutables(t *testing.T) {
	mgr := NewManager(50)
	mgr.SetActiveWAL(1)

	// One entry per batch; each batch is frozen by a Switch to the next WAL.
	batches := []struct {
		key     int64
		value   string
		nextWAL int64
	}{
		{1, "value1_long_enough", 2},
		{2, "value2_long_enough", 3},
		{3, "value3_long_enough", 4},
	}
	for _, b := range batches {
		mgr.Put(b.key, []byte(b.value))
		mgr.Switch(b.nextWAL)
	}

	// Three switches must have produced three immutables.
	if n := mgr.GetImmutableCount(); n != 3 {
		t.Errorf("Expected 3 immutables, got %d", n)
	}

	// Every key must be reachable through the immutables.
	for key := int64(1); key <= 3; key++ {
		_, ok := mgr.Get(key)
		if !ok {
			t.Errorf("Should find key %d", key)
		}
	}

	t.Log("Manager multiple immutables test passed!")
}
// TestManagerRemoveImmutable checks that removing the only immutable empties
// the list and makes its data unreachable through Get.
func TestManagerRemoveImmutable(t *testing.T) {
	mgr := NewManager(50)
	mgr.SetActiveWAL(1)

	// Freeze a table holding a single entry.
	mgr.Put(1, []byte("value1_long_enough"))
	_, frozen := mgr.Switch(2)

	// Drop it again.
	mgr.RemoveImmutable(frozen)

	// No immutables may remain.
	if n := mgr.GetImmutableCount(); n != 0 {
		t.Errorf("Expected 0 immutables, got %d", n)
	}

	// The entry lived only in the removed table, so it must be gone.
	_, ok := mgr.Get(1)
	if ok {
		t.Error("Should not find removed data")
	}

	t.Log("Manager remove immutable test passed!")
}
// TestManagerStats freezes a table with three entries, writes one more entry
// to the new active table, and checks the counters reported by GetStats.
func TestManagerStats(t *testing.T) {
	mgr := NewManager(100)
	mgr.SetActiveWAL(1)

	// Three entries that will end up in the immutable table.
	mgr.Put(1, []byte("active1"))
	mgr.Put(2, []byte("active2"))
	mgr.Put(3, []byte("immutable1_long"))
	mgr.Switch(2)

	// One entry in the new active table.
	mgr.Put(4, []byte("active3"))

	stats := mgr.GetStats()

	if got := stats.ActiveCount; got != 1 {
		t.Errorf("Expected 1 active entry, got %d", got)
	}
	if got := stats.ImmutableCount; got != 1 {
		t.Errorf("Expected 1 immutable, got %d", got)
	}
	if got := stats.ImmutablesTotal; got != 3 {
		t.Errorf("Expected 3 entries in immutables, got %d", got)
	}
	if got := stats.TotalCount; got != 4 {
		t.Errorf("Expected 4 total entries, got %d", got)
	}

	t.Logf("Stats: %+v", stats)
	t.Log("Manager stats test passed!")
}
// TestManagerConcurrent has ten goroutines each write one hundred distinct
// keys and checks that all one thousand entries are counted afterwards.
func TestManagerConcurrent(t *testing.T) {
	const (
		writers   = 10
		perWriter = 100
	)

	mgr := NewManager(1024)
	mgr.SetActiveWAL(1)

	// Each writer owns the key range [id*perWriter, (id+1)*perWriter).
	done := make(chan bool)
	for w := 0; w < writers; w++ {
		go func(id int) {
			base := id * perWriter
			for off := 0; off < perWriter; off++ {
				mgr.Put(int64(base+off), []byte("value"))
			}
			done <- true
		}(w)
	}

	// Wait for every writer to signal completion.
	for w := 0; w < writers; w++ {
		<-done
	}

	// All keys are distinct, so the total must match exactly.
	if total := mgr.GetStats().TotalCount; total != 1000 {
		t.Errorf("Expected 1000 entries, got %d", total)
	}

	t.Log("Manager concurrent test passed!")
}

141
memtable/memtable.go Normal file
View File

@@ -0,0 +1,141 @@
package memtable
import (
"sort"
"sync"
)
// MemTable is an in-memory table mapping int64 keys to byte-slice values. It
// additionally keeps the keys in ascending order so they can be iterated in
// sorted order. All methods lock the table's own RWMutex.
type MemTable struct {
	data map[int64][]byte // key -> most recently stored value
	keys []int64          // every key in data, ascending
	size int64            // sum of len(value) over the entries currently in data
	mu   sync.RWMutex     // guards data, keys and size
}

// New returns an empty MemTable.
func New() *MemTable {
	return &MemTable{
		data: make(map[int64][]byte),
		keys: make([]int64, 0),
	}
}

// Put stores value under key, replacing any value previously stored there.
// The slice is stored as passed in, without copying.
//
// Size accounting reflects the table's current contents: when an existing key
// is overwritten, the length of the old value is subtracted before the new
// length is added, so repeated overwrites do not inflate Size.
func (m *MemTable) Put(key int64, value []byte) {
	m.mu.Lock()
	defer m.mu.Unlock()

	if old, exists := m.data[key]; exists {
		// Overwrite: the key is already in m.keys, only the byte count moves.
		m.size -= int64(len(old))
	} else {
		// New key: binary-search its slot and shift the tail right by one,
		// instead of re-sorting the whole slice on every insert.
		pos := sort.Search(len(m.keys), func(i int) bool { return m.keys[i] >= key })
		m.keys = append(m.keys, 0)
		copy(m.keys[pos+1:], m.keys[pos:])
		m.keys[pos] = key
	}

	m.data[key] = value
	m.size += int64(len(value))
}

// Get returns the value stored under key and true, or (nil, false) when the
// key is absent. The returned slice is the stored one, not a copy.
func (m *MemTable) Get(key int64) ([]byte, bool) {
	m.mu.RLock()
	defer m.mu.RUnlock()

	value, exists := m.data[key]
	return value, exists
}

// Size returns the total number of value bytes currently stored. Keys are not
// included in the figure.
func (m *MemTable) Size() int64 {
	m.mu.RLock()
	defer m.mu.RUnlock()

	return m.size
}

// Count returns the number of distinct keys in the table.
func (m *MemTable) Count() int {
	m.mu.RLock()
	defer m.mu.RUnlock()

	return len(m.data)
}

// Keys returns all keys in ascending order. The result is a copy, so callers
// may modify it without affecting the table.
func (m *MemTable) Keys() []int64 {
	m.mu.RLock()
	defer m.mu.RUnlock()

	keysCopy := make([]int64, len(m.keys))
	copy(keysCopy, m.keys)
	return keysCopy
}
// Iterator walks the keys of a MemTable in ascending order. It reads the live
// table on every call rather than a snapshot, so keys inserted while iterating
// can shift the position the iterator points at.
type Iterator struct {
	mt    *MemTable // table being iterated
	index int       // position in mt.keys; -1 means "before the first key"
}

// NewIterator returns an iterator positioned before the first key; call Next
// to advance to the first entry.
func (m *MemTable) NewIterator() *Iterator {
	m.mu.RLock()
	defer m.mu.RUnlock()

	it := &Iterator{mt: m}
	it.index = -1
	return it
}

// Next advances the iterator by one position and reports whether it now points
// at a valid entry. The position keeps increasing on every call, including
// calls made after the end has been reached.
func (it *Iterator) Next() bool {
	tbl := it.mt
	tbl.mu.RLock()
	defer tbl.mu.RUnlock()

	it.index++
	return it.index < len(tbl.keys)
}

// Key returns the key at the current position, or 0 when the iterator is not
// positioned on a valid entry. Since 0 can also be a stored key, rely on
// Next's result to tell the two cases apart.
func (it *Iterator) Key() int64 {
	tbl := it.mt
	tbl.mu.RLock()
	defer tbl.mu.RUnlock()

	if pos := it.index; pos >= 0 && pos < len(tbl.keys) {
		return tbl.keys[pos]
	}
	return 0
}

// Value returns the value stored under the key at the current position, or
// nil when the iterator is not positioned on a valid entry.
func (it *Iterator) Value() []byte {
	tbl := it.mt
	tbl.mu.RLock()
	defer tbl.mu.RUnlock()

	if pos := it.index; pos >= 0 && pos < len(tbl.keys) {
		return tbl.data[tbl.keys[pos]]
	}
	return nil
}

// Reset moves the iterator back to its initial position, before the first
// key. It does not take the table lock.
func (it *Iterator) Reset() {
	it.index = -1
}
// Clear removes every entry by replacing the map and key slice with new empty
// ones and resetting the size to zero.
func (m *MemTable) Clear() {
	m.mu.Lock()
	defer m.mu.Unlock()

	m.data, m.keys, m.size = make(map[int64][]byte), make([]int64, 0), 0
}

121
memtable/memtable_test.go Normal file
View File

@@ -0,0 +1,121 @@
package memtable
import (
"testing"
)
// TestMemTable inserts keys 1..100, reads each one back, and checks that a key
// that was never inserted is reported as missing.
func TestMemTable(t *testing.T) {
	const last = int64(100)
	mt := New()

	// 1. Insert the entries.
	for key := int64(1); key <= last; key++ {
		mt.Put(key, []byte("value_"+string(rune(key))))
	}

	if n := mt.Count(); n != 100 {
		t.Errorf("Expected 100 entries, got %d", n)
	}
	t.Logf("Inserted 100 entries, size: %d bytes", mt.Size())

	// 2. Every inserted key must be found with a non-nil value.
	for key := int64(1); key <= last; key++ {
		got, ok := mt.Get(key)
		if !ok {
			t.Errorf("Key %d not found", key)
		}
		if got == nil {
			t.Errorf("Key %d: value is nil", key)
		}
	}

	// 3. A key that was never inserted must miss.
	if _, ok := mt.Get(101); ok {
		t.Error("Key 101 should not exist")
	}

	t.Log("All tests passed!")
}
// TestMemTableIterator inserts ten keys out of order and checks that the
// iterator yields all of them in strictly ascending order.
func TestMemTableIterator(t *testing.T) {
	mt := New()

	// Insert in shuffled order.
	for _, key := range []int64{5, 2, 8, 1, 9, 3, 7, 4, 6, 10} {
		mt.Put(key, []byte("value"))
	}

	// Drain the iterator.
	var seen []int64
	for it := mt.NewIterator(); it.Next(); {
		seen = append(seen, it.Key())
	}

	// Each key must be strictly greater than its predecessor.
	for i := 1; i < len(seen); i++ {
		if seen[i-1] >= seen[i] {
			t.Errorf("Keys not in order: %v", seen)
			break
		}
	}

	if n := len(seen); n != 10 {
		t.Errorf("Expected 10 keys, got %d", n)
	}

	t.Logf("Iterator returned keys in order: %v", seen)
}
// TestMemTableClear fills a table with ten entries and checks that Clear
// resets both the entry count and the size to zero.
func TestMemTableClear(t *testing.T) {
	mt := New()

	// Fill the table.
	for key := int64(1); key <= 10; key++ {
		mt.Put(key, []byte("value"))
	}
	if n := mt.Count(); n != 10 {
		t.Errorf("Expected 10 entries, got %d", n)
	}

	// Empty it again.
	mt.Clear()

	if n := mt.Count(); n != 0 {
		t.Errorf("Expected 0 entries after clear, got %d", n)
	}
	if sz := mt.Size(); sz != 0 {
		t.Errorf("Expected size 0 after clear, got %d", sz)
	}

	t.Log("Clear test passed!")
}
// BenchmarkMemTablePut measures Put with ascending, never-repeated keys and a
// shared 100-byte value.
func BenchmarkMemTablePut(b *testing.B) {
	var (
		mt      = New()
		payload = make([]byte, 100)
	)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		mt.Put(int64(n), payload)
	}
}
// BenchmarkMemTableGet measures Get against a table pre-filled with 10000
// entries, cycling through the keys.
func BenchmarkMemTableGet(b *testing.B) {
	var (
		mt      = New()
		payload = make([]byte, 100)
	)

	// Populate outside the timed region.
	for key := int64(0); key < 10000; key++ {
		mt.Put(key, payload)
	}

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		mt.Get(int64(n % 10000))
	}
}