Files
srdb/index_btree_test.go
bourdon 77087d36c6 功能:增强 Schema 系统和添加新示例
- 扩展 Schema 支持更多数据类型(Duration、URL、JSON 等)
- 优化 SSTable 编码解码性能
- 添加多个新示例程序:
  - all_types: 展示所有支持的数据类型
  - new_types: 演示新增类型的使用
  - struct_tags: 展示结构体标签功能
  - time_duration: 时间和持续时间处理示例
- 完善测试用例和文档
- 优化代码结构和错误处理
2025-10-10 02:57:36 +08:00

641 lines
14 KiB
Go
Raw Permalink Blame History

package srdb
import (
"fmt"
"os"
"testing"
"time"
)
func TestIndexBTreeBasic(t *testing.T) {
// 创建临时目录
tmpDir := t.TempDir()
// 创建 Schema
schema, err := NewSchema("test", []Field{
{Name: "id", Type: Int64},
{Name: "name", Type: String},
{Name: "city", Type: String},
})
if err != nil {
t.Fatal(err)
}
// 创建索引管理器
mgr := NewIndexManager(tmpDir, schema)
defer mgr.Close()
// 创建索引
err = mgr.CreateIndex("city")
if err != nil {
t.Fatalf("Failed to create index: %v", err)
}
// 添加测试数据
testData := []struct {
city string
seq int64
}{
{"Beijing", 1},
{"Shanghai", 2},
{"Beijing", 3},
{"Shenzhen", 4},
{"Shanghai", 5},
{"Beijing", 6},
}
for _, td := range testData {
data := map[string]any{
"id": td.seq,
"name": "user_" + string(rune(td.seq)),
"city": td.city,
}
err := mgr.AddToIndexes(data, td.seq)
if err != nil {
t.Fatalf("Failed to add to index: %v", err)
}
}
// 构建索引
err = mgr.BuildAll()
if err != nil {
t.Fatalf("Failed to build index: %v", err)
}
// 关闭并重新打开,测试持久化
mgr.Close()
// 重新打开
mgr2 := NewIndexManager(tmpDir, schema)
defer mgr2.Close()
// 查询索引
idx, exists := mgr2.GetIndex("city")
if !exists {
t.Fatal("Index not found after reload")
}
// 验证索引使用 B+Tree
if !idx.useBTree {
t.Error("Index should be using B+Tree format")
}
// 验证查询结果
testCases := []struct {
city string
expectedSeqs []int64
}{
{"Beijing", []int64{1, 3, 6}},
{"Shanghai", []int64{2, 5}},
{"Shenzhen", []int64{4}},
{"Unknown", nil},
}
for _, tc := range testCases {
seqs, err := idx.Get(tc.city)
if err != nil {
t.Errorf("Failed to query index for %s: %v", tc.city, err)
continue
}
if len(seqs) != len(tc.expectedSeqs) {
t.Errorf("City %s: expected %d seqs, got %d", tc.city, len(tc.expectedSeqs), len(seqs))
continue
}
// 验证 seq 值
seqMap := make(map[int64]bool)
for _, seq := range seqs {
seqMap[seq] = true
}
for _, expectedSeq := range tc.expectedSeqs {
if !seqMap[expectedSeq] {
t.Errorf("City %s: missing expected seq %d", tc.city, expectedSeq)
}
}
}
// 验证元数据
metadata := idx.GetMetadata()
if metadata.MinSeq != 1 || metadata.MaxSeq != 6 || metadata.RowCount != 6 {
t.Errorf("Invalid metadata: MinSeq=%d, MaxSeq=%d, RowCount=%d",
metadata.MinSeq, metadata.MaxSeq, metadata.RowCount)
}
}
func TestIndexBTreeLargeDataset(t *testing.T) {
// 创建临时目录
tmpDir := t.TempDir()
// 创建 Schema
schema, err := NewSchema("test", []Field{
{Name: "id", Type: Int64},
{Name: "category", Type: String},
})
if err != nil {
t.Fatal(err)
}
// 创建索引管理器
mgr := NewIndexManager(tmpDir, schema)
defer mgr.Close()
// 创建索引
err = mgr.CreateIndex("category")
if err != nil {
t.Fatalf("Failed to create index: %v", err)
}
// 添加大量测试数据
numRecords := 10000
numCategories := 100
for i := 1; i <= numRecords; i++ {
category := "cat_" + string(rune('A'+(i%numCategories)))
data := map[string]any{
"id": int64(i),
"category": category,
}
err := mgr.AddToIndexes(data, int64(i))
if err != nil {
t.Fatalf("Failed to add to index: %v", err)
}
}
// 构建索引
startBuild := time.Now()
err = mgr.BuildAll()
if err != nil {
t.Fatalf("Failed to build index: %v", err)
}
buildTime := time.Since(startBuild)
t.Logf("Built index with %d records in %v", numRecords, buildTime)
// 获取索引文件大小
idx, _ := mgr.GetIndex("category")
stat, _ := idx.file.Stat()
t.Logf("Index file size: %d bytes", stat.Size())
// 关闭并重新打开
mgr.Close()
// 重新打开
mgr2 := NewIndexManager(tmpDir, schema)
defer mgr2.Close()
idx2, exists := mgr2.GetIndex("category")
if !exists {
t.Fatal("Index not found after reload")
}
// 验证索引使用 B+Tree
if !idx2.useBTree {
t.Error("Index should be using B+Tree format")
}
// 随机查询测试
startQuery := time.Now()
for i := 0; i < 100; i++ {
category := "cat_" + string(rune('A'+(i%numCategories)))
seqs, err := idx2.Get(category)
if err != nil {
t.Errorf("Failed to query index for %s: %v", category, err)
}
// 验证返回的 seq 数量
expectedCount := numRecords / numCategories
if len(seqs) != expectedCount {
t.Errorf("Category %s: expected %d seqs, got %d", category, expectedCount, len(seqs))
}
}
queryTime := time.Since(startQuery)
t.Logf("Queried 100 categories in %v (avg: %v per query)", queryTime, queryTime/100)
}
func TestIndexBTreeBackwardCompatibility(t *testing.T) {
// 创建临时目录
tmpDir := t.TempDir()
// 创建 Schema
schema, err := NewSchema("test", []Field{
{Name: "id", Type: Int64},
{Name: "status", Type: String},
})
if err != nil {
t.Fatal(err)
}
// 1. 创建索引管理器并用旧方式(通过先禁用新格式)创建索引
mgr := NewIndexManager(tmpDir, schema)
// 创建索引
err = mgr.CreateIndex("status")
if err != nil {
t.Fatalf("Failed to create index: %v", err)
}
// 添加<E6B7BB><E58AA0><EFBFBD>
testData := map[string][]int64{
"active": {1, 3, 5},
"inactive": {2, 4},
}
for status, seqs := range testData {
for _, seq := range seqs {
data := map[string]any{
"id": seq,
"status": status,
}
err := mgr.AddToIndexes(data, seq)
if err != nil {
t.Fatalf("Failed to add to index: %v", err)
}
}
}
// 构建索引(会使用新的 B+Tree 格式)
err = mgr.BuildAll()
if err != nil {
t.Fatalf("Failed to build index: %v", err)
}
// 关闭
mgr.Close()
// 2. 重新加载并验证
mgr2 := NewIndexManager(tmpDir, schema)
defer mgr2.Close()
idx, exists := mgr2.GetIndex("status")
if !exists {
t.Fatal("Failed to load index")
}
// 应该使用 B+Tree 格式
if !idx.useBTree {
t.Error("Index should be using B+Tree format")
}
// 验证查询结果
seqs, err := idx.Get("active")
if err != nil || len(seqs) != 3 {
t.Errorf("Failed to query index: err=%v, seqs=%v", err, seqs)
}
seqs, err = idx.Get("inactive")
if err != nil || len(seqs) != 2 {
t.Errorf("Failed to query index: err=%v, seqs=%v", err, seqs)
}
t.Log("Successfully loaded and queried B+Tree format index")
}
func TestIndexBTreeIncrementalUpdate(t *testing.T) {
// 创建临时目录
tmpDir := t.TempDir()
// 创建 Schema
schema, err := NewSchema("test", []Field{
{Name: "id", Type: Int64},
{Name: "tag", Type: String},
})
if err != nil {
t.Fatal(err)
}
// 创建索引管理器
mgr := NewIndexManager(tmpDir, schema)
defer mgr.Close()
// 创建索引
err = mgr.CreateIndex("tag")
if err != nil {
t.Fatalf("Failed to create index: %v", err)
}
// 添加初始数据
for i := 1; i <= 100; i++ {
tag := "tag_" + string(rune('A'+(i%10)))
data := map[string]any{
"id": int64(i),
"tag": tag,
}
err := mgr.AddToIndexes(data, int64(i))
if err != nil {
t.Fatalf("Failed to add to index: %v", err)
}
}
// 构建索引
err = mgr.BuildAll()
if err != nil {
t.Fatalf("Failed to build index: %v", err)
}
// 获取索引
idx, _ := mgr.GetIndex("tag")
// 验证初始元数据
metadata := idx.GetMetadata()
if metadata.MaxSeq != 100 {
t.Errorf("Expected MaxSeq=100, got %d", metadata.MaxSeq)
}
// 增量更新:添加新数据
getData := func(seq int64) (map[string]any, error) {
tag := "tag_" + string(rune('A'+(int(seq)%10)))
return map[string]any{
"id": seq,
"tag": tag,
}, nil
}
err = idx.IncrementalUpdate(getData, 101, 200)
if err != nil {
t.Fatalf("Failed to incremental update: %v", err)
}
// 验证更新后的元数据
metadata = idx.GetMetadata()
if metadata.MaxSeq != 200 {
t.Errorf("Expected MaxSeq=200 after update, got %d", metadata.MaxSeq)
}
if metadata.RowCount != 200 {
t.Errorf("Expected RowCount=200 after update, got %d", metadata.RowCount)
}
// 验证可以查询到新数据
seqs, err := idx.Get("tag_A")
if err != nil {
t.Fatalf("Failed to query index: %v", err)
}
// tag_A 应该包含 seq 1, 11, 21, ..., 191 (20个)
if len(seqs) != 20 {
t.Errorf("Expected 20 seqs for tag_A, got %d", len(seqs))
}
t.Logf("Incremental update successful: %d records indexed", metadata.RowCount)
}
// TestIndexBTreeWriter 测试 B+Tree 写入器
func TestIndexBTreeWriter(t *testing.T) {
tmpDir := t.TempDir()
filePath := tmpDir + "/test_idx.sst"
file, err := os.OpenFile(filePath, os.O_CREATE|os.O_RDWR, 0644)
if err != nil {
t.Fatalf("Failed to create file: %v", err)
}
defer file.Close()
// 创建写入器
metadata := IndexMetadata{
MinSeq: 1,
MaxSeq: 10,
RowCount: 10,
CreatedAt: time.Now().UnixNano(),
UpdatedAt: time.Now().UnixNano(),
}
writer := NewIndexBTreeWriter(file, metadata)
// 添加测试数据
testData := map[string][]int64{
"apple": {1, 2, 3},
"banana": {4, 5},
"cherry": {6, 7, 8, 9},
"date": {10},
}
for value, seqs := range testData {
writer.Add(value, seqs)
}
// 构建索引
err = writer.Build()
if err != nil {
t.Fatalf("Failed to build index: %v", err)
}
t.Log("B+Tree index written successfully")
// 关闭并重新打开文件
file.Close()
// 读取索引
file, err = os.Open(filePath)
if err != nil {
t.Fatalf("Failed to reopen file: %v", err)
}
reader, err := NewIndexBTreeReader(file)
if err != nil {
t.Fatalf("Failed to create reader: %v", err)
}
defer reader.Close()
// 验证读取
for value, expectedSeqs := range testData {
seqs, err := reader.Get(value)
if err != nil {
t.Errorf("Failed to get %s: %v", value, err)
continue
}
if len(seqs) != len(expectedSeqs) {
t.Errorf("%s: expected %d seqs, got %d", value, len(expectedSeqs), len(seqs))
t.Logf(" Expected: %v", expectedSeqs)
t.Logf(" Got: %v", seqs)
} else {
// 验证每个 seq
seqMap := make(map[int64]bool)
for _, seq := range seqs {
seqMap[seq] = true
}
for _, expectedSeq := range expectedSeqs {
if !seqMap[expectedSeq] {
t.Errorf("%s: missing seq %d", value, expectedSeq)
}
}
}
}
// 测试不存在的值
seqs, err := reader.Get("unknown")
if err != nil {
t.Errorf("Failed to get unknown: %v", err)
}
if seqs != nil {
t.Errorf("Expected nil for unknown value, got %v", seqs)
}
t.Log("All B+Tree reads successful")
}
// TestValueToKey 测试哈希函数
func TestValueToKey(t *testing.T) {
testCases := []string{
"apple",
"banana",
"cherry",
"Beijing",
"Shanghai",
"Shenzhen",
}
keyMap := make(map[int64]string)
for _, value := range testCases {
key := valueToKey(value)
t.Logf("valueToKey(%s) = %d", value, key)
// 检查哈希冲突
if existingValue, exists := keyMap[key]; exists {
t.Errorf("Hash collision: %s and %s both hash to %d", value, existingValue, key)
}
keyMap[key] = value
// 验证哈希的一致性
key2 := valueToKey(value)
if key != key2 {
t.Errorf("Hash inconsistency for %s: %d != %d", value, key, key2)
}
}
}
// TestIndexBTreeDataTypes 测试不同数据类型
func TestIndexBTreeDataTypes(t *testing.T) {
tmpDir := t.TempDir()
filePath := tmpDir + "/test_types.sst"
file, err := os.OpenFile(filePath, os.O_CREATE|os.O_RDWR, 0644)
if err != nil {
t.Fatalf("Failed to create file: %v", err)
}
defer file.Close()
metadata := IndexMetadata{
MinSeq: 1,
MaxSeq: 5,
RowCount: 5,
CreatedAt: time.Now().UnixNano(),
UpdatedAt: time.Now().UnixNano(),
}
writer := NewIndexBTreeWriter(file, metadata)
// 测试不同类型的值(都转换为字符串)
testData := map[string][]int64{
"123": {1}, // 数字字符串
"true": {2}, // 布尔字符串
"hello": {3}, // 普通字符串
"世界": {4}, // 中文
"": {5}, // 空字符串
}
for value, seqs := range testData {
writer.Add(value, seqs)
}
err = writer.Build()
if err != nil {
t.Fatalf("Failed to build: %v", err)
}
file.Close()
// 重新读取
file, err = os.Open(filePath)
if err != nil {
t.Fatalf("Failed to reopen: %v", err)
}
reader, err := NewIndexBTreeReader(file)
if err != nil {
t.Fatalf("Failed to create reader: %v", err)
}
defer reader.Close()
// 验证
for value, expectedSeqs := range testData {
seqs, err := reader.Get(value)
if err != nil {
t.Errorf("Failed to get '%s': %v", value, err)
continue
}
if len(seqs) != len(expectedSeqs) {
t.Errorf("'%s': expected %d seqs, got %d", value, len(expectedSeqs), len(seqs))
}
}
t.Log("All data types tested successfully")
}
// 测试大数据量
func TestIndexBTreeLargeData(t *testing.T) {
tmpDir := t.TempDir()
filePath := tmpDir + "/test_large.sst"
file, err := os.OpenFile(filePath, os.O_CREATE|os.O_RDWR, 0644)
if err != nil {
t.Fatalf("Failed to create file: %v", err)
}
defer file.Close()
metadata := IndexMetadata{
MinSeq: 1,
MaxSeq: 1000,
RowCount: 1000,
CreatedAt: time.Now().UnixNano(),
UpdatedAt: time.Now().UnixNano(),
}
writer := NewIndexBTreeWriter(file, metadata)
// 添加 1000 个不同的值
for i := range 1000 {
value := fmt.Sprintf("value_%d", i)
seqs := []int64{int64(i + 1)}
writer.Add(value, seqs)
}
err = writer.Build()
if err != nil {
t.Fatalf("Failed to build: %v", err)
}
fileInfo, _ := file.Stat()
t.Logf("Index file size: %d bytes", fileInfo.Size())
file.Close()
// 重新读取
file, err = os.Open(filePath)
if err != nil {
t.Fatalf("Failed to reopen: %v", err)
}
reader, err := NewIndexBTreeReader(file)
if err != nil {
t.Fatalf("Failed to create reader: %v", err)
}
defer reader.Close()
// 随机验证 100 个值
for i := range 100 {
value := fmt.Sprintf("value_%d", i*10)
seqs, err := reader.Get(value)
if err != nil {
t.Errorf("Failed to get %s: %v", value, err)
continue
}
if len(seqs) != 1 || seqs[0] != int64(i*10+1) {
t.Errorf("%s: expected [%d], got %v", value, i*10+1, seqs)
}
}
t.Log("Large data test successful")
}