Initial commit: SRDB - High-performance LSM-Tree database
- Core engine with MemTable, SST, WAL
- B+Tree indexing for SST files
- Leveled compaction strategy
- Multi-table database management
- Schema validation and secondary indexes
- Query builder with complex conditions
- Web UI with HTMX for data visualization
- Command-line tools for diagnostics
This commit is contained in:
155
sst/writer.go
Normal file
155
sst/writer.go
Normal file
@@ -0,0 +1,155 @@
|
||||
package sst
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
|
||||
"code.tczkiot.com/srdb/btree"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
// Writer SST 文件写入器
|
||||
type Writer struct {
|
||||
file *os.File
|
||||
builder *btree.Builder
|
||||
dataOffset int64
|
||||
dataStart int64 // 数据起始位置
|
||||
rowCount int64
|
||||
minKey int64
|
||||
maxKey int64
|
||||
minTime int64
|
||||
maxTime int64
|
||||
compression uint8
|
||||
}
|
||||
|
||||
// NewWriter 创建 SST 写入器
|
||||
func NewWriter(file *os.File) *Writer {
|
||||
return &Writer{
|
||||
file: file,
|
||||
builder: btree.NewBuilder(file, HeaderSize),
|
||||
dataOffset: 0, // 先写数据,后面会更新
|
||||
compression: CompressionSnappy,
|
||||
minKey: -1,
|
||||
maxKey: -1,
|
||||
minTime: -1,
|
||||
maxTime: -1,
|
||||
}
|
||||
}
|
||||
|
||||
// Row is one record handed to Writer.Add.
type Row struct {
	// Seq is the row's "_seq" value. Writer.Add uses it as the index key and
	// tracks its minimum and maximum.
	Seq int64
	// Time is the row's "_time" value. Writer.Add tracks its minimum and
	// maximum; the unit is not fixed by this file.
	Time int64
	// Data holds the user-supplied fields of the row.
	Data map[string]any
}
|
||||
|
||||
// Add 添加一行数据
|
||||
func (w *Writer) Add(row *Row) error {
|
||||
// 更新统计信息
|
||||
if w.minKey == -1 || row.Seq < w.minKey {
|
||||
w.minKey = row.Seq
|
||||
}
|
||||
if w.maxKey == -1 || row.Seq > w.maxKey {
|
||||
w.maxKey = row.Seq
|
||||
}
|
||||
if w.minTime == -1 || row.Time < w.minTime {
|
||||
w.minTime = row.Time
|
||||
}
|
||||
if w.maxTime == -1 || row.Time > w.maxTime {
|
||||
w.maxTime = row.Time
|
||||
}
|
||||
w.rowCount++
|
||||
|
||||
// 序列化数据 (简单的 JSON 序列化,后续可以优化)
|
||||
data := encodeRow(row)
|
||||
|
||||
// 压缩数据
|
||||
var compressed []byte
|
||||
if w.compression == CompressionSnappy {
|
||||
compressed = snappy.Encode(nil, data)
|
||||
} else {
|
||||
compressed = data
|
||||
}
|
||||
|
||||
// 写入数据块
|
||||
// 第一次写入时,确定数据起始位置
|
||||
if w.dataStart == 0 {
|
||||
// 预留足够空间给 B+Tree 索引
|
||||
// 假设索引最多占用 10% 的空间,最少 1 MB
|
||||
estimatedIndexSize := int64(10 * 1024 * 1024) // 10 MB
|
||||
w.dataStart = HeaderSize + estimatedIndexSize
|
||||
w.dataOffset = w.dataStart
|
||||
}
|
||||
|
||||
offset := w.dataOffset
|
||||
_, err := w.file.WriteAt(compressed, offset)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 添加到 B+Tree
|
||||
err = w.builder.Add(row.Seq, offset, int32(len(compressed)))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 更新数据偏移
|
||||
w.dataOffset += int64(len(compressed))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Finish 完成写入
|
||||
func (w *Writer) Finish() error {
|
||||
// 1. 构建 B+Tree 索引
|
||||
rootOffset, err := w.builder.Build()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 2. 计算索引大小
|
||||
indexSize := w.dataStart - HeaderSize
|
||||
|
||||
// 3. 创建 Header
|
||||
header := &Header{
|
||||
Magic: MagicNumber,
|
||||
Version: Version,
|
||||
Compression: w.compression,
|
||||
IndexOffset: HeaderSize,
|
||||
IndexSize: indexSize,
|
||||
RootOffset: rootOffset,
|
||||
DataOffset: w.dataStart,
|
||||
DataSize: w.dataOffset - w.dataStart,
|
||||
RowCount: w.rowCount,
|
||||
MinKey: w.minKey,
|
||||
MaxKey: w.maxKey,
|
||||
MinTime: w.minTime,
|
||||
MaxTime: w.maxTime,
|
||||
}
|
||||
|
||||
// 4. 写入 Header
|
||||
headerData := header.Marshal()
|
||||
_, err = w.file.WriteAt(headerData, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 5. Sync 到磁盘
|
||||
return w.file.Sync()
|
||||
}
|
||||
|
||||
// encodeRow 编码行数据 (使用二进制格式)
|
||||
func encodeRow(row *Row) []byte {
|
||||
// 使用二进制格式编码
|
||||
encoded, err := encodeRowBinary(row)
|
||||
if err != nil {
|
||||
// 降级到 JSON (不应该发生)
|
||||
data := map[string]interface{}{
|
||||
"_seq": row.Seq,
|
||||
"_time": row.Time,
|
||||
"data": row.Data,
|
||||
}
|
||||
encoded, _ = json.Marshal(data)
|
||||
}
|
||||
return encoded
|
||||
}
|
||||
Reference in New Issue
Block a user