Files
srdb/sst/writer.go
bourdon ae87c38776 Initial commit: SRDB - High-performance LSM-Tree database
- Core engine with MemTable, SST, WAL
- B+Tree indexing for SST files  
- Leveled compaction strategy
- Multi-table database management
- Schema validation and secondary indexes
- Query builder with complex conditions
- Web UI with HTMX for data visualization
- Command-line tools for diagnostics
2025-10-08 06:38:28 +08:00

156 lines
3.2 KiB
Go

package sst
import (
"encoding/json"
"os"
"code.tczkiot.com/srdb/btree"
"github.com/golang/snappy"
)
// Writer SST 文件写入器
type Writer struct {
file *os.File
builder *btree.Builder
dataOffset int64
dataStart int64 // 数据起始位置
rowCount int64
minKey int64
maxKey int64
minTime int64
maxTime int64
compression uint8
}
// NewWriter 创建 SST 写入器
func NewWriter(file *os.File) *Writer {
return &Writer{
file: file,
builder: btree.NewBuilder(file, HeaderSize),
dataOffset: 0, // 先写数据,后面会更新
compression: CompressionSnappy,
minKey: -1,
maxKey: -1,
minTime: -1,
maxTime: -1,
}
}
// Row 表示一行数据
type Row struct {
Seq int64 // _seq
Time int64 // _time
Data map[string]any // 用户数据
}
// Add 添加一行数据
func (w *Writer) Add(row *Row) error {
// 更新统计信息
if w.minKey == -1 || row.Seq < w.minKey {
w.minKey = row.Seq
}
if w.maxKey == -1 || row.Seq > w.maxKey {
w.maxKey = row.Seq
}
if w.minTime == -1 || row.Time < w.minTime {
w.minTime = row.Time
}
if w.maxTime == -1 || row.Time > w.maxTime {
w.maxTime = row.Time
}
w.rowCount++
// 序列化数据 (简单的 JSON 序列化,后续可以优化)
data := encodeRow(row)
// 压缩数据
var compressed []byte
if w.compression == CompressionSnappy {
compressed = snappy.Encode(nil, data)
} else {
compressed = data
}
// 写入数据块
// 第一次写入时,确定数据起始位置
if w.dataStart == 0 {
// 预留足够空间给 B+Tree 索引
// 假设索引最多占用 10% 的空间,最少 1 MB
estimatedIndexSize := int64(10 * 1024 * 1024) // 10 MB
w.dataStart = HeaderSize + estimatedIndexSize
w.dataOffset = w.dataStart
}
offset := w.dataOffset
_, err := w.file.WriteAt(compressed, offset)
if err != nil {
return err
}
// 添加到 B+Tree
err = w.builder.Add(row.Seq, offset, int32(len(compressed)))
if err != nil {
return err
}
// 更新数据偏移
w.dataOffset += int64(len(compressed))
return nil
}
// Finish 完成写入
func (w *Writer) Finish() error {
// 1. 构建 B+Tree 索引
rootOffset, err := w.builder.Build()
if err != nil {
return err
}
// 2. 计算索引大小
indexSize := w.dataStart - HeaderSize
// 3. 创建 Header
header := &Header{
Magic: MagicNumber,
Version: Version,
Compression: w.compression,
IndexOffset: HeaderSize,
IndexSize: indexSize,
RootOffset: rootOffset,
DataOffset: w.dataStart,
DataSize: w.dataOffset - w.dataStart,
RowCount: w.rowCount,
MinKey: w.minKey,
MaxKey: w.maxKey,
MinTime: w.minTime,
MaxTime: w.maxTime,
}
// 4. 写入 Header
headerData := header.Marshal()
_, err = w.file.WriteAt(headerData, 0)
if err != nil {
return err
}
// 5. Sync 到磁盘
return w.file.Sync()
}
// encodeRow 编码行数据 (使用二进制格式)
func encodeRow(row *Row) []byte {
// 使用二进制格式编码
encoded, err := encodeRowBinary(row)
if err != nil {
// 降级到 JSON (不应该发生)
data := map[string]interface{}{
"_seq": row.Seq,
"_time": row.Time,
"data": row.Data,
}
encoded, _ = json.Marshal(data)
}
return encoded
}