重构:优化记录格式并修复核心功能
- 修改记录存储格式为 [4B len][8B offset][4B CRC][16B UUID][data]
- 修复 TopicProcessor 中 WaitGroup 使用错误导致 handler 不执行的问题
- 修复写入保护逻辑,避免 dirtyOffset=-1 时误判为写入中
- 添加统计信息定期持久化功能
- 改进 UTF-8 字符截断处理,防止 CJK 字符乱码
- 优化 Web UI:显示人类可读的文件大小,支持点击外部关闭弹窗
- 重构示例代码,添加 webui 和 webui_integration 示例

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
223
query.go
223
query.go
@@ -5,6 +5,9 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// RecordStatus 记录处理状态
|
||||
@@ -46,20 +49,39 @@ type RecordWithIndex struct {
|
||||
Index int // 记录在日志文件中的索引位置
|
||||
}
|
||||
|
||||
// RecordMetadata 记录元数据(不包含完整数据)
|
||||
type RecordMetadata struct {
|
||||
Index int // 记录索引
|
||||
UUID uuid.UUID // UUID
|
||||
DataSize uint32 // 数据大小(字节)
|
||||
DataPreview string // 数据预览(前 200 个字符)
|
||||
Full bool
|
||||
}
|
||||
|
||||
// RecordMetadataWithStatus 带状态的记录元数据
|
||||
type RecordMetadataWithStatus struct {
|
||||
Metadata *RecordMetadata
|
||||
Status RecordStatus // 记录的处理状态
|
||||
}
|
||||
|
||||
// RecordQuery 记录查询器
|
||||
type RecordQuery struct {
|
||||
logPath string
|
||||
fd *os.File
|
||||
rbuf []byte // 复用读缓冲区
|
||||
index *RecordIndex // 索引文件管理器(来自外部)
|
||||
writer *LogWriter // 日志写入器(来自外部)
|
||||
}
|
||||
|
||||
// NewRecordQuery 创建记录查询器
|
||||
// index 参数必须由外部提供,确保所有组件使用同一个索引实例
|
||||
func NewRecordQuery(logPath string, index *RecordIndex) (*RecordQuery, error) {
|
||||
func NewRecordQuery(logPath string, index *RecordIndex, writer *LogWriter) (*RecordQuery, error) {
|
||||
if index == nil {
|
||||
return nil, NewValidationError("index", "index cannot be nil", ErrNilParameter)
|
||||
}
|
||||
if writer == nil {
|
||||
return nil, NewValidationError("writer", "writer cannot be nil", ErrNilParameter)
|
||||
}
|
||||
|
||||
fd, err := os.Open(logPath)
|
||||
if err != nil {
|
||||
@@ -71,11 +93,100 @@ func NewRecordQuery(logPath string, index *RecordIndex) (*RecordQuery, error) {
|
||||
fd: fd,
|
||||
rbuf: make([]byte, 8<<20), // 8 MiB 缓冲区
|
||||
index: index,
|
||||
writer: writer,
|
||||
}
|
||||
|
||||
return rq, nil
|
||||
}
|
||||
|
||||
// readRecordsMetadataForward 从指定索引位置向前顺序读取记录元数据(不读取完整 Data,但读取预览)
|
||||
// startIndex: 起始记录索引
|
||||
// count: 读取数量
|
||||
func (rq *RecordQuery) readRecordsMetadataForward(startIndex, count int) ([]*RecordMetadata, error) {
|
||||
// 获取起始 offset
|
||||
startOffset, err := rq.index.GetOffset(startIndex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get start offset: %w", err)
|
||||
}
|
||||
|
||||
if _, err := rq.fd.Seek(startOffset, 0); err != nil {
|
||||
return nil, fmt.Errorf("seek to offset %d: %w", startOffset, err)
|
||||
}
|
||||
|
||||
results := make([]*RecordMetadata, 0, count)
|
||||
currentIndex := startIndex
|
||||
|
||||
for len(results) < count {
|
||||
// 读取头部:[4B len][8B offset][4B CRC][16B UUID] = 32 字节
|
||||
hdr := rq.rbuf[:32]
|
||||
if _, err := io.ReadFull(rq.fd, hdr); err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return nil, fmt.Errorf("read header at index %d: %w", currentIndex, err)
|
||||
}
|
||||
|
||||
dataOffset := binary.LittleEndian.Uint64(hdr[4:12])
|
||||
dirtyOffset := rq.writer.GetDirtyOffset()
|
||||
// 如果正在写入(dirtyOffset >= 0)且记录位置 >= 写入位置,等待写入完成
|
||||
if dirtyOffset >= 0 && dataOffset >= uint64(dirtyOffset) {
|
||||
break
|
||||
}
|
||||
|
||||
dataLen := binary.LittleEndian.Uint32(hdr[0:4])
|
||||
var uuidBytes [16]byte
|
||||
copy(uuidBytes[:], hdr[16:32])
|
||||
|
||||
// 读取数据预览(最多 200 字节)
|
||||
previewSize := min(int(dataLen), 200)
|
||||
|
||||
previewData := make([]byte, previewSize)
|
||||
if _, err := io.ReadFull(rq.fd, previewData); err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return nil, fmt.Errorf("read preview at index %d: %w", currentIndex, err)
|
||||
}
|
||||
|
||||
// 确保预览数据不会在 UTF-8 字符中间截断
|
||||
validPreviewSize := previewSize
|
||||
if previewSize > 0 && previewSize < int(dataLen) {
|
||||
// 只有在截断的情况下才需要检查
|
||||
// 从后往前最多检查 3 个字节,找到最后一个完整的 UTF-8 字符边界
|
||||
for i := 0; i < 3 && validPreviewSize > 0; i++ {
|
||||
if utf8.Valid(previewData[:validPreviewSize]) {
|
||||
break
|
||||
}
|
||||
validPreviewSize--
|
||||
}
|
||||
}
|
||||
|
||||
metadata := &RecordMetadata{
|
||||
Index: currentIndex,
|
||||
UUID: uuidBytes,
|
||||
DataSize: dataLen,
|
||||
DataPreview: string(previewData[:validPreviewSize]),
|
||||
Full: previewSize == int(dataLen),
|
||||
}
|
||||
|
||||
// 跳过剩余数据部分
|
||||
remainingSize := int64(dataLen) - int64(previewSize)
|
||||
if remainingSize > 0 {
|
||||
if _, err := rq.fd.Seek(remainingSize, 1); err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return nil, fmt.Errorf("skip remaining data at index %d: %w", currentIndex, err)
|
||||
}
|
||||
}
|
||||
|
||||
results = append(results, metadata)
|
||||
currentIndex++
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// readRecordsForward 从指定索引位置向前顺序读取记录
|
||||
// startIndex: 起始记录索引
|
||||
// count: 读取数量
|
||||
@@ -94,8 +205,8 @@ func (rq *RecordQuery) readRecordsForward(startIndex, count int) ([]*Record, err
|
||||
currentOffset := startOffset
|
||||
|
||||
for len(results) < count {
|
||||
// 读取头部:[4B len][4B CRC][16B UUID] = 24 字节
|
||||
hdr := rq.rbuf[:24]
|
||||
// 读取头部:[4B len][8B offset][4B CRC][16B UUID] = 32 字节
|
||||
hdr := rq.rbuf[:32]
|
||||
if _, err := io.ReadFull(rq.fd, hdr); err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
@@ -105,18 +216,24 @@ func (rq *RecordQuery) readRecordsForward(startIndex, count int) ([]*Record, err
|
||||
|
||||
rec := &Record{
|
||||
Len: binary.LittleEndian.Uint32(hdr[0:4]),
|
||||
CRC: binary.LittleEndian.Uint32(hdr[4:8]),
|
||||
// hdr[4:12] 是 offset,读取时不需要使用
|
||||
CRC: binary.LittleEndian.Uint32(hdr[12:16]),
|
||||
}
|
||||
copy(rec.UUID[:], hdr[8:24])
|
||||
copy(rec.UUID[:], hdr[16:32])
|
||||
|
||||
// 读取数据
|
||||
rec.Data = make([]byte, rec.Len)
|
||||
if _, err := io.ReadFull(rq.fd, rec.Data); err != nil {
|
||||
// 如果遇到 EOF,说明文件可能不完整(被截断或索引不一致)
|
||||
// 返回已读取的记录,而不是报错
|
||||
if err == io.EOF || err == io.ErrUnexpectedEOF {
|
||||
break
|
||||
}
|
||||
return nil, fmt.Errorf("read data at offset %d: %w", currentOffset, err)
|
||||
}
|
||||
|
||||
results = append(results, rec)
|
||||
currentOffset += 24 + int64(rec.Len)
|
||||
currentOffset += 32 + int64(rec.Len)
|
||||
}
|
||||
|
||||
return results, nil
|
||||
@@ -222,6 +339,100 @@ func (rq *RecordQuery) QueryNewest(refIndex, count int) ([]*RecordWithIndex, err
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// QueryOldestMetadata 从参考索引向索引递减方向查询记录元数据(查询更早的记录,不读取完整数据)
|
||||
// refIndex: 参考索引位置
|
||||
// count: 查询数量
|
||||
// 返回的记录按索引递增方向排序,只包含元数据信息
|
||||
// 例如:QueryOldestMetadata(5, 3) 查询索引 2, 3, 4(不包含 5),返回 [2, 3, 4]
|
||||
func (rq *RecordQuery) QueryOldestMetadata(refIndex, count int) ([]*RecordMetadata, error) {
|
||||
if count <= 0 {
|
||||
return nil, NewValidationError("count", "count must be greater than 0", ErrInvalidCount)
|
||||
}
|
||||
|
||||
totalCount := rq.index.Count()
|
||||
if totalCount == 0 {
|
||||
return []*RecordMetadata{}, nil
|
||||
}
|
||||
|
||||
// 验证参考索引范围(严格模式)
|
||||
if refIndex < 0 || refIndex > totalCount {
|
||||
return nil, NewValidationError("refIndex", fmt.Sprintf("refIndex %d out of range [0, %d]", refIndex, totalCount), ErrInvalidRange)
|
||||
}
|
||||
|
||||
// 计算实际起始索引(向索引递减方向)
|
||||
startIndex := refIndex - count
|
||||
if startIndex < 0 {
|
||||
startIndex = 0
|
||||
count = refIndex // 调整实际数量
|
||||
}
|
||||
|
||||
if count <= 0 {
|
||||
return []*RecordMetadata{}, nil
|
||||
}
|
||||
|
||||
// 读取元数据
|
||||
return rq.readRecordsMetadataForward(startIndex, count)
|
||||
}
|
||||
|
||||
// QueryNewestMetadata 从参考索引向索引递增方向查询记录元数据(查询更新的记录,不读取完整数据)
|
||||
// refIndex: 参考索引位置
|
||||
// count: 查询数量
|
||||
// 返回的记录按索引递增方向排序,只包含元数据信息
|
||||
// 例如:QueryNewestMetadata(5, 3) 查询索引 6, 7, 8(不包含 5),返回 [6, 7, 8]
|
||||
func (rq *RecordQuery) QueryNewestMetadata(refIndex, count int) ([]*RecordMetadata, error) {
|
||||
if count <= 0 {
|
||||
return nil, NewValidationError("count", "count must be greater than 0", ErrInvalidCount)
|
||||
}
|
||||
|
||||
totalCount := rq.index.Count()
|
||||
if totalCount == 0 {
|
||||
return []*RecordMetadata{}, nil
|
||||
}
|
||||
|
||||
// 验证参考索引范围(严格模式)
|
||||
// QueryNewestMetadata 允许 refIndex = -1(从头开始查询)
|
||||
if refIndex < -1 || refIndex >= totalCount {
|
||||
return nil, NewValidationError("refIndex", fmt.Sprintf("refIndex %d out of range [-1, %d)", refIndex, totalCount), ErrInvalidRange)
|
||||
}
|
||||
|
||||
// 计算实际起始索引(向索引递增方向)
|
||||
startIndex := refIndex + 1
|
||||
if startIndex >= totalCount {
|
||||
return []*RecordMetadata{}, nil
|
||||
}
|
||||
|
||||
// 限制查询数量
|
||||
remainCount := totalCount - startIndex
|
||||
if count > remainCount {
|
||||
count = remainCount
|
||||
}
|
||||
|
||||
// 读取元数据
|
||||
return rq.readRecordsMetadataForward(startIndex, count)
|
||||
}
|
||||
|
||||
// QueryByIndex 根据索引查询单条记录的完整数据
|
||||
// index: 记录索引
|
||||
// 返回完整的记录数据
|
||||
func (rq *RecordQuery) QueryByIndex(index int) (*Record, error) {
|
||||
totalCount := rq.index.Count()
|
||||
if index < 0 || index >= totalCount {
|
||||
return nil, NewValidationError("index", fmt.Sprintf("index %d out of range [0, %d)", index, totalCount), ErrInvalidRange)
|
||||
}
|
||||
|
||||
// 读取单条记录
|
||||
records, err := rq.readRecordsForward(index, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(records) == 0 {
|
||||
return nil, fmt.Errorf("record at index %d not found", index)
|
||||
}
|
||||
|
||||
return records[0], nil
|
||||
}
|
||||
|
||||
// GetRecordCount 获取记录总数
|
||||
func (rq *RecordQuery) GetRecordCount() (int, error) {
|
||||
return rq.index.Count(), nil
|
||||
|
||||
Reference in New Issue
Block a user