重构：优化记录格式并修复核心功能

- 修改记录存储格式为 [4B len][8B offset][4B CRC][16B UUID][data]
- 修复 TopicProcessor 中 WaitGroup 使用错误导致 handler 不执行的问题
- 修复写入保护逻辑，避免 dirtyOffset=-1 时误判为写入中
- 添加统计信息定期持久化功能
- 改进 UTF-8 字符截断处理，防止 CJK 字符乱码
- 优化 Web UI：显示人类可读的文件大小，支持点击外部关闭弹窗
- 重构示例代码，添加 webui 和 webui_integration 示例

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-04 17:54:49 +08:00
parent 955a467248
commit 810664eb12
18 changed files with 1810 additions and 1170 deletions
--- a/query.go
+++ b/query.go
@@ -5,6 +5,9 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"unicode/utf8"
+
+	"github.com/google/uuid"
 )

 // RecordStatus 记录处理状态
@@ -46,20 +49,39 @@ type RecordWithIndex struct {
 	Index  int // 记录在日志文件中的索引位置
 }

+// RecordMetadata holds the metadata of a record (without the full data).
+type RecordMetadata struct {
+	Index       int       // record index
+	UUID        uuid.UUID // record UUID
+	DataSize    uint32    // data size in bytes
+	DataPreview string    // data preview (up to the first 200 bytes of the data)
+	Full        bool      // true when the preview was not truncated, i.e. the data fits in the preview window
+}
+
+// RecordMetadataWithStatus pairs record metadata with a processing status.
+type RecordMetadataWithStatus struct {
+	Metadata *RecordMetadata
+	Status   RecordStatus // processing status of the record
+}
+
 // RecordQuery 记录查询器
 type RecordQuery struct {
 	logPath string
 	fd      *os.File
 	rbuf    []byte       // 复用读缓冲区
 	index   *RecordIndex // 索引文件管理器（来自外部）
+	writer  *LogWriter   // log writer (supplied by the caller)
 }

 // NewRecordQuery 创建记录查询器
 // index 参数必须由外部提供，确保所有组件使用同一个索引实例
-func NewRecordQuery(logPath string, index *RecordIndex) (*RecordQuery, error) {
+func NewRecordQuery(logPath string, index *RecordIndex, writer *LogWriter) (*RecordQuery, error) {
 	if index == nil {
 		return nil, NewValidationError("index", "index cannot be nil", ErrNilParameter)
 	}
+	if writer == nil {
+		return nil, NewValidationError("writer", "writer cannot be nil", ErrNilParameter)
+	}

 	fd, err := os.Open(logPath)
 	if err != nil {
@@ -71,11 +93,100 @@ func NewRecordQuery(logPath string, index *RecordIndex) (*RecordQuery, error) {
 		fd:      fd,
 		rbuf:    make([]byte, 8<<20), // 8 MiB 缓冲区
 		index:   index,
+		writer:  writer,
 	}

 	return rq, nil
 }

+// readRecordsMetadataForward reads record metadata sequentially, starting at startIndex and moving toward higher indexes; the full Data is not loaded, only a preview of it.
+// startIndex: index of the first record to read
+// count: maximum number of records to read; fewer are returned when EOF or the writer's dirty offset is reached first
+func (rq *RecordQuery) readRecordsMetadataForward(startIndex, count int) ([]*RecordMetadata, error) {
+	// Look up the file offset of the first record.
+	startOffset, err := rq.index.GetOffset(startIndex)
+	if err != nil {
+		return nil, fmt.Errorf("get start offset: %w", err)
+	}
+
+	if _, err := rq.fd.Seek(startOffset, 0); err != nil { // whence 0: offset is relative to the start of the file
+		return nil, fmt.Errorf("seek to offset %d: %w", startOffset, err)
+	}
+
+	results := make([]*RecordMetadata, 0, count)
+	currentIndex := startIndex
+
+	for len(results) < count {
+		// Read the header: [4B len][8B offset][4B CRC][16B UUID] = 32 bytes.
+		hdr := rq.rbuf[:32]
+		if _, err := io.ReadFull(rq.fd, hdr); err != nil {
+			if err == io.EOF { // NOTE(review): a partially read header yields io.ErrUnexpectedEOF, which is not caught here and becomes an error
+				break
+			}
+			return nil, fmt.Errorf("read header at index %d: %w", currentIndex, err)
+		}
+
+		dataOffset := binary.LittleEndian.Uint64(hdr[4:12])
+		dirtyOffset := rq.writer.GetDirtyOffset()
+		// If a write is in progress (dirtyOffset >= 0) and this record sits at or beyond the write position, stop and return what has been read so far (no waiting happens here).
+		if dirtyOffset >= 0 && dataOffset >= uint64(dirtyOffset) {
+			break
+		}
+
+		dataLen := binary.LittleEndian.Uint32(hdr[0:4])
+		var uuidBytes [16]byte
+		copy(uuidBytes[:], hdr[16:32])
+
+		// Read the data preview (at most 200 bytes).
+		previewSize := min(int(dataLen), 200)
+
+		previewData := make([]byte, previewSize)
+		if _, err := io.ReadFull(rq.fd, previewData); err != nil {
+			if err == io.EOF { // NOTE(review): a partially read preview yields io.ErrUnexpectedEOF, which is not caught here and becomes an error
+				break
+			}
+			return nil, fmt.Errorf("read preview at index %d: %w", currentIndex, err)
+		}
+
+		// Make sure the preview is not cut in the middle of a UTF-8 character.
+		validPreviewSize := previewSize
+		if previewSize > 0 && previewSize < int(dataLen) {
+			// The check is only needed when the data was actually truncated.
+			// Drop up to 3 trailing bytes, stopping as soon as the remaining prefix is valid UTF-8.
+			for i := 0; i < 3 && validPreviewSize > 0; i++ {
+				if utf8.Valid(previewData[:validPreviewSize]) { // NOTE(review): validates the whole prefix, so data that is not UTF-8 at all always loses 3 trailing bytes
+					break
+				}
+				validPreviewSize--
+			}
+		}
+
+		metadata := &RecordMetadata{
+			Index:       currentIndex,
+			UUID:        uuidBytes,
+			DataSize:    dataLen,
+			DataPreview: string(previewData[:validPreviewSize]),
+			Full:        previewSize == int(dataLen),
+		}
+
+		// Skip over the rest of the data so the file position lands on the next header.
+		remainingSize := int64(dataLen) - int64(previewSize)
+		if remainingSize > 0 {
+			if _, err := rq.fd.Seek(remainingSize, 1); err != nil { // whence 1: offset is relative to the current position
+				if err == io.EOF {
+					break
+				}
+				return nil, fmt.Errorf("skip remaining data at index %d: %w", currentIndex, err)
+			}
+		}
+
+		results = append(results, metadata)
+		currentIndex++
+	}
+
+	return results, nil
+}
+
 // readRecordsForward 从指定索引位置向前顺序读取记录
 // startIndex: 起始记录索引
 // count: 读取数量
@@ -94,8 +205,8 @@ func (rq *RecordQuery) readRecordsForward(startIndex, count int) ([]*Record, err
 	currentOffset := startOffset

 	for len(results) < count {
-		// 读取头部：[4B len][4B CRC][16B UUID] = 24 字节
-		hdr := rq.rbuf[:24]
+		// 读取头部：[4B len][8B offset][4B CRC][16B UUID] = 32 字节
+		hdr := rq.rbuf[:32]
 		if _, err := io.ReadFull(rq.fd, hdr); err != nil {
 			if err == io.EOF {
 				break
@@ -105,18 +216,24 @@ func (rq *RecordQuery) readRecordsForward(startIndex, count int) ([]*Record, err

 		rec := &Record{
 			Len: binary.LittleEndian.Uint32(hdr[0:4]),
-			CRC: binary.LittleEndian.Uint32(hdr[4:8]),
+			// hdr[4:12] 是 offset，读取时不需要使用
+			CRC: binary.LittleEndian.Uint32(hdr[12:16]),
 		}
-		copy(rec.UUID[:], hdr[8:24])
+		copy(rec.UUID[:], hdr[16:32])

 		// 读取数据
 		rec.Data = make([]byte, rec.Len)
 		if _, err := io.ReadFull(rq.fd, rec.Data); err != nil {
+			// 如果遇到 EOF，说明文件可能不完整（被截断或索引不一致）
+			// 返回已读取的记录，而不是报错
+			if err == io.EOF || err == io.ErrUnexpectedEOF {
+				break
+			}
 			return nil, fmt.Errorf("read data at offset %d: %w", currentOffset, err)
 		}

 		results = append(results, rec)
-		currentOffset += 24 + int64(rec.Len)
+		currentOffset += 32 + int64(rec.Len)
 	}

 	return results, nil
@@ -222,6 +339,100 @@ func (rq *RecordQuery) QueryNewest(refIndex, count int) ([]*RecordWithIndex, err
 	return results, nil
 }

+// QueryOldestMetadata queries record metadata from the reference index toward lower indexes (older records) without reading the full data.
+// refIndex: reference index position; must be within [0, total record count]
+// count: number of records to query; must be greater than 0
+// The returned records are ordered by increasing index and carry metadata only.
+// Example: QueryOldestMetadata(5, 3) queries indexes 2, 3, 4 (5 itself is excluded) and returns [2, 3, 4].
+func (rq *RecordQuery) QueryOldestMetadata(refIndex, count int) ([]*RecordMetadata, error) {
+	if count <= 0 {
+		return nil, NewValidationError("count", "count must be greater than 0", ErrInvalidCount)
+	}
+
+	totalCount := rq.index.Count()
+	if totalCount == 0 {
+		return []*RecordMetadata{}, nil
+	}
+
+	// Validate the reference index range (strict mode).
+	if refIndex < 0 || refIndex > totalCount {
+		return nil, NewValidationError("refIndex", fmt.Sprintf("refIndex %d out of range [0, %d]", refIndex, totalCount), ErrInvalidRange)
+	}
+
+	// Compute the actual start index (moving toward lower indexes).
+	startIndex := refIndex - count
+	if startIndex < 0 {
+		startIndex = 0
+		count = refIndex // shrink count to the number of records that precede refIndex
+	}
+
+	if count <= 0 { // only reached when refIndex is 0: nothing precedes it
+		return []*RecordMetadata{}, nil
+	}
+
+	// Read the metadata.
+	return rq.readRecordsMetadataForward(startIndex, count)
+}
+
+// QueryNewestMetadata queries record metadata from the reference index toward higher indexes (newer records) without reading the full data.
+// refIndex: reference index position; must be within [-1, total record count)
+// count: number of records to query; must be greater than 0
+// The returned records are ordered by increasing index and carry metadata only.
+// Example: QueryNewestMetadata(5, 3) queries indexes 6, 7, 8 (5 itself is excluded) and returns [6, 7, 8].
+func (rq *RecordQuery) QueryNewestMetadata(refIndex, count int) ([]*RecordMetadata, error) {
+	if count <= 0 {
+		return nil, NewValidationError("count", "count must be greater than 0", ErrInvalidCount)
+	}
+
+	totalCount := rq.index.Count()
+	if totalCount == 0 {
+		return []*RecordMetadata{}, nil
+	}
+
+	// Validate the reference index range (strict mode).
+	// QueryNewestMetadata accepts refIndex = -1, which means "query from the very first record".
+	if refIndex < -1 || refIndex >= totalCount {
+		return nil, NewValidationError("refIndex", fmt.Sprintf("refIndex %d out of range [-1, %d)", refIndex, totalCount), ErrInvalidRange)
+	}
+
+	// Compute the actual start index (moving toward higher indexes).
+	startIndex := refIndex + 1
+	if startIndex >= totalCount {
+		return []*RecordMetadata{}, nil
+	}
+
+	// Cap the query size at the number of records available from startIndex onward.
+	remainCount := totalCount - startIndex
+	if count > remainCount {
+		count = remainCount
+	}
+
+	// Read the metadata.
+	return rq.readRecordsMetadataForward(startIndex, count)
+}
+
+// QueryByIndex returns the full data of the single record at the given index.
+// index: record index; must be within [0, total record count)
+// Returns the complete record, or an error when the index is out of range or the record cannot be read.
+func (rq *RecordQuery) QueryByIndex(index int) (*Record, error) {
+	totalCount := rq.index.Count()
+	if index < 0 || index >= totalCount {
+		return nil, NewValidationError("index", fmt.Sprintf("index %d out of range [0, %d)", index, totalCount), ErrInvalidRange)
+	}
+
+	// Read exactly one record.
+	records, err := rq.readRecordsForward(index, 1)
+	if err != nil {
+		return nil, err
+	}
+
+	if len(records) == 0 {
+		return nil, fmt.Errorf("record at index %d not found", index)
+	}
+
+	return records[0], nil
+}
+
 // GetRecordCount 获取记录总数
 func (rq *RecordQuery) GetRecordCount() (int, error) {
 	return rq.index.Count(), nil