feat: 添加监控仪表盘

- 新增 Lit.js 组件化 UI (ui/ 目录)
  - tasks-chart: 带十字准星和拖拽选择的图表
  - queue-table: 队列列表,支持暂停/恢复
  - queue-modal: 队列详情弹窗,支持任务重试
  - time-range-picker: Prometheus 风格时间选择器
  - help-tooltip: 可复用的提示组件

- HTTPHandler 功能
  - SSE 实时推送 (stats + queues)
  - 队列暂停/恢复 API
  - 任务重试 API
  - 时间范围查询 API

- Inspector 改进
  - Prometheus 风格单表存储
  - 集成到 Start/Stop 生命周期
  - 新增 PauseQueue/UnpauseQueue/RunTask 方法

- 代码重构
  - Start 函数拆分为小函数
  - 优雅关闭流程优化

- 其他
  - 忽略 SQLite 数据库文件
  - example 添加延迟/定点任务示例
This commit is contained in:
2025-12-09 19:58:18 +08:00
parent c88bde7b11
commit 1f9f1cab53
17 changed files with 3665 additions and 787 deletions

View File

@@ -1,332 +1,381 @@
// Package taskq 提供基于 Redis 的异步任务队列功能
// inspect.go 文件包含任务队列的监控和检查功能
// inspect.go 文件包含统计采集器和相关数据结构
package taskq
import (
_ "embed"
"encoding/json"
"database/sql"
"fmt"
"html/template"
"net/http"
"sort"
"strconv"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/hibiken/asynq"
_ "github.com/mattn/go-sqlite3"
)
//go:embed dashboard.html
var dashboardHTML string
// ==================== Inspector 统计采集器 ====================
// InspectOptions configures the monitoring HTTP service.
type InspectOptions struct {
// RootPath is the root URL path of the monitoring service.
// Defaults to "/monitor".
RootPath string
// ReadOnly enables read-only mode, which disables all modifying operations.
// Defaults to false.
ReadOnly bool
}
// HTTPHandler 监控服务的 HTTP 处理器
type HTTPHandler struct {
router *http.ServeMux
rootPath string
readOnly bool
// Inspector is the statistics collector. It runs independently of the HTTP
// service.
type Inspector struct {
inspector *asynq.Inspector // asynq inspector used to read queue state
db *sql.DB // database the collected metrics are written to
closeCh chan struct{} // closed by Close to stop the collector loop
closeOnce sync.Once // ensures closeCh is closed at most once
interval time.Duration // period of the collection ticker
}
// NewInspectHandler 创建新的监控处理器
// 使用全局的 redisClient 创建 asynq.Inspector
func NewInspectHandler(opts InspectOptions) (*HTTPHandler, error) {
// InspectorOptions configures the statistics collector.
type InspectorOptions struct {
// Interval is the collection interval. Defaults to 2 seconds.
Interval time.Duration
// DBPath is the path of the SQLite database file. Defaults to "./taskq_stats.db".
DBPath string
}
// NewInspector 创建新的统计采集器
func NewInspector(opts InspectorOptions) (*Inspector, error) {
if redisClient == nil {
return nil, fmt.Errorf("taskq: redis client not initialized, call SetRedis() first")
}
// 设置默认值
if opts.RootPath == "" {
opts.RootPath = "/monitor"
if opts.Interval <= 0 {
opts.Interval = 2 * time.Second
}
// 确保路径以 / 开头且不以 / 结尾
if !strings.HasPrefix(opts.RootPath, "/") {
opts.RootPath = "/" + opts.RootPath
}
opts.RootPath = strings.TrimSuffix(opts.RootPath, "/")
// 创建 asynq inspector
inspector := asynq.NewInspectorFromRedisClient(redisClient)
handler := &HTTPHandler{
router: http.NewServeMux(),
rootPath: opts.RootPath,
readOnly: opts.ReadOnly,
inspector: inspector,
if opts.DBPath == "" {
opts.DBPath = "./taskq_stats.db"
}
handler.setupRoutes()
return handler, nil
}
// ServeHTTP implements the http.Handler interface by dispatching the request
// to the handler's internal router.
func (h *HTTPHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
h.router.ServeHTTP(w, r)
}
// RootPath returns the root URL path of the monitoring service.
func (h *HTTPHandler) RootPath() string {
return h.rootPath
}
// Close closes the underlying inspector connection and returns the error, if
// any, reported by it.
func (h *HTTPHandler) Close() error {
return h.inspector.Close()
}
// setupRoutes registers every endpoint of the monitoring service on the
// handler's router: the JSON API under <rootPath>/api/ and the dashboard page
// under <rootPath> (with and without a trailing slash).
func (h *HTTPHandler) setupRoutes() {
	apiBase := h.rootPath + "/api/"

	// Registration order is kept: API endpoints first, then the dashboard.
	routes := []struct {
		pattern string
		handler func(http.ResponseWriter, *http.Request)
	}{
		{apiBase + "queues", h.handleQueues},
		{apiBase + "queues/", h.handleQueueDetail},
		{apiBase + "tasks/", h.handleTasks},
		{h.rootPath + "/", h.handleDashboard},
		{h.rootPath, h.handleDashboard},
	}

	for _, rt := range routes {
		h.router.HandleFunc(rt.pattern, rt.handler)
	}
}
// handleQueues 处理队列列表请求
func (h *HTTPHandler) handleQueues(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
return
}
// 获取所有队列信息
asynqQueues, err := h.inspector.Queues()
if err != nil {
http.Error(w, fmt.Sprintf("Failed to get queues: %v", err), http.StatusInternalServerError)
return
}
fmt.Println("Redis中的队列:", asynqQueues)
fmt.Println("注册的队列:", queues)
// 获取每个队列的详细信息
type QueueInfo struct {
Name string `json:"name"`
Priority int `json:"priority"`
Active int `json:"active"`
Pending int `json:"pending"`
Retry int `json:"retry"`
Archived int `json:"archived"`
}
var queueInfos []QueueInfo
// 首先显示所有注册的队列即使Redis中还没有任务
for queueName, priority := range queues {
stats, err := h.inspector.GetQueueInfo(queueName)
if err != nil {
// 如果队列不存在,创建一个空的状态
queueInfos = append(queueInfos, QueueInfo{
Name: queueName,
Priority: priority,
Active: 0,
Pending: 0,
Retry: 0,
Archived: 0,
})
} else {
queueInfos = append(queueInfos, QueueInfo{
Name: queueName,
Priority: priority,
Active: stats.Active,
Pending: stats.Pending,
Retry: stats.Retry,
Archived: stats.Archived,
})
// 确保目录存在
dir := filepath.Dir(opts.DBPath)
if dir != "" && dir != "." {
if err := os.MkdirAll(dir, 0755); err != nil {
return nil, fmt.Errorf("taskq: failed to create directory: %v", err)
}
}
// 按优先级排序
sort.Slice(queueInfos, func(i, j int) bool {
return queueInfos[i].Priority > queueInfos[j].Priority
// 打开 SQLite 数据库
db, err := sql.Open("sqlite3", opts.DBPath)
if err != nil {
return nil, fmt.Errorf("taskq: failed to open database: %v", err)
}
// 初始化数据库表
if err := initStatsDB(db); err != nil {
db.Close()
return nil, fmt.Errorf("taskq: failed to init database: %v", err)
}
ins := &Inspector{
inspector: asynq.NewInspectorFromRedisClient(redisClient),
db: db,
closeCh: make(chan struct{}),
interval: opts.Interval,
}
// 启动后台统计采集
go ins.startCollector()
return ins, nil
}
// initStatsDB creates the metrics table and its indexes when they do not
// exist yet (Prometheus style: a single table plus a label column).
//
// Design notes:
//   - one table stores the statistics of every queue; rows are told apart by
//     the queue column
//   - the indexes cover lookups by queue and by time
//   - the layout resembles Prometheus' (timestamp, labels, value) model
//
// It returns the error reported by the database, if any.
func initStatsDB(db *sql.DB) error {
	// The whole schema is sent as one multi-statement script.
	const schema = `
CREATE TABLE IF NOT EXISTS metrics (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp INTEGER NOT NULL,
queue TEXT NOT NULL,
active INTEGER DEFAULT 0,
pending INTEGER DEFAULT 0,
scheduled INTEGER DEFAULT 0,
retry INTEGER DEFAULT 0,
archived INTEGER DEFAULT 0,
completed INTEGER DEFAULT 0,
succeeded INTEGER DEFAULT 0,
failed INTEGER DEFAULT 0
);
-- 按队列查询WHERE queue = ? ORDER BY timestamp
CREATE INDEX IF NOT EXISTS idx_metrics_queue_time ON metrics(queue, timestamp DESC);
-- 按时间查询所有队列WHERE timestamp BETWEEN ? AND ?
CREATE INDEX IF NOT EXISTS idx_metrics_time ON metrics(timestamp DESC);
-- 唯一约束:同一时间同一队列只有一条记录
CREATE UNIQUE INDEX IF NOT EXISTS idx_metrics_unique ON metrics(timestamp, queue);
`
	if _, err := db.Exec(schema); err != nil {
		return err
	}
	return nil
}
// Close 关闭统计采集器
func (ins *Inspector) Close() error {
ins.closeOnce.Do(func() {
close(ins.closeCh)
})
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(queueInfos)
if ins.db != nil {
ins.db.Close()
}
return ins.inspector.Close()
}
// handleQueueDetail 处理队列详情请求
func (h *HTTPHandler) handleQueueDetail(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
return
// startCollector runs the background collection loop: on every tick of the
// configured interval it collects one round of statistics, and it returns as
// soon as the close channel is closed.
func (ins *Inspector) startCollector() {
	tick := time.NewTicker(ins.interval)
	defer tick.Stop()

	stop := ins.closeCh
	for {
		select {
		case <-tick.C:
			ins.collectStats()
		case <-stop:
			return
		}
	}
}
// collectStats takes one snapshot of every registered queue and stores it in
// the metrics table.
func (ins *Inspector) collectStats() {
	// A single timestamp is shared by all queues sampled in this round.
	ts := time.Now().Unix()

	for name := range queues {
		info, err := ins.inspector.GetQueueInfo(name)
		if err != nil {
			// Queues whose info cannot be read are skipped for this round.
			continue
		}

		var sample QueueStats
		sample.Queue = name
		sample.Timestamp = ts
		sample.Active = info.Active
		sample.Pending = info.Pending
		sample.Scheduled = info.Scheduled
		sample.Retry = info.Retry
		sample.Archived = info.Archived
		sample.Completed = info.Completed
		// Succeeded is derived: processed minus failed.
		sample.Succeeded = info.Processed - info.Failed
		sample.Failed = info.Failed

		// The save error is deliberately not acted upon here.
		_ = ins.saveMetrics(sample)
	}
}
// saveMetrics 保存统计数据到 metrics 表
func (ins *Inspector) saveMetrics(stats QueueStats) error {
if ins.db == nil {
return nil
}
// 从 URL 中提取队列名称
path := strings.TrimPrefix(r.URL.Path, h.rootPath+"/api/queues/")
parts := strings.Split(path, "/")
if len(parts) == 0 || parts[0] == "" {
http.Error(w, "Queue name is required", http.StatusBadRequest)
return
}
queueName := parts[0]
_, err := ins.db.Exec(`
INSERT OR REPLACE INTO metrics (timestamp, queue, active, pending, scheduled, retry, archived, completed, succeeded, failed)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`, stats.Timestamp, stats.Queue, stats.Active, stats.Pending, stats.Scheduled, stats.Retry, stats.Archived, stats.Completed, stats.Succeeded, stats.Failed)
// 检查队列是否已注册
if _, exists := queues[queueName]; !exists {
http.Error(w, "Queue not found", http.StatusNotFound)
return
return err
}
// GetQueueInfo returns the current information of the queue named queueName
// by delegating to the wrapped asynq inspector.
func (ins *Inspector) GetQueueInfo(queueName string) (*asynq.QueueInfo, error) {
return ins.inspector.GetQueueInfo(queueName)
}
// ListActiveTasks returns the active tasks of queueName. The list options
// are passed through unchanged to the wrapped asynq inspector.
func (ins *Inspector) ListActiveTasks(queueName string, opts ...asynq.ListOption) ([]*asynq.TaskInfo, error) {
return ins.inspector.ListActiveTasks(queueName, opts...)
}
// ListPendingTasks returns the pending tasks of queueName. The list options
// are passed through unchanged to the wrapped asynq inspector.
func (ins *Inspector) ListPendingTasks(queueName string, opts ...asynq.ListOption) ([]*asynq.TaskInfo, error) {
return ins.inspector.ListPendingTasks(queueName, opts...)
}
// ListScheduledTasks returns the scheduled tasks of queueName. The list
// options are passed through unchanged to the wrapped asynq inspector.
func (ins *Inspector) ListScheduledTasks(queueName string, opts ...asynq.ListOption) ([]*asynq.TaskInfo, error) {
return ins.inspector.ListScheduledTasks(queueName, opts...)
}
// ListRetryTasks returns the tasks of queueName that are waiting to be
// retried. The list options are passed through unchanged to the wrapped asynq
// inspector.
func (ins *Inspector) ListRetryTasks(queueName string, opts ...asynq.ListOption) ([]*asynq.TaskInfo, error) {
return ins.inspector.ListRetryTasks(queueName, opts...)
}
// ListArchivedTasks returns the archived tasks of queueName. The list options
// are passed through unchanged to the wrapped asynq inspector.
func (ins *Inspector) ListArchivedTasks(queueName string, opts ...asynq.ListOption) ([]*asynq.TaskInfo, error) {
return ins.inspector.ListArchivedTasks(queueName, opts...)
}
// ListCompletedTasks returns the completed tasks of queueName. The list
// options are passed through unchanged to the wrapped asynq inspector.
func (ins *Inspector) ListCompletedTasks(queueName string, opts ...asynq.ListOption) ([]*asynq.TaskInfo, error) {
return ins.inspector.ListCompletedTasks(queueName, opts...)
}
// RunTask asks the wrapped asynq inspector to run the task taskID of
// queueName right away. It is intended for re-running archived (failed) tasks.
func (ins *Inspector) RunTask(queueName, taskID string) error {
return ins.inspector.RunTask(queueName, taskID)
}
// PauseQueue pauses the queue named queueName by delegating to the wrapped
// asynq inspector.
func (ins *Inspector) PauseQueue(queueName string) error {
return ins.inspector.PauseQueue(queueName)
}
// UnpauseQueue resumes the queue named queueName by delegating to the wrapped
// asynq inspector.
func (ins *Inspector) UnpauseQueue(queueName string) error {
return ins.inspector.UnpauseQueue(queueName)
}
// ==================== 统计数据结构 ====================
// QueueInfo holds the detailed information of a single queue.
type QueueInfo struct {
Name string `json:"name"`
Priority int `json:"priority"`
Size int `json:"size"` // total number of tasks in the queue
Active int `json:"active"` // number of active tasks
Pending int `json:"pending"` // number of pending tasks
Scheduled int `json:"scheduled"` // number of scheduled tasks
Retry int `json:"retry"` // number of tasks waiting for retry
Archived int `json:"archived"` // number of archived tasks
Completed int `json:"completed"` // number of completed tasks
Processed int `json:"processed"` // cumulative processed count (today)
Failed int `json:"failed"` // cumulative failed count (today)
Paused bool `json:"paused"` // whether the queue is paused
MemoryUsage int64 `json:"memory_usage"` // memory usage (bytes)
Latency int64 `json:"latency"` // latency (milliseconds)
}
// QueueStats is one data point of queue statistics (used to store history).
// The JSON keys are abbreviated.
type QueueStats struct {
Timestamp int64 `json:"t"` // Unix timestamp (seconds)
Queue string `json:"q,omitempty"` // queue name (empty for aggregate queries)
Active int `json:"a"` // number of active tasks
Pending int `json:"p"` // number of pending tasks
Scheduled int `json:"s"` // number of scheduled tasks
Retry int `json:"r"` // number of tasks waiting for retry
Archived int `json:"ar"` // number of archived tasks
Completed int `json:"c"` // number of completed tasks
Succeeded int `json:"su"` // number of succeeded tasks
Failed int `json:"f"` // number of failed tasks
}
// ==================== 全局统计数据查询 ====================
var (
	// statsDB is the package-level statistics database; it is nil until
	// SetStatsDB stores a handle in it.
	statsDB *sql.DB
	// statsDBMu guards statsDB.
	statsDBMu sync.RWMutex
)

// SetStatsDB installs db as the global statistics database (intended for use
// by HTTPHandler).
func SetStatsDB(db *sql.DB) {
	statsDBMu.Lock()
	statsDB = db
	statsDBMu.Unlock()
}
// StatsQuery holds the parameters of a statistics query.
type StatsQuery struct {
Queue string // queue name; empty means aggregate over all queues
Start int64 // start timestamp; 0 means no lower bound
End int64 // end timestamp; 0 means no upper bound
Limit int // maximum number of rows returned; defaults to 500
}
// getQueueStats returns the stored history of queueName, limited to limit
// data points. It is a shorthand for getQueueStatsWithQuery without a time
// range.
func getQueueStats(queueName string, limit int) ([]QueueStats, error) {
	var q StatsQuery
	q.Queue = queueName
	q.Limit = limit
	return getQueueStatsWithQuery(q)
}
// getQueueStatsWithQuery 根据查询条件获取统计数据Prometheus 风格单表查询)
// - 按队列查询:使用 idx_metrics_queue_time 索引
// - 按时间汇总:使用 idx_metrics_time 索引 + GROUP BY
func getQueueStatsWithQuery(q StatsQuery) ([]QueueStats, error) {
statsDBMu.RLock()
db := statsDB
statsDBMu.RUnlock()
if db == nil {
return nil, nil
}
// 获取队列详细信息
stats, err := h.inspector.GetQueueInfo(queueName)
if q.Limit <= 0 {
q.Limit = 500
}
var args []any
var whereClause string
var conditions []string
// 构建 WHERE 条件
if q.Queue != "" {
conditions = append(conditions, "queue = ?")
args = append(args, q.Queue)
}
if q.Start > 0 {
conditions = append(conditions, "timestamp >= ?")
args = append(args, q.Start)
}
if q.End > 0 {
conditions = append(conditions, "timestamp <= ?")
args = append(args, q.End)
}
if len(conditions) > 0 {
whereClause = "WHERE " + strings.Join(conditions, " AND ")
}
var query string
if q.Queue != "" {
// 查询单个队列
query = fmt.Sprintf(`
SELECT timestamp, queue, active, pending, scheduled, retry, archived, completed, succeeded, failed
FROM metrics
%s
ORDER BY timestamp DESC
LIMIT ?
`, whereClause)
} else {
// 查询所有队列汇总(按时间 GROUP BY
query = fmt.Sprintf(`
SELECT timestamp, '' as queue, SUM(active), SUM(pending), SUM(scheduled), SUM(retry), SUM(archived), SUM(completed), SUM(succeeded), SUM(failed)
FROM metrics
%s
GROUP BY timestamp
ORDER BY timestamp DESC
LIMIT ?
`, whereClause)
}
args = append(args, q.Limit)
rows, err := db.Query(query, args...)
if err != nil {
http.Error(w, fmt.Sprintf("Failed to get queue info: %v", err), http.StatusInternalServerError)
return
return nil, err
}
defer rows.Close()
var statsList []QueueStats
for rows.Next() {
var s QueueStats
if err := rows.Scan(&s.Timestamp, &s.Queue, &s.Active, &s.Pending, &s.Scheduled, &s.Retry, &s.Archived, &s.Completed, &s.Succeeded, &s.Failed); err != nil {
continue
}
statsList = append(statsList, s)
}
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(stats)
// 反转顺序,使时间从早到晚
for i, j := 0, len(statsList)-1; i < j; i, j = i+1, j-1 {
statsList[i], statsList[j] = statsList[j], statsList[i]
}
return statsList, nil
}
// TaskInfo is the JSON representation of a single task. The time fields are
// strings and are omitted from the JSON output when empty.
type TaskInfo struct {
ID string `json:"id"`
Type string `json:"type"`
Payload string `json:"payload"`
Queue string `json:"queue"`
Retried int `json:"retried"`
LastFailed string `json:"last_failed,omitempty"`
LastError string `json:"last_error,omitempty"`
NextProcess string `json:"next_process,omitempty"`
CompletedAt string `json:"completed_at,omitempty"`
}
// handleTasks serves the task list endpoint:
//
//	GET <rootPath>/api/tasks/<queue>/<state>?page=N&page_size=M
//
// <state> is one of active, pending, retry, archived or completed. page is
// 1-based and defaults to 1; page_size defaults to 20. A page that is not a
// positive integer, or a page_size outside 1..100, is ignored and the default
// is used instead.
//
// Responses:
//   - 405 for any method other than GET
//   - 400 when the queue/state path segments are missing or the state is unknown
//   - 404 when the queue is not present in the package-level queues map
//   - 500 when the asynq inspector returns an error
//   - 200 with a JSON object {"tasks", "page", "page_size", "total"}; "tasks"
//     is always an array (possibly empty) and "total" is the number of tasks
//     on the returned page.
func (h *HTTPHandler) handleTasks(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	// Extract the queue name and the task state from the URL path.
	path := strings.TrimPrefix(r.URL.Path, h.rootPath+"/api/tasks/")
	parts := strings.Split(path, "/")
	if len(parts) < 2 {
		http.Error(w, "Queue name and task state are required", http.StatusBadRequest)
		return
	}
	queueName, taskState := parts[0], parts[1]

	// Only registered queues may be inspected.
	if _, exists := queues[queueName]; !exists {
		http.Error(w, "Queue not found", http.StatusNotFound)
		return
	}

	// Parse the pagination parameters; invalid values keep the defaults.
	page, pageSize := 1, 20
	params := r.URL.Query()
	if p := params.Get("page"); p != "" {
		if parsed, err := strconv.Atoi(p); err == nil && parsed > 0 {
			page = parsed
		}
	}
	if ps := params.Get("page_size"); ps != "" {
		if parsed, err := strconv.Atoi(ps); err == nil && parsed > 0 && parsed <= 100 {
			pageSize = parsed
		}
	}

	// Pick the inspector call that matches the requested state.
	var list func(string, ...asynq.ListOption) ([]*asynq.TaskInfo, error)
	switch taskState {
	case "active":
		list = h.inspector.ListActiveTasks
	case "pending":
		list = h.inspector.ListPendingTasks
	case "retry":
		list = h.inspector.ListRetryTasks
	case "archived":
		list = h.inspector.ListArchivedTasks
	case "completed":
		list = h.inspector.ListCompletedTasks
	default:
		http.Error(w, "Invalid task state. Valid states: active, pending, retry, archived, completed", http.StatusBadRequest)
		return
	}

	// asynq page numbers are 1-based, exactly like the request parameter, so
	// the page is forwarded as-is (subtracting one would shift every page).
	tasks, err := list(queueName, asynq.PageSize(pageSize), asynq.Page(page))
	if err != nil {
		http.Error(w, fmt.Sprintf("Failed to get tasks: %v", err), http.StatusInternalServerError)
		return
	}

	// A non-nil slice makes an empty result encode as [] instead of null.
	taskInfos := make([]TaskInfo, 0, len(tasks))
	for _, task := range tasks {
		info := TaskInfo{
			ID:        task.ID,
			Type:      task.Type,
			Payload:   string(task.Payload),
			Queue:     task.Queue,
			Retried:   task.Retried,
			LastError: task.LastErr,
		}
		// Zero times are left empty so that omitempty drops them.
		if !task.LastFailedAt.IsZero() {
			info.LastFailed = task.LastFailedAt.Format(time.RFC3339)
		}
		if !task.NextProcessAt.IsZero() {
			info.NextProcess = task.NextProcessAt.Format(time.RFC3339)
		}
		if !task.CompletedAt.IsZero() {
			info.CompletedAt = task.CompletedAt.Format(time.RFC3339)
		}
		taskInfos = append(taskInfos, info)
	}

	response := map[string]any{
		"tasks":     taskInfos,
		"page":      page,
		"page_size": pageSize,
		"total":     len(taskInfos),
	}

	w.Header().Set("Content-Type", "application/json")
	// The status line is already committed once encoding starts, so an encode
	// error (typically a disconnected client) cannot be reported to the caller.
	_ = json.NewEncoder(w).Encode(response)
}
// handleDashboard 处理仪表板页面
func (h *HTTPHandler) handleDashboard(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
return
}
// 使用嵌入的 HTML 模板
tmpl, err := template.New("dashboard").Parse(dashboardHTML)
if err != nil {
http.Error(w, fmt.Sprintf("Template error: %v", err), http.StatusInternalServerError)
return
}
data := struct {
RootPath string
}{
RootPath: h.rootPath,
}
w.Header().Set("Content-Type", "text/html; charset=utf-8")
tmpl.Execute(w, data)
// GetStatsDB returns the Inspector's database handle (so that callers can
// hand it to the HTTPHandler side).
func (ins *Inspector) GetStatsDB() *sql.DB {
return ins.db
}