Files
pipelinedb/examples/data-analytics/main.go
2025-09-30 15:05:56 +08:00

305 lines
7.8 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// 演示数据分析和报告功能
package main
import (
"encoding/json"
"fmt"
"log"
"math/rand"
"os"
"sort"
"time"
"code.tczkiot.com/wlw/pipelinedb"
"code.tczkiot.com/wlw/pipelinedb/examples/common"
)
// UserEvent is one recorded user interaction. It is the payload that gets
// JSON-encoded before being stored and decoded again for analysis.
type UserEvent struct {
	UserID    string    `json:"user_id"`         // identifier of the acting user
	Action    string    `json:"action"`          // name of the action performed
	Page      string    `json:"page"`            // page the action happened on
	Timestamp time.Time `json:"timestamp"`       // moment the event occurred
	Value     float64   `json:"value,omitempty"` // left out of the JSON when zero
}
// AnalyticsReport holds the aggregated results of one analysis pass and is
// the structure serialized to JSON when the report is exported.
type AnalyticsReport struct {
	TotalEvents    int            `json:"total_events"`    // number of events analysed
	UniqueUsers    int            `json:"unique_users"`    // number of distinct user IDs
	TopActions     []ActionCount  `json:"top_actions"`     // per-action event counts
	TopPages       []PageCount    `json:"top_pages"`       // per-page event counts
	UserActivity   map[string]int `json:"user_activity"`   // event count keyed by user ID
	HourlyActivity map[int]int    `json:"hourly_activity"` // event count keyed by hour of day
	TotalValue     float64        `json:"total_value"`     // sum of all event values
}
// ActionCount pairs an action name with the number of events that used it.
type ActionCount struct {
	Action string `json:"action"` // action name
	Count  int    `json:"count"`  // number of events with this action
}
// PageCount pairs a page with the number of events recorded on it.
type PageCount struct {
	Page  string `json:"page"`  // page identifier
	Count int    `json:"count"` // number of events on this page
}
func main() {
// 创建临时数据库文件
dbFile := "analytics_example.db"
defer os.Remove(dbFile)
// 确保文件可以创建
if _, err := os.Create(dbFile); err != nil {
log.Fatalf("创建数据库文件失败: %v", err)
}
fmt.Println("🚀 数据分析示例")
fmt.Println("================")
// 配置数据库
fmt.Println("\n📂 步骤1: 配置数据库")
config := &pipelinedb.Config{
CacheSize: 100,
}
// 创建处理器
handler := common.NewExampleHandler("数据分析")
pdb, err := pipelinedb.Open(pipelinedb.Options{
Filename: dbFile,
Config: config,
Handler: handler,
})
if err != nil {
log.Fatalf("打开数据库失败: %v", err)
}
defer pdb.Stop()
fmt.Println("✅ 数据库已配置")
// 生成模拟用户行为数据
fmt.Println("\n📊 步骤2: 生成模拟用户行为数据")
users := []string{"user001", "user002", "user003", "user004", "user005"}
actions := []string{"页面访问", "按钮点击", "表单提交", "文件下载", "搜索"}
pages := []string{"/home", "/products", "/about", "/contact", "/login", "/checkout"}
// 生成1000个随机事件
events := make([]UserEvent, 1000)
baseTime := time.Now().Add(-24 * time.Hour) // 从24小时前开始
for i := 0; i < 1000; i++ {
event := UserEvent{
UserID: users[rand.Intn(len(users))],
Action: actions[rand.Intn(len(actions))],
Page: pages[rand.Intn(len(pages))],
Timestamp: baseTime.Add(time.Duration(i) * time.Minute),
}
// 为某些动作添加价值
if event.Action == "表单提交" || event.Action == "文件下载" {
event.Value = rand.Float64() * 100
}
events[i] = event
}
fmt.Printf("✅ 生成了 %d 个用户事件\n", len(events))
// 将事件存储到数据库
fmt.Println("\n💾 步骤3: 存储事件数据")
for i, event := range events {
// 序列化事件数据
eventData, err := json.Marshal(event)
if err != nil {
log.Fatalf("序列化事件失败: %v", err)
}
// 创建元数据
metadata := fmt.Sprintf(`{"user_id": "%s", "action": "%s", "page": "%s"}`,
event.UserID, event.Action, event.Page)
// 根据动作类型分组
group := "用户行为"
if event.Action == "搜索" {
group = "搜索行为"
} else if event.Value > 0 {
group = "有价值行为"
}
recordID, err := pdb.AcceptData(group, eventData, metadata)
if err != nil {
log.Fatalf("存储事件失败: %v", err)
}
if (i+1)%200 == 0 {
fmt.Printf(" 📝 已存储 %d/%d 个事件最新ID: %d\n", i+1, len(events), recordID)
}
}
// 数据分析
fmt.Println("\n🔍 步骤4: 执行数据分析")
report := &AnalyticsReport{
UserActivity: make(map[string]int),
HourlyActivity: make(map[int]int),
}
// 获取所有组的数据
groups := []string{"用户行为", "搜索行为", "有价值行为"}
allEvents := []UserEvent{}
for _, group := range groups {
fmt.Printf("📋 分析组: %s\n", group)
pageReq := &pipelinedb.PageRequest{
Page: 1,
PageSize: 1000, // 获取所有数据
}
response, err := pdb.GetRecordsByGroup(group, pageReq)
if err != nil {
fmt.Printf(" ❌ 查询组失败: %v\n", err)
continue
}
fmt.Printf(" 📊 找到 %d 条记录\n", len(response.Records))
// 解析事件数据
for _, record := range response.Records {
var event UserEvent
if err := json.Unmarshal(record.Data, &event); err != nil {
continue
}
allEvents = append(allEvents, event)
}
}
// 分析数据
fmt.Println("\n📈 步骤5: 生成分析报告")
report.TotalEvents = len(allEvents)
// 统计唯一用户
uniqueUsers := make(map[string]bool)
actionCounts := make(map[string]int)
pageCounts := make(map[string]int)
for _, event := range allEvents {
// 唯一用户
uniqueUsers[event.UserID] = true
// 用户活跃度
report.UserActivity[event.UserID]++
// 动作统计
actionCounts[event.Action]++
// 页面统计
pageCounts[event.Page]++
// 小时活跃度
hour := event.Timestamp.Hour()
report.HourlyActivity[hour]++
// 总价值
report.TotalValue += event.Value
}
report.UniqueUsers = len(uniqueUsers)
// 排序Top动作
for action, count := range actionCounts {
report.TopActions = append(report.TopActions, ActionCount{
Action: action,
Count: count,
})
}
sort.Slice(report.TopActions, func(i, j int) bool {
return report.TopActions[i].Count > report.TopActions[j].Count
})
// 排序Top页面
for page, count := range pageCounts {
report.TopPages = append(report.TopPages, PageCount{
Page: page,
Count: count,
})
}
sort.Slice(report.TopPages, func(i, j int) bool {
return report.TopPages[i].Count > report.TopPages[j].Count
})
// 显示报告
fmt.Println("\n📋 数据分析报告")
fmt.Println("================")
fmt.Printf("📊 总体统计:\n")
fmt.Printf(" 总事件数: %d\n", report.TotalEvents)
fmt.Printf(" 唯一用户数: %d\n", report.UniqueUsers)
fmt.Printf(" 总价值: %.2f\n", report.TotalValue)
fmt.Printf(" 平均每用户事件: %.1f\n", float64(report.TotalEvents)/float64(report.UniqueUsers))
fmt.Printf("\n🔥 热门动作 (Top 5):\n")
for i, action := range report.TopActions {
if i >= 5 {
break
}
percentage := float64(action.Count) / float64(report.TotalEvents) * 100
fmt.Printf(" %d. %s: %d 次 (%.1f%%)\n",
i+1, action.Action, action.Count, percentage)
}
fmt.Printf("\n📄 热门页面 (Top 5):\n")
for i, page := range report.TopPages {
if i >= 5 {
break
}
percentage := float64(page.Count) / float64(report.TotalEvents) * 100
fmt.Printf(" %d. %s: %d 次 (%.1f%%)\n",
i+1, page.Page, page.Count, percentage)
}
fmt.Printf("\n👥 用户活跃度:\n")
for userID, count := range report.UserActivity {
fmt.Printf(" %s: %d 次事件\n", userID, count)
}
// 数据库统计
fmt.Println("\n💾 数据库统计")
stats, err := pdb.GetStats()
if err != nil {
log.Fatalf("获取数据库统计失败: %v", err)
}
fmt.Printf("📈 存储统计:\n")
fmt.Printf(" 数据库总记录: %d\n", stats.TotalRecords)
fmt.Printf(" 总组数: %d\n", len(stats.GroupStats))
for group, groupStats := range stats.GroupStats {
fmt.Printf(" [%s]: 热:%d 温:%d 冷:%d\n",
group, groupStats.HotRecords, groupStats.WarmRecords, groupStats.ColdRecords)
}
// 导出报告
fmt.Println("\n📤 步骤6: 导出分析报告")
reportJSON, err := json.MarshalIndent(report, "", " ")
if err != nil {
log.Fatalf("序列化报告失败: %v", err)
}
reportFile := "analytics_report.json"
err = os.WriteFile(reportFile, reportJSON, 0644)
if err != nil {
log.Fatalf("写入报告文件失败: %v", err)
}
fmt.Printf("✅ 分析报告已导出到: %s\n", reportFile)
defer os.Remove(reportFile) // 清理示例文件
fmt.Println("\n🎉 数据分析示例完成!")
fmt.Println("💡 提示: 这个示例展示了如何使用Pipeline Database进行复杂的数据分析")
}