diff options
| author | Paul Buetow <paul@buetow.org> | 2025-06-26 20:57:53 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-06-26 20:57:53 +0300 |
| commit | 4a657e44e7111d7d3b9a9ba5e453901e19af2ecb (patch) | |
| tree | 5cc8571e00a29ab7656633984fb9893ca369ccec /benchmarks/cmd | |
| parent | ee5250441a2d241dc1a980dfd051a12f2db898cf (diff) | |
fix: resolve package conflicts and update documentation
- Move main package files to benchmarks/cmd/ to fix test failures
- Update CLAUDE.md with comprehensive benchmarking and profiling instructions
- Fix unused imports in serverless.go
- Remove experimental buffered pipe/copy implementations
- Remove outdated documentation files
All integration tests now pass successfully.
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'benchmarks/cmd')
| -rw-r--r-- | benchmarks/cmd/generate_profile_data.go | 159 | ||||
| -rw-r--r-- | benchmarks/cmd/profile_example.go | 316 |
2 files changed, 475 insertions, 0 deletions
// Reconstructed from the collapsed diff of
// benchmarks/cmd/generate_profile_data.go (new file, blob 0b34047).

package main

import (
	"bufio"
	"flag"
	"fmt"
	"io"
	"log"
	"math"
	"math/rand"
	"os"
	"strconv"
	"strings"
	"time"
)

// errorStackTrace is the fixed block appended after a fraction of ERROR lines
// so the generated log is not perfectly uniform.
const errorStackTrace = " Stack trace:\n at function1() file1.go:123\n at function2() file2.go:456\n at main() main.go:789\n"

// main parses the -size, -output and -format flags and writes a synthetic
// log or CSV file of at least the requested size.
func main() {
	var size, output, format string

	flag.StringVar(&size, "size", "10MB", "Size of the file (e.g., 10MB, 100MB, 1GB)")
	flag.StringVar(&output, "output", "test.log", "Output file path")
	flag.StringVar(&format, "format", "log", "Format: log or csv")
	flag.Parse()

	sizeBytes, err := parseSize(size)
	if err != nil {
		log.Fatalf("Invalid size: %v", err)
	}

	switch format {
	case "log":
		generateLogFile(output, sizeBytes)
	case "csv":
		generateCSVFile(output, sizeBytes)
	default:
		log.Fatalf("Unknown format: %s", format)
	}

	fmt.Printf("Generated %s file: %s\n", size, output)
}

// parseSize converts a human-readable size such as "10MB", "1gb", "4KB" or a
// bare byte count like "512" into a number of bytes. Units are binary
// (1KB = 1024 bytes) and matched case-insensitively; surrounding whitespace
// and whitespace between the number and the unit are ignored.
//
// It returns an error when the numeric part is not a base-10 integer, when it
// is negative, or when the resulting byte count does not fit in an int64.
func parseSize(size string) (int64, error) {
	s := strings.ToUpper(strings.TrimSpace(size))
	multiplier := int64(1)

	switch {
	case strings.HasSuffix(s, "GB"):
		multiplier = 1024 * 1024 * 1024
		s = strings.TrimSuffix(s, "GB")
	case strings.HasSuffix(s, "MB"):
		multiplier = 1024 * 1024
		s = strings.TrimSuffix(s, "MB")
	case strings.HasSuffix(s, "KB"):
		multiplier = 1024
		s = strings.TrimSuffix(s, "KB")
	}

	base, err := strconv.ParseInt(strings.TrimSpace(s), 10, 64)
	if err != nil {
		return 0, fmt.Errorf("parsing size %q: %w", size, err)
	}
	if base < 0 {
		return 0, fmt.Errorf("size %q must not be negative", size)
	}
	// Guard the multiplication: int64 overflow wraps silently in Go and would
	// otherwise yield a nonsensical (possibly negative) target size.
	if base > math.MaxInt64/multiplier {
		return 0, fmt.Errorf("size %q overflows int64", size)
	}

	return base * multiplier, nil
}

// writeFile creates (or truncates) filename, hands a buffered writer over it
// to fill, then flushes and closes the file. The first error encountered is
// returned, including a failing Close, which for a freshly written file can
// be the only report of lost data.
func writeFile(filename string, fill func(w io.Writer) error) (err error) {
	f, err := os.Create(filename)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	// Buffer the output: the generators emit many short lines and an
	// unbuffered *os.File would issue one write syscall per line.
	w := bufio.NewWriter(f)
	if err = fill(w); err != nil {
		return err
	}
	return w.Flush()
}

// generateLogFile writes pseudo-random log lines to filename until at least
// targetSize bytes have been produced. Any I/O error terminates the program
// via log.Fatal.
func generateLogFile(filename string, targetSize int64) {
	err := writeFile(filename, func(w io.Writer) error {
		return writeLogLines(w, targetSize)
	})
	if err != nil {
		log.Fatal(err)
	}
}

// writeLogLines emits log lines to w until at least targetSize bytes have
// been written. Timestamps advance by one millisecond per line starting from
// the current time; level, action, user and duration are random.
func writeLogLines(w io.Writer, targetSize int64) error {
	logLevels := []string{"INFO", "WARN", "ERROR", "DEBUG"}
	actions := []string{
		"Processing request",
		"Handling connection",
		"Executing query",
		"Loading configuration",
		"Updating cache",
		"Validating input",
		"Sending response",
		"Checking permissions",
	}

	startTime := time.Now()
	var bytesWritten int64

	for lineNum := 1; bytesWritten < targetSize; lineNum++ {
		timestamp := startTime.Add(time.Duration(lineNum) * time.Millisecond).Format("2006-01-02 15:04:05.000")
		level := logLevels[rand.Intn(len(logLevels))]
		action := actions[rand.Intn(len(actions))]
		userID := rand.Intn(1000)
		duration := rand.Intn(5000)

		n, err := fmt.Fprintf(w, "[%s] %s - %s for user%d (request: req-%d, duration: %dms)\n",
			timestamp, level, action, userID, lineNum, duration)
		if err != nil {
			return err
		}
		bytesWritten += int64(n)

		// Add some variety: roughly 30% of ERROR lines get a stack trace.
		if level == "ERROR" && rand.Float32() < 0.3 {
			n, err := io.WriteString(w, errorStackTrace)
			if err != nil {
				return err
			}
			bytesWritten += int64(n)
		}
	}
	return nil
}

// generateCSVFile writes a CSV header followed by pseudo-random rows to
// filename until at least targetSize bytes have been produced. Any I/O error
// terminates the program via log.Fatal.
func generateCSVFile(filename string, targetSize int64) {
	err := writeFile(filename, func(w io.Writer) error {
		return writeCSVRows(w, targetSize)
	})
	if err != nil {
		log.Fatal(err)
	}
}

// writeCSVRows emits the CSV header and then rows to w until at least
// targetSize bytes (header included) have been written. The header is always
// written, even when targetSize is zero or negative.
func writeCSVRows(w io.Writer, targetSize int64) error {
	const header = "timestamp,user,action,duration,status,category\n"

	n, err := io.WriteString(w, header)
	if err != nil {
		return err
	}
	bytesWritten := int64(n)

	actions := []string{"login", "query", "update", "delete", "logout", "search", "export", "import"}
	statuses := []string{"success", "failure", "timeout", "pending"}
	categories := []string{"web", "api", "batch", "admin"}

	startTime := time.Now()

	for lineNum := 1; bytesWritten < targetSize; lineNum++ {
		timestamp := startTime.Add(time.Duration(lineNum) * time.Second).Format("2006-01-02 15:04:05")
		userID := rand.Intn(100)
		action := actions[rand.Intn(len(actions))]
		duration := 100 + rand.Intn(9900)
		status := statuses[rand.Intn(len(statuses))]
		category := categories[rand.Intn(len(categories))]

		n, err := fmt.Fprintf(w, "%s,user%d,%s,%d,%s,%s\n",
			timestamp, userID, action, duration, status, category)
		if err != nil {
			return err
		}
		bytesWritten += int64(n)
	}
	return nil
}
// Reconstructed from the collapsed diff of
// benchmarks/cmd/profile_example.go (new file, blob f996565).

package main

import (
	"bufio"
	"fmt"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"
)

// main is an example of using the profiling framework to find performance
// bottlenecks: it generates test data, runs dcat, dgrep and dmap with
// profiling enabled, and then summarizes the resulting CPU profiles.
func main() {
	fmt.Println("DTail Profiling Example")
	fmt.Println("======================")
	fmt.Println()

	// Create test data; removal is best-effort cleanup.
	testFile := createTestData()
	defer os.Remove(testFile)

	fmt.Println("1. Profiling dcat...")
	profileDCat(testFile)

	fmt.Println("\n2. Profiling dgrep...")
	profileDGrep(testFile)

	csvFile := createCSVData()
	defer os.Remove(csvFile)
	fmt.Println("\n3. Profiling dmap...")
	profileDMap(csvFile)

	fmt.Println("\n4. Analyzing profiles...")
	analyzeProfiles()
}

// writeDataFile creates (or truncates) filename, lets fill write through a
// buffered writer, then flushes and closes the file. It returns the first
// error encountered, including one from Close.
func writeDataFile(filename string, fill func(w *bufio.Writer) error) (err error) {
	f, err := os.Create(filename)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	// Buffered so that a million short lines do not become a million
	// write syscalls.
	w := bufio.NewWriter(f)
	if err = fill(w); err != nil {
		return err
	}
	return w.Flush()
}

// createTestData writes 1,000,000 synthetic log lines (tens of megabytes) to
// test_data.log in the current directory and returns that file name. Log
// levels cycle through INFO, WARN, ERROR and DEBUG. Any I/O error terminates
// the program via log.Fatal.
func createTestData() string {
	const filename = "test_data.log"
	levels := [...]string{"INFO", "WARN", "ERROR", "DEBUG"}

	err := writeDataFile(filename, func(w *bufio.Writer) error {
		for i := 0; i < 1000000; i++ {
			timestamp := time.Now().Format("2006-01-02 15:04:05.000")
			if _, err := fmt.Fprintf(w, "[%s] %s - Processing request %d from user%d\n",
				timestamp, levels[i%len(levels)], i, i%1000); err != nil {
				return err
			}
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}

	return filename
}

// createCSVData writes a header plus 100,000 synthetic rows to test_data.csv
// in the current directory and returns that file name. Timestamps advance by
// one second per row. Any I/O error terminates the program via log.Fatal.
func createCSVData() string {
	const filename = "test_data.csv"
	actions := [...]string{"login", "query", "update", "logout"}
	statuses := [...]string{"success", "failure"}

	err := writeDataFile(filename, func(w *bufio.Writer) error {
		if _, err := fmt.Fprintln(w, "timestamp,user,action,duration,status"); err != nil {
			return err
		}
		for i := 0; i < 100000; i++ {
			timestamp := time.Now().Add(time.Duration(i) * time.Second).Format("2006-01-02 15:04:05")
			if _, err := fmt.Fprintf(w, "%s,user%d,%s,%d,%s\n",
				timestamp, i%100, actions[i%len(actions)], 100+i%900, statuses[i%len(statuses)]); err != nil {
				return err
			}
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}

	return filename
}

// runTimed runs cmd to completion and returns its combined stdout/stderr,
// the wall-clock time it took, and the error from running it.
func runTimed(cmd *exec.Cmd) ([]byte, time.Duration, error) {
	start := time.Now()
	output, err := cmd.CombinedOutput()
	return output, time.Since(start), err
}

// profileDCat runs ../dcat over testFile with profiling enabled, reports the
// elapsed time and lists the dcat profile files found under profiles/.
func profileDCat(testFile string) {
	cmd := exec.Command("../dcat",
		"-profile",
		"-profiledir", "profiles",
		"-plain",
		"-cfg", "none",
		testFile)

	output, duration, err := runTimed(cmd)
	if err != nil {
		fmt.Printf("Error: %v\n", err)
		fmt.Printf("Output: %s\n", output)
		return
	}

	fmt.Printf(" Completed in %v\n", duration)

	profiles, err := filepath.Glob("profiles/dcat_*.prof")
	if err != nil {
		fmt.Printf("Error: %v\n", err)
		return
	}
	for _, p := range profiles {
		info, err := os.Stat(p)
		if err != nil {
			// The file vanished or is unreadable; skipping avoids a nil
			// dereference on info.
			continue
		}
		fmt.Printf(" Generated: %s (%d KB)\n", filepath.Base(p), info.Size()/1024)
	}
}

// profileDGrep runs ../dgrep over testFile with profiling enabled, matching
// ERROR|WARN with two lines of context either side, and reports the elapsed
// time and the number of ERROR/WARN occurrences in the output.
func profileDGrep(testFile string) {
	cmd := exec.Command("../dgrep",
		"-profile",
		"-profiledir", "profiles",
		"-plain",
		"-cfg", "none",
		"-regex", "ERROR|WARN",
		"-before", "2",
		"-after", "2",
		testFile)

	output, duration, err := runTimed(cmd)
	if err != nil {
		fmt.Printf("Error: %v\n", err)
		fmt.Printf("Output: %s\n", output)
		return
	}

	fmt.Printf(" Completed in %v\n", duration)

	// Convert once; the output can be large.
	text := string(output)
	matches := strings.Count(text, "ERROR") + strings.Count(text, "WARN")
	fmt.Printf(" Found %d matches\n", matches)
}

// profileDMap runs ../dmap with profiling enabled once per sample query
// against csvFile (passed as an absolute path via -files) and reports the
// elapsed time of each run. A failing query is reported and skipped.
func profileDMap(csvFile string) {
	absPath, err := filepath.Abs(csvFile)
	if err != nil {
		fmt.Printf("Error getting absolute path: %v\n", err)
		return
	}

	queries := []string{
		"select count(*)",
		"select user, count(*) group by user",
		"select action, avg(duration), max(duration) group by action",
	}

	for i, query := range queries {
		fmt.Printf(" Query %d: %s\n", i+1, query)

		cmd := exec.Command("../dmap",
			"-profile",
			"-profiledir", "profiles",
			"-plain",
			"-cfg", "none",
			"-files", absPath,
			"-query", query)

		output, duration, err := runTimed(cmd)
		if err != nil {
			fmt.Printf(" Error: %v\n", err)
			fmt.Printf(" Output: %s\n", output)
			continue
		}

		fmt.Printf(" Completed in %v\n", duration)
	}
}

// truncateQuery shortens query for display: queries longer than 50
// characters are cut to their first 47 characters followed by "...".
// Length is measured in runes, so multi-byte UTF-8 characters are never
// split.
func truncateQuery(query string) string {
	const (
		maxLen = 50
		keep   = 47
	)
	// Fast path: at most maxLen bytes implies at most maxLen runes.
	if len(query) <= maxLen {
		return query
	}
	runes := []rune(query)
	if len(runes) <= maxLen {
		return query
	}
	return string(runes[:keep]) + "..."
}

// newestProfileFor returns the most recently modified path in profiles whose
// name contains "<tool>_cpu_", or "" when there is none. Paths that cannot
// be stat'ed are ignored.
func newestProfileFor(tool string, profiles []string) string {
	var (
		newest     string
		newestTime time.Time
	)
	for _, profile := range profiles {
		if !strings.Contains(profile, tool+"_cpu_") {
			continue
		}
		info, err := os.Stat(profile)
		if err == nil && info.ModTime().After(newestTime) {
			newest = profile
			newestTime = info.ModTime()
		}
	}
	return newest
}

// analyzeProfiles finds the newest CPU profile of dcat, dgrep and dmap under
// profiles/, runs ../profiling/profile.sh -top 5 on each, prints the
// function table from its output and then prints optimization suggestions.
func analyzeProfiles() {
	cpuProfiles, err := filepath.Glob("profiles/*_cpu_*.prof")
	if err != nil {
		fmt.Printf(" Error analyzing: %v\n", err)
		return
	}
	if len(cpuProfiles) == 0 {
		fmt.Println("No CPU profiles found")
		return
	}

	for _, tool := range []string{"dcat", "dgrep", "dmap"} {
		latestProfile := newestProfileFor(tool, cpuProfiles)
		if latestProfile == "" {
			continue
		}

		fmt.Printf("\nAnalyzing %s CPU profile:\n", tool)

		cmd := exec.Command("../profiling/profile.sh",
			"-top", "5",
			latestProfile)

		output, err := cmd.CombinedOutput()
		if err != nil {
			fmt.Printf(" Error analyzing: %v\n", err)
			continue
		}
		analysis := string(output)

		// Print the table: it starts at the line naming the "Function" and
		// "Flat" columns and ends at the first empty line after that.
		inTable := false
		for _, line := range strings.Split(analysis, "\n") {
			if strings.Contains(line, "Function") && strings.Contains(line, "Flat") {
				inTable = true
			}
			if inTable && (strings.Contains(line, "%") || strings.Contains(line, "---")) {
				fmt.Printf(" %s\n", line)
			}
			if inTable && line == "" {
				break
			}
		}

		suggestOptimizations(tool, analysis)
	}
}

// suggestOptimizations prints a canned hint for every known hot-spot symbol
// that appears anywhere in analysis, or a "looks good" line when none does.
func suggestOptimizations(tool string, analysis string) {
	fmt.Printf("\n Optimization suggestions for %s:\n", tool)

	suggestions := []struct {
		pattern    string
		suggestion string
	}{
		{"regexp.Compile", " - Pre-compile regex patterns instead of compiling in loops"},
		{"strings.Join", " - Use strings.Builder for string concatenation"},
		{"runtime.mallocgc", " - High allocation rate; consider object pooling"},
		{"syscall", " - I/O bottleneck; consider buffering or async I/O"},
		{"runtime.gcBgMarkWorker", " - High GC pressure; reduce allocations"},
	}

	foundAny := false
	for _, s := range suggestions {
		if strings.Contains(analysis, s.pattern) {
			fmt.Println(s.suggestion)
			foundAny = true
		}
	}

	if !foundAny {
		fmt.Println(" - Profile looks good; no obvious bottlenecks found")
	}
}

// ExampleBenchmarkWithProfiling prints a sample benchmark showing how to
// enable CPU and heap profiling in tests. The printed code would typically
// live in a _test.go file.
func ExampleBenchmarkWithProfiling() {
	const example = `
Example benchmark with profiling:

func BenchmarkDCatLargeFile(b *testing.B) {
	// Enable profiling for this specific benchmark
	if *cpuprofile != "" {
		f, _ := os.Create(*cpuprofile)
		pprof.StartCPUProfile(f)
		defer pprof.StopCPUProfile()
	}

	// Generate test file
	testFile := generateLargeFile(b)
	defer os.Remove(testFile)

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		cmd := exec.Command("./dcat", "-plain", testFile)
		cmd.Run()
	}

	if *memprofile != "" {
		f, _ := os.Create(*memprofile)
		runtime.GC()
		pprof.WriteHeapProfile(f)
		f.Close()
	}
}

Run with: go test -bench=BenchmarkDCatLargeFile -cpuprofile=cpu.prof -memprofile=mem.prof
`
	// Print plus an explicit blank line gives the same output as Println on
	// this text without tripping go vet's redundant-newline check.
	fmt.Print(example)
	fmt.Println()
}
\ No newline at end of file |
