summaryrefslogtreecommitdiff
path: root/benchmarks/cmd
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2025-06-26 20:57:53 +0300
committerPaul Buetow <paul@buetow.org>2025-06-26 20:57:53 +0300
commit4a657e44e7111d7d3b9a9ba5e453901e19af2ecb (patch)
tree5cc8571e00a29ab7656633984fb9893ca369ccec /benchmarks/cmd
parentee5250441a2d241dc1a980dfd051a12f2db898cf (diff)
fix: resolve package conflicts and update documentation
- Move main package files to benchmarks/cmd/ to fix test failures - Update CLAUDE.md with comprehensive benchmarking and profiling instructions - Fix unused imports in serverless.go - Remove experimental buffered pipe/copy implementations - Remove outdated documentation files All integration tests now pass successfully. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'benchmarks/cmd')
-rw-r--r--benchmarks/cmd/generate_profile_data.go159
-rw-r--r--benchmarks/cmd/profile_example.go316
2 files changed, 475 insertions, 0 deletions
diff --git a/benchmarks/cmd/generate_profile_data.go b/benchmarks/cmd/generate_profile_data.go
new file mode 100644
index 0000000..0b34047
--- /dev/null
+++ b/benchmarks/cmd/generate_profile_data.go
@@ -0,0 +1,159 @@
+package main
+
import (
	"bufio"
	"flag"
	"fmt"
	"log"
	"math/rand"
	"os"
	"strconv"
	"strings"
	"time"
)
+
+func main() {
+ var (
+ size string
+ output string
+ format string
+ )
+
+ flag.StringVar(&size, "size", "10MB", "Size of the file (e.g., 10MB, 100MB, 1GB)")
+ flag.StringVar(&output, "output", "test.log", "Output file path")
+ flag.StringVar(&format, "format", "log", "Format: log or csv")
+ flag.Parse()
+
+ // Parse size
+ sizeBytes, err := parseSize(size)
+ if err != nil {
+ log.Fatalf("Invalid size: %v", err)
+ }
+
+ // Generate data
+ switch format {
+ case "log":
+ generateLogFile(output, sizeBytes)
+ case "csv":
+ generateCSVFile(output, sizeBytes)
+ default:
+ log.Fatalf("Unknown format: %s", format)
+ }
+
+ fmt.Printf("Generated %s file: %s\n", size, output)
+}
+
// parseSize converts a human-readable size string such as "10MB",
// "512KB", or "1GB" into a byte count. A bare number is interpreted
// as bytes; suffix matching is case-insensitive. It returns an error
// when the numeric part does not parse or when the resulting size is
// not positive (the original silently accepted zero/negative sizes,
// which made the generators produce empty files).
func parseSize(size string) (int64, error) {
	size = strings.ToUpper(strings.TrimSpace(size))
	multiplier := int64(1)

	// A switch reads cleaner than a cascaded if/else chain here.
	switch {
	case strings.HasSuffix(size, "GB"):
		multiplier = 1024 * 1024 * 1024
		size = strings.TrimSuffix(size, "GB")
	case strings.HasSuffix(size, "MB"):
		multiplier = 1024 * 1024
		size = strings.TrimSuffix(size, "MB")
	case strings.HasSuffix(size, "KB"):
		multiplier = 1024
		size = strings.TrimSuffix(size, "KB")
	}

	base, err := strconv.ParseInt(size, 10, 64)
	if err != nil {
		return 0, err
	}
	if base <= 0 {
		return 0, fmt.Errorf("size must be positive, got %d", base)
	}

	return base * multiplier, nil
}
+
// generateLogFile writes pseudo-random log lines to filename until at
// least targetSize bytes have been produced. Roughly 30% of ERROR
// lines are followed by a fake stack trace for variety. Any I/O error
// terminates the process via log.Fatal.
func generateLogFile(filename string, targetSize int64) {
	f, err := os.Create(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Buffer the output: the original wrote each line straight to the
	// file, costing one syscall per line on large targets.
	w := bufio.NewWriter(f)

	// Sample log lines
	logLevels := []string{"INFO", "WARN", "ERROR", "DEBUG"}
	actions := []string{
		"Processing request",
		"Handling connection",
		"Executing query",
		"Loading configuration",
		"Updating cache",
		"Validating input",
		"Sending response",
		"Checking permissions",
	}

	// Constant text: the original wrapped it in fmt.Sprintf with no
	// format verbs, which go vet/staticcheck flag as redundant.
	const stackTrace = "  Stack trace:\n  at function1() file1.go:123\n  at function2() file2.go:456\n  at main() main.go:789\n"

	bytesWritten := int64(0)
	lineNum := 0
	startTime := time.Now()

	for bytesWritten < targetSize {
		lineNum++
		// Timestamps advance one millisecond per line from startup.
		timestamp := startTime.Add(time.Duration(lineNum) * time.Millisecond).Format("2006-01-02 15:04:05.000")
		level := logLevels[rand.Intn(len(logLevels))]
		action := actions[rand.Intn(len(actions))]
		userID := rand.Intn(1000)
		requestID := fmt.Sprintf("req-%d", lineNum)
		duration := rand.Intn(5000)

		line := fmt.Sprintf("[%s] %s - %s for user%d (request: %s, duration: %dms)\n",
			timestamp, level, action, userID, requestID, duration)

		n, err := w.WriteString(line)
		if err != nil {
			log.Fatal(err)
		}
		bytesWritten += int64(n)

		// Add some variety with stack traces for errors
		if level == "ERROR" && rand.Float32() < 0.3 {
			n, err := w.WriteString(stackTrace)
			if err != nil {
				log.Fatal(err)
			}
			bytesWritten += int64(n)
		}
	}

	// Flush buffered lines before the deferred Close runs.
	if err := w.Flush(); err != nil {
		log.Fatal(err)
	}
}
+
// generateCSVFile writes pseudo-random CSV records (after a header
// row) to filename until at least targetSize bytes have been
// produced. Any I/O error terminates the process via log.Fatal.
func generateCSVFile(filename string, targetSize int64) {
	f, err := os.Create(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Buffer the output to avoid one syscall per row.
	w := bufio.NewWriter(f)

	// Write header (the original ignored this write's error).
	header := "timestamp,user,action,duration,status,category\n"
	if _, err := w.WriteString(header); err != nil {
		log.Fatal(err)
	}
	bytesWritten := int64(len(header))

	actions := []string{"login", "query", "update", "delete", "logout", "search", "export", "import"}
	statuses := []string{"success", "failure", "timeout", "pending"}
	categories := []string{"web", "api", "batch", "admin"}

	lineNum := 0
	startTime := time.Now()

	for bytesWritten < targetSize {
		lineNum++
		// One synthetic row per simulated second from startup.
		timestamp := startTime.Add(time.Duration(lineNum) * time.Second).Format("2006-01-02 15:04:05")
		user := fmt.Sprintf("user%d", rand.Intn(100))
		action := actions[rand.Intn(len(actions))]
		duration := 100 + rand.Intn(9900)
		status := statuses[rand.Intn(len(statuses))]
		category := categories[rand.Intn(len(categories))]

		line := fmt.Sprintf("%s,%s,%s,%d,%s,%s\n",
			timestamp, user, action, duration, status, category)

		n, err := w.WriteString(line)
		if err != nil {
			log.Fatal(err)
		}
		bytesWritten += int64(n)
	}

	// Flush buffered rows before the deferred Close runs.
	if err := w.Flush(); err != nil {
		log.Fatal(err)
	}
}
diff --git a/benchmarks/cmd/profile_example.go b/benchmarks/cmd/profile_example.go
new file mode 100644
index 0000000..f996565
--- /dev/null
+++ b/benchmarks/cmd/profile_example.go
@@ -0,0 +1,316 @@
+package main
+
import (
	"bufio"
	"fmt"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"
)
+
+// Example of using the profiling framework to find performance bottlenecks
+func main() {
+ fmt.Println("DTail Profiling Example")
+ fmt.Println("======================")
+ fmt.Println()
+
+ // Create test data
+ testFile := createTestData()
+ defer os.Remove(testFile)
+
+ // Profile dcat
+ fmt.Println("1. Profiling dcat...")
+ profileDCat(testFile)
+
+ // Profile dgrep
+ fmt.Println("\n2. Profiling dgrep...")
+ profileDGrep(testFile)
+
+ // Profile dmap
+ csvFile := createCSVData()
+ defer os.Remove(csvFile)
+ fmt.Println("\n3. Profiling dmap...")
+ profileDMap(csvFile)
+
+ // Analyze results
+ fmt.Println("\n4. Analyzing profiles...")
+ analyzeProfiles()
+}
+
// createTestData writes one million synthetic log lines (roughly
// 70MB — the original comment claimed 100MB) to test_data.log and
// returns the filename. It exits the process on create failure.
func createTestData() string {
	filename := "test_data.log"
	f, err := os.Create(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Buffer writes: a million unbuffered Fprintf calls would each
	// hit the kernel. Flush runs before the deferred Close.
	w := bufio.NewWriter(f)
	defer w.Flush()

	// Hoisted out of the loop; the original rebuilt this slice on
	// every iteration.
	levels := []string{"INFO", "WARN", "ERROR", "DEBUG"}

	for i := 0; i < 1000000; i++ {
		timestamp := time.Now().Format("2006-01-02 15:04:05.000")
		fmt.Fprintf(w, "[%s] %s - Processing request %d from user%d\n",
			timestamp, levels[i%4], i, i%1000)
	}

	return filename
}
+
// createCSVData writes a header plus 100k rows of synthetic CSV
// records to test_data.csv and returns the filename. It exits the
// process on create failure.
func createCSVData() string {
	filename := "test_data.csv"
	f, err := os.Create(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Buffer writes; Flush runs before the deferred Close.
	w := bufio.NewWriter(f)
	defer w.Flush()

	// Header
	fmt.Fprintln(w, "timestamp,user,action,duration,status")

	// Hoisted out of the loop; the original rebuilt these slice
	// literals on every iteration.
	actions := []string{"login", "query", "update", "logout"}
	statuses := []string{"success", "failure"}

	for i := 0; i < 100000; i++ {
		timestamp := time.Now().Add(time.Duration(i) * time.Second).Format("2006-01-02 15:04:05")
		user := fmt.Sprintf("user%d", i%100)
		action := actions[i%4]
		duration := 100 + i%900
		status := statuses[i%2]

		fmt.Fprintf(w, "%s,%s,%s,%d,%s\n", timestamp, user, action, duration, status)
	}

	return filename
}
+
// profileDCat runs ../dcat with profiling enabled against testFile
// and reports the wall-clock duration plus any profiles generated
// under profiles/.
func profileDCat(testFile string) {
	// Run dcat with profiling
	cmd := exec.Command("../dcat",
		"-profile",
		"-profiledir", "profiles",
		"-plain",
		"-cfg", "none",
		testFile)

	start := time.Now()
	output, err := cmd.CombinedOutput()
	duration := time.Since(start)

	if err != nil {
		fmt.Printf("Error: %v\n", err)
		fmt.Printf("Output: %s\n", output)
		return
	}

	fmt.Printf("  Completed in %v\n", duration)

	// List the profiles dcat produced. Skip entries whose Stat fails:
	// the original discarded the error and then dereferenced the nil
	// FileInfo, which panics if a file disappears between Glob and Stat.
	profiles, _ := filepath.Glob("profiles/dcat_*.prof")
	for _, p := range profiles {
		info, err := os.Stat(p)
		if err != nil {
			continue
		}
		fmt.Printf("  Generated: %s (%d KB)\n", filepath.Base(p), info.Size()/1024)
	}
}
+
// profileDGrep runs ../dgrep with profiling enabled against testFile,
// searching for ERROR/WARN with two lines of context either side, and
// reports the duration and how many matches appeared in the output.
func profileDGrep(testFile string) {
	args := []string{
		"-profile",
		"-profiledir", "profiles",
		"-plain",
		"-cfg", "none",
		"-regex", "ERROR|WARN",
		"-before", "2",
		"-after", "2",
		testFile,
	}
	cmd := exec.Command("../dgrep", args...)

	began := time.Now()
	output, err := cmd.CombinedOutput()
	elapsed := time.Since(began)

	if err != nil {
		fmt.Printf("Error: %v\n", err)
		fmt.Printf("Output: %s\n", output)
		return
	}

	fmt.Printf("  Completed in %v\n", elapsed)

	// Count matches
	text := string(output)
	hits := strings.Count(text, "ERROR") + strings.Count(text, "WARN")
	fmt.Printf("  Found %d matches\n", hits)
}
+
// profileDMap runs ../dmap with profiling enabled against csvFile,
// executing a series of aggregation queries and reporting the timing
// of each one.
func profileDMap(csvFile string) {
	// dmap requires an absolute path for its -files flag.
	absPath, err := filepath.Abs(csvFile)
	if err != nil {
		fmt.Printf("Error getting absolute path: %v\n", err)
		return
	}

	queries := []string{
		"select count(*)",
		"select user, count(*) group by user",
		"select action, avg(duration), max(duration) group by action",
	}

	for idx, q := range queries {
		fmt.Printf("  Query %d: %s\n", idx+1, q)

		cmd := exec.Command("../dmap",
			"-profile",
			"-profiledir", "profiles",
			"-plain",
			"-cfg", "none",
			"-files", absPath,
			"-query", q)

		began := time.Now()
		combined, runErr := cmd.CombinedOutput()
		elapsed := time.Since(began)

		if runErr != nil {
			fmt.Printf("    Error: %v\n", runErr)
			fmt.Printf("    Output: %s\n", combined)
			continue
		}

		fmt.Printf("    Completed in %v\n", elapsed)
	}
}
+
// truncateQuery shortens query to at most 50 characters, replacing
// the tail with "..." when anything had to be cut off.
func truncateQuery(query string) string {
	const limit = 50
	if len(query) <= limit {
		return query
	}
	return query[:limit-3] + "..."
}
+
// analyzeProfiles locates the newest CPU profile generated for each
// tool (dcat, dgrep, dmap) under profiles/, runs the project's
// ../profiling/profile.sh on it, echoes the top-function table from
// the script's output, and prints canned optimization hints.
func analyzeProfiles() {
	// Find latest CPU profiles
	// (the Glob error is ignored: the pattern is a constant, so the
	// only failure mode is a malformed pattern, which cannot happen).
	cpuProfiles, _ := filepath.Glob("profiles/*_cpu_*.prof")
	if len(cpuProfiles) == 0 {
		fmt.Println("No CPU profiles found")
		return
	}

	// Analyze each tool's CPU profile
	tools := []string{"dcat", "dgrep", "dmap"}
	for _, tool := range tools {
		var latestProfile string
		var latestTime time.Time

		// Find latest profile for this tool: newest ModTime wins;
		// candidates whose Stat fails are simply skipped.
		for _, profile := range cpuProfiles {
			if strings.Contains(profile, tool+"_cpu_") {
				info, err := os.Stat(profile)
				if err == nil && info.ModTime().After(latestTime) {
					latestProfile = profile
					latestTime = info.ModTime()
				}
			}
		}

		// No profile found for this tool; move on to the next one.
		if latestProfile == "" {
			continue
		}

		fmt.Printf("\nAnalyzing %s CPU profile:\n", tool)

		// Run profile.sh
		cmd := exec.Command("../profiling/profile.sh",
			"-top", "5",
			latestProfile)

		output, err := cmd.CombinedOutput()
		if err != nil {
			fmt.Printf("  Error analyzing: %v\n", err)
			continue
		}

		// Extract and display key information: once a line containing
		// both "Function" and "Flat" marks the start of the table,
		// echo only rows that look like table content ("%" or "---")
		// and stop at the first blank line after the table began.
		lines := strings.Split(string(output), "\n")
		inTable := false
		for _, line := range lines {
			if strings.Contains(line, "Function") && strings.Contains(line, "Flat") {
				inTable = true
			}
			if inTable && (strings.Contains(line, "%") || strings.Contains(line, "---")) {
				fmt.Printf("  %s\n", line)
			}
			if inTable && line == "" {
				break
			}
		}

		// Suggest optimizations based on findings
		suggestOptimizations(tool, string(output))
	}
}
+
// suggestOptimizations scans a pprof text report for well-known hot
// symbols and prints a canned optimization hint for each one it
// finds; if none match it reports that the profile looks clean.
func suggestOptimizations(tool string, analysis string) {
	fmt.Printf("\n  Optimization suggestions for %s:\n", tool)

	// Kept as an ordered slice (not a map) so hints always print in
	// the same order.
	type hint struct {
		marker string
		advice string
	}
	hints := []hint{
		{"regexp.Compile", "    - Pre-compile regex patterns instead of compiling in loops"},
		{"strings.Join", "    - Use strings.Builder for string concatenation"},
		{"runtime.mallocgc", "    - High allocation rate; consider object pooling"},
		{"syscall", "    - I/O bottleneck; consider buffering or async I/O"},
		{"runtime.gcBgMarkWorker", "    - High GC pressure; reduce allocations"},
	}

	matched := false
	for _, h := range hints {
		if !strings.Contains(analysis, h.marker) {
			continue
		}
		fmt.Println(h.advice)
		matched = true
	}

	if !matched {
		fmt.Println("    - Profile looks good; no obvious bottlenecks found")
	}
}
+
+// Helper function to demonstrate how to use profiling in tests
+func ExampleBenchmarkWithProfiling() {
+ // This would typically be in a _test.go file
+ fmt.Println(`
+Example benchmark with profiling:
+
+func BenchmarkDCatLargeFile(b *testing.B) {
+ // Enable profiling for this specific benchmark
+ if *cpuprofile != "" {
+ f, _ := os.Create(*cpuprofile)
+ pprof.StartCPUProfile(f)
+ defer pprof.StopCPUProfile()
+ }
+
+ // Generate test file
+ testFile := generateLargeFile(b)
+ defer os.Remove(testFile)
+
+ b.ResetTimer()
+
+ for i := 0; i < b.N; i++ {
+ cmd := exec.Command("./dcat", "-plain", testFile)
+ cmd.Run()
+ }
+
+ if *memprofile != "" {
+ f, _ := os.Create(*memprofile)
+ runtime.GC()
+ pprof.WriteHeapProfile(f)
+ f.Close()
+ }
+}
+
+Run with: go test -bench=BenchmarkDCatLargeFile -cpuprofile=cpu.prof -memprofile=mem.prof
+`)
+} \ No newline at end of file