summaryrefslogtreecommitdiff
path: root/benchmarks/cmd
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2025-06-26 20:57:53 +0300
committerPaul Buetow <paul@buetow.org>2025-06-26 20:57:53 +0300
commit4a657e44e7111d7d3b9a9ba5e453901e19af2ecb (patch)
tree5cc8571e00a29ab7656633984fb9893ca369ccec /benchmarks/cmd
parentee5250441a2d241dc1a980dfd051a12f2db898cf (diff)
fix: resolve package conflicts and update documentation
- Move main package files to benchmarks/cmd/ to fix test failures - Update CLAUDE.md with comprehensive benchmarking and profiling instructions - Fix unused imports in serverless.go - Remove experimental buffered pipe/copy implementations - Remove outdated documentation files All integration tests now pass successfully. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'benchmarks/cmd')
-rw-r--r--benchmarks/cmd/generate_profile_data.go159
-rw-r--r--benchmarks/cmd/profile_example.go316
2 files changed, 475 insertions, 0 deletions
diff --git a/benchmarks/cmd/generate_profile_data.go b/benchmarks/cmd/generate_profile_data.go
new file mode 100644
index 0000000..0b34047
--- /dev/null
+++ b/benchmarks/cmd/generate_profile_data.go
@@ -0,0 +1,159 @@
+package main
+
import (
	"bufio"
	"flag"
	"fmt"
	"log"
	"math/rand"
	"os"
	"strconv"
	"strings"
	"time"
)
+
+func main() {
+ var (
+ size string
+ output string
+ format string
+ )
+
+ flag.StringVar(&size, "size", "10MB", "Size of the file (e.g., 10MB, 100MB, 1GB)")
+ flag.StringVar(&output, "output", "test.log", "Output file path")
+ flag.StringVar(&format, "format", "log", "Format: log or csv")
+ flag.Parse()
+
+ // Parse size
+ sizeBytes, err := parseSize(size)
+ if err != nil {
+ log.Fatalf("Invalid size: %v", err)
+ }
+
+ // Generate data
+ switch format {
+ case "log":
+ generateLogFile(output, sizeBytes)
+ case "csv":
+ generateCSVFile(output, sizeBytes)
+ default:
+ log.Fatalf("Unknown format: %s", format)
+ }
+
+ fmt.Printf("Generated %s file: %s\n", size, output)
+}
+
// parseSize converts a human-readable size string such as "10MB",
// "512KB", or "1GB" into a byte count. A bare number is interpreted
// as bytes; suffix matching is case-insensitive. It returns an error
// when the numeric part does not parse or when the resulting size is
// not positive (the original silently accepted zero/negative sizes,
// which made the generators produce empty files).
func parseSize(size string) (int64, error) {
	size = strings.ToUpper(strings.TrimSpace(size))
	multiplier := int64(1)

	// A switch reads cleaner than a cascaded if/else chain here.
	switch {
	case strings.HasSuffix(size, "GB"):
		multiplier = 1024 * 1024 * 1024
		size = strings.TrimSuffix(size, "GB")
	case strings.HasSuffix(size, "MB"):
		multiplier = 1024 * 1024
		size = strings.TrimSuffix(size, "MB")
	case strings.HasSuffix(size, "KB"):
		multiplier = 1024
		size = strings.TrimSuffix(size, "KB")
	}

	base, err := strconv.ParseInt(size, 10, 64)
	if err != nil {
		return 0, err
	}
	if base <= 0 {
		return 0, fmt.Errorf("size must be positive, got %d", base)
	}

	return base * multiplier, nil
}
+
// generateLogFile writes pseudo-random log lines to filename until at
// least targetSize bytes have been produced. Roughly 30% of ERROR
// lines are followed by a fake stack trace for variety. Any I/O error
// terminates the process via log.Fatal.
func generateLogFile(filename string, targetSize int64) {
	f, err := os.Create(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Buffer the output: the original wrote each line straight to the
	// file, costing one syscall per line on large targets.
	w := bufio.NewWriter(f)

	// Sample log lines
	logLevels := []string{"INFO", "WARN", "ERROR", "DEBUG"}
	actions := []string{
		"Processing request",
		"Handling connection",
		"Executing query",
		"Loading configuration",
		"Updating cache",
		"Validating input",
		"Sending response",
		"Checking permissions",
	}

	// Constant text: the original wrapped it in fmt.Sprintf with no
	// format verbs, which go vet/staticcheck flag as redundant.
	const stackTrace = "  Stack trace:\n  at function1() file1.go:123\n  at function2() file2.go:456\n  at main() main.go:789\n"

	bytesWritten := int64(0)
	lineNum := 0
	startTime := time.Now()

	for bytesWritten < targetSize {
		lineNum++
		// Timestamps advance one millisecond per line from startup.
		timestamp := startTime.Add(time.Duration(lineNum) * time.Millisecond).Format("2006-01-02 15:04:05.000")
		level := logLevels[rand.Intn(len(logLevels))]
		action := actions[rand.Intn(len(actions))]
		userID := rand.Intn(1000)
		requestID := fmt.Sprintf("req-%d", lineNum)
		duration := rand.Intn(5000)

		line := fmt.Sprintf("[%s] %s - %s for user%d (request: %s, duration: %dms)\n",
			timestamp, level, action, userID, requestID, duration)

		n, err := w.WriteString(line)
		if err != nil {
			log.Fatal(err)
		}
		bytesWritten += int64(n)

		// Add some variety with stack traces for errors
		if level == "ERROR" && rand.Float32() < 0.3 {
			n, err := w.WriteString(stackTrace)
			if err != nil {
				log.Fatal(err)
			}
			bytesWritten += int64(n)
		}
	}

	// Flush buffered lines before the deferred Close runs.
	if err := w.Flush(); err != nil {
		log.Fatal(err)
	}
}
+
// generateCSVFile writes pseudo-random CSV records (after a header
// row) to filename until at least targetSize bytes have been
// produced. Any I/O error terminates the process via log.Fatal.
func generateCSVFile(filename string, targetSize int64) {
	f, err := os.Create(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Buffer the output to avoid one syscall per row.
	w := bufio.NewWriter(f)

	// Write header (the original ignored this write's error).
	header := "timestamp,user,action,duration,status,category\n"
	if _, err := w.WriteString(header); err != nil {
		log.Fatal(err)
	}
	bytesWritten := int64(len(header))

	actions := []string{"login", "query", "update", "delete", "logout", "search", "export", "import"}
	statuses := []string{"success", "failure", "timeout", "pending"}
	categories := []string{"web", "api", "batch", "admin"}

	lineNum := 0
	startTime := time.Now()

	for bytesWritten < targetSize {
		lineNum++
		// One synthetic row per simulated second from startup.
		timestamp := startTime.Add(time.Duration(lineNum) * time.Second).Format("2006-01-02 15:04:05")
		user := fmt.Sprintf("user%d", rand.Intn(100))
		action := actions[rand.Intn(len(actions))]
		duration := 100 + rand.Intn(9900)
		status := statuses[rand.Intn(len(statuses))]
		category := categories[rand.Intn(len(categories))]

		line := fmt.Sprintf("%s,%s,%s,%d,%s,%s\n",
			timestamp, user, action, duration, status, category)

		n, err := w.WriteString(line)
		if err != nil {
			log.Fatal(err)
		}
		bytesWritten += int64(n)
	}

	// Flush buffered rows before the deferred Close runs.
	if err := w.Flush(); err != nil {
		log.Fatal(err)
	}
}
diff --git a/benchmarks/cmd/profile_example.go b/benchmarks/cmd/profile_example.go
new file mode 100644
index 0000000..f996565
--- /dev/null
+++ b/benchmarks/cmd/profile_example.go
@@ -0,0 +1,316 @@
+package main
+
import (
	"bufio"
	"fmt"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"
)
+
+// Example of using the profiling framework to find performance bottlenecks
+func main() {
+ fmt.Println("DTail Profiling Example")
+ fmt.Println("======================")
+ fmt.Println()
+
+ // Create test data
+ testFile := createTestData()
+ defer os.Remove(testFile)
+
+ // Profile dcat
+ fmt.Println("1. Profiling dcat...")
+ profileDCat(testFile)
+
+ // Profile dgrep
+ fmt.Println("\n2. Profiling dgrep...")
+ profileDGrep(testFile)
+
+ // Profile dmap
+ csvFile := createCSVData()
+ defer os.Remove(csvFile)
+ fmt.Println("\n3. Profiling dmap...")
+ profileDMap(csvFile)
+
+ // Analyze results
+ fmt.Println("\n4. Analyzing profiles...")
+ analyzeProfiles()
+}
+
// createTestData writes one million synthetic log lines (roughly
// 70MB — the original comment claimed 100MB) to test_data.log and
// returns the filename. It exits the process on create failure.
func createTestData() string {
	filename := "test_data.log"
	f, err := os.Create(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Buffer writes: a million unbuffered Fprintf calls would each
	// hit the kernel. Flush runs before the deferred Close.
	w := bufio.NewWriter(f)
	defer w.Flush()

	// Hoisted out of the loop; the original rebuilt this slice on
	// every iteration.
	levels := []string{"INFO", "WARN", "ERROR", "DEBUG"}

	for i := 0; i < 1000000; i++ {
		timestamp := time.Now().Format("2006-01-02 15:04:05.000")
		fmt.Fprintf(w, "[%s] %s - Processing request %d from user%d\n",
			timestamp, levels[i%4], i, i%1000)
	}

	return filename
}
+
// createCSVData writes a header plus 100k rows of synthetic CSV
// records to test_data.csv and returns the filename. It exits the
// process on create failure.
func createCSVData() string {
	filename := "test_data.csv"
	f, err := os.Create(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Buffer writes; Flush runs before the deferred Close.
	w := bufio.NewWriter(f)
	defer w.Flush()

	// Header
	fmt.Fprintln(w, "timestamp,user,action,duration,status")

	// Hoisted out of the loop; the original rebuilt these slice
	// literals on every iteration.
	actions := []string{"login", "query", "update", "logout"}
	statuses := []string{"success", "failure"}

	for i := 0; i < 100000; i++ {
		timestamp := time.Now().Add(time.Duration(i) * time.Second).Format("2006-01-02 15:04:05")
		user := fmt.Sprintf("user%d", i%100)
		action := actions[i%4]
		duration := 100 + i%900
		status := statuses[i%2]

		fmt.Fprintf(w, "%s,%s,%s,%d,%s\n", timestamp, user, action, duration, status)
	}

	return filename
}
+
// profileDCat runs ../dcat with profiling enabled against testFile
// and reports the wall-clock duration plus any profiles generated
// under profiles/.
func profileDCat(testFile string) {
	// Run dcat with profiling
	cmd := exec.Command("../dcat",
		"-profile",
		"-profiledir", "profiles",
		"-plain",
		"-cfg", "none",
		testFile)

	start := time.Now()
	output, err := cmd.CombinedOutput()
	duration := time.Since(start)

	if err != nil {
		fmt.Printf("Error: %v\n", err)
		fmt.Printf("Output: %s\n", output)
		return
	}

	fmt.Printf("  Completed in %v\n", duration)

	// List the profiles dcat produced. Skip entries whose Stat fails:
	// the original discarded the error and then dereferenced the nil
	// FileInfo, which panics if a file disappears between Glob and Stat.
	profiles, _ := filepath.Glob("profiles/dcat_*.prof")
	for _, p := range profiles {
		info, err := os.Stat(p)
		if err != nil {
			continue
		}
		fmt.Printf("  Generated: %s (%d KB)\n", filepath.Base(p), info.Size()/1024)
	}
}
+
// profileDGrep runs ../dgrep with profiling enabled against testFile,
// searching for ERROR/WARN with two lines of context either side, and
// reports the duration and how many matches appeared in the output.
func profileDGrep(testFile string) {
	args := []string{
		"-profile",
		"-profiledir", "profiles",
		"-plain",
		"-cfg", "none",
		"-regex", "ERROR|WARN",
		"-before", "2",
		"-after", "2",
		testFile,
	}
	cmd := exec.Command("../dgrep", args...)

	began := time.Now()
	output, err := cmd.CombinedOutput()
	elapsed := time.Since(began)

	if err != nil {
		fmt.Printf("Error: %v\n", err)
		fmt.Printf("Output: %s\n", output)
		return
	}

	fmt.Printf("  Completed in %v\n", elapsed)

	// Count matches
	text := string(output)
	hits := strings.Count(text, "ERROR") + strings.Count(text, "WARN")
	fmt.Printf("  Found %d matches\n", hits)
}
+
// profileDMap runs ../dmap with profiling enabled against csvFile,
// executing a series of aggregation queries and reporting the timing
// of each one.
func profileDMap(csvFile string) {
	// dmap requires an absolute path for its -files flag.
	absPath, err := filepath.Abs(csvFile)
	if err != nil {
		fmt.Printf("Error getting absolute path: %v\n", err)
		return
	}

	queries := []string{
		"select count(*)",
		"select user, count(*) group by user",
		"select action, avg(duration), max(duration) group by action",
	}

	for idx, q := range queries {
		fmt.Printf("  Query %d: %s\n", idx+1, q)

		cmd := exec.Command("../dmap",
			"-profile",
			"-profiledir", "profiles",
			"-plain",
			"-cfg", "none",
			"-files", absPath,
			"-query", q)

		began := time.Now()
		combined, runErr := cmd.CombinedOutput()
		elapsed := time.Since(began)

		if runErr != nil {
			fmt.Printf("    Error: %v\n", runErr)
			fmt.Printf("    Output: %s\n", combined)
			continue
		}

		fmt.Printf("    Completed in %v\n", elapsed)
	}
}
+
// truncateQuery shortens query to at most 50 characters, replacing
// the tail with "..." when anything had to be cut off.
func truncateQuery(query string) string {
	const limit = 50
	if len(query) <= limit {
		return query
	}
	return query[:limit-3] + "..."
}
+
// analyzeProfiles locates the newest CPU profile generated for each
// tool (dcat, dgrep, dmap) under profiles/, runs the project's
// ../profiling/profile.sh on it, echoes the top-function table from
// the script's output, and prints canned optimization hints.
func analyzeProfiles() {
	// Find latest CPU profiles
	// (the Glob error is ignored: the pattern is a constant, so the
	// only failure mode is a malformed pattern, which cannot happen).
	cpuProfiles, _ := filepath.Glob("profiles/*_cpu_*.prof")
	if len(cpuProfiles) == 0 {
		fmt.Println("No CPU profiles found")
		return
	}

	// Analyze each tool's CPU profile
	tools := []string{"dcat", "dgrep", "dmap"}
	for _, tool := range tools {
		var latestProfile string
		var latestTime time.Time

		// Find latest profile for this tool: newest ModTime wins;
		// candidates whose Stat fails are simply skipped.
		for _, profile := range cpuProfiles {
			if strings.Contains(profile, tool+"_cpu_") {
				info, err := os.Stat(profile)
				if err == nil && info.ModTime().After(latestTime) {
					latestProfile = profile
					latestTime = info.ModTime()
				}
			}
		}

		// No profile found for this tool; move on to the next one.
		if latestProfile == "" {
			continue
		}

		fmt.Printf("\nAnalyzing %s CPU profile:\n", tool)

		// Run profile.sh
		cmd := exec.Command("../profiling/profile.sh",
			"-top", "5",
			latestProfile)

		output, err := cmd.CombinedOutput()
		if err != nil {
			fmt.Printf("  Error analyzing: %v\n", err)
			continue
		}

		// Extract and display key information: once a line containing
		// both "Function" and "Flat" marks the start of the table,
		// echo only rows that look like table content ("%" or "---")
		// and stop at the first blank line after the table began.
		lines := strings.Split(string(output), "\n")
		inTable := false
		for _, line := range lines {
			if strings.Contains(line, "Function") && strings.Contains(line, "Flat") {
				inTable = true
			}
			if inTable && (strings.Contains(line, "%") || strings.Contains(line, "---")) {
				fmt.Printf("  %s\n", line)
			}
			if inTable && line == "" {
				break
			}
		}

		// Suggest optimizations based on findings
		suggestOptimizations(tool, string(output))
	}
}
+
// suggestOptimizations scans a pprof text report for well-known hot
// symbols and prints a canned optimization hint for each one it
// finds; if none match it reports that the profile looks clean.
func suggestOptimizations(tool string, analysis string) {
	fmt.Printf("\n  Optimization suggestions for %s:\n", tool)

	// Kept as an ordered slice (not a map) so hints always print in
	// the same order.
	type hint struct {
		marker string
		advice string
	}
	hints := []hint{
		{"regexp.Compile", "    - Pre-compile regex patterns instead of compiling in loops"},
		{"strings.Join", "    - Use strings.Builder for string concatenation"},
		{"runtime.mallocgc", "    - High allocation rate; consider object pooling"},
		{"syscall", "    - I/O bottleneck; consider buffering or async I/O"},
		{"runtime.gcBgMarkWorker", "    - High GC pressure; reduce allocations"},
	}

	matched := false
	for _, h := range hints {
		if !strings.Contains(analysis, h.marker) {
			continue
		}
		fmt.Println(h.advice)
		matched = true
	}

	if !matched {
		fmt.Println("    - Profile looks good; no obvious bottlenecks found")
	}
}
+
+// Helper function to demonstrate how to use profiling in tests
+func ExampleBenchmarkWithProfiling() {
+ // This would typically be in a _test.go file
+ fmt.Println(`
+Example benchmark with profiling:
+
+func BenchmarkDCatLargeFile(b *testing.B) {
+ // Enable profiling for this specific benchmark
+ if *cpuprofile != "" {
+ f, _ := os.Create(*cpuprofile)
+ pprof.StartCPUProfile(f)
+ defer pprof.StopCPUProfile()
+ }
+
+ // Generate test file
+ testFile := generateLargeFile(b)
+ defer os.Remove(testFile)
+
+ b.ResetTimer()
+
+ for i := 0; i < b.N; i++ {
+ cmd := exec.Command("./dcat", "-plain", testFile)
+ cmd.Run()
+ }
+
+ if *memprofile != "" {
+ f, _ := os.Create(*memprofile)
+ runtime.GC()
+ pprof.WriteHeapProfile(f)
+ f.Close()
+ }
+}
+
+Run with: go test -bench=BenchmarkDCatLargeFile -cpuprofile=cpu.prof -memprofile=mem.prof
+`)
+} \ No newline at end of file