summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/pgo_commands_detail.md216
-rw-r--r--internal/tools/pgo/pgo.go25
2 files changed, 240 insertions, 1 deletions
diff --git a/doc/pgo_commands_detail.md b/doc/pgo_commands_detail.md
new file mode 100644
index 0000000..e874ba8
--- /dev/null
+++ b/doc/pgo_commands_detail.md
@@ -0,0 +1,216 @@
+# PGO Command Execution Details
+
+This document shows the exact commands executed during Profile-Guided Optimization (PGO) generation for DTail tools.
+
+## Overview
+
+When running `make pgo-generate` or `dtail-tools pgo`, the following commands are executed to generate performance profiles for each tool.
+
+## Commands Executed
+
+### 1. Building Baseline Binaries
+
+```bash
+go build -o pgo-build/dtail-baseline ./cmd/dtail
+go build -o pgo-build/dcat-baseline ./cmd/dcat
+go build -o pgo-build/dgrep-baseline ./cmd/dgrep
+go build -o pgo-build/dmap-baseline ./cmd/dmap
+go build -o pgo-build/dserver-baseline ./cmd/dserver
+```
+
+### 2. Profile Generation Commands
+
+#### DTail Profile Generation
+```bash
+# Background log writer process
+bash -c "for i in {1..200}; do
+ level=$((i % 4))
+ case $level in
+ 0) lvl=INFO;;
+ 1) lvl=WARN;;
+ 2) lvl=ERROR;;
+ 3) lvl=DEBUG;;
+ esac
+ echo \"[2025-07-04 15:00:00] $lvl - Test log line number $i with some additional text to process\" >> growing.log
+ sleep 0.015
+done"
+
+# DTail command
+pgo-build/dtail-baseline \
+ -cfg none \
+ -plain \
+ -profile \
+ -profiledir pgo-profiles/iter_dtail_TIMESTAMP \
+ -regex "ERROR|WARN" \
+ -shutdownAfter 3 \
+ pgo-profiles/growing.log
+```
+
+#### DCat Profile Generation
+```bash
+pgo-build/dcat-baseline \
+ -cfg none \
+ -plain \
+ -profile \
+ -profiledir pgo-profiles/iter_dcat_TIMESTAMP \
+ pgo-profiles/test.log
+```
+
+#### DGrep Profile Generation
+```bash
+pgo-build/dgrep-baseline \
+ -cfg none \
+ -plain \
+ -profile \
+ -profiledir pgo-profiles/iter_dgrep_TIMESTAMP \
+ -regex "ERROR|WARN" \
+ pgo-profiles/test.log
+```
+
+#### DMap Profile Generation
+```bash
+pgo-build/dmap-baseline \
+ -cfg none \
+ -plain \
+ -profile \
+ -profiledir pgo-profiles/iter_dmap_TIMESTAMP \
+ -files pgo-profiles/test.csv \
+ -query "select status, count(*) group by status"
+```
+
+#### DServer Profile Generation
+```bash
+# Start dserver with pprof endpoint
+pgo-build/dserver-baseline \
+ -cfg none \
+ -pprof localhost:16060 \
+ -port 12222
+
+# Client commands to generate server load (run concurrently):
+pgo-build/dcat-baseline \
+ -cfg none \
+ -server localhost:12222 \
+ pgo-profiles/test.log
+
+pgo-build/dgrep-baseline \
+ -cfg none \
+ -server localhost:12222 \
+ -regex "ERROR|WARN" \
+ pgo-profiles/test.log
+
+pgo-build/dgrep-baseline \
+ -cfg none \
+ -server localhost:12222 \
+ -regex "INFO.*action" \
+ pgo-profiles/test.log
+
+pgo-build/dmap-baseline \
+ -cfg none \
+ -server localhost:12222 \
+ -files pgo-profiles/test.csv \
+ -query "select status, count(*) group by status"
+
+pgo-build/dmap-baseline \
+ -cfg none \
+ -server localhost:12222 \
+ -files pgo-profiles/test.csv \
+ -query "select department, avg(salary) group by department"
+
+# Capture CPU profile via HTTP
+curl http://localhost:16060/debug/pprof/profile?seconds=5 > dserver.pprof
+```
+
+### 3. Profile Merging
+
+When multiple iterations are run, profiles are merged:
+
+```bash
+# Merge multiple profile iterations
+go tool pprof -proto \
+ pgo-profiles/dcat.pprof.0.pprof \
+ pgo-profiles/dcat.pprof.1.pprof \
+ > pgo-profiles/dcat.pprof
+```
+
+### 4. Building with PGO
+
+```bash
+# Build optimized binaries using profiles
+go build -pgo=pgo-profiles/dcat.pprof -o pgo-build/dcat ./cmd/dcat
+go build -pgo=pgo-profiles/dgrep.pprof -o pgo-build/dgrep ./cmd/dgrep
+go build -pgo=pgo-profiles/dmap.pprof -o pgo-build/dmap ./cmd/dmap
+go build -pgo=pgo-profiles/dtail.pprof -o pgo-build/dtail ./cmd/dtail
+go build -pgo=pgo-profiles/dserver.pprof -o pgo-build/dserver ./cmd/dserver
+```
+
+### 5. Performance Comparison Commands
+
+Quick benchmarks are run to compare baseline vs optimized:
+
+```bash
+# Baseline benchmark
+pgo-build/dcat-baseline -cfg none -plain /tmp/pgo_bench.log
+
+# Optimized benchmark
+pgo-build/dcat -cfg none -plain /tmp/pgo_bench.log
+
+# Similar commands for dgrep and dmap
+pgo-build/dgrep-baseline -cfg none -plain -regex ERROR /tmp/pgo_bench.log
+pgo-build/dmap-baseline -cfg none -plain -files /tmp/pgo_bench.csv -query "select count(*)"
+```
+
+## Test Data Generation
+
+The PGO framework generates realistic test data:
+
+### Log File (test.log)
+- Contains timestamps, log levels (INFO, WARN, ERROR, DEBUG)
+- Includes user actions, durations, and status information
+- Default size: 1,000,000 lines (configurable with -datasize)
+
+### CSV File (test.csv)
+- Contains employee data with departments, salaries, status
+- Used for MapReduce queries
+- Default size: 100,000 rows (1/10 of log file size)
+
+### Growing Log File (growing.log)
+- Used specifically for dtail testing
+- Simulates real-time log generation
+- Writes ~200 lines over 3 seconds with mixed log levels
+
+## Customization Options
+
+### Adjust Test Data Size
+```bash
+dtail-tools pgo -datasize 5000000 # 5 million lines
+```
+
+### Run More Iterations
+```bash
+dtail-tools pgo -iterations 5 # Run 5 iterations per command
+```
+
+### Profile Specific Commands Only
+```bash
+dtail-tools pgo dcat dgrep # Only optimize dcat and dgrep
+```
+
+### Verbose Output
+```bash
+dtail-tools pgo -v # Show all command execution details
+```
+
+### Profile Generation Only
+```bash
+dtail-tools pgo -profileonly # Skip building optimized binaries
+```
+
+## Notes
+
+1. **Empty Profiles**: Some commands (like dtail) may generate empty profiles if they are I/O-bound. This is handled gracefully.
+
+2. **DServer Profiling**: Uses HTTP pprof endpoint instead of command-line profiling to capture server-side performance data.
+
+3. **Concurrent Execution**: Multiple client commands are run concurrently against dserver to generate realistic load patterns.
+
+4. **Profile Quality**: The effectiveness of PGO depends on how well the test workload represents real-world usage patterns. \ No newline at end of file
diff --git a/internal/tools/pgo/pgo.go b/internal/tools/pgo/pgo.go
index ee6422b..3b0b8b8 100644
--- a/internal/tools/pgo/pgo.go
+++ b/internal/tools/pgo/pgo.go
@@ -213,6 +213,9 @@ func runSingleWorkload(cfg *Config, command, binary string, testFiles map[string
}
defer os.RemoveAll(iterProfileDir)
+ // Always show what command is being executed
+ fmt.Printf(" Executing %s workload...\n", command)
+
switch command {
case "dtail":
// For dtail, we need to simulate a growing log file
@@ -240,6 +243,7 @@ func runSingleWorkload(cfg *Config, command, binary string, testFiles map[string
"-regex", "ERROR|WARN", // Filter for errors and warnings
"-shutdownAfter", "3", // Exit after 3 seconds
growingLog)
+ fmt.Printf(" Command: %s %s\n", binary, strings.Join(cmd.Args[1:], " "))
case "dcat":
cmd = exec.Command(binary,
@@ -248,6 +252,7 @@ func runSingleWorkload(cfg *Config, command, binary string, testFiles map[string
"-profile",
"-profiledir", iterProfileDir,
testFiles["log"])
+ fmt.Printf(" Command: %s %s\n", binary, strings.Join(cmd.Args[1:], " "))
case "dgrep":
cmd = exec.Command(binary,
@@ -257,6 +262,7 @@ func runSingleWorkload(cfg *Config, command, binary string, testFiles map[string
"-profiledir", iterProfileDir,
"-regex", "ERROR|WARN",
testFiles["log"])
+ fmt.Printf(" Command: %s %s\n", binary, strings.Join(cmd.Args[1:], " "))
case "dmap":
cmd = exec.Command(binary,
@@ -266,6 +272,7 @@ func runSingleWorkload(cfg *Config, command, binary string, testFiles map[string
"-profiledir", iterProfileDir,
"-files", testFiles["csv"],
"-query", "select status, count(*) group by status")
+ fmt.Printf(" Command: %s %s\n", binary, strings.Join(cmd.Args[1:], " "))
case "dserver":
// For dserver, we'll simulate some client connections
@@ -319,12 +326,16 @@ func copyFile(src, dst string) error {
}
func runDServerWorkload(cfg *Config, binary string, testFiles map[string]string, profilePath string) error {
+ fmt.Printf(" Executing dserver workload...\n")
+
// Start dserver with pprof endpoint
serverCmd := exec.Command(binary,
"-cfg", "none",
"-pprof", "localhost:16060", // pprof endpoint
"-port", "12222") // Use non-standard port
+ fmt.Printf(" Starting dserver: %s %s\n", binary, strings.Join(serverCmd.Args[1:], " "))
+
if cfg.Verbose {
serverCmd.Stdout = os.Stdout
serverCmd.Stderr = os.Stderr
@@ -354,6 +365,11 @@ func runDServerWorkload(cfg *Config, binary string, testFiles map[string]string,
{"dmap", []string{"-cfg", "none", "-server", "localhost:12222", "-files", testFiles["csv"], "-query", "select department, avg(salary) group by department"}},
}
+ fmt.Println(" Client commands to generate server load:")
+ for _, client := range clients {
+ fmt.Printf(" %s %s\n", client.cmd, strings.Join(client.args, " "))
+ }
+
// Run clients concurrently to generate more server load
var wg sync.WaitGroup
for i := 0; i < 2; i++ { // Run each client twice
@@ -375,6 +391,7 @@ func runDServerWorkload(cfg *Config, binary string, testFiles map[string]string,
// Give a moment for workload to start
time.Sleep(500 * time.Millisecond)
+ fmt.Println(" Capturing CPU profile via HTTP endpoint...")
// Capture CPU profile from pprof endpoint (this blocks for 5 seconds)
resp, err := http.Get("http://localhost:16060/debug/pprof/profile?seconds=5")
if err != nil {
@@ -396,6 +413,7 @@ func runDServerWorkload(cfg *Config, binary string, testFiles map[string]string,
return
}
+ fmt.Printf(" Profile captured and saved to %s\n", profilePath)
profileDone <- nil
}()
@@ -438,18 +456,21 @@ func mergeProfiles(profiles []string, output string) error {
if len(validProfiles) == 0 {
// All profiles are empty, create an empty output file
// This allows the workflow to continue
- fmt.Printf("Warning: All profiles for this command are empty (I/O-bound operation?)\n")
+ fmt.Printf(" Warning: All profiles for this command are empty (I/O-bound operation?)\n")
return os.WriteFile(output, []byte{}, 0644)
}
if len(validProfiles) == 1 {
+ fmt.Printf(" Using single profile (no merge needed)\n")
// Just rename
return os.Rename(validProfiles[0], output)
}
+ fmt.Printf(" Merging %d profiles...\n", len(validProfiles))
// Use go tool pprof to merge
args := append([]string{"tool", "pprof", "-proto"}, validProfiles...)
cmd := exec.Command("go", args...)
+ fmt.Printf(" Command: go %s\n", strings.Join(args, " "))
outFile, err := os.Create(output)
if err != nil {
@@ -526,7 +547,9 @@ func comparePerformance(cfg *Config) error {
fmt.Printf("\n%s:\n", cmd)
// Run benchmark
+ fmt.Printf(" Running baseline benchmark...\n")
baselineTime := benchmarkCommand(baseline, cmd, testFiles)
+ fmt.Printf(" Running optimized benchmark...\n")
optimizedTime := benchmarkCommand(optimized, cmd, testFiles)
if baselineTime > 0 && optimizedTime > 0 {