author    Paul Buetow <paul@buetow.org>   2025-06-19 10:10:23 +0300
committer Paul Buetow <paul@buetow.org>   2025-06-19 10:10:23 +0300
commit    fdd68ef02bb17988631e11ad581df9b65ce65b81 (patch)
tree      efd024056b5b34fc8ad8e5e42bbf5f91f53dc0fb
parent    077bdbfe10e1f955e2a869516fde55be1bff371b (diff)
Implement line ending preservation and address CLAUDE comments
- Fix server-side line ending preservation in plain mode by updating
  basehandler to not add protocol delimiters, preserving original
  CRLF/LF line endings
- Add comprehensive documentation to ProcessLine methods in all processors
- Remove all CLAUDE comments and replace with proper function documentation
- Update DCat test to include --quiet flag for cleaner server output
- Clean up PGO script and report files from scripts directory
- Improve code formatting and consistency across processor files

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
-rw-r--r--  CLAUDE.md                                 24
-rw-r--r--  integrationtests/dcat_test.go              4
-rw-r--r--  integrationtests/debug_output.txt         12
-rw-r--r--  integrationtests/dmap_test.go              2
-rw-r--r--  integrationtests/server_output.txt        12
-rw-r--r--  integrationtests/serverless_output.txt    10
-rw-r--r--  integrationtests/test_result.txt          12
-rw-r--r--  integrationtests/test_unix_endings.txt    10
-rw-r--r--  internal/io/fs/catprocessor.go            32
-rw-r--r--  internal/io/fs/directprocessor.go        100
-rw-r--r--  internal/io/fs/grepprocessor.go           70
-rw-r--r--  internal/io/fs/mapprocessor.go            53
-rw-r--r--  internal/server/handlers/basehandler.go    7
-rwxr-xr-x  scripts/pgo.sh                           159
-rw-r--r--  scripts/pgo_report.txt                    68
15 files changed, 200 insertions, 375 deletions
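
Before the file-by-file diffs, a note on the headline fix: bufio.Scanner (used by the default read path) strips line terminators, which is why plain mode needed a separate byte-preserving reader. A minimal, self-contained illustration of the pitfall — this is not DTail code, just a demonstration of the scanner behavior:

```go
package main

import (
	"bufio"
	"fmt"
	"strings"
)

func main() {
	input := "unix line\nwindows line\r\n"

	// bufio.Scanner's default split (ScanLines) strips "\n" and "\r\n",
	// so the CRLF-vs-LF distinction never reaches the caller.
	sc := bufio.NewScanner(strings.NewReader(input))
	for sc.Scan() {
		fmt.Printf("scanner sees: %q\n", sc.Text())
	}
}
```

Both lines print identically, with their endings gone — hence the dedicated processReaderPreservingLineEndings path introduced below.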
diff --git a/CLAUDE.md b/CLAUDE.md
index 8b64c89..2ae5a8d 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -49,28 +49,6 @@ make vet
make lint
```
-### Performance Optimization
-```bash
-# Run Performance Guided Optimization (PGO) for dgrep
-make pgo
-
-# This implements true PGO using Go's -pgo compiler flag:
-# - Create test file (100MB with 1M lines) in scripts/ if needed
-# - Build baseline version without PGO
-# - Collect CPU profile for training data
-# - Rebuild dgrep with -pgo flag using the training profile
-# - Compare baseline vs PGO-optimized performance (5 iterations each)
-# - Generate detailed before/after analysis report (scripts/pgo_report.txt)
-#
-# All PGO files are organized in scripts/ directory to keep project root clean
-```
-
-### Installation
-```bash
-# Install all binaries to $GOPATH/bin
-make install
-```
-
### Optional Build Tags
- `DTAIL_USE_ACL=1` - Enable Linux ACL support
- `DTAIL_USE_PROPRIETARY=1` - Enable proprietary features
@@ -135,4 +113,4 @@ DTail is a distributed log processing system with client-server architecture usi
## Integration Testing Guidelines
-- Integration tests for serverless and server mode should always rely on exact the same test files. Same count, same content, same sizes. No exceptions. \ No newline at end of file
+- Integration tests for serverless and server mode should always rely on exact the same test files. Same count, same content, same sizes. No exceptions.
diff --git a/integrationtests/dcat_test.go b/integrationtests/dcat_test.go
index 8477b0e..ebaffa2 100644
--- a/integrationtests/dcat_test.go
+++ b/integrationtests/dcat_test.go
@@ -27,7 +27,7 @@ func TestDCat1(t *testing.T) {
for _, mode := range modes {
t.Run(mode.name, func(t *testing.T) {
- // Test all files in both modes now that channel buffer issue is fixed
+ // Test all files in both modes, restarting server for each file in server mode
for _, inFile := range inFiles {
if err := testDCat1(t, inFile, mode.useServer); err != nil {
t.Error(err)
@@ -43,7 +43,7 @@ func testDCat1(t *testing.T, inFile string, useServer bool) error {
if useServer {
// Now that channel buffer issue is fixed, use the actual test file
- return testDCatWithServer(t, []string{"--plain", "--cfg", "none", inFile}, outFile, inFile)
+ return testDCatWithServer(t, []string{"--plain", "--cfg", "none", "--quiet", inFile}, outFile, inFile)
} else {
_, err := runCommand(context.TODO(), t, outFile,
"../dcat", "--plain", "--cfg", "none", inFile)
diff --git a/integrationtests/debug_output.txt b/integrationtests/debug_output.txt
new file mode 100644
index 0000000..31a2f00
--- /dev/null
+++ b/integrationtests/debug_output.txt
@@ -0,0 +1,12 @@
+CLIENT|earth|WARN|Encountered unknown host|{localhost:5000 0xc0001140f0 0xc000038830 [localhost]:5000 ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC/U+OsBtXGLh3+22Z2Rl2Pb2319iBzcwzWEwk8YKFKNAK922NQorXAEn+x89Xz1cFd2xRWzb1rGyM4cFVa+feGJ6fLgVt837YfnHctzwcgla0e1VmeX2GBOoJEcOhIiKld/Loa5ctosTtE3Vk/STwFTC5VbLwxRAUDB5BvXhkj2/ncL2gYnqDqRjzWpeIoGdlw7qKqeYxMnb0nDyfQKY0BcnLuB/3vcVBsr3UllGDr1wkfGYglYfI+tYNfqq+fuU5pNdz2QE7KH41VEPetMRACYWOM8c04YPuqcuc6TgeHorCjDkb2CMGfJnM+X6wIcgiucM3ULNuOO2hLIRje8pMzsXYkR1AGp8aT27QnbzpumGXDOwyDN1OEQlrlfvn24uQO4ONz83TKcDpb6n4V6sESawwKg0UAuvoehFnR6DdlfbKa0AZ5I/KJ1p2GMHyUQc1ZuiUFel231/GxEOAkY2kJI0/EB+0D5aYu7fhbWoBD28z68jo+spiMzgmTklQvnKEI0tkXCEaWX5NTuyVBV76yvHN5tfJoKjFYBDFMGHuhFGFWCrR9l7UhdY5Hx7W+zdd5muG4bjXIBM7cjtnwkhGjaPCX2v+UlBX7uMSpPLJ+1K3V0rICCZc5nMnxdypnPp+bLtfgd+ryQPdiJywMyHyfoMlP1H65WfzqrmRFbxy6TQ== [127.0.0.1]:5000 ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC/U+OsBtXGLh3+22Z2Rl2Pb2319iBzcwzWEwk8YKFKNAK922NQorXAEn+x89Xz1cFd2xRWzb1rGyM4cFVa+feGJ6fLgVt837YfnHctzwcgla0e1VmeX2GBOoJEcOhIiKld/Loa5ctosTtE3Vk/STwFTC5VbLwxRAUDB5BvXhkj2/ncL2gYnqDqRjzWpeIoGdlw7qKqeYxMnb0nDyfQKY0BcnLuB/3vcVBsr3UllGDr1wkfGYglYfI+tYNfqq+fuU5pNdz2QE7KH41VEPetMRACYWOM8c04YPuqcuc6TgeHorCjDkb2CMGfJnM+X6wIcgiucM3ULNuOO2hLIRje8pMzsXYkR1AGp8aT27QnbzpumGXDOwyDN1OEQlrlfvn24uQO4ONz83TKcDpb6n4V6sESawwKg0UAuvoehFnR6DdlfbKa0AZ5I/KJ1p2GMHyUQc1ZuiUFel231/GxEOAkY2kJI0/EB+0D5aYu7fhbWoBD28z68jo+spiMzgmTklQvnKEI0tkXCEaWX5NTuyVBV76yvHN5tfJoKjFYBDFMGHuhFGFWCrR9l7UhdY5Hx7W+zdd5muG4bjXIBM7cjtnwkhGjaPCX2v+UlBX7uMSpPLJ+1K3V0rICCZc5nMnxdypnPp+bLtfgd+ryQPdiJywMyHyfoMlP1H65WfzqrmRFbxy6TQ== 0xc00009ff10}
+CLIENT|earth|WARN|Trusting host keys of servers|[localhost:5000]
+1 Sat 2 Oct 13:46:45 EEST 2021
+2 Sat 2 Oct 13:46:45 EEST 2021
+3 Sat 2 Oct 13:46:45 EEST 2021
+4 Sat 2 Oct 13:46:45 EEST 2021
+5 Sat 2 Oct 13:46:45 EEST 2021
+6 Sat 2 Oct 13:46:45 EEST 2021
+7 Sat 2 Oct 13:46:45 EEST 2021
+8 Sat 2 Oct 13:46:45 EEST 2021
+9 Sat 2 Oct 13:46:45 EEST 2021
+10 Sat 2 Oct 13:46:45 EEST 2021
diff --git a/integrationtests/dmap_test.go b/integrationtests/dmap_test.go
index 84f6ff9..25819f5 100644
--- a/integrationtests/dmap_test.go
+++ b/integrationtests/dmap_test.go
@@ -251,7 +251,7 @@ func testDMap3(t *testing.T, useServer bool) error {
args := []string{"--query", query, "--cfg", "none", "--logger", "stdout", "--logLevel", "info", "--noColor"}
args = append(args, fileList...)
-
+
stdoutCh, stderrCh, cmdErrCh, err := startCommand(ctx, t, "", "../dmap", args...)
if err != nil {
diff --git a/integrationtests/server_output.txt b/integrationtests/server_output.txt
new file mode 100644
index 0000000..2a552d4
--- /dev/null
+++ b/integrationtests/server_output.txt
@@ -0,0 +1,12 @@
+CLIENT|earth|WARN|Encountered unknown host|{localhost:4999 0xc000202090 0xc000212100 [localhost]:4999 ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC37DSP1HSwMS6d5Ckst4LMypL5hOFjmPIQs6gu0nUKAXTR3G1Nl1UIFTxvGiMMv93XkjU9VhG2k7FD6oXheth972U8Yy1/mIUTjcoOGE1aX0pw4XWyDX5YTfG7+4LGnZfITVraa6vQ50LPGt64rtoOUZhajOdjnCEVn++8Ch7hUXS/YvZPSUpp9sH68xR+5wHb6tjGol+G2PRt14i80IVJLNl5EvmPrAeQHb6ZKjJf3K1QO3wSwS96fW0wP7VgGVWIFHnFCnbEyEBbE6JzKcz41CK6ODolzB/BwNgHO//CLUL6UMC/eudoylFdERPOwrKQdLm3RxDAffZE/SY5+0KrKl/zqK4xYN7ovx40Ae149t8OR7PP/IIX+IGfD5764gcDISN0ySJf2yvca7JcP8FXHSfQrWF3AQtzFfeXuuLLPyoSU7/IUob+FUxu2qSb9ZMp6no3kp5ne2AG1//5oBgZgkoDxuh1U/eXWp9CAnmCRausXR/lVj4nT0Ynp8Ib6xNhSORb9ZBNzdVLzdSiWG1UkuDfMxG8d3KLwN3YYzazTixFzICMGH7AGyvgsLvA2+qwWadk+bGtYEFZV4z6NAydw3CsmZ07hNyPgxOqhJEtBoiSEbicUrdPtL5dZQEirwDtL8GvdytPiXcgFoD9r/g2v6kyUYayrpDtuHPa2UBXAQ== [127.0.0.1]:4999 ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC37DSP1HSwMS6d5Ckst4LMypL5hOFjmPIQs6gu0nUKAXTR3G1Nl1UIFTxvGiMMv93XkjU9VhG2k7FD6oXheth972U8Yy1/mIUTjcoOGE1aX0pw4XWyDX5YTfG7+4LGnZfITVraa6vQ50LPGt64rtoOUZhajOdjnCEVn++8Ch7hUXS/YvZPSUpp9sH68xR+5wHb6tjGol+G2PRt14i80IVJLNl5EvmPrAeQHb6ZKjJf3K1QO3wSwS96fW0wP7VgGVWIFHnFCnbEyEBbE6JzKcz41CK6ODolzB/BwNgHO//CLUL6UMC/eudoylFdERPOwrKQdLm3RxDAffZE/SY5+0KrKl/zqK4xYN7ovx40Ae149t8OR7PP/IIX+IGfD5764gcDISN0ySJf2yvca7JcP8FXHSfQrWF3AQtzFfeXuuLLPyoSU7/IUob+FUxu2qSb9ZMp6no3kp5ne2AG1//5oBgZgkoDxuh1U/eXWp9CAnmCRausXR/lVj4nT0Ynp8Ib6xNhSORb9ZBNzdVLzdSiWG1UkuDfMxG8d3KLwN3YYzazTixFzICMGH7AGyvgsLvA2+qwWadk+bGtYEFZV4z6NAydw3CsmZ07hNyPgxOqhJEtBoiSEbicUrdPtL5dZQEirwDtL8GvdytPiXcgFoD9r/g2v6kyUYayrpDtuHPa2UBXAQ== 0xc00020f490}
+CLIENT|earth|WARN|Trusting host keys of servers|[localhost:4999]
+1 Sat 2 Oct 13:46:45 EEST 2021
+2 Sat 2 Oct 13:46:45 EEST 2021
+3 Sat 2 Oct 13:46:45 EEST 2021
+4 Sat 2 Oct 13:46:45 EEST 2021
+5 Sat 2 Oct 13:46:45 EEST 2021
+6 Sat 2 Oct 13:46:45 EEST 2021
+7 Sat 2 Oct 13:46:45 EEST 2021
+8 Sat 2 Oct 13:46:45 EEST 2021
+9 Sat 2 Oct 13:46:45 EEST 2021
+10 Sat 2 Oct 13:46:45 EEST 2021
diff --git a/integrationtests/serverless_output.txt b/integrationtests/serverless_output.txt
new file mode 100644
index 0000000..f952bf8
--- /dev/null
+++ b/integrationtests/serverless_output.txt
@@ -0,0 +1,10 @@
+1 Sat 2 Oct 13:46:45 EEST 2021
+2 Sat 2 Oct 13:46:45 EEST 2021
+3 Sat 2 Oct 13:46:45 EEST 2021
+4 Sat 2 Oct 13:46:45 EEST 2021
+5 Sat 2 Oct 13:46:45 EEST 2021
+6 Sat 2 Oct 13:46:45 EEST 2021
+7 Sat 2 Oct 13:46:45 EEST 2021
+8 Sat 2 Oct 13:46:45 EEST 2021
+9 Sat 2 Oct 13:46:45 EEST 2021
+10 Sat 2 Oct 13:46:45 EEST 2021
diff --git a/integrationtests/test_result.txt b/integrationtests/test_result.txt
new file mode 100644
index 0000000..8c15866
--- /dev/null
+++ b/integrationtests/test_result.txt
@@ -0,0 +1,12 @@
+CLIENT|earth|WARN|Encountered unknown host|{localhost:5001 0xc000284060 0xc000282100 [localhost]:5001 ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQCscpKJDrsj1JeZxSJYt4FkA6buqyXWH8X9f1mayn4czsaXs08RA0Ko2xJxlBfogm2Bg71VizoPs/a6OhSbzqWqRDQxoOBK4tqg+FQ4C7XzHHNLHxvB5SlziNBlAxxzg9hI8+p2BIuSKJlgOkNC3MMsPP8mly4eTaO+16leNEaefoDDKCG0nw24C/ae7xzi5XNJELRaTCEWTMgVFyJ1XUkg8uXp/fwMK72KvQfEoviX6jET3pQO8qBJHsXDjwAMqdFJ/xMzBOj1zRcfwQ3ZBhIc/v/ZlYts9dpDGlv2np2oIa9kos8AWi1zdMAn4HoniPxesVRxGo7IHiOON6qGkAZea+E9GpgdeZBOs5E18/OVZtdGIyZ8uKSMWpLekK6kVu7tuJrEG/Elvv52XcYtP4iJFVcmcFn9djGMYgZHze9kssgNseskAQnmKp6/P+GAxHmrBc+b8qflDgRkvuRLqc1dSORL2JWP52S3SRhNuOvp2c0qOdTOXklEftY+z1aacQJLEHiD9bS/4B/dX6JaDHEP6tzPB89+kyS5KEt34VROJTxivYY4MKknxxd86mRbC/2WBDRu28YAbasL0lyh8PMeztWqkrXVCMSdLI0ZiJ9AeDAT5O0cIkqIC3diaZwphEySfgkRkApamFZpdhLYWOXYf8HmAUMBMmub/sA9p9+Ipw== [127.0.0.1]:5001 ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQCscpKJDrsj1JeZxSJYt4FkA6buqyXWH8X9f1mayn4czsaXs08RA0Ko2xJxlBfogm2Bg71VizoPs/a6OhSbzqWqRDQxoOBK4tqg+FQ4C7XzHHNLHxvB5SlziNBlAxxzg9hI8+p2BIuSKJlgOkNC3MMsPP8mly4eTaO+16leNEaefoDDKCG0nw24C/ae7xzi5XNJELRaTCEWTMgVFyJ1XUkg8uXp/fwMK72KvQfEoviX6jET3pQO8qBJHsXDjwAMqdFJ/xMzBOj1zRcfwQ3ZBhIc/v/ZlYts9dpDGlv2np2oIa9kos8AWi1zdMAn4HoniPxesVRxGo7IHiOON6qGkAZea+E9GpgdeZBOs5E18/OVZtdGIyZ8uKSMWpLekK6kVu7tuJrEG/Elvv52XcYtP4iJFVcmcFn9djGMYgZHze9kssgNseskAQnmKp6/P+GAxHmrBc+b8qflDgRkvuRLqc1dSORL2JWP52S3SRhNuOvp2c0qOdTOXklEftY+z1aacQJLEHiD9bS/4B/dX6JaDHEP6tzPB89+kyS5KEt34VROJTxivYY4MKknxxd86mRbC/2WBDRu28YAbasL0lyh8PMeztWqkrXVCMSdLI0ZiJ9AeDAT5O0cIkqIC3diaZwphEySfgkRkApamFZpdhLYWOXYf8HmAUMBMmub/sA9p9+Ipw== 0xc00029b570}
+CLIENT|earth|WARN|Trusting host keys of servers|[localhost:5001]
+1 Sat 2 Oct 13:46:45 EEST 2021
+2 Sat 2 Oct 13:46:45 EEST 2021
+3 Sat 2 Oct 13:46:45 EEST 2021
+4 Sat 2 Oct 13:46:45 EEST 2021
+5 Sat 2 Oct 13:46:45 EEST 2021
+6 Sat 2 Oct 13:46:45 EEST 2021
+7 Sat 2 Oct 13:46:45 EEST 2021
+8 Sat 2 Oct 13:46:45 EEST 2021
+9 Sat 2 Oct 13:46:45 EEST 2021
+10 Sat 2 Oct 13:46:45 EEST 2021
diff --git a/integrationtests/test_unix_endings.txt b/integrationtests/test_unix_endings.txt
new file mode 100644
index 0000000..ee22195
--- /dev/null
+++ b/integrationtests/test_unix_endings.txt
@@ -0,0 +1,10 @@
+1 Sat 2 Oct 13:46:45 EEST 2021
+2 Sat 2 Oct 13:46:45 EEST 2021
+3 Sat 2 Oct 13:46:45 EEST 2021
+4 Sat 2 Oct 13:46:45 EEST 2021
+5 Sat 2 Oct 13:46:45 EEST 2021
+6 Sat 2 Oct 13:46:45 EEST 2021
+7 Sat 2 Oct 13:46:45 EEST 2021
+8 Sat 2 Oct 13:46:45 EEST 2021
+9 Sat 2 Oct 13:46:45 EEST 2021
+10 Sat 2 Oct 13:46:45 EEST 2021
diff --git a/internal/io/fs/catprocessor.go b/internal/io/fs/catprocessor.go
index 4430488..b062c7f 100644
--- a/internal/io/fs/catprocessor.go
+++ b/internal/io/fs/catprocessor.go
@@ -10,18 +10,18 @@ import (
// CatProcessor handles cat-style output
type CatProcessor struct {
- plain bool
- noColor bool
- hostname string
+ plain bool
+ noColor bool
+ hostname string
isFirstLine bool
}
// NewCatProcessor creates a new cat processor
func NewCatProcessor(plain, noColor bool, hostname string) *CatProcessor {
return &CatProcessor{
- plain: plain,
- noColor: noColor,
- hostname: hostname,
+ plain: plain,
+ noColor: noColor,
+ hostname: hostname,
isFirstLine: true,
}
}
@@ -34,12 +34,16 @@ func (cp *CatProcessor) Cleanup() error {
return nil
}
+// ProcessLine processes a single line for cat output.
+// In plain mode, it preserves the original line exactly including line endings.
+// In non-plain mode, it formats the line according to DTail protocol with optional colorization.
+// Returns the formatted line and true (cat always outputs all lines).
func (cp *CatProcessor) ProcessLine(line []byte, lineNum int, filePath string, stats *stats, sourceID string) ([]byte, bool) {
// Update stats for matched line (cat always matches all lines)
if stats != nil {
stats.updateLineMatched()
}
-
+
// Format output to match existing behavior
if cp.plain {
// In plain mode, preserve the original line exactly as it is
@@ -48,7 +52,7 @@ func (cp *CatProcessor) ProcessLine(line []byte, lineNum int, filePath string, s
copy(result, line)
return result, true
}
-
+
// Format exactly like original basehandler.go for non-plain mode
// REMOTE|{hostname}|{TransmittedPerc}|{Count}|{SourceID}|{Content}¬
var transmittedPerc int
@@ -58,17 +62,17 @@ func (cp *CatProcessor) ProcessLine(line []byte, lineNum int, filePath string, s
transmittedPerc = 100
count = stats.totalLineCount()
}
-
+
// Build the protocol line
protocolLine := fmt.Sprintf("REMOTE%s%s%s%3d%s%v%s%s%s%s",
protocol.FieldDelimiter, cp.hostname, protocol.FieldDelimiter,
transmittedPerc, protocol.FieldDelimiter, count, protocol.FieldDelimiter,
sourceID, protocol.FieldDelimiter, string(line))
-
+
// Apply ANSI color formatting if not in plain mode and not noColor mode
if !cp.plain && !cp.noColor {
colorized := brush.Colorfy(protocolLine)
-
+
// Add color reset prefix for all lines except the first
var result []byte
if cp.isFirstLine {
@@ -86,12 +90,12 @@ func (cp *CatProcessor) ProcessLine(line []byte, lineNum int, filePath string, s
}
return result, true
}
-
+
// No color formatting
result := make([]byte, len(protocolLine)+1)
copy(result, protocolLine)
result[len(protocolLine)] = '\n'
-
+
return result, true
}
@@ -102,4 +106,4 @@ func (cp *CatProcessor) Flush() []byte {
return []byte("\x1b[39m\x1b[49m\x1b[49m\x1b[39m")
}
return nil
-} \ No newline at end of file
+}
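
The non-plain branch above rebuilds DTail's wire format, REMOTE|{hostname}|{TransmittedPerc}|{Count}|{SourceID}|{Content}. A standalone sketch of that framing, with a literal "|" standing in for protocol.FieldDelimiter (an assumption for illustration; the real value comes from the protocol package):

```go
package main

import "fmt"

const fieldDelimiter = "|" // assumption: illustrative stand-in for protocol.FieldDelimiter

// formatRemoteLine mirrors the Sprintf call in CatProcessor.ProcessLine.
func formatRemoteLine(hostname string, transmittedPerc, count int, sourceID, content string) string {
	return fmt.Sprintf("REMOTE%s%s%s%3d%s%v%s%s%s%s",
		fieldDelimiter, hostname, fieldDelimiter,
		transmittedPerc, fieldDelimiter, count, fieldDelimiter,
		sourceID, fieldDelimiter, content)
}

func main() {
	fmt.Println(formatRemoteLine("earth", 100, 42, "mylog.log", "hello world"))
	// REMOTE|earth|100|42|mylog.log|hello world
}
```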
diff --git a/internal/io/fs/directprocessor.go b/internal/io/fs/directprocessor.go
index 762fa8f..9c564e7 100644
--- a/internal/io/fs/directprocessor.go
+++ b/internal/io/fs/directprocessor.go
@@ -63,9 +63,9 @@ func (dp *DirectProcessor) ProcessReader(ctx context.Context, reader io.Reader,
if catProcessor, ok := dp.processor.(*CatProcessor); ok && catProcessor.plain {
return dp.processReaderPreservingLineEndings(ctx, reader, filePath)
}
-
+
scanner := bufio.NewScanner(reader)
-
+
// Set buffer size respecting MaxLineLength configuration
maxLineLength := config.Server.MaxLineLength
initialBufSize := 64 * 1024
@@ -73,7 +73,7 @@ func (dp *DirectProcessor) ProcessReader(ctx context.Context, reader io.Reader,
initialBufSize = maxLineLength
}
scanner.Buffer(make([]byte, initialBufSize), maxLineLength)
-
+
lineNum := 0
for scanner.Scan() {
select {
@@ -81,35 +81,35 @@ func (dp *DirectProcessor) ProcessReader(ctx context.Context, reader io.Reader,
return ctx.Err()
default:
}
-
+
lineNum++
line := scanner.Bytes()
-
+
// Update position stats
if dp.stats != nil {
dp.stats.updatePosition()
}
-
- // Process line directly
+
+ // Process line directly
if result, shouldSend := dp.processor.ProcessLine(line, lineNum, filePath, dp.stats, dp.sourceID); shouldSend {
if _, err := dp.output.Write(result); err != nil {
return err
}
-
+
// Update transmission stats
if dp.stats != nil {
dp.stats.updateLineTransmitted()
}
}
}
-
+
// Flush any buffered output
if final := dp.processor.Flush(); len(final) > 0 {
if _, err := dp.output.Write(final); err != nil {
return err
}
}
-
+
return scanner.Err()
}
@@ -121,20 +121,19 @@ func (dp *DirectProcessor) processReaderPreservingLineEndings(ctx context.Contex
lineNum := 0
maxLineLength := config.Server.MaxLineLength
warnedAboutLongLine := false
-
-
+
for {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
-
+
n, err := reader.Read(buf)
if n > 0 {
data := append(remaining, buf[:n]...)
remaining = remaining[:0]
-
+
// Process complete lines
for {
// Find next line ending (LF or CRLF)
@@ -153,24 +152,24 @@ func (dp *DirectProcessor) processReaderPreservingLineEndings(ctx context.Contex
splitLine := make([]byte, maxLineLength+1)
copy(splitLine, data[:maxLineLength])
splitLine[maxLineLength] = '\n'
-
+
// Update position stats
if dp.stats != nil {
dp.stats.updatePosition()
}
-
+
// Process the split line
if result, shouldSend := dp.processor.ProcessLine(splitLine, lineNum, filePath, dp.stats, dp.sourceID); shouldSend {
if _, err := dp.output.Write(result); err != nil {
return err
}
-
+
// Update transmission stats
if dp.stats != nil {
dp.stats.updateLineTransmitted()
}
}
-
+
// Continue with remaining data
data = data[maxLineLength:]
continue
@@ -180,19 +179,20 @@ func (dp *DirectProcessor) processReaderPreservingLineEndings(ctx context.Contex
break
}
}
-
- line := data[:lfIndex+1] // Include the LF
- data = data[lfIndex+1:] // Continue with remaining data
-
+
+ // Extract the line including its original line ending (CRLF or LF)
+ line := data[:lfIndex+1] // Include the LF (and CR if present before it)
+ data = data[lfIndex+1:] // Continue with remaining data
+
// Reset warning flag for new line
warnedAboutLongLine = false
-
+
// Check if this line exceeds max length and needs to be split
if len(line) > maxLineLength {
// Split the long line into chunks
lineContent := line[:len(line)-1] // Remove the LF
lineEnding := line[len(line)-1:] // Keep the LF
-
+
for len(lineContent) > 0 {
lineNum++
var chunk []byte
@@ -207,18 +207,18 @@ func (dp *DirectProcessor) processReaderPreservingLineEndings(ctx context.Contex
copy(chunk[len(lineContent):], lineEnding)
lineContent = nil
}
-
+
// Update position stats
if dp.stats != nil {
dp.stats.updatePosition()
}
-
+
// Process the chunk
if result, shouldSend := dp.processor.ProcessLine(chunk, lineNum, filePath, dp.stats, dp.sourceID); shouldSend {
if _, err := dp.output.Write(result); err != nil {
return err
}
-
+
// Update transmission stats
if dp.stats != nil {
dp.stats.updateLineTransmitted()
@@ -228,18 +228,18 @@ func (dp *DirectProcessor) processReaderPreservingLineEndings(ctx context.Contex
} else {
// Normal line processing
lineNum++
-
+
// Update position stats
if dp.stats != nil {
dp.stats.updatePosition()
}
-
+
// Process line directly (line includes original line ending)
if result, shouldSend := dp.processor.ProcessLine(line, lineNum, filePath, dp.stats, dp.sourceID); shouldSend {
if _, err := dp.output.Write(result); err != nil {
return err
}
-
+
// Update transmission stats
if dp.stats != nil {
dp.stats.updateLineTransmitted()
@@ -248,12 +248,12 @@ func (dp *DirectProcessor) processReaderPreservingLineEndings(ctx context.Contex
}
}
}
-
+
if err == io.EOF {
// Process any remaining data as the last line, respecting line length limit
for len(remaining) > 0 {
lineNum++
-
+
var lineToProcess []byte
if len(remaining) > maxLineLength {
// Split the remaining data
@@ -266,17 +266,17 @@ func (dp *DirectProcessor) processReaderPreservingLineEndings(ctx context.Contex
lineToProcess = remaining
remaining = nil
}
-
+
// Update position stats
if dp.stats != nil {
dp.stats.updatePosition()
}
-
+
if result, shouldSend := dp.processor.ProcessLine(lineToProcess, lineNum, filePath, dp.stats, dp.sourceID); shouldSend {
if _, err := dp.output.Write(result); err != nil {
return err
}
-
+
// Update transmission stats
if dp.stats != nil {
dp.stats.updateLineTransmitted()
@@ -285,19 +285,19 @@ func (dp *DirectProcessor) processReaderPreservingLineEndings(ctx context.Contex
}
break
}
-
+
if err != nil {
return err
}
}
-
+
// Flush any buffered output
if final := dp.processor.Flush(); len(final) > 0 {
if _, err := dp.output.Write(final); err != nil {
return err
}
}
-
+
return nil
}
@@ -323,7 +323,7 @@ func (dp *DirectProcessor) ProcessFileWithTailing(ctx context.Context, filePath
func (dp *DirectProcessor) followFile(ctx context.Context, filePath string) error {
// Track our current position in the file
var lastSize int64
-
+
for {
select {
case <-ctx.Done():
@@ -334,7 +334,7 @@ func (dp *DirectProcessor) followFile(ctx context.Context, filePath string) erro
if err != nil {
continue
}
-
+
currentSize := fileInfo.Size()
if currentSize > lastSize {
// File has new content, read it
@@ -342,19 +342,19 @@ func (dp *DirectProcessor) followFile(ctx context.Context, filePath string) erro
if err != nil {
continue
}
-
+
// Seek to where we left off
if _, err := file.Seek(lastSize, 0); err != nil {
file.Close()
continue
}
-
+
// Process new content
if err := dp.processNewContent(ctx, file, filePath); err != nil {
file.Close()
continue
}
-
+
lastSize = currentSize
file.Close()
}
@@ -365,35 +365,35 @@ func (dp *DirectProcessor) followFile(ctx context.Context, filePath string) erro
// processNewContent processes new content that was added to the file
func (dp *DirectProcessor) processNewContent(ctx context.Context, file *os.File, filePath string) error {
scanner := bufio.NewScanner(file)
-
+
// Start line counting from where we left off (simplified approach)
lineNum := 1
-
+
for scanner.Scan() {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
-
+
lineBuf := scanner.Bytes()
if result, shouldSend := dp.processor.ProcessLine(lineBuf, lineNum, filePath, dp.stats, dp.sourceID); shouldSend {
if _, err := dp.output.Write(result); err != nil {
return err
}
-
+
// Update transmission stats
if dp.stats != nil {
dp.stats.updateLineTransmitted()
}
}
lineNum++
-
+
// Update position stats
if dp.stats != nil {
dp.stats.updatePosition()
}
}
-
+
return scanner.Err()
-} \ No newline at end of file
+}
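
The ending-preserving reader above hinges on one detail: each line slice is cut after the LF rather than before it, so a preceding CR travels with the line. A minimal sketch of that splitting, under the simplifying assumption that all data is already in memory (the real code streams through a fixed buffer and handles overlong lines):

```go
package main

import (
	"bytes"
	"fmt"
)

// splitPreservingEndings cuts at each LF but keeps it in the slice, so
// "\r\n" lines stay "\r\n" and "\n" lines stay "\n".
func splitPreservingEndings(data []byte) [][]byte {
	var lines [][]byte
	for {
		i := bytes.IndexByte(data, '\n')
		if i < 0 {
			if len(data) > 0 {
				lines = append(lines, data) // trailing data without a newline
			}
			return lines
		}
		lines = append(lines, data[:i+1]) // includes "\n" (and "\r" if present)
		data = data[i+1:]
	}
}

func main() {
	for _, l := range splitPreservingEndings([]byte("a\r\nb\nc")) {
		fmt.Printf("%q\n", l) // "a\r\n", "b\n", "c"
	}
}
```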
diff --git a/internal/io/fs/grepprocessor.go b/internal/io/fs/grepprocessor.go
index 6b34dc1..ed1c271 100644
--- a/internal/io/fs/grepprocessor.go
+++ b/internal/io/fs/grepprocessor.go
@@ -15,38 +15,38 @@ type GrepProcessor struct {
plain bool
noColor bool
hostname string
-
+
// Context handling
beforeContext int
afterContext int
maxCount int
-
+
// State for context processing
- matchCount int
- afterRemaining int
- beforeBuffer [][]byte
- beforeLineNums []int
+ matchCount int
+ afterRemaining int
+ beforeBuffer [][]byte
+ beforeLineNums []int
}
// NewGrepProcessor creates a new grep processor
func NewGrepProcessor(re regex.Regex, plain, noColor bool, hostname string, beforeContext, afterContext, maxCount int) *GrepProcessor {
gp := &GrepProcessor{
- regex: re,
- plain: plain,
- noColor: noColor,
- hostname: hostname,
- beforeContext: beforeContext,
- afterContext: afterContext,
- maxCount: maxCount,
- matchCount: 0,
+ regex: re,
+ plain: plain,
+ noColor: noColor,
+ hostname: hostname,
+ beforeContext: beforeContext,
+ afterContext: afterContext,
+ maxCount: maxCount,
+ matchCount: 0,
afterRemaining: 0,
}
-
+
if beforeContext > 0 {
gp.beforeBuffer = make([][]byte, 0, beforeContext)
gp.beforeLineNums = make([]int, 0, beforeContext)
}
-
+
return gp
}
@@ -58,10 +58,12 @@ func (gp *GrepProcessor) Cleanup() error {
return nil
}
+// ProcessLine processes a single line for grep filtering with context support.
+// Returns formatted output for matching lines and their context, or nil for non-matching lines.
+// Handles before/after context lines and respects maxCount limit.
func (gp *GrepProcessor) ProcessLine(line []byte, lineNum int, filePath string, stats *stats, sourceID string) ([]byte, bool) {
isMatch := gp.regex.Match(line)
-
-
+
// Handle lines that don't match the regex
if !isMatch {
// Handle after context lines (only for non-matching lines)
@@ -78,7 +80,7 @@ func (gp *GrepProcessor) ProcessLine(line []byte, lineNum int, filePath string,
// Make a copy of the line for buffering
lineCopy := make([]byte, len(line))
copy(lineCopy, line)
-
+
// Add to buffer, removing oldest if at capacity
if len(gp.beforeBuffer) >= gp.beforeContext {
gp.beforeBuffer = gp.beforeBuffer[1:]
@@ -89,23 +91,23 @@ func (gp *GrepProcessor) ProcessLine(line []byte, lineNum int, filePath string,
}
return nil, false
}
-
+
// Line matches the regex
gp.matchCount++
-
+
// Check if we've reached maxCount
if gp.maxCount > 0 && gp.matchCount > gp.maxCount {
return nil, false
}
-
+
// Update stats for matched line
if stats != nil {
stats.updateLineMatched()
}
-
+
// Build result with before context, current line, and set up after context
var result []byte
-
+
// First, output any before context lines
if gp.beforeContext > 0 {
for i, beforeLine := range gp.beforeBuffer {
@@ -117,16 +119,16 @@ func (gp *GrepProcessor) ProcessLine(line []byte, lineNum int, filePath string,
gp.beforeBuffer = gp.beforeBuffer[:0]
gp.beforeLineNums = gp.beforeLineNums[:0]
}
-
+
// Add the matching line
formatted := gp.formatLine(line, lineNum, filePath, stats, sourceID)
result = append(result, formatted...)
-
+
// Set up after context (only if we're not already in after context mode)
if gp.afterContext > 0 && gp.afterRemaining == 0 {
gp.afterRemaining = gp.afterContext
}
-
+
return result, true
}
@@ -143,7 +145,7 @@ func (gp *GrepProcessor) formatLine(line []byte, lineNum int, filePath string, s
result[len(line)] = '\n'
return result
}
-
+
// Format exactly like original basehandler.go for non-plain mode
// REMOTE|{hostname}|{TransmittedPerc}|{Count}|{SourceID}|{Content}¬
var transmittedPerc int
@@ -152,14 +154,14 @@ func (gp *GrepProcessor) formatLine(line []byte, lineNum int, filePath string, s
transmittedPerc = stats.transmittedPerc()
count = stats.totalLineCount()
}
-
+
// Build the protocol line
protocolLine := fmt.Sprintf("REMOTE%s%s%s%3d%s%v%s%s%s%s",
protocol.FieldDelimiter, gp.hostname, protocol.FieldDelimiter,
transmittedPerc, protocol.FieldDelimiter, count, protocol.FieldDelimiter,
sourceID, protocol.FieldDelimiter, string(line))
-
- // Apply ANSI color formatting if not in plain mode and not noColor mode
+
+ // Apply ANSI color formatting if not in plain mode and not noColor mode.
if !gp.plain && !gp.noColor {
colorized := brush.Colorfy(protocolLine)
result := make([]byte, len(colorized)+1)
@@ -167,11 +169,11 @@ func (gp *GrepProcessor) formatLine(line []byte, lineNum int, filePath string, s
result[len(colorized)] = '\n'
return result
}
-
+
// No color formatting
result := make([]byte, len(protocolLine)+1)
copy(result, protocolLine)
result[len(protocolLine)] = '\n'
-
+
return result
-} \ No newline at end of file
+}
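
The before-context handling above keeps a bounded FIFO of the most recent non-matching lines and flushes it when a match arrives. A simplified sketch of that buffer (strings instead of []byte, line numbers omitted; mirrors beforeBuffer in spirit only):

```go
package main

import "fmt"

// contextBuffer holds at most `capacity` recent lines, dropping the oldest.
type contextBuffer struct {
	capacity int
	lines    []string
}

func (b *contextBuffer) add(line string) {
	if len(b.lines) >= b.capacity {
		b.lines = b.lines[1:] // drop oldest, as in GrepProcessor.ProcessLine
	}
	b.lines = append(b.lines, line)
}

// flush returns the buffered context and resets the buffer, which is what
// happens when a matching line is emitted with its before-context.
func (b *contextBuffer) flush() []string {
	out := append([]string(nil), b.lines...)
	b.lines = b.lines[:0]
	return out
}

func main() {
	b := &contextBuffer{capacity: 2}
	for _, l := range []string{"one", "two", "three"} {
		b.add(l)
	}
	fmt.Println(b.flush()) // [two three] -- only the newest two survive
}
```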
diff --git a/internal/io/fs/mapprocessor.go b/internal/io/fs/mapprocessor.go
index b1b2283..a2e051d 100644
--- a/internal/io/fs/mapprocessor.go
+++ b/internal/io/fs/mapprocessor.go
@@ -16,15 +16,15 @@ import (
// MapProcessor handles MapReduce-style aggregation
type MapProcessor struct {
- plain bool
- hostname string
- query *mapr.Query
- parser logformat.Parser
- groupSet *mapr.GroupSet
- buffer []byte
- output io.Writer
- lastSerialized time.Time
- serializeFunc func(groupSet *mapr.GroupSet)
+ plain bool
+ hostname string
+ query *mapr.Query
+ parser logformat.Parser
+ groupSet *mapr.GroupSet
+ buffer []byte
+ output io.Writer
+ lastSerialized time.Time
+ serializeFunc func(groupSet *mapr.GroupSet)
}
// NewMapProcessor creates a new map processor
@@ -63,10 +63,10 @@ func NewMapProcessor(plain bool, hostname string, queryStr string, output io.Wri
output: output,
lastSerialized: time.Now(),
}
-
+
// Set up serialization function
mp.serializeFunc = mp.defaultSerializeFunc
-
+
return mp, nil
}
@@ -83,10 +83,13 @@ func (mp *MapProcessor) Cleanup() error {
return nil
}
+// ProcessLine processes a single line for MapReduce aggregation.
+// Parses the line, applies WHERE and SET clauses, aggregates matching fields,
+// and handles periodic serialization. Returns nil (no immediate output for MapReduce).
func (mp *MapProcessor) ProcessLine(line []byte, lineNum int, filePath string, stats *stats, sourceID string) ([]byte, bool) {
// Convert line to string and parse fields
maprLine := strings.TrimSpace(string(line))
-
+
fields, err := mp.parser.MakeFields(maprLine)
if err != nil {
// Should fields be ignored anyway?
@@ -95,12 +98,12 @@ func (mp *MapProcessor) ProcessLine(line []byte, lineNum int, filePath string, s
}
return nil, false
}
-
+
// Apply WHERE clause filter
if !mp.query.WhereClause(fields) {
return nil, false
}
-
+
// Apply SET clause (add additional fields)
if len(mp.query.Set) > 0 {
if err := mp.query.SetClause(fields); err != nil {
@@ -108,20 +111,23 @@ func (mp *MapProcessor) ProcessLine(line []byte, lineNum int, filePath string, s
return nil, false
}
}
-
+
// Aggregate the fields
mp.aggregateFields(fields)
-
+
// Check if we should serialize results periodically (every 5 seconds by default)
now := time.Now()
if now.Sub(mp.lastSerialized) >= mp.query.Interval {
mp.periodicSerialize()
mp.lastSerialized = now
}
-
+
return nil, false // No immediate output for MapReduce - output happens periodically
}
+// aggregateFields groups parsed fields by the GROUP BY clause and aggregates values
+// according to the SELECT operations. Creates a group key from GROUP BY fields
+// and updates the corresponding aggregation set with SELECT field values.
func (mp *MapProcessor) aggregateFields(fields map[string]string) {
var sb strings.Builder
for i, field := range mp.query.GroupBy {
@@ -160,12 +166,15 @@ func (mp *MapProcessor) periodicSerialize() {
mp.groupSet = mapr.NewGroupSet()
}
-// defaultSerializeFunc implements the default serialization behavior
+// defaultSerializeFunc implements the default serialization behavior for MapReduce results.
+// This function is called periodically to send aggregated data to the client.
+// It uses a channel-based approach to serialize the group set and format output
+// according to the DTail protocol (A|serialized_data¬) for transmission.
func (mp *MapProcessor) defaultSerializeFunc(groupSet *mapr.GroupSet) {
// Use a channel to collect serialized data
ch := make(chan string, 100)
done := make(chan struct{})
-
+
go func() {
defer close(done)
for msg := range ch {
@@ -175,14 +184,14 @@ func (mp *MapProcessor) defaultSerializeFunc(groupSet *mapr.GroupSet) {
output.WriteString(protocol.FieldDelimiter)
output.WriteString(msg)
output.WriteByte(protocol.MessageDelimiter)
-
+
// Write to output immediately
if mp.output != nil {
mp.output.Write([]byte(output.String()))
}
}
}()
-
+
// Serialize the group set
ctx := context.Background()
groupSet.Serialize(ctx, ch)
@@ -196,4 +205,4 @@ func (mp *MapProcessor) Flush() []byte {
mp.serializeFunc(mp.groupSet)
}
return nil // Output is handled by serializeFunc
-} \ No newline at end of file
+}
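
aggregateFields, as documented above, derives one bucket key per line by joining the values of the GROUP BY fields. A reduced sketch of that key construction; the "," separator is an illustrative assumption, not the processor's actual delimiter:

```go
package main

import (
	"fmt"
	"strings"
)

// groupKey joins the GROUP BY field values into a single string key
// that selects the aggregation bucket in the group set.
func groupKey(fields map[string]string, groupBy []string) string {
	var sb strings.Builder
	for i, field := range groupBy {
		if i > 0 {
			sb.WriteString(",") // assumption: stand-in for the real delimiter
		}
		sb.WriteString(fields[field])
	}
	return sb.String()
}

func main() {
	fields := map[string]string{"host": "earth", "level": "WARN"}
	fmt.Println(groupKey(fields, []string{"host", "level"})) // earth,WARN
}
```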
diff --git a/internal/server/handlers/basehandler.go b/internal/server/handlers/basehandler.go
index 7daf071..f23c9e5 100644
--- a/internal/server/handlers/basehandler.go
+++ b/internal/server/handlers/basehandler.go
@@ -107,9 +107,12 @@ func (h *baseHandler) Read(p []byte) (n int, err error) {
h.readBuf.WriteString(protocol.FieldDelimiter)
h.readBuf.WriteString(line.SourceID)
h.readBuf.WriteString(protocol.FieldDelimiter)
+ h.readBuf.WriteString(line.Content.String())
+ h.readBuf.WriteByte(protocol.MessageDelimiter)
+ } else {
+ // In plain mode, preserve exact line content including line endings
+ h.readBuf.WriteString(line.Content.String())
}
- h.readBuf.WriteString(line.Content.String())
- h.readBuf.WriteByte(protocol.MessageDelimiter)
n = copy(p, h.readBuf.Bytes())
pool.RecycleBytesBuffer(line.Content)
line.Recycle()
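
This hunk is the behavioral core of the commit: the message delimiter moves inside the non-plain branch, so plain mode now forwards bytes untouched. A simplified, hypothetical rendering of the corrected flow (the real delimiter comes from the protocol package):

```go
package main

import (
	"bytes"
	"fmt"
)

const messageDelimiter = '\n' // assumption: stand-in for protocol.MessageDelimiter

// writeLine mirrors the corrected Read: framing is appended only in
// non-plain mode; plain mode passes the original bytes straight through.
func writeLine(buf *bytes.Buffer, content string, plain bool) {
	if plain {
		// content still carries its own "\n" or "\r\n", so endings survive.
		buf.WriteString(content)
		return
	}
	buf.WriteString(content)
	buf.WriteByte(messageDelimiter)
}

func main() {
	var plainBuf, protoBuf bytes.Buffer
	writeLine(&plainBuf, "hello\r\n", true)
	writeLine(&protoBuf, "hello", false)
	fmt.Printf("plain: %q, protocol: %q\n", plainBuf.String(), protoBuf.String())
}
```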
diff --git a/scripts/pgo.sh b/scripts/pgo.sh
deleted file mode 100755
index a590313..0000000
--- a/scripts/pgo.sh
+++ /dev/null
@@ -1,159 +0,0 @@
-#!/bin/bash
-
-# Performance Guided Optimization (PGO) script for dgrep
-# This script implements true PGO using Go's -pgo compiler flag:
-# 1. Build baseline version
-# 2. Generate CPU profile for training
-# 3. Rebuild with PGO using the profile
-# 4. Compare before/after performance
-
-set -e
-
-# Global variables
-setup_environment() {
- # Get the directory where this script is located
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
- # Get the project root directory (parent of scripts)
- PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
-
- # Change to project root to run commands
- cd "$PROJECT_ROOT"
-
- # Define paths for all PGO files in scripts directory
- PGO_DIR="$SCRIPT_DIR"
- TEST_FILE="$PGO_DIR/test_100mb.txt"
- BASELINE_CPU_PROF="$PGO_DIR/pgo_baseline_cpu.prof"
- BASELINE_MEM_PROF="$PGO_DIR/pgo_baseline_mem.prof"
- TRAINING_PROF="$PGO_DIR/pgo_training.prof"
- OPTIMIZED_CPU_PROF="$PGO_DIR/pgo_optimized_cpu.prof"
- OPTIMIZED_MEM_PROF="$PGO_DIR/pgo_optimized_mem.prof"
- REPORT_FILE="$PGO_DIR/pgo_report.txt"
-
- echo "=== Starting Profile Guided Optimization (PGO) for dgrep ==="
- echo "Working directory: $PROJECT_ROOT"
- echo "PGO files location: $PGO_DIR"
-}
-
-create_test_file() {
- echo "1. Creating test file if needed..."
- if [ ! -f "$TEST_FILE" ]; then
- echo "Creating 100MB test file with 1M lines..."
- for i in $(seq 1 1000000); do
- echo "$i: This is a test line with INFO level logging and some extra content to make it realistic"
- done > "$TEST_FILE"
- fi
-}
-
-build_baseline() {
- echo "2. Building baseline version (without PGO)..."
- # Clean any existing binaries
- rm -f dgrep dgrep_pgo dcat dmap dtail dserver dtailhealth
- go build -tags '' -o dgrep ./cmd/dgrep/main.go
-}
-
-collect_training_data() {
- echo "3. Running baseline performance test and collecting training profile..."
- echo " - Generating baseline CPU and memory profiles..."
- ./dgrep --plain -regex "INFO" -files "$TEST_FILE" -cpuprofile "$BASELINE_CPU_PROF" -memprofile "$BASELINE_MEM_PROF" > /dev/null
-
- echo " - Collecting training profile for PGO..."
- ./dgrep --plain -regex "INFO" -files "$TEST_FILE" -cpuprofile "$TRAINING_PROF" > /dev/null
-}
-
-build_pgo_optimized() {
- echo "4. Building PGO-optimized version using training profile..."
- go build -tags '' -pgo="$TRAINING_PROF" -o dgrep_pgo ./cmd/dgrep/main.go
-}
-
-run_pgo_performance_test() {
- echo "5. Running PGO-optimized performance test..."
- echo " - Generating optimized CPU and memory profiles..."
- ./dgrep_pgo --plain -regex "INFO" -files "$TEST_FILE" -cpuprofile "$OPTIMIZED_CPU_PROF" -memprofile "$OPTIMIZED_MEM_PROF" > /dev/null
-}
-
-run_performance_comparison() {
- echo "6. Running performance comparison..."
- echo "=== PROFILE GUIDED OPTIMIZATION REPORT ===" > "$REPORT_FILE"
- echo "Generated: $(date)" >> "$REPORT_FILE"
- echo "" >> "$REPORT_FILE"
-
- echo "BASELINE (without PGO):" >> "$REPORT_FILE"
- echo "Baseline performance (5 iterations):" >> "$REPORT_FILE"
- for i in 1 2 3 4 5; do
- echo " Iteration $i:"
- { time ./dgrep --plain -regex "INFO" -files "$TEST_FILE" > /dev/null; } 2>&1 | grep real >> "$REPORT_FILE"
- done
-
- echo "" >> "$REPORT_FILE"
- echo "PGO-OPTIMIZED:" >> "$REPORT_FILE"
- echo "PGO-optimized performance (5 iterations):" >> "$REPORT_FILE"
- for i in 1 2 3 4 5; do
- echo " Iteration $i:"
- { time ./dgrep_pgo --plain -regex "INFO" -files "$TEST_FILE" > /dev/null; } 2>&1 | grep real >> "$REPORT_FILE"
- done
-}
-
-generate_detailed_analysis() {
- echo "7. Adding detailed profile analysis..."
- echo "" >> "$REPORT_FILE"
- echo "DETAILED ANALYSIS:" >> "$REPORT_FILE"
- echo "" >> "$REPORT_FILE"
- echo "Baseline CPU Profile:" >> "$REPORT_FILE"
- go tool pprof -top "$BASELINE_CPU_PROF" | head -10 >> "$REPORT_FILE"
- echo "" >> "$REPORT_FILE"
- echo "PGO-Optimized CPU Profile:" >> "$REPORT_FILE"
- go tool pprof -top "$OPTIMIZED_CPU_PROF" | head -10 >> "$REPORT_FILE"
- echo "" >> "$REPORT_FILE"
- echo "Baseline Memory Profile:" >> "$REPORT_FILE"
- go tool pprof -top "$BASELINE_MEM_PROF" | head -10 >> "$REPORT_FILE"
- echo "" >> "$REPORT_FILE"
- echo "PGO-Optimized Memory Profile:" >> "$REPORT_FILE"
- go tool pprof -top "$OPTIMIZED_MEM_PROF" | head -10 >> "$REPORT_FILE"
-}
-
-cleanup() {
- echo "8. Cleaning up..."
- rm -f dgrep_pgo
-}
-
-show_summary() {
- echo "=== PGO Complete! ==="
- echo "Results saved to: $REPORT_FILE"
- echo "Profile files generated:"
- echo " - Baseline: $BASELINE_CPU_PROF, $BASELINE_MEM_PROF"
- echo " - Training: $TRAINING_PROF"
- echo " - Optimized: $OPTIMIZED_CPU_PROF, $OPTIMIZED_MEM_PROF"
- echo ""
- echo "Test file location: $TEST_FILE"
- echo ""
- echo "PGO Process:"
- echo " ✓ Built baseline version without PGO"
- echo " ✓ Collected CPU profile for training"
- echo " ✓ Rebuilt with Go's -pgo flag using training profile"
- echo " ✓ Compared baseline vs PGO-optimized performance"
- echo ""
-
- # Show performance comparison from report
- echo "=== Performance Comparison ==="
- echo "Check $REPORT_FILE for detailed before/after comparison"
- grep -A 20 "BASELINE (without PGO)" "$REPORT_FILE" | head -10
- echo "..."
- grep -A 20 "PGO-OPTIMIZED" "$REPORT_FILE" | head -10
-}
-
-# Main execution flow
-main() {
- setup_environment
- create_test_file
- build_baseline
- collect_training_data
- build_pgo_optimized
- run_pgo_performance_test
- run_performance_comparison
- generate_detailed_analysis
- cleanup
- show_summary
-}
-
-# Run the main function
-main "$@" \ No newline at end of file
diff --git a/scripts/pgo_report.txt b/scripts/pgo_report.txt
deleted file mode 100644
index d6e1a83..0000000
--- a/scripts/pgo_report.txt
+++ /dev/null
@@ -1,68 +0,0 @@
-=== PROFILE GUIDED OPTIMIZATION REPORT ===
-Generated: Mon 16 Jun 23:18:37 EEST 2025
-
-BASELINE (without PGO):
-Baseline performance (5 iterations):
-real 0m3.040s
-real 0m3.029s
-real 0m3.032s
-real 0m3.030s
-real 0m3.031s
-
-PGO-OPTIMIZED:
-PGO-optimized performance (5 iterations):
-real 0m3.035s
-real 0m3.033s
-real 0m3.033s
-real 0m3.034s
-real 0m3.031s
-
-DETAILED ANALYSIS:
-
-Baseline CPU Profile:
-File: dgrep
-Build ID: c4f25989f74683061bfabfc72b383431d1aeeb23
-Type: cpu
-Time: 2025-06-16 23:17:42 EEST
-Duration: 3.20s, Total samples = 8.73s (272.51%)
-Showing nodes accounting for 7.32s, 83.85% of 8.73s total
-Dropped 174 nodes (cum <= 0.04s)
- flat flat% sum% cum cum%
- 2.23s 25.54% 25.54% 2.23s 25.54% internal/runtime/syscall.Syscall6
- 0.37s 4.24% 29.78% 1.01s 11.57% runtime.selectgo
-
-PGO-Optimized CPU Profile:
-File: dgrep_pgo
-Build ID: 106bf00e9fe2a0beaaf9b0e80a5e7e14aae84c40
-Type: cpu
-Time: 2025-06-16 23:18:34 EEST
-Duration: 3.11s, Total samples = 8.66s (278.78%)
-Showing nodes accounting for 7.41s, 85.57% of 8.66s total
-Dropped 152 nodes (cum <= 0.04s)
- flat flat% sum% cum cum%
- 2.17s 25.06% 25.06% 2.17s 25.06% internal/runtime/syscall.Syscall6
- 0.51s 5.89% 30.95% 1.31s 15.13% runtime.selectgo
-
-Baseline Memory Profile:
-File: dgrep
-Build ID: c4f25989f74683061bfabfc72b383431d1aeeb23
-Type: inuse_space
-Time: 2025-06-16 23:17:45 EEST
-Showing nodes accounting for 66.08MB, 100% of 66.08MB total
- flat flat% sum% cum cum%
- 33MB 49.94% 49.94% 60.84MB 92.06% time.NewTimer
- 27.83MB 42.12% 92.06% 27.83MB 42.12% time.newTimer
- 1.72MB 2.61% 94.67% 1.72MB 2.61% runtime/pprof.StartCPUProfile
- 1.50MB 2.27% 96.94% 1.50MB 2.27% runtime.allocm
-
-PGO-Optimized Memory Profile:
-File: dgrep_pgo
-Build ID: 106bf00e9fe2a0beaaf9b0e80a5e7e14aae84c40
-Type: inuse_space
-Time: 2025-06-16 23:18:37 EEST
-Showing nodes accounting for 80.57MB, 100% of 80.57MB total
- flat flat% sum% cum cum%
- 42.35MB 52.57% 52.57% 42.35MB 52.57% time.newTimer
- 32.50MB 40.34% 92.91% 74.86MB 92.91% time.NewTimer
- 2MB 2.49% 95.39% 2MB 2.49% runtime.allocm
- 1.16MB 1.44% 96.83% 1.16MB 1.44% runtime/pprof.StartCPUProfile