diff options
| author | Paul Buetow <paul@buetow.org> | 2025-06-26 21:54:29 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-06-26 21:54:29 +0300 |
| commit | 45613574ddfc6203ca31388ee7f6c60423f1cbd2 (patch) | |
| tree | 603888f06861a927118fae0b4741df3faaa3155f | |
| parent | 62d9774b4c2f776de60e8231fa6b8378d8afef4d (diff) | |
fix: update Makefile clean target and fix dmap profiling behavior
- Updated 'make clean' to also remove all .tmp and .prof files in the repo
- Fixed dmap profiling scripts to let dmap complete naturally instead of
  killing it after a timeout (dmap terminates when input is fully processed)
- Removed the special run_profile_dmap function as it's no longer needed
- Updated all profiling scripts to reflect that dmap has a natural exit point
Thanks for the correction - dmap does indeed terminate after processing
all data from the source file, so the timeout/kill approach was unnecessary.
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
| -rw-r--r-- | CLAUDE.md | 5 | ||||
| -rw-r--r-- | Makefile | 4 | ||||
| -rwxr-xr-x | profiling/profile_benchmarks.sh | 40 | ||||
| -rwxr-xr-x | profiling/profile_dmap.sh | 26 | ||||
| -rwxr-xr-x | profiling/profile_quick.sh | 10 |
5 files changed, 19 insertions, 66 deletions
@@ -20,9 +20,6 @@ make dgrep # Client for searching files make dmap # Client for MapReduce queries make dtailhealth # Health check client -# Install binaries -make install - # Clean build artifacts make clean @@ -194,4 +191,4 @@ When modifying server behavior: When working with MapReduce: 1. Query parsing in `/internal/mapr/parse/` 2. Aggregation logic in `/internal/mapr/reducer/` -3. Log format parsing in `/internal/mapr/logformat/`
\ No newline at end of file +3. Log format parsing in `/internal/mapr/logformat/` @@ -30,6 +30,10 @@ clean: ls ./cmd/ | while read cmd; do \ test -f $$cmd && rm $$cmd; \ done + @echo "Removing .tmp files..." + find . -name "*.tmp" -type f -delete + @echo "Removing .prof files..." + find . -name "*.prof" -type f -delete vet: find . -type d | egrep -v '(./examples|./log|./doc)' | while read dir; do \ echo ${GO} vet $$dir; \ diff --git a/profiling/profile_benchmarks.sh b/profiling/profile_benchmarks.sh index 4716192..d3fcabd 100755 --- a/profiling/profile_benchmarks.sh +++ b/profiling/profile_benchmarks.sh @@ -69,38 +69,6 @@ run_profile() { echo } -# Special function for profiling dmap which runs continuously -run_profile_dmap() { - local cmd=$1 - local name=$2 - local args=$3 - - echo -e "${GREEN}Profiling $cmd - $name${NC}" - - for i in $(seq 1 $PROFILE_RUNS); do - echo " Run $i/$PROFILE_RUNS..." - echo " Command: $cmd -profile -profiledir $PROFILE_DIR $args (will interrupt after 3s)" - - # Run dmap in background, wait a bit for it to process, then interrupt it - $cmd -profile -profiledir "$PROFILE_DIR" $args > /dev/null 2>&1 & - local pid=$! - - # Wait for dmap to process the file and generate initial results - sleep 3 - - # Send interrupt signal to make it exit cleanly - # We expect this to return non-zero, so we ignore the exit code - kill -INT $pid 2>/dev/null || true - wait $pid 2>/dev/null || true - - echo " Completed" - - # Small delay between runs - sleep 1 - done - - echo -} # Generate test data echo -e "${GREEN}Preparing test data...${NC}" @@ -165,13 +133,13 @@ if [ ! 
-f "$TEST_DATA_DIR/dtail_format.log" ]; then fi # Profile dmap with DTail format -run_profile_dmap "../dmap" "simple_count" "-plain -cfg none -query 'from STATS select count(*)' -files $TEST_DATA_DIR/dtail_format.log" -run_profile_dmap "../dmap" "aggregations" "-plain -cfg none -query 'from STATS select sum(\$goroutines),avg(\$cgocalls),max(lifetimeConnections)' -files $TEST_DATA_DIR/dtail_format.log" -run_profile_dmap "../dmap" "group_by_connections" "-plain -cfg none -query 'from STATS select currentConnections,count(*) group by currentConnections' -files $TEST_DATA_DIR/dtail_format.log" +run_profile "../dmap" "simple_count" "-plain -cfg none -query 'from STATS select count(*)' -files $TEST_DATA_DIR/dtail_format.log" +run_profile "../dmap" "aggregations" "-plain -cfg none -query 'from STATS select sum(\$goroutines),avg(\$cgocalls),max(lifetimeConnections)' -files $TEST_DATA_DIR/dtail_format.log" +run_profile "../dmap" "group_by_connections" "-plain -cfg none -query 'from STATS select currentConnections,count(*) group by currentConnections' -files $TEST_DATA_DIR/dtail_format.log" # Also test CSV format echo -e "\n${YELLOW}Testing CSV format with dmap${NC}" -run_profile_dmap "../dmap" "csv_query" "-plain -cfg none -query 'select user,action,count(*) where status=\"success\" group by user,action logformat csv' -files $TEST_DATA_DIR/test.csv" +run_profile "../dmap" "csv_query" "-plain -cfg none -query 'select user,action,count(*) where status=\"success\" group by user,action logformat csv' -files $TEST_DATA_DIR/test.csv" echo echo -e "${GREEN}Profiling complete!${NC}" diff --git a/profiling/profile_dmap.sh b/profiling/profile_dmap.sh index 1abf629..03402e1 100755 --- a/profiling/profile_dmap.sh +++ b/profiling/profile_dmap.sh @@ -100,33 +100,21 @@ echo -e "${GREEN}Profiling dmap queries...${NC}" # Query 1: Simple count echo -e "\n${YELLOW}Query: Count by hostname${NC}" QUERY="from STATS select count(\$line) group by hostname" -echo "Command: ../dmap -profile 
-profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log (will interrupt after 3s)" -# Run dmap in background and interrupt after 3 seconds -../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 & -DMAP_PID=$! -sleep 3 -kill -INT $DMAP_PID 2>/dev/null || true -wait $DMAP_PID 2>/dev/null || true +echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log" +# Run dmap and let it complete naturally +../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 # Query 2: Aggregations echo -e "\n${YELLOW}Query: Sum and average${NC}" QUERY="from STATS select sum(\$goroutines),avg(\$goroutines) group by hostname" -echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log (will interrupt after 3s)" -../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 & -DMAP_PID=$! -sleep 3 -kill -INT $DMAP_PID 2>/dev/null || true -wait $DMAP_PID 2>/dev/null || true +echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log" +../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 # Query 3: Min/Max echo -e "\n${YELLOW}Query: Min and max${NC}" QUERY="from STATS select min(currentConnections),max(lifetimeConnections) group by hostname" -echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log (will interrupt after 3s)" -../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 & -DMAP_PID=$! 
-sleep 3 -kill -INT $DMAP_PID 2>/dev/null || true -wait $DMAP_PID 2>/dev/null || true +echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log" +../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 echo echo -e "${GREEN}Analyzing dmap profiles...${NC}" diff --git a/profiling/profile_quick.sh b/profiling/profile_quick.sh index db63c73..249b73c 100755 --- a/profiling/profile_quick.sh +++ b/profiling/profile_quick.sh @@ -69,13 +69,9 @@ fi # Profile dmap (use proper MapReduce query on CSV file) echo -e "\n${YELLOW}Profiling dmap...${NC}" QUERY="select count($line),avg($duration) group by $user logformat csv" -echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/quick_test.csv (will interrupt after 3s)" -# Run dmap in background and interrupt after 3 seconds -../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/quick_test.csv" > /dev/null 2>&1 & -DMAP_PID=$! -sleep 3 -kill -INT $DMAP_PID 2>/dev/null || true -wait $DMAP_PID 2>/dev/null || true +echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/quick_test.csv" +# Run dmap and let it complete naturally +../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/quick_test.csv" > /dev/null 2>&1 DMAP_CPU=$(ls -t "$PROFILE_DIR"/dmap_cpu_*.prof 2>/dev/null | head -1) if [ -n "$DMAP_CPU" ]; then |
