summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Paul Buetow <paul@buetow.org> 2025-06-26 21:54:29 +0300
committer: Paul Buetow <paul@buetow.org> 2025-06-26 21:54:29 +0300
commit: 45613574ddfc6203ca31388ee7f6c60423f1cbd2 (patch)
tree: 603888f06861a927118fae0b4741df3faaa3155f
parent: 62d9774b4c2f776de60e8231fa6b8378d8afef4d (diff)
fix: update Makefile clean target and fix dmap profiling behavior
- Updated 'make clean' to also remove all .tmp and .prof files in the repo
- Fixed dmap profiling scripts to let dmap complete naturally instead of killing it after a timeout (dmap terminates when input is fully processed)
- Removed the special run_profile_dmap function as it's no longer needed
- Updated all profiling scripts to reflect that dmap has a natural exit point

Thanks for the correction - dmap does indeed terminate after processing all data from the source file, so the timeout/kill approach was unnecessary.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
-rw-r--r-- CLAUDE.md 5
-rw-r--r-- Makefile 4
-rwxr-xr-x profiling/profile_benchmarks.sh 40
-rwxr-xr-x profiling/profile_dmap.sh 26
-rwxr-xr-x profiling/profile_quick.sh 10
5 files changed, 19 insertions, 66 deletions
diff --git a/CLAUDE.md b/CLAUDE.md
index 39df79d..06c3562 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -20,9 +20,6 @@ make dgrep # Client for searching files
make dmap # Client for MapReduce queries
make dtailhealth # Health check client
-# Install binaries
-make install
-
# Clean build artifacts
make clean
@@ -194,4 +191,4 @@ When modifying server behavior:
When working with MapReduce:
1. Query parsing in `/internal/mapr/parse/`
2. Aggregation logic in `/internal/mapr/reducer/`
-3. Log format parsing in `/internal/mapr/logformat/` \ No newline at end of file
+3. Log format parsing in `/internal/mapr/logformat/`
diff --git a/Makefile b/Makefile
index bfe8d39..4a0df5e 100644
--- a/Makefile
+++ b/Makefile
@@ -30,6 +30,10 @@ clean:
ls ./cmd/ | while read cmd; do \
test -f $$cmd && rm $$cmd; \
done
+ @echo "Removing .tmp files..."
+ find . -name "*.tmp" -type f -delete
+ @echo "Removing .prof files..."
+ find . -name "*.prof" -type f -delete
vet:
find . -type d | egrep -v '(./examples|./log|./doc)' | while read dir; do \
echo ${GO} vet $$dir; \
diff --git a/profiling/profile_benchmarks.sh b/profiling/profile_benchmarks.sh
index 4716192..d3fcabd 100755
--- a/profiling/profile_benchmarks.sh
+++ b/profiling/profile_benchmarks.sh
@@ -69,38 +69,6 @@ run_profile() {
echo
}
-# Special function for profiling dmap which runs continuously
-run_profile_dmap() {
- local cmd=$1
- local name=$2
- local args=$3
-
- echo -e "${GREEN}Profiling $cmd - $name${NC}"
-
- for i in $(seq 1 $PROFILE_RUNS); do
- echo " Run $i/$PROFILE_RUNS..."
- echo " Command: $cmd -profile -profiledir $PROFILE_DIR $args (will interrupt after 3s)"
-
- # Run dmap in background, wait a bit for it to process, then interrupt it
- $cmd -profile -profiledir "$PROFILE_DIR" $args > /dev/null 2>&1 &
- local pid=$!
-
- # Wait for dmap to process the file and generate initial results
- sleep 3
-
- # Send interrupt signal to make it exit cleanly
- # We expect this to return non-zero, so we ignore the exit code
- kill -INT $pid 2>/dev/null || true
- wait $pid 2>/dev/null || true
-
- echo " Completed"
-
- # Small delay between runs
- sleep 1
- done
-
- echo
-}
# Generate test data
echo -e "${GREEN}Preparing test data...${NC}"
@@ -165,13 +133,13 @@ if [ ! -f "$TEST_DATA_DIR/dtail_format.log" ]; then
fi
# Profile dmap with DTail format
-run_profile_dmap "../dmap" "simple_count" "-plain -cfg none -query 'from STATS select count(*)' -files $TEST_DATA_DIR/dtail_format.log"
-run_profile_dmap "../dmap" "aggregations" "-plain -cfg none -query 'from STATS select sum(\$goroutines),avg(\$cgocalls),max(lifetimeConnections)' -files $TEST_DATA_DIR/dtail_format.log"
-run_profile_dmap "../dmap" "group_by_connections" "-plain -cfg none -query 'from STATS select currentConnections,count(*) group by currentConnections' -files $TEST_DATA_DIR/dtail_format.log"
+run_profile "../dmap" "simple_count" "-plain -cfg none -query 'from STATS select count(*)' -files $TEST_DATA_DIR/dtail_format.log"
+run_profile "../dmap" "aggregations" "-plain -cfg none -query 'from STATS select sum(\$goroutines),avg(\$cgocalls),max(lifetimeConnections)' -files $TEST_DATA_DIR/dtail_format.log"
+run_profile "../dmap" "group_by_connections" "-plain -cfg none -query 'from STATS select currentConnections,count(*) group by currentConnections' -files $TEST_DATA_DIR/dtail_format.log"
# Also test CSV format
echo -e "\n${YELLOW}Testing CSV format with dmap${NC}"
-run_profile_dmap "../dmap" "csv_query" "-plain -cfg none -query 'select user,action,count(*) where status=\"success\" group by user,action logformat csv' -files $TEST_DATA_DIR/test.csv"
+run_profile "../dmap" "csv_query" "-plain -cfg none -query 'select user,action,count(*) where status=\"success\" group by user,action logformat csv' -files $TEST_DATA_DIR/test.csv"
echo
echo -e "${GREEN}Profiling complete!${NC}"
diff --git a/profiling/profile_dmap.sh b/profiling/profile_dmap.sh
index 1abf629..03402e1 100755
--- a/profiling/profile_dmap.sh
+++ b/profiling/profile_dmap.sh
@@ -100,33 +100,21 @@ echo -e "${GREEN}Profiling dmap queries...${NC}"
# Query 1: Simple count
echo -e "\n${YELLOW}Query: Count by hostname${NC}"
QUERY="from STATS select count(\$line) group by hostname"
-echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log (will interrupt after 3s)"
-# Run dmap in background and interrupt after 3 seconds
-../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 &
-DMAP_PID=$!
-sleep 3
-kill -INT $DMAP_PID 2>/dev/null || true
-wait $DMAP_PID 2>/dev/null || true
+echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log"
+# Run dmap and let it complete naturally
+../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10
# Query 2: Aggregations
echo -e "\n${YELLOW}Query: Sum and average${NC}"
QUERY="from STATS select sum(\$goroutines),avg(\$goroutines) group by hostname"
-echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log (will interrupt after 3s)"
-../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 &
-DMAP_PID=$!
-sleep 3
-kill -INT $DMAP_PID 2>/dev/null || true
-wait $DMAP_PID 2>/dev/null || true
+echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log"
+../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10
# Query 3: Min/Max
echo -e "\n${YELLOW}Query: Min and max${NC}"
QUERY="from STATS select min(currentConnections),max(lifetimeConnections) group by hostname"
-echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log (will interrupt after 3s)"
-../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 &
-DMAP_PID=$!
-sleep 3
-kill -INT $DMAP_PID 2>/dev/null || true
-wait $DMAP_PID 2>/dev/null || true
+echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log"
+../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10
echo
echo -e "${GREEN}Analyzing dmap profiles...${NC}"
diff --git a/profiling/profile_quick.sh b/profiling/profile_quick.sh
index db63c73..249b73c 100755
--- a/profiling/profile_quick.sh
+++ b/profiling/profile_quick.sh
@@ -69,13 +69,9 @@ fi
# Profile dmap (use proper MapReduce query on CSV file)
echo -e "\n${YELLOW}Profiling dmap...${NC}"
QUERY="select count($line),avg($duration) group by $user logformat csv"
-echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/quick_test.csv (will interrupt after 3s)"
-# Run dmap in background and interrupt after 3 seconds
-../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/quick_test.csv" > /dev/null 2>&1 &
-DMAP_PID=$!
-sleep 3
-kill -INT $DMAP_PID 2>/dev/null || true
-wait $DMAP_PID 2>/dev/null || true
+echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/quick_test.csv"
+# Run dmap and let it complete naturally
+../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/quick_test.csv" > /dev/null 2>&1
DMAP_CPU=$(ls -t "$PROFILE_DIR"/dmap_cpu_*.prof 2>/dev/null | head -1)
if [ -n "$DMAP_CPU" ]; then