summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Paul Buetow <paul@buetow.org> 2026-02-15 14:14:32 +0200
committer: Paul Buetow <paul@buetow.org> 2026-02-15 14:14:45 +0200
commit: 00e5de525bde5d0d77d9553c6126908f2fdfde20 (patch)
tree: 0686bc72be5f7a5bc5fd18f5dcdd6d0cfb868ac3 /lib
parent: e6aa888599062843409d037b4007be43ef3b0f02 (diff)
Improve --generate performance with incremental builds (62s -> 2s)
Add mtime-based skip logic to avoid regenerating unchanged files:
- generate::fromgmi skips .gmi files where all outputs are newer
- template::_generate_file skips templates when output is fresh
- Diff-before-overwrite in templates, gemfeed, and notes indexes to preserve mtimes and prevent cascading cache invalidation
- Global dependency check (.lastgen sentinel) for header/footer/CSS
- Job throttling via wait -n capped at nproc cores
- Add --force flag and FORCE_REBUILD env var to bypass skip logic
- Fix misleading atom feed "empty cache" log message

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'lib')
-rw-r--r-- lib/atomfeed.source.sh | 6
-rw-r--r-- lib/gemfeed.source.sh | 14
-rw-r--r-- lib/generate.source.sh | 80
-rw-r--r-- lib/notes.source.sh | 7
-rw-r--r-- lib/template.source.sh | 49
5 files changed, 148 insertions, 8 deletions
diff --git a/lib/atomfeed.source.sh b/lib/atomfeed.source.sh
index 4b598a7..3fe46be 100644
--- a/lib/atomfeed.source.sh
+++ b/lib/atomfeed.source.sh
@@ -61,7 +61,11 @@ atomfeed::generate () {
local -r atom_file="$gemfeed_dir/atom.xml"
log INFO "Generating Atom feed to $atom_file"
- log INFO 'This may takes a while with an empty cache....'
+
+ # Only warn about slow generation when the cache is actually empty
+ if [ ! -d "$CONTENT_BASE_DIR/cache" ] || [ -z "$(ls "$CONTENT_BASE_DIR/cache/gemfeed/" 2>/dev/null)" ]; then
+ log INFO 'This may take a while with an empty cache....'
+ fi
cat <<ATOMHEADER > "$atom_file.tmp"
<?xml version="1.0" encoding="utf-8"?>
diff --git a/lib/gemfeed.source.sh b/lib/gemfeed.source.sh
index f069ef5..c405083 100644
--- a/lib/gemfeed.source.sh
+++ b/lib/gemfeed.source.sh
@@ -19,7 +19,12 @@ gemfeed::updatemainindex () {
$SED -E -n '/^=> / { s| ./| ./gemfeed/|; p; }' \
"$gemfeed_dir/index.gmi" >> "$index_gmi.tmp"
- mv "$index_gmi.tmp" "$index_gmi"
+ # Only overwrite if content changed, preserving mtime for template skip logic
+ if [[ -f "$index_gmi" ]] && diff -q "$index_gmi.tmp" "$index_gmi" >/dev/null 2>&1; then
+ rm "$index_gmi.tmp"
+ else
+ mv "$index_gmi.tmp" "$index_gmi"
+ fi
}
gemfeed::_get_word_count () {
@@ -59,7 +64,12 @@ GEMFEED
"$gemfeed_dir/index.gmi.tmp"
done < <(gemfeed::get_posts)
- mv "$gemfeed_dir/index.gmi.tmp" "$gemfeed_dir/index.gmi"
+ # Only overwrite if content changed, preserving mtime for template skip logic
+ if [[ -f "$gemfeed_dir/index.gmi" ]] && diff -q "$gemfeed_dir/index.gmi.tmp" "$gemfeed_dir/index.gmi" >/dev/null 2>&1; then
+ rm "$gemfeed_dir/index.gmi.tmp"
+ else
+ mv "$gemfeed_dir/index.gmi.tmp" "$gemfeed_dir/index.gmi"
+ fi
gemfeed::updatemainindex
}
diff --git a/lib/generate.source.sh b/lib/generate.source.sh
index 313531d..fec4f20 100644
--- a/lib/generate.source.sh
+++ b/lib/generate.source.sh
@@ -41,12 +41,18 @@ generate::internal_link_id () {
}
# Add other docs (e.g. images, videos) from Gemtext to output format.
+# Skips copying if the output file already exists and is newer than the source.
generate::fromgmi_add_docs () {
local -r src="$1"; shift
local -r format="$1"; shift
local -r dest=${src/gemtext/$format}
local -r dest_dir=$(dirname "$dest")
+ # Skip if output already exists and is newer than source
+ if [[ -f "$dest" ]] && [[ "$dest" -nt "$src" ]]; then
+ return
+ fi
+
if [[ ! -d "$dest_dir" ]]; then
mkdir -p "$dest_dir"
fi
@@ -140,14 +146,70 @@ generate::_to_output_format () {
mv "$dest.tmp" "$dest"
}
+# Decide whether this run has to regenerate everything.
+#
+# A full rebuild is requested when FORCE_REBUILD is "yes", when the sentinel
+# file $CONTENT_BASE_DIR/.gemtexter.lastgen does not exist, or when one of the
+# global dependencies (HTML header, HTML footer, CSS style, ./gemtexter.conf)
+# exists and is newer than that sentinel.
+#
+# Globals:   CONTENT_BASE_DIR, FORCE_REBUILD, HTML_HEADER, HTML_FOOTER,
+#            HTML_CSS_STYLE (read); _force_rebuild (written: "yes" or "no")
+# Arguments: none
+generate::_check_global_deps () {
+    local -r marker="$CONTENT_BASE_DIR/.gemtexter.lastgen"
+    local verdict=no
+    local candidate
+
+    if [[ "$FORCE_REBUILD" == yes || ! -f "$marker" ]]; then
+        # Explicit request, or no record of a previous generation.
+        verdict=yes
+    else
+        for candidate in "$HTML_HEADER" "$HTML_FOOTER" "$HTML_CSS_STYLE" ./gemtexter.conf; do
+            # Dependencies that do not exist are ignored.
+            [[ -f "$candidate" && "$candidate" -nt "$marker" ]] || continue
+            log INFO "Global dependency $candidate changed, forcing full rebuild"
+            verdict=yes
+            break
+        done
+    fi
+
+    _force_rebuild=$verdict
+}
+
+# Tell whether a Gemtext source can be skipped because every requested output
+# already exists and the source is not newer than any of them.
+#
+# The output path for a format is derived from the source path by replacing
+# the first "gemtext" with the format name and the first ".gmi" with
+# ".<format>".
+#
+# Globals:   _force_rebuild (read) - "yes" makes every source count as stale
+# Arguments: $1 - path of the .gmi source; $2... - output format names
+# Returns:   0 if all outputs are up to date, 1 otherwise
+generate::_is_fresh () {
+    local -r gmi_path="$1"; shift
+    local fmt out_path
+
+    [[ "$_force_rebuild" == yes ]] && return 1
+
+    for fmt in "$@"; do
+        out_path=${gmi_path/gemtext/$fmt}
+        out_path=${out_path/.gmi/.$fmt}
+        # A missing output, or one older than the source, means work remains.
+        [[ -f "$out_path" && ! "$gmi_path" -nt "$out_path" ]] || return 1
+    done
+
+    return 0
+}
+
# Generate a given output format from a Gemtext file.
generate::fromgmi () {
local -i num_gmi_files=0
+ local -i num_skipped_files=0
local -i num_doc_files=0
local current_page
+ local _force_rebuild=no
+
+ # Cap concurrent jobs to the number of CPU cores
+ local -r max_jobs=$(( $(nproc 2>/dev/null || echo 4) ))
log INFO "Generating $* from Gemtext"
+ # Check if global deps changed (header, footer, CSS, config)
+ generate::_check_global_deps
+
# Add atom feed for HTML
generate::convert_gmi_atom_to_html_atom 'html'
@@ -156,16 +218,28 @@ generate::fromgmi () {
if test -n "$CONTENT_FILTER" && ! $GREP -q "$CONTENT_FILTER" <<< "$src"; then
continue
fi
+
+ # Skip files where all outputs are newer than the source
+ if generate::_is_fresh "$src" "$@"; then
+ log VERBOSE "Skipping unchanged $src"
+ num_skipped_files=$(( num_skipped_files + 1 ))
+ continue
+ fi
+
current_page=$($SED "s|$CONTENT_BASE_DIR/gemtext||;"'s/.gmi$//;' <<< "$src")
num_gmi_files=$(( num_gmi_files + 1 ))
log INFO "Generating output formats from $src"
for format in "$@"; do
+ # Throttle: wait for a job slot before spawning
+ while (( $(jobs -rp | wc -l) >= max_jobs )); do
+ wait -n
+ done
generate::_to_output_format "$src" "$current_page" "$format" &
done
done < <(find "$CONTENT_BASE_DIR/gemtext" -type f -name \*.gmi)
wait
- log INFO "Converted $num_gmi_files Gemtext files"
+ log INFO "Converted $num_gmi_files Gemtext files (skipped $num_skipped_files unchanged)"
# Add non-.gmi files to html dir.
log VERBOSE "Adding other docs to $*"
@@ -204,6 +278,10 @@ generate::fromgmi () {
for format in "$@"; do
log INFO "$format can be found in $CONTENT_BASE_DIR/$format now"
done
+
+ # Update sentinel file so next run can detect global dep changes
+ touch "$CONTENT_BASE_DIR/.gemtexter.lastgen"
+
log INFO "You may want to commit all changes to version control!"
}
diff --git a/lib/notes.source.sh b/lib/notes.source.sh
index bdea5fc..85bd2b5 100644
--- a/lib/notes.source.sh
+++ b/lib/notes.source.sh
@@ -41,5 +41,10 @@ That were all notes. Hope they were useful!
=> ../ Go back to main site
NOTES
- mv "$notes_dir/index.gmi.tmp" "$notes_dir/index.gmi"
+ # Only overwrite if content changed, preserving mtime for template skip logic
+ if [[ -f "$notes_dir/index.gmi" ]] && diff -q "$notes_dir/index.gmi.tmp" "$notes_dir/index.gmi" >/dev/null 2>&1; then
+ rm "$notes_dir/index.gmi.tmp"
+ else
+ mv "$notes_dir/index.gmi.tmp" "$notes_dir/index.gmi"
+ fi
}
diff --git a/lib/template.source.sh b/lib/template.source.sh
index 9a2ff38..9488ea8 100644
--- a/lib/template.source.sh
+++ b/lib/template.source.sh
@@ -1,6 +1,7 @@
template::generate () {
log INFO 'Generating files from templates'
local -i num_tpl_files=0
+ declare -A _TPL_DIR_NEWEST_MTIME
while read -r tpl_path; do
if test -n "$CONTENT_FILTER" && ! $GREP -q "$CONTENT_FILTER" <<< "$tpl_path"; then
@@ -19,12 +20,47 @@ template::draft () {
template::generate
}
+# Compute the newest mtime (whole epoch seconds) among the *.gmi.tpl and *.gmi
+# files directly inside a directory; index.gmi is excluded and subdirectories
+# are not searched. The result is 0 when no file matches.
+#
+# Results are cached per directory in the associative array
+# _TPL_DIR_NEWEST_MTIME. A cache entry written while this function runs inside
+# a command substitution ($(...)) is lost when that subshell exits, so callers
+# that want the cache to take effect should pass a result variable name as $2
+# and call the function directly in the current shell.
+#
+# Globals:   _TPL_DIR_NEWEST_MTIME (read and written)
+# Arguments: $1 - directory to scan
+#            $2 - optional name of a variable that receives the result; must
+#                 not be one of this function's own locals (dir, result_var,
+#                 newest, mtime)
+# Outputs:   the mtime on stdout when $2 is omitted, nothing otherwise
+template::_dir_newest_mtime () {
+    local -r dir="$1"; shift
+    local -r result_var="${1:-}"
+    local newest mtime
+
+    if [[ -n "${_TPL_DIR_NEWEST_MTIME[$dir]+x}" ]]; then
+        newest=${_TPL_DIR_NEWEST_MTIME[$dir]}
+    else
+        # Find newest mtime among .gmi.tpl files and non-index .gmi files
+        newest=0
+        while read -r mtime _; do
+            # find prints fractional seconds; keep only the integer part
+            mtime=${mtime%%.*}
+            if (( mtime > newest )); then
+                newest=$mtime
+            fi
+        done < <(find "$dir" -maxdepth 1 \( -name '*.gmi.tpl' -o -name '*.gmi' \) -type f \
+            ! -name 'index.gmi' -printf '%T@ %p\n')
+
+        _TPL_DIR_NEWEST_MTIME[$dir]="$newest"
+    fi
+
+    if [[ -n "$result_var" ]]; then
+        printf -v "$result_var" '%s' "$newest"
+    else
+        echo "$newest"
+    fi
+}
+
template::_generate_file () {
local -r tpl_path="$1"; shift
local -r tpl_dir="$(dirname "$tpl_path")"
local -r tpl="$(basename "$tpl_path")"
local -r dest="${tpl/.tpl/}"
+ # Skip if output is newer than the template and all relevant siblings
+ if [[ "$FORCE_REBUILD" != yes ]] && [[ -f "$tpl_dir/$dest" ]]; then
+ local -r dest_mtime=$(stat -c '%Y' "$tpl_dir/$dest")
+ local -r dir_newest=$(template::_dir_newest_mtime "$tpl_dir")
+ if (( dest_mtime >= dir_newest )); then
+ log VERBOSE "Skipping unchanged template $tpl_path"
+ return
+ fi
+ fi
+
cd "$tpl_dir" || log PANIC "Unable to chdir to $tpl_dir"
log INFO "Generating $tpl_path -> $dest"
@@ -33,9 +69,16 @@ template::_generate_file () {
export CURRENT_GMI="$dest"
template::_generate < "$tpl" > "$dest.tmp"
- mv "$dest.tmp" "$dest"
- log INFO "Done generating $dest"
- cd -
+
+ # Only overwrite if content actually changed, preserving mtime for caches
+ if [[ -f "$dest" ]] && diff -q "$dest.tmp" "$dest" >/dev/null 2>&1; then
+ rm "$dest.tmp"
+ log VERBOSE "Template output unchanged for $dest"
+ else
+ mv "$dest.tmp" "$dest"
+ log INFO "Done generating $dest"
+ fi
+ cd - >/dev/null
}
template::_generate () {