# TODO: Idea, could also cache .html and .md files, so they don't need to be
# re-generated every time! So the caching can be more generic.

# Print the cached feed content of a Gemtext file if the cache is still valid.
# Arguments: $1 - path to the .gmi file
#            $2 - path to the cache file
# Outputs:   the cached content on stdout
# Returns:   1 when "<cache>.info" is missing or no longer matches the current
#            "ls -l" line of the .gmi file; otherwise the status of cat.
atomfeed::_from_cache () {
    local -r gmi_file_path="$1"; shift
    local -r cache_file_path="$1"; shift

    if [ ! -f "${cache_file_path}.info" ]; then
        # No cache there.
        return 1
    elif ! diff "${cache_file_path}.info" <(ls -l "$gmi_file_path") >/dev/null; then
        # Need to refresh the cache.
        return 1
    fi

    log VERBOSE "Retrieving feed content for $gmi_file_path from $cache_file_path"
    cat "$cache_file_path"
}

# Convert a Gemtext file to feed content, print it and store it in the cache.
# Arguments: $1 - path to the .gmi file
#            $2 - path to the cache file (its directory is created on demand)
# Globals:   SED, DOMAIN (read)
# Outputs:   the generated content on stdout (also written to the cache file)
atomfeed::_make_cache () {
    local -r gmi_file_path="$1"; shift
    local -r cache_file_path="$1"; shift

    log VERBOSE "Making feed content cache from $gmi_file_path"

    local -r cache_file_dir="$(dirname "$cache_file_path")"
    if [ ! -d "$cache_file_dir" ]; then
        mkdir -p "$cache_file_dir"
    fi

    # sed: Drop the "Go back to the main site" lines.
    # sed: Make HTML links absolute, Atom relative URLs feature seems a mess
    # across different Atom clients.
    html::fromgmi < <($SED '/Go back to the main site/d' "$gmi_file_path") |
        $SED "s|href=\"\./|href=\"https://$DOMAIN/gemfeed/|g; s|src=\"\./|src=\"https://$DOMAIN/gemfeed/|g;" |
        tee "$cache_file_path"

    # Remember the "ls -l" line of the source; _from_cache compares against it.
    ls -l "$gmi_file_path" > "${cache_file_path}.info"
}

# Retrieve the core content as XHTML of the blog post.
# Arguments: $1 - path to the .gmi file
# Outputs:   the content on stdout, from the cache when possible
atomfeed::content () {
    local -r gmi_file_path="$1"; shift
    # The cache path is the source path with the first "gemtext" replaced by
    # "cache", plus the ".atomcache" suffix.
    local -r cache_file_path="${gmi_file_path/gemtext/cache}.atomcache"

    atomfeed::_from_cache "$gmi_file_path" "$cache_file_path" ||
        atomfeed::_make_cache "$gmi_file_path" "$cache_file_path"
}

# Generate an atom.xml feed file.
# Globals:   CONTENT_BASE_DIR, CONTENT_FILTER, DATE, DATE_FORMAT, DOMAIN,
#            SUBTITLE, ATOM_MAX_ENTRIES (read)
# Returns:   2 when the freshly generated feed does not pass atomfeed::xmllint.
#            In that case the existing atom.xml is left untouched.
atomfeed::generate () {
    local -r gemfeed_dir="$CONTENT_BASE_DIR/gemtext/gemfeed"

    if [ ! -d "$gemfeed_dir" ]; then
        return
    elif [ -n "$CONTENT_FILTER" ]; then
        log WARN "Not generating Atom feed in filter mode"
        return
    fi

    local -r atom_file="$gemfeed_dir/atom.xml"
    log INFO "Generating Atom feed to $atom_file"

    # Only warn about slow generation when the cache is actually empty
    if [ ! -d "$CONTENT_BASE_DIR/cache" ] ||
        [ -z "$(ls "$CONTENT_BASE_DIR/cache/gemfeed/" 2>/dev/null)" ]; then
        log INFO 'This may take a while with an empty cache....'
    fi

    # NOTE(review): the XML markup of this here-document was missing from the
    # reviewed copy and has been reconstructed following RFC 4287; confirm it
    # against version control. The <updated> element must stay on line 3 of
    # the output, the comparison further down relies on that.
    cat <<ATOMHEADER > "$atom_file.tmp"
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
    <updated>$($DATE $DATE_FORMAT)</updated>
    <title>$DOMAIN feed</title>
    <subtitle>$SUBTITLE</subtitle>
    <link href="gemini://$DOMAIN/gemfeed/atom.xml" rel="self" />
    <link href="gemini://$DOMAIN/" />
    <id>gemini://$DOMAIN/</id>
ATOMHEADER

    local gmi_file
    while IFS= read -r gmi_file; do
        atomfeed::_entry "$gemfeed_dir" "$gmi_file" "$atom_file.tmp"
    done < <(gemfeed::get_posts | head -n "$ATOM_MAX_ENTRIES")

    cat <<ATOMFOOTER >> "$atom_file.tmp"
</feed>
ATOMFOOTER

    # Never install a feed which failed the lint. The .tmp file is left
    # behind on purpose; atomfeed::verify removes such leftovers.
    if ! atomfeed::xmllint "$atom_file.tmp"; then
        return 2
    fi

    # Delete the 3rd line of the atom feeds (global feed update timestamp)
    if ! diff -u <($SED 3d "$atom_file") <($SED 3d "$atom_file.tmp"); then
        log INFO 'Feed got something new!'
        mv "$atom_file.tmp" "$atom_file"
    else
        log INFO 'Nothing really new in the feed'
        rm "$atom_file.tmp"
    fi
}

# Remove leftover *.xml.tmp files and lint every atom.xml below
# $CONTENT_BASE_DIR.
# Globals:   CONTENT_BASE_DIR (read)
# Returns:   2 when at least one feed fails atomfeed::xmllint.
atomfeed::verify () {
    if [ "$(find "$CONTENT_BASE_DIR" -name '*.xml.tmp' | wc -l)" -ge 1 ]; then
        find "$CONTENT_BASE_DIR" -name '*.xml.tmp'
        log WARN "Found incomplete Atom feed files with the suffix .xml.tmp from a previous run, removing them"
        find "$CONTENT_BASE_DIR" -name '*.xml.tmp' -delete
    fi

    # Loop in the current shell (not behind a pipe) so that a lint failure is
    # not lost in a subshell.
    local atom_xml
    local -i failed=0
    while IFS= read -r atom_xml; do
        atomfeed::xmllint "$atom_xml" || failed=1
    done < <(find "$CONTENT_BASE_DIR" -name atom.xml)

    if [ "$failed" -ne 0 ]; then
        return 2
    fi

    log INFO "Atom feed/s seem fine"
}

# Append the Atom entry of one blog post to the feed being generated.
# Arguments: $1 - directory holding the Gemtext posts
#            $2 - file name of the post inside that directory
#            $3 - path of the (temporary) feed file to append to
# Globals:   SED, DATE, DATE_FORMAT, DOMAIN, AUTHOR, EMAIL (read)
atomfeed::_entry () {
    local -r gemfeed_dir="$1"; shift
    local -r gmi_file="$1"; shift
    local -r tmp_atom_file="$1"; shift

    log INFO "Generating Atom feed entry for $gmi_file"

    # Get HTML content for the feed
    local content="$(atomfeed::content "$gemfeed_dir/$gmi_file")"
    assert::not_empty content "$content"

    # Extract first heading as post title.
    local title="$(generate::extract_title "$gemfeed_dir/$gmi_file")"
    assert::not_empty title "$title"

    # Extract first paragraph from Gemtext as the summary.
    local summary="$($SED -n '/^[A-Z]/ { p; q; }' "$gemfeed_dir/$gmi_file" | tr '"' "'")"
    if [ -z "$summary" ]; then
        # No summary found, maybe there is only a quote...
        summary="$($SED -n '/^>/ { s/> *//; p; q; }' "$gemfeed_dir/$gmi_file" | tr '"' "'")"
    fi
    assert::not_empty summary "$summary"

    # Extract the date from the "> Published at" line near the top of the post.
    local date="$(head "$gemfeed_dir/$gmi_file" |
        $SED -n '/^> Published at / { s/.*Published at //; s/;.*//; p; }')"
    if [ -z "$date" ]; then
        # Fall back to the timestamp of the file and persist it in the post.
        date="$($DATE $DATE_FORMAT --reference "$gemfeed_dir/$gmi_file")"
        log WARN "No publishing date specified for $gmi_file, assuming $date"
        atomfeed::_insert_date "$date" "$gemfeed_dir/$gmi_file"
    else
        log INFO "Publishing date is $date"
    fi
    assert::not_empty publishing_date "$date"

    # NOTE(review): the XML markup of this here-document was missing from the
    # reviewed copy and has been reconstructed following RFC 4287; confirm it
    # against version control.
    cat <<ATOMENTRY >> "$tmp_atom_file"
    <entry>
        <title>$title</title>
        <link href="gemini://$DOMAIN/gemfeed/$gmi_file" />
        <id>gemini://$DOMAIN/gemfeed/$gmi_file</id>
        <updated>$date</updated>
        <author>
            <name>$AUTHOR</name>
            <email>$EMAIL</email>
        </author>
        <summary>$summary</summary>
        <content type="xhtml">
            <div xmlns="http://www.w3.org/1999/xhtml">
$content
            </div>
        </content>
    </entry>
ATOMENTRY
}

# Check that an Atom feed file is well-formed XML.
# Arguments: $1 - path to the feed file
# Globals:   XMLLINT (read) - lint command; the check is skipped when empty
# Returns:   2 when the lint command rejects the file.
atomfeed::xmllint () {
    local -r atom_feed="$1"

    if [ -n "$XMLLINT" ]; then
        log INFO "XMLLinting Atom feed $atom_feed"
        if ! $XMLLINT "$atom_feed" >/dev/null; then
            log PANIC "Atom feed $atom_feed isn't valid XML, please re-try"
            return 2
        fi
        log INFO 'Atom feed is OK'
    else
        log WARN 'Skipping XMLLinting Atom feed as "xmllint" command is not installed!'
    fi
}

# Convert the Gemtext Atom feed to a HTML Atom feed by replacing .gmi
# extensions with .html and gemini:// with https:// protocol.
# Globals:   CONTENT_BASE_DIR, SED (read)
atomfeed::convert_to_html () {
    if [ ! -f "$CONTENT_BASE_DIR/gemtext/gemfeed/atom.xml" ]; then
        return
    fi

    log INFO 'Converting Gemtext Atom feed to HTML Atom feed'
    if [ ! -d "$CONTENT_BASE_DIR/html/gemfeed" ]; then
        mkdir -p "$CONTENT_BASE_DIR/html/gemfeed"
    fi

    # The dot is escaped so that only a literal ".gmi" is rewritten; an
    # unescaped dot would also turn e.g. "bigmi" into "b.html".
    $SED 's|\.gmi|.html|g; s|gemini://|https://|g' \
        < "$CONTENT_BASE_DIR/gemtext/gemfeed/atom.xml" \
        > "$CONTENT_BASE_DIR/html/gemfeed/atom.xml.tmp"

    # Only install the converted feed when it passes the lint.
    atomfeed::xmllint "$CONTENT_BASE_DIR/html/gemfeed/atom.xml.tmp" &&
        mv "$CONTENT_BASE_DIR/html/gemfeed/atom.xml.tmp" "$CONTENT_BASE_DIR/html/gemfeed/atom.xml"
}

# Insert a "> Published at <date>" line below the first header of a Gemtext
# file. When a "<file>.tpl" exists next to it, the same is done for that file.
# Arguments: $1 - the date string to insert
#            $2 - path to the .gmi file (rewritten in place)
# Globals:   SED (read)
atomfeed::_insert_date () {
    local -r date="$1"; shift
    local -r gmi_file_path="$1"; shift

    # Insert below first header
    {
        # Everything up to and including the first header line.
        $SED '/^#/q' "$gmi_file_path"
        echo
        echo "> Published at $date"
        # Everything after the first header line.
        $SED -n '/^#/,$p' "$gmi_file_path" | $SED 1d
    } > "$gmi_file_path.insert.tmp"
    mv "$gmi_file_path.insert.tmp" "$gmi_file_path"

    if [ -f "$gmi_file_path.tpl" ]; then
        atomfeed::_insert_date "$date" "$gmi_file_path.tpl"
    fi
}