diff options
| author | Paul Buetow <paul@buetow.org> | 2025-10-02 11:28:55 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-10-02 11:28:55 +0300 |
| commit | 2b034797107660d4d83f8a7acdc55d32db785b82 (patch) | |
| tree | 522fb586193c8a65ca6aee42df64eecc3555f644 /gemfeed/examples/conf/dotfiles/scripts/brokenlinkfinder | |
| parent | 4d7d90638186ac71067232007607f6637d560a4d (diff) | |
Update content for md
Diffstat (limited to 'gemfeed/examples/conf/dotfiles/scripts/brokenlinkfinder')
| -rw-r--r-- | gemfeed/examples/conf/dotfiles/scripts/brokenlinkfinder | 73 |
1 files changed, 73 insertions, 0 deletions
diff --git a/gemfeed/examples/conf/dotfiles/scripts/brokenlinkfinder b/gemfeed/examples/conf/dotfiles/scripts/brokenlinkfinder new file mode 100644 index 00000000..7fe15765 --- /dev/null +++ b/gemfeed/examples/conf/dotfiles/scripts/brokenlinkfinder @@ -0,0 +1,73 @@ +#!/usr/bin/env ruby + +require 'net/http' +require 'uri' +require 'nokogiri' +require 'set' + +# Method to fetch and parse HTML from a URL +def fetch_html(url) + response = Net::HTTP.get_response(URI(url)) + response.body if response.is_a?(Net::HTTPSuccess) +rescue StandardError => e + puts "Error fetching #{url}: #{e.message}" + nil +end + +# Method to find and check links on a page +def check_links(url, domain) + html = fetch_html(url) + return unless html + + checked = Set.new + broken = Set.new + + document = Nokogiri::HTML(html) + links = document.css('a').map { |link| link['href'] }.compact + + internal_links = links.select do |link| + link.start_with?('/') || link.start_with?('./') || URI(link).host == domain + end + puts "Internal links: #{internal_links}" + + internal_links.uniq.each do |link| + full_url = link.start_with?('/') || link.start_with?('./') ? "#{url}#{link}" : link + full_url.sub!('./', '/') + next if checked.include?(full_url) + + broken << full_url unless check_link(full_url) + checked << full_url + end + + broken +end + +# Method to check if a link is broken +def check_link(url) + uri = URI(url) + response = Net::HTTP.get_response(uri) + + if response.is_a?(Net::HTTPSuccess) + puts "Working link: #{url}" + true + else + puts "Broken link: #{url} (HTTP #{response.code})" + false + end +rescue StandardError => e + puts "Error checking #{url}: #{e.message}" + false +end + +# Main program +if ARGV.length != 1 + puts 'Usage: ruby brokenlinkfinder.rb <URL>' + exit +end + +start_url = ARGV.first +domain = URI(start_url).host + +check_links(start_url, domain).each do |broken| + puts "Broken: #{broken}" +end |
