blob: 7fe15765cf6589e48c5c306326b0610ee07af7d9 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
|
#!/usr/bin/env ruby
require 'net/http'
require 'uri'
require 'nokogiri'
require 'set'
# Downloads the document at +url+ and returns its raw body (no parsing is
# done here).
#
# @param url [String] address of the page to download
# @return [String, nil] the response body when the server answers with a
#   success status; nil for any other status, or when a StandardError is
#   raised while building the URI or performing the request (the error
#   message is printed to stdout in that case)
def fetch_html(url)
  reply = Net::HTTP.get_response(URI(url))
  # A `case` without an `else` branch evaluates to nil for non-success replies.
  case reply
  when Net::HTTPSuccess then reply.body
  end
rescue StandardError => error
  puts "Error fetching #{url}: #{error.message}"
  nil
end
# Fetches the page at +url+, collects every anchor whose target resolves to an
# http(s) URL on +domain+, checks each distinct target once with check_link
# and returns the ones that failed.
#
# Hrefs are resolved against +url+ with URI.join, so root-relative
# ("/about"), dot-relative ("./contact", "../x") and bare relative
# ("team.html") links are all handled. Protocol-relative links ("//host/x")
# are kept only when their host matches +domain+. Malformed hrefs are skipped
# instead of aborting the run.
#
# The "Internal links" line printed to stdout lists the resolved absolute
# URLs, de-duplicated.
#
# @param url [String] address of the page to scan; also the base for
#   resolving relative hrefs
# @param domain [String] host name a link must resolve to in order to be
#   treated as internal
# @return [Set<String>, nil] absolute URLs for which check_link returned a
#   falsy value, or nil when the page itself could not be fetched
def check_links(url, domain)
  html = fetch_html(url)
  return unless html

  document = Nokogiri::HTML(html)
  hrefs = document.css('a').map { |anchor| anchor['href'] }.compact
  internal_links = hrefs.map { |href| resolve_internal_link(url, href, domain) }.compact.uniq
  puts "Internal links: #{internal_links}"

  broken = Set.new
  internal_links.each { |link| broken << link unless check_link(link) }
  broken
end

# Resolves +href+ against +base_url+ and returns the absolute URL as a String
# when it is an http(s) URL whose host equals +domain+; returns nil for blank,
# external, non-HTTP (mailto:, javascript:, ...) or unparsable hrefs.
def resolve_internal_link(base_url, href, domain)
  candidate = href.to_s.strip
  return nil if candidate.empty?

  absolute = URI.join(base_url, candidate)
  # URI::HTTPS is a subclass of URI::HTTP, so this accepts both schemes.
  return nil unless absolute.is_a?(URI::HTTP) && absolute.host == domain

  # The fragment is dropped so "page#a" and "page#b" are checked only once.
  absolute.fragment = nil
  absolute.to_s
rescue URI::Error
  nil
end
# Requests +url+ with a GET and reports on stdout whether it answered with a
# success status.
#
# @param url [String] absolute URL to probe
# @return [Boolean] true when the response is a Net::HTTPSuccess; false for
#   any other response, or when a StandardError is raised while parsing the
#   URL or performing the request (the error message is printed)
def check_link(url)
  reply = Net::HTTP.get_response(URI(url))
  alive = reply.is_a?(Net::HTTPSuccess)
  puts(alive ? "Working link: #{url}" : "Broken link: #{url} (HTTP #{reply.code})")
  alive
rescue StandardError => error
  puts "Error checking #{url}: #{error.message}"
  false
end
# Main program: expects exactly one argument, the URL of the page to scan.
if ARGV.length != 1
  puts 'Usage: ruby brokenlinkfinder.rb <URL>'
  exit
end

start_url = ARGV.first
# Only links whose host equals the start page's host count as internal.
domain = URI(start_url).host
broken_links = check_links(start_url, domain)

# check_links returns nil when the start page could not be fetched; calling
# `each` on that would raise NoMethodError, so stop with a clear message.
if broken_links.nil?
  puts "Could not fetch #{start_url}"
  exit 1
end

broken_links.each do |broken|
  puts "Broken: #{broken}"
end
|