#!/usr/bin/env ruby
# frozen_string_literal: true

# ---------------------------------------------------------
# HTMLProofer Runner Script
# ---------------------------------------------------------
# This script checks the generated static site (usually from
# Jekyll or another static site generator) for:
# - Broken links
# - Invalid HTML
# - Missing OpenGraph tags
# - Missing favicons
# - 4xx link errors
#
# HTMLProofer helps ensure the built website is clean,
# accessible, and free of broken external/internal links.
#
# This version adds clear comments and formatting to make
# it easier for contributors to understand and maintain.
# ---------------------------------------------------------

# Load the bundled gem environment first so that html-proofer resolves to the
# version pinned by the project's Gemfile.
require "bundler/setup"
require "html-proofer"

# ---------------------------------------------------------
# URLs & patterns to ignore during link checking.
# Some websites block automated requests, cause false
# positives, or frequently return rate-limit errors.
# ---------------------------------------------------------
url_ignores = [
  "https://okdistribute.xyz/post/okf-de",
  "https://www.drupal.org/community-initiatives/drupal-core/usability",
  # NOTE(review): the diff view this list was recovered from elides entries
  # here (hunk "@@ -15,7 +39,9 @@"); restore them from the full file.
  "https://stackoverflow.com/questions/18664074/",
  "http://geekfeminism.wikia.com/wiki/Meritocracy",
  "https://news.ycombinator.com/item?id=7531689",

  # Regex patterns for broader ignore rules
  %r{^https?://stackoverflow\.com/questions/18664074/},
  %r{^https?://readwrite\.com/2014/10/10/open-source-diversity-how-to-contribute/},
  %r{^https?://twitter\.com/},
  %r{^https?://(www\.)?kickstarter\.com/},
  # NOTE(review): further entries are elided here as well
  # (hunk "@@ -29,17 +55,20 @@"); restore them from the full file.
  %r{^https?://(www\.)?medium\.com}
]

# ---------------------------------------------------------
# Run HTMLProofer with project-specific settings
# ---------------------------------------------------------
# NOTE(review): the option names below mix html-proofer 3.x-style switches
# (check_html, check_opengraph) with 4.x+ names (ignore_urls,
# ignore_status_codes). Confirm each one against the html-proofer version
# pinned in Gemfile.lock; an option the installed version does not recognise
# may have no effect.
HTMLProofer::Runner.new(
  ["_site"],                    # Directory containing the generated site
  parallel: { in_threads: 4 },  # Speed up checks using 4 threads
  type: :directory,
  ignore_urls: url_ignores,     # Skip known-problematic URLs
  check_html: true,             # Validate HTML structure
  check_opengraph: true,        # Check for OpenGraph tags
  favicon: true,                # Ensure favicon exists
  assume_extension: true,       # Allow links without file extensions
  allow_missing_href: true,     # Don't fail on <a> tags with no href
  enforce_https: false,         # Allow HTTP links
  only_4xx: true,               # Only report 4xx errors from external URLs
  ignore_status_codes: [429]    # Ignore Too Many Requests (rate-limit)
).run