From fe827b3da367b500f42413faab603babb37107ee Mon Sep 17 00:00:00 2001 From: spamguy Date: Tue, 3 Jun 2025 15:29:03 -0700 Subject: [PATCH 1/3] Implement Zsh scraper --- lib/docs/filters/zsh/clean_html.rb | 20 ++++++++++++++ lib/docs/filters/zsh/entries.rb | 41 +++++++++++++++++++++++++++++ lib/docs/scrapers/zsh.rb | 33 +++++++++++++++++++++++ public/icons/docs/zsh/16.png | Bin 0 -> 687 bytes public/icons/docs/zsh/16@2x.png | Bin 0 -> 1206 bytes public/icons/docs/zsh/SOURCE | 2 ++ 6 files changed, 96 insertions(+) create mode 100644 lib/docs/filters/zsh/clean_html.rb create mode 100644 lib/docs/filters/zsh/entries.rb create mode 100644 lib/docs/scrapers/zsh.rb create mode 100644 public/icons/docs/zsh/16.png create mode 100644 public/icons/docs/zsh/16@2x.png create mode 100644 public/icons/docs/zsh/SOURCE diff --git a/lib/docs/filters/zsh/clean_html.rb b/lib/docs/filters/zsh/clean_html.rb new file mode 100644 index 0000000000..43d65e045c --- /dev/null +++ b/lib/docs/filters/zsh/clean_html.rb @@ -0,0 +1,20 @@ +module Docs + class Zsh + class CleanHtmlFilter < Filter + def call + css('table.header', 'table.menu', 'hr').remove + + # Remove indices from headers. + css('h1', 'h2', 'h3').each do |node| + node.content = node.content.match(/^[\d\.]* (.*)$/)&.captures&.first + end + + css('h2.section ~ a').each do |node| + node.next_element['id'] = node['name'] + end + + doc + end + end + end +end diff --git a/lib/docs/filters/zsh/entries.rb b/lib/docs/filters/zsh/entries.rb new file mode 100644 index 0000000000..e6bc07305e --- /dev/null +++ b/lib/docs/filters/zsh/entries.rb @@ -0,0 +1,41 @@ +module Docs + class Zsh + class EntriesFilter < Docs::EntriesFilter + def get_name + extract_header_text(at_css('h1.chapter').content) + end + + def additional_entries + entries = [] + + css('h2.section').each do |node| + type = get_type + + # Linkable anchor sits above
<h2>
. + a = node.xpath('preceding-sibling::a').last + header_text = extract_header_text(node.content) + + if type == 'Zsh Modules' + module_name = header_text.match(/The (zsh\/.*) Module/)&.captures&.first + header_text = module_name if module_name.present? + end + + entries << [header_text, a['name'], type] if header_text != 'Description' + end + + entries + end + + def get_type + extract_header_text(at_css('h1.chapter').content) + end + + private + + # Extracts text from a string, dropping indices preceding it. + def extract_header_text(str) + str.match(/^[\d\.]* (.*)$/)&.captures&.first + end + end + end +end diff --git a/lib/docs/scrapers/zsh.rb b/lib/docs/scrapers/zsh.rb new file mode 100644 index 0000000000..82423d34f2 --- /dev/null +++ b/lib/docs/scrapers/zsh.rb @@ -0,0 +1,33 @@ +module Docs + class Zsh < UrlScraper + self.type = 'zsh' + self.release = '5.9.0' + self.base_url = 'https://zsh.sourceforge.io/Doc/Release/' + self.root_path = 'index.html' + self.links = { + home: 'https://zsh.sourceforge.io/', + code: 'https://sourceforge.net/p/zsh/web/ci/master/tree/', + } + + options[:skip] = %w( + zsh_toc.html + zsh_abt.html + The-Z-Shell-Manual.html + Introduction.html + ) + options[:skip_patterns] = [/-Index.html/] + + html_filters.push 'zsh/entries', 'zsh/clean_html' + + options[:attribution] = <<-HTML + The Z Shell is copyright © 1992–2017 Paul Falstad, Richard Coleman, + Zoltán Hidvégi, Andrew Main, Peter Stephenson, Sven Wischnowsky, and others.
+ Licensed under the MIT License. + HTML + + def get_latest_version(opts) + body = fetch('https://zsh.sourceforge.io/Doc/Release', opts) + body.scan(/, Zsh version ([0-9.]+)/)[0][0][0...-1] + end + end +end diff --git a/public/icons/docs/zsh/16.png b/public/icons/docs/zsh/16.png new file mode 100644 index 0000000000000000000000000000000000000000..05dc56e07e77cd0c22fff897d23390b270292ef1 GIT binary patch literal 687 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJV{wqX6T`Z5GB1G~mUKs7M+SzC z{oH>NS%Lf;0X`wF|AFA|-(TlXeBZw2&&y|jKfeF5f6M=We}5d_^(oKm%hbltWnuq5 zfBgUV&)50Azph{WGO7N@zAc~fJbvH3@qNpRpC=CdJbCa_rsJ2+;?He)-#0G)zH{CG z|Nnknzw~X@?0?_B{(1H6>&&+A>*jwd4*D{s;qQmHe?PwaI;;KXiT$7Qy?@`i{&i-@ zx0SR1{rvHDPUr83cmF)R`zgcr&+|uL7f=1Rc*^(n3x1wG`v1?LZ)+EPN;dx7lJ)1+ z^Z&np|Jbqm+sc{WR?q&neA>@5hyT2M`tR4z|Ns8|ef#R~tLJ||eejG-Gza>Tv%n*= zn1O-sFbFdq&tH)O6qG1&jVKAuPb(=;EJ|evNX*PD(erZ+Q83jr)HC|Mhj|-N&9>AC z&ooa@Ed~xChm}E!k(GfF$npYWX($^M;u?(1U~wiO+mMk790O56c4j*ZSUd~J27v@1 z2I+^kOkFcXkY-6 zeR=hp#i?64RX}Eyr;B5VMsRY1f~e8R;r|Fg*DBg)5h?HGVGr`h}OLx5xJZ)3P+4 zXJXUBuJM>{EBp41uaC!1AuueRjZHk9)!baHe23Q~Muvb5TxZ>s-W&tD$kWx&Wt~$( F69D&l8(;tc literal 0 HcmV?d00001 diff --git a/public/icons/docs/zsh/16@2x.png b/public/icons/docs/zsh/16@2x.png new file mode 100644 index 0000000000000000000000000000000000000000..014d7ab78a835b47ea610fada2e2a929fbfb06b4 GIT binary patch literal 1206 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE1SJ1Ryj={W7>k44ofy`glX(f`u%tWsIx;Y9 z?C1WI$O`0h7I;J!GcfQS24TkI`72U@f)XXJ5hcO-X(i=}MX3w{iJ5sNdVa1U3Z{C7 zdPcwZFmD5@*_ImNnda%K#lQjNurf$7vNA9NSzbUa4P}E|qru1w76-YQV0J5OE3=IrGvM;ZGvp97tr;34rIn~p}F~r0B?bP@jkx-Fi`>i({-;M236HuJG zK+`br;t~6gCpp+|hy|=X)%7W$*HEQHwYQU#GtkLZ+r`AOM?_KRppa%$nAtL~3vF96 zf>x_GuparOK8uap`~BUrw6bUC?mTur=%=o-TXl+hUH|jC`+w8k-6?Jp6g(!R8@1!! z9UhGnygUlWgT$*&N=a;#b1EzUxxCMHb!&o)%e>cH)Asze?)<*#<95l-VpBtwL~b%! 
z^Vwf+W463O{9m1aZ?qp)C3>)aTgNt!X+Hl(fj^udEH`Xhr~2}eXw$voovQr2`?kmM ze)!+*%5?Lxu9)MJnR~0d|9tM4oGH?M`pD!DegCYM1Sna52rFH;O=-sk=~8jVPwjP2 zj14ywZ7h;iQi|tWxB8*+gDo|#E>E7V^?!AQdsqE0mn%^+Qpr409=j&UF>uvpp6m0BE|bkuS0pLZ`Figo@k?_aUc*@pS`<*N7F%c_sNEAdWj`}3Or`-M6?=AZK4 zT;^w)G<1B@^-Z#?V6EV?VU6Q_ep~k5MZQ4R-5-;={;bF}F;HUCT))|O+q=;J{}%hs z$XPP)y4qCBuUBtpN2c)$qJac{3e?jAGm6 zx-Gg@L>L$pu5UEoFD!FT<^G+en`$?e>VLG$Jr#TG&6dLZhZ(rLyAx9wG?!;?z7sh) z;5{P%Mh6v+xiED|wfX7KK5o~gnb` zc8u}G)60VG_djm^eRiZ^bc+bW1WMaf;?$3Rt zO(%BzpSXgF!K1ToQ_-!p!Z&w(yy)tXdVca$=H&2~{7Nr&nPqxyxwy{S{KIm4H&FVX b!TyKcyum@)P5I<1Q0DV=^>bP0l+XkK0jK0J literal 0 HcmV?d00001 diff --git a/public/icons/docs/zsh/SOURCE b/public/icons/docs/zsh/SOURCE new file mode 100644 index 0000000000..70cc4aeed8 --- /dev/null +++ b/public/icons/docs/zsh/SOURCE @@ -0,0 +1,2 @@ +https://sourceforge.net/p/zsh/web/ci/master/tree/favicon.png + From 54d3b88bb18528a912d3a6b403401cc188c83049 Mon Sep 17 00:00:00 2001 From: spamguy Date: Fri, 13 Jun 2025 12:46:28 -0700 Subject: [PATCH 2/3] Add entries for function calls and operators --- lib/docs/filters/zsh/entries.rb | 43 +++++++++++++++++++++++++++++---- lib/docs/scrapers/zsh.rb | 2 +- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/lib/docs/filters/zsh/entries.rb b/lib/docs/filters/zsh/entries.rb index e6bc07305e..02e7dc9602 100644 --- a/lib/docs/filters/zsh/entries.rb +++ b/lib/docs/filters/zsh/entries.rb @@ -7,20 +7,53 @@ def get_name def additional_entries entries = [] - + used_fns = [] + css('h2.section').each do |node| type = get_type - # Linkable anchor sits above
<h2>
. a = node.xpath('preceding-sibling::a').last header_text = extract_header_text(node.content) - if type == 'Zsh Modules' - module_name = header_text.match(/The (zsh\/.*) Module/)&.captures&.first + case type + when 'Zsh Modules' + module_name = header_text.match(/The (zsh\/.* Module)/)&.captures&.first header_text = module_name if module_name.present? + when 'Calendar Function System' + header_text << ' (Calendar)' end - entries << [header_text, a['name'], type] if header_text != 'Description' + entries << [header_text, a['name'], type] unless header_text.start_with?('Description') + end + + # Functions are documented within
<dl> elements. + # Names are wrapped in <dt>
, details within <dd>
. + # <dd>
can also contain anchors for the next function. + doc.css('> dl').each do |node| + type = get_type + fn_names = node.css('> dt') + node.css('dd a[name]').each_with_index do |anchor, i| + if fn_names[i].present? && anchor['name'].present? + fn_names[i]['id'] = anchor['name'] + + # Groups of functions are sometimes comma-delimited. + # Strip arguments, flags, etc. from function name. + # Skip flag-only headers. + fn_names[i].inner_html.split(', ').each do |fn| + fn.gsub!(/<(?:tt|var)>(.+?)<\/(?:tt|var)>/, '\1') + fn = fn.split(' ').first + fn.gsub!(/(?:[\[\(]).*(?:[\]\)]).*$/, '') + + # Add context for operators. + fn << " (#{type})" if fn.length == 1 + + if fn.present? && !fn.match?(/^[\-\[]/) && !used_fns.include?(fn) + used_fns << fn + entries << [fn, anchor['name'], type] + end + end + end + end end entries diff --git a/lib/docs/scrapers/zsh.rb b/lib/docs/scrapers/zsh.rb index 82423d34f2..b4705960b3 100644 --- a/lib/docs/scrapers/zsh.rb +++ b/lib/docs/scrapers/zsh.rb @@ -27,7 +27,7 @@ class Zsh < UrlScraper def get_latest_version(opts) body = fetch('https://zsh.sourceforge.io/Doc/Release', opts) - body.scan(/, Zsh version ([0-9.]+)/)[0][0][0...-1] + body.scan(/Zsh version ([0-9.]+)/)[0][0] end end end From b29a3c8c28500bd8bccd5fd36ac27d279aa6fde0 Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Fri, 27 Jun 2025 21:27:39 +0200 Subject: [PATCH 3/3] zsh: add news entry --- assets/javascripts/news.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/assets/javascripts/news.json b/assets/javascripts/news.json index 3ddc39a516..2c5c71680c 100644 --- a/assets/javascripts/news.json +++ b/assets/javascripts/news.json @@ -1,4 +1,8 @@ [ + [ + "2025-06-27", + "New documentation: Zsh" + ], [ "2025-05-28", "New documentation: Vert.x"