Skip to content

Commit 673da16

Browse files
authored
Merge pull request #181 from julwrites/fix-passage-headers-18071459911303667092
Fix excessive vertical whitespace in passage headers
2 parents 602f434 + f377607 commit 673da16

1 file changed

Lines changed: 26 additions & 2 deletions

File tree

pkg/app/passage.go

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"fmt"
88
"log"
99
"net/url"
10+
"regexp"
1011
"strings"
1112
stdhtml "html"
1213

@@ -59,6 +60,19 @@ func isNextSiblingBr(node *html.Node) bool {
5960
return false
6061
}
6162

63+
func hasNextSignificantSibling(node *html.Node) bool {
64+
for next := node.NextSibling; next != nil; next = next.NextSibling {
65+
if next.Type == html.TextNode {
66+
if len(strings.TrimSpace(next.Data)) == 0 {
67+
continue
68+
}
69+
return true
70+
}
71+
return true // Any element
72+
}
73+
return false
74+
}
75+
6276
func ParseNodesForPassage(node *html.Node) string {
6377
var parts []string
6478

@@ -114,7 +128,7 @@ func ParseNodesForPassage(node *html.Node) string {
114128
if headerText == "Footnotes" || headerText == "Cross references" {
115129
continue
116130
}
117-
parts = append(parts, fmt.Sprintf("\n\n<b>%s</b>\n", headerText))
131+
parts = append(parts, fmt.Sprintf("\n\n<b>%s</b>\n", strings.TrimSpace(headerText)))
118132
case "ul", "ol":
119133
parts = append(parts, ParseNodesForPassage(child))
120134
case "li":
@@ -136,11 +150,21 @@ func ParseNodesForPassage(node *html.Node) string {
136150
return strings.Join(parts, "")
137151
}
138152

153+
// newlineRegex matches a newline, any whitespace, a second newline, and all
// whitespace that follows it. In effect it matches every whitespace run that
// starts at a newline and contains at least two newlines.
var newlineRegex = regexp.MustCompile(`\n\s*\n[\s\n]*`)

// CleanPassageText normalizes vertical spacing in text: each run matched by
// newlineRegex is replaced with exactly two newlines, and the result is then
// stripped of leading and trailing whitespace.
func CleanPassageText(text string) string {
	collapsed := newlineRegex.ReplaceAllString(text, "\n\n")
	return strings.TrimSpace(collapsed)
}
161+
139162
func GetPassage(ref string, doc *html.Node, version string) string {
140163
// Replaced FilterTree with direct parsing of the root node
141164
// This allows handling arbitrary structure (divs, lists) returned by the API
142165

143166
text := ParseNodesForPassage(doc)
167+
text = CleanPassageText(text)
144168

145169
var passage strings.Builder
146170

@@ -151,7 +175,7 @@ func GetPassage(ref string, doc *html.Node, version string) string {
151175
}
152176

153177
passage.WriteString("\n")
154-
passage.WriteString(strings.TrimSpace(text))
178+
passage.WriteString(text)
155179

156180
return passage.String()
157181
}

0 commit comments

Comments
 (0)