Skip to content

Commit ae5d701

Browse files
authored
Merge pull request #115 from julwrites/staging
Reverting to previous parser for nodes
2 parents 703b317 + 7431a14 commit ae5d701

8 files changed

Lines changed: 181 additions & 85 deletions

File tree

pkg/app/api_client.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ var (
2323
configMutex sync.Mutex
2424
)
2525

26-
// resetAPIConfigCache invalidates the cache, forcing a reload on next call.
26+
// ResetAPIConfigCache invalidates the cache, forcing a reload on next call.
2727
// This is primarily for testing purposes.
28-
func resetAPIConfigCache() {
28+
func ResetAPIConfigCache() {
2929
configMutex.Lock()
3030
defer configMutex.Unlock()
3131
configInitialized = false

pkg/app/api_client_test.go

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ import (
66
"net/http"
77
"net/http/httptest"
88
"testing"
9+
10+
"github.com/julwrites/ScriptureBot/pkg/utils"
911
)
1012

1113
func TestSubmitQuery(t *testing.T) {
@@ -88,8 +90,8 @@ func TestSubmitQuery(t *testing.T) {
8890
restore := setEnv("BIBLE_API_URL", "")
8991
defer restore()
9092
// Also unset PROJECT_ID to avoid Secret Manager lookup
91-
defer setEnv("GCLOUD_PROJECT_ID", "")()
92-
resetAPIConfigCache()
93+
defer utils.SetEnv("GCLOUD_PROJECT_ID", "")()
94+
ResetAPIConfigCache()
9395

9496
req := QueryRequest{}
9597
var resp VerseResponse
@@ -102,10 +104,10 @@ func TestSubmitQuery(t *testing.T) {
102104

103105
func TestGetAPIConfig_SecretManagerFallback(t *testing.T) {
104106
// Ensure Env Vars are empty
105-
defer setEnv("BIBLE_API_URL", "")()
106-
defer setEnv("BIBLE_API_KEY", "")()
107-
defer setEnv("GCLOUD_PROJECT_ID", "test-project")()
108-
resetAPIConfigCache()
107+
defer utils.SetEnv("BIBLE_API_URL", "")()
108+
defer utils.SetEnv("BIBLE_API_KEY", "")()
109+
defer utils.SetEnv("GCLOUD_PROJECT_ID", "test-project")()
110+
ResetAPIConfigCache()
109111

110112
// Mock the secret function
111113
oldGetSecret := getSecretFunc
@@ -136,10 +138,10 @@ func TestGetAPIConfig_SecretManagerFallback(t *testing.T) {
136138

137139
func TestGetAPIConfig_PassedProjectID(t *testing.T) {
138140
// Ensure Env Vars are empty, including GCLOUD_PROJECT_ID
139-
defer setEnv("BIBLE_API_URL", "")()
140-
defer setEnv("BIBLE_API_KEY", "")()
141-
defer setEnv("GCLOUD_PROJECT_ID", "")()
142-
resetAPIConfigCache()
141+
defer utils.SetEnv("BIBLE_API_URL", "")()
142+
defer utils.SetEnv("BIBLE_API_KEY", "")()
143+
defer utils.SetEnv("GCLOUD_PROJECT_ID", "")()
144+
ResetAPIConfigCache()
143145

144146
// Mock the secret function
145147
oldGetSecret := getSecretFunc

pkg/app/passage.go

Lines changed: 96 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -35,51 +35,105 @@ func GetReference(doc *html.Node) string {
3535
return utils.GetTextNode(refNode).Data
3636
}
3737

38-
func ParseNodesForPassage(node *html.Node) string {
39-
var parts []string
40-
41-
for child := node.FirstChild; child != nil; child = child.NextSibling {
42-
if child.Type == html.TextNode {
43-
parts = append(parts, child.Data)
44-
} else if child.Type == html.ElementNode {
45-
var subParts string
46-
switch child.Data {
47-
case "sup":
48-
isFootnote := func(node *html.Node) bool {
49-
for _, attr := range node.Attr {
50-
if attr.Key == "class" && attr.Val == "footnote" {
51-
return true
52-
}
53-
}
54-
return false
55-
}
56-
if isFootnote(child) {
57-
continue
58-
}
59-
childText := ParseNodesForPassage(child)
60-
if len(childText) > 0 {
61-
subParts = fmt.Sprintf("<b>%s</b>", childText)
62-
}
63-
case "i":
64-
childText := ParseNodesForPassage(child)
65-
subParts = fmt.Sprintf("<i>%s</i>", childText)
66-
case "p", "span", "body", "html":
67-
subParts = ParseNodesForPassage(child)
68-
case "br":
69-
subParts = "\n"
70-
default:
71-
subParts = ParseNodesForPassage(child)
38+
// markdownV2Escaper backslash-escapes every character that Telegram's
// MarkdownV2 parse mode treats as markup:
//
//	'_', '*', '[', ']', '(', ')', '~', '`', '>', '#', '+', '-', '=', '|',
//	'{', '}', '.', '!'
//
// plus the backslash itself. The replacer is built once and shared; a
// *strings.Replacer is safe for concurrent use by multiple goroutines.
var markdownV2Escaper = strings.NewReplacer(
	// The backslash is the escape character, so a literal one in the input
	// must be doubled; otherwise it would escape whatever character follows
	// it and disappear from the rendered message.
	`\`, `\\`,
	"_", `\_`, "*", `\*`, "[", `\[`, "]", `\]`, "(", `\(`, ")", `\)`,
	"~", `\~`, "`", "\\`", ">", `\>`, "#", `\#`, "+", `\+`, "-", `\-`,
	"=", `\=`, "|", `\|`, "{", `\{`, "}", `\}`, ".", `\.`, "!", `\!`,
)

// escapeMarkdownV2 returns s with every MarkdownV2 special character (and
// every backslash) preceded by a backslash, so that the text is rendered
// literally. It is meant for raw text-node content only: formatting markers
// must be added after escaping, otherwise they would be escaped as well.
//
// '^' is not a MarkdownV2 special character and is passed through unchanged.
func escapeMarkdownV2(s string) string {
	// Replacer works in a single pass over s, so the backslashes it inserts
	// are never themselves re-escaped.
	return markdownV2Escaper.Replace(s)
}
52+
53+
// Helper functions for parsing
54+
// isFormattingTag reports whether tag names one of the inline formatting
// elements: "sup", "i" or "b". The comparison is exact and case-sensitive.
func isFormattingTag(tag string) bool {
	switch tag {
	case "sup", "i", "b":
		return true
	default:
		return false
	}
}
57+
58+
// isHeaderTag reports whether tag names a heading element from "h1" through
// "h4". Deeper heading levels ("h5", "h6") are not treated as headers. The
// comparison is exact and case-sensitive.
func isHeaderTag(tag string) bool {
	switch tag {
	case "h1", "h2", "h3", "h4":
		return true
	default:
		return false
	}
}
61+
62+
func wrapText(text, tag string) string {
63+
if strings.TrimSpace(text) == "" {
64+
return text
65+
}
66+
67+
if tag == "sup" {
68+
// User-specified format for superscript
69+
return fmt.Sprintf("^%s^", strings.Trim(text, " "))
70+
}
71+
if tag == "i" {
72+
return fmt.Sprintf("_%s_", text)
73+
}
74+
if tag == "b" || isHeaderTag(tag) {
75+
return fmt.Sprintf("*%s*", text)
76+
}
77+
return text
78+
}
79+
80+
// parseNode converts the subtree rooted at node into a string, recursing
// through the node's children.
//
//   - A text node yields its data passed through escapeMarkdownV2.
//   - Any other non-element node yields the concatenation of its children.
//   - A "br" element yields a newline.
//   - An element that is neither a formatting tag nor a header tag yields the
//     concatenation of its children, with no markup of its own.
//   - A "sup" element with a class attribute equal to "footnote" yields "".
//   - Any other formatting or header element wraps its content with wrapText.
//
// NOTE(review): in this diff view the lines following a marker that ends in
// '-' (for example "73-") belong to the removed ParseNodesForPassage
// implementation, not to parseNode.
func parseNode(node *html.Node) string {
81+
if node.Type == html.TextNode {
82+
return escapeMarkdownV2(node.Data)
83+
}
84+
85+
// Non-element, non-text nodes contribute only what their children produce.
if node.Type != html.ElementNode {
86+
var content strings.Builder
87+
for c := node.FirstChild; c != nil; c = c.NextSibling {
88+
content.WriteString(parseNode(c))
89+
}
90+
return content.String()
91+
}
92+
93+
tag := node.Data
94+
95+
// Handle non-formatting tags first
96+
if tag == "br" {
97+
return "\n"
98+
}
99+
if !isFormattingTag(tag) && !isHeaderTag(tag) {
100+
var content strings.Builder
101+
for c := node.FirstChild; c != nil; c = c.NextSibling {
102+
content.WriteString(parseNode(c))
103+
}
104+
return content.String()
105+
}
106+
107+
// Handle formatting tags (b, i, sup, h1-h4)
108+
if tag == "sup" {
109+
// The class value is compared exactly, so a class list such as
// "footnote other" does not match.
for _, attr := range node.Attr {
110+
if attr.Key == "class" && attr.Val == "footnote" {
111+
return "" // Ignore footnote nodes
72112
}
73-
parts = append(parts, subParts)
74113
}
75114
}
76115

77-
text := strings.Join(parts, "")
116+
// content collects the finished output; textBuffer collects the output of
// consecutive children that are not themselves formatting or header elements.
var content strings.Builder
117+
var textBuffer strings.Builder
118+
119+
// flushTextBuffer wraps whatever has accumulated in textBuffer with this
// node's tag, appends it to content and empties the buffer.
flushTextBuffer := func() {
120+
if textBuffer.Len() > 0 {
121+
content.WriteString(wrapText(textBuffer.String(), tag))
122+
textBuffer.Reset()
123+
}
124+
}
78125

79-
if node.Data == "h1" || node.Data == "h2" || node.Data == "h3" || node.Data == "h4" {
80-
text = fmt.Sprintf("<b>%s</b>", text)
126+
// A nested formatting or header child is emitted with its own markup and is
// not wrapped by this node's tag: the pending text is flushed (and wrapped)
// before it, and wrapping resumes with the children that follow.
for c := node.FirstChild; c != nil; c = c.NextSibling {
127+
if c.Type == html.ElementNode && (isFormattingTag(c.Data) || isHeaderTag(c.Data)) {
128+
flushTextBuffer()
129+
content.WriteString(parseNode(c))
130+
} else {
131+
textBuffer.WriteString(parseNode(c))
132+
}
81133
}
82-
return text
134+
// Emit any text that followed the last nested formatting child.
flushTextBuffer()
135+
136+
return content.String()
83137
}
84138

85139
func ParsePassageFromHtml(rawHtml string) string {
@@ -88,8 +142,7 @@ func ParsePassageFromHtml(rawHtml string) string {
88142
log.Printf("Error parsing html: %v", err)
89143
return rawHtml
90144
}
91-
92-
return ParseNodesForPassage(doc)
145+
return parseNode(doc)
93146
}
94147

95148
// Deprecated: Using new API service
@@ -119,7 +172,7 @@ func GetPassage(ref string, doc *html.Node, version string) string {
119172
return false
120173
})
121174

122-
textBlocks := utils.MapNodeListToString(filtNodes, ParseNodesForPassage)
175+
textBlocks := utils.MapNodeListToString(filtNodes, parseNode)
123176

124177
var passage strings.Builder
125178

@@ -173,6 +226,7 @@ func GetBiblePassage(env def.SessionData) def.SessionData {
173226
}
174227

175228
// Deprecated: Using new API service logic inside GetBiblePassage
229+
// Deprecated: Using new API service
176230
func CheckBibleReference(ref string) bool {
177231
log.Printf("Checking reference %s", ref)
178232

pkg/app/passage_test.go

Lines changed: 46 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ import (
1010
"github.com/julwrites/ScriptureBot/pkg/utils"
1111
)
1212

13+
func setEnv(key, value string) func() {
14+
ResetAPIConfigCache()
15+
return utils.SetEnv(key, value)
16+
}
17+
1318
func TestGetBiblePassageHtml(t *testing.T) {
1419
doc := GetPassageHtml("gen 8", "NIV")
1520

@@ -69,6 +74,7 @@ func TestGetBiblePassage(t *testing.T) {
6974
defer ts.Close()
7075

7176
defer setEnv("BIBLE_API_URL", ts.URL)()
77+
defer setEnv("BIBLE_API_KEY", "test_key")()
7278

7379
t.Run("Success", func(t *testing.T) {
7480
var env def.SessionData
@@ -78,7 +84,7 @@ func TestGetBiblePassage(t *testing.T) {
7884
env.User.Config = utils.SerializeUserConfig(conf)
7985
env = GetBiblePassage(env)
8086

81-
if env.Res.Message != "In the beginning God created the heavens and the earth." {
87+
if env.Res.Message != `In the beginning God created the heavens and the earth\.` {
8288
t.Errorf("Expected passage text, got '%s'", env.Res.Message)
8389
}
8490
})
@@ -105,26 +111,57 @@ func TestGetBiblePassage(t *testing.T) {
105111
}
106112

107113
func TestParsePassageFromHtml(t *testing.T) {
108-
t.Run("Valid HTML", func(t *testing.T) {
109-
html := "<p><span><sup>12 </sup>But to all who did receive him, who believed in his name, he gave the right to become children of God,</span></p>"
110-
expected := "<b>12 </b>But to all who did receive him, who believed in his name, he gave the right to become children of God,"
114+
t.Run("Valid HTML with superscript", func(t *testing.T) {
115+
html := `<p><span><sup>12 </sup>But to all who did receive him, who believed in his name, he gave the right to become children of God,</span></p>`
116+
expected := `^12^But to all who did receive him, who believed in his name, he gave the right to become children of God,`
111117
if got := ParsePassageFromHtml(html); got != expected {
112118
t.Errorf("ParsePassageFromHtml() = %v, want %v", got, expected)
113119
}
114120
})
115121

116122
t.Run("HTML with italics", func(t *testing.T) {
117-
html := "<p><i>This is italic.</i></p>"
118-
expected := "<i>This is italic.</i>"
123+
html := `<p><i>This is italic.</i></p>`
124+
expected := `_This is italic\._`
125+
if got := ParsePassageFromHtml(html); got != expected {
126+
t.Errorf("ParsePassageFromHtml() = %v, want %v", got, expected)
127+
}
128+
})
129+
130+
t.Run("HTML with bold", func(t *testing.T) {
131+
html := `<p><b>This is bold.</b></p>`
132+
expected := `*This is bold\.*`
133+
if got := ParsePassageFromHtml(html); got != expected {
134+
t.Errorf("ParsePassageFromHtml() = %v, want %v", got, expected)
135+
}
136+
})
137+
138+
t.Run("HTML with line breaks", func(t *testing.T) {
139+
html := `<p>Line 1.<br>Line 2.</p>`
140+
expected := "Line 1\\.\nLine 2\\."
119141
if got := ParsePassageFromHtml(html); got != expected {
120142
t.Errorf("ParsePassageFromHtml() = %v, want %v", got, expected)
121143
}
122144
})
123145

124146
t.Run("Invalid HTML", func(t *testing.T) {
125-
html := "<p>This is malformed HTML"
126-
// The parser should still try its best. In this case, it should just return the text.
127-
expected := "This is malformed HTML"
147+
html := `<p>This is malformed HTML`
148+
expected := `This is malformed HTML`
149+
if got := ParsePassageFromHtml(html); got != expected {
150+
t.Errorf("ParsePassageFromHtml() = %v, want %v", got, expected)
151+
}
152+
})
153+
154+
t.Run("Nested HTML tags", func(t *testing.T) {
155+
html := `<p><b>This is bold, <i>and this is italic.</i></b></p>`
156+
expected := `*This is bold, *_and this is italic\._`
157+
if got := ParsePassageFromHtml(html); got != expected {
158+
t.Errorf("ParsePassageFromHtml() = %v, want %v", got, expected)
159+
}
160+
})
161+
162+
t.Run("MarkdownV2 escaping", func(t *testing.T) {
163+
html := `<p>This has special characters: *_. [hello](world)!</p>`
164+
expected := `This has special characters: \*\_\. \[hello\]\(world\)\!`
128165
if got := ParsePassageFromHtml(html); got != expected {
129166
t.Errorf("ParsePassageFromHtml() = %v, want %v", got, expected)
130167
}

pkg/app/test_utils_test.go

Lines changed: 0 additions & 20 deletions
This file was deleted.

pkg/bot/bot.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package bot
66
import (
77
"fmt"
88
"log"
9+
"os"
910

1011
"github.com/julwrites/BotPlatform/pkg/def"
1112

@@ -27,6 +28,9 @@ func RunCommands(env def.SessionData) def.SessionData {
2728
env.Msg.Command = app.CMD_CLOSE
2829
}
2930

31+
// Propagate secrets
32+
env.Secrets.PROJECT_ID = os.Getenv("GCLOUD_PROJECT_ID")
33+
3034
env = app.ProcessCommand(env)
3135

3236
return env

pkg/bot/bot_test.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ func TestRunCommands(t *testing.T) {
2828
}))
2929
defer ts.Close()
3030

31-
// Override API config
31+
// Override API config and defer reset
3232
app.SetAPIConfigOverride(ts.URL, "dummy")
33+
defer app.ResetAPIConfigCache()
3334

3435
var env def.SessionData
3536
var conf utils.UserConfig
@@ -39,7 +40,7 @@ func TestRunCommands(t *testing.T) {
3940

4041
env = RunCommands(env)
4142

42-
if !strings.Contains(env.Res.Message, "Not so the wicked!") {
43+
if !strings.Contains(env.Res.Message, "Not so the wicked\\!") {
4344
t.Errorf("Failed TestRunCommands Passage command. Got: %s", env.Res.Message)
4445
}
4546
}

0 commit comments

Comments
 (0)