Merge pull request #115 from julwrites/staging

julwrites · web-flow · commit ae5d70119a95 · 2025-11-26T01:29:27.000+08:00
Reverting to previous parser for nodes
diff --git a/pkg/app/api_client.go b/pkg/app/api_client.go
@@ -23,9 +23,9 @@ var (
 	configMutex       sync.Mutex
 )
 
-// resetAPIConfigCache invalidates the cache, forcing a reload on next call.
+// ResetAPIConfigCache invalidates the cache, forcing a reload on next call.
 // This is primarily for testing purposes.
-func resetAPIConfigCache() {
+func ResetAPIConfigCache() {
 	configMutex.Lock()
 	defer configMutex.Unlock()
 	configInitialized = false
diff --git a/pkg/app/api_client_test.go b/pkg/app/api_client_test.go
@@ -6,6 +6,8 @@ import (
 	"net/http"
 	"net/http/httptest"
 	"testing"
+
+	"github.com/julwrites/ScriptureBot/pkg/utils"
 )
 
 func TestSubmitQuery(t *testing.T) {
@@ -88,8 +90,8 @@ func TestSubmitQuery(t *testing.T) {
 		restore := setEnv("BIBLE_API_URL", "")
 		defer restore()
 		// Also unset PROJECT_ID to avoid Secret Manager lookup
-		defer setEnv("GCLOUD_PROJECT_ID", "")()
-		resetAPIConfigCache()
+		defer utils.SetEnv("GCLOUD_PROJECT_ID", "")()
+		ResetAPIConfigCache()
 
 		req := QueryRequest{}
 		var resp VerseResponse
@@ -102,10 +104,10 @@ func TestSubmitQuery(t *testing.T) {
 
 func TestGetAPIConfig_SecretManagerFallback(t *testing.T) {
 	// Ensure Env Vars are empty
-	defer setEnv("BIBLE_API_URL", "")()
-	defer setEnv("BIBLE_API_KEY", "")()
-	defer setEnv("GCLOUD_PROJECT_ID", "test-project")()
-	resetAPIConfigCache()
+	defer utils.SetEnv("BIBLE_API_URL", "")()
+	defer utils.SetEnv("BIBLE_API_KEY", "")()
+	defer utils.SetEnv("GCLOUD_PROJECT_ID", "test-project")()
+	ResetAPIConfigCache()
 
 	// Mock the secret function
 	oldGetSecret := getSecretFunc
@@ -136,10 +138,10 @@ func TestGetAPIConfig_SecretManagerFallback(t *testing.T) {
 
 func TestGetAPIConfig_PassedProjectID(t *testing.T) {
 	// Ensure Env Vars are empty, including GCLOUD_PROJECT_ID
-	defer setEnv("BIBLE_API_URL", "")()
-	defer setEnv("BIBLE_API_KEY", "")()
-	defer setEnv("GCLOUD_PROJECT_ID", "")()
-	resetAPIConfigCache()
+	defer utils.SetEnv("BIBLE_API_URL", "")()
+	defer utils.SetEnv("BIBLE_API_KEY", "")()
+	defer utils.SetEnv("GCLOUD_PROJECT_ID", "")()
+	ResetAPIConfigCache()
 
 	// Mock the secret function
 	oldGetSecret := getSecretFunc
diff --git a/pkg/app/passage.go b/pkg/app/passage.go
@@ -35,51 +35,105 @@ func GetReference(doc *html.Node) string {
 	return utils.GetTextNode(refNode).Data
 }
 
-func ParseNodesForPassage(node *html.Node) string {
-	var parts []string
-
-	for child := node.FirstChild; child != nil; child = child.NextSibling {
-		if child.Type == html.TextNode {
-			parts = append(parts, child.Data)
-		} else if child.Type == html.ElementNode {
-			var subParts string
-			switch child.Data {
-			case "sup":
-				isFootnote := func(node *html.Node) bool {
-					for _, attr := range node.Attr {
-						if attr.Key == "class" && attr.Val == "footnote" {
-							return true
-						}
-					}
-					return false
-				}
-				if isFootnote(child) {
-					continue
-				}
-				childText := ParseNodesForPassage(child)
-				if len(childText) > 0 {
-					subParts = fmt.Sprintf("<b>%s</b>", childText)
-				}
-			case "i":
-				childText := ParseNodesForPassage(child)
-				subParts = fmt.Sprintf("<i>%s</i>", childText)
-			case "p", "span", "body", "html":
-				subParts = ParseNodesForPassage(child)
-			case "br":
-				subParts = "\n"
-			default:
-				subParts = ParseNodesForPassage(child)
+// escapeMarkdownV2 returns s with each Telegram MarkdownV2 reserved character prefixed by a backslash.
+func escapeMarkdownV2(s string) string {
+	// The Telegram Bot API's MarkdownV2 rules list these characters as requiring a '\' prefix:
+	// '_', '*', '[', ']', '(', ')', '~', '`', '>', '#', '+', '-', '=', '|', '{', '}', '.', '!'
+	// '^' is not in that list, so it is deliberately left unescaped.
+	// The input is raw text-node content, so every listed character is escaped unconditionally.
+	// NOTE(review): '\' itself has no entry below, so a literal backslash in s passes through as-is; confirm this is intended.
+	r := strings.NewReplacer(
+		"_", `\_`, "*", `\*`, "[", `\[`, "]", `\]`, "(", `\(`, ")", `\)`,
+		"~", `\~`, "`", "\\`", ">", `\>`, "#", `\#`, "+", `\+`, "-", `\-`,
+		"=", `\=`, "|", `\|`, "{", `\{`, "}", `\}`, ".", `\.`, "!", `\!`,
+	)
+	return r.Replace(s)
+}
+
+// isFormattingTag reports whether tag is one of the inline formatting tags "sup", "i" or "b".
+func isFormattingTag(tag string) bool {
+	return tag == "sup" || tag == "i" || tag == "b"
+}
+
+func isHeaderTag(tag string) bool { // reports whether tag is a heading tag "h1" through "h4"
+	return tag == "h1" || tag == "h2" || tag == "h3" || tag == "h4"
+}
+
+func wrapText(text, tag string) string { // surrounds text with the marker chosen for tag; other tags return text unchanged
+	if strings.TrimSpace(text) == "" {
+		return text // empty or whitespace-only text is returned as-is, never wrapped
+	}
+
+	if tag == "sup" {
+		// User-specified superscript format: caret-delimited, with leading/trailing spaces (only ' ') trimmed.
+		return fmt.Sprintf("^%s^", strings.Trim(text, " "))
+	}
+	if tag == "i" {
+		return fmt.Sprintf("_%s_", text) // italic marker
+	}
+	if tag == "b" || isHeaderTag(tag) {
+		return fmt.Sprintf("*%s*", text) // bold marker; header tags are rendered the same way as "b"
+	}
+	return text
+}
+
+func parseNode(node *html.Node) string { // converts node and its subtree to text: text nodes are escaped, formatting/header tags are wrapped via wrapText
+	if node.Type == html.TextNode {
+		return escapeMarkdownV2(node.Data)
+	}
+
+	if node.Type != html.ElementNode { // any other non-element node: concatenate the rendering of its children
+		var content strings.Builder
+		for c := node.FirstChild; c != nil; c = c.NextSibling {
+			content.WriteString(parseNode(c))
+		}
+		return content.String()
+	}
+
+	tag := node.Data
+
+	// Elements without markup of their own: "br" becomes a newline, any other such tag passes its children through.
+	if tag == "br" {
+		return "\n"
+	}
+	if !isFormattingTag(tag) && !isHeaderTag(tag) {
+		var content strings.Builder
+		for c := node.FirstChild; c != nil; c = c.NextSibling {
+			content.WriteString(parseNode(c))
+		}
+		return content.String()
+	}
+
+	// From here on tag is a formatting or header tag (b, i, sup, h1-h4).
+	if tag == "sup" {
+		for _, attr := range node.Attr {
+			if attr.Key == "class" && attr.Val == "footnote" {
+				return "" // footnote nodes contribute nothing to the output
 			}
-			parts = append(parts, subParts)
 		}
 	}
 
-	text := strings.Join(parts, "")
+	var content strings.Builder
+	var textBuffer strings.Builder
+
+	flushTextBuffer := func() { // wrap the accumulated run with this node's tag, then start a fresh run
+		if textBuffer.Len() > 0 {
+			content.WriteString(wrapText(textBuffer.String(), tag))
+			textBuffer.Reset()
+		}
+	}
 
-	if node.Data == "h1" || node.Data == "h2" || node.Data == "h3" || node.Data == "h4" {
-		text = fmt.Sprintf("<b>%s</b>", text)
+	for c := node.FirstChild; c != nil; c = c.NextSibling {
+		if c.Type == html.ElementNode && (isFormattingTag(c.Data) || isHeaderTag(c.Data)) {
+			flushTextBuffer() // close the current run first so the nested tag's markers sit beside ours, not inside them
+			content.WriteString(parseNode(c))
+		} else {
+			textBuffer.WriteString(parseNode(c))
+		}
 	}
-	return text
+	flushTextBuffer()
+
+	return content.String()
 }
 
 func ParsePassageFromHtml(rawHtml string) string {
@@ -88,8 +142,7 @@ func ParsePassageFromHtml(rawHtml string) string {
 		log.Printf("Error parsing html: %v", err)
 		return rawHtml
 	}
-
-	return ParseNodesForPassage(doc)
+	return parseNode(doc)
 }
 
 // Deprecated: Using new API service
@@ -119,7 +172,7 @@ func GetPassage(ref string, doc *html.Node, version string) string {
 		return false
 	})
 
-	textBlocks := utils.MapNodeListToString(filtNodes, ParseNodesForPassage)
+	textBlocks := utils.MapNodeListToString(filtNodes, parseNode)
 
 	var passage strings.Builder
 
@@ -173,6 +226,7 @@ func GetBiblePassage(env def.SessionData) def.SessionData {
 }
 
 // Deprecated: Using new API service logic inside GetBiblePassage
+// Deprecated: Using new API service
 func CheckBibleReference(ref string) bool {
 	log.Printf("Checking reference %s", ref)
 
diff --git a/pkg/app/passage_test.go b/pkg/app/passage_test.go
@@ -10,6 +10,11 @@ import (
 	"github.com/julwrites/ScriptureBot/pkg/utils"
 )
 
+func setEnv(key, value string) func() { // test helper: resets the API config cache, then delegates to utils.SetEnv
+	ResetAPIConfigCache() // invalidate cached config so it is reloaded on the next call
+	return utils.SetEnv(key, value) // callers defer the returned func; presumably it restores the previous value (confirm in utils)
+}
+
 func TestGetBiblePassageHtml(t *testing.T) {
 	doc := GetPassageHtml("gen 8", "NIV")
 
@@ -69,6 +74,7 @@ func TestGetBiblePassage(t *testing.T) {
 	defer ts.Close()
 
 	defer setEnv("BIBLE_API_URL", ts.URL)()
+	defer setEnv("BIBLE_API_KEY", "test_key")()
 
 	t.Run("Success", func(t *testing.T) {
 		var env def.SessionData
@@ -78,7 +84,7 @@ func TestGetBiblePassage(t *testing.T) {
 		env.User.Config = utils.SerializeUserConfig(conf)
 		env = GetBiblePassage(env)
 
-		if env.Res.Message != "In the beginning God created the heavens and the earth." {
+		if env.Res.Message != `In the beginning God created the heavens and the earth\.` {
 			t.Errorf("Expected passage text, got '%s'", env.Res.Message)
 		}
 	})
@@ -105,26 +111,57 @@ func TestGetBiblePassage(t *testing.T) {
 }
 
 func TestParsePassageFromHtml(t *testing.T) {
-	t.Run("Valid HTML", func(t *testing.T) {
-		html := "<p><span><sup>12 </sup>But to all who did receive him, who believed in his name, he gave the right to become children of God,</span></p>"
-		expected := "<b>12 </b>But to all who did receive him, who believed in his name, he gave the right to become children of God,"
+	t.Run("Valid HTML with superscript", func(t *testing.T) {
+		html := `<p><span><sup>12 </sup>But to all who did receive him, who believed in his name, he gave the right to become children of God,</span></p>`
+		expected := `^12^But to all who did receive him, who believed in his name, he gave the right to become children of God,`
 		if got := ParsePassageFromHtml(html); got != expected {
 			t.Errorf("ParsePassageFromHtml() = %v, want %v", got, expected)
 		}
 	})
 
 	t.Run("HTML with italics", func(t *testing.T) {
-		html := "<p><i>This is italic.</i></p>"
-		expected := "<i>This is italic.</i>"
+		html := `<p><i>This is italic.</i></p>`
+		expected := `_This is italic\._`
+		if got := ParsePassageFromHtml(html); got != expected {
+			t.Errorf("ParsePassageFromHtml() = %v, want %v", got, expected)
+		}
+	})
+
+	t.Run("HTML with bold", func(t *testing.T) {
+		html := `<p><b>This is bold.</b></p>`
+		expected := `*This is bold\.*`
+		if got := ParsePassageFromHtml(html); got != expected {
+			t.Errorf("ParsePassageFromHtml() = %v, want %v", got, expected)
+		}
+	})
+
+	t.Run("HTML with line breaks", func(t *testing.T) {
+		html := `<p>Line 1.<br>Line 2.</p>`
+		expected := "Line 1\\.\nLine 2\\."
 		if got := ParsePassageFromHtml(html); got != expected {
 			t.Errorf("ParsePassageFromHtml() = %v, want %v", got, expected)
 		}
 	})
 
 	t.Run("Invalid HTML", func(t *testing.T) {
-		html := "<p>This is malformed HTML"
-		// The parser should still try its best. In this case, it should just return the text.
-		expected := "This is malformed HTML"
+		html := `<p>This is malformed HTML`
+		expected := `This is malformed HTML`
+		if got := ParsePassageFromHtml(html); got != expected {
+			t.Errorf("ParsePassageFromHtml() = %v, want %v", got, expected)
+		}
+	})
+
+	t.Run("Nested HTML tags", func(t *testing.T) {
+		html := `<p><b>This is bold, <i>and this is italic.</i></b></p>`
+		expected := `*This is bold, *_and this is italic\._`
+		if got := ParsePassageFromHtml(html); got != expected {
+			t.Errorf("ParsePassageFromHtml() = %v, want %v", got, expected)
+		}
+	})
+
+	t.Run("MarkdownV2 escaping", func(t *testing.T) {
+		html := `<p>This has special characters: *_. [hello](world)!</p>`
+		expected := `This has special characters: \*\_\. \[hello\]\(world\)\!`
 		if got := ParsePassageFromHtml(html); got != expected {
 			t.Errorf("ParsePassageFromHtml() = %v, want %v", got, expected)
 		}
diff --git a/pkg/app/test_utils_test.go b/pkg/app/test_utils_test.go
diff --git a/pkg/bot/bot.go b/pkg/bot/bot.go
@@ -6,6 +6,7 @@ package bot
 import (
 	"fmt"
 	"log"
+	"os"
 
 	"github.com/julwrites/BotPlatform/pkg/def"
 
@@ -27,6 +28,9 @@ func RunCommands(env def.SessionData) def.SessionData {
 		env.Msg.Command = app.CMD_CLOSE
 	}
 
+	// Propagate secrets
+	env.Secrets.PROJECT_ID = os.Getenv("GCLOUD_PROJECT_ID")
+
 	env = app.ProcessCommand(env)
 
 	return env
diff --git a/pkg/bot/bot_test.go b/pkg/bot/bot_test.go
@@ -28,8 +28,9 @@ func TestRunCommands(t *testing.T) {
 	}))
 	defer ts.Close()
 
-	// Override API config
+	// Override API config and defer reset
 	app.SetAPIConfigOverride(ts.URL, "dummy")
+	defer app.ResetAPIConfigCache()
 
 	var env def.SessionData
 	var conf utils.UserConfig
@@ -39,7 +40,7 @@ func TestRunCommands(t *testing.T) {
 
 	env = RunCommands(env)
 
-	if !strings.Contains(env.Res.Message, "Not so the wicked!") {
+	if !strings.Contains(env.Res.Message, "Not so the wicked\\!") {
 		t.Errorf("Failed TestRunCommands Passage command. Got: %s", env.Res.Message)
 	}
 }
diff --git a/pkg/utils/test_utils.go b/pkg/utils/test_utils.go