diff --git a/common.go b/common.go index 63ae985..62f39d6 100644 --- a/common.go +++ b/common.go @@ -22,7 +22,7 @@ func validStart(pos int, input []rune) bool { } func validEnd(pos int, input []rune) bool { - // First char is not a valid end char. + // First char is not a valid end char; we do NOT allow empty entities. // If the end char has a space before it, its not valid either. if pos == 0 || unicode.IsSpace(input[pos-1]) { return false diff --git a/commonV2.go b/commonV2.go index 8bc9173..355cdca 100644 --- a/commonV2.go +++ b/commonV2.go @@ -148,7 +148,7 @@ func isClosingTag(in []rune, pos int) bool { return false } -func getClosingTag(in []rune, tag string) (int, int) { +func getClosingTag(in []rune, openingTag string, closingTag string) (int, int) { offset := 0 subtags := 0 for offset < len(in) { @@ -164,9 +164,9 @@ func getClosingTag(in []rune, tag string) (int, int) { } closingTagIdx := openingTagIdx + 2 + c - if string(in[openingTagIdx+1:closingTagIdx]) == tag { // found a nested tag, this is annoying + if string(in[openingTagIdx+1:closingTagIdx]) == openingTag { // found a nested tag, this is annoying subtags++ - } else if isClosingTag(in, openingTagIdx) && string(in[openingTagIdx+2:closingTagIdx]) == tag { + } else if isClosingTag(in, openingTagIdx) && string(in[openingTagIdx+2:closingTagIdx]) == closingTag { if subtags == 0 { return openingTagIdx, closingTagIdx } diff --git a/md2htmlV2.go b/md2htmlV2.go index 56d3a5a..f3e17a2 100644 --- a/md2htmlV2.go +++ b/md2htmlV2.go @@ -4,6 +4,7 @@ import ( "html" "sort" "strings" + "unicode" ) var defaultConverterV2 = ConverterV2{ @@ -42,20 +43,23 @@ func MD2HTMLButtonsV2(in string) (string, []ButtonV2) { } var chars = map[string]string{ - "`": "code", - "```": "pre", - "_": "i", - "*": "b", - "~": "s", - "__": "u", - "|": "", // this is a placeholder for || to work - "||": "span class=\"tg-spoiler\"", - "!": "", // for emoji - "[": "", // for links - "]": "", // for links/emoji - "(": "", // for links/emoji - 
")": "", // for links/emoji - "\\": "", // for escapes + "`": "code", + "```": "pre", + "_": "i", + "*": "b", + "~": "s", + "__": "u", + "|": "", // this is a placeholder for || to work + "||": "span class=\"tg-spoiler\"", + "!": "", // for emoji + "![": "", // for emoji + "[": "", // for links + "]": "", // for links/emoji + "(": "", // for links/emoji + ")": "", // for links/emoji + "\\": "", // for escapes + "&": "", // for blockquotes + ">": "blockquote", } var AllMarkdownV2Chars = func() []rune { @@ -86,6 +90,53 @@ var skipStarts = map[rune]bool{ '[': true, // links } +func getItem(in []rune, i int) (string, int, bool) { + c := in[i] + if _, ok := chars[string(c)]; !ok { + return "", 0, false + } + + if !validStart(i, in) && !skipStarts[c] { + if c == '\\' && i+1 < len(in) { + escaped := string(in[i+1]) + if _, ok := chars[escaped]; ok { + return escaped, 1, false + } + } + return "", 0, false + } + + item := string(c) + if c == '|' && + i+1 < len(in) && in[i+1] == '|' { + return "||", 1, true + + } else if c == '_' && + i+1 < len(in) && in[i+1] == '_' { // support __ + return "__", 1, true + + } else if c == '`' && + i+2 < len(in) && in[i+1] == '`' && in[i+2] == '`' { // support ``` + return "```", 2, true + + } else if c == '&' && + i+3 < len(in) && in[i+1] == 'g' && in[i+2] == 't' && in[i+3] == ';' && + validBlockQuoteStart(in, i) { + return ">", 3, true + + } else if c == '*' && + i+5 < len(in) && in[i+1] == '*' && in[i+2] == '&' && in[i+3] == 'g' && in[i+4] == 't' && in[i+5] == ';' && + // We force support for **> to allow for people to separate quotes/expandable quote blocks with ** + validBlockQuoteStart(in, i) { + return "**>", 5, true + + } else if c == '!' && i+1 < len(in) && in[i+1] == '[' { + return "![", 1, true + } + + return item, 0, true +} + // TODO: add support for a map-like check of which items cannot be included. 
// // Eg: `code` cannot be italic/bold/underline/strikethrough @@ -96,49 +147,20 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 out := strings.Builder{} for i := 0; i < len(in); i++ { - c := in[i] - if _, ok := chars[string(c)]; !ok { - out.WriteRune(c) - continue - } - - if !validStart(i, in) && !skipStarts[c] { - if c == '\\' && i+1 < len(in) { - if _, ok := chars[string(in[i+1])]; ok { - out.WriteRune(in[i+1]) - i++ - continue - } + item, offset, ok := getItem(in, i) + if !ok { + if item == "" { + item = string(in[i]) } - out.WriteRune(c) + out.WriteString(item) + i += offset continue } + i += offset - switch c { - case '`', '*', '~', '_', '|': // '||', '__', and '```' are included here too - item := string(c) - if c == '|' { // support || - // if single |, ignore. We only care about double || - if i+1 >= len(in) || in[i+1] != '|' { - out.WriteRune(c) - continue - } - - item = "||" - i++ - } else if c == '_' && i+1 < len(in) && in[i+1] == '_' { // support __ - item = "__" - i++ - } else if c == '`' && i+2 < len(in) && in[i+1] == '`' && in[i+2] == '`' { // support ``` - item = "```" - i += 2 - } - - if i+1 >= len(in) { - out.WriteString(item) - continue - } - + switch item { + // All cases where start and closing tags are the same. + case "`", "*", "~", "_", "```", "||", "__": idx := getValidEnd(in[i+1:], item) if idx < 0 { // not found; write and move on. @@ -174,18 +196,33 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 nestedT, nestedB := cv.md2html(in[nStart:nEnd], enableButtons) return out.String() + "<" + chars[item] + ">" + nestedT + "" + closeSpans(chars[item]) + ">" + followT, append(nestedB, followB...) 
- case '!': - if len(in) <= i+1 || in[i+1] != '[' { - out.WriteRune(c) + case ">", "**>": + nStart := i + 1 + for nStart < len(in) && unicode.IsSpace(in[nStart]) { + nStart++ + } + + if nStart >= len(in) { + out.WriteString(item) continue } - ok, text, content, newEnd := getLinkContents(in[i+1:], true) + nEnd, contents, expandable := getBlockQuoteEnd(in, nStart) + nestedT, nestedB := cv.md2html(contents, enableButtons) + followT, followB := cv.md2html(in[nEnd:], enableButtons) + + if expandable { + return out.String() + "
" + strings.TrimSpace(nestedT) + "" + followT, append(nestedB, followB...) + } + return out.String() + "
" + strings.TrimSpace(nestedT) + "" + followT, append(nestedB, followB...) + + case "![": + ok, text, content, newEnd := getLinkContents(in[i:], true) if !ok { - out.WriteRune(c) + out.WriteString(item) continue } - end := i + 1 + newEnd + end := i + newEnd content = strings.TrimPrefix(content, "tg://emoji?id=") @@ -193,10 +230,10 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 followT, followB := cv.md2html(in[end:], enableButtons) return out.String() + `
content", @@ -64,18 +67,40 @@ var basicMDv2 = []struct { }, { // ensure that premium stickers can get converted in: ``, out: `
quote", + }, { + in: ">multi\n> line", + out: "
multi\nline", + }, { + in: ">expandable multi\n>line\n>quote||", + out: "
expandable multi\nline\nquote", + }, { + in: ">expandable multi\n>line\n>quote||\nMore text on another line", + out: "
expandable multi\nline\nquote\nMore text on another line", + }, { + in: "**>expandable multi with star prefix\n>line\n>quote||", + out: "
expandable multi with star prefix\nline\nquote", + }, { + in: ">normal quote\n**>expandable multi\n>idk||", + out: "
normal quote\n
expandable multi\nidk", + }, } func TestMD2HTMLV2Basic(t *testing.T) { for _, x := range append(basicMD, basicMDv2...) { - assert.Equal(t, x.out, tg_md2html.MD2HTMLV2(x.in)) + t.Run(x.in, func(t *testing.T) { + assert.Equal(t, x.out, tg_md2html.MD2HTMLV2(x.in)) + }) } } func TestMD2HTMLV2Advanced(t *testing.T) { for _, x := range advancedMD { - assert.Equal(t, x.out, tg_md2html.MD2HTMLV2(x.in)) + t.Run(x.in, func(t *testing.T) { + assert.Equal(t, x.out, tg_md2html.MD2HTMLV2(x.in)) + }) } } @@ -108,6 +133,9 @@ func TestNotMD2HTMLV2(t *testing.T) { }, { in: "|noop|", out: "|noop|", + }, { + in: "end with >", + out: "end with >", }, { in: "no premium ! in text", // confirm that a '!' doesnt break premiums out: "no premium ! in text", @@ -243,6 +271,15 @@ var md2HTMLV2Buttons = []struct { SameLine: false, }, }, + }, { + in: "text\n> quote\ntext", + out: "text\n
quote\ntext", + }, { + in: "> `code quote`", + out: "
code quote",
+ }, {
+ in: "```go\ntext\n> not quote\nmore text```",
+ out: "text\n> not quote\nmore text",
},
}
diff --git a/reverseV2.go b/reverseV2.go
index a7bd6cc..ad1f296 100644
--- a/reverseV2.go
+++ b/reverseV2.go
@@ -38,12 +38,12 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) {
if len(tagFields) < 1 {
return "", fmt.Errorf("no tag name for HTML tag started at %d", i)
}
- tag := tagFields[0]
+ tagType := tagFields[0]
- co, cc := getClosingTag(in[closeTag+1:], tag)
+ co, cc := getClosingTag(in[closeTag+1:], tagContent, tagType)
if co < 0 || cc < 0 {
// "no closing open"
- return "", fmt.Errorf("no closing tag for HTML tag %q started at %d", tag, i)
+ return "", fmt.Errorf("no closing tag for HTML tag %q started at %d", tagType, i)
}
closingOpen, closingClose := closeTag+1+co, closeTag+1+cc
out.WriteString(html.UnescapeString(string(in[prev:i])))
@@ -53,7 +53,7 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) {
return "", err
}
- switch tag {
+ switch tagType {
case "b", "strong":
out.WriteString("*" + nested + "*")
case "i", "em":
@@ -85,9 +85,9 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) {
switch spanType := tagFields[1]; spanType {
case "class=\"tg-spoiler\"":
- out.WriteString("||" + html.UnescapeString(string(in[closeTag+1:closingOpen])) + "||")
+ out.WriteString("||" + nested + "||")
default:
- return "", fmt.Errorf("unknown tag type %q", spanType)
+ return "", fmt.Errorf("unknown span type %q", spanType)
}
case "a":
if link.MatchString(tagContent) {
@@ -103,14 +103,21 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) {
} else {
return "", fmt.Errorf("badly formatted anchor tag %q", tagContent)
}
+ case "blockquote":
+ if len(tagFields) == 2 && tagFields[1] == "expandable" {
+ out.WriteString("**>" + strings.Join(strings.Split(nested, "\n"), "\n>") + "||")
+ } else {
+ out.WriteString(">" + strings.Join(strings.Split(nested, "\n"), "\n>"))
+ }
+
default:
- return "", fmt.Errorf("unknown tag %q", tag)
+ return "", fmt.Errorf("unknown tag %q", tagType)
}
prev = closingClose + 1
i = closingClose
- case '\\', '_', '*', '~', '`', '[', ']', '(', ')': // these all need to be escaped to ensure we retain the same message
+ case '\\', '_', '*', '~', '`', '[', ']', '(', ')', '>': // these all need to be escaped to ensure we retain the same message
out.WriteString(html.UnescapeString(string(in[prev:i])))
out.WriteRune('\\')
out.WriteRune(in[i])
diff --git a/reverseV2_test.go b/reverseV2_test.go
index 0220ff6..5697a8a 100644
--- a/reverseV2_test.go
+++ b/reverseV2_test.go
@@ -10,25 +10,36 @@ import (
func TestReverseV2(t *testing.T) {
for _, test := range reverseTest {
- out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test), nil)
- assert.NoError(t, err, "Error for:\n%s", test)
- assert.Equal(t, tg_md2html.MD2HTMLV2(test), tg_md2html.MD2HTMLV2(out))
+ t.Run(test, func(t *testing.T) {
+ out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test), nil)
+ assert.NoError(t, err, "Error for:\n%s", test)
+ assert.Equal(t, tg_md2html.MD2HTMLV2(test), tg_md2html.MD2HTMLV2(out))
+ })
}
for _, test := range append(append(basicMD, basicMDv2...), advancedMD...) {
- out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test.in), nil)
- assert.NoError(t, err, "Error for:\n%s", test)
- assert.Equal(t, tg_md2html.MD2HTMLV2(test.in), tg_md2html.MD2HTMLV2(out))
+		t.Run(test.in, func(t *testing.T) {
+			out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test.in), nil)
+			assert.NoError(t, err, "Error for:\n%s", test.in) // report the markdown input that failed to reverse, matching the first loop's message
+			assert.Equal(t, tg_md2html.MD2HTMLV2(test.in), tg_md2html.MD2HTMLV2(out))
+		})
}
for _, test := range []string{
- "___________test_______",
- "|||||spoiler|||",
- ``,
+ "___________test_______", // uneven underlines
+ "|||||spoiler|||", // uneven spoilers
+ "||