diff --git a/common.go b/common.go index 63ae985..62f39d6 100644 --- a/common.go +++ b/common.go @@ -22,7 +22,7 @@ func validStart(pos int, input []rune) bool { } func validEnd(pos int, input []rune) bool { - // First char is not a valid end char. + // First char is not a valid end char; we do NOT allow empty entities. // If the end char has a space before it, its not valid either. if pos == 0 || unicode.IsSpace(input[pos-1]) { return false diff --git a/commonV2.go b/commonV2.go index 8bc9173..355cdca 100644 --- a/commonV2.go +++ b/commonV2.go @@ -148,7 +148,7 @@ func isClosingTag(in []rune, pos int) bool { return false } -func getClosingTag(in []rune, tag string) (int, int) { +func getClosingTag(in []rune, openingTag string, closingTag string) (int, int) { offset := 0 subtags := 0 for offset < len(in) { @@ -164,9 +164,9 @@ func getClosingTag(in []rune, tag string) (int, int) { } closingTagIdx := openingTagIdx + 2 + c - if string(in[openingTagIdx+1:closingTagIdx]) == tag { // found a nested tag, this is annoying + if string(in[openingTagIdx+1:closingTagIdx]) == openingTag { // found a nested tag, this is annoying subtags++ - } else if isClosingTag(in, openingTagIdx) && string(in[openingTagIdx+2:closingTagIdx]) == tag { + } else if isClosingTag(in, openingTagIdx) && string(in[openingTagIdx+2:closingTagIdx]) == closingTag { if subtags == 0 { return openingTagIdx, closingTagIdx } diff --git a/md2htmlV2.go b/md2htmlV2.go index 56d3a5a..f3e17a2 100644 --- a/md2htmlV2.go +++ b/md2htmlV2.go @@ -4,6 +4,7 @@ import ( "html" "sort" "strings" + "unicode" ) var defaultConverterV2 = ConverterV2{ @@ -42,20 +43,23 @@ func MD2HTMLButtonsV2(in string) (string, []ButtonV2) { } var chars = map[string]string{ - "`": "code", - "```": "pre", - "_": "i", - "*": "b", - "~": "s", - "__": "u", - "|": "", // this is a placeholder for || to work - "||": "span class=\"tg-spoiler\"", - "!": "", // for emoji - "[": "", // for links - "]": "", // for links/emoji - "(": "", // for links/emoji - ")": "", // for links/emoji - "\\": "", // for escapes + "`": "code", + "```": "pre", + "_": "i", + "*": "b", + "~": "s", + "__": "u", + "|": "", // this is a placeholder for || to work + "||": "span class=\"tg-spoiler\"", + "!": "", // for emoji + "![": "", // for emoji + "[": "", // for links + "]": "", // for links/emoji + "(": "", // for links/emoji + ")": "", // for links/emoji + "\\": "", // for escapes + "&": "", // for blockquotes + ">": "blockquote", } var AllMarkdownV2Chars = func() []rune { @@ -86,6 +90,53 @@ var skipStarts = map[rune]bool{ '[': true, // links } +func getItem(in []rune, i int) (string, int, bool) { + c := in[i] + if _, ok := chars[string(c)]; !ok { + return "", 0, false + } + + if !validStart(i, in) && !skipStarts[c] { + if c == '\\' && i+1 < len(in) { + escaped := string(in[i+1]) + if _, ok := chars[escaped]; ok { + return escaped, 1, false + } + } + return "", 0, false + } + + item := string(c) + if c == '|' && + i+1 < len(in) && in[i+1] == '|' { + return "||", 1, true + + } else if c == '_' && + i+1 < len(in) && in[i+1] == '_' { // support __ + return "__", 1, true + + } else if c == '`' && + i+2 < len(in) && in[i+1] == '`' && in[i+2] == '`' { // support ``` + return "```", 2, true + + } else if c == '&' && + i+3 < len(in) && in[i+1] == 'g' && in[i+2] == 't' && in[i+3] == ';' && + validBlockQuoteStart(in, i) { + return ">", 3, true + + } else if c == '*' && + i+5 < len(in) && in[i+1] == '*' && in[i+2] == '&' && in[i+3] == 'g' && in[i+4] == 't' && in[i+5] == ';' && + // We force support for **> to allow for people to separate quotes/expandable quote blocks with ** + validBlockQuoteStart(in, i) { + return "**>", 5, true + + } else if c == '!' && i+1 < len(in) && in[i+1] == '[' { + return "![", 1, true + } + + return item, 0, true +} + // TODO: add support for a map-like check of which items cannot be included. // // Eg: `code` cannot be italic/bold/underline/strikethrough @@ -96,49 +147,20 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 out := strings.Builder{} for i := 0; i < len(in); i++ { - c := in[i] - if _, ok := chars[string(c)]; !ok { - out.WriteRune(c) - continue - } - - if !validStart(i, in) && !skipStarts[c] { - if c == '\\' && i+1 < len(in) { - if _, ok := chars[string(in[i+1])]; ok { - out.WriteRune(in[i+1]) - i++ - continue - } + item, offset, ok := getItem(in, i) + if !ok { + if item == "" { + item = string(in[i]) } - out.WriteRune(c) + out.WriteString(item) + i += offset continue } + i += offset - switch c { - case '`', '*', '~', '_', '|': // '||', '__', and '```' are included here too - item := string(c) - if c == '|' { // support || - // if single |, ignore. We only care about double || - if i+1 >= len(in) || in[i+1] != '|' { - out.WriteRune(c) - continue - } - - item = "||" - i++ - } else if c == '_' && i+1 < len(in) && in[i+1] == '_' { // support __ - item = "__" - i++ - } else if c == '`' && i+2 < len(in) && in[i+1] == '`' && in[i+2] == '`' { // support ``` - item = "```" - i += 2 - } - - if i+1 >= len(in) { - out.WriteString(item) - continue - } - + switch item { + // All cases where start and closing tags are the same. + case "`", "*", "~", "_", "```", "||", "__": idx := getValidEnd(in[i+1:], item) if idx < 0 { // not found; write and move on. @@ -174,18 +196,33 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 nestedT, nestedB := cv.md2html(in[nStart:nEnd], enableButtons) return out.String() + "<" + chars[item] + ">" + nestedT + "" + followT, append(nestedB, followB...) - case '!': - if len(in) <= i+1 || in[i+1] != '[' { - out.WriteRune(c) + case ">", "**>": + nStart := i + 1 + for nStart < len(in) && unicode.IsSpace(in[nStart]) { + nStart++ + } + + if nStart >= len(in) { + out.WriteString(item) continue } - ok, text, content, newEnd := getLinkContents(in[i+1:], true) + nEnd, contents, expandable := getBlockQuoteEnd(in, nStart) + nestedT, nestedB := cv.md2html(contents, enableButtons) + followT, followB := cv.md2html(in[nEnd:], enableButtons) + + if expandable { + return out.String() + "
" + strings.TrimSpace(nestedT) + "
" + followT, append(nestedB, followB...) + } + return out.String() + "
" + strings.TrimSpace(nestedT) + "
" + followT, append(nestedB, followB...) + + case "![": + ok, text, content, newEnd := getLinkContents(in[i:], true) if !ok { - out.WriteRune(c) + out.WriteString(item) continue } - end := i + 1 + newEnd + end := i + newEnd content = strings.TrimPrefix(content, "tg://emoji?id=") @@ -193,10 +230,10 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 followT, followB := cv.md2html(in[end:], enableButtons) return out.String() + `` + nestedT + "" + followT, append(nestedB, followB...) - case '[': + case "[": ok, text, content, newEnd := getLinkContents(in[i:], false) if !ok { - out.WriteRune(c) + out.WriteString(item) continue } end := i + newEnd @@ -227,10 +264,7 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 nestedT, nestedB := cv.md2html(text, enableButtons) return out.String() + `` + nestedT + "" + followT, append(nestedB, followB...) - case ']', '(', ')': - out.WriteRune(c) - - case '\\': + case "\\": if i+1 < len(in) { if _, ok := chars[string(in[i+1])]; ok { out.WriteRune(in[i+1]) @@ -238,13 +272,70 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 continue } } - out.WriteRune(c) + out.WriteString(item) + + default: + out.WriteString(item) } } return out.String(), nil } +func getBlockQuoteEnd(in []rune, nStart int) (int, []rune, bool) { + var contents []rune // We store all the contents, minus the > characters, so we avoid double-html tags + lineStart := true + for j := nStart; j < len(in); j++ { + if lineStart && in[j] == ' ' { + // Skip space chars at start of lines + continue + } + + lineStart = in[j] == '\n' + contents = append(contents, in[j]) + + // Keep skipping until we get a newline + if in[j] != '\n' { + continue + } + + if isExpandableEnd(in, j) { + // Extra -1 to include newline + return j, contents[:len(contents)-3], true + } + + if j+4 < len(in) && in[j+1] == '&' && in[j+2] == 'g' && in[j+3] == 't' && in[j+4] == ';' { + j = j + 4 // skip '>' symbol for the next blockquote start + continue + } + return j, contents, false + } + + if isExpandableEnd(in, len(in)) { + return len(in), contents[:len(contents)-2], true + } + + return len(in), contents, false +} + +func isExpandableEnd(in []rune, j int) bool { + return j-2 >= 0 && in[j-1] == '|' && in[j-2] == '|' +} + +func validBlockQuoteStart(in []rune, i int) bool { + for j := i - 1; j >= 0; j-- { + if !unicode.IsSpace(in[j]) { + return false + } + if in[j] == '\n' { + return true + } + } + + // Start of message; must be valid. + return true +} + func EscapeMarkdownV2(r []rune) string { out := strings.Builder{} for i, x := range r { diff --git a/md2htmlV2_test.go b/md2htmlV2_test.go index 1f68a0e..68dec8f 100644 --- a/md2htmlV2_test.go +++ b/md2htmlV2_test.go @@ -21,6 +21,9 @@ var basicMDv2 = []struct { }, { in: "||hello||", out: "hello", + }, { + in: "||||", + out: "<hello>", }, { in: "```content```", out: "
content
", @@ -64,18 +67,40 @@ var basicMDv2 = []struct { }, { // ensure that premium stickers can get converted in: `![👍](tg://emoji?id=5368324170671202286)`, out: `👍`, - }, {}, + }, { + in: "> quote", + out: "
quote
", + }, { + in: ">multi\n> line", + out: "
multi\nline
", + }, { + in: ">expandable multi\n>line\n>quote||", + out: "
expandable multi\nline\nquote
", + }, { + in: ">expandable multi\n>line\n>quote||\nMore text on another line", + out: "
expandable multi\nline\nquote
\nMore text on another line", + }, { + in: "**>expandable multi with star prefix\n>line\n>quote||", + out: "
expandable multi with star prefix\nline\nquote
", + }, { + in: ">normal quote\n**>expandable multi\n>idk||", + out: "
normal quote
\n
expandable multi\nidk
", + }, } func TestMD2HTMLV2Basic(t *testing.T) { for _, x := range append(basicMD, basicMDv2...) { - assert.Equal(t, x.out, tg_md2html.MD2HTMLV2(x.in)) + t.Run(x.in, func(t *testing.T) { + assert.Equal(t, x.out, tg_md2html.MD2HTMLV2(x.in)) + }) } } func TestMD2HTMLV2Advanced(t *testing.T) { for _, x := range advancedMD { - assert.Equal(t, x.out, tg_md2html.MD2HTMLV2(x.in)) + t.Run(x.in, func(t *testing.T) { + assert.Equal(t, x.out, tg_md2html.MD2HTMLV2(x.in)) + }) } } @@ -108,6 +133,9 @@ func TestNotMD2HTMLV2(t *testing.T) { }, { in: "|noop|", out: "|noop|", + }, { + in: "end with >", + out: "end with >", }, { in: "no premium ! in text", // confirm that a '!' doesnt break premiums out: "no premium ! in text", @@ -243,6 +271,15 @@ var md2HTMLV2Buttons = []struct { SameLine: false, }, }, + }, { + in: "text\n> quote\ntext", + out: "text\n
quote
\ntext", + }, { + in: "> `code quote`", + out: "
code quote
", + }, { + in: "```go\ntext\n> not quote\nmore text```", + out: "
text\n> not quote\nmore text
", }, } diff --git a/reverseV2.go b/reverseV2.go index a7bd6cc..ad1f296 100644 --- a/reverseV2.go +++ b/reverseV2.go @@ -38,12 +38,12 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) { if len(tagFields) < 1 { return "", fmt.Errorf("no tag name for HTML tag started at %d", i) } - tag := tagFields[0] + tagType := tagFields[0] - co, cc := getClosingTag(in[closeTag+1:], tag) + co, cc := getClosingTag(in[closeTag+1:], tagContent, tagType) if co < 0 || cc < 0 { // "no closing open" - return "", fmt.Errorf("no closing tag for HTML tag %q started at %d", tag, i) + return "", fmt.Errorf("no closing tag for HTML tag %q started at %d", tagType, i) } closingOpen, closingClose := closeTag+1+co, closeTag+1+cc out.WriteString(html.UnescapeString(string(in[prev:i]))) @@ -53,7 +53,7 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) { return "", err } - switch tag { + switch tagType { case "b", "strong": out.WriteString("*" + nested + "*") case "i", "em": @@ -85,9 +85,9 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) { switch spanType := tagFields[1]; spanType { case "class=\"tg-spoiler\"": - out.WriteString("||" + html.UnescapeString(string(in[closeTag+1:closingOpen])) + "||") + out.WriteString("||" + nested + "||") default: - return "", fmt.Errorf("unknown tag type %q", spanType) + return "", fmt.Errorf("unknown span type %q", spanType) } case "a": if link.MatchString(tagContent) { @@ -103,14 +103,21 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) { } else { return "", fmt.Errorf("badly formatted anchor tag %q", tagContent) } + case "blockquote": + if len(tagFields) == 2 && tagFields[1] == "expandable" { + out.WriteString("**>" + strings.Join(strings.Split(nested, "\n"), "\n>") + "||") + } else { + out.WriteString(">" + strings.Join(strings.Split(nested, "\n"), "\n>")) + } + default: - return "", fmt.Errorf("unknown tag %q", tag) + return "", fmt.Errorf("unknown tag %q", tagType) } prev = closingClose + 1 i = closingClose - case '\\', '_', '*', '~', '`', '[', ']', '(', ')': // these all need to be escaped to ensure we retain the same message + case '\\', '_', '*', '~', '`', '[', ']', '(', ')', '>': // these all need to be escaped to ensure we retain the same message out.WriteString(html.UnescapeString(string(in[prev:i]))) out.WriteRune('\\') out.WriteRune(in[i]) diff --git a/reverseV2_test.go b/reverseV2_test.go index 0220ff6..5697a8a 100644 --- a/reverseV2_test.go +++ b/reverseV2_test.go @@ -10,25 +10,36 @@ import ( func TestReverseV2(t *testing.T) { for _, test := range reverseTest { - out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test), nil) - assert.NoError(t, err, "Error for:\n%s", test) - assert.Equal(t, tg_md2html.MD2HTMLV2(test), tg_md2html.MD2HTMLV2(out)) + t.Run(test, func(t *testing.T) { + out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test), nil) + assert.NoError(t, err, "Error for:\n%s", test) + assert.Equal(t, tg_md2html.MD2HTMLV2(test), tg_md2html.MD2HTMLV2(out)) + }) } for _, test := range append(append(basicMD, basicMDv2...), advancedMD...) { - out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test.in), nil) - assert.NoError(t, err, "Error for:\n%s", test) - assert.Equal(t, tg_md2html.MD2HTMLV2(test.in), tg_md2html.MD2HTMLV2(out)) + t.Run(test.in, func(t *testing.T) { + out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test.in), nil) + assert.NoError(t, err, " Error for:\n%s", test) + assert.Equal(t, tg_md2html.MD2HTMLV2(test.in), tg_md2html.MD2HTMLV2(out)) + }) } for _, test := range []string{ - "___________test_______", - "|||||spoiler|||", - `![👍](tg://emoji?id=5368324170671202286)`, + "___________test_______", // uneven underlines + "|||||spoiler|||", // uneven spoilers + "||||", // spoilers, but with HTML bits inside + "![👍](tg://emoji?id=5368324170671202286)", // premium emoji + "> ", // empty quotes + "test\n>\ntest", // multiline quotes + "||||||||| test", // nested spoilers } { - out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test), nil) - assert.NoError(t, err, "Error for:\n%s", test) - assert.Equal(t, tg_md2html.MD2HTMLV2(test), tg_md2html.MD2HTMLV2(out)) + t.Run(test, func(t *testing.T) { + htmlv2 := tg_md2html.MD2HTMLV2(test) + out, err := tg_md2html.ReverseV2(htmlv2, nil) + assert.NoError(t, err, "Error for:\n%s", test) + assert.Equal(t, htmlv2, tg_md2html.MD2HTMLV2(out)) + }) } }