From e45660d42793ba692c54c4c228b438d666148f27 Mon Sep 17 00:00:00 2001 From: Paul Larsen Date: Mon, 20 Nov 2023 18:38:47 +0000 Subject: [PATCH 1/8] Add basic blockquote formatting --- md2htmlV2.go | 72 ++++++++++++++++++++++++++++++++++++++--------- md2htmlV2_test.go | 14 ++++++++- reverseV2.go | 3 ++ 3 files changed, 74 insertions(+), 15 deletions(-) diff --git a/md2htmlV2.go b/md2htmlV2.go index 56d3a5a..e00342a 100644 --- a/md2htmlV2.go +++ b/md2htmlV2.go @@ -4,6 +4,7 @@ import ( "html" "sort" "strings" + "unicode" ) var defaultConverterV2 = ConverterV2{ @@ -42,20 +43,22 @@ func MD2HTMLButtonsV2(in string) (string, []ButtonV2) { } var chars = map[string]string{ - "`": "code", - "```": "pre", - "_": "i", - "*": "b", - "~": "s", - "__": "u", - "|": "", // this is a placeholder for || to work - "||": "span class=\"tg-spoiler\"", - "!": "", // for emoji - "[": "", // for links - "]": "", // for links/emoji - "(": "", // for links/emoji - ")": "", // for links/emoji - "\\": "", // for escapes + "`": "code", + "```": "pre", + "_": "i", + "*": "b", + "~": "s", + "__": "u", + "|": "", // this is a placeholder for || to work + "||": "span class=\"tg-spoiler\"", + "!": "", // for emoji + "[": "", // for links + "]": "", // for links/emoji + "(": "", // for links/emoji + ")": "", // for links/emoji + "\\": "", // for escapes + ">": "blockquote", + "&": "", // for blockquotes } var AllMarkdownV2Chars = func() []rune { @@ -174,6 +177,33 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 nestedT, nestedB := cv.md2html(in[nStart:nEnd], enableButtons) return out.String() + "<" + chars[item] + ">" + nestedT + "" + followT, append(nestedB, followB...) 
+ case '&': + if !(i+3 < len(in) && in[i+1] == 'g' && in[i+2] == 't' && in[i+3] == ';') { + out.WriteRune(c) + continue + } + + if !validBlockQuoteStart(in, i) { + out.WriteRune(c) + continue + } + nStart := i + 4 + for unicode.IsSpace(in[nStart]) { + nStart++ + } + + nEnd := len(in) + for j := i + 1; j < len(in); j++ { + if in[j] == '\n' { + nEnd = j + break + } + } + + nestedT, nestedB := cv.md2html(in[nStart:nEnd], enableButtons) + followT, followB := cv.md2html(in[nEnd:], enableButtons) + return out.String() + "
" + nestedT + "
" + followT, append(nestedB, followB...) + case '!': if len(in) <= i+1 || in[i+1] != '[' { out.WriteRune(c) @@ -245,6 +275,20 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 return out.String(), nil } +func validBlockQuoteStart(in []rune, i int) bool { + for j := i - 1; j >= 0; j-- { + if !unicode.IsSpace(in[j]) { + return false + } + if in[j] == '\n' { + return true + } + } + + // Start of message; must be valid. + return true +} + func EscapeMarkdownV2(r []rune) string { out := strings.Builder{} for i, x := range r { diff --git a/md2htmlV2_test.go b/md2htmlV2_test.go index 1f68a0e..e82e4ad 100644 --- a/md2htmlV2_test.go +++ b/md2htmlV2_test.go @@ -64,7 +64,10 @@ var basicMDv2 = []struct { }, { // ensure that premium stickers can get converted in: `![👍](tg://emoji?id=5368324170671202286)`, out: `👍`, - }, {}, + }, { + in: "> quote", + out: "
<blockquote>quote</blockquote>
", + }, } func TestMD2HTMLV2Basic(t *testing.T) { @@ -243,6 +246,15 @@ var md2HTMLV2Buttons = []struct { SameLine: false, }, }, + }, { + in: "text\n> quote\ntext", + out: "text\n
<blockquote>quote</blockquote>
\ntext", + }, { + in: "> `code quote`", + out: "
<blockquote><code>code quote</code></blockquote>
", + }, { + in: "```go\ntext\n> not quote\nmore text```", + out: "
text\n> not quote\nmore text
", }, } diff --git a/reverseV2.go b/reverseV2.go index a7bd6cc..3168681 100644 --- a/reverseV2.go +++ b/reverseV2.go @@ -103,6 +103,9 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) { } else { return "", fmt.Errorf("badly formatted anchor tag %q", tagContent) } + case "blockquote": + out.WriteString("> " + nested) + default: return "", fmt.Errorf("unknown tag %q", tag) } From 0289beeff39db94ac734dcc3355892ca5bdfd207 Mon Sep 17 00:00:00 2001 From: Paul Larsen Date: Sun, 31 Dec 2023 15:39:48 +0000 Subject: [PATCH 2/8] Improve multiline blockquote support --- md2htmlV2.go | 20 +++++++++++++++++--- md2htmlV2_test.go | 3 +++ reverseV2.go | 4 ++-- reverseV2_test.go | 16 ++++++++++------ 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/md2htmlV2.go b/md2htmlV2.go index e00342a..5265396 100644 --- a/md2htmlV2.go +++ b/md2htmlV2.go @@ -193,16 +193,30 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 } nEnd := len(in) - for j := i + 1; j < len(in); j++ { + var contents []rune // We store all the contents, minus the > characters, so we avoid double-html tags + lineStart := true + for j := i + 4; j < len(in); j++ { + if lineStart && in[j] == ' ' { + // Skip space chars at start of lines + continue + } + + lineStart = in[j] == '\n' + contents = append(contents, in[j]) + if in[j] == '\n' { + if j+4 < len(in) && in[j+1] == '&' && in[j+2] == 'g' && in[j+3] == 't' && in[j+4] == ';' { + j = j + 4 // skip '>' symbol + continue + } nEnd = j break } } - nestedT, nestedB := cv.md2html(in[nStart:nEnd], enableButtons) + nestedT, nestedB := cv.md2html(contents, enableButtons) followT, followB := cv.md2html(in[nEnd:], enableButtons) - return out.String() + "
" + nestedT + "
" + followT, append(nestedB, followB...) + return out.String() + "
" + strings.TrimSpace(nestedT) + "
" + followT, append(nestedB, followB...) case '!': if len(in) <= i+1 || in[i+1] != '[' { diff --git a/md2htmlV2_test.go b/md2htmlV2_test.go index e82e4ad..1e6eac5 100644 --- a/md2htmlV2_test.go +++ b/md2htmlV2_test.go @@ -67,6 +67,9 @@ var basicMDv2 = []struct { }, { in: "> quote", out: "
<blockquote>quote</blockquote>
", + }, { + in: ">multi\n> line", + out: "
<blockquote>multi\nline</blockquote>
", }, } diff --git a/reverseV2.go b/reverseV2.go index 3168681..b241abb 100644 --- a/reverseV2.go +++ b/reverseV2.go @@ -104,7 +104,7 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) { return "", fmt.Errorf("badly formatted anchor tag %q", tagContent) } case "blockquote": - out.WriteString("> " + nested) + out.WriteString(">" + strings.Join(strings.Split(nested, "\n"), "\n>")) default: return "", fmt.Errorf("unknown tag %q", tag) @@ -113,7 +113,7 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) { prev = closingClose + 1 i = closingClose - case '\\', '_', '*', '~', '`', '[', ']', '(', ')': // these all need to be escaped to ensure we retain the same message + case '\\', '_', '*', '~', '`', '[', ']', '(', ')', '>': // these all need to be escaped to ensure we retain the same message out.WriteString(html.UnescapeString(string(in[prev:i]))) out.WriteRune('\\') out.WriteRune(in[i]) diff --git a/reverseV2_test.go b/reverseV2_test.go index 0220ff6..fe41219 100644 --- a/reverseV2_test.go +++ b/reverseV2_test.go @@ -10,15 +10,19 @@ import ( func TestReverseV2(t *testing.T) { for _, test := range reverseTest { - out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test), nil) - assert.NoError(t, err, "Error for:\n%s", test) - assert.Equal(t, tg_md2html.MD2HTMLV2(test), tg_md2html.MD2HTMLV2(out)) + t.Run(test, func(t *testing.T) { + out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test), nil) + assert.NoError(t, err, "Error for:\n%s", test) + assert.Equal(t, tg_md2html.MD2HTMLV2(test), tg_md2html.MD2HTMLV2(out)) + }) } for _, test := range append(append(basicMD, basicMDv2...), advancedMD...) 
{ - out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test.in), nil) - assert.NoError(t, err, "Error for:\n%s", test) - assert.Equal(t, tg_md2html.MD2HTMLV2(test.in), tg_md2html.MD2HTMLV2(out)) + t.Run(test.in, func(t *testing.T) { + out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test.in), nil) + assert.NoError(t, err, "Error for:\n%s", test) + assert.Equal(t, tg_md2html.MD2HTMLV2(test.in), tg_md2html.MD2HTMLV2(out)) + }) } for _, test := range []string{ From 0099e0e552d4f513d6271cd9f2168f2b81e9fc5b Mon Sep 17 00:00:00 2001 From: Paul Larsen Date: Sat, 6 Jan 2024 17:06:28 +0000 Subject: [PATCH 3/8] Fix OOB error and improve tests --- md2htmlV2.go | 2 +- md2htmlV2_test.go | 3 +++ reverseV2_test.go | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/md2htmlV2.go b/md2htmlV2.go index 5265396..4ac4e05 100644 --- a/md2htmlV2.go +++ b/md2htmlV2.go @@ -178,7 +178,7 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 return out.String() + "<" + chars[item] + ">" + nestedT + "" + followT, append(nestedB, followB...) case '&': - if !(i+3 < len(in) && in[i+1] == 'g' && in[i+2] == 't' && in[i+3] == ';') { + if !(i+4 < len(in) && in[i+1] == 'g' && in[i+2] == 't' && in[i+3] == ';') { out.WriteRune(c) continue } diff --git a/md2htmlV2_test.go b/md2htmlV2_test.go index 1e6eac5..6d57c7d 100644 --- a/md2htmlV2_test.go +++ b/md2htmlV2_test.go @@ -114,6 +114,9 @@ func TestNotMD2HTMLV2(t *testing.T) { }, { in: "|noop|", out: "|noop|", + }, { + in: "end with >", + out: "end with >", }, { in: "no premium ! in text", // confirm that a '!' doesnt break premiums out: "no premium ! 
in text", diff --git a/reverseV2_test.go b/reverseV2_test.go index fe41219..0efa1b1 100644 --- a/reverseV2_test.go +++ b/reverseV2_test.go @@ -28,7 +28,9 @@ func TestReverseV2(t *testing.T) { for _, test := range []string{ "___________test_______", "|||||spoiler|||", - `![👍](tg://emoji?id=5368324170671202286)`, + "![👍](tg://emoji?id=5368324170671202286)", + ">", + "test\n>\ntest", } { out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test), nil) assert.NoError(t, err, "Error for:\n%s", test) From 918e6144480e9585e56fa2313a4077c3481b910f Mon Sep 17 00:00:00 2001 From: Paul Larsen Date: Sat, 6 Jan 2024 17:15:40 +0000 Subject: [PATCH 4/8] Improve OOB checks --- md2htmlV2.go | 9 +++++++-- reverseV2_test.go | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/md2htmlV2.go b/md2htmlV2.go index 4ac4e05..c8e7bbc 100644 --- a/md2htmlV2.go +++ b/md2htmlV2.go @@ -178,7 +178,7 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 return out.String() + "<" + chars[item] + ">" + nestedT + "" + followT, append(nestedB, followB...) 
case '&': - if !(i+4 < len(in) && in[i+1] == 'g' && in[i+2] == 't' && in[i+3] == ';') { + if !(i+3 < len(in) && in[i+1] == 'g' && in[i+2] == 't' && in[i+3] == ';') { out.WriteRune(c) continue } @@ -187,10 +187,15 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 out.WriteRune(c) continue } + nStart := i + 4 - for unicode.IsSpace(in[nStart]) { + for nStart < len(in) && unicode.IsSpace(in[nStart]) { nStart++ } + if nStart >= len(in) { + out.WriteRune(c) + continue + } nEnd := len(in) var contents []rune // We store all the contents, minus the > characters, so we avoid double-html tags diff --git a/reverseV2_test.go b/reverseV2_test.go index 0efa1b1..379d0a1 100644 --- a/reverseV2_test.go +++ b/reverseV2_test.go @@ -29,7 +29,7 @@ func TestReverseV2(t *testing.T) { "___________test_______", "|||||spoiler|||", "![👍](tg://emoji?id=5368324170671202286)", - ">", + "> ", "test\n>\ntest", } { out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test), nil) From 3cf1fcfabf0e724f9c70e70b8caef67f45baca08 Mon Sep 17 00:00:00 2001 From: Paul Larsen Date: Thu, 29 Feb 2024 21:02:38 +0000 Subject: [PATCH 5/8] Improve handling of nested spoiler tags --- commonV2.go | 6 +++--- md2htmlV2_test.go | 11 +++++++++-- reverseV2.go | 14 +++++++------- reverseV2_test.go | 23 ++++++++++++++--------- 4 files changed, 33 insertions(+), 21 deletions(-) diff --git a/commonV2.go b/commonV2.go index 8bc9173..355cdca 100644 --- a/commonV2.go +++ b/commonV2.go @@ -148,7 +148,7 @@ func isClosingTag(in []rune, pos int) bool { return false } -func getClosingTag(in []rune, tag string) (int, int) { +func getClosingTag(in []rune, openingTag string, closingTag string) (int, int) { offset := 0 subtags := 0 for offset < len(in) { @@ -164,9 +164,9 @@ func getClosingTag(in []rune, tag string) (int, int) { } closingTagIdx := openingTagIdx + 2 + c - if string(in[openingTagIdx+1:closingTagIdx]) == tag { // found a nested tag, this is annoying + if 
string(in[openingTagIdx+1:closingTagIdx]) == openingTag { // found a nested tag, this is annoying subtags++ - } else if isClosingTag(in, openingTagIdx) && string(in[openingTagIdx+2:closingTagIdx]) == tag { + } else if isClosingTag(in, openingTagIdx) && string(in[openingTagIdx+2:closingTagIdx]) == closingTag { if subtags == 0 { return openingTagIdx, closingTagIdx } diff --git a/md2htmlV2_test.go b/md2htmlV2_test.go index 6d57c7d..e4a99c1 100644 --- a/md2htmlV2_test.go +++ b/md2htmlV2_test.go @@ -21,6 +21,9 @@ var basicMDv2 = []struct { }, { in: "||hello||", out: "hello", + }, { + in: "||||", + out: "<hello>", }, { in: "```content```", out: "
<pre>content</pre>
", @@ -75,13 +78,17 @@ var basicMDv2 = []struct { func TestMD2HTMLV2Basic(t *testing.T) { for _, x := range append(basicMD, basicMDv2...) { - assert.Equal(t, x.out, tg_md2html.MD2HTMLV2(x.in)) + t.Run(x.in, func(t *testing.T) { + assert.Equal(t, x.out, tg_md2html.MD2HTMLV2(x.in)) + }) } } func TestMD2HTMLV2Advanced(t *testing.T) { for _, x := range advancedMD { - assert.Equal(t, x.out, tg_md2html.MD2HTMLV2(x.in)) + t.Run(x.in, func(t *testing.T) { + assert.Equal(t, x.out, tg_md2html.MD2HTMLV2(x.in)) + }) } } diff --git a/reverseV2.go b/reverseV2.go index b241abb..d3b8e28 100644 --- a/reverseV2.go +++ b/reverseV2.go @@ -38,12 +38,12 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) { if len(tagFields) < 1 { return "", fmt.Errorf("no tag name for HTML tag started at %d", i) } - tag := tagFields[0] + tagType := tagFields[0] - co, cc := getClosingTag(in[closeTag+1:], tag) + co, cc := getClosingTag(in[closeTag+1:], tagContent, tagType) if co < 0 || cc < 0 { // "no closing open" - return "", fmt.Errorf("no closing tag for HTML tag %q started at %d", tag, i) + return "", fmt.Errorf("no closing tag for HTML tag %q started at %d", tagType, i) } closingOpen, closingClose := closeTag+1+co, closeTag+1+cc out.WriteString(html.UnescapeString(string(in[prev:i]))) @@ -53,7 +53,7 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) { return "", err } - switch tag { + switch tagType { case "b", "strong": out.WriteString("*" + nested + "*") case "i", "em": @@ -85,9 +85,9 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) { switch spanType := tagFields[1]; spanType { case "class=\"tg-spoiler\"": - out.WriteString("||" + html.UnescapeString(string(in[closeTag+1:closingOpen])) + "||") + out.WriteString("||" + nested + "||") default: - return "", fmt.Errorf("unknown tag type %q", spanType) + return "", fmt.Errorf("unknown span type %q", spanType) } case "a": if link.MatchString(tagContent) { @@ -107,7 
+107,7 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) { out.WriteString(">" + strings.Join(strings.Split(nested, "\n"), "\n>")) default: - return "", fmt.Errorf("unknown tag %q", tag) + return "", fmt.Errorf("unknown tag %q", tagType) } prev = closingClose + 1 diff --git a/reverseV2_test.go b/reverseV2_test.go index 379d0a1..5697a8a 100644 --- a/reverseV2_test.go +++ b/reverseV2_test.go @@ -20,21 +20,26 @@ func TestReverseV2(t *testing.T) { for _, test := range append(append(basicMD, basicMDv2...), advancedMD...) { t.Run(test.in, func(t *testing.T) { out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test.in), nil) - assert.NoError(t, err, "Error for:\n%s", test) + assert.NoError(t, err, " Error for:\n%s", test) assert.Equal(t, tg_md2html.MD2HTMLV2(test.in), tg_md2html.MD2HTMLV2(out)) }) } for _, test := range []string{ - "___________test_______", - "|||||spoiler|||", - "![👍](tg://emoji?id=5368324170671202286)", - "> ", - "test\n>\ntest", + "___________test_______", // uneven underlines + "|||||spoiler|||", // uneven spoilers + "||||", // spoilers, but with HTML bits inside + "![👍](tg://emoji?id=5368324170671202286)", // premium emoji + "> ", // empty quotes + "test\n>\ntest", // multiline quotes + "||||||||| test", // nested spoilers } { - out, err := tg_md2html.ReverseV2(tg_md2html.MD2HTMLV2(test), nil) - assert.NoError(t, err, "Error for:\n%s", test) - assert.Equal(t, tg_md2html.MD2HTMLV2(test), tg_md2html.MD2HTMLV2(out)) + t.Run(test, func(t *testing.T) { + htmlv2 := tg_md2html.MD2HTMLV2(test) + out, err := tg_md2html.ReverseV2(htmlv2, nil) + assert.NoError(t, err, "Error for:\n%s", test) + assert.Equal(t, htmlv2, tg_md2html.MD2HTMLV2(out)) + }) } } From 9220a931faf9a70ab344f993ed476f194e30f887 Mon Sep 17 00:00:00 2001 From: Paul Larsen Date: Tue, 28 May 2024 18:45:38 +0100 Subject: [PATCH 6/8] Clean up and add support for the new expandable_blockquote resources --- md2htmlV2.go | 228 
+++++++++++++++++++++++++++------------------- md2htmlV2_test.go | 6 ++ reverseV2.go | 6 +- 3 files changed, 145 insertions(+), 95 deletions(-) diff --git a/md2htmlV2.go b/md2htmlV2.go index c8e7bbc..56350c3 100644 --- a/md2htmlV2.go +++ b/md2htmlV2.go @@ -43,22 +43,23 @@ func MD2HTMLButtonsV2(in string) (string, []ButtonV2) { } var chars = map[string]string{ - "`": "code", - "```": "pre", - "_": "i", - "*": "b", - "~": "s", - "__": "u", - "|": "", // this is a placeholder for || to work - "||": "span class=\"tg-spoiler\"", - "!": "", // for emoji - "[": "", // for links - "]": "", // for links/emoji - "(": "", // for links/emoji - ")": "", // for links/emoji - "\\": "", // for escapes - ">": "blockquote", - "&": "", // for blockquotes + "`": "code", + "```": "pre", + "_": "i", + "*": "b", + "~": "s", + "__": "u", + "|": "", // this is a placeholder for || to work + "||": "span class=\"tg-spoiler\"", + "!": "", // for emoji + "[": "", // for links + "]": "", // for links/emoji + "(": "", // for links/emoji + ")": "", // for links/emoji + "\\": "", // for escapes + "&": "", // for blockquotes + ">": "blockquote", + "**>": "blockquote", // expandable blockquotes } var AllMarkdownV2Chars = func() []rune { @@ -89,6 +90,54 @@ var skipStarts = map[rune]bool{ '[': true, // links } +func getItem(in []rune, i int) (string, int, bool) { + c := in[i] + if _, ok := chars[string(c)]; !ok { + return "", 0, false + } + + if !validStart(i, in) && !skipStarts[c] { + if c == '\\' && i+1 < len(in) { + escaped := string(in[i+1]) + if _, ok := chars[escaped]; ok { + return escaped, 1, false + } + } + return "", 0, false + } + + item := string(c) + if c == '|' { // support || + if !(i+1 < len(in) && in[i+1] == '|') { + return "", 0, false + } + return "||", 1, true + + } else if c == '_' && i+1 < len(in) && in[i+1] == '_' { // support __ + return "__", 1, true + + } else if c == '`' && i+2 < len(in) && in[i+1] == '`' && in[i+2] == '`' { // support ``` + return "```", 2, true + + } else 
if c == '&' { + if !(i+3 < len(in) && in[i+1] == 'g' && in[i+2] == 't' && in[i+3] == ';') { + return "", 0, false + } + if !validBlockQuoteStart(in, i) { + return "", 0, false + } + return ">", 3, true + + } else if c == '*' && i+5 < len(in) && in[i+1] == '*' && in[i+2] == '&' && in[i+3] == 'g' && in[i+4] == 't' && in[i+5] == ';' { + if !validBlockQuoteStart(in, i) { + return "", 0, false + } + return "**>", 5, true + } + + return item, 0, true +} + // TODO: add support for a map-like check of which items cannot be included. // // Eg: `code` cannot be italic/bold/underline/strikethrough @@ -99,44 +148,21 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 out := strings.Builder{} for i := 0; i < len(in); i++ { - c := in[i] - if _, ok := chars[string(c)]; !ok { - out.WriteRune(c) - continue - } - - if !validStart(i, in) && !skipStarts[c] { - if c == '\\' && i+1 < len(in) { - if _, ok := chars[string(in[i+1])]; ok { - out.WriteRune(in[i+1]) - i++ - continue - } + // TODO: Add blockquote handling here too + item, offset, ok := getItem(in, i) + if !ok { + if item == "" { + item = string(in[i]) } - out.WriteRune(c) + out.WriteString(item) + i += offset continue } + i += offset - switch c { - case '`', '*', '~', '_', '|': // '||', '__', and '```' are included here too - item := string(c) - if c == '|' { // support || - // if single |, ignore. We only care about double || - if i+1 >= len(in) || in[i+1] != '|' { - out.WriteRune(c) - continue - } - - item = "||" - i++ - } else if c == '_' && i+1 < len(in) && in[i+1] == '_' { // support __ - item = "__" - i++ - } else if c == '`' && i+2 < len(in) && in[i+1] == '`' && in[i+2] == '`' { // support ``` - item = "```" - i += 2 - } - + switch item { + // All cases where start and closing tags are the same. 
+ case "`", "*", "~", "_", "|", "```", "||", "__": if i+1 >= len(in) { out.WriteString(item) continue @@ -177,61 +203,36 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 nestedT, nestedB := cv.md2html(in[nStart:nEnd], enableButtons) return out.String() + "<" + chars[item] + ">" + nestedT + "" + followT, append(nestedB, followB...) - case '&': - if !(i+3 < len(in) && in[i+1] == 'g' && in[i+2] == 't' && in[i+3] == ';') { - out.WriteRune(c) - continue - } - - if !validBlockQuoteStart(in, i) { - out.WriteRune(c) - continue - } - - nStart := i + 4 + case ">", "**>": + nStart := i + 1 for nStart < len(in) && unicode.IsSpace(in[nStart]) { nStart++ } + if nStart >= len(in) { - out.WriteRune(c) + out.WriteString(item) continue } - nEnd := len(in) - var contents []rune // We store all the contents, minus the > characters, so we avoid double-html tags - lineStart := true - for j := i + 4; j < len(in); j++ { - if lineStart && in[j] == ' ' { - // Skip space chars at start of lines - continue - } - - lineStart = in[j] == '\n' - contents = append(contents, in[j]) - - if in[j] == '\n' { - if j+4 < len(in) && in[j+1] == '&' && in[j+2] == 'g' && in[j+3] == 't' && in[j+4] == ';' { - j = j + 4 // skip '>' symbol - continue - } - nEnd = j - break - } - } - + nEnd, contents := getBlockQuoteEnd(in, nStart, item == "**>") nestedT, nestedB := cv.md2html(contents, enableButtons) followT, followB := cv.md2html(in[nEnd:], enableButtons) + + if item == "**>" { + return out.String() + "
" + strings.TrimSpace(nestedT) + "
" + followT, append(nestedB, followB...) + } return out.String() + "
" + strings.TrimSpace(nestedT) + "
" + followT, append(nestedB, followB...) - case '!': + case "!": + // TODO: Handle this in item selection if len(in) <= i+1 || in[i+1] != '[' { - out.WriteRune(c) + out.WriteString(item) continue } ok, text, content, newEnd := getLinkContents(in[i+1:], true) if !ok { - out.WriteRune(c) + out.WriteString(item) continue } end := i + 1 + newEnd @@ -242,10 +243,10 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 followT, followB := cv.md2html(in[end:], enableButtons) return out.String() + `` + nestedT + "" + followT, append(nestedB, followB...) - case '[': + case "[": ok, text, content, newEnd := getLinkContents(in[i:], false) if !ok { - out.WriteRune(c) + out.WriteString(item) continue } end := i + newEnd @@ -276,10 +277,10 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 nestedT, nestedB := cv.md2html(text, enableButtons) return out.String() + `` + nestedT + "" + followT, append(nestedB, followB...) - case ']', '(', ')': - out.WriteRune(c) + case "]", "(", ")": + out.WriteString(item) - case '\\': + case "\\": if i+1 < len(in) { if _, ok := chars[string(in[i+1])]; ok { out.WriteRune(in[i+1]) @@ -287,13 +288,52 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 continue } } - out.WriteRune(c) + out.WriteString(item) } } return out.String(), nil } +func getBlockQuoteEnd(in []rune, nStart int, expandable bool) (int, []rune) { + var contents []rune // We store all the contents, minus the > characters, so we avoid double-html tags + lineStart := true + for j := nStart; j < len(in); j++ { + if lineStart && in[j] == ' ' { + // Skip space chars at start of lines + continue + } + + lineStart = in[j] == '\n' + contents = append(contents, in[j]) + + // Keep skipping until we get a newline + if in[j] != '\n' { + continue + } + + if isExpandableEnd(in, expandable, j) { + return j, contents[:len(contents)-3] + } + + if j+4 < len(in) && in[j+1] == '&' && in[j+2] == 'g' && in[j+3] 
== 't' && in[j+4] == ';' { + j = j + 4 // skip '>' symbol for the next blockquote start + continue + } + return j, contents + } + + if isExpandableEnd(in, expandable, len(in)) { + return len(in), contents[:len(contents)-2] + } + + return len(in), contents +} + +func isExpandableEnd(in []rune, expandable bool, j int) bool { + return expandable && j-2 >= 0 && in[j-1] == '|' && in[j-2] == '|' +} + func validBlockQuoteStart(in []rune, i int) bool { for j := i - 1; j >= 0; j-- { if !unicode.IsSpace(in[j]) { diff --git a/md2htmlV2_test.go b/md2htmlV2_test.go index e4a99c1..e8cb95d 100644 --- a/md2htmlV2_test.go +++ b/md2htmlV2_test.go @@ -73,6 +73,12 @@ var basicMDv2 = []struct { }, { in: ">multi\n> line", out: "
<blockquote>multi\nline</blockquote>
", + }, { + in: "**>expandable multi\n>line\n>quote||", + out: "
<blockquote expandable>expandable multi\nline\nquote</blockquote>
", + }, { + in: "**>expandable multi\n>line\n>quote||\nMore text on another line", + out: "
<blockquote expandable>expandable multi\nline\nquote</blockquote>
\nMore text on another line", }, } diff --git a/reverseV2.go b/reverseV2.go index d3b8e28..ad1f296 100644 --- a/reverseV2.go +++ b/reverseV2.go @@ -104,7 +104,11 @@ func (cv ConverterV2) reverse(in []rune, buttons []ButtonV2) (string, error) { return "", fmt.Errorf("badly formatted anchor tag %q", tagContent) } case "blockquote": - out.WriteString(">" + strings.Join(strings.Split(nested, "\n"), "\n>")) + if len(tagFields) == 2 && tagFields[1] == "expandable" { + out.WriteString("**>" + strings.Join(strings.Split(nested, "\n"), "\n>") + "||") + } else { + out.WriteString(">" + strings.Join(strings.Split(nested, "\n"), "\n>")) + } default: return "", fmt.Errorf("unknown tag %q", tagType) From e6e12f2acf9fa2af3193567b873c1508755515fb Mon Sep 17 00:00:00 2001 From: Paul Larsen Date: Tue, 28 May 2024 21:38:04 +0100 Subject: [PATCH 7/8] clean up some missing todos and improve some parsing components --- md2htmlV2.go | 59 ++++++++++++++++++++-------------------------------- 1 file changed, 23 insertions(+), 36 deletions(-) diff --git a/md2htmlV2.go b/md2htmlV2.go index 56350c3..ffd8afa 100644 --- a/md2htmlV2.go +++ b/md2htmlV2.go @@ -52,6 +52,7 @@ var chars = map[string]string{ "|": "", // this is a placeholder for || to work "||": "span class=\"tg-spoiler\"", "!": "", // for emoji + "![": "", // for emoji "[": "", // for links "]": "", // for links/emoji "(": "", // for links/emoji @@ -107,32 +108,30 @@ func getItem(in []rune, i int) (string, int, bool) { } item := string(c) - if c == '|' { // support || - if !(i+1 < len(in) && in[i+1] == '|') { - return "", 0, false - } + if c == '|' && + i+1 < len(in) && in[i+1] == '|' { return "||", 1, true - } else if c == '_' && i+1 < len(in) && in[i+1] == '_' { // support __ + } else if c == '_' && + i+1 < len(in) && in[i+1] == '_' { // support __ return "__", 1, true - } else if c == '`' && i+2 < len(in) && in[i+1] == '`' && in[i+2] == '`' { // support ``` + } else if c == '`' && + i+2 < len(in) && in[i+1] == '`' && in[i+2] == '`' 
{ // support ``` return "```", 2, true - } else if c == '&' { - if !(i+3 < len(in) && in[i+1] == 'g' && in[i+2] == 't' && in[i+3] == ';') { - return "", 0, false - } - if !validBlockQuoteStart(in, i) { - return "", 0, false - } + } else if c == '&' && + i+3 < len(in) && in[i+1] == 'g' && in[i+2] == 't' && in[i+3] == ';' && + validBlockQuoteStart(in, i) { return ">", 3, true - } else if c == '*' && i+5 < len(in) && in[i+1] == '*' && in[i+2] == '&' && in[i+3] == 'g' && in[i+4] == 't' && in[i+5] == ';' { - if !validBlockQuoteStart(in, i) { - return "", 0, false - } + } else if c == '*' && + i+5 < len(in) && in[i+1] == '*' && in[i+2] == '&' && in[i+3] == 'g' && in[i+4] == 't' && in[i+5] == ';' && + validBlockQuoteStart(in, i) { return "**>", 5, true + + } else if c == '!' && i+1 < len(in) && in[i+1] == '[' { + return "![", 1, true } return item, 0, true @@ -148,7 +147,6 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 out := strings.Builder{} for i := 0; i < len(in); i++ { - // TODO: Add blockquote handling here too item, offset, ok := getItem(in, i) if !ok { if item == "" { @@ -162,12 +160,7 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 switch item { // All cases where start and closing tags are the same. - case "`", "*", "~", "_", "|", "```", "||", "__": - if i+1 >= len(in) { - out.WriteString(item) - continue - } - + case "`", "*", "~", "_", "```", "||", "__": idx := getValidEnd(in[i+1:], item) if idx < 0 { // not found; write and move on. @@ -223,19 +216,13 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 } return out.String() + "
" + strings.TrimSpace(nestedT) + "
" + followT, append(nestedB, followB...) - case "!": - // TODO: Handle this in item selection - if len(in) <= i+1 || in[i+1] != '[' { - out.WriteString(item) - continue - } - - ok, text, content, newEnd := getLinkContents(in[i+1:], true) + case "![": + ok, text, content, newEnd := getLinkContents(in[i:], true) if !ok { out.WriteString(item) continue } - end := i + 1 + newEnd + end := i + newEnd content = strings.TrimPrefix(content, "tg://emoji?id=") @@ -277,9 +264,6 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 nestedT, nestedB := cv.md2html(text, enableButtons) return out.String() + `` + nestedT + "" + followT, append(nestedB, followB...) - case "]", "(", ")": - out.WriteString(item) - case "\\": if i+1 < len(in) { if _, ok := chars[string(in[i+1])]; ok { @@ -289,6 +273,9 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 } } out.WriteString(item) + + default: + out.WriteString(item) } } From 03e44abc7e1641e96749a77c71fef26a381db10f Mon Sep 17 00:00:00 2001 From: Paul Larsen Date: Wed, 5 Jun 2024 22:52:19 +0100 Subject: [PATCH 8/8] improve blockquote detection --- common.go | 2 +- md2htmlV2.go | 59 ++++++++++++++++++++++++----------------------- md2htmlV2_test.go | 10 ++++++-- 3 files changed, 39 insertions(+), 32 deletions(-) diff --git a/common.go b/common.go index 63ae985..62f39d6 100644 --- a/common.go +++ b/common.go @@ -22,7 +22,7 @@ func validStart(pos int, input []rune) bool { } func validEnd(pos int, input []rune) bool { - // First char is not a valid end char. + // First char is not a valid end char; we do NOT allow empty entities. // If the end char has a space before it, its not valid either. 
if pos == 0 || unicode.IsSpace(input[pos-1]) { return false diff --git a/md2htmlV2.go b/md2htmlV2.go index ffd8afa..f3e17a2 100644 --- a/md2htmlV2.go +++ b/md2htmlV2.go @@ -43,24 +43,23 @@ func MD2HTMLButtonsV2(in string) (string, []ButtonV2) { } var chars = map[string]string{ - "`": "code", - "```": "pre", - "_": "i", - "*": "b", - "~": "s", - "__": "u", - "|": "", // this is a placeholder for || to work - "||": "span class=\"tg-spoiler\"", - "!": "", // for emoji - "![": "", // for emoji - "[": "", // for links - "]": "", // for links/emoji - "(": "", // for links/emoji - ")": "", // for links/emoji - "\\": "", // for escapes - "&": "", // for blockquotes - ">": "blockquote", - "**>": "blockquote", // expandable blockquotes + "`": "code", + "```": "pre", + "_": "i", + "*": "b", + "~": "s", + "__": "u", + "|": "", // this is a placeholder for || to work + "||": "span class=\"tg-spoiler\"", + "!": "", // for emoji + "![": "", // for emoji + "[": "", // for links + "]": "", // for links/emoji + "(": "", // for links/emoji + ")": "", // for links/emoji + "\\": "", // for escapes + "&": "", // for blockquotes + ">": "blockquote", } var AllMarkdownV2Chars = func() []rune { @@ -127,6 +126,7 @@ func getItem(in []rune, i int) (string, int, bool) { } else if c == '*' && i+5 < len(in) && in[i+1] == '*' && in[i+2] == '&' && in[i+3] == 'g' && in[i+4] == 't' && in[i+5] == ';' && + // We force support for **> to allow for people to separate quotes/expandable quote blocks with ** validBlockQuoteStart(in, i) { return "**>", 5, true @@ -207,11 +207,11 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 continue } - nEnd, contents := getBlockQuoteEnd(in, nStart, item == "**>") + nEnd, contents, expandable := getBlockQuoteEnd(in, nStart) nestedT, nestedB := cv.md2html(contents, enableButtons) followT, followB := cv.md2html(in[nEnd:], enableButtons) - if item == "**>" { + if expandable { return out.String() + "
" + strings.TrimSpace(nestedT) + "
" + followT, append(nestedB, followB...) } return out.String() + "
" + strings.TrimSpace(nestedT) + "
" + followT, append(nestedB, followB...) @@ -282,7 +282,7 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2 return out.String(), nil } -func getBlockQuoteEnd(in []rune, nStart int, expandable bool) (int, []rune) { +func getBlockQuoteEnd(in []rune, nStart int) (int, []rune, bool) { var contents []rune // We store all the contents, minus the > characters, so we avoid double-html tags lineStart := true for j := nStart; j < len(in); j++ { @@ -299,26 +299,27 @@ func getBlockQuoteEnd(in []rune, nStart int, expandable bool) (int, []rune) { continue } - if isExpandableEnd(in, expandable, j) { - return j, contents[:len(contents)-3] + if isExpandableEnd(in, j) { + // Extra -1 to include newline + return j, contents[:len(contents)-3], true } if j+4 < len(in) && in[j+1] == '&' && in[j+2] == 'g' && in[j+3] == 't' && in[j+4] == ';' { j = j + 4 // skip '>' symbol for the next blockquote start continue } - return j, contents + return j, contents, false } - if isExpandableEnd(in, expandable, len(in)) { - return len(in), contents[:len(contents)-2] + if isExpandableEnd(in, len(in)) { + return len(in), contents[:len(contents)-2], true } - return len(in), contents + return len(in), contents, false } -func isExpandableEnd(in []rune, expandable bool, j int) bool { - return expandable && j-2 >= 0 && in[j-1] == '|' && in[j-2] == '|' +func isExpandableEnd(in []rune, j int) bool { + return j-2 >= 0 && in[j-1] == '|' && in[j-2] == '|' } func validBlockQuoteStart(in []rune, i int) bool { diff --git a/md2htmlV2_test.go b/md2htmlV2_test.go index e8cb95d..68dec8f 100644 --- a/md2htmlV2_test.go +++ b/md2htmlV2_test.go @@ -74,11 +74,17 @@ var basicMDv2 = []struct { in: ">multi\n> line", out: "
<blockquote>multi\nline</blockquote>
", }, { - in: "**>expandable multi\n>line\n>quote||", + in: ">expandable multi\n>line\n>quote||", out: "
<blockquote expandable>expandable multi\nline\nquote</blockquote>
", }, { - in: "**>expandable multi\n>line\n>quote||\nMore text on another line", + in: ">expandable multi\n>line\n>quote||\nMore text on another line", out: "
<blockquote expandable>expandable multi\nline\nquote</blockquote>
\nMore text on another line", + }, { + in: "**>expandable multi with star prefix\n>line\n>quote||", + out: "
<blockquote expandable>expandable multi with star prefix\nline\nquote</blockquote>
", + }, { + in: ">normal quote\n**>expandable multi\n>idk||", + out: "
<blockquote>normal quote</blockquote>
\n
<blockquote expandable>expandable multi\nidk</blockquote>
", }, }