Skip to content

Commit 15edb71

Browse files
authored
feat(submit): unwrap hard line breaks in generated PR descriptions (#34)
* feat(submit): unwrap hard line breaks in generated PR descriptions Commit message bodies are typically hard-wrapped at ~72 columns, but GitHub renders single newlines as `<br>` in PR descriptions, resulting in ugly narrow paragraphs. `generatePRBody` now unwraps paragraph lines while preserving markdown structure (code blocks, lists, headers, blockquotes, tables, horizontal rules). If HTML tags are detected, the body is left as-is to avoid mangling intentional formatting. * fix(submit): track fence markers to prevent mismatched close The fenced code block state machine treated `` ``` `` and `~~~` as interchangeable openers/closers. Now each code block tracks which marker opened it and only closes on the same marker, preventing a `~~~` inside a `` ``` `` block (or vice versa) from prematurely ending code block mode. Also adds explanatory comment to `submit_internal_test.go` for why it uses `package cmd` instead of `package cmd_test`. * fix(submit): handle hyphenated HTML tags, fix doc comment placement - Expand `htmlTagRe` to match custom elements with hyphens (e.g. `<my-component>`) and namespaced tags (e.g. `<xml:tag>`) - Move orphaned doc comment block to directly precede `unwrapParagraphs` so it attaches to the function in godoc
1 parent f23002e commit 15edb71

2 files changed

Lines changed: 419 additions & 2 deletions

File tree

cmd/submit.go

Lines changed: 201 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package cmd
44
import (
55
"fmt"
66
"os"
7+
"regexp"
78
"strings"
89

910
"github.com/boneskull/gh-stack/internal/config"
@@ -514,6 +515,10 @@ func promptMarkPRReady(ghClient *github.Client, prNumber int, branch, trunk stri
514515
// generatePRBody creates a PR description from the commits between base and head.
515516
// For a single commit: returns the commit body.
516517
// For multiple commits: returns each commit as a markdown section.
518+
//
519+
// Commit message bodies are unwrapped so that hard line breaks within paragraphs
520+
// (typical of the ~72-column git convention) are removed. This produces better
521+
// rendering in GitHub's PR description, which treats single newlines as <br> tags.
517522
func generatePRBody(g *git.Git, base, head string) (string, error) {
518523
commits, err := g.GetCommits(base, head)
519524
if err != nil {
@@ -526,7 +531,7 @@ func generatePRBody(g *git.Git, base, head string) (string, error) {
526531

527532
if len(commits) == 1 {
528533
// Single commit: just use the body
529-
return commits[0].Body, nil
534+
return unwrapParagraphs(commits[0].Body), nil
530535
}
531536

532537
// Multiple commits: format as markdown sections
@@ -540,10 +545,204 @@ func generatePRBody(g *git.Git, base, head string) (string, error) {
540545
sb.WriteString("\n")
541546
if commit.Body != "" {
542547
sb.WriteString("\n")
543-
sb.WriteString(commit.Body)
548+
sb.WriteString(unwrapParagraphs(commit.Body))
544549
sb.WriteString("\n")
545550
}
546551
}
547552

548553
return sb.String(), nil
549554
}
555+
556+
// Patterns used to detect raw HTML in commit message prose. Both are compiled
// once at package scope.
var (
	// htmlTagRe matches anything that looks like an HTML tag, including custom
	// elements with hyphens (e.g. <my-component>) and namespaced tags
	// (e.g. <xml:tag>).
	//
	// A colon is only accepted as a namespace separator that is itself followed
	// by a name, so URL autolinks such as <https://example.com> are not
	// mistaken for tags.
	htmlTagRe = regexp.MustCompile(`</?[a-zA-Z][-a-zA-Z0-9]*(?::[a-zA-Z][-a-zA-Z0-9]*)*[\s/>]`)

	// inlineCodeRe matches backtick-enclosed inline code spans so we can strip
	// them before checking for HTML. Otherwise `<token>` in code would trigger
	// a false positive.
	inlineCodeRe = regexp.MustCompile("`[^`]+`")
)
563+
564+
// fenceMarker returns the fence prefix ("```" or "~~~") if the line opens or
// closes a fenced code block, or "" otherwise.
//
// Up to three leading spaces are ignored, since markdown allows a fence to be
// indented that far (typical when a code block sits under a list item). A line
// indented by four or more spaces, or by a tab, is never a fence.
func fenceMarker(trimmedLine string) string {
	indent := len(trimmedLine) - len(strings.TrimLeft(trimmedLine, " "))
	if indent > 3 {
		return ""
	}

	rest := trimmedLine[indent:]
	for _, marker := range []string{"```", "~~~"} {
		if strings.HasPrefix(rest, marker) {
			return marker
		}
	}
	return ""
}
575+
576+
// containsHTMLOutsideCode scans the text for HTML tags that appear in prose,
577+
// ignoring content inside fenced code blocks, indented code blocks, and inline
578+
// code spans. Returns true if HTML is found in any prose line.
579+
func containsHTMLOutsideCode(text string) bool {
580+
lines := strings.Split(text, "\n")
581+
var openFence string // tracks the opening fence marker ("```" or "~~~"), empty when outside
582+
583+
for _, line := range lines {
584+
trimmed := strings.TrimRight(line, " \t")
585+
marker := fenceMarker(trimmed)
586+
587+
// Track fenced code blocks — only the matching marker can close a block
588+
if openFence == "" && marker != "" {
589+
openFence = marker
590+
continue
591+
}
592+
if openFence != "" {
593+
if marker == openFence {
594+
openFence = ""
595+
}
596+
continue
597+
}
598+
599+
// Skip indented code blocks (4+ spaces or tab)
600+
if strings.HasPrefix(line, " ") || strings.HasPrefix(line, "\t") {
601+
continue
602+
}
603+
604+
// Strip inline code spans, then check for HTML
605+
stripped := inlineCodeRe.ReplaceAllString(line, "")
606+
if htmlTagRe.MatchString(stripped) {
607+
return true
608+
}
609+
}
610+
611+
return false
612+
}
613+
614+
// unwrapParagraphs removes hard line breaks within plain-text paragraphs while
615+
// preserving intentional structure: blank lines, markdown block-level syntax
616+
// (headers, lists, blockquotes, horizontal rules), and code blocks (both fenced
617+
// and indented). This converts the ~72-column convention used in commit messages
618+
// into flowing text suitable for GitHub's markdown renderer.
619+
//
620+
// If HTML tags are found in prose (outside code blocks and inline code spans),
621+
// the entire text is returned as-is — anyone writing raw HTML in a commit message
622+
// is doing something intentional with formatting.
623+
func unwrapParagraphs(text string) string {
624+
if text == "" {
625+
return ""
626+
}
627+
628+
// Bail if the text contains HTML tags in prose — don't mess with it.
629+
if containsHTMLOutsideCode(text) {
630+
return text
631+
}
632+
633+
lines := strings.Split(text, "\n")
634+
var result []string
635+
var paragraph []string
636+
var openFence string // tracks the opening fence marker ("```" or "~~~"), empty when outside
637+
638+
flushParagraph := func() {
639+
if len(paragraph) > 0 {
640+
result = append(result, strings.Join(paragraph, " "))
641+
paragraph = nil
642+
}
643+
}
644+
645+
for _, line := range lines {
646+
trimmed := strings.TrimRight(line, " \t")
647+
marker := fenceMarker(trimmed)
648+
649+
// Track fenced code blocks — only the matching marker can close a block
650+
if openFence == "" && marker != "" {
651+
flushParagraph()
652+
result = append(result, line)
653+
openFence = marker
654+
continue
655+
}
656+
if openFence != "" {
657+
result = append(result, line)
658+
if marker == openFence {
659+
openFence = ""
660+
}
661+
continue
662+
}
663+
664+
// Blank line = paragraph break
665+
if trimmed == "" {
666+
flushParagraph()
667+
result = append(result, "")
668+
continue
669+
}
670+
671+
// Preserve lines that are markdown block-level elements
672+
if isBlockElement(trimmed) {
673+
flushParagraph()
674+
result = append(result, line)
675+
continue
676+
}
677+
678+
// Indented code block (4+ spaces or tab)
679+
if strings.HasPrefix(line, " ") || strings.HasPrefix(line, "\t") {
680+
flushParagraph()
681+
result = append(result, line)
682+
continue
683+
}
684+
685+
// Otherwise it's a paragraph line — accumulate it
686+
paragraph = append(paragraph, trimmed)
687+
}
688+
689+
flushParagraph()
690+
691+
return strings.Join(result, "\n")
692+
}
693+
694+
// isBlockElement returns true if the line starts with markdown block-level syntax
695+
// that should not be joined with adjacent lines.
696+
func isBlockElement(line string) bool {
697+
// Headers
698+
if strings.HasPrefix(line, "#") {
699+
return true
700+
}
701+
// Unordered lists
702+
if strings.HasPrefix(line, "- ") || strings.HasPrefix(line, "* ") || strings.HasPrefix(line, "+ ") ||
703+
line == "-" || line == "*" || line == "+" {
704+
return true
705+
}
706+
// Ordered lists (e.g. "1. ", "12. ")
707+
for i, ch := range line {
708+
if ch >= '0' && ch <= '9' {
709+
continue
710+
}
711+
if ch == '.' && i > 0 && i+1 < len(line) && line[i+1] == ' ' {
712+
return true
713+
}
714+
break
715+
}
716+
// Blockquotes
717+
if strings.HasPrefix(line, ">") {
718+
return true
719+
}
720+
// Horizontal rules (---, ***, ___)
721+
if isHorizontalRule(line) {
722+
return true
723+
}
724+
// Pipe tables
725+
if strings.HasPrefix(line, "|") {
726+
return true
727+
}
728+
return false
729+
}
730+
731+
// isHorizontalRule checks for markdown horizontal rules: three or more
// -, *, or _ characters (with optional spaces between them).
//
// It returns true only when, after removing all spaces, the line is at least
// three bytes long and consists entirely of one of those three characters.
// The comparison is done on whole bytes of the string, so a multi-byte rune
// whose low byte happens to equal the rule character (e.g. U+012D vs '-') is
// not mistaken for it.
func isHorizontalRule(line string) bool {
	stripped := strings.ReplaceAll(line, " ", "")
	if len(stripped) < 3 {
		return false
	}

	switch stripped[0] {
	case '-', '*', '_':
		// Trimming the rule character from both ends leaves nothing only if
		// every byte of the line is that character.
		return strings.Trim(stripped, stripped[:1]) == ""
	default:
		return false
	}
}

0 commit comments

Comments
 (0)