diff --git a/src/lyric_auto_edit.cpp b/src/lyric_auto_edit.cpp index 992640c2..ccf1fb8b 100644 --- a/src/lyric_auto_edit.cpp +++ b/src/lyric_auto_edit.cpp @@ -49,7 +49,7 @@ static std::optional RemoveRepeatedSpaces(const LyricData& lyrics) size_t search_start = 0; while(search_start < line.text.length()) { - size_t next_space = line.text.find_first_of(_T(' '), search_start); + size_t next_space = find_first_whitespace(line.text, search_start); // NOTE: If the line was empty we would not enter this loop. // We subtract 1 from the length to avoid overflowing when next_space == npos == (size_t)-1 @@ -60,7 +60,7 @@ static std::optional RemoveRepeatedSpaces(const LyricData& lyrics) } size_t erase_start = next_space + 1; - size_t erase_end = line.text.find_first_not_of(_T(' '), erase_start); + size_t erase_end = find_first_nonwhitespace(line.text, erase_start); if((erase_end != std::tstring::npos) && (erase_end > erase_start)) { @@ -90,7 +90,7 @@ static std::optional RemoveRepeatedBlankLines(const LyricData& lyrics LyricData new_lyrics = lyrics; for(auto iter = new_lyrics.lines.begin(); iter != new_lyrics.lines.end(); /*Omitted*/) { - size_t first_non_space = iter->text.find_first_not_of(' '); + size_t first_non_space = find_first_nonwhitespace(iter->text); bool is_blank = (first_non_space == std::tstring::npos); if(is_blank && previous_blank) { diff --git a/src/ui_lyric_editor.cpp b/src/ui_lyric_editor.cpp index fef84e93..243f6389 100644 --- a/src/ui_lyric_editor.cpp +++ b/src/ui_lyric_editor.cpp @@ -326,7 +326,7 @@ void LyricEditor::SelectLineWithTimestampGreaterOrEqual(double threshold_timesta line_buffer_len); // EM_GETLINE reads the first word as the number of characters in the buffer LRESULT chars_copied = SendDlgItemMessage(IDC_LYRIC_TEXT, EM_GETLINE, i, (LPARAM)line_buffer); std::string linestr = from_tstring(std::tstring_view { line_buffer, (size_t)chars_copied }); - if(linestr.empty() || ((linestr.length() == 1) && (linestr[0] == ' '))) continue; + 
if(linestr.empty() || ((linestr.length() == 1) && is_char_whitespace(linestr[0]))) continue; if(parsers::lrc::is_tag_line(linestr)) continue; double line_timestamp = parsers::lrc::get_line_first_timestamp(linestr); diff --git a/src/ui_lyrics_externalwindow.cpp b/src/ui_lyrics_externalwindow.cpp index 2d1a6116..4a5ba638 100644 --- a/src/ui_lyrics_externalwindow.cpp +++ b/src/ui_lyrics_externalwindow.cpp @@ -427,8 +427,8 @@ static int _WrapSimpleLyricsLineToRect(D2DTextRenderContext& render, // Remove trailing whitespace // We do this once now (before allocating anything dependent on string length) // and then since we don't ever move the "end" of the string, we assume that line - // doesn't end in a space for the rest of the function. - size_t last_not_space = line.find_last_not_of(_T(' ')); + // doesn't end in whitespace for the rest of the function. + size_t last_not_space = find_last_nonwhitespace(line); if(last_not_space == std::tstring_view::npos) { return line_height; // Our line is exclusively whitespace diff --git a/src/ui_lyrics_panel.cpp b/src/ui_lyrics_panel.cpp index d418b62f..5c83c6fd 100644 --- a/src/ui_lyrics_panel.cpp +++ b/src/ui_lyrics_panel.cpp @@ -447,10 +447,10 @@ static int _WrapSimpleLyricsLineToRect(HDC dc, CRect clip_rect, std::tstring_vie int total_height = 0; while(text_outstanding.length() > 0) { - size_t leading_spaces = text_outstanding.find_first_not_of(_T(' ')); + size_t leading_spaces = find_first_nonwhitespace(text_outstanding); text_outstanding.remove_prefix(std::min(leading_spaces, text_outstanding.size())); - size_t last_not_space = text_outstanding.find_last_not_of(_T(' ')); + size_t last_not_space = find_last_nonwhitespace(text_outstanding); if(last_not_space != std::tstring_view::npos) { size_t trailing_spaces = text_outstanding.length() - 1 - last_not_space; @@ -476,7 +476,7 @@ static int _WrapSimpleLyricsLineToRect(HDC dc, CRect clip_rect, std::tstring_vie else { assert(chars_to_draw > 0); - const int previous_space_index = 
int(text_outstanding.rfind(' ', chars_to_draw - 1)); + const int previous_space_index = int(find_last_whitespace(text_outstanding, chars_to_draw - 1)); if(previous_space_index == std::tstring::npos) { // There is a single word that doesn't fit on the line diff --git a/src/win32_util.cpp b/src/win32_util.cpp index ecba2ff3..241d6b0f 100644 --- a/src/win32_util.cpp +++ b/src/win32_util.cpp @@ -158,6 +158,63 @@ std::tstring normalise_utf8(std::tstring_view input) return result; } +bool is_char_whitespace(TCHAR c) +{ + // U+00A0 and U+202F are non-breaking spaces, so they are deliberately not treated as whitespace here. + // U+180E was classified as a space when Microsoft first defined isspace, but was later removed from the standard. + return (_istspace(c) != 0) && (c != L'\u00A0') && (c != L'\u202F') && (c != L'\u180E'); +} + +size_t find_first_whitespace(const std::tstring_view str, size_t pos) +{ + // match behavior of std::string_view::find_first_of + if(pos >= str.length() || str.empty()) return std::tstring_view::npos; + + const auto it = std::find_if(std::next(str.begin(), pos), str.end(), is_char_whitespace); + + if(it == str.end()) return std::tstring_view::npos; + + return it - str.begin(); +} + +size_t find_first_nonwhitespace(const std::tstring_view str, size_t pos) +{ + // match behavior of std::string_view::find_first_not_of + if(pos >= str.length() || str.empty()) return std::tstring_view::npos; + + const auto it = std::find_if_not(std::next(str.begin(), pos), str.end(), is_char_whitespace); + + if(it == str.end()) return std::tstring_view::npos; + + return it - str.begin(); +} + +size_t find_last_whitespace(const std::tstring_view str, size_t pos) +{ + if(str.empty()) return std::tstring_view::npos; // match behavior of std::string_view::find_last_of + + size_t offset = 0; + if(pos != std::tstring_view::npos && pos < str.length()) offset = str.length() - pos - 1; // skip the characters after pos when walking backwards + + const auto it = std::find_if(std::next(str.rbegin(), offset), str.rend(), is_char_whitespace); + if(it == str.rend()) return std::tstring_view::npos; + + return str.rend() - it - 1; // convert the reverse iterator back to a forward index +} + +size_t 
find_last_nonwhitespace(const std::tstring_view str, size_t pos) +{ + if(str.empty()) return std::tstring_view::npos; // match behavior of std::string_view::find_last_not_of + + size_t offset = 0; + if(pos != std::tstring_view::npos && pos < str.length()) offset = str.length() - pos - 1; // skip the characters after pos when walking backwards + + const auto it = std::find_if_not(std::next(str.rbegin(), offset), str.rend(), is_char_whitespace); + if(it == str.rend()) return std::tstring_view::npos; + + return str.rend() - it - 1; // convert the reverse iterator back to a forward index +} + bool hr_success(HRESULT result, const char* filename, int line_number) { const bool success = (result == S_OK); @@ -210,4 +267,121 @@ MVTF_TEST(win32_string_narrow_to_wide_handles_ascii) const std::wstring output = std::wstring(output_buffer.data(), output_chars); ASSERT(output == L"test string!\nwith a newline :O"); } + +MVTF_TEST(win32_is_char_whitespace_true_for_breaking_whitespace) +{ + ASSERT(is_char_whitespace(L'\t')); + ASSERT(is_char_whitespace(L'\n')); + ASSERT(is_char_whitespace(L'\v')); + ASSERT(is_char_whitespace(L'\f')); + ASSERT(is_char_whitespace(L'\r')); + ASSERT(is_char_whitespace(L' ')); + + ASSERT(is_char_whitespace(L'\u0085')); // Next line + ASSERT(is_char_whitespace(L'\u1680')); // Ogham space mark + + ASSERT(is_char_whitespace(L'\u2000')); // En quad + ASSERT(is_char_whitespace(L'\u2001')); // Em quad + ASSERT(is_char_whitespace(L'\u2002')); // En space + ASSERT(is_char_whitespace(L'\u2003')); // Em space + ASSERT(is_char_whitespace(L'\u2004')); // Three-per-em space + ASSERT(is_char_whitespace(L'\u2005')); // Four-per-em space + ASSERT(is_char_whitespace(L'\u2006')); // Six-per-em space + ASSERT(is_char_whitespace(L'\u2007')); // Figure space + ASSERT(is_char_whitespace(L'\u2008')); // Punctuation space + ASSERT(is_char_whitespace(L'\u2009')); // Thin space + ASSERT(is_char_whitespace(L'\u200A')); // Hair space + + ASSERT(is_char_whitespace(L'\u2028')); // Line separator + ASSERT(is_char_whitespace(L'\u2029')); // Paragraph separator + ASSERT(is_char_whitespace(L'\u205F')); // Medium mathematical space + 
ASSERT(is_char_whitespace(L'\u3000')); // Ideographic space + + ASSERT(!is_char_whitespace(L'\u00A0')); // Non-breaking space + ASSERT(!is_char_whitespace(L'\u202F')); // Narrow non-breaking space + ASSERT(!is_char_whitespace(L'\u180E')); // Mongolian vowel separator + ASSERT(!is_char_whitespace(L'A')); + ASSERT(!is_char_whitespace(L'1')); + ASSERT(!is_char_whitespace(L'-')); +} + +MVTF_TEST(win32_find_first_whitespace_gives_correct_indices) +{ + std::tstring_view input = _T("Test string.\u3000Second sentence."); + ASSERT(find_first_whitespace(input) == 4); + ASSERT(find_first_whitespace(input, 4) == 4); + ASSERT(find_first_whitespace(input, 5) == 12); + ASSERT(find_first_whitespace(input, 23) == std::tstring_view::npos); + ASSERT(find_first_whitespace(input, 100) == std::tstring_view::npos); +} + +MVTF_TEST(win32_find_first_whitespace_empty_string) +{ + ASSERT(find_first_whitespace(_T("")) == std::tstring_view::npos); +} + +MVTF_TEST(win32_find_first_whitespace_no_whitespace) +{ + ASSERT(find_first_whitespace(_T("abcdef")) == std::tstring_view::npos); +} + +MVTF_TEST(win32_find_first_nonwhitespace_gives_correct_indices) +{ + std::tstring_view input = _T(" \u3000Test string. 
"); + ASSERT(find_first_nonwhitespace(input) == 4); + ASSERT(find_first_nonwhitespace(input, 4) == 4); + ASSERT(find_first_nonwhitespace(input, 10) == 12); + ASSERT(find_first_nonwhitespace(input, 20) == std::tstring_view::npos); + ASSERT(find_first_nonwhitespace(input, 100) == std::tstring_view::npos); +} + +MVTF_TEST(win32_find_first_nonwhitespace_empty_string) +{ + ASSERT(find_first_nonwhitespace(_T("")) == std::tstring_view::npos); +} + +MVTF_TEST(win32_find_first_nonwhitespace_no_nonwhitespace) +{ + ASSERT(find_first_nonwhitespace(_T(" ")) == std::tstring_view::npos); +} + +MVTF_TEST(win32_find_last_whitespace_gives_correct_indices) +{ + std::tstring_view input = _T("Test string.\u3000Second sentence."); + ASSERT(find_last_whitespace(input) == 19); + ASSERT(find_last_whitespace(input, 19) == 19); + ASSERT(find_last_whitespace(input, 15) == 12); + ASSERT(find_last_whitespace(input, 2) == std::tstring_view::npos); + ASSERT(find_last_whitespace(input, 100) == 19); +} + +MVTF_TEST(win32_find_last_whitespace_empty_string) +{ + ASSERT(find_last_whitespace(_T("")) == std::tstring_view::npos); +} + +MVTF_TEST(win32_find_last_whitespace_no_whitespace) +{ + ASSERT(find_last_whitespace(_T("abcdef")) == std::tstring_view::npos); +} + +MVTF_TEST(win32_find_last_nonwhitespace_gives_correct_indices) +{ + std::tstring_view input = _T(" \u3000Test string. 
"); + ASSERT(find_last_nonwhitespace(input) == 18); + ASSERT(find_last_nonwhitespace(input, 18) == 18); + ASSERT(find_last_nonwhitespace(input, 10) == 7); + ASSERT(find_last_nonwhitespace(input, 3) == std::tstring_view::npos); + ASSERT(find_last_nonwhitespace(input, 100) == 18); +} + +MVTF_TEST(win32_find_last_nonwhitespace_empty_string) +{ + ASSERT(find_last_nonwhitespace(_T("")) == std::tstring_view::npos); +} + +MVTF_TEST(win32_find_last_nonwhitespace_no_nonwhitespace) +{ + ASSERT(find_last_nonwhitespace(_T(" ")) == std::tstring_view::npos); +} #endif diff --git a/src/win32_util.h b/src/win32_util.h index 0c804079..d5eb64df 100644 --- a/src/win32_util.h +++ b/src/win32_util.h @@ -29,5 +29,11 @@ std::string from_tstring(const std::tstring& string); std::tstring normalise_utf8(std::tstring_view input); +bool is_char_whitespace(TCHAR c); +size_t find_first_whitespace(const std::tstring_view str, size_t pos = 0); +size_t find_first_nonwhitespace(const std::tstring_view str, size_t pos = 0); +size_t find_last_whitespace(const std::tstring_view str, size_t pos = std::tstring_view::npos); +size_t find_last_nonwhitespace(const std::tstring_view str, size_t pos = std::tstring_view::npos); + #define HR_SUCCESS(hr) hr_success(hr, __FILE__, __LINE__) bool hr_success(HRESULT result, const char* filename, int line_number);