Skip to content

Commit 2424122

Browse files
committed
#48 - Implement method CppStringT::split()
Completed.
1 parent a66f7b5 commit 2424122

File tree

1 file changed

+103
-8
lines changed

1 file changed

+103
-8
lines changed

cpp-strings/cppstrings.h

Lines changed: 103 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -912,7 +912,15 @@ namespace pcs // i.e. "pythonic c++ strings"
912912

913913

914914
//--- rsplit() ----------------------------------------
915-
/** \brief Returns a vector of the words in the whole string, as separated with whitespace strings. */
915+
/** \brief Returns a vector of the words in the whole string, as separated with whitespace strings.
916+
*
917+
* Notice: runs of consecutive whitespace are regarded as a single
918+
* separator, and the result will contain no empty strings at the
919+
* start or end if the string has leading or trailing whitespace.
920+
* Consequently, splitting an empty string or a string consisting
921+
* of just whitespace with a whitespace separator returns an empty
922+
* vector.
923+
*/
916924
inline std::vector<CppStringT> rsplit() const noexcept
917925
{
918926
return split();
@@ -969,10 +977,7 @@ namespace pcs // i.e. "pythonic c++ strings"
969977
return res;
970978
}
971979

972-
/** \brief Returns a vector of the words in the string, using sep as the delimiter string.
973-
*
974-
* At most maxsplit splits are done, the rightmost ones.
975-
*/
980+
/** \brief Returns a vector of the words in the string, using sep as the delimiter string. At most maxsplit splits are done, the rightmost ones. */
976981
std::vector<CppStringT> rsplit(const CppStringT& sep, const size_type maxsplit) const noexcept
977982
{
978983
std::vector<CppStringT> res{};
@@ -1056,17 +1061,107 @@ namespace pcs // i.e. "pythonic c++ strings"
10561061

10571062

10581063
//--- split() -----------------------------------------
1064+
/** \brief Returns a vector of the words in the whole string, as seperated with whitespace strings.
1065+
*
1066+
* Notice: runs of consecutive whitespace are regarded as a single
1067+
* separator, and the result will contain no empty strings at the
1068+
* start or end if the string has leading or trailing whitespace.
1069+
* Consequently, splitting an empty string or a string consisting
1070+
* of just whitespace with a whitespace separator returns an ampty
1071+
* vector.
1072+
*/
10591073
inline std::vector<CppStringT> split() const noexcept
10601074
{
1061-
return std::vector<CppStringT>();
1075+
std::vector<std::string> res;
1076+
constexpr CppStringT whitespace(value_type(' '));
1077+
for (const auto& word : *this | std::views::split(whitespace))
1078+
if (!word.empty())
1079+
res.push_back(CppStringT(word.begin(), word.end()));
1080+
return res;
10621081
}
10631082

1064-
/** \brief Returns a vector of the words in the string, using sep as the delimiter string. */
1083+
/** \brief Returns a vector of the words in the whole string, using sep as the delimiter string.
1084+
*
1085+
* Notice: consecutive delimiters are not grouped together and are
1086+
* deemed to delimit empty strings (for example, "1,,2".split(",")
1087+
* returns {"1", "", "2"}). The sep argument may consist of multiple
1088+
* characters (for example, "1<>2<>3".split("<>") returns {"1", "2",
1089+
* "3"]). Splitting an empty string with a specified separator
1090+
* returns {""}.
1091+
*/
10651092
inline std::vector<CppStringT> split(const CppStringT& sep) const noexcept
10661093
{
1067-
return std::vector<CppStringT>();
1094+
std::vector<std::string> res;
1095+
for (const auto& word : *this | std::views::split(sep))
1096+
res.push_back(CppStringT(word.begin(), word.end()));
1097+
return res;
10681098
}
10691099

1100+
/** \brief Returns a vector of the words in the string, as seperated with whitespace strings. At most maxsplit splits are done, the leftmost ones. */
1101+
std::vector<CppStringT> split(const size_type maxsplit) const noexcept
1102+
{
1103+
std::vector<CppStringT> res{};
1104+
1105+
if (maxsplit == 0) {
1106+
res.push_back(*this);
1107+
}
1108+
else {
1109+
const CppStringT whitespace(value_type(' '));
1110+
std::vector<CppStringT> all_words{ this->split(whitespace) };
1111+
1112+
size_type count = maxsplit;
1113+
auto word_it = all_words.cbegin();
1114+
while (count > 0 && word_it != all_words.cend()) {
1115+
if (!word_it->empty()) {
1116+
res.insert(res.cbegin(), *word_it);
1117+
--count;
1118+
}
1119+
word_it++;
1120+
}
1121+
1122+
size_type chars_count = 0;
1123+
for (auto it = word_it; it != all_words.cend(); ++it) {
1124+
chars_count += it->size() + 1;
1125+
}
1126+
if (chars_count > 0)
1127+
res.insert(res.cbegin(), this->substr(this->cbegin() + chars_count - 1, this->cend()));
1128+
}
1129+
1130+
return res;
1131+
}
1132+
1133+
/** \brief Returns a vector of the words in the string, using sep as the delimiter string. At most maxsplit splits are done, the leftmost ones. */
1134+
inline std::vector<CppStringT> split(const CppStringT& sep, const size_type maxsplit) const noexcept
1135+
{
1136+
std::vector<CppStringT> res{};
1137+
1138+
if (maxsplit == 0) {
1139+
res.push_back(*this);
1140+
}
1141+
else {
1142+
const CppStringT whitespace(value_type(' '));
1143+
std::vector<CppStringT> all_words{ this->split(whitespace) };
1144+
1145+
size_type count = maxsplit;
1146+
auto word_it = all_words.cbegin();
1147+
while (count > 0 && word_it != all_words.cend()) {
1148+
res.insert(res.cbegin(), *word_it);
1149+
--count;
1150+
word_it++;
1151+
}
1152+
1153+
size_type chars_count = 0;
1154+
for (auto it = word_it; it != all_words.cend(); ++it) {
1155+
chars_count += it->size() + 1;
1156+
}
1157+
if (chars_count > 0)
1158+
res.insert(res.cbegin(), this->substr(this->cbegin() + chars_count - 1, this->cend()));
1159+
}
1160+
1161+
return res;
1162+
}
1163+
1164+
10701165

10711166
//--- title() -----------------------------------------
10721167
/** \brief Returns a titlecased copy of the string where words start with an uppercase character and the remaining characters are lowercase. */

0 commit comments

Comments
 (0)