@@ -912,7 +912,15 @@ namespace pcs // i.e. "pythonic c++ strings"
912912
913913
914914 // --- rsplit() ----------------------------------------
915- /* * \brief Returns a vector of the words in the whole string, as seperated with whitespace strings. */
915+ /* * \brief Returns a vector of the words in the whole string, as separated with whitespace strings.
916+ *
917+ * Notice: runs of consecutive whitespace are regarded as a single
918+ * separator, and the result will contain no empty strings at the
919+ * start or end if the string has leading or trailing whitespace.
920+ * Consequently, splitting an empty string or a string consisting
921+ * of just whitespace with a whitespace separator returns an empty
922+ * vector.
923+ */
916924 inline std::vector<CppStringT> rsplit () const noexcept
917925 {
918926 return split ();
@@ -969,10 +977,7 @@ namespace pcs // i.e. "pythonic c++ strings"
969977 return res;
970978 }
971979
972- /* * \brief Returns a vector of the words in the string, using sep as the delimiter string.
973- *
974- * At most maxsplit splits are done, the rightmost ones.
975- */
980+ /* * \brief Returns a vector of the words in the string, using sep as the delimiter string. At most maxsplit splits are done, the rightmost ones. */
976981 std::vector<CppStringT> rsplit (const CppStringT& sep, const size_type maxsplit) const noexcept
977982 {
978983 std::vector<CppStringT> res{};
@@ -1056,17 +1061,107 @@ namespace pcs // i.e. "pythonic c++ strings"
10561061
10571062
10581063 // --- split() -----------------------------------------
1064+ /* * \brief Returns a vector of the words in the whole string, as seperated with whitespace strings.
1065+ *
1066+ * Notice: runs of consecutive whitespace are regarded as a single
1067+ * separator, and the result will contain no empty strings at the
1068+ * start or end if the string has leading or trailing whitespace.
1069+ * Consequently, splitting an empty string or a string consisting
1070+ * of just whitespace with a whitespace separator returns an ampty
1071+ * vector.
1072+ */
10591073 inline std::vector<CppStringT> split () const noexcept
10601074 {
1061- return std::vector<CppStringT>();
1075+ std::vector<std::string> res;
1076+ constexpr CppStringT whitespace (value_type (' ' ));
1077+ for (const auto & word : *this | std::views::split (whitespace))
1078+ if (!word.empty ())
1079+ res.push_back (CppStringT (word.begin (), word.end ()));
1080+ return res;
10621081 }
10631082
1064- /* * \brief Returns a vector of the words in the string, using sep as the delimiter string. */
1083+ /* * \brief Returns a vector of the words in the whole string, using sep as the delimiter string.
1084+ *
1085+ * Notice: consecutive delimiters are not grouped together and are
1086+ * deemed to delimit empty strings (for example, "1,,2".split(",")
1087+ * returns {"1", "", "2"}). The sep argument may consist of multiple
1088+ * characters (for example, "1<>2<>3".split("<>") returns {"1", "2",
1089+ * "3"]). Splitting an empty string with a specified separator
1090+ * returns {""}.
1091+ */
10651092 inline std::vector<CppStringT> split (const CppStringT& sep) const noexcept
10661093 {
1067- return std::vector<CppStringT>();
1094+ std::vector<std::string> res;
1095+ for (const auto & word : *this | std::views::split (sep))
1096+ res.push_back (CppStringT (word.begin (), word.end ()));
1097+ return res;
10681098 }
10691099
1100+ /* * \brief Returns a vector of the words in the string, as seperated with whitespace strings. At most maxsplit splits are done, the leftmost ones. */
1101+ std::vector<CppStringT> split (const size_type maxsplit) const noexcept
1102+ {
1103+ std::vector<CppStringT> res{};
1104+
1105+ if (maxsplit == 0 ) {
1106+ res.push_back (*this );
1107+ }
1108+ else {
1109+ const CppStringT whitespace (value_type (' ' ));
1110+ std::vector<CppStringT> all_words{ this ->split (whitespace) };
1111+
1112+ size_type count = maxsplit;
1113+ auto word_it = all_words.cbegin ();
1114+ while (count > 0 && word_it != all_words.cend ()) {
1115+ if (!word_it->empty ()) {
1116+ res.insert (res.cbegin (), *word_it);
1117+ --count;
1118+ }
1119+ word_it++;
1120+ }
1121+
1122+ size_type chars_count = 0 ;
1123+ for (auto it = word_it; it != all_words.cend (); ++it) {
1124+ chars_count += it->size () + 1 ;
1125+ }
1126+ if (chars_count > 0 )
1127+ res.insert (res.cbegin (), this ->substr (this ->cbegin () + chars_count - 1 , this ->cend ()));
1128+ }
1129+
1130+ return res;
1131+ }
1132+
1133+ /* * \brief Returns a vector of the words in the string, using sep as the delimiter string. At most maxsplit splits are done, the leftmost ones. */
1134+ inline std::vector<CppStringT> split (const CppStringT& sep, const size_type maxsplit) const noexcept
1135+ {
1136+ std::vector<CppStringT> res{};
1137+
1138+ if (maxsplit == 0 ) {
1139+ res.push_back (*this );
1140+ }
1141+ else {
1142+ const CppStringT whitespace (value_type (' ' ));
1143+ std::vector<CppStringT> all_words{ this ->split (whitespace) };
1144+
1145+ size_type count = maxsplit;
1146+ auto word_it = all_words.cbegin ();
1147+ while (count > 0 && word_it != all_words.cend ()) {
1148+ res.insert (res.cbegin (), *word_it);
1149+ --count;
1150+ word_it++;
1151+ }
1152+
1153+ size_type chars_count = 0 ;
1154+ for (auto it = word_it; it != all_words.cend (); ++it) {
1155+ chars_count += it->size () + 1 ;
1156+ }
1157+ if (chars_count > 0 )
1158+ res.insert (res.cbegin (), this ->substr (this ->cbegin () + chars_count - 1 , this ->cend ()));
1159+ }
1160+
1161+ return res;
1162+ }
1163+
1164+
10701165
10711166 // --- title() -----------------------------------------
10721167 /* * \brief Returns a titlecased copy of the string where words start with an uppercase character and the remaining characters are lowercase. */
0 commit comments