-
Notifications
You must be signed in to change notification settings - Fork 50
Expand file tree
/
Copy pathStringExtensions.cs
More file actions
237 lines (210 loc) · 10.4 KB
/
StringExtensions.cs
File metadata and controls
237 lines (210 loc) · 10.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;
using System.Linq;
namespace HedgehogDevelopment.CodeGeneration.Extensions
{
public static class StringExtensions
{
/// <summary>
/// Title-cases <paramref name="word"/> using the invariant culture while keeping
/// the capital letters that already mark word boundaries inside it.
/// </summary>
/// <param name="word">The text to convert. Passed straight to <c>Regex.Replace</c>.</param>
/// <returns>The title-cased text with every '+' character removed.</returns>
public static string TitleCase(this string word)
{
    // Put a '+' after each lower-case letter that precedes an upper-case one, and after
    // each upper-case letter that precedes an upper-case/lower-case pair, so the
    // title-casing step below sees those positions as word boundaries.
    string marked = Regex.Replace(word, "([a-z](?=[A-Z])|[A-Z](?=[A-Z][a-z]))", "$1+");
    string titled = CultureInfo.InvariantCulture.TextInfo.ToTitleCase(marked);

    // Drop the boundary markers again (this also drops any '+' the caller supplied).
    return titled.Replace("+", "");
}
/// <summary>
/// Converts <paramref name="word"/> to camel case: the word is title-cased with
/// <c>TitleCase</c> and its first character is then lower-cased.
/// </summary>
/// <example>
/// Something -> something,
/// ISomething -> iSomething,
/// SomethingElse -> somethingElse,
/// ISomethingElse -> iSomethingElse
/// </example>
/// <param name="word">The text to convert; may be <c>null</c> or empty.</param>
/// <returns>
/// The camel-cased text, or an empty string when <paramref name="word"/> is
/// <c>null</c> or empty, or when nothing is left after title-casing.
/// </returns>
public static string CamelCase(this string word)
{
    if (string.IsNullOrEmpty(word))
    {
        return "";
    }

    string titleCase = word.TitleCase();

    // TitleCase strips '+' characters, so a non-empty input can still come back empty;
    // Substring(0, 1) would throw on it.
    if (titleCase.Length == 0)
    {
        return "";
    }

    // Invariant lower-casing keeps the result independent of the machine's culture
    // (for example the Turkish dotless 'i'), matching the invariant culture used by TitleCase.
    return titleCase.Substring(0, 1).ToLowerInvariant() + titleCase.Substring(1);
}
/// <summary>
/// Tells whether <paramref name="word"/> looks like an interface name, that is
/// 'I' followed by an upper-case character and then a lower-case character (I[A-Z][a-z]...).
/// </summary>
/// <remarks>
/// This is a heuristic; the naming guideline itself is described at
/// http://msdn.microsoft.com/en-us/library/8bc1fexb(v=VS.71).aspx
/// </remarks>
/// <param name="word">The word to inspect.</param>
/// <returns>
/// <c>true</c> when the word is longer than two characters, contains no space and
/// matches the pattern above; otherwise <c>false</c>.
/// </returns>
public static bool IsInterfaceWord(this string word)
{
    // Too short to hold 'I' + upper + lower.
    if (word.Length <= 2)
    {
        return false;
    }

    // Anything with a space in it is not a single identifier.
    if (word.Contains(" "))
    {
        return false;
    }

    return word[0] == 'I'
        && char.IsUpper(word, 1)
        && char.IsLower(word, 2);
}
/// <summary>
/// Builds an interface name from <paramref name="word"/>: the word is title-cased and
/// validated, prefixed with 'I' unless the original word already looks like an
/// interface name, and finally has its underscores removed.
/// </summary>
/// <example>
/// something -> ISomething,
/// Something -> ISomething,
/// ISomething -> ISomething
/// </example>
/// <param name="word">The word to convert.</param>
/// <returns>The interface name.</returns>
public static string AsInterfaceName(this string word)
{
    string formatted = GetFormattedWord(word, TitleCase);

    // The check is made against the original word, not the formatted one.
    string candidate = word.IsInterfaceWord()
        ? formatted
        : string.Concat("I", formatted);

    return candidate.RemoveUnderscores();
}
/// <summary>
/// Builds a class name from <paramref name="word"/>: the word is title-cased,
/// validated and stripped of underscores.
/// </summary>
/// <param name="word">The word to convert.</param>
/// <returns>
/// The class name. It contains no underscore except a single leading one, which is
/// kept when the name would otherwise not start with a letter.
/// </returns>
public static string AsClassName(this string word)
{
    string className = GetFormattedWord(word, TitleCase).RemoveUnderscores();

    // RemoveUnderscores also strips the leading '_' that AsValidWord adds to make a
    // name legal (for example when the word begins with a digit). Put it back,
    // otherwise the result is not a valid identifier.
    if (className.Length == 0 || !char.IsLetter(className, 0))
    {
        className = string.Concat("_", className);
    }

    return className;
}
/// <summary>
/// Builds a property name from <paramref name="word"/> by title-casing it and,
/// on request, passing it through <c>Inflector.Pluralize</c> before validation.
/// </summary>
/// <param name="word">The word to convert.</param>
/// <param name="pluralize">
/// When <c>true</c>, <c>Inflector.Pluralize</c> is applied after title-casing.
/// </param>
/// <returns>The property name.</returns>
public static string AsPropertyName(this string word, bool pluralize = false)
{
    return pluralize
        ? GetFormattedWord(word, TitleCase, Inflector.Pluralize)
        : GetFormattedWord(word, TitleCase);
}
/// <summary>
/// Builds a private field name of the form <c>_someParam</c> from <paramref name="word"/>.
/// </summary>
/// <remarks>
/// The underscore prefix is not the Microsoft guideline, but it is easier to deal with
/// than writing <c>this.</c> everywhere to avoid name collisions.
/// </remarks>
/// <param name="word">The word to convert.</param>
/// <returns>The camel-cased, validated word with exactly one leading underscore.</returns>
public static string AsFieldName(this string word)
{
    // The prefix has to be added after validation: AsValidWord (called by
    // GetFormattedWord with its default arguments) removes every underscore from its
    // input, so a prefix added as a transformation step never reaches the caller.
    string fieldName = GetFormattedWord(word, CamelCase);

    // Validation itself adds a leading '_' in some cases (reserved words, names that do
    // not start with a letter); do not double it.
    if (fieldName.Length > 0 && fieldName[0] == '_')
    {
        return fieldName;
    }

    return string.Concat("_", fieldName);
}
/// <summary>
/// Converts every word of <paramref name="words"/> with <c>AsValidWord</c>, so that
/// each returned word can be used as an identifier.
/// </summary>
/// <remarks>
/// Evaluation is deferred: each word is converted when the returned sequence is enumerated.
/// </remarks>
/// <param name="words">The words.</param>
/// <param name="illegalCharReplacement">
/// Forwarded unchanged to <c>AsValidWord</c> for each word.
/// </param>
/// <returns>The converted words, in the same order as the input.</returns>
public static IEnumerable<string> AsValidWords(this IEnumerable<string> words, string illegalCharReplacement = "")
{
    foreach (var candidate in words)
    {
        string validWord = candidate.AsValidWord(illegalCharReplacement);
        yield return validWord;
    }
}
// Matches every character that is not in one of the Unicode categories listed below;
// those are the characters replaced by AsValidWord. Built once instead of on every call.
private static readonly Regex IllegalIdentifierCharacters =
    new Regex(@"[^\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Nd}\p{Nl}\p{Mn}\p{Mc}\p{Cf}\p{Pc}\p{Lm}]");

/// <summary>
/// Turns <paramref name="word"/> into a valid identifier: illegal characters are replaced,
/// a leading underscore is added when the result does not start with a letter or an
/// underscore, and the result is passed through <c>FixReservedWord</c> so it does not
/// clash with a language keyword.
/// <para>
/// Valid identifiers in C# are defined in the C# Language Specification, item 2.4.2. The rules are very simple:
/// - An identifier must start with a letter or an underscore
/// - After the first character, it may contain numbers, letters, connectors, etc
/// - If the identifier is a keyword, it must be prepended with "@"
/// </para>
/// See also http://msdn.microsoft.com/en-us/library/aa664670(VS.71).aspx
/// </summary>
/// <param name="word">The word. The single character "*" is translated to "Wildcard".</param>
/// <param name="illegalCharReplacement">
/// The text substituted for every illegal character. When it is <c>null</c> or empty,
/// illegal characters are removed and all underscores are removed from the word as well.
/// </param>
/// <returns>A valid word for the specified word; "_" when no legal character remains.</returns>
/// <exception cref="ArgumentNullException"><paramref name="word"/> is <c>null</c>.</exception>
public static string AsValidWord(this string word, string illegalCharReplacement = "")
{
    if (word == null)
    {
        throw new ArgumentNullException("word");
    }

    string identifier = word;
    if (identifier == "*")
    {
        identifier = "Wildcard";
    }

    // A null replacement is treated exactly like the empty default.
    string replacement = illegalCharReplacement ?? "";
    if (replacement.Length == 0)
    {
        identifier = identifier.Replace("_", "");
    }

    //identifier = RemoveDiacritics(identifier);

    // Replace all illegal chars with the value passed in via illegalCharReplacement, default is "".
    identifier = IllegalIdentifierCharacters.Replace(identifier, replacement);

    // The identifier must start with a letter or '_'. An identifier that became empty
    // is handled the same way, which yields "_" instead of indexing past the end.
    if (identifier.Length == 0 || !(char.IsLetter(identifier, 0) || identifier[0] == '_'))
    {
        identifier = string.Concat("_", identifier);
    }

    // Fix language specific reserved words.
    identifier = FixReservedWord(identifier);

    // Let's make sure we have a valid name.
    Debug.Assert(System.CodeDom.Compiler.CodeGenerator.IsValidLanguageIndependentIdentifier(identifier), string.Format("'{0}' is an invalid name for a Template or Field", word));

    return identifier;
}
/// <summary>
/// Concatenates all of the <paramref name="words"/> with a '.' separator.
/// <para>Each word is passed through the <c>AsValidWord</c> method ensuring that it is valid for a namespace segment.</para>
/// <para>Leading, trailing, and more than one consecutive '.' are removed, and so are all underscores
/// except a single leading one that is needed to keep a segment valid.</para>
/// </summary>
/// <example>
/// This sample shows how to call the <see cref="AsNamespace"/> method.
/// <code>
/// string[] segments = new string[5]{ ".My", "Namespace.", "For", "The...Sample..", "Project."};
/// string ns = segments.AsNamespace();
/// </code>
/// The <c>ns</c> variable would contain "<c>My.Namespace.For.The.Sample.Project</c>".
/// </example>
/// <param name="words">The namespace segments; <c>null</c> entries are skipped.</param>
/// <param name="illegalCharReplacement">
/// Forwarded to <c>AsValidWords</c> as the replacement for illegal characters. Underscores
/// are removed afterwards, so the default "_" leaves no trace in the result.
/// </param>
/// <returns>A string in valid namespace format.</returns>
public static string AsNamespace(this IEnumerable<string> words, string illegalCharReplacement = "_")
{
    List<string> joinedNamespace = new List<string>();

    foreach (string segment in words)
    {
        if (segment == null)
        {
            continue;
        }

        // Split apart any strings with a '.' and remove any consecutive multiple '.';
        // Capitalize(true) is expected to upper-case the first character while
        // preserving the rest (it is defined outside this class).
        var segments = segment.Split(new char[1] { '.' }, StringSplitOptions.RemoveEmptyEntries)
                              .Select(s => s.Capitalize(true));

        // Being we are making a namespace, make sure the segments are valid.
        foreach (string validSegment in segments.AsValidWords(illegalCharReplacement))
        {
            // Underscores are removed per segment rather than from the joined string so
            // that the two problems this can create are caught here.
            string cleaned = validSegment.RemoveUnderscores();

            // A segment made only of underscores would leave "A..B" behind.
            if (string.IsNullOrEmpty(cleaned))
            {
                continue;
            }

            // Stripping the protective '_' from e.g. "_1abc" leaves a segment that
            // starts with a digit; restore the prefix.
            if (!char.IsLetter(cleaned, 0))
            {
                cleaned = string.Concat("_", cleaned);
            }

            joinedNamespace.Add(cleaned);
        }
    }

    return string.Join(".", joinedNamespace.ToArray());
}
/// <summary>
/// Replaces accented characters with their unaccented base characters.
/// </summary>
/// <param name="s">The string to replace diacritics on.</param>
/// <remarks>A diacritic is a glyph added to a letter, or basic glyph.</remarks>
/// <returns>
/// The text without non-spacing marks, in normalization form C.
/// </returns>
private static string RemoveDiacritics(string s)
{
    // Decompose so that each accent becomes a separate non-spacing mark character.
    string decomposed = s.Normalize(NormalizationForm.FormD);
    StringBuilder stripped = new StringBuilder(decomposed.Length);

    foreach (char c in decomposed)
    {
        if (CharUnicodeInfo.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
        {
            stripped.Append(c);
        }
    }

    // Recompose what is left. Without this step, characters that decompose into
    // something other than base + non-spacing mark (Hangul syllables, for instance)
    // would be returned in their decomposed form.
    return stripped.ToString().Normalize(NormalizationForm.FormC);
}
/// <summary>
/// Deletes every underscore from <paramref name="word"/>; for instance User_Defined becomes UserDefined.
/// </summary>
/// <param name="word">The string to remove underscores from; may be <c>null</c> or empty.</param>
/// <returns>
/// The string without any underscores. A <c>null</c> or empty input is returned as is.
/// </returns>
public static string RemoveUnderscores(this string word)
{
    if (String.IsNullOrEmpty(word))
    {
        return word;
    }

    return word.Replace("_", "");
}
/// <summary>
/// Tests whether the word conflicts with reserved or language keywords, and if so, attempts to return a
/// valid word that does not conflict. Usually the returned word is only slightly modified to differentiate
/// the identifier from the keyword; for example, the word might be preceded by the underscore ("_") character
/// (class -> _class).
/// </summary>
/// <param name="word">The word.</param>
/// <returns>The value returned by <c>CSharpCodeProvider.CreateValidIdentifier</c> for the word.</returns>
private static string FixReservedWord(string word)
{
    // The provider is disposable; release it as soon as the identifier has been computed.
    using (Microsoft.CSharp.CSharpCodeProvider codeProvider = new Microsoft.CSharp.CSharpCodeProvider())
    {
        return codeProvider.CreateValidIdentifier(word);
    }
}
/// <summary>
/// Applies each of the <paramref name="transformations"/> to <paramref name="word"/> in
/// order, then validates the result with <c>AsValidWord</c> using its default arguments.
/// </summary>
/// <param name="word">The word to transform.</param>
/// <param name="transformations">The steps to apply; <c>null</c> entries are skipped.</param>
/// <returns>The transformed and validated word.</returns>
private static string GetFormattedWord(this string word, params Func<string, string>[] transformations)
{
    string current = word;

    for (int i = 0; i < transformations.Length; i++)
    {
        Func<string, string> transform = transformations[i];
        if (transform == null)
        {
            continue;
        }

        current = transform(current);
    }

    // Now that the basic transforms are done, make sure we have a valid word to use.
    return current.AsValidWord();
}
}
}