// Listing metadata carried over from the file viewer: 66 lines, 1.9 KiB, C#.
using System.Globalization;
using System.Text;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

// Entry point (top-level statement): hands the DiacriticBenchmarks class to
// BenchmarkDotNet's runner.
BenchmarkRunner.Run<DiacriticBenchmarks>();

/// <summary>
/// Benchmarks two ways of simplifying polytonic Greek text:
/// <see cref="StackOverflow_RemoveDiacritics"/> (Unicode decomposition followed by
/// dropping every non-spacing mark) and <see cref="Boas_RemoveSpiritus"/>
/// (a fixed table of character replacements applied one pass per table row).
/// </summary>
/// <remarks>
/// The two methods are not output-equivalent: the first drops every character whose
/// category is <see cref="UnicodeCategory.NonSpacingMark"/>, while the second only
/// rewrites the characters listed in its table.
/// The class is intentionally left unsealed; NOTE(review): BenchmarkDotNet is expected
/// to derive from benchmark classes - confirm before sealing.
/// </remarks>
[SimpleJob, MemoryDiagnoser(false)]
public class DiacriticBenchmarks
{
    // Input fed to both benchmarks through [Arguments].
    private const string Sample = "ἠἡὀὁἱἰὠὡἐἑὑὐᾐ";

    /// <summary>
    /// Decomposes <paramref name="input"/> (form D), drops every character whose Unicode
    /// category is <see cref="UnicodeCategory.NonSpacingMark"/>, and recomposes the rest
    /// (form C).
    /// </summary>
    /// <param name="input">Text to strip.</param>
    /// <returns>The text without non-spacing marks, normalized to form C.</returns>
    [Benchmark]
    [Arguments(Sample)]
    public string StackOverflow_RemoveDiacritics(string input)
    {
        string normalizedString = input.Normalize(NormalizationForm.FormD);

        // Characters are only ever dropped, so the decomposed length is an upper bound.
        var stringBuilder = new StringBuilder(capacity: normalizedString.Length);

        foreach (char c in normalizedString)
        {
            if (CharUnicodeInfo.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
            {
                stringBuilder.Append(c);
            }
        }

        return stringBuilder.ToString().Normalize(NormalizationForm.FormC);
    }

    /// <summary>
    /// Rewrites the characters listed in the table below to their replacement, leaving
    /// every other character untouched. Each table row is a separate pass over the text.
    /// </summary>
    /// <param name="input">Text to rewrite.</param>
    /// <returns>The rewritten text.</returns>
    [Benchmark]
    [Arguments(Sample)]
    public string Boas_RemoveSpiritus(string input)
    {
        string output = MyReplace(input, "ἀἁ", "α");
        output = MyReplace(output, "ἠἡ", "η");
        output = MyReplace(output, "ὀὁ", "ο");
        output = MyReplace(output, "ἱἰ", "ι");
        output = MyReplace(output, "ὠὡ", "ω");
        output = MyReplace(output, "ἐἑ", "ε");
        output = MyReplace(output, "ὑὐ", "υ");
        output = MyReplace(output, "ᾐ", "ῃ");
        return output;
    }

    /// <summary>
    /// Copies <paramref name="input"/>, substituting <paramref name="replacement"/> for
    /// every character that occurs in <paramref name="pattern"/>.
    /// </summary>
    /// <param name="input">Text to scan.</param>
    /// <param name="pattern">
    /// Set of characters to replace; it is treated as individual characters, not as a
    /// substring to match.
    /// </param>
    /// <param name="replacement">Text written in place of each matching character.</param>
    /// <returns>A new string with the substitutions applied.</returns>
    private static string MyReplace(string input, string pattern, string replacement)
    {
        // Algorithm and allocation behaviour are left as they were so the benchmark keeps
        // measuring the same approach; only the unneeded instance binding was removed.
        var sb = new StringBuilder();
        foreach (char t in input)
        {
            if (!pattern.Contains(t))
            {
                sb.Append(t);
            }
            else
            {
                sb.Append(replacement);
            }
        }

        return sb.ToString();
    }
}