Add IsEmoji method

Emoji validation is performed with a regular expression taken from twemoji-parser; a source generator downloads the pattern and emits it as a compiled regex. Source: https://raw.githubusercontent.com/twitter/twemoji-parser/master/src/lib/regex.js
This commit is contained in:
Oliver Booth 2022-05-07 23:37:14 +01:00
parent 064011cb68
commit d405faf6e2
No known key found for this signature in database
GPG Key ID: 32A00B35503AF634
10 changed files with 227 additions and 2 deletions

View File

@ -24,6 +24,9 @@
- Added `IEnumerable<T>.ForEach` (#50)
- Added `IEnumerable<T>.DisposeAll`
- Added `IEnumerable<T>.DisposeAllAsync`
- Added `char.IsEmoji`
- Added `Rune.IsEmoji`
- Added `string.IsEmoji`
## [3.0.0]

View File

@ -0,0 +1,68 @@
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.Text;
namespace X10D.SourceGenerator;
/// <summary>
///     Generates the internal <c>X10D.Text.EmojiRegex</c> class, whose <c>Value</c> field holds a compiled regular expression
///     built from the emoji pattern published in twemoji-parser's <c>regex.js</c>.
/// </summary>
/// <remarks>
///     The pattern is downloaded while the generator initializes, so a build that runs this generator needs network access.
///     If no pattern can be extracted from the download, no source is emitted.
/// </remarks>
[Generator]
internal sealed class EmojiRegexGenerator : ISourceGenerator
{
    // ReSharper disable once IdentifierTypo
    private const string TwemojiRegexUrl = "https://raw.githubusercontent.com/twitter/twemoji-parser/master/src/lib/regex.js";
    private static readonly HttpClient HttpClient = new();
    private string _emojiRegex = string.Empty;

    /// <inheritdoc />
    public void Initialize(GeneratorInitializationContext context)
    {
        // ISourceGenerator.Initialize is synchronous, so the download is awaited by blocking.
        string response = HttpClient.GetStringAsync(TwemojiRegexUrl).GetAwaiter().GetResult();
        using var reader = new StringReader(response);

        while (reader.ReadLine() is { } line)
        {
            // Ordinal comparison: this is a fixed source-code token, not linguistic text.
            if (!line.StartsWith("export default /", StringComparison.Ordinal))
            {
                continue;
            }

            Match match = Regex.Match(line, @"export default /(?<regex>.+)/g;");
            if (!match.Success)
            {
                continue;
            }

            // The non-capturing group makes the anchors apply to the whole pattern even when it is a top-level
            // alternation. \A and \z are used instead of ^ and $ because $ also matches before a trailing newline,
            // which would let "<emoji>\n" pass as an emoji.
            string pattern = match.Groups["regex"].Value;
            _emojiRegex = @"\A(?:" + pattern + @")\z";
            break;
        }
    }

    /// <inheritdoc />
    public void Execute(GeneratorExecutionContext context)
    {
        if (string.IsNullOrEmpty(_emojiRegex))
        {
            return;
        }

        // The pattern is emitted inside a verbatim string literal, where a double quote must be written as "".
        string escapedPattern = _emojiRegex.Replace("\"", "\"\"");

        var builder = new StringBuilder();
        builder.AppendLine("// This file was auto-generated by X10D.SourceGenerator");
        builder.AppendLine("// Do not edit this file manually");
        builder.AppendLine();
        builder.AppendLine("using System.Text.RegularExpressions;");
        builder.AppendLine();
        builder.AppendLine("namespace X10D.Text;");
        builder.AppendLine();
        builder.AppendLine("internal static class EmojiRegex");
        builder.AppendLine("{");
        builder.AppendLine(" internal static readonly Regex Value = new Regex(");
        builder.AppendLine($" @\"{escapedPattern}\",");
        // ReSharper disable once StringLiteralTypo
        builder.AppendLine(" RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.Singleline");
        builder.AppendLine(" );");
        builder.AppendLine("}");

        context.AddSource("EmojiRegex.g.cs", SourceText.From(builder.ToString(), Encoding.UTF8));
    }
}

View File

@ -0,0 +1,18 @@
<!-- Project file for the assembly that hosts the Roslyn source generator. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- NOTE(review): netstandard2.0 is the target Roslyn expects for analyzer/generator assemblies; confirm before changing. -->
<TargetFramework>netstandard2.0</TargetFramework>
<!-- The language version is pinned explicitly so newer C# syntax is available despite the netstandard2.0 target. -->
<LangVersion>10.0</LangVersion>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<!-- Roslyn packages are build-time only: PrivateAssets="all" keeps them from flowing to consumers of this project. -->
<PackageReference Include="Microsoft.CodeAnalysis.CSharp" Version="4.0.1" PrivateAssets="all"/>
<PackageReference Include="Microsoft.CodeAnalysis.Analyzers" Version="3.3.3">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
</ItemGroup>
</Project>

View File

@ -1,4 +1,5 @@
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System.Text;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using X10D.Text;
namespace X10D.Tests.Text;
@ -6,6 +7,25 @@ namespace X10D.Tests.Text;
[TestClass]
public class CharTests
{
/// <summary>
///     Verifies that <c>char.IsEmoji</c> returns <see langword="true" /> for emoji that fit in a single UTF-16 code unit.
/// </summary>
[TestMethod]
public void IsEmoji_ShouldReturnTrue_GivenBasicEmoji()
{
    Assert.IsTrue('✂'.IsEmoji());
    Assert.IsTrue('✅'.IsEmoji());
    Assert.IsTrue('❎'.IsEmoji());

    // Written as escapes: an empty character literal does not compile, and escapes survive tools that drop the glyphs.
    Assert.IsTrue('\u2795'.IsEmoji()); // U+2795 HEAVY PLUS SIGN
    Assert.IsTrue('\u2796'.IsEmoji()); // U+2796 HEAVY MINUS SIGN
}
/// <summary>
///     Verifies that <c>char.IsEmoji</c> returns <see langword="false" /> for each uppercase Latin letter A through Z.
/// </summary>
[TestMethod]
public void IsEmoji_ShouldReturnFalse_GivenNonEmoji()
{
    const string uppercaseLetters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    foreach (char candidate in uppercaseLetters)
    {
        Assert.IsFalse(candidate.IsEmoji());
    }
}
[TestMethod]
public void RepeatShouldBeCorrect()
{

View File

@ -8,6 +8,25 @@ namespace X10D.Tests.Text;
[TestClass]
public class RuneTests
{
/// <summary>
///     Verifies that <c>Rune.IsEmoji</c> returns <see langword="true" /> for emoji constructed from a single UTF-16 code unit.
/// </summary>
[TestMethod]
public void IsEmoji_ShouldReturnTrue_GivenBasicEmoji()
{
    Assert.IsTrue(new Rune('✂').IsEmoji());
    Assert.IsTrue(new Rune('✅').IsEmoji());
    Assert.IsTrue(new Rune('❎').IsEmoji());

    // Written as escapes: an empty character literal does not compile, and escapes survive tools that drop the glyphs.
    Assert.IsTrue(new Rune('\u2795').IsEmoji()); // U+2795 HEAVY PLUS SIGN
    Assert.IsTrue(new Rune('\u2796').IsEmoji()); // U+2796 HEAVY MINUS SIGN
}
/// <summary>
///     Verifies that <c>Rune.IsEmoji</c> returns <see langword="false" /> for each uppercase Latin letter A through Z.
/// </summary>
[TestMethod]
public void IsEmoji_ShouldReturnFalse_GivenNonEmoji()
{
    const string uppercaseLetters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    foreach (char candidate in uppercaseLetters)
    {
        var rune = new Rune(candidate);
        Assert.IsFalse(rune.IsEmoji());
    }
}
[TestMethod]
public void RepeatShouldBeCorrect()
{

View File

@ -1,4 +1,4 @@
using System.Text;
using System.Text;
#if NET5_0_OR_GREATER
using System.Text.Json.Serialization;
#endif
@ -199,6 +199,44 @@ public class StringTests
Assert.ThrowsException<ArgumentNullException>(() => "Hello World".GetBytes(null!));
}
/// <summary>
///     Verifies that <c>string.IsEmoji</c> returns <see langword="true" /> for strings that each hold exactly one emoji.
/// </summary>
[TestMethod]
public void IsEmoji_ShouldReturnTrue_GivenBasicEmoji()
{
    string[] samples = {"😀", "🤓", "🟦", "🟧", "🟨", "🟩", "🟪", "🟫", "📱", "🎨"};

    foreach (string sample in samples)
    {
        Assert.IsTrue(sample.IsEmoji());
    }
}
/// <summary>
///     Verifies that <c>string.IsEmoji</c> returns <see langword="true" /> for every ordered pair of the 26 regional
///     indicator symbols.
/// </summary>
[TestMethod]
public void IsEmoji_ShouldReturnTrue_GivenMultiByteEmoji()
{
    // U+1F1E6 is REGIONAL INDICATOR SYMBOL LETTER A; the following 25 code points cover B through Z.
    const int firstRegionalIndicator = 0x1F1E6;
    const int indicatorCount = 26;

    var indicators = new string[indicatorCount];
    for (var offset = 0; offset < indicatorCount; offset++)
    {
        indicators[offset] = char.ConvertFromUtf32(firstRegionalIndicator + offset);
    }

    foreach (string first in indicators)
    {
        foreach (string second in indicators)
        {
            string pair = first + second;
            Assert.IsTrue(pair.IsEmoji());
        }
    }
}
/// <summary>
///     Verifies that <c>string.IsEmoji</c> returns <see langword="false" /> for plain text.
/// </summary>
[TestMethod]
public void IsEmoji_ShouldReturnFalse_GivenNonEmoji()
{
    string[] plainText = {"Hello World", "Hello", "World"};

    foreach (string text in plainText)
    {
        Assert.IsFalse(text.IsEmoji());
    }
}
[TestMethod]
public void IsLower_ShouldReturnTrue_GivenLowercaseString()
{

View File

@ -78,4 +78,10 @@
</EmbeddedResource>
</ItemGroup>
<ItemGroup>
<!-- The generator project is attached as an analyzer and its output assembly is not referenced: -->
<!-- OutputItemType="Analyzer" hands it to the compiler, ReferenceOutputAssembly="false" omits the reference. -->
<ProjectReference Include="..\X10D.SourceGenerator\X10D.SourceGenerator.csproj"
OutputItemType="Analyzer"
ReferenceOutputAssembly="false"/>
</ItemGroup>
</Project>

View File

@ -8,6 +8,22 @@ namespace X10D.Text;
/// </summary>
public static class CharExtensions
{
/// <summary>
///     Determines whether this character, taken on its own, is an emoji.
/// </summary>
/// <param name="value">The character to test.</param>
/// <returns>
///     <see langword="true" /> if <paramref name="value" /> is an emoji; otherwise, <see langword="false" />.
/// </returns>
[Pure]
#if NETSTANDARD2_1
[MethodImpl(MethodImplOptions.AggressiveInlining)]
#else
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
#endif
public static bool IsEmoji(this char value)
{
    // The check itself lives in the string overload; the character is wrapped in a one-character string for it.
    string text = value.ToString();
    return text.IsEmoji();
}
/// <summary>
/// Returns a string composed of the current character repeated a specified number of times.
/// </summary>

View File

@ -10,6 +10,18 @@ namespace X10D.Text;
/// </summary>
public static class RuneExtensions
{
/// <summary>
///     Determines whether this rune, taken on its own, is an emoji.
/// </summary>
/// <param name="value">The rune to test.</param>
/// <returns>
///     <see langword="true" /> if <paramref name="value" /> is an emoji; otherwise, <see langword="false" />.
/// </returns>
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public static bool IsEmoji(this Rune value)
{
    // The check itself lives in the string overload; the rune is converted to its string form for it.
    string text = value.ToString();
    return text.IsEmoji();
}
/// <summary>
/// Returns a string composed of the current rune repeated a specified number of times.
/// </summary>

View File

@ -295,6 +295,31 @@ public static class StringExtensions
return encoding.GetBytes(value);
}
/// <summary>
///     Determines whether this string is an emoji, by testing it against the generated emoji regular expression.
/// </summary>
/// <param name="value">The string to test.</param>
/// <returns>
///     <see langword="true" /> if <paramref name="value" /> is an emoji; otherwise, <see langword="false" />.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="value" /> is <see langword="null" />.</exception>
[Pure]
#if NETSTANDARD2_1
[MethodImpl(MethodImplOptions.AggressiveInlining)]
#else
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
#endif
public static bool IsEmoji(this string value)
{
#if NET6_0_OR_GREATER
    ArgumentNullException.ThrowIfNull(value);
#else
    _ = value ?? throw new ArgumentNullException(nameof(value));
#endif

    var emojiPattern = EmojiRegex.Value;
    return emojiPattern.IsMatch(value);
}
/// <summary>
/// Determines if all alpha characters in this string are considered lowercase.
/// </summary>