From d405faf6e21397279c49c2f31efda856084606c6 Mon Sep 17 00:00:00 2001 From: Oliver Booth Date: Sat, 7 May 2022 23:37:14 +0100 Subject: [PATCH] Add IsEmoji method Emoji validation is performed by using a regex which is generated from Twemoji. Source: https://raw.githubusercontent.com/twitter/twemoji-parser/master/src/lib/regex.js --- CHANGELOG.md | 3 + X10D.SourceGenerator/EmojiRegexGenerator.cs | 68 +++++++++++++++++++ .../X10D.SourceGenerator.csproj | 18 +++++ X10D.Tests/src/Text/CharTests.cs | 22 +++++- X10D.Tests/src/Text/RuneTests.cs | 19 ++++++ X10D.Tests/src/Text/StringTests.cs | 40 ++++++++++- X10D/X10D.csproj | 6 ++ X10D/src/Text/CharExtensions.cs | 16 +++++ X10D/src/Text/RuneExtensions.cs | 12 ++++ X10D/src/Text/StringExtensions.cs | 25 +++++++ 10 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 X10D.SourceGenerator/EmojiRegexGenerator.cs create mode 100644 X10D.SourceGenerator/X10D.SourceGenerator.csproj diff --git a/CHANGELOG.md b/CHANGELOG.md index 3597928..f25ffcc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,9 @@ - Added `IEnumerable.ForEach` (#50) - Added `IEnumerable.DisposeAll` - Added `IEnumerable.DisposeAllAsync` +- Added `char.IsEmoji` +- Added `Rune.IsEmoji` +- Added `string.IsEmoji` ## [3.0.0] diff --git a/X10D.SourceGenerator/EmojiRegexGenerator.cs b/X10D.SourceGenerator/EmojiRegexGenerator.cs new file mode 100644 index 0000000..9b44b2f --- /dev/null +++ b/X10D.SourceGenerator/EmojiRegexGenerator.cs @@ -0,0 +1,68 @@ +using System.Text; +using System.Text.RegularExpressions; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.Text; + +namespace X10D.SourceGenerator; + +[Generator] +internal sealed class EmojiRegexGenerator : ISourceGenerator +{ + // ReSharper disable once IdentifierTypo + private const string TwemojiRegexUrl = "https://raw.githubusercontent.com/twitter/twemoji-parser/master/src/lib/regex.js"; + private static readonly HttpClient HttpClient = new(); + private string _emojiRegex = string.Empty; + + /// + public void Initialize(GeneratorInitializationContext context) + { + string response = HttpClient.GetStringAsync(TwemojiRegexUrl).GetAwaiter().GetResult(); + using var reader = new StringReader(response); + + while (reader.ReadLine() is { } line) + { + if (!line.StartsWith("export default /")) + { + continue; + } + + Match match = Regex.Match(line, @"export default /(?.+)/g;"); + if (!match.Success) + { + continue; + } + + _emojiRegex = $"^{match.Groups["regex"].Value}$"; + break; + } + } + + /// + public void Execute(GeneratorExecutionContext context) + { + if (string.IsNullOrEmpty(_emojiRegex)) + { + return; + } + + var builder = new StringBuilder(); + builder.AppendLine("// This file was auto-generated by X10D.SourceGenerator"); + builder.AppendLine("// Do not edit this file manually"); + builder.AppendLine(); + + builder.AppendLine("using System.Text.RegularExpressions;"); + builder.AppendLine(); + builder.AppendLine("namespace X10D.Text;"); + builder.AppendLine(); + builder.AppendLine("internal static class EmojiRegex"); + builder.AppendLine("{"); + builder.AppendLine(" internal static readonly Regex Value = new Regex("); + builder.AppendLine($" @\"{_emojiRegex}\","); + // ReSharper disable once StringLiteralTypo + builder.AppendLine(" RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.Singleline"); + builder.AppendLine(" );"); + builder.AppendLine("}"); + + context.AddSource("EmojiRegex.g.cs", SourceText.From(builder.ToString(), Encoding.UTF8)); + } +} diff --git a/X10D.SourceGenerator/X10D.SourceGenerator.csproj b/X10D.SourceGenerator/X10D.SourceGenerator.csproj new file mode 100644 index 0000000..8220df9 --- /dev/null +++ b/X10D.SourceGenerator/X10D.SourceGenerator.csproj @@ -0,0 +1,18 @@ + + + + netstandard2.0 + 10.0 + enable + enable + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + diff --git a/X10D.Tests/src/Text/CharTests.cs b/X10D.Tests/src/Text/CharTests.cs index cea0606..7963f21 100644 --- a/X10D.Tests/src/Text/CharTests.cs +++ b/X10D.Tests/src/Text/CharTests.cs @@ -1,4 +1,5 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +using System.Text; +using Microsoft.VisualStudio.TestTools.UnitTesting; using X10D.Text; namespace X10D.Tests.Text; @@ -6,6 +7,25 @@ namespace X10D.Tests.Text; [TestClass] public class CharTests { + [TestMethod] + public void IsEmoji_ShouldReturnTrue_GivenBasicEmoji() + { + Assert.IsTrue('✂'.IsEmoji()); + Assert.IsTrue('✅'.IsEmoji()); + Assert.IsTrue('❎'.IsEmoji()); + Assert.IsTrue('➕'.IsEmoji()); + Assert.IsTrue('➖'.IsEmoji()); + } + + [TestMethod] + public void IsEmoji_ShouldReturnFalse_GivenNonEmoji() + { + for (var letter = 'A'; letter <= 'Z'; letter++) + { + Assert.IsFalse(letter.IsEmoji()); + } + } + [TestMethod] public void RepeatShouldBeCorrect() { diff --git a/X10D.Tests/src/Text/RuneTests.cs b/X10D.Tests/src/Text/RuneTests.cs index 8edfa9b..c8181b1 100644 --- a/X10D.Tests/src/Text/RuneTests.cs +++ b/X10D.Tests/src/Text/RuneTests.cs @@ -8,6 +8,25 @@ namespace X10D.Tests.Text; [TestClass] public class RuneTests { + [TestMethod] + public void IsEmoji_ShouldReturnTrue_GivenBasicEmoji() + { + Assert.IsTrue(new Rune('✂').IsEmoji()); + Assert.IsTrue(new Rune('✅').IsEmoji()); + Assert.IsTrue(new Rune('❎').IsEmoji()); + Assert.IsTrue(new Rune('➕').IsEmoji()); + Assert.IsTrue(new Rune('➖').IsEmoji()); + } + + [TestMethod] + public void IsEmoji_ShouldReturnFalse_GivenNonEmoji() + { + for (var letter = 'A'; letter <= 'Z'; letter++) + { + Assert.IsFalse(new Rune(letter).IsEmoji()); + } + } + [TestMethod] public void RepeatShouldBeCorrect() { diff --git a/X10D.Tests/src/Text/StringTests.cs b/X10D.Tests/src/Text/StringTests.cs index 852bec2..fa99f40 100644 --- a/X10D.Tests/src/Text/StringTests.cs +++ b/X10D.Tests/src/Text/StringTests.cs @@ -1,4 +1,4 @@ -using System.Text; +using System.Text; #if NET5_0_OR_GREATER using System.Text.Json.Serialization; #endif @@ -199,6 +199,44 @@ public class StringTests Assert.ThrowsException(() => "Hello World".GetBytes(null!)); } + [TestMethod] + public void IsEmoji_ShouldReturnTrue_GivenBasicEmoji() + { + Assert.IsTrue("😀".IsEmoji()); + Assert.IsTrue("🤓".IsEmoji()); + Assert.IsTrue("🟦".IsEmoji()); + Assert.IsTrue("🟧".IsEmoji()); + Assert.IsTrue("🟨".IsEmoji()); + Assert.IsTrue("🟩".IsEmoji()); + Assert.IsTrue("🟪".IsEmoji()); + Assert.IsTrue("🟫".IsEmoji()); + Assert.IsTrue("📱".IsEmoji()); + Assert.IsTrue("🎨".IsEmoji()); + } + + [TestMethod] + public void IsEmoji_ShouldReturnTrue_GivenMultiByteEmoji() + { + string[] regionalIndicatorCodes = Enumerable.Range(0, 26) + .Select(i => Encoding.Unicode.GetString(new byte[] {0x3C, 0xD8, (byte)(0xE6 + i), 0xDD})) + .ToArray(); + + for (var i = 0; i < 26; i++) + for (var j = 0; j < 26; j++) + { + string flag = (regionalIndicatorCodes[i] + regionalIndicatorCodes[j]); + Assert.IsTrue(flag.IsEmoji()); + } + } + + [TestMethod] + public void IsEmoji_ShouldReturnFalse_GivenNonEmoji() + { + Assert.IsFalse("Hello World".IsEmoji()); + Assert.IsFalse("Hello".IsEmoji()); + Assert.IsFalse("World".IsEmoji()); + } + [TestMethod] public void IsLower_ShouldReturnTrue_GivenLowercaseString() { diff --git a/X10D/X10D.csproj b/X10D/X10D.csproj index b72b7f1..346271b 100644 --- a/X10D/X10D.csproj +++ b/X10D/X10D.csproj @@ -78,4 +78,10 @@ + + + + \ No newline at end of file diff --git a/X10D/src/Text/CharExtensions.cs b/X10D/src/Text/CharExtensions.cs index c16e201..1e6d9f6 100644 --- a/X10D/src/Text/CharExtensions.cs +++ b/X10D/src/Text/CharExtensions.cs @@ -8,6 +8,22 @@ namespace X10D.Text; /// public static class CharExtensions { + /// + /// Returns a value indicating whether this character constitutes an emoji. + /// + /// The character to check. + /// if this character is an emoji; otherwise, . + [Pure] +#if NETSTANDARD2_1 + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#else + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] +#endif + public static bool IsEmoji(this char value) + { + return value.ToString().IsEmoji(); + } + /// /// Returns a string composed of the current character repeated a specified number of times. /// diff --git a/X10D/src/Text/RuneExtensions.cs b/X10D/src/Text/RuneExtensions.cs index 0a70866..0249fa3 100644 --- a/X10D/src/Text/RuneExtensions.cs +++ b/X10D/src/Text/RuneExtensions.cs @@ -10,6 +10,18 @@ namespace X10D.Text; /// public static class RuneExtensions { + /// + /// Returns a value indicating whether this rune constitutes an emoji. + /// + /// The rune to check. + /// if this rune is an emoji; otherwise, . + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + public static bool IsEmoji(this Rune value) + { + return value.ToString().IsEmoji(); + } + /// /// Returns a string composed of the current rune repeated a specified number of times. /// diff --git a/X10D/src/Text/StringExtensions.cs b/X10D/src/Text/StringExtensions.cs index c4cdfa0..b7ffa57 100644 --- a/X10D/src/Text/StringExtensions.cs +++ b/X10D/src/Text/StringExtensions.cs @@ -295,6 +295,31 @@ public static class StringExtensions return encoding.GetBytes(value); } + /// + /// Returns a value indicating whether this string constitutes an emoji. + /// + /// The input string. + /// if this string is an emoji; otherwise, . + [Pure] +#if NETSTANDARD2_1 + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#else + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] +#endif + public static bool IsEmoji(this string value) + { +#if NET6_0_OR_GREATER + ArgumentNullException.ThrowIfNull(value); +#else + if (value is null) + { + throw new ArgumentNullException(nameof(value)); + } +#endif + + return EmojiRegex.Value.IsMatch(value); + } + /// /// Determines if all alpha characters in this string are considered lowercase. ///