From 801dfe09cb005256b157348dd470142edb59804e Mon Sep 17 00:00:00 2001
From: Oliver Booth
Date: Wed, 17 Apr 2024 16:11:01 +0100
Subject: [PATCH] feat: add support for multiple hash algs

---
Review notes (text in this section is not part of the commit message):

* AlgorithmExtensions: an undefined Algorithm value now throws
  ArgumentOutOfRangeException instead of returning 0 / -1. The ProcessFile
  hunk below sizes its buffer from GetByteCount() and discards the HashData()
  result, so the old sentinels would have produced an empty hash string.
* AlgListCommand: the listed option values are lower-cased with
  ToLowerInvariant() and the redundant string interpolation is gone.
* NOTE(review): this copy of the patch had lost its line breaks and every
  angle-bracketed token. Generic type arguments (Span<byte>,
  ReadOnlySpan<byte>, CommandApp<ListCommand>, AsyncCommand<ListSettings>,
  AddCommand<AlgListCommand>) and the <ALGORITHM> value placeholder were
  restored by inference, and context-line indentation was rebuilt from the
  hunk sizes. Confirm against the repository.
* NOTE(review): the FindDuplicates.csproj hunk lost its XML body (presumably
  the Humanizer package reference) and cannot be applied as shown. Index blob
  ids were not regenerated. Re-export the patch from the repository before
  applying it.

 FindDuplicates/AlgListCommand.cs      | 31 ++++++++++++++
 FindDuplicates/Algorithm.cs           | 18 ++++++++
 FindDuplicates/AlgorithmExtensions.cs | 62 +++++++++++++++++++++++++++
 FindDuplicates/FindDuplicates.csproj  |  1 +
 FindDuplicates/ListCommand.cs         |  9 ++--
 FindDuplicates/ListSettings.cs        |  5 +++
 FindDuplicates/Program.cs             |  1 +
 7 files changed, 123 insertions(+), 4 deletions(-)
 create mode 100644 FindDuplicates/AlgListCommand.cs
 create mode 100644 FindDuplicates/Algorithm.cs
 create mode 100644 FindDuplicates/AlgorithmExtensions.cs

diff --git a/FindDuplicates/AlgListCommand.cs b/FindDuplicates/AlgListCommand.cs
new file mode 100644
index 0000000..e716486
--- /dev/null
+++ b/FindDuplicates/AlgListCommand.cs
@@ -0,0 +1,31 @@
+using System.ComponentModel;
+using Humanizer;
+using Spectre.Console;
+using Spectre.Console.Cli;
+
+namespace FindDuplicates;
+
+/// <summary>
+///     Lists every <see cref="Algorithm" /> value alongside its lower-cased member name.
+/// </summary>
+[Description("Display a list of usable hashing algorithms.")]
+internal sealed class AlgListCommand : Command
+{
+    /// <inheritdoc />
+    public override int Execute(CommandContext context)
+    {
+        AnsiConsole.WriteLine("The default algorithm fdup uses is SHA512.");
+        AnsiConsole.MarkupLine("To specify a different one, use the [cyan]-a[/] or [cyan]--algorithm[/] flag, and pass one of the values below:");
+
+        var table = new Table();
+        table.AddColumn("Algorithm");
+        table.AddColumn("Value");
+
+        // Lower-case with the invariant culture so the listed values do not depend on the user's locale.
+        foreach (Algorithm algorithm in Enum.GetValues<Algorithm>())
+            table.AddRow(algorithm.Humanize(), algorithm.ToString().ToLowerInvariant());
+
+        AnsiConsole.Write(table);
+        return 0;
+    }
+}
diff --git a/FindDuplicates/Algorithm.cs b/FindDuplicates/Algorithm.cs
new file mode 100644
index 0000000..f409614
--- /dev/null
+++ b/FindDuplicates/Algorithm.cs
@@ -0,0 +1,18 @@
+using System.ComponentModel;
+
+namespace FindDuplicates;
+
+/// <summary>
+///     Hash algorithms that can be selected with the <c>--algorithm</c> option.
+/// </summary>
+internal enum Algorithm
+{
+    [Description("SHA512")] Sha512,
+    [Description("SHA384")] Sha384,
+    [Description("SHA256")] Sha256,
+    [Description("SHA3-512")] Sha3512,
+    [Description("SHA3-384")] Sha3384,
+    [Description("SHA3-256")] Sha3256,
+    [Description("SHA1")] Sha1,
+    [Description("MD5")] Md5
+}
diff --git a/FindDuplicates/AlgorithmExtensions.cs b/FindDuplicates/AlgorithmExtensions.cs
new file mode 100644
index 0000000..f8e1719
--- /dev/null
+++ b/FindDuplicates/AlgorithmExtensions.cs
@@ -0,0 +1,62 @@
+using System.Security.Cryptography;
+
+namespace FindDuplicates;
+
+/// <summary>
+///     Maps <see cref="Algorithm" /> values onto the static hashing APIs in <c>System.Security.Cryptography</c>.
+/// </summary>
+internal static class AlgorithmExtensions
+{
+    /// <summary>
+    ///     Gets the size, in bytes, of a hash produced by the specified algorithm.
+    /// </summary>
+    /// <param name="algorithm">The algorithm to query.</param>
+    /// <returns>The value of the matching hash type's <c>HashSizeInBytes</c> constant.</returns>
+    /// <exception cref="ArgumentOutOfRangeException">
+    ///     <paramref name="algorithm" /> is not a defined <see cref="Algorithm" /> value.
+    /// </exception>
+    public static int GetByteCount(this Algorithm algorithm)
+    {
+        return algorithm switch
+        {
+            Algorithm.Sha512 => SHA512.HashSizeInBytes,
+            Algorithm.Sha384 => SHA384.HashSizeInBytes,
+            Algorithm.Sha256 => SHA256.HashSizeInBytes,
+            Algorithm.Sha3512 => SHA3_512.HashSizeInBytes,
+            Algorithm.Sha3384 => SHA3_384.HashSizeInBytes,
+            Algorithm.Sha3256 => SHA3_256.HashSizeInBytes,
+            Algorithm.Sha1 => SHA1.HashSizeInBytes,
+            Algorithm.Md5 => MD5.HashSizeInBytes,
+            _ => throw new ArgumentOutOfRangeException(nameof(algorithm), algorithm, "Unknown hash algorithm.")
+        };
+    }
+
+    /// <summary>
+    ///     Hashes <paramref name="source" /> with the specified algorithm and writes the result to
+    ///     <paramref name="destination" />.
+    /// </summary>
+    /// <param name="algorithm">The algorithm to hash with.</param>
+    /// <param name="source">The stream to hash.</param>
+    /// <param name="destination">The buffer that receives the hash value.</param>
+    /// <returns>The value returned by the matching hash type's <c>HashData</c> method.</returns>
+    /// <exception cref="ArgumentOutOfRangeException">
+    ///     <paramref name="algorithm" /> is not a defined <see cref="Algorithm" /> value.
+    /// </exception>
+    public static int HashData(this Algorithm algorithm, Stream source, Span<byte> destination)
+    {
+        // A cached delegate map would be nicer, but Span<> cannot be used as a generic type argument,
+        // so each algorithm is dispatched explicitly through this switch expression.
+        return algorithm switch
+        {
+            Algorithm.Sha512 => SHA512.HashData(source, destination),
+            Algorithm.Sha384 => SHA384.HashData(source, destination),
+            Algorithm.Sha256 => SHA256.HashData(source, destination),
+            Algorithm.Sha3512 => SHA3_512.HashData(source, destination),
+            Algorithm.Sha3384 => SHA3_384.HashData(source, destination),
+            Algorithm.Sha3256 => SHA3_256.HashData(source, destination),
+            Algorithm.Sha1 => SHA1.HashData(source, destination),
+            Algorithm.Md5 => MD5.HashData(source, destination),
+            _ => throw new ArgumentOutOfRangeException(nameof(algorithm), algorithm, "Unknown hash algorithm.")
+        };
+    }
+}
diff --git a/FindDuplicates/FindDuplicates.csproj b/FindDuplicates/FindDuplicates.csproj
index 09d7ddb..51d68c8 100644
--- a/FindDuplicates/FindDuplicates.csproj
+++ b/FindDuplicates/FindDuplicates.csproj
@@ -33,6 +33,7 @@
+
diff --git a/FindDuplicates/ListCommand.cs b/FindDuplicates/ListCommand.cs
index 4c571bc..66669c8 100644
--- a/FindDuplicates/ListCommand.cs
+++ b/FindDuplicates/ListCommand.cs
@@ -1,6 +1,6 @@
 using System.Collections.Concurrent;
-using System.Security.Cryptography;
 using System.Text;
+using Humanizer;
 using Spectre.Console;
 using Spectre.Console.Cli;
 
@@ -21,6 +21,7 @@ internal sealed class ListCommand : AsyncCommand<ListSettings>
 
         AnsiConsole.MarkupLineInterpolated($"Searching [cyan]{inputDirectory.FullName}[/]");
         AnsiConsole.MarkupLine($"Recursive mode is {(settings.Recursive ? "[green]ON" : "[red]OFF")}[/]");
+        AnsiConsole.MarkupLine($"Using hash algorithm [cyan]{settings.Algorithm.Humanize()}[/]");
 
         await AnsiConsole.Status()
             .StartAsync("Waiting to hash files...", DoHashWaitAsync)
@@ -109,12 +110,12 @@ internal sealed class ListCommand : AsyncCommand<ListSettings>
 
     private void ProcessFile(FileInfo file, ListSettings settings)
     {
-        Span<byte> buffer = stackalloc byte[64];
+        Span<byte> buffer = stackalloc byte[settings.Algorithm.GetByteCount()];
         try
         {
             using FileStream stream = file.OpenRead();
             using BufferedStream bufferedStream = new BufferedStream(stream, 1048576 /* 1MB */);
-            SHA512.HashData(bufferedStream, buffer);
+            settings.Algorithm.HashData(bufferedStream, buffer);
             string hash = ByteSpanToString(buffer);
             if (settings.Verbose)
                 AnsiConsole.WriteLine($"{file.FullName} ->\n {hash}");
@@ -146,7 +147,7 @@ internal sealed class ListCommand : AsyncCommand<ListSettings>
 
     private static string ByteSpanToString(ReadOnlySpan<byte> buffer)
     {
-        var builder = new StringBuilder();
+        var builder = new StringBuilder(buffer.Length * 2);
         foreach (byte b in buffer)
             builder.Append($"{b:X2}");
 
diff --git a/FindDuplicates/ListSettings.cs b/FindDuplicates/ListSettings.cs
index ecc14b6..987d14d 100644
--- a/FindDuplicates/ListSettings.cs
+++ b/FindDuplicates/ListSettings.cs
@@ -10,6 +10,11 @@ internal sealed class ListSettings : CommandSettings
     [DefaultValue(".")]
     public string InputPath { get; set; } = ".";
 
+    [CommandOption("-a|--algorithm <ALGORITHM>")]
+    [Description("The hash algorithm used for comparison. Defaults to SHA512. For a list of all available algorithms, run fdup alglist")]
+    [DefaultValue(Algorithm.Sha512)]
+    public Algorithm Algorithm { get; set; } = Algorithm.Sha512;
+
     [CommandOption("-r|--recursive")]
     [Description("Scans the directory recursively. This may increase run time and is not advised to use when at high order directories such as C: or /")]
     [DefaultValue(false)]
diff --git a/FindDuplicates/Program.cs b/FindDuplicates/Program.cs
index 459b4cd..7647439 100644
--- a/FindDuplicates/Program.cs
+++ b/FindDuplicates/Program.cs
@@ -2,4 +2,5 @@ using FindDuplicates;
 using Spectre.Console.Cli;
 
 var app = new CommandApp<ListCommand>();
+app.Configure(cfg => cfg.AddCommand<AlgListCommand>("alglist"));
 await app.RunAsync(args).ConfigureAwait(false);