1
0
mirror of https://github.com/oliverbooth/fdup.git synced 2024-11-14 16:15:41 +00:00
fdup/FindDuplicates/ListCommand.cs

188 lines
6.3 KiB
C#
Raw Normal View History

2024-04-16 20:33:16 +01:00
using System.Collections.Concurrent;
2024-04-16 20:15:07 +01:00
using System.Text;
using Humanizer;
2024-04-16 20:15:07 +01:00
using Spectre.Console;
using Spectre.Console.Cli;
namespace FindDuplicates;
internal sealed class ListCommand : AsyncCommand<ListSettings>
{
    /// <summary>Buckets candidate files by byte size; a file whose size is unique cannot have a duplicate.</summary>
    private readonly ConcurrentDictionary<long, ConcurrentBag<FileInfo>> _fileSizeMap = new();

    /// <summary>Buckets files by content hash; any bucket with more than one entry is a duplicate set.</summary>
    private readonly ConcurrentDictionary<string, ConcurrentBag<FileInfo>> _fileHashMap = new();

    /// <summary>
    ///     Scans the input directory (optionally recursively), hashes size-colliding files, and prints every
    ///     group of byte-identical files.
    /// </summary>
    /// <param name="context">The Spectre.Console command context.</param>
    /// <param name="settings">The parsed command-line settings.</param>
    /// <returns><c>0</c> on success; <c>-1</c> when the input path does not exist.</returns>
    public override async Task<int> ExecuteAsync(CommandContext context, ListSettings settings)
    {
        var inputDirectory = new DirectoryInfo(settings.InputPath);
        if (!inputDirectory.Exists)
        {
            AnsiConsole.MarkupLineInterpolated($"[red]{inputDirectory} does not exist![/]");
            return -1;
        }

        AnsiConsole.MarkupLineInterpolated($"Searching [cyan]{inputDirectory.FullName}[/]");
        AnsiConsole.MarkupLine($"Recursive mode is {(settings.Recursive ? "[green]ON" : "[red]OFF")}[/]");
        AnsiConsole.MarkupLine($"Using hash algorithm [cyan]{settings.Algorithm.Humanize()}[/]");

        await AnsiConsole.Status()
            .StartAsync("Waiting to hash files...", DoHashWaitAsync)
            .ConfigureAwait(false);

        AnsiConsole.WriteLine();

        int duplicates = 0;
        foreach ((string hash, ConcurrentBag<FileInfo> files) in _fileHashMap)
        {
            int fileCount = files.Count;
            if (fileCount <= 1)
                continue;

            // NOTE: this intentionally counts every member of the group, including the "original".
            duplicates += fileCount;

            AnsiConsole.MarkupLineInterpolated($"Found [cyan]{fileCount}[/] identical files");
            AnsiConsole.MarkupLineInterpolated($"{settings.Algorithm.Humanize()} [green]{hash}[/]:");

            foreach (FileInfo file in files)
                AnsiConsole.MarkupLineInterpolated($"- {file.FullName}");

            AnsiConsole.WriteLine();
        }

        if (duplicates == 0)
            AnsiConsole.MarkupLine("[green]No duplicates found![/]");
        else
            AnsiConsole.MarkupLineInterpolated($"[yellow]Found [cyan]{duplicates}[/] duplicates![/]");

        return 0;

        // Local function so the status callback can close over settings/inputDirectory.
        async Task DoHashWaitAsync(StatusContext ctx)
        {
            await WaitForHashCompletionAsync(settings, inputDirectory, ctx).ConfigureAwait(false);
        }
    }

    /// <summary>
    ///     Kicks off the duplicate search and polls the spawned hashing tasks, updating the status line with
    ///     live progress until all of them have completed.
    /// </summary>
    /// <param name="settings">The parsed command-line settings.</param>
    /// <param name="inputDirectory">The root directory being searched.</param>
    /// <param name="ctx">The status context used to report progress.</param>
    private async Task WaitForHashCompletionAsync(ListSettings settings,
        DirectoryInfo inputDirectory,
        StatusContext ctx)
    {
        var tasks = new List<Task>();
        SearchDuplicates(ctx, inputDirectory, settings, tasks);

        // Polling (rather than Task.WhenAll) keeps a live "N files remaining" count on screen.
        int incompleteTasks;
        do
        {
            incompleteTasks = tasks.Count(t => !t.IsCompleted);
            ctx.Status($"Waiting to hash {incompleteTasks} {(incompleteTasks == 1 ? "file" : "files")}...");
            ctx.Refresh();

            // FIX: the original spun this loop with no delay inside Task.Run, pinning a core at 100% CPU
            // for the entire hashing phase. A short sleep between polls keeps the UI responsive for free.
            if (incompleteTasks > 0)
                await Task.Delay(50).ConfigureAwait(false);
        } while (tasks.Count > 0 && incompleteTasks > 0);

        ctx.Status("Hash complete");
    }

    /// <summary>
    ///     Walks the directory tree collecting files into <see cref="_fileSizeMap"/>, then schedules one
    ///     hashing task per size bucket that contains potential duplicates.
    /// </summary>
    /// <param name="ctx">The status context used to show the directory currently being scanned.</param>
    /// <param name="inputDirectory">The root directory being searched.</param>
    /// <param name="settings">The parsed command-line settings.</param>
    /// <param name="tasks">Receives the hashing tasks so the caller can await their completion.</param>
    private void SearchDuplicates(StatusContext ctx, DirectoryInfo inputDirectory, ListSettings settings, ICollection<Task> tasks)
    {
        // Iterative DFS; recursion into AddChildDirectories only pushes, never walks.
        var directoryStack = new Stack<DirectoryInfo>([inputDirectory]);

        while (directoryStack.Count > 0)
        {
            DirectoryInfo currentDirectory = directoryStack.Pop();
            ctx.Status(currentDirectory.FullName.EscapeMarkup());

            AddChildDirectories(settings, currentDirectory, directoryStack);

            try
            {
                foreach (FileInfo file in currentDirectory.EnumerateFiles())
                {
                    try
                    {
                        // file.Length can throw (e.g. the file vanished mid-scan); report and keep going.
                        ConcurrentBag<FileInfo> cache = _fileSizeMap.GetOrAdd(file.Length, _ => []);
                        cache.Add(file);
                    }
                    catch (Exception ex)
                    {
                        AnsiConsole.MarkupLineInterpolated($"[red]Error:[/] {ex.Message}");
                    }
                }
            }
            catch (Exception ex)
            {
                // EnumerateFiles itself can throw (access denied, etc.); skip the directory.
                AnsiConsole.MarkupLineInterpolated($"[red]Error:[/] {ex.Message}");
            }
        }

        foreach ((_, ConcurrentBag<FileInfo> files) in _fileSizeMap)
        {
            // FIX: was `files.Count < 1`, which only skipped impossible empty buckets. A file whose size
            // is unique cannot have a duplicate, so hashing it is pure waste; skipping singleton buckets
            // produces identical output while avoiding the hash I/O entirely.
            if (files.Count <= 1)
                continue;

            tasks.Add(Task.Run(() =>
            {
                foreach (FileInfo file in files)
                {
                    try
                    {
                        AnsiConsole.MarkupLineInterpolated($"Checking hash for [cyan]{file.Name}[/]");
                        ProcessFile(file, settings);
                    }
                    catch (Exception ex)
                    {
                        AnsiConsole.MarkupLineInterpolated($"[red]Error:[/] {ex.Message}");
                    }
                }
            }));
        }

        // The size map is only a pre-filter; release the FileInfo references once tasks are scheduled.
        _fileSizeMap.Clear();
    }

    /// <summary>
    ///     Hashes a single file with the configured algorithm and records it in <see cref="_fileHashMap"/>.
    /// </summary>
    /// <param name="file">The file to hash.</param>
    /// <param name="settings">The parsed command-line settings (supplies the algorithm and verbosity).</param>
    private void ProcessFile(FileInfo file, ListSettings settings)
    {
        // Digest sizes are small (<= 64 bytes for SHA-512), so a stack buffer is safe here.
        Span<byte> buffer = stackalloc byte[settings.Algorithm.GetByteCount()];
        try
        {
            using FileStream stream = file.OpenRead();
            using var bufferedStream = new BufferedStream(stream, 1048576 /* 1MB */);
            settings.Algorithm.HashData(bufferedStream, buffer);

            string hash = ByteSpanToString(buffer);
            if (settings.Verbose)
                AnsiConsole.WriteLine($"{file.FullName} ->\n {hash}");

            ConcurrentBag<FileInfo> cache = _fileHashMap.GetOrAdd(hash, _ => []);
            cache.Add(file);
        }
        catch (Exception ex)
        {
            // Best-effort: an unreadable file is reported but never aborts the run.
            AnsiConsole.MarkupLineInterpolated($"[red]Error:[/] {ex.Message}");
        }
    }

    /// <summary>
    ///     Pushes the child directories of <paramref name="directory"/> onto the traversal stack when
    ///     recursive mode is enabled.
    /// </summary>
    /// <param name="settings">The parsed command-line settings (supplies the recursive flag).</param>
    /// <param name="directory">The directory whose children should be queued.</param>
    /// <param name="stack">The traversal stack to push onto.</param>
    private static void AddChildDirectories(ListSettings settings, DirectoryInfo directory, Stack<DirectoryInfo> stack)
    {
        if (!settings.Recursive)
            return;

        try
        {
            foreach (DirectoryInfo childDirectory in directory.EnumerateDirectories())
                stack.Push(childDirectory);
        }
        catch (Exception ex)
        {
            // Access-denied (and similar) on a subtree is reported, not fatal.
            AnsiConsole.MarkupLineInterpolated($"[red]Error:[/] {ex.Message}");
        }
    }

    /// <summary>Formats a digest as an upper-case hex string (e.g. <c>"0AFF"</c>).</summary>
    /// <param name="buffer">The raw digest bytes.</param>
    /// <returns>The upper-case hexadecimal representation of <paramref name="buffer"/>.</returns>
    private static string ByteSpanToString(ReadOnlySpan<byte> buffer)
    {
        // FIX: the original built the string with a per-byte interpolated-string append;
        // Convert.ToHexString yields the identical upper-case output in one allocation.
        return Convert.ToHexString(buffer);
    }
}