Skip to content

Instantly share code, notes, and snippets.

@kenkendk
Created January 11, 2016 13:46
Show Gist options
  • Select an option

  • Save kenkendk/62f56a39f68f1d580d77 to your computer and use it in GitHub Desktop.

Select an option

Save kenkendk/62f56a39f68f1d580d77 to your computer and use it in GitHub Desktop.
Hash Collision Checking code
using System;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Collections.Generic;
namespace HashCollisionChecker
{
class MainClass
{
/// <summary>
/// Settings for a run. Main reads the public fields of this class through reflection:
/// it lists them in the usage text and matches "--name=value" arguments against the
/// field names, ignoring case. They are therefore kept as public fields.
/// </summary>
private class Options
{
    // Number of bytes read and hashed per block; 1 MiB unless overridden.
    public int BlockSize = 1 << 20;

    // File that receives one hash-list line per block.
    public string Outputfile = "data.csv";

    // File that receives the collision report.
    public string Collisionfile = "collisions.txt";

    // When false, an existing output file is used as input instead of being rebuilt.
    public bool Overwrite = false;
}
/// <summary>
/// One record of the hash list: the size and offset of a block, its SHA1, SHA256 and MD5
/// values as text, and the path of the file the block was read from.
/// A record is stored as a single line of six fields separated by ", ".
/// Two entries are equal when all six fields are equal, so that set operations such as
/// Distinct and Union work on entries that were parsed in separate passes over the file.
/// </summary>
private class LineEntry
{
    // Delimiter shared by ToString() and the parsing constructor.
    private const string Separator = ", ";

    // Number of fields on a line; the path is last so it may itself contain the separator.
    private const int FieldCount = 6;

    public long Size { get; private set; }
    public long Offset { get; private set; }
    public string Sha1 { get; private set; }
    public string Sha256 { get; private set; }
    public string MD5 { get; private set; }
    public string Path { get; private set; }

    /// <summary>
    /// Parses one line in the format written by <see cref="ToString"/>.
    /// </summary>
    /// <param name="line">The line to parse.</param>
    /// <exception cref="FormatException">The line has fewer than six fields or a non-numeric size/offset.</exception>
    private LineEntry(string line)
    {
        // Limiting the split to FieldCount keeps a path containing ", " intact in the last field.
        var parts = line.Split(new string[] { Separator }, FieldCount, StringSplitOptions.None);
        if (parts.Length != FieldCount)
            throw new FormatException(string.Format("Malformed hash list line, expected {0} fields but found {1}: {2}", FieldCount, parts.Length, line));

        // The file is machine-readable data, so numbers are parsed independently of the user's culture.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        Size = long.Parse(parts[0], invariant);
        Offset = long.Parse(parts[1], invariant);
        Sha1 = parts[2].Trim();
        Sha256 = parts[3].Trim();
        MD5 = parts[4].Trim();
        Path = parts[5];
    }

    /// <summary>
    /// Creates an entry from already known values.
    /// </summary>
    public LineEntry(long size, long offset, string sha1, string sha256, string md5, string path)
    {
        Size = size;
        Offset = offset;
        Sha1 = sha1;
        Sha256 = sha256;
        MD5 = md5;
        Path = path;
    }

    /// <summary>
    /// Formats the entry as a hash-list line: "size, offset, sha1, sha256, md5, path".
    /// </summary>
    public override string ToString()
    {
        return string.Format(System.Globalization.CultureInfo.InvariantCulture, "{0}, {1}, {2}, {3}, {4}, {5}", Size, Offset, Sha1, Sha256, MD5, Path);
    }

    /// <summary>
    /// Compares all six fields; strings are compared ordinally.
    /// </summary>
    public override bool Equals(object obj)
    {
        var other = obj as LineEntry;
        if (ReferenceEquals(other, null))
            return false;

        return Size == other.Size
            && Offset == other.Offset
            && string.Equals(Sha1, other.Sha1, StringComparison.Ordinal)
            && string.Equals(Sha256, other.Sha256, StringComparison.Ordinal)
            && string.Equals(MD5, other.MD5, StringComparison.Ordinal)
            && string.Equals(Path, other.Path, StringComparison.Ordinal);
    }

    /// <summary>
    /// Hash code consistent with <see cref="Equals(object)"/>.
    /// </summary>
    public override int GetHashCode()
    {
        unchecked
        {
            var hash = 17;
            hash = hash * 31 + Size.GetHashCode();
            hash = hash * 31 + Offset.GetHashCode();
            hash = hash * 31 + (Sha1 == null ? 0 : Sha1.GetHashCode());
            hash = hash * 31 + (Sha256 == null ? 0 : Sha256.GetHashCode());
            hash = hash * 31 + (MD5 == null ? 0 : MD5.GetHashCode());
            hash = hash * 31 + (Path == null ? 0 : Path.GetHashCode());
            return hash;
        }
    }

    /// <summary>
    /// Lazily reads a hash list, yielding one entry per non-blank line.
    /// The file stays open until the returned sequence has been fully enumerated or disposed.
    /// </summary>
    /// <param name="filename">Path of the hash list to read.</param>
    /// <exception cref="FormatException">Thrown during enumeration when a line is malformed.</exception>
    public static IEnumerable<LineEntry> ParseFile(string filename)
    {
        using (var f = File.OpenRead(filename))
        using (var fs = new StreamReader(f))
        {
            string line;
            while ((line = fs.ReadLine()) != null)
            {
                // A stray blank line (for example at the end of a hand-edited file) carries no entry.
                if (string.IsNullOrWhiteSpace(line))
                    continue;

                yield return new LineEntry(line);
            }
        }
    }

    /// <summary>
    /// Reads a hash list and groups its entries by the key chosen by <paramref name="keysel"/>.
    /// </summary>
    /// <param name="filename">Path of the hash list to read.</param>
    /// <param name="keysel">Selects the grouping key, for instance one of the hash values.</param>
    /// <returns>A map from key to all entries with that key, in file order.</returns>
    public static Dictionary<string, LineEntry[]> BuildLookup(string filename, Func<LineEntry, string> keysel)
    {
        var res = new Dictionary<string, LineEntry[]>();
        foreach (var entry in ParseFile(filename))
        {
            var key = keysel(entry);

            // Plain arrays are grown on demand: most keys hold a single entry,
            // and a one-element array is the smallest container for that case.
            LineEntry[] bucket;
            if (res.TryGetValue(key, out bucket))
            {
                Array.Resize(ref bucket, bucket.Length + 1);
                bucket[bucket.Length - 1] = entry;
            }
            else
            {
                bucket = new LineEntry[] { entry };
            }

            res[key] = bucket;
        }

        return res;
    }
}
/// <summary>
/// Program entry point. Hashes every input file in blocks of Options.BlockSize bytes and
/// writes one line per block (size, offset, SHA1, SHA256, MD5, path) to the output file,
/// unless that file already exists and overwrite is off. It then scans the list for
/// entries that share a SHA256 value while their SHA1 or MD5 values differ, and writes
/// those entries together with their block data to the collision file.
/// </summary>
/// <param name="args">
/// Input files or directories, plus optional "--name=value" switches that are matched,
/// ignoring case, against the public fields of Options. A boolean switch may be given
/// without a value, which means true.
/// </param>
/// <returns>
/// 0 when no collisions were found; 1 for usage or setup errors and when collisions were
/// found; 2 when an unexpected exception ended the run.
/// </returns>
public static int Main(string[] args)
{
    try
    {
        var opts = new Options();
        args = args ?? new string[0];

        // Materialized once: both lists are enumerated more than once below.
        var inputfiles = args.Where(n => !n.StartsWith("--", StringComparison.Ordinal)).ToArray();
        var optionargs = args.Where(n => n.StartsWith("--", StringComparison.Ordinal)).ToArray();

        if (inputfiles.Length == 0)
        {
            Console.WriteLine("Usage: ");
            Console.WriteLine(" HashCollisionChecker.exe <file1> <file2> ... <fileN> " + string.Join(" ", from n in typeof(Options).GetFields() select string.Format("[--{0}={1}]", n.Name.ToLowerInvariant(), n.GetValue(opts))));
            Console.WriteLine();
            Console.WriteLine("Output files are created in the current directory, make sure it is write-able");
            return 1;
        }

        if (!ApplyOptions(opts, optionargs))
            return 1;

        if (opts.BlockSize <= 0)
        {
            // A zero-length buffer would make every read return 0 and silently hash nothing.
            Console.WriteLine("The block size must be a positive number of bytes, got: {0}", opts.BlockSize);
            return 1;
        }

        if (string.IsNullOrEmpty(opts.Outputfile) || string.IsNullOrEmpty(opts.Collisionfile))
        {
            Console.WriteLine("The output file and the collision file must not be empty");
            return 1;
        }

        opts.Outputfile = Path.GetFullPath(opts.Outputfile);
        opts.Collisionfile = Path.GetFullPath(opts.Collisionfile);

        if (File.Exists(opts.Collisionfile))
        {
            Console.WriteLine("Collision file already exists: {0}", opts.Collisionfile);
            return 1;
        }

        // Probe for write access before the long hashing phase, then remove the probe file again.
        try
        {
            File.Create(opts.Collisionfile).Dispose();
        }
        catch
        {
            Console.WriteLine("Failed to create collision file, check that you have write permissions to {0}", opts.Collisionfile);
            return 1;
        }

        File.Delete(opts.Collisionfile);

        if (!opts.Overwrite && File.Exists(opts.Outputfile))
        {
            Console.WriteLine("Target file already exists: {0}", opts.Outputfile);
            Console.WriteLine("Skipping the hash building process and using the existing file as input");
            Console.WriteLine("Specify another output file with --outputfile, or allow overwrite with --overwrite=true to restart the hashing");
        }
        else
        {
            BuildHashList(opts, inputfiles);
        }

        Console.WriteLine("Completed building hash list for all input files, checking for duplicate hashes.");
        Console.WriteLine("This is very memory intensive, and may cause the process to crash.");

        //TODO: Could also account for the length of data, to avoid collisions with small and large blocks
        // Each pass re-reads the file and keeps only its duplicates, so just one full lookup is alive at a time.
        var sha1_duplicates = CollectDuplicates(opts.Outputfile, "SHA1", x => x.Sha1);
        var sha256_duplicates = CollectDuplicates(opts.Outputfile, "SHA256", x => x.Sha256);
        var md5_duplicates = CollectDuplicates(opts.Outputfile, "MD5", x => x.MD5);
        GC.Collect();

        Console.WriteLine("Done collecting duplicates, checking for collisions");

        // The three passes parse the file separately, so the same line exists as up to three
        // different objects. Group on the serialized line to keep exactly one of each.
        var flat = sha1_duplicates
            .Concat(sha256_duplicates)
            .Concat(md5_duplicates)
            .GroupBy(x => x.ToString(), StringComparer.Ordinal)
            .Select(g => g.First())
            .ToArray();

        Console.WriteLine("Checking for collisions in {0} entries", flat.Length);

        var collisions = FindCollisions(flat);
        if (collisions.Count == 0)
        {
            Console.WriteLine("No collisions found");
            return 0;
        }

        Console.WriteLine("Found {0} collisions !!!", collisions.Count);
        Console.WriteLine("Writing collision report to {0} ...", opts.Collisionfile);
        WriteCollisionReport(opts.Collisionfile, collisions);
        return 1;
    }
    catch (Exception ex)
    {
        Console.WriteLine("Crash!");
        Console.WriteLine(ex.ToString());
        return 2;
    }
}

/// <summary>
/// Applies "--name=value" arguments to the matching public fields of the options object.
/// Unknown names are reported and skipped.
/// </summary>
/// <returns>False, after printing the reason, when a value is missing or cannot be converted.</returns>
private static bool ApplyOptions(Options opts, IEnumerable<string> optionargs)
{
    var optfields = typeof(Options).GetFields();
    foreach (var opt in optionargs)
    {
        var parts = opt.Split(new char[] { '=' }, 2);
        var name = parts[0].Substring(2);
        var value = parts.Length == 1 ? null : parts[1];

        var field = optfields.FirstOrDefault(x => string.Equals(x.Name, name, StringComparison.OrdinalIgnoreCase));
        if (field == null)
        {
            Console.WriteLine("No option named: {0}", name);
            continue;
        }

        if (value == null)
        {
            // Only a boolean switch makes sense without a value; converting null to a value type would throw.
            if (field.FieldType != typeof(bool))
            {
                Console.WriteLine("Option --{0} requires a value", name);
                return false;
            }

            value = "true";
        }

        try
        {
            field.SetValue(opts, Convert.ChangeType(value, field.FieldType, System.Globalization.CultureInfo.InvariantCulture));
        }
        catch (Exception ex)
        {
            // Conversion failures are user errors; anything else is unexpected and is passed on.
            if (!(ex is FormatException) && !(ex is OverflowException) && !(ex is InvalidCastException))
                throw;

            Console.WriteLine("Invalid value for option --{0}: {1}", name, ex.Message);
            return false;
        }
    }

    return true;
}

/// <summary>
/// Hashes every input file block by block and writes one line per block to the output file.
/// Directories are searched recursively. A failure on one file or input is reported and the
/// remaining files are still processed.
/// </summary>
private static void BuildHashList(Options opts, IEnumerable<string> inputfiles)
{
    using (var of = File.Open(opts.Outputfile, FileMode.Create, FileAccess.Write, FileShare.None))
    using (var os = new StreamWriter(of))
    using (var sha1 = System.Security.Cryptography.SHA1.Create())
    using (var sha256 = System.Security.Cryptography.SHA256.Create())
    using (var md5 = System.Security.Cryptography.MD5.Create())
    {
        // ComputeHash resets the algorithm after each call, so one instance and one buffer serve all files.
        var buffer = new byte[opts.BlockSize];

        foreach (var relname in inputfiles)
        {
            try
            {
                var fullname = Path.GetFullPath(relname);
                IEnumerable<string> paths = new string[] { fullname };
                if (Directory.Exists(fullname))
                    paths = Directory.EnumerateFiles(fullname, "*", SearchOption.AllDirectories);

                foreach (var rname in paths)
                {
                    try
                    {
                        var fname = Path.GetFullPath(rname);
                        Console.WriteLine("Building hash list for: {0}", fname);

                        var offset = 0L;
                        using (var fs = File.OpenRead(fname))
                        {
                            int len;
                            while ((len = fs.Read(buffer, 0, buffer.Length)) > 0)
                            {
                                os.WriteLine(new LineEntry(
                                    len,
                                    offset,
                                    Convert.ToBase64String(sha1.ComputeHash(buffer, 0, len)),
                                    Convert.ToBase64String(sha256.ComputeHash(buffer, 0, len)),
                                    Convert.ToBase64String(md5.ComputeHash(buffer, 0, len)),
                                    fname
                                ));
                                offset += len;
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // One unreadable file must not hide the rest of its directory.
                        Console.WriteLine("Failed while processing path: {0}{1}Error: {2}", rname, Environment.NewLine, ex.Message);
                    }
                }
            }
            catch (Exception ex)
            {
                // Reached when the input path is invalid or the directory listing itself fails.
                Console.WriteLine("Failed while processing path: {0}{1}Error: {2}", relname, Environment.NewLine, ex.Message);
            }
        }
    }
}

/// <summary>
/// Groups the hash list by one hash value, prints how many distinct values and how many
/// duplicated values exist, and returns every entry whose value occurs more than once.
/// </summary>
/// <param name="filename">Path of the hash list.</param>
/// <param name="hashname">Name of the hash, used in the printed summary.</param>
/// <param name="keysel">Selects the hash value to group by.</param>
private static LineEntry[] CollectDuplicates(string filename, string hashname, Func<LineEntry, string> keysel)
{
    // Reclaim the previous pass' lookup before building the next large one.
    GC.Collect();

    var lookup = LineEntry.BuildLookup(filename, keysel);
    var duplicated = lookup.Values.Where(x => x.Length > 1).ToArray();
    Console.WriteLine("Found {0} distinct {1} hashes and {2} duplicates", lookup.Count, hashname, duplicated.Length);

    return duplicated.SelectMany(x => x).ToArray();
}

/// <summary>
/// Finds entries that have the same SHA256 value as another entry although their SHA1 or MD5
/// values differ. For each such entry the result holds the entry followed by its mismatching partners.
/// </summary>
private static List<LineEntry> FindCollisions(LineEntry[] flat)
{
    // Partners can only be found among entries with the same SHA256, so index on it once
    // instead of scanning the whole array for every entry. Group order follows input order.
    var bySha256 = flat.ToLookup(x => x.Sha256);

    var collisions = new List<LineEntry>();
    foreach (var n in flat)
    {
        var cols = bySha256[n.Sha256].Where(x => x.Sha1 != n.Sha1 || x.MD5 != n.MD5).ToList();
        if (cols.Count > 0)
        {
            collisions.Add(n);
            collisions.AddRange(cols);
        }
    }

    return collisions;
}

/// <summary>
/// Writes one line per colliding entry: the entry, the platform path separator character,
/// and the block's bytes in Base64. An entry whose data cannot be read is reported on the
/// console and leaves an empty line in the report.
/// </summary>
private static void WriteCollisionReport(string filename, IEnumerable<LineEntry> collisions)
{
    using (var of = File.Open(filename, FileMode.Create, FileAccess.Write, FileShare.None))
    using (var os = new StreamWriter(of))
    {
        foreach (var e in collisions)
        {
            try
            {
                var buffer = new byte[e.Size];
                using (var x = File.OpenRead(e.Path))
                {
                    x.Position = e.Offset;

                    // Read may deliver fewer bytes than requested, so keep reading until the block is complete.
                    var total = 0;
                    while (total < buffer.Length)
                    {
                        var read = x.Read(buffer, total, buffer.Length - total);
                        if (read <= 0)
                            throw new EndOfStreamException(string.Format("File ended after {0} of {1} bytes, it has changed since it was hashed", total, buffer.Length));
                        total += read;
                    }
                }

                os.Write(e);
                os.Write(Path.PathSeparator);
                os.Write(Convert.ToBase64String(buffer));
            }
            catch (Exception ex)
            {
                Console.WriteLine("Failed to process collision {0}{1}{2}", e, Environment.NewLine, ex);
            }

            os.WriteLine();
        }
    }
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment