Created
January 11, 2016 13:46
-
-
Save kenkendk/62f56a39f68f1d580d77 to your computer and use it in GitHub Desktop.
Hash Collision Checking code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using System; | |
| using System.IO; | |
| using System.Linq; | |
| using System.Reflection; | |
| using System.Collections.Generic; | |
| namespace HashCollisionChecker | |
| { | |
| class MainClass | |
| { | |
/// <summary>
/// Command line settings together with their default values.
/// The members are deliberately public fields: the entry point enumerates
/// them with reflection (typeof(Options).GetFields()) to print the usage
/// text and to assign values given as --name=value arguments.
/// </summary>
private class Options
{
    /// <summary>Number of bytes hashed per block; defaults to 1 MiB.</summary>
    public int BlockSize = 1 << 20;

    /// <summary>File that receives one line per hashed block.</summary>
    public string Outputfile = "data.csv";

    /// <summary>File that receives the collision report.</summary>
    public string Collisionfile = "collisions.txt";

    /// <summary>When true, an existing output file is rebuilt instead of reused.</summary>
    public bool Overwrite = false;
}
/// <summary>
/// One record of the hash list file: the size and offset of a block, its
/// SHA1, SHA256 and MD5 hashes, and the path of the file it was read from.
/// A record is serialized as a single line of six fields separated by ", ".
/// </summary>
private class LineEntry
{
    /// <summary>Separator written between the fields of a line.</summary>
    private const string FieldSeparator = ", ";

    /// <summary>Number of fields in a line.</summary>
    private const int FieldCount = 6;

    public long Size { get; private set; }
    public long Offset { get; private set; }
    public string Sha1 { get; private set; }
    public string Sha256 { get; private set; }
    public string MD5 { get; private set; }
    public string Path { get; private set; }

    /// <summary>
    /// Parses a line previously produced by ToString().
    /// </summary>
    /// <param name="line">The serialized record.</param>
    /// <exception cref="FormatException">
    /// The line does not have six fields, or size/offset are not integers.
    /// </exception>
    private LineEntry(string line)
    {
        // The path is the last field and the split is capped at six parts,
        // so a path that itself contains ", " survives intact.
        var parts = line.Split(new string[] { FieldSeparator }, FieldCount, StringSplitOptions.None);
        if (parts.Length != FieldCount)
        {
            throw new FormatException(string.Format(
                "Expected {0} fields but found {1} in line: {2}", FieldCount, parts.Length, line));
        }

        // The file is machine-readable data, so parse independently of the user's culture.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        Size = long.Parse(parts[0], invariant);
        Offset = long.Parse(parts[1], invariant);
        Sha1 = parts[2].Trim();
        Sha256 = parts[3].Trim();
        MD5 = parts[4].Trim();
        Path = parts[5];
    }

    /// <summary>
    /// Creates a record from already computed values.
    /// </summary>
    public LineEntry(long size, long offset, string sha1, string sha256, string md5, string path)
    {
        Size = size;
        Offset = offset;
        Sha1 = sha1;
        Sha256 = sha256;
        MD5 = md5;
        Path = path;
    }

    /// <summary>
    /// Serializes the record as one line of the hash list file.
    /// </summary>
    /// <returns>The six fields joined by ", ", formatted culture-invariantly.</returns>
    public override string ToString()
    {
        return string.Format(
            System.Globalization.CultureInfo.InvariantCulture,
            "{0}, {1}, {2}, {3}, {4}, {5}",
            Size, Offset, Sha1, Sha256, MD5, Path);
    }

    /// <summary>
    /// Lazily reads a hash list file and yields one record per non-blank line.
    /// </summary>
    /// <param name="filename">Path of the hash list file.</param>
    /// <returns>The parsed records in file order.</returns>
    /// <exception cref="FormatException">A non-blank line is malformed.</exception>
    public static IEnumerable<LineEntry> ParseFile(string filename)
    {
        using (var f = File.OpenRead(filename))
        using (var fs = new StreamReader(f))
        {
            string line;
            while ((line = fs.ReadLine()) != null)
            {
                // A blank line (for instance a trailing newline added by an
                // editor) carries no record and must not abort the whole run.
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                yield return new LineEntry(line);
            }
        }
    }

    /// <summary>
    /// Groups all records of a hash list file by a caller-selected key.
    /// </summary>
    /// <param name="filename">Path of the hash list file.</param>
    /// <param name="keysel">Selects the grouping key, for example one of the hashes.</param>
    /// <returns>A map from each key to the records sharing it, in file order.</returns>
    public static Dictionary<string, LineEntry[]> BuildLookup(string filename, Func<LineEntry, string> keysel)
    {
        var res = new Dictionary<string, LineEntry[]>();
        foreach (var entry in ParseFile(filename))
        {
            var key = keysel(entry);

            LineEntry[] bucket;
            if (res.TryGetValue(key, out bucket))
            {
                // Exact-size arrays keep the memory footprint minimal for the
                // common case where almost every key has a single record.
                Array.Resize(ref bucket, bucket.Length + 1);
                bucket[bucket.Length - 1] = entry;
                res[key] = bucket;
            }
            else
            {
                res[key] = new LineEntry[] { entry };
            }
        }

        return res;
    }
}
/// <summary>
/// Program entry point. Hashes every input file block by block into the
/// output file (unless an existing output file is reused), then scans that
/// file for blocks that share a SHA256 hash while differing in SHA1 or MD5,
/// and writes any such blocks to the collision file.
/// </summary>
/// <param name="args">
/// Input files or directories, plus optional --name=value settings matching
/// the public fields of Options.
/// </param>
/// <returns>
/// 0 when no collisions were found; 1 on a usage error, an unusable
/// collision file, or when collisions were found; 2 on an unexpected error.
/// </returns>
public static int Main(string[] args)
{
    try
    {
        var opts = new Options();
        args = args ?? new string[0];

        // Materialized once because both sequences are consumed more than once.
        var inputfiles = args.Where(n => !n.StartsWith("--", StringComparison.Ordinal)).ToArray();
        var optionargs = args.Where(n => n.StartsWith("--", StringComparison.Ordinal)).ToArray();

        if (inputfiles.Length == 0)
        {
            Console.WriteLine("Usage: ");
            Console.WriteLine(" HasCollisionChecker.exe <file1> <file2> ... <fileN> " + string.Join(" ", from n in typeof(Options).GetFields() select string.Format("[--{0}={1}]", n.Name.ToLowerInvariant(), n.GetValue(opts))));
            Console.WriteLine();
            Console.WriteLine("Output files are created in the current directory, make sure it is write-able");
            return 1;
        }

        if (!ApplyOptions(opts, optionargs))
        {
            return 1;
        }

        if (opts.BlockSize < 1)
        {
            Console.WriteLine("Invalid block size: {0}, must be at least 1", opts.BlockSize);
            return 1;
        }

        opts.Outputfile = Path.GetFullPath(opts.Outputfile);
        opts.Collisionfile = Path.GetFullPath(opts.Collisionfile);

        if (File.Exists(opts.Collisionfile))
        {
            Console.WriteLine("Collision file alread exists: {0}", opts.Collisionfile);
            return 1;
        }

        try
        {
            // Creating and immediately closing the file probes for write
            // access before the expensive hashing starts.
            using (File.Create(opts.Collisionfile)) { }
        }
        catch
        {
            Console.WriteLine("Failed to create collision file, check that you have write permissions to {0}", opts.Collisionfile);
            return 1;
        }

        File.Delete(opts.Collisionfile);

        if (!opts.Overwrite && File.Exists(opts.Outputfile))
        {
            Console.WriteLine("Target file already exists: {0}", opts.Outputfile);
            Console.WriteLine("Skipping the hash building process and using the existing file as input");
            Console.WriteLine("Specify another output file with --outputfile, or allow overwrite with --overwrite=true to restart the hashing");
        }
        else
        {
            WriteHashList(opts, inputfiles);
        }

        Console.WriteLine("Completed building hash list for all input files, checking for duplicate hashes.");
        Console.WriteLine("This is very memory intensive, and may cause the process to crash.");

        //TODO: Could also account for the length of data, to avoid collisions with small and large blocks

        // Each lookup is built, reduced to its duplicates and released before
        // the next one is built, so only one full lookup is alive at a time.
        GC.Collect();
        var sha1_duplicates = CollectDuplicates(opts.Outputfile, "SHA1", x => x.Sha1);
        GC.Collect();
        var sha256_duplicates = CollectDuplicates(opts.Outputfile, "SHA256", x => x.Sha256);
        GC.Collect();
        var md5_duplicates = CollectDuplicates(opts.Outputfile, "MD5", x => x.MD5);
        GC.Collect();

        Console.WriteLine("Done collecting duplicates, checking for collisions");

        // Every lookup re-parses the file, so the same line exists as up to
        // three separate objects; reference-based Distinct() cannot merge
        // them. Deduplicate on the serialized line instead.
        var flat = sha1_duplicates
            .Concat(sha256_duplicates)
            .Concat(md5_duplicates)
            .GroupBy(x => x.ToString())
            .Select(g => g.First())
            .ToArray();

        Console.WriteLine("Checking for collisions in {0} entries", flat.Length);

        // NOTE(review): only entries sharing a SHA256 hash are compared, so a
        // SHA1 or MD5 collision between blocks with different SHA256 hashes is
        // not reported - confirm whether that is intended.
        var bySha256 = flat.ToLookup(x => x.Sha256);
        var colisions = flat.SelectMany(n =>
        {
            var cols = bySha256[n.Sha256].Where(x => x.Sha1 != n.Sha1 || x.MD5 != n.MD5).ToList();
            if (cols.Count > 0)
            {
                cols.Insert(0, n);
            }

            return cols;
        }).ToArray();

        if (colisions.Length > 0)
        {
            Console.WriteLine("Found {0} collisions !!!", colisions.Length);
            Console.WriteLine("Writing collision report to {0} ...", opts.Collisionfile);
            WriteCollisionReport(opts.Collisionfile, colisions);
            return 1;
        }

        Console.WriteLine("No collisions found");
        return 0;
    }
    catch (Exception ex)
    {
        Console.WriteLine("Crash!");
        Console.WriteLine(ex.ToString());
        return 2;
    }
}

/// <summary>
/// Assigns the --name=value arguments to the matching public fields of the options object.
/// </summary>
/// <returns>False when an option has a missing or unconvertible value; otherwise true.</returns>
private static bool ApplyOptions(Options opts, IEnumerable<string> optionargs)
{
    var optfields = typeof(Options).GetFields();
    foreach (var opt in optionargs)
    {
        var parts = opt.Split(new char[] { '=' }, 2);
        var name = parts[0].Substring(2);
        var value = parts.Length == 1 ? null : parts[1];

        var field = optfields.FirstOrDefault(x => string.Equals(x.Name, name, StringComparison.OrdinalIgnoreCase));
        if (field == null)
        {
            Console.WriteLine("No option named: {0}", name);
            continue;
        }

        // A bare switch such as --overwrite means "true".
        if (value == null && field.FieldType == typeof(bool))
        {
            value = bool.TrueString;
        }

        if (value == null)
        {
            Console.WriteLine("Option --{0} requires a value", name);
            return false;
        }

        try
        {
            field.SetValue(opts, Convert.ChangeType(value, field.FieldType, System.Globalization.CultureInfo.InvariantCulture));
        }
        catch (Exception ex)
        {
            Console.WriteLine("Invalid value for option --{0}: {1} ({2})", name, value, ex.Message);
            return false;
        }
    }

    return true;
}

/// <summary>
/// Hashes every input file in blocks of opts.BlockSize bytes and writes one
/// line per block to opts.Outputfile. Directories are traversed recursively.
/// A failure on one input is reported and the remaining inputs are still processed.
/// </summary>
private static void WriteHashList(Options opts, IEnumerable<string> inputfiles)
{
    // One buffer and one set of hash objects serve all files;
    // ComputeHash leaves the algorithm ready for the next call.
    var buffer = new byte[opts.BlockSize];

    using (var sha1 = System.Security.Cryptography.SHA1.Create())
    using (var sha256 = System.Security.Cryptography.SHA256.Create())
    using (var md5 = System.Security.Cryptography.MD5.Create())
    using (var of = File.Open(opts.Outputfile, FileMode.Create, FileAccess.Write, FileShare.None))
    using (var os = new StreamWriter(of))
    {
        foreach (var relname in inputfiles)
        {
            try
            {
                var fullname = Path.GetFullPath(relname);
                IEnumerable<string> paths = new string[] { fullname };
                if (Directory.Exists(fullname))
                {
                    paths = Directory.EnumerateFiles(fullname, "*", SearchOption.AllDirectories);
                }

                foreach (var rname in paths)
                {
                    var fname = Path.GetFullPath(rname);
                    Console.WriteLine("Building hash list for: {0}", fname);

                    var offset = 0L;
                    using (var fs = File.OpenRead(fname))
                    {
                        int len;
                        while ((len = fs.Read(buffer, 0, buffer.Length)) > 0)
                        {
                            os.WriteLine(new LineEntry(
                                len,
                                offset,
                                Convert.ToBase64String(sha1.ComputeHash(buffer, 0, len)),
                                Convert.ToBase64String(sha256.ComputeHash(buffer, 0, len)),
                                Convert.ToBase64String(md5.ComputeHash(buffer, 0, len)),
                                fname
                            ));
                            offset += len;
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine("Failed while processing path: {0}{1}Error: {2}", relname, Environment.NewLine, ex.Message);
            }
        }
    }
}

/// <summary>
/// Groups the hash list by one hash, prints the statistics for it and
/// returns every entry whose hash value occurs more than once.
/// </summary>
/// <param name="filename">Path of the hash list file.</param>
/// <param name="algorithm">Display name of the hash used in the statistics line.</param>
/// <param name="keysel">Selects the hash to group by.</param>
private static LineEntry[] CollectDuplicates(string filename, string algorithm, Func<LineEntry, string> keysel)
{
    var lookup = LineEntry.BuildLookup(filename, keysel);
    var duplicates = lookup.Where(x => x.Value.Length > 1).ToArray();
    Console.WriteLine("Found {0} distinct {1} hashes and {2} duplicates", lookup.Count, algorithm, duplicates.Length);
    return duplicates.SelectMany(x => x.Value).ToArray();
}

/// <summary>
/// Writes one line per colliding entry: the entry itself, the platform path
/// separator character, and the Base64 encoded block data re-read from the
/// source file. An entry whose data cannot be read is reported on the
/// console and still terminated with a newline in the report.
/// </summary>
private static void WriteCollisionReport(string reportfile, IEnumerable<LineEntry> colisions)
{
    using (var of = File.Open(reportfile, FileMode.Create, FileAccess.Write, FileShare.None))
    using (var os = new StreamWriter(of))
    {
        foreach (var e in colisions)
        {
            try
            {
                var buffer = new byte[e.Size];
                var filled = 0;
                using (var x = File.OpenRead(e.Path))
                {
                    x.Position = e.Offset;

                    // A single Read call may return fewer bytes than
                    // requested, so keep reading until the block is complete
                    // or the file ends.
                    int read;
                    while (filled < buffer.Length && (read = x.Read(buffer, filled, buffer.Length - filled)) > 0)
                    {
                        filled += read;
                    }
                }

                os.Write(e);
                os.Write(Path.PathSeparator);
                os.Write(Convert.ToBase64String(buffer, 0, filled));
            }
            catch (Exception ex)
            {
                Console.WriteLine("Failed to process collision {0}{1}{2}", e, Environment.NewLine, ex);
            }

            os.WriteLine();
        }
    }
}
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment