-
-
Save praschl/46baef88bba5d36d43e5e7674c4c0187 to your computer and use it in GitHub Desktop.
A simple Diff algorithm with example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Algorithm taken from | |
| // http://devdirective.com/post/91/creating-a-reusable-though-simple-diff-implementation-in-csharp-part-1 | |
| // http://devdirective.com/post/109/creating-a-reusable-though-simple-diff-implementation-in-csharp-part-2 | |
| // http://devdirective.com/post/115/creating-a-reusable-though-simple-diff-implementation-in-csharp-part-3 | |
| // Site is not available anymore | |
| const string FIRST = "Hello, beautiful world, I like you"; // -lie | |
| const string SECOND = "Hello, incredible world, I love you!"; // +ös | |
| private void Main() | |
| { | |
| string text1 = FIRST; | |
| string text2 = SECOND; | |
| var w = Stopwatch.StartNew(); | |
| IEnumerable<DiffSection> result = Diff(text1.ToCharArray(), 0, text1.Length, text2.ToCharArray(), 0, text2.Length, EqualityComparer<char>.Default).ToArray(); | |
| w.Stop(); | |
| w.Dump(); | |
| result.Dump(); | |
| int i1 = 0; | |
| int i2 = 0; | |
| StringBuilder html = new StringBuilder(); | |
| foreach (var diff in result) | |
| { | |
| if (diff.Type == DiffSectionType.Copy) | |
| { | |
| var copy = FIRST.Substring(i1, diff.Length); | |
| html.Append(copy); | |
| i1 += diff.Length; | |
| i2 += diff.Length; | |
| } | |
| if (diff.Type == DiffSectionType.Delete) | |
| { | |
| var deleted = FIRST.Substring(i1, diff.Length); | |
| Console.WriteLine("- " + deleted); | |
| html.Append("<span style=\"color:red;text-decoration: line-through;\">" + deleted + "</span>"); | |
| i1 += diff.Length; | |
| } | |
| if (diff.Type == DiffSectionType.Insert) | |
| { | |
| var insert = SECOND.Substring(i2, diff.Length); | |
| Console.WriteLine("+ " + insert); | |
| html.Append("<span style=\"color:green\">" + insert + "</span>"); | |
| i2 += diff.Length; | |
| } | |
| } | |
| html.ToString().Dump(); | |
| Util.RawHtml(html.ToString()).Dump(); | |
| } | |
| public LongestCommonSubstringResult FindLongestCommonSubstring<T>(IList<T> firstCollection, int firstStart, int firstEnd, IList<T> secondCollection, int secondStart, int secondEnd, IEqualityComparer<T> equalityComparer) | |
| { | |
| // default result, if we can't find anything | |
| LongestCommonSubstringResult result = new LongestCommonSubstringResult(); | |
| for (int index1 = firstStart; index1 < firstEnd; index1++) | |
| { | |
| for (int index2 = secondStart; index2 < secondEnd; index2++) | |
| { | |
| if (equalityComparer.Equals(firstCollection[index1], secondCollection[index2])) | |
| { | |
| int length = CountEqual(firstCollection, index1, firstEnd, secondCollection, index2, secondEnd, equalityComparer); // Is longer than what we already have --> record new LCS | |
| if (length > result.Length) | |
| { | |
| result = new LongestCommonSubstringResult(index1, index2, length); | |
| } | |
| } | |
| } | |
| } | |
| return result; | |
| } | |
| public int CountEqual<T>(IList<T> firstCollection, int firstPosition, int firstEnd, IList<T> secondCollection, int secondPosition, int secondEnd, IEqualityComparer<T> equalityComparer) | |
| { | |
| int length = 0; | |
| while (firstPosition < firstEnd && secondPosition < secondEnd) | |
| { | |
| if (!equalityComparer.Equals(firstCollection[firstPosition], secondCollection[secondPosition])) | |
| { | |
| break; | |
| } | |
| firstPosition++; | |
| secondPosition++; | |
| length++; | |
| } | |
| return length; | |
| } | |
| private IEnumerable<DiffSection> Diff<T>(IList<T> firstCollection, int firstStart, int firstEnd, IList<T> secondCollection, int secondStart, int secondEnd, IEqualityComparer<T> equalityComparer) | |
| { | |
| LongestCommonSubstringResult lcs = FindLongestCommonSubstring(firstCollection, firstStart, firstEnd, secondCollection, secondStart, secondEnd, equalityComparer); | |
| if (lcs.Success) | |
| { | |
| // deal with the section before | |
| IEnumerable<DiffSection> sectionsBefore = Diff(firstCollection, firstStart, lcs.PositionInFirstCollection, secondCollection, secondStart, lcs.PositionInSecondCollection, equalityComparer); | |
| foreach (DiffSection section in sectionsBefore) | |
| yield return section; // output the copy operation | |
| yield return new DiffSection(DiffSectionType.Copy, lcs.Length); // deal with the section after | |
| IEnumerable<DiffSection> sectionsAfter = Diff(firstCollection, lcs.PositionInFirstCollection + lcs.Length, firstEnd, secondCollection, lcs.PositionInSecondCollection + lcs.Length, secondEnd, equalityComparer); | |
| foreach (DiffSection section in sectionsAfter) | |
| yield return section; | |
| yield break; | |
| } // if we get here, no LCS | |
| if (firstStart < firstEnd) | |
| { | |
| // we got content from first collection --> deleted | |
| yield return new DiffSection(DiffSectionType.Delete, firstEnd - firstStart); | |
| } | |
| if (secondStart < secondEnd) | |
| { | |
| // we got content from second collection --> inserted | |
| yield return new DiffSection(DiffSectionType.Insert, secondEnd - secondStart); | |
| } | |
| } | |
| public enum DiffSectionType | |
| { | |
| Copy, | |
| Insert, | |
| Delete | |
| } | |
| public struct DiffSection | |
| { | |
| private readonly DiffSectionType _Type; | |
| private readonly int _Length; | |
| public DiffSection(DiffSectionType type, int length) | |
| { | |
| _Type = type; | |
| _Length = length; | |
| } | |
| public override string ToString() | |
| { | |
| return string.Format("{0} {1}", Type, Length); | |
| } | |
| public DiffSectionType Type | |
| { | |
| get | |
| { | |
| return _Type; | |
| } | |
| } | |
| public int Length | |
| { | |
| get | |
| { | |
| return _Length; | |
| } | |
| } | |
| } | |
| /////////////////////////////////////////////////////////////// // rest is from part 2, unchanged | |
| public struct LongestCommonSubstringResult | |
| { | |
| private readonly bool _Success; | |
| private readonly int _PositionInFirstCollection; | |
| private readonly int _PositionInSecondCollection; | |
| private readonly int _Length; | |
| public LongestCommonSubstringResult(int positionInFirstCollection, int positionInSecondCollection, int length) | |
| { | |
| _Success = true; | |
| _PositionInFirstCollection = positionInFirstCollection; | |
| _PositionInSecondCollection = positionInSecondCollection; | |
| _Length = length; | |
| } | |
| public override string ToString() | |
| { | |
| if (Success) | |
| return string.Format("LCS ({0}, {1} x{2})", PositionInFirstCollection, PositionInSecondCollection, Length); | |
| else | |
| return "LCS (-)"; | |
| } | |
| public bool Success | |
| { | |
| get | |
| { | |
| return _Success; | |
| } | |
| } | |
| public int PositionInFirstCollection | |
| { | |
| get | |
| { | |
| return _PositionInFirstCollection; | |
| } | |
| } | |
| public int PositionInSecondCollection | |
| { | |
| get | |
| { | |
| return _PositionInSecondCollection; | |
| } | |
| } | |
| public int Length | |
| { | |
| get | |
| { | |
| return _Length; | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment