Skip to content

Instantly share code, notes, and snippets.

@praschl
Last active January 31, 2018 13:34
Show Gist options
  • Select an option

  • Save praschl/46baef88bba5d36d43e5e7674c4c0187 to your computer and use it in GitHub Desktop.

Select an option

Save praschl/46baef88bba5d36d43e5e7674c4c0187 to your computer and use it in GitHub Desktop.
A simple Diff algorithm with example
// Algorithm taken from
// http://devdirective.com/post/91/creating-a-reusable-though-simple-diff-implementation-in-csharp-part-1
// http://devdirective.com/post/109/creating-a-reusable-though-simple-diff-implementation-in-csharp-part-2
// http://devdirective.com/post/115/creating-a-reusable-though-simple-diff-implementation-in-csharp-part-3
// Site is not available anymore
const string FIRST = "Hello, beautiful world, I like you"; // -lie
const string SECOND = "Hello, incredible world, I love you!"; // +ös
private void Main()
{
string text1 = FIRST;
string text2 = SECOND;
var w = Stopwatch.StartNew();
IEnumerable<DiffSection> result = Diff(text1.ToCharArray(), 0, text1.Length, text2.ToCharArray(), 0, text2.Length, EqualityComparer<char>.Default).ToArray();
w.Stop();
w.Dump();
result.Dump();
int i1 = 0;
int i2 = 0;
StringBuilder html = new StringBuilder();
foreach (var diff in result)
{
if (diff.Type == DiffSectionType.Copy)
{
var copy = FIRST.Substring(i1, diff.Length);
html.Append(copy);
i1 += diff.Length;
i2 += diff.Length;
}
if (diff.Type == DiffSectionType.Delete)
{
var deleted = FIRST.Substring(i1, diff.Length);
Console.WriteLine("- " + deleted);
html.Append("<span style=\"color:red;text-decoration: line-through;\">" + deleted + "</span>");
i1 += diff.Length;
}
if (diff.Type == DiffSectionType.Insert)
{
var insert = SECOND.Substring(i2, diff.Length);
Console.WriteLine("+ " + insert);
html.Append("<span style=\"color:green\">" + insert + "</span>");
i2 += diff.Length;
}
}
html.ToString().Dump();
Util.RawHtml(html.ToString()).Dump();
}
public LongestCommonSubstringResult FindLongestCommonSubstring<T>(IList<T> firstCollection, int firstStart, int firstEnd, IList<T> secondCollection, int secondStart, int secondEnd, IEqualityComparer<T> equalityComparer)
{
// default result, if we can't find anything
LongestCommonSubstringResult result = new LongestCommonSubstringResult();
for (int index1 = firstStart; index1 < firstEnd; index1++)
{
for (int index2 = secondStart; index2 < secondEnd; index2++)
{
if (equalityComparer.Equals(firstCollection[index1], secondCollection[index2]))
{
int length = CountEqual(firstCollection, index1, firstEnd, secondCollection, index2, secondEnd, equalityComparer); // Is longer than what we already have --> record new LCS
if (length > result.Length)
{
result = new LongestCommonSubstringResult(index1, index2, length);
}
}
}
}
return result;
}
public int CountEqual<T>(IList<T> firstCollection, int firstPosition, int firstEnd, IList<T> secondCollection, int secondPosition, int secondEnd, IEqualityComparer<T> equalityComparer)
{
int length = 0;
while (firstPosition < firstEnd && secondPosition < secondEnd)
{
if (!equalityComparer.Equals(firstCollection[firstPosition], secondCollection[secondPosition]))
{
break;
}
firstPosition++;
secondPosition++;
length++;
}
return length;
}
private IEnumerable<DiffSection> Diff<T>(IList<T> firstCollection, int firstStart, int firstEnd, IList<T> secondCollection, int secondStart, int secondEnd, IEqualityComparer<T> equalityComparer)
{
LongestCommonSubstringResult lcs = FindLongestCommonSubstring(firstCollection, firstStart, firstEnd, secondCollection, secondStart, secondEnd, equalityComparer);
if (lcs.Success)
{
// deal with the section before
IEnumerable<DiffSection> sectionsBefore = Diff(firstCollection, firstStart, lcs.PositionInFirstCollection, secondCollection, secondStart, lcs.PositionInSecondCollection, equalityComparer);
foreach (DiffSection section in sectionsBefore)
yield return section; // output the copy operation
yield return new DiffSection(DiffSectionType.Copy, lcs.Length); // deal with the section after
IEnumerable<DiffSection> sectionsAfter = Diff(firstCollection, lcs.PositionInFirstCollection + lcs.Length, firstEnd, secondCollection, lcs.PositionInSecondCollection + lcs.Length, secondEnd, equalityComparer);
foreach (DiffSection section in sectionsAfter)
yield return section;
yield break;
} // if we get here, no LCS
if (firstStart < firstEnd)
{
// we got content from first collection --> deleted
yield return new DiffSection(DiffSectionType.Delete, firstEnd - firstStart);
}
if (secondStart < secondEnd)
{
// we got content from second collection --> inserted
yield return new DiffSection(DiffSectionType.Insert, secondEnd - secondStart);
}
}
public enum DiffSectionType
{
Copy,
Insert,
Delete
}
public struct DiffSection
{
private readonly DiffSectionType _Type;
private readonly int _Length;
public DiffSection(DiffSectionType type, int length)
{
_Type = type;
_Length = length;
}
public override string ToString()
{
return string.Format("{0} {1}", Type, Length);
}
public DiffSectionType Type
{
get
{
return _Type;
}
}
public int Length
{
get
{
return _Length;
}
}
}
/////////////////////////////////////////////////////////////// // rest is from part 2, unchanged
public struct LongestCommonSubstringResult
{
private readonly bool _Success;
private readonly int _PositionInFirstCollection;
private readonly int _PositionInSecondCollection;
private readonly int _Length;
public LongestCommonSubstringResult(int positionInFirstCollection, int positionInSecondCollection, int length)
{
_Success = true;
_PositionInFirstCollection = positionInFirstCollection;
_PositionInSecondCollection = positionInSecondCollection;
_Length = length;
}
public override string ToString()
{
if (Success)
return string.Format("LCS ({0}, {1} x{2})", PositionInFirstCollection, PositionInSecondCollection, Length);
else
return "LCS (-)";
}
public bool Success
{
get
{
return _Success;
}
}
public int PositionInFirstCollection
{
get
{
return _PositionInFirstCollection;
}
}
public int PositionInSecondCollection
{
get
{
return _PositionInSecondCollection;
}
}
public int Length
{
get
{
return _Length;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment