Created
August 9, 2018 17:36
-
-
Save programmerextraordinaire/089822af2b648f5a45ed5afd838c5dd8 to your computer and use it in GitHub Desktop.
Character histogram of a string into 64-bit unsigned long.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// | |
/// CharacterHistogram.cs | |
/// by tkp | |
/// | |
void Main() | |
{ | |
var s1 = "NOE TIMMY HEATH"; | |
var s2 = "NOEL TIMOTHY HEATH"; | |
var mash1 = Histogram(s1); | |
var mash2 = Histogram(s2); | |
var mashhex1 = mash1.ToString("X16"); | |
var mashhex2 = mash2.ToString("X16"); | |
Console.WriteLine("{0} {1} {2}", s1, mashhex1, mash1); | |
Console.WriteLine("{0} {1} {2}", s2, mashhex2, mash2); | |
var diff = HistogramDiffChars(mash1, mash2); | |
var mashList1 = s1.Split(new [] {' '}).Select(n => Histogram(n)); | |
mashList1.Dump(); | |
diff.Dump(); | |
} | |
// NOT CURRENTLY USED | |
public class WordBin | |
{ | |
public char Key { get; set; } | |
public int BinNum { get; set; } | |
public int BinSize { get; set; } | |
public int BinOffset { get; set; } | |
} | |
// Put character histogram in to 64-bit ULONG | |
private ulong Histogram(string s) | |
{ | |
ulong retval = 0UL; | |
var hist = new List<int>(16) {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
foreach (var c in s.ToLower()) | |
{ | |
var i = (int)c; | |
var m = i % 16; | |
hist[m] += 1; | |
} | |
hist.Reverse(); | |
var idx = 0; | |
foreach (var v in hist) | |
{ | |
ulong k = (ulong)v << (4*idx++); | |
retval += k; | |
//Console.WriteLine("{0} {1} {2} {3}", idx, v, k, retval); | |
} | |
return retval; | |
} | |
private int HistogramDiffChars(ulong mash1, ulong mash2) | |
{ | |
// Quick check for equality | |
if (mash1 == mash2) | |
{ | |
return 0; | |
} | |
int hist1, hist2; | |
int dist1 = 0; | |
int dist2 = 0; | |
for (int i = 0; i < 64; i+=4) | |
{ | |
ulong mask = (ulong)15 << i; | |
hist1 = (int)((mash1 & mask) >> i); | |
hist2 = (int)((mash2 & mask) >> i); | |
int diff = Math.Abs(hist1 - hist2); | |
if (diff > 0) | |
{ | |
if (hist1 == 0) | |
{ | |
dist2 += diff; | |
} | |
else if (hist2 == 0) | |
{ | |
dist1 += diff; | |
} | |
else | |
{ | |
dist1 += diff; | |
dist2 += diff; | |
} | |
} | |
} | |
return Math.Min(dist1, dist2); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment