Last active
May 10, 2022 03:12
-
-
Save Geograph-us/a1beae713c337243c478cb5575a22f75 to your computer and use it in GitHub Desktop.
Compression by PPM + Arithmetic Coder C# implementation (based on C++ algorithm from the book "Data Compression Methods")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//The context compressor, author M.Smirnov. | |
//The source also can be found at | |
//http://compression.ru/ | |
//usage: | |
//c infile outfile //encoding | |
//d infile outfile //decoding | |
/* | |
* Контекстный компрессор Dummy, автор М.Смирнов. | |
* Исходный текст этой программы также можно найти на сайо | |
* http://compression.ru/ | |
* Применение: | |
* c infile outfile - закодировать infile в outfile | |
* d infile outfile - декодировать infile в outfile | |
*/ | |
// Арифметическое кодирование Субботина | |
// Реализация range-кодера, автор Е.Шелвин | |
// http://www.pilabs.org.ua/sh/aridemo6.zip | |
const uint Top = 1 << 24; | |
static readonly RangeCoder ArithmeticCoder = new RangeCoder(); | |
public class RangeCoder | |
{ | |
private uint code, range, ffNum, cache; | |
private long low; | |
private Stream stream; | |
public void StartEncode(Stream output) | |
{ | |
low = ffNum = cache = 0; | |
range = uint.MaxValue; | |
this.stream = output; | |
} | |
public void StartDecode(Stream input) | |
{ | |
code = 0; | |
range = uint.MaxValue; | |
this.stream = input; | |
for (var _ = 0; _ < 5; _++) | |
{ | |
code = (code << 8) | (uint)stream.ReadByte(); | |
} | |
} | |
public void FinishEncode() | |
{ | |
low += 1; | |
for (var _ = 0; _ < 5; _++) | |
{ | |
ShiftLow(); | |
} | |
} | |
//public void FinishDecode() { } | |
public void Encode(uint cumFreq, uint freq, uint totFreq) | |
{ | |
low += cumFreq * (range /= totFreq); | |
range *= freq; | |
while (range < Top) | |
{ | |
ShiftLow(); | |
range <<= 8; | |
} | |
} | |
private void ShiftLow() | |
{ | |
if ((low >> 24) != 0xFF) | |
{ | |
stream.WriteByte((byte)(cache + (low >> 32))); | |
var c = 0xFF + (int)(low >> 32); | |
while (ffNum > 0) | |
{ | |
stream.WriteByte((byte)c); | |
ffNum--; | |
} | |
cache = (uint)low >> 24; | |
} | |
else | |
ffNum++; | |
low = (uint)low << 8; | |
} | |
public uint GetFreq(uint totFreq) | |
{ | |
return code / (range /= totFreq); | |
} | |
public void DecodeUpdate(uint cumFreq, uint freq, uint totFreq) | |
{ | |
code -= cumFreq * range; | |
range *= freq; | |
while (range < Top) | |
{ | |
code = (code << 8) | (uint)stream.ReadByte(); | |
range <<= 8; | |
} | |
} | |
} | |
public class ContextModel | |
{ | |
public int Esc; | |
public int TotFr; | |
public int[] Count = new int[256]; | |
}; | |
static readonly ContextModel[] cm = new ContextModel[257]; | |
static readonly ContextModel[] Stack = new ContextModel[2]; | |
static readonly int[] Context = new int[1]; | |
static int SP; | |
const int MaxTotFr = 0x3fff; | |
static void InitModel() | |
{ | |
for (var i = 0; i < cm.Length; i++) cm[i] = new ContextModel(); | |
for (var i = 0; i < Stack.Length; i++) Stack[i] = new ContextModel(); | |
for (var j = 0; j < 256; j++) cm[256].Count[j] = 1; | |
cm[256].TotFr = 256; | |
cm[256].Esc = 1; | |
Context[0] = SP = 0; | |
} | |
static int EncodeSym(ContextModel CM, int c) | |
{ | |
Stack[SP++] = CM; | |
if (CM.Count[c] > 0) | |
{ | |
var cumFreqUnder = 0; | |
for (var i = 0; i < c; i++) cumFreqUnder += CM.Count[i]; | |
ArithmeticCoder.Encode((uint)cumFreqUnder, (uint)CM.Count[c], (uint)(CM.TotFr + CM.Esc)); | |
return 1; | |
} | |
else | |
{ | |
if (CM.Esc > 0) | |
{ | |
ArithmeticCoder.Encode((uint)CM.TotFr, (uint)CM.Esc, (uint)(CM.TotFr + CM.Esc)); | |
} | |
return 0; | |
} | |
} | |
static int DecodeSym(ContextModel CM) | |
{ | |
Stack[SP++] = CM; | |
if (CM.Esc == 0) return -1; | |
var cumFreq = (int)ArithmeticCoder.GetFreq((uint)(CM.TotFr + CM.Esc)); | |
if (cumFreq < CM.TotFr) | |
{ | |
var cumFreqUnder = 0; | |
var i = 0; | |
while (true) | |
{ | |
if ((cumFreqUnder + CM.Count[i]) <= cumFreq) cumFreqUnder += CM.Count[i]; | |
else break; | |
i++; | |
} | |
ArithmeticCoder.DecodeUpdate((uint)cumFreqUnder, (uint)CM.Count[i], (uint)(CM.TotFr + CM.Esc)); | |
return i; | |
} | |
else | |
{ | |
ArithmeticCoder.DecodeUpdate((uint)CM.TotFr, (uint)CM.Esc, (uint)(CM.TotFr + CM.Esc)); | |
return -1; | |
} | |
} | |
static void Rescale(ContextModel CM) | |
{ | |
CM.TotFr = 0; | |
for (int i = 0; i < 256; i++) | |
{ | |
CM.Count[i] -= CM.Count[i] >> 1; | |
CM.TotFr += CM.Count[i]; | |
} | |
} | |
static void UpdateModel(int c) | |
{ | |
while (SP > 0) | |
{ | |
SP--; | |
if (Stack[SP].TotFr >= MaxTotFr) Rescale(Stack[SP]); | |
Stack[SP].TotFr += 1; | |
if (Stack[SP].Count[c] == 0) Stack[SP].Esc += 1; | |
Stack[SP].Count[c] += 1; | |
} | |
} | |
static void Encode(Stream dataFile, Stream compressedFile) | |
{ | |
var c = 0; | |
InitModel(); | |
ArithmeticCoder.StartEncode(compressedFile); | |
while ((c = dataFile.ReadByte()) != -1) | |
{ | |
var success = EncodeSym(cm[Context[0]], c); | |
if (success == 0) EncodeSym(cm[256], c); | |
UpdateModel(c); | |
Context[0] = c; | |
} | |
ArithmeticCoder.Encode((uint)cm[Context[0]].TotFr, (uint)cm[Context[0]].Esc, (uint)(cm[Context[0]].TotFr + cm[Context[0]].Esc)); | |
ArithmeticCoder.Encode((uint)cm[256].TotFr, (uint)cm[256].Esc, (uint)(cm[256].TotFr + cm[256].Esc)); | |
ArithmeticCoder.FinishEncode(); | |
} | |
static void Decode(Stream compressedFile, Stream dataFile) | |
{ | |
InitModel(); | |
ArithmeticCoder.StartDecode(compressedFile); | |
while (true) | |
{ | |
var c = DecodeSym(cm[Context[0]]); | |
if (c == -1) | |
{ | |
c = DecodeSym(cm[256]); | |
if (c == -1) break; | |
} | |
UpdateModel(c); | |
Context[0] = c; | |
dataFile.WriteByte((byte)c); | |
} | |
} | |
static long GetFileSize(string fileName) | |
{ | |
return new FileInfo(fileName).Length; | |
} | |
static void Main(string[] args) | |
{ | |
if (args.Length != 3) | |
{ | |
Console.WriteLine("Context compressor, author M.Smirnov."); | |
Console.WriteLine("Usage for compression : c infile outfile"); | |
Console.WriteLine("Usage for decompression: d infile outfile"); | |
return; | |
} | |
if (args[0][0] == 'c') | |
{ | |
var sw = Stopwatch.StartNew(); | |
using (var dataFile = File.Open(args[1], FileMode.Open)) | |
using (var compressedFile = File.Open(args[2], FileMode.Create)) | |
{ | |
Encode(dataFile, compressedFile); | |
} | |
var dataSize = GetFileSize(args[1]); | |
var resultSize = GetFileSize(args[2]); | |
Console.WriteLine($"{"Original size",-45}{dataSize,10} bytes"); | |
Console.WriteLine($"{"Actual encoded size",-45}{resultSize,10} bytes"); | |
Console.WriteLine(); | |
Console.WriteLine($"{"Time for encoding",-50}{sw.Elapsed.TotalSeconds:N3} sec."); | |
var ratio = (double)resultSize / dataSize; | |
Console.WriteLine($"{"Compression ratio",-50}{ratio:N3} of original size."); | |
} | |
else if (args[0][0] == 'd') | |
{ | |
Console.WriteLine($"{"Compressed file size",-45}{GetFileSize(args[1]),10} bytes"); | |
var sw = Stopwatch.StartNew(); | |
using (var compressedFile = File.Open(args[1], FileMode.Open)) | |
using (var dataFile = File.Open(args[2], FileMode.Create)) | |
{ | |
Decode(compressedFile, dataFile); | |
} | |
Console.WriteLine($"{"Decompression is ended, time is",-50}{sw.Elapsed.TotalSeconds:N3} sec."); | |
} | |
else | |
{ | |
Console.WriteLine("Misformatted command."); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment