Created
August 9, 2015 00:45
Revisions
-
snarkbait created this gist
Aug 9, 2015 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,273 @@ /* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ import java.util.Arrays; /** * BitStream class * for reading and writing variable lengths of bits to a byte array * @author /u/Philboyd_Studge */ public class BitStream { private final int DEFAULT_SIZE = 256; // byte array for actual BitStream private byte[] bank; // boolean array representation of a single byte private boolean[] bits = new boolean[8]; // current position in bit array private int bitPosition; // current position in byte array private int bytePosition; // are we at the end of the bank private boolean endOfBank = false; // number of leftover bits short of a full byte private byte padBits; private boolean closed = false; /** * Creates a new BitStream of DEFAULT_SIZE */ public BitStream() { bank = new byte[DEFAULT_SIZE]; } /** * Creates a new BitStream of given size * @param size size of BitStream */ public BitStream(int size) { bank = new byte[size]; } /** * Creates a new BitStream from a byte array * assumes last byte is padBits * @param bank byte array of encoded bits */ public BitStream(byte[] bank) { this.bank = bank; this.padBits = bank[bank.length - 1]; pullByte(); } /** * Get the byte array * @return byte array of BitStream */ public byte[] getBank() { return bank; } /** * grow byte array by DEFAULT_SIZE * @return byte array grown by DEFAULT_SIZE */ private byte[] grow() { return Arrays.copyOf(bank, bank.length + DEFAULT_SIZE); } /** * size of array * @return size of array */ public int length() { return bank.length; } /** * returns EOB() or EndOfBank flag * @return true if end of bank */ public boolean EOB() { return endOfBank; } /** * add a single bit to the BitStream * * @param bit <code>true</code> for 1, <code>false</code> for 0 */ public void pushBit(boolean bit) { if (closed) return; bits[bitPosition] = bit; bitPosition++; if (bitPosition > 7) { flush(); } } /** * flush the full byte to the BitStream */ private void flush() { int makeByte = 0; for (int i = 0; i < 8; i++) { // push 1's to the correct bit position if (bits[i]) makeByte |= 1 << 7 - i; } bank[bytePosition] = (byte) (makeByte & 0xff); bytePosition++; if (bytePosition >= bank.length) bank = grow(); bitPosition = 0; clearBits(); } /** * set all bits in the current bit[] array to 0 */ private void clearBits() { for (int i = 0; i < bits.length; i++) { bits[i] = false; } } /** * Push bits to BitStream * length will be based on highest one-bit in integer * @param inBits integer value to push */ public void pushBits(int inBits) { int len = 31 - Integer.numberOfLeadingZeros(inBits); while(len >= 0) { pushBit((inBits & (1 << len)) == 1 << len); len--; } } /** * Push bits to BitStream * pushes a set length of bits, padding with zeroes if necessary * if value length is greater than length, will push entire value * @param value integer value to push * @param length number of bits to push */ public void pushBits(int value, int length) { for (int i = length - 1; i >= 0; i--) { pushBit((value >> i & 1) == 1); } } /** * Push bits to BitStream * pushes a String representation of a binary number * @param bitString */ public void pushBits(String bitString) { for (int i = 0; i < bitString.length(); i++) { pushBit(bitString.charAt(i) == '1'); } } /** * pull a byte of the current byte position * in the array */ private void pullByte() { clearBits(); byte current = bank[bytePosition]; for (int i = 7, j = 0; i >= 0; i--, j++) { int flagBit = 1 << i; if ((current & flagBit) == flagBit) bits[j] = true; } bytePosition++; if (bytePosition > bank.length - 1) endOfBank = true; bitPosition = 0; } /** * Returns a single bit at bitPosition * @return bit boolean true for 1, false for 0 */ public boolean readBit() { boolean bit = bits[bitPosition]; bitPosition++; if (bytePosition == bank.length - 1 && bitPosition > 7 - padBits) endOfBank = true; if (bitPosition > 7) { pullByte(); } return bit; } /** * read <code>length</code> number of bits from the stream * @param length * @return integer */ public int readBits(int length) { int retval = 0; for (int i = length - 1; i >= 0; i--) { retval |= ((readBit()) ? 1 : 0) << i; } return retval; } /** * close stream for writing operations. * will pull the first byte for reading operations. */ public void close() { if (closed) return; closed = true; padBits = (byte)((-(bitPosition) + 8) % 8); if (padBits > 0) { flush(); } bank[bytePosition] = padBits; bank = Arrays.copyOfRange(bank, 0, bytePosition + 1); bitPosition = 0; bytePosition = 0; pullByte(); } @Override public String toString() { if (bitPosition != 0 ) flush(); String retval = ""; for (int i = 0; i < bank.length; i++) { if (i % 24 == 0) retval += "\n"; retval += Integer.toHexString((int) (bank[i] & 0xff)) + " "; } return retval; } } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,390 @@ /* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * Huffman Tree class * for /r/javaexamples * * @author /u/Philboyd_Studge */ import java.util.Arrays; public class HuffmanTree { // the Tree of Trees private Tree htree; // the bit codes and lengths private int[] huffmanCodes = new int[256]; private int[] huffmanLengths = new int[256]; /** * Create Huffman Tree for encoding * and populates the code/length arrays * based on a 256 element array of frequency counts * @param frequencies integer array of frequency counts */ public HuffmanTree(int[] frequencies) { this.htree = getHuffmanTree(frequencies); getCodes(); } /** * Create Huffman Tree for decoding * from a BitStream of the encoded tree * frequencies have been discarded * @param bs BitStream of encoded tree */ public HuffmanTree(BitStream bs) { this.htree = getHuffmanTree(bs); getCodes(); } /** * get Tree * @return Tree Huffman Tree */ public Tree getTree() { return htree; } /** * Encode a string to a huffman-encoded BitStream * @param input String to encode * @return output BitStream encoded stream */ public BitStream encode(String input) { BitStream output = new BitStream(); for (int i = 0; i < input.length() ;i++ ) { output.pushBits(huffmanCodes[(int) input.charAt(i)], huffmanLengths[(int) input.charAt(i)]); } output.close(); return output; } /** * Encode a byte array to a huffman-encoded BitStream * @param input byte[] array to encode * @return output BitStream encoded stream */ public BitStream encode(byte[] input) { BitStream output = new BitStream(); for (int i = 0; i < input.length; i++) { output.pushBits(huffmanCodes[input[i]], huffmanLengths[input[i]]); } output.close(); return output; } /** * Decode a huffman-encoded BitStream to String * @param input BitStream of encoded data * @return output decoded String */ public String decode(BitStream input) { String output = ""; while (!input.EOB()) { output += (char) htree.getCode(input); } return output; } /** * Decode a huffman-encoded BitStream to byte array * @param input BitStream of encoded data * @return output byte array */ public byte[] decodeBytes(BitStream input) { byte[] output = new byte[input.length() * 4]; int counter = 0; while (!input.EOB()) { output[counter] = (byte) (htree.getCode(input) & 0xff); counter++; } return Arrays.copyOfRange(output, 0, counter + 1); } /** * Build Tree from frequency array * Stores them in a Priority Queue * pulls out two smallest and adds them together * creates a new subtree * @param frequencies integer array of frequencies * @return Tree huffman tree */ private Tree getHuffmanTree(int[] frequencies) { BinaryHeap<Tree> heap = new BinaryHeap<>(); for (int i = 0; i < frequencies.length ; i++) { if (frequencies[i] > 0) { // add all frequencies > 0 as new subtree with no children heap.add(new Tree(i, frequencies[i])); } } while (heap.length() > 1) { Tree t1 = heap.remove(); Tree t2 = heap.remove(); heap.add(new Tree(t1, t2)); } return heap.remove(); } /** * Re-build tree from BitStream * frequencies were not stored but are * now unimportant * @param bs BitStream of encoded tree 1 bit + literal byte or 0 * @return Tree Huffman Tree */ public Tree getHuffmanTree(BitStream bs) { if (bs.readBit()) { return new Tree(bs.readBits(8), 0); } else { Tree left = getHuffmanTree(bs); Tree right = getHuffmanTree(bs); return new Tree(left, right); } } /** * Build huffman codes based on current tree */ private void getCodes() { if (htree.root == null) return; getCodes(htree.root); } /** * recursive helper class */ private void getCodes(Node root) { if (!htree.isLeaf(root)) { root.left.huffmanCode = root.huffmanCode << 1; root.left.huffmanLen = root.huffmanLen + 1; getCodes(root.left); root.right.huffmanCode = root.huffmanCode << 1 ^ 1; root.right.huffmanLen = root.huffmanLen + 1; getCodes(root.right); } else { huffmanCodes[root.index] = root.huffmanCode; huffmanLengths[root.index] = root.huffmanLen; } } /** * Show all non-zero codes * */ public void displayCodes() { for (int i = 0; i < 256; i++) { if (huffmanLengths[i] > 0) { System.out.println(i + " : " + toBinaryString(huffmanCodes[i], huffmanLengths[i])); } } } /** * Binary String method with padding/truncating to specified length * @param value integer value to convert * @param length integer length to convert, adding zeroes at the front if necessary * @return retval String binary representation of value at specified length */ public static String toBinaryString(int value, int length) { String retval = ""; for (int i = length - 1; i >= 0; i--) { retval += ((value >> i & 1) == 1) ? "1" : "0"; } return retval; } /** * Tree class */ class Tree implements Comparable<Tree> { Node root; /** * Create tree with childless leaf node * @param index integer of character/byte value * @param frequency count of occuring frequency */ public Tree(int index, int frequency) { root = new Node(index, frequency); } /** * Create subtree with null node as root * and total of two subtree frequencies * @param tree1 left subtree * @param tree2 right subtree */ public Tree(Tree tree1, Tree tree2) { root = new Node(); root.left = tree1.root; root.right = tree2.root; root.frequency = tree1.root.frequency + tree2.root.frequency; } /** * Encode Huffman Tree to BitStream * if leaf node pushes 1 + literal byte * otherwise 0 * @return bs BitStream with encoded tree */ public BitStream encodedTree() { BitStream bs = new BitStream(); encodedTree(root, bs); bs.close(); //System.out.println(bs); return bs; } /** * recursive helper method */ private void encodedTree(Node node, BitStream bs) { if (isLeaf(node)) { bs.pushBit(true); bs.pushBits(node.index, 8); } else { bs.pushBit(false); encodedTree(node.left, bs); encodedTree(node.right, bs); } } /** * Get individual huffman code from current spot in tree * recurses until leaf node found */ public int getCode(BitStream bs) { Node current = root; boolean bit; while (!isLeaf(current)) { bit = bs.readBit(); if (bit) current = current.right; else current = current.left; } return current.index; } /** * is node a leaf node (childless) * @param n Node to test * @return true if node has no children */ public boolean isLeaf(Node n) { return n.left == null; } @Override public int compareTo(Tree t) { return root.frequency - t.root.frequency; } } /** * Node Class * */ class Node { // actual ascii character value int index; // count int frequency; // integer value of huffman code int huffmanCode; // legth of huffman code in bits int huffmanLen; // left child Node left; //right child Node right; /** * Create blank Node */ public Node() { } /** * create Node based on index value and frequency count */ public Node(int index, int frequency) { this.index = index; this.frequency = frequency; } @Override public String toString() { return this.index + " : " + this.frequency; } } } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,68 @@ import java.util.Arrays; public class HuffmanTreeTest { public static int[] getFrequencies(String input) { int[] freq = new int[256]; for (int i = 0; i < input.length(); i++ ) { int value = (int) input.charAt(i); freq[value]++; } return freq; } public static void main(String[] args) { String testString = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt " + "ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco " + "laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in " + "voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat " + "non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."; // get frequency array int[] frequencies = getFrequencies(testString); // get original length int orig = testString.length(); System.out.println("Length of original string: " + orig); System.out.println("\nFrequency array:"); System.out.println(Arrays.toString(frequencies)); // create huffman tree from frequencies HuffmanTree ht = new HuffmanTree(frequencies); ht.displayCodes(); // get BitStream of tree BitStream bs = ht.getTree().encodedTree(); System.out.println("Huffman Tree BitStream:"); System.out.println(bs); int treelen = bs.getBank().length; System.out.println("Length of tree stream: " + treelen); // encode string to bitstream BitStream os = ht.encode(testString); //BitStream os = ht.encode(testString.getBytes()); System.out.println("Data BitStream:"); System.out.println(os); int datalen = os.getBank().length; System.out.println("length of data stream: " + datalen); System.out.println("Compressed length: " + (datalen + treelen)); System.out.println("Compressed %:" + (((float)(orig - treelen - datalen)/orig) * 100)); // re-create huffman tree from encoded bitstream HuffmanTree dt = new HuffmanTree(bs); dt.displayCodes(); // decode back to string String output = dt.decode(os); System.out.println(output); } } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,90 @@ Length of original string: 445 Frequency array: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0 , 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 , 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 3, 16, 18, 37, 3, 3, 1, 42, 0, 0, 21, 17, 24, 29, 11, 5, 22, 18 , 32, 28, 3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 32 : 110 44 : 0101111 46 : 000000 68 : 000011010 69 : 000011011 76 : 00001000 85 : 00001001 97 : 1000 98 : 0101100 99 : 10110 100 : 11101 101 : 1111 102 : 0101101 103 : 0101110 104 : 00001100 105 : 001 108 : 0001 109 : 10111 110 : 0110 111 : 1001 112 : 01010 113 : 000001 114 : 0100 115 : 11100 116 : 1010 117 : 0111 118 : 0000111 120 : 0000101 Huffman Tree BitStream: 2 5d 71 14 ca ad e0 b4 28 94 5b b5 b2 d2 5c 97 2 c5 66 59 e5 8b 75 d4 58 6d eb a2 c7 6d 48 b 9d 92 ca 1 Length of tree stream: 36 Data BitStream: 8 94 fb e2 ae 3d f7 64 65 37 d 68 bf d2 fd 69 6e 7d ab e9 d3 47 4a 8f 2c 59 76 f1 34 bf 73 fb bb 3b cb f2 f3 dd 5f 75 4a 62 d6 3d 3d 76 ac f5 86 16 4a 7e fa dd 91 94 fd 78 5c d1 a0 48 2f 0 60 9a de c6 fa 3b ae 58 df 7 f6 31 75 f8 17 3c cd 3c a4 7e ef b e9 63 51 46 5b 38 8c 5e d3 86 16 4a 1e 66 3c 39 eb 40 90 5c ab 78 5d f1 ad 37 bc f6 75 a5 b9 e0 bc 50 18 34 e7 9a 1e bf 14 74 fd d9 19 4c 5b 27 a9 3c 33 db be 86 b1 6c 1e 45 d5 51 5f 83 f8 9a df ce 7e b1 11 7b ee c8 ca 7e f7 cb 5d 71 8a cc e2 31 95 8 62 9d 0 c1 b0 b6 f5 57 ba 6e 16 ad 36 b4 7d a2 b5 9d 47 b1 51 59 a5 b2 92 4f 7d a9 7e e3 b5 62 da ce 2a 8c b 9d 2b 56 9b 18 dd fe 7a 3b 56 bc 88 9a d0 c6 f8 f7 7f 2b c 2c 94 7b 80 1 length of data stream: 232 Compressed length: 268 Compressed %:39.775284 32 : 110 44 : 0101111 46 : 000000 68 : 000011010 69 : 000011011 76 : 00001000 85 : 00001001 97 : 1000 98 : 0101100 99 : 10110 100 : 11101 101 : 1111 102 : 0101101 103 : 0101110 104 : 00001100 105 : 001 108 : 0001 109 : 10111 110 : 0110 111 : 1001 112 : 01010 113 : 000001 114 : 0100 115 : 11100 116 : 1010 117 : 0111 118 : 0000111 120 : 0000101 Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliq ua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aut e irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat c upidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.