Last active
October 30, 2022 16:31
-
-
Save lynzrand/2180dbd7defb6e6dd3b20603efe948e6 to your computer and use it in GitHub Desktop.
用最短的代码画一只刘看山!当然弄巧成拙太长了
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Any | |
import bitstream | |
liukanshan = open("liukanshan.txt", "r").read() | |
lks_lines = liukanshan.splitlines() | |
rle = [] | |
# do run-length encoding of the lines | |
for line in lks_lines: | |
rle_line = [] | |
run = 0 | |
last_c = ' ' | |
for c in line: | |
if c == last_c: | |
run += 1 | |
else: | |
rle_line.append(run) | |
last_c = c | |
run = 1 | |
if (run != 1): | |
rle_line.append(run) | |
rle.append(rle_line) | |
rle_single = [] | |
for line in rle: | |
rle_single += line | |
rle_single += [0] | |
# Generates the huffman encoding for input | |
def gen_huffman(input: list[int]): | |
# count the number of times each value occurs | |
counts = {} | |
for i in input: | |
if i in counts: | |
counts[i] += 1 | |
else: | |
counts[i] = 1 | |
# create a list of tuples (count, value) | |
counts = [(counts[i], i) for i in counts] | |
# sort the list by count | |
counts.sort() | |
# create a list of tuples (count, value, children) | |
counts: list[Any] = [(i[0], i[1], None) for i in counts] | |
# while there are more than 1 items in the list | |
while len(counts) > 1: | |
# get the first two items | |
a = counts.pop(0) | |
b = counts.pop(0) | |
# create a new item with the sum of the counts | |
# and the children being the two items | |
c = (a[0] + b[0], None, (a, b)) | |
# insert the new item into the list | |
counts.append(c) | |
# sort the list | |
counts.sort(key=lambda x: x[0]) | |
# create a list of tuples (value, huffman code) | |
huffman = [] | |
def traverse_tree(node: tuple, code: str): | |
if node[1] is not None: | |
huffman.append((node[1], code)) | |
else: | |
traverse_tree(node[2][0], code + '0') | |
traverse_tree(node[2][1], code + '1') | |
traverse_tree(counts[0], '') | |
return huffman | |
# Generates the canonical huffman encoding for input | |
def gen_canonical_huffman(input: list[int]): | |
# huff is (value, code) | |
huff = gen_huffman(input) | |
# sort the list first by length, then by value | |
huff.sort(key=lambda x: (len(x[1]), x[0])) | |
# Each of the existing codes are replaced with a new one of the same length, using the following algorithm: | |
# | |
# - The first symbol in the list gets assigned a codeword which is the same length as the symbol's original codeword but all zeros. This will often be a single zero ('0'). | |
# - Each subsequent symbol is assigned the next binary number in sequence, ensuring that following codes are always higher in value. | |
# - When you reach a longer codeword, then after incrementing, append zeros until the length of the new codeword is equal to the length of the old codeword. This can be thought of as a left shift. | |
canonical_huff = [] | |
canonical_huff.append((huff[0][0], "0" * len(huff[0][1]))) | |
for code in huff[1:]: | |
# get the last code | |
last_code = canonical_huff[-1][1] | |
# convert the last code to an int | |
last_code_i = int(last_code, 2) | |
# increment the last code | |
last_code_i += 1 | |
def pad_zeros(code: str, length: int): | |
while len(code) < length: | |
code = "0" + code | |
return code | |
# pad the last code with zeros | |
this_code = pad_zeros(bin(last_code_i)[2:], len(last_code)) | |
# append zeros until the length of the new code is equal to the length of the old code | |
while len(this_code) < len(code[1]): | |
this_code += '0' | |
canonical_huff.append((code[0], this_code)) | |
return canonical_huff | |
def encode_canonical_huffman(huffman: list[tuple[int, str]]): | |
huffman.sort(key=lambda x: x[1]) | |
encoded = [] | |
last_len = -1 | |
last_sym = 0 | |
for sym, code in huffman: | |
if len(code) != last_len: | |
encoded.append(-(len(code))) | |
last_len = len(code) | |
last_sym = 0 | |
encoded.append(sym - last_sym) | |
last_sym = sym | |
return encoded | |
# Also encode the input using the canonical huffman encoding | |
def encode_with_huffman(input: list[int], | |
huffman: list[tuple[int, str]]) -> bitstream.BitStream: | |
# sort the huffman encoding by value | |
huffman.sort(key=lambda x: x[0]) | |
# create a dict of value -> code | |
huffman_dict = {} | |
for value, code in huffman: | |
huffman_dict[value] = code | |
# encode the input | |
bs = bitstream.BitStream() | |
for i in input: | |
huff_code = huffman_dict[i] | |
# print("writing", i, huff_code) | |
for c in huff_code: | |
bs.write(c == '1', bool) | |
return bs | |
def huffman_tree_to_str(encoded_huff: list[int]): | |
s = "" | |
for i in encoded_huff: | |
s += chr(ord('A') + i) | |
return s | |
# The main encoding is a bitstream encoded as a custom base64 table from ASCII | |
# code 35 to 99. Bits is encoded from lower to higher in each 6-bit number. | |
# Like this: | |
# bitstream: 001110111000101010 | |
# ==> 001110 111000 101010 | |
# ==> | 011100 | 000111 | 010101 | | |
# This is to make reading easier, since shifting right is all you need. | |
def base64_encode_bitstream(encoded: bitstream.BitStream): | |
encoded = encoded.copy() | |
curr = 0 | |
offset = 0 | |
s = "" | |
l = len(encoded) | |
while offset < l: | |
curr |= encoded.read(bool) << (offset % 6) | |
offset += 1 | |
if offset % 6 == 0: | |
s += chr(curr + 35) | |
curr = 0 | |
if offset % 6 != 0: | |
s += chr(curr + 35) | |
return s | |
huffman = gen_canonical_huffman(rle_single) | |
encoded_huffman = encode_canonical_huffman(huffman) | |
# if we use 4 bits per symbol, the length of the canonical huffman encoding is | |
print(huffman) | |
print(encoded_huffman) | |
print(huffman_tree_to_str(encoded_huffman)) | |
encoded_text = encode_with_huffman(rle_single, huffman) | |
str_encoded = base64_encode_bitstream(encoded_text) | |
print(rle_single) | |
# print(encoded_text) | |
print(str_encoded) | |
print(len(str_encoded)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 原始版本 | |
#include <stdio.h> | |
// huffman tree encoding: | |
// if a node at pos [i] is positive, it is the number that the code is | |
// representing. else, it has two children located at [2*i] and [2*i+1] | |
int huffman[512]; | |
// int encoded_huff[] = {-2, 0, -3, 2, 8, -4, 1, -5, 3, 2, 1, 3, 15, 8, | |
// 25, | |
// -6, 4, 3, 1, 3, 1, 13, 20, 11, -7, 13, 2, 2, 2, 7, | |
// 3, 8, -8, 20, 13, 9, 4, 1, 4, 3, 1, 3, 3}; | |
// offset by 'A' | |
char *encoded_huff = ">ADH=CEB<BDEBCV[;FHBMBTM:OEDGDIY9PBDBEJBJDCEDBD"; | |
int code_to_offset(int code, int len) { | |
int c = 1; | |
for (int i = 0; i < len; i++) { | |
c = c * 2 + (code >> (len - i - 1)) % 2; | |
} | |
return c; | |
} | |
/* | |
Given a list of symbols sorted by bit-length, the following pseudocode will | |
print a canonical Huffman code book: | |
code := 0 | |
while more symbols do | |
print symbol, code | |
code := (code + 1) << ((bit length of the next symbol) − (current bit | |
length)) | |
*/ | |
void decode_huff() { | |
for (int i = 0; i < 512; i++) { | |
huffman[i] = -1; | |
} | |
int code = 0; // current huffman code | |
int sym = 0; // current symbol | |
int curr_len = 0; // current huffman code length | |
for (int i = 0; encoded_huff[i] != 0; i++) { | |
int c = encoded_huff[i] - 'A'; | |
if (c < 0) { | |
// c is -len | |
curr_len = -c; | |
sym = 0; | |
code <<= 1; | |
} else { | |
sym += c; | |
int off = code_to_offset(code, curr_len); | |
huffman[off] = sym; | |
// printf("decode: %i, code %x, len %i\n", sym, code, curr_len); | |
code++; | |
} | |
} | |
} | |
/* | |
The main encoding is a bitstream encoded as a custom base64 table from ASCII | |
code 35 to 99. Bits is encoded from lower to higher in each 6-bit number. | |
Like this: | |
bitstream: 001110111000101010 | |
==> 001110 111000 101010 | |
==> | 011100 | 000111 | 010101 | | |
This is to make reading easier, since shifting right is all you need. | |
*/ | |
int refill_base64(char *b64) { | |
int v = *b64++ - 35; | |
v |= (*b64++ - 35) << 6; | |
v |= (*b64 - 35) << 12; | |
return v; | |
} | |
void decode_and_print(char *data, int len) { | |
char *init = data + len; | |
int decoded = 0; // currently decoded data | |
int decoded_len = 0; // decoded data length (bits) | |
int rl = 0; | |
int color = 0; // 0: ' ', 1: '@' | |
// decode the next run-length encoding | |
while (1) { // outer loop: for each rle | |
int huff_entry = 1; // Node ID in tree | |
while (huffman[huff_entry] < 0) { // inner loop: for each bit | |
// < 0 == not leaf | |
if (!decoded_len) { | |
// refill bits | |
decoded = refill_base64(data); | |
decoded_len = 18; | |
data += 3; | |
} | |
int b = decoded % 2; | |
// printf("%i", b); | |
decoded >>= 1; | |
decoded_len--; | |
huff_entry = huff_entry * 2 + b; | |
} | |
int rle = huffman[huff_entry]; | |
// printf("%i ", rle); | |
for (int i = 0; i < rle; i++) { | |
putchar(color ? '@' : ' '); | |
} | |
color = !color; | |
if (rle == 0) { | |
// line feed, reset color | |
color = 0; | |
putchar('\n'); | |
} | |
if (data > init) | |
break; | |
} | |
} | |
int main() { | |
decode_huff(); | |
decode_and_print( | |
"B(R9Sa?^'YL*OG'YPII'=\\>Y,6%=a??PE-J=_\\N(7<^W@*UA[3Yb1'2+':`0%TX;H3?>=" | |
"F3_C3_CD$TD<(+1O7$E&=1'b/" | |
"Z'':LMP#EN<8='EN<(V'E>1X%D[XHV'EZE.'E&ES*'7&%.J\\C+<1*'L7S$4-(?3GOS)')@^" | |
"CCPb+_UR'V\\B+9Ia1FJE^(_UU_(_Y*K^8*@R]K^8*@R]K^8*`573R]8_A7%#", | |
225); | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@@@@@@@ @@@@@@@@ | |
@@@@@@@@@@@@ @@@@@@@@@@@@@@ | |
@@@ @@@@@@@ @@@@@@@@ @@@@@@@@@@ | |
@@ @@@@@@@@@@ @@@@@@@@@@ | |
@@@@ @@ @@ @@@@@@@@@@ | |
@@@@@@@@@@@@ @@@@@@@ @@@@@@@@@ | |
@@@ @@@@@@@@@@@ @@@@@@@@@@@@@@@@ @@@@@@@@ | |
@@@ @@@@@@@@@@@@@@ @@@@@@ | |
@@ @@@@@@ | |
@@ @@@@@@ | |
@@@ @@@ | |
@@@ @@@@@@ @@@ | |
@@@ @@@@@@@ @@@ | |
@@@ @@@@@ @@@ | |
@@@ @@@ | |
@@@ @@@ @@ | |
@@@ @@ @@@@@@@@ | |
@@@ @@ @@@@@@@@ | |
@@@ @@@ @@@@@@ | |
@@@ @@@@ @@@@@@@@@@ | |
@@@ @@@ @@@@@@@@@ @@@@@@@ | |
@@@ @@@@ @@@@@@@@ @@@ | |
@@@ @@@@ @@@@@@@ @@@ | |
@@@ @@@@@@ @@@@@@@@@@ @@@ | |
@@@ @@@@@@@@@@@@@ @@@ | |
@@@ @@@@@@@ @@@ | |
@@@ @@@ | |
@@@@@@ @@@ | |
@@@@@@@@@@ @@@ | |
@@@@ @@@ @@@ | |
@@ @@@ @@@ | |
@ @@@ @@@ | |
@ @@@ @@@ | |
@@ @@ @@@ | |
@@@ @@ @@@ | |
@@@@@@@@@@@ @@@ | |
@@@@@@@@ @@@ | |
@@@@ @@@ | |
@@@@@ @@@@ | |
@@@@@@@ @@@@@@@@ | |
@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@@@@ | |
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | |
@@@@@@ @@@@@@ | |
@@@@@@ @@@@@@ | |
@@@@@@ @@@@@@ | |
@@@@@@ @@@@@@ | |
@@@@@@ @@@@@@ | |
@@@@@@@@ @@@@@@@@ | |
@@@@@@@@@ @@@@@@@@@ | |
@@@@@@@ @@@@@@@ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 半压缩版本 | |
char * | |
E = ">ADH=CEB<BDEBCV[;FHBMBTM:OEDGDIY9PBDBEJBJDCEDBD", | |
*B = | |
"B(R9Sa?^'YL*OG'YPII'=\\>Y,6%=a??PE-J=_\\N(7<^W@*UA[3Yb1'2+':`0%TX;H3?>=" | |
"F3_C3_CD$TD<(+1O7$E&=1'b/" | |
"Z'':LMP#EN<8='EN<(V'E>1X%D[XHV'EZE.'E&ES*'7&%.J\\C+<1*'L7S$4-(?3GOS)')@" | |
"^" | |
"CCPb+_UR'V\\B+9Ia1FJE^(_UU_(_Y*K^8*@R]K^8*@R]K^8*`573R]8_A7%#", | |
*F; | |
d, l, r, c, h, b, i, H[512], N = 35; | |
main() { | |
i = 512; | |
while (--i) | |
H[i] = -1; | |
d = l = r = i = 0; | |
for (; E[i]; i++) { | |
c = E[i] - 65; | |
if (c < 0) { | |
r = -c; | |
l = 0; | |
d *= 2; | |
} else { | |
l += c; | |
c = 1; | |
for (b = 0; b < r; b++) { | |
c = c * 2 + (d >> (r - b - 1)) % 2; | |
} | |
H[c] = l; | |
d++; | |
} | |
} | |
F = B + 225; | |
d = l = r = c = 0; | |
while (B < F) { // outer loop: for each rle | |
h = 1; // Node ID in tree | |
while (H[h] < 0) { // inner loop: for each bit | |
// < 0 == not leaf | |
if (!l--) { | |
d = *B++ - N; | |
d |= (*B++ - N) << 6; | |
d |= (*B++ - N) << 12; | |
l = 17; | |
} | |
h = h * 2 + d % 2; | |
d /= 2; | |
} | |
b = H[h]; | |
// printf("%i ", b); | |
for (i = 0; i < b; i++) | |
putchar(c ? '@' : ' '); | |
c = !c; | |
if (!b) { | |
c = 0; | |
putchar('\n'); | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 全压缩版本 | |
char*E=">ADH=CEB<BDEBCV[;FHBMBTM:OEDGDIY9PBDBEJBJDCEDBD",*B="B(R9Sa?^'YL*OG'YPII'=\\>Y,6%=a??PE-J=_\\N(7<^W@*UA[3Yb1'2+':`0%TX;H3?>=F3_C3_CD$TD<(+1O7$E&=1'b/Z'':LMP#EN<8='EN<(V'E>1X%D[XHV'EZE.'E&ES*'7&%.J\\C+<1*'L7S$4-(?3GOS)')@^CCPb+_UR'V\\B+9Ia1FJE^(_UU_(_Y*K^8*@R]K^8*@R]K^8*`573R]8_A7%#",*F;d,l,r,c,h,b,i,H[512];main(){i=512;while(--i)H[i]=-1;d=l=r=i=0;for(;E[i];i++){c=E[i]-65;if(c<0){r=-c;l=0;d*=2;}else{l+=c;c=1;for(int i=0;i<r;i++){c=c*2+(d>>(r-i-1))%2;}H[c]=l;d++;}}F=B+225;d=l=r=c=0;while(B<F){h=1;while(H[h]<0){if(!l--){d=*B++-35;d|=(*B++-35)<<6;d|=(*B++-35)<<12;l=17;}h=h*2+d%2;d/=2;}b=H[h];for(i=0;i<b;i++)putchar(c?'@':' ');c=!c;if(!b){c=0;putchar('\n');}}} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment