Last active
May 24, 2024 08:18
-
-
Save UserUnknownFactor/8f70ddc3dc33b07e158d19aaa335c887 to your computer and use it in GitHub Desktop.
Pure Python implementation of MD5 algorithm for experiments and custom mods
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# RSA Data Security, Inc., MD5 message-digest algorithm | |
# Copyright (C) 1991-1992, RSA Data Security, Inc. | |
""" | |
## pymd5 module | |
### The MD5 hash implementation in pure Python | |
The module exposes same methods as hashlib.md5 and a couple of | |
low-level methods to help with crypto experiments. | |
""" | |
import struct | |
# Left-rotation amounts used by the compression function; the first digit
# is the round (1-4) and the second is the position within each group of
# four steps.
S11, S12, S13, S14 = 7, 12, 17, 22
S21, S22, S23, S24 = 5, 9, 14, 20
S31, S32, S33, S34 = 4, 11, 16, 23
S41, S42, S43, S44 = 6, 10, 15, 21

# One full block of padding: a single 0x80 marker byte followed by zeros.
# padding() slices as many bytes from the front as it needs.
PADDING = b"\x80" + bytes(63)

# The third and fourth words of the standard initial state; the first two
# words are their 32-bit bitwise complements.
MD5_A = 0x98BADCFE
MD5_B = 0x10325476
MD5_STANDARD_INIT = (MD5_A ^ 0xFFFFFFFF, MD5_B ^ 0xFFFFFFFF, MD5_A, MD5_B)
# F, G, H and I: basic MD5 functions. | |
def F(x, y, z):
    """Round-1 function: for each bit, pick ``y`` where ``x`` is 1, else ``z``."""
    # Equivalent to (x & y) | (~x & z): start from z and flip exactly the
    # bits where x is set and y differs from z.
    return z ^ (x & (y ^ z))
def G(x, y, z):
    """Round-2 function: for each bit, pick ``x`` where ``z`` is 1, else ``y``."""
    # Equivalent to (x & z) | (y & ~z): start from y and flip exactly the
    # bits where z is set and x differs from y.
    return y ^ (z & (x ^ y))
def H(x, y, z):
    """Round-3 function: bitwise parity (XOR) of the three words."""
    parity = x ^ y
    parity ^= z
    return parity
def I(x, y, z):
    """Round-4 function: ``y XOR (x OR NOT z)`` as a 32-bit word.

    Python's ``~`` yields a negative integer, so the raw expression would
    return a negative value (e.g. -1 instead of 0xFFFFFFFF). The result is
    masked to 32 bits so it is a proper unsigned word; callers that mask
    afterwards see the same low 32 bits as before.
    """
    return (y ^ (x | ~z)) & 0xFFFFFFFF
def ROTATE_LEFT(x, n):
    """Rotate the low 32 bits of ``x`` left by ``n`` bit positions.

    ``x`` is first truncated to an unsigned 32-bit word, so negative or
    oversized inputs are accepted. The result is always in [0, 2**32).
    """
    word = x & 0xffffffff  # make shift unsigned
    spilled_high = word << n        # bits pushed past bit 31 are masked off below
    wrapped_low = word >> (32 - n)  # the same bits re-entering at the bottom
    return (spilled_high | wrapped_low) & 0xffffffff
# FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4. | |
# Rotation is separate from addition to prevent recomputation. | |
def FF(a, b, c, d, x, s, ac):
    """Perform one round-1 step and return the new value for ``a``.

    Computes ``b + rotl32(a + F(b, c, d) + x + ac, s)`` where ``a``..``d``
    are state words, ``x`` is a message word, ``s`` the rotation amount and
    ``ac`` the additive step constant. The caller must assign the result
    back to its ``a`` variable.

    The final sum is reduced modulo 2**32 so the returned word never
    exceeds 32 bits (the unreduced sum could carry into bit 32).
    """
    mixed = a + F(b, c, d) + x + ac
    # ROTATE_LEFT truncates its input to 32 bits before rotating.
    return (ROTATE_LEFT(mixed, s) + b) & 0xffffffff
def GG(a, b, c, d, x, s, ac):
    """Perform one round-2 step and return the new value for ``a``.

    Computes ``b + rotl32(a + G(b, c, d) + x + ac, s)`` where ``a``..``d``
    are state words, ``x`` is a message word, ``s`` the rotation amount and
    ``ac`` the additive step constant. The caller must assign the result
    back to its ``a`` variable.

    The final sum is reduced modulo 2**32 so the returned word never
    exceeds 32 bits (the unreduced sum could carry into bit 32).
    """
    mixed = a + G(b, c, d) + x + ac
    # ROTATE_LEFT truncates its input to 32 bits before rotating.
    return (ROTATE_LEFT(mixed, s) + b) & 0xffffffff
def HH(a, b, c, d, x, s, ac):
    """Perform one round-3 step and return the new value for ``a``.

    Computes ``b + rotl32(a + H(b, c, d) + x + ac, s)`` where ``a``..``d``
    are state words, ``x`` is a message word, ``s`` the rotation amount and
    ``ac`` the additive step constant. The caller must assign the result
    back to its ``a`` variable.

    The final sum is reduced modulo 2**32 so the returned word never
    exceeds 32 bits (the unreduced sum could carry into bit 32).
    """
    mixed = a + H(b, c, d) + x + ac
    # ROTATE_LEFT truncates its input to 32 bits before rotating.
    return (ROTATE_LEFT(mixed, s) + b) & 0xffffffff
def II(a, b, c, d, x, s, ac):
    """Perform one round-4 step and return the new value for ``a``.

    Computes ``b + rotl32(a + I(b, c, d) + x + ac, s)`` where ``a``..``d``
    are state words, ``x`` is a message word, ``s`` the rotation amount and
    ``ac`` the additive step constant. The caller must assign the result
    back to its ``a`` variable.

    The final sum is reduced modulo 2**32 so the returned word never
    exceeds 32 bits (the unreduced sum could carry into bit 32).
    """
    mixed = a + I(b, c, d) + x + ac
    # ROTATE_LEFT truncates its input to 32 bits before rotating.
    return (ROTATE_LEFT(mixed, s) + b) & 0xffffffff
class md5(object):
    """Pure-Python MD5 hash object.

    Optional advanced parameters allow you to resume an earlier computation
    by supplying the internal state, the counter of message bits processed
    so far and the pending (not yet compressed) buffer.
    Most of the interface matches Python's standard hashlib.

    ### Attributes:
    `digest_size`: The size of the resulting hash in bytes (16).
    `block_size`: The internal block size of the hash algorithm in bytes (64).

    ### Examples:
    To obtain the digest of a byte string:
    >>> import pymd5
    >>> m = pymd5.md5()
    >>> m.update(b"Nobody inspects")
    >>> m.update(b" the spammish repetition")
    >>> m.hexdigest()
    'bb649c83dd1ea5c9d9dec9a18df0ffe9'

    More condensed:
    >>> pymd5.md5(b"Nobody inspects the spammish repetition").hexdigest()
    'bb649c83dd1ea5c9d9dec9a18df0ffe9'
    """

    digest_size = 16  # size of the resulting hash in bytes
    block_size = 64  # hash algorithm's internal block size

    def __init__(self, data: bytes = b'', state: tuple = None, count: int = 0, buffer=b''):
        """Create a new md5 hash object.

        Args:
            data: Initial bytes to hash; processed after the state is set up.
            state: Four 32-bit words to resume from, or None for the
                standard initial state.
            count: Number of message bits processed so far.
            buffer: Pending bytes of the current, incomplete block. When
                resuming with a ``count`` that is not a multiple of the
                block size, the buffer has to be restored as well.
        """
        self.buffer = buffer
        if state is None:
            # initial state defined by standard
            self.state = MD5_STANDARD_INIT  # (0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476)
        else:
            # Stored as a tuple so getstate() can concatenate to it.
            self.state = tuple(state)
        self.count = 0 if count is None else count
        if data:
            self.update(data)

    def update(self, data: bytes) -> None:
        """Feed more bytes into the hash.

        Repeated calls are equivalent to a single call with the
        concatenation of all the arguments.
        """
        inputLen = len(data)
        # Byte offset inside the current 64-byte block (count is in bits).
        index = int(self.count >> 3) & 0x3F
        self.count = self.count + (inputLen << 3)  # update number of bits
        partLen = md5.block_size - index
        # apply compression function to as many blocks as we have
        if inputLen >= partLen:
            # Complete the partially filled block first.
            self.buffer = self.buffer[:index] + data[:partLen]
            self.state = self._compress(self.buffer)
            i = partLen
            # Then consume whole blocks straight from the input.
            while i + 63 < inputLen:
                self.state = self._compress(data[i:i + md5.block_size])
                i = i + md5.block_size
            index = 0
        else:
            i = 0
        # buffer remaining input
        self.buffer = self.buffer[:index] + data[i:inputLen]

    def digest(self, keep_state=False) -> bytes:
        """Return the MD5 hash of the data passed to update() so far.

        The result is ``digest_size`` bytes long. The buffer and bit count
        are always restored afterwards so hashing can continue; the state
        words are restored too unless ``keep_state`` is true, in which case
        the post-padding state is left in ``self.state``.
        """
        _buffer, _count, _state = self.buffer, self.count, self.state
        self.update(self.padding())
        result = self.state
        self.buffer, self.count = _buffer, _count
        if not keep_state:
            self.state = _state
        return md5._encode(result, md5.digest_size)

    def hexdigest(self) -> str:
        """Like digest() except the hash value is returned
        as a string of hexadecimal digits.
        """
        return self.digest().hex()

    def getstate(self) -> tuple:
        """Return the current state as a 5-tuple.

        The first four items are the state words and the fifth is the
        number of message bits processed.
        """
        return tuple(self.state) + (self.count,)

    def setstate(self, state: tuple) -> None:
        """Restore the state from a 5-tuple as produced by getstate().

        The first four items are the state words and the fifth is the
        number of message bits processed.
        """
        self.state = tuple(state[:4])
        # The bit count is the fifth item, i.e. index 4 (index 5 would be
        # out of range for the tuple getstate() returns).
        self.count = state[4]

    def padding(self, message_bits: int = None, offset=0) -> bytes:
        """Generate the padding that should be appended to a message
        of the given size to reach a multiple of the block size.

        Args:
            message_bits: Message length in bits; defaults to the number of
                bits processed by this object so far.
            offset: Extra bit count added to the encoded length field only.
        """
        if message_bits is None:
            message_bits = self.count
        index = int((message_bits >> 3) & 0x3f)
        # Pad up to byte 56 of a block, spilling into the next block when
        # fewer than 8 bytes would remain for the length field.
        padLen = 56 - index if index < 56 else 120 - index
        # (the last 8 bytes store the number of bits in the message)
        return PADDING[:padLen] + struct.pack('<Q', offset + message_bits)

    def _compress(self, block: bytes) -> tuple:
        """The MD5 compression function.

        Combines the current ``self.state`` (four 32-bit words) with one
        512-bit message block and returns the new four-word state tuple.
        ``self.state`` itself is not modified.
        """
        a, b, c, d = self.state
        x = md5._decode(block, md5.block_size)

        # Round 1
        a = FF(a, b, c, d, x[ 0], S11, 0xd76aa478)  # 1
        d = FF(d, a, b, c, x[ 1], S12, 0xe8c7b756)  # 2
        c = FF(c, d, a, b, x[ 2], S13, 0x242070db)  # 3
        b = FF(b, c, d, a, x[ 3], S14, 0xc1bdceee)  # 4
        a = FF(a, b, c, d, x[ 4], S11, 0xf57c0faf)  # 5
        d = FF(d, a, b, c, x[ 5], S12, 0x4787c62a)  # 6
        c = FF(c, d, a, b, x[ 6], S13, 0xa8304613)  # 7
        b = FF(b, c, d, a, x[ 7], S14, 0xfd469501)  # 8
        a = FF(a, b, c, d, x[ 8], S11, 0x698098d8)  # 9
        d = FF(d, a, b, c, x[ 9], S12, 0x8b44f7af)  # 10
        c = FF(c, d, a, b, x[10], S13, 0xffff5bb1)  # 11
        b = FF(b, c, d, a, x[11], S14, 0x895cd7be)  # 12
        a = FF(a, b, c, d, x[12], S11, 0x6b901122)  # 13
        d = FF(d, a, b, c, x[13], S12, 0xfd987193)  # 14
        c = FF(c, d, a, b, x[14], S13, 0xa679438e)  # 15
        b = FF(b, c, d, a, x[15], S14, 0x49b40821)  # 16

        # Round 2
        a = GG(a, b, c, d, x[ 1], S21, 0xf61e2562)  # 17
        d = GG(d, a, b, c, x[ 6], S22, 0xc040b340)  # 18
        c = GG(c, d, a, b, x[11], S23, 0x265e5a51)  # 19
        b = GG(b, c, d, a, x[ 0], S24, 0xe9b6c7aa)  # 20
        a = GG(a, b, c, d, x[ 5], S21, 0xd62f105d)  # 21
        d = GG(d, a, b, c, x[10], S22, 0x2441453)  # 22
        c = GG(c, d, a, b, x[15], S23, 0xd8a1e681)  # 23
        b = GG(b, c, d, a, x[ 4], S24, 0xe7d3fbc8)  # 24
        a = GG(a, b, c, d, x[ 9], S21, 0x21e1cde6)  # 25
        d = GG(d, a, b, c, x[14], S22, 0xc33707d6)  # 26
        c = GG(c, d, a, b, x[ 3], S23, 0xf4d50d87)  # 27
        b = GG(b, c, d, a, x[ 8], S24, 0x455a14ed)  # 28
        a = GG(a, b, c, d, x[13], S21, 0xa9e3e905)  # 29
        d = GG(d, a, b, c, x[ 2], S22, 0xfcefa3f8)  # 30
        c = GG(c, d, a, b, x[ 7], S23, 0x676f02d9)  # 31
        b = GG(b, c, d, a, x[12], S24, 0x8d2a4c8a)  # 32

        # Round 3
        a = HH(a, b, c, d, x[ 5], S31, 0xfffa3942)  # 33
        d = HH(d, a, b, c, x[ 8], S32, 0x8771f681)  # 34
        c = HH(c, d, a, b, x[11], S33, 0x6d9d6122)  # 35
        b = HH(b, c, d, a, x[14], S34, 0xfde5380c)  # 36
        a = HH(a, b, c, d, x[ 1], S31, 0xa4beea44)  # 37
        d = HH(d, a, b, c, x[ 4], S32, 0x4bdecfa9)  # 38
        c = HH(c, d, a, b, x[ 7], S33, 0xf6bb4b60)  # 39
        b = HH(b, c, d, a, x[10], S34, 0xbebfbc70)  # 40
        a = HH(a, b, c, d, x[13], S31, 0x289b7ec6)  # 41
        d = HH(d, a, b, c, x[ 0], S32, 0xeaa127fa)  # 42
        c = HH(c, d, a, b, x[ 3], S33, 0xd4ef3085)  # 43
        b = HH(b, c, d, a, x[ 6], S34, 0x4881d05)  # 44
        a = HH(a, b, c, d, x[ 9], S31, 0xd9d4d039)  # 45
        d = HH(d, a, b, c, x[12], S32, 0xe6db99e5)  # 46
        c = HH(c, d, a, b, x[15], S33, 0x1fa27cf8)  # 47
        b = HH(b, c, d, a, x[ 2], S34, 0xc4ac5665)  # 48

        # Round 4
        a = II(a, b, c, d, x[ 0], S41, 0xf4292244)  # 49
        d = II(d, a, b, c, x[ 7], S42, 0x432aff97)  # 50
        c = II(c, d, a, b, x[14], S43, 0xab9423a7)  # 51
        b = II(b, c, d, a, x[ 5], S44, 0xfc93a039)  # 52
        a = II(a, b, c, d, x[12], S41, 0x655b59c3)  # 53
        d = II(d, a, b, c, x[ 3], S42, 0x8f0ccc92)  # 54
        c = II(c, d, a, b, x[10], S43, 0xffeff47d)  # 55
        b = II(b, c, d, a, x[ 1], S44, 0x85845dd1)  # 56
        a = II(a, b, c, d, x[ 8], S41, 0x6fa87e4f)  # 57
        d = II(d, a, b, c, x[15], S42, 0xfe2ce6e0)  # 58
        c = II(c, d, a, b, x[ 6], S43, 0xa3014314)  # 59
        b = II(b, c, d, a, x[13], S44, 0x4e0811a1)  # 60
        a = II(a, b, c, d, x[ 4], S41, 0xf7537e82)  # 61
        d = II(d, a, b, c, x[11], S42, 0xbd3af235)  # 62
        c = II(c, d, a, b, x[ 2], S43, 0x2ad7d2bb)  # 63
        b = II(b, c, d, a, x[ 9], S44, 0xeb86d391)  # 64

        # Add the working words back into the incoming state, modulo 2**32.
        return (0xffffffff & (self.state[0] + a),
                0xffffffff & (self.state[1] + b),
                0xffffffff & (self.state[2] + c),
                0xffffffff & (self.state[3] + d),)

    @staticmethod
    def _encode(buffer: tuple, length: int) -> bytes:
        """Pack the first ``length // 4`` words of ``buffer`` into bytes.

        Words are written as little-endian unsigned 32-bit integers
        regardless of the host byte order, as MD5 requires.
        """
        k = length // 4
        return struct.pack(f"<{k}I", *buffer[:k])

    @staticmethod
    def _decode(buffer: bytes, length: int) -> list:
        """Unpack the first ``length`` bytes of ``buffer`` into a list of
        little-endian unsigned 32-bit words (host byte order is ignored).
        """
        k = length // 4
        return list(struct.unpack(f"<{k}I", buffer[:k * 4]))
def test(buffer=b"jk8ssl"):
    """Print the digest of ``buffer`` twice for a visual comparison.

    The first line comes from this module's md5 class, the second from
    the standard library's hashlib implementation; the two lines should
    be identical.
    """
    ours = md5(buffer)
    print(ours.hexdigest())
    # Imported lazily, after our own result has been printed.
    from hashlib import md5 as reference_md5
    reference = reference_md5(buffer)
    print(reference.hexdigest())
# Run the side-by-side comparison only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    test()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment