Clearly the scheme given is not a hash function, in the sense that it is very easy to invert the function to obtain the original message. Hence, with any arbitrary hash, we can easily construct a message with that hash by inverting the operation.
Python Implementation:
xfrom pwn import *
import json
BLOCK_SIZE = 32
# Nothing up my sleeve numbers (ref: Dual_EC_DRBG P-256 coordinates)
W = [0x6b17d1f2, 0xe12c4247, 0xf8bce6e5, 0x63a440f2, 0x77037d81, 0x2deb33a0, 0xf4a13945, 0xd898c296]
X = [0x4fe342e2, 0xfe1a7f9b, 0x8ee7eb4a, 0x7c0f9e16, 0x2bce3357, 0x6b315ece, 0xcbb64068, 0x37bf51f5]
Y = [0xc97445f4, 0x5cdef9f0, 0xd3e05e1e, 0x585fc297, 0x235b82b5, 0xbe8ff3ef, 0xca67c598, 0x52018192]
Z = [0xb28ef557, 0xba31dfcb, 0xdd21ac46, 0xe2a91e3c, 0x304f44cb, 0x87058ada, 0x2cb81515, 0x1e610046]
# Lets work with bytes instead!
W_bytes = b''.join([x.to_bytes(4,'big') for x in W])
X_bytes = b''.join([x.to_bytes(4,'big') for x in X])
Y_bytes = b''.join([x.to_bytes(4,'big') for x in Y])
Z_bytes = b''.join([x.to_bytes(4,'big') for x in Z])
def pad(data):
padding_len = (BLOCK_SIZE - len(data)) % BLOCK_SIZE
return data + bytes([padding_len]*padding_len)
def blocks(data):
return [data[i:(i+BLOCK_SIZE)] for i in range(0,len(data),BLOCK_SIZE)]
def xor(a,b):
return bytes([x^y for x,y in zip(a,b)])
def rotate_left(data, x):
x = x % BLOCK_SIZE
return data[x:] + data[:x]
def rotate_right(data, x):
x = x % BLOCK_SIZE
return data[-x:] + data[:-x]
def scramble_block(block):
for _ in range(40):
block = xor(W_bytes, block)
block = rotate_left(block, 6)
block = xor(X_bytes, block)
block = rotate_right(block, 17)
return block
def cryptohash(msg):
initial_state = xor(Y_bytes, Z_bytes)
msg_padded = pad(msg)
msg_blocks = blocks(msg_padded)
for i,b in enumerate(msg_blocks):
mix_in = scramble_block(b)
for _ in range(i):
mix_in = rotate_right(mix_in, i+11)
mix_in = xor(mix_in, X_bytes)
mix_in = rotate_left(mix_in, i+6)
print("Mix in: ", mix_in.hex())
initial_state = xor(initial_state,mix_in)
return initial_state.hex()
# Revert the scramble function
def unscramble(block):
for _ in range(40):
block = rotate_left(block, 17)
block = xor(X_bytes, block)
block = rotate_right(block, 6)
block = xor(W_bytes, block)
return block
# Target that we want is msg1
msg1 = X_bytes
# msg2 consists of two blocks
msg2 = Y_bytes
target = cryptohash(msg1)
state = cryptohash(msg2)
# Reverse the scrambling stage in the for loop in cryptohash
# scrambling only happens in the 2nd block
mix_in = xor(target, state)
mix_in = rotate_right(mix_in, 7)
mix_in = xor(mix_in, X_bytes)
mix_in = rotate_left(mix_in, 12)
# This is the original block
msg2_block = unscramble(mix_in)
msg2 = msg2 + msg2_block
io = remote('socket.cryptohack.org', 13405)
print(io.recvline())
to_send = dict()
to_send['m1'] = msg1.hex()
to_send['m2'] = msg2.hex()
io.sendline(json.dumps(to_send).encode())
io.interactive()
Other solution take advantage of the flaw in the padding function, kudos to unblvr
:
xxxxxxxxxx
def pad(data):
padding_len = (BLOCK_SIZE - len(data)) % BLOCK_SIZE
return data + bytes([padding_len]*padding_len)
What this function does, is to pad the current block up to a full block size, by appending the number of missing bytes, not unlike the padding scheme in PKCS#7
. The flaw, however, is that a block of the correct size has nothing added to it. This is why in PKCS#7
, a message of the correct block size has another padding block appended to it.
This gives us a easy attack, in particular:
xxxxxxxxxx
cryptohash(b"\x01"*63) = '321225b996fc333535f2e1119c9eaae7c527af87f29f0e0bf41643fd46d00e46'
cryptohash(b"\x01"*64) = '321225b996fc333535f2e1119c9eaae7c527af87f29f0e0bf41643fd46d00e46'
And the two messages are obviously different, one with 63 bytes and one with 64 bytes. After padding, the two messages are the same, as \x01
is appended.
Lastly, a cool solution from aloof
, taking advantage of Sage (or more specifically the symbolic evaluation), to observe the patterns in the output of the hash:
Sage Implementation:
xxxxxxxxxx
F = GF(256)
z = F.gen()
K = PolynomialRing(F, 64, names='x')
BLOCK_SIZE = 32
def int_to_field(a):
return sum(((a >> i) & 1)*z**i for i in range(8))
W = [0x6b17d1f2, 0xe12c4247, 0xf8bce6e5, 0x63a440f2, 0x77037d81, 0x2deb33a0, 0xf4a13945, 0xd898c296]
X = [0x4fe342e2, 0xfe1a7f9b, 0x8ee7eb4a, 0x7c0f9e16, 0x2bce3357, 0x6b315ece, 0xcbb64068, 0x37bf51f5]
Y = [0xc97445f4, 0x5cdef9f0, 0xd3e05e1e, 0x585fc297, 0x235b82b5, 0xbe8ff3ef, 0xca67c598, 0x52018192]
Z = [0xb28ef557, 0xba31dfcb, 0xdd21ac46, 0xe2a91e3c, 0x304f44cb, 0x87058ada, 0x2cb81515, 0x1e610046]
def pad(data):
padding_len = (BLOCK_SIZE - len(data)) % BLOCK_SIZE
return data + bytes([padding_len]*padding_len)
def blocks(data):
return [data[i:(i+BLOCK_SIZE)] for i in range(0,len(data),BLOCK_SIZE)]
def rotate_left(data, x):
x = x % BLOCK_SIZE
return data[x:] + data[:x]
def rotate_right(data, x):
x = x % BLOCK_SIZE
return data[-x:] + data[:-x]
# "int" is added because Sage makes integers in the class "Integer"
W_bytes = b''.join([int(x).to_bytes(4,'big') for x in W])
X_bytes = b''.join([int(x).to_bytes(4,'big') for x in X])
Y_bytes = b''.join([int(x).to_bytes(4,'big') for x in Y])
Z_bytes = b''.join([int(x).to_bytes(4,'big') for x in Z])
# the bytes are converted to field elements
W_bytes = [int_to_field(b) for b in W_bytes]
X_bytes = [int_to_field(b) for b in X_bytes]
Y_bytes = [int_to_field(b) for b in Y_bytes]
Z_bytes = [int_to_field(b) for b in Z_bytes]
# xor "^" replaced with "+", which will act as an xor in the binary finite field
def xor(a,b):
return [x + y for x,y in zip(a,b)]
def scramble_block(block):
for _ in range(40):
block = xor(W_bytes, block)
block = rotate_left(block, 6)
block = xor(X_bytes, block)
block = rotate_right(block, 17)
return block
# the padding is removed: we use the full block size
# and we return the state in the finite field
def cryptohash(msg):
initial_state = xor(Y_bytes, Z_bytes)
#msg_padded = pad(msg)
msg_blocks = blocks(msg)
for i,b in enumerate(msg_blocks):
mix_in = scramble_block(b)
for _ in range(i):
mix_in = rotate_right(mix_in, i+11)
mix_in = xor(mix_in, X_bytes)
mix_in = rotate_left(mix_in, i+6)
initial_state = xor(initial_state,mix_in)
# return initial_state.hex()
return initial_state
# Generate 64 variables
block = K.gens()
print(cryptohash(block))
The output is something like:
xxxxxxxxxx
# [x8 + x35 + (z8^5 + z8^4 + z8),
# x9 + x36 + (z8^4 + z8),
# x10 + x37 + (z8^5 + z8^2 + 1),
# x11 + x38 + (z8^7 + z8^5 + z8^4 + z8^3 + 1),
# x12 + x39 + (z8^7 + z8^4 + z8^2 + z8),
# x13 + x40 + (z8^7 + z8^6 + z8^5 + z8^4 + z8^3 + z8^2),
# x14 + x41 + (z8^5 + z8^4 + z8 + 1),
# x15 + x42 + (z8^5 + z8^4 + z8^2 + 1),
# x16 + x43 + (z8^5 + z8^4 + z8^2 + 1),
# x17 + x44 + (z8^7 + z8^6 + z8^5 + z8^4 + z8),
# x18 + x45 + (z8^7 + z8^6 + z8^5 + 1),
# x19 + x46 + (z8^4 + 1),
# x20 + x47 + (z8^7 + z8^4 + z8^3 + z8^2),
# x21 + x48 + (z8^7 + z8^4 + z8^3 + z8^2 + z8),
# x22 + x49 + (z8^7 + z8^5 + z8^3 + z8),
# x23 + x50 + (z8^7 + z8^6 + z8^5 + z8^2 + z8 + 1),
# x24 + x51 + (z8^7 + z8^6 + z8^2 + 1),
# x25 + x52 + (z8^5 + z8^2 + z8 + 1),
# x26 + x53 + (z8^7 + z8^5 + z8^3 + z8^2 + z8 + 1),
# x27 + x54 + (z8^7 + z8^2 + z8 + 1),
# x28 + x55 + (z8^7 + z8^6 + z8^5 + z8^4 + z8),
# x29 + x56 + (z8^7 + z8^4 + z8^3 + z8^2 + z8 + 1),
# x30 + x57 + (z8^3 + z8^2 + z8),
# x31 + x58 + (z8^3 + z8 + 1),
# x0 + x59 + (z8^7 + z8^6 + z8^5 + z8^4 + z8^2),
# x1 + x60 + (z8^4 + z8^2 + z8),
# x2 + x61 + (z8^6 + z8 + 1),
# x3 + x62 + (z8^7 + z8^6 + z8^5 + z8^4 + z8^3 + z8^2 + 1),
# x4 + x63 + (z8^6 + z8^2 + z8),
# x5 + x32 + (z8^7 + z8^6 + z8^4),
# x6 + x33 + (z8^3 + z8^2 + z8),
# x7 + x34 + (z8^6 + z8^2 + z8)]
The
This leads to the solution of:
m1 = '00'*64
m2 = '01'*64