_g_

Fighting Oreans' VM (code virtualizer flavour)

Rate this Entry
If you don't know what Code Virtualizer is, or how it works, you should read this first:
http://rapidshare.com/files/16968098/Inside_Code_Virtualizer.rar
(Inside Code Virtualizer by scherzo)

Now, as you probably already know from the paper by scherzo, one possible way to recover virtualized code is to identify each mutated handler (i.e. find the corresponding non-mutated version). After this is done, we can trace the virtual opcodes and "decompile" them to VM instructions. Having "clean" decompiled output, we can translate it to x86 assembly. I consider the last step to be a simple "find and replace" job with flex/yacc.

The problem is, Oreans' VM obfuscation engine can be a bitch. Consider this piece of code:

Code:
	push ebx 
	mov ebx 0F06h
	inc ebx 
	shr ebx 15h
	push ecx 
	mov ecx 6156h
	xor ebx ecx
	pop ecx 
	add ebx 4114h
	shl ebx 7
	push ecx 
	mov ecx 51351Ch
	xor ebx ecx
	pop ecx 
	add ebx edi
	mov eax [ebx]
	pop ebx 
	push 67E0h 
	mov [esp] edx
	mov edx 1
	and eax edx
	mov edx [esp]
	push edx 
	mov edx esp
	add edx 4
	add edx 4
	xchg edx [esp]
	pop esp 
	or eax eax
	push eax 
	mov eax 3B02h
	not eax 
	push ecx 
	mov ecx 0FFFFC3FFh
	sub eax ecx
	pop ecx 
	and [edi+1Ch] eax
	mov eax [esp]
	add esp 4
	push 3328h 
	mov [esp] ebx
	mov ebx [esp]
	push ecx 
	mov ecx esp
	add ecx 4
	add ecx 4
	xchg ecx [esp]
	pop esp
It's one of the VM's handlers. Can you tell what it does? Me neither, so let's try to deobfuscate this crap.

It turns out that simple strategies like constant folding (http://en.wikipedia.org/wiki/Constant_folding), dead code elimination (http://en.wikipedia.org/wiki/Dead_code) and peephole optimisation (find and replace :P), plus some stack cleaning, suffice to recover the obfuscated code:

Code:
NEW = 47, OLD = 0, -47 lines removed
################################ NEXT ROUND ###########################
################################ original
push ebx 
mov ebx 00000f06 
inc ebx 
shr ebx 00000015 
push ecx 
mov ecx 00006156 
xor ebx ecx 
pop ecx 
add ebx 00004114 
shl ebx 00000007 
push ecx 
mov ecx 0051351c 
xor ebx ecx 
pop ecx 
add ebx edi 
mov eax [ebx ] 
pop ebx 
push 000067e0 
mov [esp ] edx 
mov edx 00000001 
and eax edx 
mov edx [esp ] 
push edx 
mov edx esp 
add edx 00000004 
add edx 00000004 
xchg edx [esp ] 
pop esp 
or eax eax 
push eax 
mov eax 00003b02 
not eax 
push ecx 
mov ecx ffffc3ff 
sub eax ecx 
pop ecx 
and [edi 0000001c ] eax 
mov eax [esp ] 
add esp 00000004 
push 00003328 
mov [esp ] ebx 
mov ebx [esp ] 
push ecx 
mov ecx esp 
add ecx 00000004 
add ecx 00000004 
xchg ecx [esp ] 
pop esp 
################################ after constant propagation and folding
push ebx 
mov ebx 00000000 
push ecx 
mov ecx 00006156 
xor ebx 00006156 
pop ecx 
add ebx 00004114 
shl ebx 00000007 
push ecx 
mov ecx 0051351c 
xor ebx 0051351c 
pop ecx 
add ebx edi 
mov eax [ebx ] 
pop ebx 
push 000067e0 
mov [esp ] edx 
mov edx 00000001 
and eax 00000001 
mov edx [esp ] 
push edx 
mov edx esp 
add edx 00000004 
add edx 00000004 
xchg edx [esp ] 
pop esp 
or eax eax 
push eax 
mov eax ffffc4fd 
push ecx 
mov ecx ffffc3ff 
sub eax ffffc3ff 
pop ecx 
and [edi 0000001c ] eax 
mov eax [esp ] 
add esp 00000004 
push 00003328 
mov [esp ] ebx 
mov ebx [esp ] 
push ecx 
mov ecx esp 
add ecx 00000004 
add ecx 00000004 
xchg ecx [esp ] 
pop esp 
################################ after dead code elimination
push ebx 
mov ebx 00000000 
push ecx 
xor ebx 00006156 
pop ecx 
add ebx 00004114 
shl ebx 00000007 
push ecx 
xor ebx 0051351c 
pop ecx 
add ebx edi 
mov eax [ebx ] 
pop ebx 
push 000067e0 
mov [esp ] edx 
and eax 00000001 
mov edx [esp ] 
push edx 
mov edx esp 
add edx 00000004 
add edx 00000004 
xchg edx [esp ] 
pop esp 
or eax eax 
push eax 
mov eax ffffc4fd 
push ecx 
sub eax ffffc3ff 
pop ecx 
and [edi 0000001c ] eax 
mov eax [esp ] 
add esp 00000004 
push 00003328 
mov [esp ] ebx 
mov ebx [esp ] 
push ecx 
mov ecx esp 
add ecx 00000004 
add ecx 00000004 
xchg ecx [esp ] 
pop esp 
################################ after peephole optimisation
push ebx 
mov ebx 00000000 
push ecx 
xor ebx 00006156 
pop ecx 
add ebx 00004114 
shl ebx 00000007 
push ecx 
xor ebx 0051351c 
pop ecx 
add ebx edi 
mov eax [ebx ] 
pop ebx 
push edx 
and eax 00000001 
pop edx 
or eax eax 
push eax 
mov eax ffffc4fd 
push ecx 
sub eax ffffc3ff 
pop ecx 
and [edi 0000001c ] eax 
pop eax 
push ebx 
pop ebx 
################################ after stack cleaning
mov ebx 00000000 
xor ebx 00006156 
add ebx 00004114 
shl ebx 00000007 
xor ebx 0051351c 
add ebx edi 
mov eax [ebx ] 
and eax 00000001 
or eax eax 
mov eax ffffc4fd 
sub eax ffffc3ff 
and [edi 0000001c ] eax 
NEW = 11, OLD = 47, 36 lines removed
################################ NEXT ROUND ###########################
################################ original
mov ebx 00000000 
xor ebx 00006156 
add ebx 00004114 
shl ebx 00000007 
xor ebx 0051351c 
add ebx edi 
mov eax [ebx ] 
and eax 00000001 
or eax eax 
mov eax ffffc4fd 
sub eax ffffc3ff 
and [edi 0000001c ] eax 
################################ after constant propagation and folding
mov ebx 0000001c 
add ebx edi 
mov eax [ebx ] 
and eax 00000001 
or eax eax 
mov eax 000000fe 
and [edi 0000001c ] 000000fe 
################################ after dead code elimination
mov ebx 0000001c 
add ebx edi 
mov eax [ebx ] 
and eax 00000001 
or eax eax 
and [edi 0000001c ] 000000fe 
################################ after peephole optimisation
mov ebx 0000001c 
add ebx edi 
mov eax [ebx ] 
and eax 00000001 
or eax eax 
and [edi 0000001c ] 000000fe 
################################ after stack cleaning
mov ebx 0000001c 
add ebx edi 
mov eax [ebx ] 
and eax 00000001 
or eax eax 
and [edi 0000001c ] 000000fe 
NEW = 5, OLD = 11, 6 lines removed
################################ NEXT ROUND ###########################
################################ original
mov ebx 0000001c 
add ebx edi 
mov eax [ebx ] 
and eax 00000001 
or eax eax 
and [edi 0000001c ] 000000fe 
################################ after constant propagation and folding
mov ebx 0000001c 
add ebx edi 
mov eax [ebx ] 
and eax 00000001 
or eax eax 
and [edi 0000001c ] 000000fe 
################################ after dead code elimination
mov ebx 0000001c 
add ebx edi 
mov eax [ebx ] 
and eax 00000001 
or eax eax 
and [edi 0000001c ] 000000fe 
################################ after peephole optimisation
mov ebx 0000001c 
add ebx edi 
mov eax [ebx ] 
and eax 00000001 
or eax eax 
and [edi 0000001c ] 000000fe 
################################ after stack cleaning
mov ebx 0000001c 
add ebx edi 
mov eax [ebx ] 
and eax 00000001 
or eax eax 
and [edi 0000001c ] 000000fe
Well, almost. The trash above is the verbose output of my little "cleaner" tool. The cleaner is usable; it gives nice results for most of the included code samples. In the handlers.clean folder (see the link at the bottom) there are non-mutated versions of the CV handlers. After deobfuscation, a few heuristics can be applied to match the deobfuscated and clean versions: edit distance, or rare-instruction matching (for example, rol, ror and rcr are rare and show up in only one handler).

The problem is, I got bored with all of this, so if anyone would like to help, I will be more than happy.

Here is the code:
http://www.orange-bat.com/oreans.rar

Compile with make; it works without problems under Cygwin and should work under Linux. To use rip_handlers.py you will need IDAPython.

There are some bugs in my code, beware.

Submit "Fighting Oreans' VM (code virtualizer flavour)" to Digg Submit "Fighting Oreans' VM (code virtualizer flavour)" to del.icio.us Submit "Fighting Oreans' VM (code virtualizer flavour)" to StumbleUpon Submit "Fighting Oreans' VM (code virtualizer flavour)" to Google

Updated August 19th, 2008 at 12:32 by _g_

Categories
Uncategorized

Comments