That famous IOCCC entry that prints something if compiled on PDP-11:
/*
 * `main` is defined here as an array of 16-bit words, not as a function.
 * The startup code still transfers control to the `main` symbol, so these
 * words are executed as machine code (PDP-11 code, per the surrounding text).
 * The initializers mix decimal, octal, hex and character notation; only the
 * resulting numeric values matter.
 */
short main[] = {
277, 04735, -4129, 25, 0, 477, 1019, 0xbef, 0, 12800,
-113, 21119, 0x52d7, -1006, -7151, 0, 0x4bc, 020004,
14880, 10541, 2056, 04010, 4548, 3044, -6716, 0x9,
4407, 6, 5568, 1, -30460, 0, 0x9, 5570, 512, -30419,
0x7e82, 0760, 6, 0, 4, 02400, 15, 0, 4, 1280, 4, 0,
4, 0, 0, 0, 0x8, 0, 4, 0, ',', 0, 12, 0, 4, 0, '#',
0, 020, 0, 4, 0, 30, 0, 026, 0, 0x6176, 120, 25712,
'p', 072163, 'r', 29303, 29801, 'e'
};
https://www.ioccc.org/1984/mullender/
https://github.com/ioccc-src/winner/blob/master/1984/mullender/mullender.c
https://www.ioccc.org/2015/endoh3/
It can be explained easily. The CRT startup code contains a jump to the _main symbol. But here an array is declared, not a function. After compilation there is an array under the _main symbol, and its contents are indistinguishable from PDP-11 code.
Let's try to do the same on x64. But we will instruct the compiler to place the array into the .text section (otherwise it would be placed in .data, and code in that section cannot be executed).
This code just returns 123 if compiled on Linux x64.
#include <stdint.h>
#include <stdio.h>
/*
 * x86-64 machine code that is executed in place of a main() function.
 * It loads 123 into EAX and returns, so the process exit status is 123.
 */
uint8_t main[] __attribute__ ((section (".text"))) =
{
	0xb8, 0x7b, 0x00, 0x00, 0x00,	/* mov eax, 0x7b  (opcode B8 + 32-bit little-endian immediate) */
	0xc3				/* ret */
};
Let's check:
# Build the example, run it, then print the exit status it returned.
gcc -o x64 x64.c
./x64
echo $?
Must print 123.
I didn't try, but it may even work for Windows x64, because no OS-dependent code is here.
Same for ARM64:
#include <stdint.h>
#include <stdio.h>
/*
 * AArch64 machine code that is executed in place of a main() function.
 * It loads 123 into w0 and returns, so the process exit status is 123.
 *
 * Every A64 instruction is one 32-bit word, so the element type has to be
 * uint32_t: with uint8_t each initializer would be truncated to its low byte.
 */
uint32_t main[] __attribute__ ((section (".text"))) =
{
0x52800f60, // mov w0, #0x7b
0xd65f03c0 // ret
};
Can be run on android in termux.
Now 'hello world' example in assembly for Linux:
; Linux x86-64 "Hello, world!" using raw syscalls (NASM syntax).
; Not position-independent: msg is referenced by its absolute address.
        global  _start

        section .text
_start:
        mov     rax, 1                  ; write
        mov     rdi, 1                  ; STDOUT_FILENO
        mov     rdx, msglen
        lea     rsi, [msg]              ; lea needs a memory operand in NASM
        syscall

        mov     rax, 60                 ; exit
        mov     rdi, 0                  ; EXIT_SUCCESS
        syscall

msg:    db      "Hello, world!", 10
msglen: equ     $ - msg
There are at least two ways to make this code position-independent.
The first is:
; Position-independent "Hello, world!" for Linux x86-64 (NASM syntax).
; The address of msg is derived at run time from the return address that
; 'call' pushes on the stack, so no absolute address is embedded in the code.
        global  _start

        section .text
_start:
        call    _pnt1                   ; call next instruction
        ; call  $+5                     ; this would also work! (the 'call' instruction length is 5.)
_pnt1:
        pop     rsi                     ; now rsi points to _pnt1
        ; In the next instruction $ points to _pnt2.  Add the distance between
        ; _pnt1 and msg, allowing for the one byte that 'pop rsi' occupies.
_pnt2:
        add     rsi, msg-$+1
        mov     rax, 1                  ; write syscall
        mov     rdi, 1                  ; stdout
        mov     rdx, msglen
        syscall

        mov     rax, 60                 ; exit syscall
        mov     rdi, 0                  ; exit code
        syscall

msg:    db      "Hello, world!", 10
msglen: equ     $ - msg
We will use syscalls here, because we don't know _printf() function address. (Even worse, ASLR can be used to randomize loaded modules addresses.)
And in form of _main array:
#include <stdint.h>
#include <stdio.h>
/*
 * Position-independent "Hello, world!" for Linux x86-64, stored as raw bytes
 * and executed in place of a main() function.
 * Layout: 39 bytes of code followed directly by the 14-byte message.
 */
uint8_t main[] __attribute__ ((section (".text"))) =
{
	0xe8, 0x00, 0x00, 0x00, 0x00,	/* call next      ; pushes the address of the pop below */
	0x5e,				/* pop  rsi       ; rsi = address of this instruction   */
	0x48, 0x83, 0xc6, 0x22,		/* add  rsi, 0x22 ; rsi = address of the message        */
	0xb8, 0x01, 0x00, 0x00, 0x00,	/* mov  eax, 1    ; syscall number: write               */
	0xbf, 0x01, 0x00, 0x00, 0x00,	/* mov  edi, 1    ; fd: stdout                          */
	0xba, 0x0e, 0x00, 0x00, 0x00,	/* mov  edx, 14   ; message length                      */
	0x0f, 0x05,			/* syscall                                              */
	0xb8, 0x3c, 0x00, 0x00, 0x00,	/* mov  eax, 60   ; syscall number: exit                */
	0xbf, 0x00, 0x00, 0x00, 0x00,	/* mov  edi, 0    ; exit code                           */
	0x0f, 0x05,			/* syscall                                              */
	/* the message, 0x22 bytes past the pop instruction */
	'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', 0x0a
};
or:
#include <stdint.h>
#include <stdio.h>
/*
 * Compact form of the x86-64 position-independent "Hello, world!":
 * 39 bytes of machine code (call/pop/add to locate the string, a write
 * syscall, an exit syscall) followed by the 14-byte message itself.
 */
uint8_t main[] __attribute__ ((section (".text"))) =
{
0xe8, 0, 0, 0, 0, 0x5e, 0x48, 0x83, 0xc6, 0x22,
0xb8, 1, 0, 0, 0, 0xbf, 1, 0, 0, 0, 0xba, 0xe, 0, 0, 0,
0x0f, 5, 0xb8, 0x3c, 0, 0, 0, 0xbf, 0x0, 0, 0, 0, 0x0f, 5,
'H','e','l','l','o',',',' ','w','o','r','l','d','!',0xa
};
It runs fine on Linux x64.
Another way is allocating space for string in stack:
; Position-independent variant that builds the message on the stack one byte
; at a time, so the code carries no data reference at all (NASM syntax).
        global  _start

        section .text
_start:
        ; push the bytes to print to stack:
        sub     rsp, 14                 ; alloc. space for string
        mov     byte [rsp],    'H'
        mov     byte [rsp+1],  'e'
        mov     byte [rsp+2],  'l'
        mov     byte [rsp+3],  'l'
        mov     byte [rsp+4],  'o'
        mov     byte [rsp+5],  ','
        mov     byte [rsp+6],  ' '
        mov     byte [rsp+7],  'W'
        mov     byte [rsp+8],  'o'
        mov     byte [rsp+9],  'r'
        mov     byte [rsp+10], 'l'
        mov     byte [rsp+11], 'd'
        mov     byte [rsp+12], '!'
        mov     byte [rsp+13], 10

        mov     rsi, rsp                ; that is the pointer to the string in stack
        mov     rax, 1                  ; write syscall
        mov     rdi, 1                  ; stdout
        mov     rdx, 14
        syscall

        mov     rax, 60                 ; exit syscall
        mov     rdi, 0                  ; exit code
        syscall
We can also pass 64-bit words instead of bytes:
; "Hello, world!" built on the stack from two 64-bit immediates (NASM syntax).
; The second half is pushed first so that the text is contiguous at rsp.
        global  _start

        section .text
_start:
        ; Backquoted NASM strings accept C-style escapes.  The 6-byte constant
        ; is zero-extended to 64 bits, so two padding bytes follow the newline.
        mov     rax, `orld!\n`          ; this is a 64-bit value actually, not a string
        push    rax
        mov     rax, 'Hello, w'         ; this is a 64-bit value actually, not a string
        push    rax

        mov     rsi, rsp                ; that is the pointer to the string in stack
        mov     rax, 1                  ; write syscall
        mov     rdi, 1                  ; stdout
        mov     rdx, 14                 ; message only: the zero padding is not written
        syscall

        mov     rax, 60                 ; exit syscall
        mov     rdi, 0                  ; exit code
        syscall
Now 'hello world' example in assembly for ARM64 linux or android:
// Linux/Android AArch64 "Hello, world!" using raw syscalls (GNU as syntax).
// Not position-independent: the address of msg comes from a literal pool.
.text
.global _start
_start:
        // write(int fd, const void *buf, size_t count)
        mov     x0, #1                  // fd = STDOUT_FILENO
        ldr     x1, =msg                // buf = msg
        mov     x2, #len                // count = message length
        mov     w8, #64                 // syscall number: write
        svc     #0

        // exit(int status)
        mov     x0, #0                  // status
        mov     w8, #93                 // syscall number: exit
        svc     #0
        ret                             // placed after the exit syscall

msg:
        .ascii  "Hello, world!\n"
        .equ    len, . - msg
Making it PIC:
// Position-independent AArch64 "Hello, world!" (GNU as syntax).
// 'bl' to the very next instruction leaves that instruction's address in x30;
// adding the assemble-time distance to msg yields the run-time address of msg.
.text
.global _start
_start:
        bl      . + 4                   // x30 = address of _start+4
        add     x1, x30, #(msg - .)     // '.' is _start+4 here, so x1 = msg

        // write(int fd, const void *buf, size_t count)
        mov     x0, #1                  // fd = STDOUT_FILENO
        mov     x2, #len                // count = message length
        mov     w8, #64                 // write is syscall #64
        svc     #0

        // exit(int status)
        mov     x0, #0
        mov     w8, #93                 // exit is syscall #93
        svc     #0

msg:
        .ascii  "Hello, world!\n"
        .equ    len, . - msg
In the form of a _main array. It runs fine under Termux on Android:
#include <stdint.h>
#include <stdio.h>
/*
 * Position-independent AArch64 "Hello, world!" stored as 32-bit words and
 * executed in place of a main() function.
 */
uint32_t main[] __attribute__ ((section (".text"))) =
{
// 13 32-bit words: 9 instruction words, then 4 words holding the
// 14-byte message in little-endian byte order (last word zero-padded).
0x94000001, 0x910083c1, 0xd2800020, 0xd28001c2, 0x52800808,
0xd4000001, 0xd2800000, 0x52800ba8, 0xd4000001, 0x6c6c6548,
0x77202c6f, 0x646c726f, 0x00000a21,
};
Or:
#include <stdint.h>
#include <stdio.h>
/*
 * Position-independent AArch64 "Hello, world!" stored byte by byte and
 * executed in place of a main() function.
 *
 * A64 instructions must start on a 4-byte boundary, but a uint8_t array only
 * has 1-byte natural alignment, so the alignment is requested explicitly
 * instead of relying on where the compiler happens to place the array.
 */
uint8_t main[] __attribute__ ((section (".text"), aligned (4))) =
{
// 13 32-bit words, each written out in little-endian byte order:
0x01,0x00,0x00,0x94,// bl .+4 (x30 = address of the next instruction)
0xc1,0x83,0x00,0x91,// add x1, x30, #0x20 (x1 = address of the string)
0x20,0x00,0x80,0xd2,// mov x0, #0x1
0xc2,0x01,0x80,0xd2,// mov x2, #0xe
0x08,0x08,0x80,0x52,// mov w8, #0x40
0x01,0x00,0x00,0xd4,// svc #0x0
0x00,0x00,0x80,0xd2,// mov x0, #0x0
0xa8,0x0b,0x80,0x52,// mov w8, #0x5d
0x01,0x00,0x00,0xd4,// svc #0x0
0x48,0x65,0x6c,0x6c,// "Hell"
0x6f,0x2c,0x20,0x77,// "o, w"
0x6f,0x72,0x6c,0x64,// "orld"
0x21,0x0a,0x00,0x00 // "!\n" plus two bytes of zero padding
};
We can also pass the string by 16-bit words:
// AArch64 "Hello, world!" that builds the message on the stack from 16-bit
// immediates, so the code contains no data reference (GNU as syntax).
.text
.globl _start
_start:
        sub     sp, sp, 16              // reserve 16 bytes; keeps sp 16-byte aligned

        // second half of the message: "orld!\n" plus two zero bytes
        mov     x2, #0x726f
        movk    x2, #0x646c, lsl #16
        movk    x2, #0x0a21, lsl #32
        movk    x2, #0x0000, lsl #48
        str     x2, [sp, #8]

        // first half of the message: "Hello, w"
        mov     x2, #0x6548
        movk    x2, #0x6c6c, lsl #16
        movk    x2, #0x2c6f, lsl #32
        movk    x2, #0x7720, lsl #48
        str     x2, [sp]

        mov     x1, sp                  // buf = start of the message on the stack

        /* syscall write(int fd, const void *buf, size_t count) */
        mov     x0, #1                  /* STDOUT_FILENO */
        mov     x2, #14                 // message length only; the two zero padding bytes are not written
        mov     w8, #64
        svc     #0                      /* invoke syscall */

        /* syscall exit(int status) */
        mov     x0, #0
        mov     w8, #93                 /* exit is syscall #93 */
        svc     #0
Now mixed code that will run on x64 and ARM64.
It's easy to make it.
That instruction will mean jmp short in x64:
0xeb,0x26,0x80,0xd2
On ARM64 it's the word 0xd28026eb, which decodes to mov x11, #0x137 (some noise).
Running this code on x64: first instruction will jump to actual x64 code. But on ARM64, something will be stored into x11 register. Not a problem for us, because we don't use this register.
I also corrected string address so that one string is shared between two pieces of code.
This pure C function can be compiled and executed on both Linux x64 and ARM64.
#include <stdint.h>
#include <stdio.h>
/*
 * "Hello, world!" that runs on both Linux x86-64 and Linux AArch64.
 *
 * The first four bytes decode as a short jump over the ARM64 code on x86-64
 * and as a harmless register load on AArch64. Both code paths print the
 * single shared copy of the message at the end of the array (offset 79).
 *
 * A64 instructions must start on a 4-byte boundary, but a uint8_t array only
 * has 1-byte natural alignment, so the alignment is requested explicitly.
 */
uint8_t main[] __attribute__ ((section (".text"), aligned (4))) =
{
0xeb,0x26,0x80,0xd2, // jmp short +0x26 (to the x64 code) in x64, or mov x11, #0x137 in ARM64
0x01,0x00,0x00,0x94, // (ARM64) bl .+4 (x30 = address of the next instruction, offset 8)
0xc1,0x1f,0x01,0x91, // (ARM64) add x1, x30, #0x47 (x1 = address of the string)
0x20,0x00,0x80,0xd2, // (ARM64) mov x0, #0x1
0xc2,0x01,0x80,0xd2, // (ARM64) mov x2, #0xe
0x08,0x08,0x80,0x52, // (ARM64) mov w8, #0x40
0x01,0x00,0x00,0xd4, // (ARM64) svc #0x0
0x00,0x00,0x80,0xd2, // (ARM64) mov x0, #0x0
0xa8,0x0b,0x80,0x52, // (ARM64) mov w8, #0x5d
0x01,0x00,0x00,0xd4, // (ARM64) svc #0x0
0xe8, 0, 0, 0, 0, // (x64) call to the next instruction (pushes its address, offset 45)
0x5e, // (x64) pop %rsi
0x48, 0x83, 0xc6, 0x22, // (x64) add $0x22,%rsi (rsi = address of the string)
0xb8, 1, 0, 0, 0, // (x64) mov $0x1,%eax
0xbf, 1, 0, 0, 0, // (x64) mov $0x1,%edi
0xba, 0xe, 0, 0, 0, // (x64) mov $0xe,%edx
0x0f, 5, // (x64) syscall
0xb8, 0x3c, 0, 0, 0, // (x64) mov $0x3c,%eax
0xbf, 0x0, 0, 0, 0, // (x64) mov $0x0,%edi
0x0f, 5, // (x64) syscall
'H','e','l','l','o',',',' ','w','o','r','l','d','!',0xa
};
Or:
#include <stdint.h>
#include <stdio.h>
/*
 * Compact form of the "Hello, world!" that runs on both Linux x86-64 and
 * Linux AArch64: 40 bytes of ARM64 code (whose first word doubles as an x86
 * short jump), 39 bytes of x86-64 code, then the shared 14-byte message.
 *
 * A64 instructions must start on a 4-byte boundary, but a uint8_t array only
 * has 1-byte natural alignment, so the alignment is requested explicitly.
 */
uint8_t main[] __attribute__ ((section (".text"), aligned (4))) =
{
0xeb,0x26,0x80,0xd2,0x01,0x00,0x00,0x94,
0xc1,0x1f,0x01,0x91,0x20,0x00,0x80,0xd2,
0xc2,0x01,0x80,0xd2,0x08,0x08,0x80,0x52,
0x01,0x00,0x00,0xd4,0x00,0x00,0x80,0xd2,
0xa8,0x0b,0x80,0x52,0x01,0x00,0x00,0xd4,
0xe8,0x00,0x00,0x00,0x00,0x5e,0x48,0x83,
0xc6,0x22,0xb8,0x01,0x00,0x00,0x00,0xbf,
0x01,0x00,0x00,0x00,0xba,0x0e,0x00,0x00,
0x00,0x0f,0x05,0xb8,0x3c,0x00,0x00,0x00,
0xbf,0x00,0x00,0x00,0x00,0x0f,0x05,
'H','e','l','l','o',',',' ','w','o','r','l','d','!',0x0a
};
Should I submit it to IOCCC?
Bottom line: same techniques are used in shellcode programming, where syscalls and PIC are used too.
Some links I found useful while working on this blog post: 1, 2, 3.
