[Assembly] Position-independent code

This example is copy-pasted from Wikibooks (AT&T syntax). It simply prints the string 'hello' on Linux x64:

        .text
        .globl  _start        # export the entry point so that ld does not have to guess it

# Prints "hello\n" to stdout and exits with status 0, using raw Linux x86-64
# syscalls (number in RAX; arguments in RDI, RSI, RDX).
# This version is NOT position-independent: the address of the string is an
# absolute value fixed at link time.
_start:
        movq $1, %rax         # `write` [fast] syscall
        movq $1, %rdi         # stdout
        leaq .LC0, %rsi       # absolute address of the string
        movq $6, %rdx         # write 6 characters ("hello\n", without the trailing NUL)
        syscall

        movq $60, %rax        # `_exit` [fast] syscall
        movq $0, %rdi         # error code 0
        syscall

.LC0:
        .string "hello\n"     # .string also emits a terminating NUL byte

Compile and run in Ubuntu Linux x64:

as test.s -o test.o
ld test.o -o test

So far so good. Now, a good exercise: devise code that will print 'hello' no matter at what address it is loaded into memory.

Passing string via stack

If you don't know the address of the 'hello' string, how can you pass it? One idea is to pass the string via the stack, building it there piece by piece (here, eight bytes per push).

        .text
        .globl  main

# Prints "hello, world!\n" without referencing any address inside the image:
# the string is materialised on the stack from two 64-bit immediates and RSP
# is handed to write(2) as the buffer pointer.
main:

#  % echo hello, world! | xxd -g 1
# 00000000: 68 65 6c 6c 6f 2c 20 77 6f 72 6c 64 21 0a        hello, world!.

        # The stack grows downwards, so the tail of the string is pushed first.
        movq $0x00000a21646c726f, %rax # second half of the string: "orld!\n" + 2 zero bytes of padding
        push %rax

        movq $0x77202c6f6c6c6568, %rax # first half of the string: "hello, w" (little-endian)
        push %rax

        movq %rsp, %rsi
        # RSI now points to the string, placed in stack

        movq $1, %rax         # `write` [fast] syscall
        movq $1, %rdi         # stdout

        movq $14, %rdx        # write 14 characters: 8 + 6, the zero padding is not printed
        syscall

        # get stack pointer back
        # may be omitted, because we exit anyway
        # pop %rax
        # pop %rax

        movq $60, %rax        # `_exit` [fast] syscall
        movq $0, %rdi         # error code 0
        syscall

This piece of code doesn't care at what address it is loaded; it will print the string regardless.

This technique was quite common (or still is?) in shellcode, where you never know at which address your code will be located.

This is an important thing to understand about the stack. A printing function, whether in userland, in the kernel or anywhere else, doesn't care where your string is located: in the code/text section, in the data section, or on the stack. As long as the string is readable, it will be printed.

Determining your current address

Another popular method is a CALL/POP pair of instructions:

        .text

        # Position-independent "hello": discover the run-time address of the
        # code with a CALL/POP pair, then add the assemble-time distance to
        # the string.

        call next             # pushes the address of the following instruction
next:   pop %rbp              # ... which is popped straight back into RBP

        # RBP at this point contains the run-time address of the 'next' label,
        # i.e. of the POP instruction itself

        # distance between the 'next' label and the string; both labels are in
        # this file, so it is a constant computed by the assembler:
        movq $(.LC0 - next), %rsi

        # add the distance to the run-time address of the 'next' label:
        addq %rbp, %rsi

        # RSI now points to the string

        movq $1, %rax         # `write` [fast] syscall
        movq $1, %rdi         # stdout
        movq $7, %rdx         # write 7 bytes: "hello\n" plus the NUL that .string appends
        syscall

        movq $60, %rax        # `_exit` [fast] syscall
        movq $0, %rdi         # error code 0
        syscall

.LC0:
        .string "hello\n"

CALL uses a relative address, so no matter where the code is loaded, it will always jump to the next instruction. The subsequent POP instruction does two things at once: it writes its own address to the RBP register, and it also cancels the effect of CALL, returning the stack pointer to the state it was in before CALL was executed.

Let's look at the contents of the assembled object file using objdump:

% objdump -D file.o

0000000000000000 <.text>:
   0:   e8 00 00 00 00          callq  5 <next>

0000000000000005 <next>:
   5:   5d                      pop    %rbp
   6:   48 c7 c6 32 00 00 00    mov    $0x32,%rsi
   d:   48 01 ee                add    %rbp,%rsi
  10:   48 c7 c0 01 00 00 00    mov    $0x1,%rax
  17:   48 c7 c7 01 00 00 00    mov    $0x1,%rdi
  1e:   48 c7 c2 07 00 00 00    mov    $0x7,%rdx
  25:   0f 05                   syscall
  27:   48 c7 c0 3c 00 00 00    mov    $0x3c,%rax
  2e:   48 c7 c7 00 00 00 00    mov    $0x0,%rdi
  35:   0f 05                   syscall
  37:   68 65 6c 6c 6f          pushq  $0x6f6c6c65      # 'hello' string
  3c:   0a 00                   or     (%rax),%al

You see the encoding of the 'CALL next' instruction? It's E8 00 00 00 00, i.e., the relative address is zero (the CALL opcode is 0xE8). So zero is added to the address of the next instruction and the jump goes to that point, effectively calling the 'function' at the next instruction.

0x32 is the distance, in bytes, between the 'next' label (offset 5) and the 'hello' string (offset 0x37).


This is a real piece of code from the 1990s. It is an emulator of a copy-protection dongle (HASP). It was intended to be compiled as a flat binary (COM-file for MS-DOS) and embedded inside an arbitrary executable in place of the original HASP driver (by overwriting it). As you can see, it contains a memory map, and it uses the CALL/POP technique to determine the address of that table, as well as of its other variables:

                .386c
hasp_krn        segment byte public use32
                assume cs:hasp_krn

                ; HASP Kernel entry point
                ; Dispatches on the service number in BH:
                ;   1 -> IsHASP, 2 -> HASPCode, 3 -> ReadMemo, 4 -> WriteMemo,
                ;   5 -> HASPStatus, 6 -> HASPID; 0 or any other value -> NoService.
                ; Every test is an inverted JNE that skips over an unconditional
                ; JMP to the handler. Each handler returns with its own RETN.
hasp_entry:     cmp     bh,0
                jne     l001
                jmp     NoService
l001:           cmp     bh,1
                jne     l002
                jmp     IsHASP
l002:           cmp     bh,2
                jne     l003
                jmp     HASPCode
l003:           cmp     bh,3
                jne     l004
                jmp     ReadMemo
l004:           cmp     bh,4
                jne     l005
                jmp     WriteMemo
l005:           cmp     bh,5
                jne     l006
                jmp     HASPStatus
l006:           cmp     bh,6
                jne     NoService       ; anything above 6 is rejected as well
                jmp     HASPID
                ; Service 0 / unknown service: only CX is set before returning.
NoService:      mov     cx,0FC19h       ; Invalid Service ...
                retn
                ; Service 5: returns three constants in AX, BX and CX.
HASPStatus:     mov     ax,112          ; Memory Size (112 bytes = the 56 words of HASP_Memo)
                mov     bx,1            ; MemoHASP R3 ?
                mov     cx,1            ; LPT1
                retn
                ; Service 1: returns AX = 1.
IsHASP:         mov     ax,1            ; It's a really HASP !
                retn

; Service 2: derive four 16-bit words from the value passed in AX.
; AX is used as the running state of the generator below and is not
; initialised here, so it has to be supplied by the caller.
; For each of 64 steps the state is advanced (AX = AX*1989h + 5, low 16 bits),
; six bits of it select one bit out of the 8-byte Base_Seq table, and that bit
; is stored into the 8-byte Register buffer, filled from bit 7 down to bit 0
; of each byte.
; Returns the four words of Register in AX, BX, CX, DX.
; Clobbers EBP, SI, DI (nothing is saved).
HASPCode:       call    @@SeedIP                ; push the run-time address of @@SeedIP ...
@@SeedIP:       pop     ebp                     ; ... and fetch it into EBP
Seed$Rel        =       offset @@SeedIP         ; assemble-time offset of the same label
                ; run-time address = assemble-time offset + (EBP - Seed$Rel)
                ; NOTE(review): SI/DI are 16-bit, so only the low 16 bits of the
                ; computed addresses are kept -- confirm this is intended.
                lea     si, offset Register + [ebp] - Seed$Rel
                lea     di, offset Base_Seq + [ebp] - Seed$Rel
                mov     ch, 07                  ; CH = output byte counter (7..0 -> 8 bytes)
@@ByteLoop:     mov     cl, 07                  ; CL = bit position inside the output byte (7..0)
                mov     [si], byte ptr 0        ; start the output byte from zero
@@BitLoop:      xor     dx, dx
                mov     bx, 1989h ; First
                mul     bx                      ; DX:AX = AX * 1989h
                add     ax, 5                   ; AX = next state (low 16 bits only)
                push    ax                      ; keep the state; AX is reused as scratch below
                shr     ax, 9
                and     ax, 3Fh                 ; AX = bits 9..14 of the state: index 0..63
                mov     bx, ax
                shr     bx, 3                   ; BX = index / 8 -> byte within Base_Seq
                mov     dl, byte ptr [di+bx]    ; DL = selected table byte
                mov     bx, ax
                shr     bx, 3
                shl     bx, 3                   ; BX = index rounded down to a multiple of 8
                sub     ax, bx                  ; AX = index mod 8 -> shift count
                xchg    ax, cx                  ; borrow CL for the shift count, counters park in AX
                shl     dl, cl                  ; move the selected bit up to bit 7
                xchg    ax, cx                  ; loop counters back in CX
                shr     dl, 7                   ; DL = the selected bit, as 0 or 1
                shl     dl, cl                  ; place it at the current output bit position
                mov     al, [si]
                or      al, dl
                mov     [si], al                ; merge the bit into the output byte
                pop     ax                      ; restore the generator state
                dec     cl
                jns     @@BitLoop               ; loop while CL >= 0 (8 iterations)
                inc     si                      ; next output byte
                dec     ch
                jns     @@ByteLoop              ; loop while CH >= 0 (8 iterations)

                ; SI now points just past the 8 bytes written; return them as four words
                mov     dx, [si-2]
                mov     cx, [si-4]
                mov     bx, [si-6]
                mov     ax, [si-8]
                retn

; Service 3: read one 16-bit word from the HASP_Memo table.
; In:  EDI = word index (no range check is performed here)
; Out: EBX = the word, zero-extended; EAX = the index; ECX = 0
; Clobbers EBP.
ReadMemo:       call    @@MemoIP                ; CALL/POP: EBP = run-time address of @@MemoIP
@@MemoIP:       pop     ebp
Memo$Rel        =       offset @@MemoIP         ; assemble-time offset of the same label
                lea     ebx, offset HASP_Memo + [ebp] - Memo$Rel ; EBX = run-time address of HASP_Memo
                shl     edi, 1                  ; word index -> byte offset
                movzx   ebx, word ptr [ebx+edi]
                shr     edi, 1                  ; byte offset -> word index again
                mov     eax, edi
                xor     ecx,ecx
                retn

; Service 4: store one 16-bit word into the HASP_Memo table.
; In:  EDI = word index (no range check is performed here), SI = value to store
; Out: EAX = the index; ECX = 0
; Clobbers EBP and EBX.
WriteMemo:      call    @@MemoIP1               ; CALL/POP: EBP = run-time address of @@MemoIP1
@@MemoIP1:      pop     ebp
Memo$Rel1       =       offset @@MemoIP1        ; assemble-time offset of the same label
                lea     ebx, offset HASP_Memo + [ebp] - Memo$Rel1 ; EBX = run-time address of HASP_Memo
                shl     edi, 1                  ; word index -> byte offset
                mov     word ptr [ebx+edi], si
                shr     edi, 1                  ; byte offset -> word index again
                mov     eax, edi
                xor     ecx,ecx
                retn

; Service 6: return the two words stored at HASP_ID.
; Out: AX = first word, BX = second word, ECX = 0
; Clobbers EBP and DI.
HASPID:         call    @@IDIP                  ; CALL/POP: EBP = run-time address of @@IDIP
@@IDIP:         pop     ebp
ID$Rel          =       offset @@IDIP           ; assemble-time offset of the same label
                ; NOTE(review): DI is 16-bit, so only the low 16 bits of the
                ; computed address are kept -- confirm this is intended.
                lea     di, offset HASP_ID + [ebp] - ID$Rel
                mov     ax, [di]
                mov     bx, [di+2]
                xor     ecx,ecx
                retn

; Data area. It lives in the same segment as the code and is located at run
; time with CALL/POP. Register and HASP_Memo are written by the code above.
Register:       dw      4 dup (0)               ; used while seedcodes calc (8 bytes, filled by HASPCode)

                ; Here is dump of HASP
PSW1:           dw      0FFFFh                  ; 1st Password
PSW2:           dw      0FFFFh                  ; 2nd Password
                ; 8 bytes = 64 bits; HASPCode picks single bits out of this table
Base_Seq:       db      11000100b               ; Seed codes ... :(
                db      11011011b
                db      11100100b
                db      11111011b
                db      10000100b
                db      10011011b
                db      10100100b
                db      10111011b
HASP_ID:        dw      00000h, 01234h ; S/n    ; --- Real Hardware memory (two words returned by HASPID)
HASP_Chunk:     db      12 dup (0FFh)           ; Future use ? :)
                ; 7 rows x 8 words = 56 words (112 bytes), accessed by ReadMemo/WriteMemo
HASP_Memo:                                      ; Logical memory
        dw      0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh
        dw      0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh
        dw      0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh
        dw      0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh
        dw      0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh
        dw      0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh
        dw      0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh, 0FFFFh

hasp_krn        ends
                end     HASP_Entry

Position-independent code in .so and .DLL files

See in my book: Ctrl-F: "Position-independent code".


List of my other blog posts.

Yes, I know about these lousy Disqus ads. Please use an adblocker. I would consider subscribing to the 'pro' version of Disqus if the signal-to-noise ratio in the comments were good enough.