/*
 * The PLT entry for an external function contains the following three ARM
 * instructions:
 *
 *      add     ip, pc, #0xNN00000
 *      add     ip, ip, #0xNN000
 *      ldr     pc, [ip, #0xNNN]!
 *
 * So that, effectively, causes the following to happen:
 *
 *      ip := pc+0x0NNNNNNN
 *      pc := *ip
 *
 * For thumb the above fragment is preceded by "bx pc, nop" to switch to ARM
 * mode and the thumb 'bl' must go to PLT-4 - the PLT entry is expanded by
 * four bytes to accommodate the trampoline code.
 *
 * 0x0NNNNNNN is the offset of the GOT entry for this function relative to
 * the PLT entry for this function (where the code is).  So the code in the
 * PLT causes a branch to whatever is in the GOT, leaving the actual address
 * of the GOT entry in ip.  (Note that the GOT must follow the PLT - the
 * added value is 28 bit unsigned).
 *
 * ip is a pointer to the GOT entry for this function, the first time round
 * *ip points to this code:
 *
 *      str     lr, [sp, #-4]!  @ save lr
 *      ldr     lr, [pc, #4]    @ lr := *dat (&GOT_TABLE[0]-.)
 *      add     lr, pc, lr      @ lr += &dat (so lr == &GOT_TABLE[0])
 *      ldr     pc, [lr, #8]!   @ pc := GOT_TABLE[2]
 *dat:
 *      .long   &GOT_TABLE[0] - .
 *
 * (this code is actually held in the first entry of the PLT).  The code
 * preserves lr then uses it as a scratch register (this preserves the ip
 * value calculated above).  GOT_TABLE[2] is initialized by INIT_GOT in
 * dl-sysdep.h to point to _dl_linux_resolve - this function.  The first
 * three entries in the GOT are reserved, then they are followed by the
 * entries for the PLT entries, in order.
 *
 * The linker initialises the following (non-reserved) GOT entries to
 * the offset of the PLT with an associated relocation so that on load
 * the entry is relocated to point to the PLT - the above code.
 *
 * The net effect of all this is that on the first call to an external (as
 * yet unresolved) function all seven of the above instructions are
 * executed in sequence and the program ends up executing _dl_linux_resolve
 * with the following important values in registers:
 *
 *      ip - a pointer to the GOT entry for the as yet unresolved function
 *      lr - &GOT_TABLE[2]
 *
 * GOT_TABLE[2] has already been initialised to _dl_linux_resolve, and
 * GOT_TABLE[1] is a pointer to the (elf_resolve*) from INIT_GOT.
 * _dl_linux_resolve unfrobnicates the ip and lr values to obtain arguments
 * for a call to _dl_linux_resolver (note the additional 'r' on the end) -
 * this is in elfinterp.c in this directory.  The call takes arguments:
 *
 *      _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
 *
 * And returns the address of the function, it also overwrites the GOT
 * table entry so that the next time round only the first code fragment will
 * be executed - it will call the function directly.
 *
 * [[Of course, this simply doesn't work on ARM 4T with a thumb target - because
 * 4T did not do the thumb/arm change on ldr pc!  It can be made to work by
 * changing _dl_linux_resolver to return __%s_from_arm for an STT_TFUNC, but
 * this hasn't been done, and there is no guarantee that the linker generated
 * that glue anyway.]]
 *
 * _dl_linux_resolve gets the arguments to call the resolver as follows:
 *
 *      tpnt            *GOT_TABLE[1], [lr-4]
 *      reloc-entry     &GOT-&GOT_TABLE[3], (ip - lr - 4)/4
 *
 * (I.e. 'GOT' means the table entry for this function, the thing for which
 * ip holds the address.)  The reloc-entry is passed as an index; since
 * the GOT table has 4 byte entries the code needs to divide this by 4
 * to get the actual index.
 *
 * John Bowler, August 13, 2005 - determined by experiment and examination
 * of generated ARM code (there was no documentation...)
 *
 * This code is all ARM code - not thumb - _dl_linux_resolver may, itself,
 * be thumb, in which case the linker will insert the appropriate glue.
A
 * call from thumb to the PLT hits the trampoline code described above.
 * This code (now) builds a proper stack frame.
 *
 * The code does *not* set sb (r9,v6) - to do that the basic PLT instructions
 * would need to save sb and load the new value and that would require
 * support in the linker since it generates those instructions.  (Also note
 * that linux/uclibc seems to be using r10 - sl - as a PIC base register - see
 * dl-startup.c).
 */

/*
 * NOTE(review): the header names of the three include directives below are
 * not visible in this view of the file and are left exactly as found;
 * restore them before building.  BX() used further down is not defined in
 * this file, so presumably one of these headers provides it - confirm.
 */
#include
#include
#include

#define sl r10
#define fp r11
#define ip r12

/*
 * _dl_linux_resolve - lazy-binding trampoline reached through GOT_TABLE[2].
 *
 * On entry (set up by the PLT code described at the top of this file):
 *      ip    = address of the GOT entry of the function being called
 *      lr    = &GOT_TABLE[2]
 *      [sp]  = the caller's lr, pushed by the first PLT entry
 *      r0-r3 = the caller's arguments for the target function
 *
 * Calls _dl_linux_resolver(tpnt = GOT_TABLE[1], reloc_entry = (ip - lr - 4)/4)
 * and then branches to the address it returns, with r0-r4, lr and sp restored
 * to the values they had before the PLT was entered.  ip is clobbered.
 */

	.text
	.align 4	@ 16 byte boundary and there are 32 bytes below (arm case)

#if 1 /*(!defined(__thumb__) || defined __THUMB_INTERWORK__) || defined(__thumb2__)*/
	.arm
	.hidden _dl_linux_resolve
	.globl _dl_linux_resolve
	.type _dl_linux_resolve,%function
	.align 4
_dl_linux_resolve:
	@ _dl_linux_resolver is a standard subroutine call, therefore it
	@ preserves everything except r0-r3 (a1-a4), ip and lr.  This
	@ function must branch to the real function, and that expects
	@ r0-r3 and lr to be as they were before the whole PLT stuff -
	@ ip can be trashed.
	@ This routine is called after pushing lr, so we must push an odd
	@ number of words to keep the stack correctly aligned.  r4 is not
	@ used here; it is only the fifth word (5 + the pushed lr = 6 words).

	stmdb	sp!, {r0, r1, r2, r3, r4}
	ldr	r0, [lr, #-4]		@ r0 := [lr-4] (GOT_TABLE[1])
	sub	r1, lr, ip		@ r1 := (lr-ip) (a multiple of 4)
	mvn	r1, r1, ASR #2		@ r1 := ~((lr-ip)>>2), since -x = (1+~x)
					@ ~x = -x-1, therefore ~(r1>>2) = (-((lr-ip)>>2)-1)
					@ = - ((lr-ip)/4) - 1 = (ip - lr - 4)/4, as required

	bl	_dl_linux_resolver

	mov	ip, r0			@ keep the resolved address; r0 is reloaded next
	@ Pops the five words pushed above plus the lr word pushed by the PLT.
	ldmia	sp!, {r0, r1, r2, r3, r4, lr}

	BX(ip)
#else
	@ In the thumb case _dl_linux_resolver is thumb.  If a bl is used
	@ from arm code the linker will insert a stub call which, with
	@ binutils 2.16, is not PIC.  Since this code is accessed by an
	@ ldr pc the reasonable fix is to make _dl_linux_resolve thumb too.
	.thumb
	.hidden _dl_linux_resolve	@ same visibility as the arm variant
	.globl _dl_linux_resolve
	.thumb_func
	.type _dl_linux_resolve,%function
_dl_linux_resolve:
	@ _dl_linux_resolver is a standard subroutine call, therefore it
	@ preserves everything except r0-r3 (a1-a4), ip and lr.  This
	@ function must branch to the real function, and that expects
	@ r0-r3 and lr to be as they were before the whole PLT stuff -
	@ ip can be trashed.
	@ This routine is called after pushing lr, so we must push an odd
	@ number of words to keep the stack correctly aligned.

	push	{r0-r4}
	mov	r1, lr			@ &GOT_TABLE[2]
	sub	r0, r1, #4
	mov	r2, ip			@ &GOT[n]
	ldr	r0, [r0]		@ r0 := GOT_TABLE[1]
	@ for the function call r1 := n-3
	sub	r1, r2
	asr	r1, r1, #2
	mvn	r1, r1			@ exactly as in the arm code above
	bl	_dl_linux_resolver
	@ r0 contains the branch address, the return address is above
	@ the saved r0..r4 (five words, hence the offset of 20)
	mov	ip, r0
	ldr	r1, [sp, #20]
	mov	lr, r1
	pop	{r0-r4}
	add	sp, #4			@ drop the lr word pushed by the PLT
	bx	ip
#endif
	.size _dl_linux_resolve, .-_dl_linux_resolve