summary refs log tree commit diff
diff options
context:
space:
mode:
author Eric Andersen <andersen@codepoet.org> 2003-11-08 07:42:34 +0000
committer Eric Andersen <andersen@codepoet.org> 2003-11-08 07:42:34 +0000
commit 2a01fa1548e5671880d6dd18c7d216ddf958ea96 (patch)
tree db51930aad916a898e321d50638f20ddeabdcddf
parent b36d61d33ece9afa32733bbcc7def4cb4afc4856 (diff)
Joakim Tjernlund writes:

> Very interesting. Do you have any suggestions for how
> we could fix our powerpc shared library loader

Removing those instructions comes with a very big performance penalty. To flush the dcache you will have to read up to 8KB of dummy data, and to invalidate the icache you will have to execute up to 16KB of nops. I don't know of any other way from user space.

Hmm, actually I think it will work reliably to perform a store to the same page(s) as the dcbst/icbi will act on. That way you will make the DTLB Error happen (if any) prior to the dcbst/icbi. The worst thing that can happen then is a regular DTLB Miss, and that works for dcbst/icbi. You will have to look out for the case where dcbst/icbi crosses a page boundary. Then you will have to perform a store to both pages.

Jocke

# And again later writes:

Hi again

I think I know what the problem is. The PPC_DCBST;PPC_SYNC;PPC_ICBI;PPC_ISYNC sequence is executed even if no modification has been done in some cases:

_dl_linux_resolver(), the last else has no store for insns[0]. There is an insns[1] = OPCODE_B(delta - 4) that does not have a PPC_DCBST.

_dl_do_lazy_reloc(), for R_PPC_NONE there is no store. For R_PPC_JMP_SLOT there is an insns[1] = OPCODE_B(delta) that does not have a PPC_DCBST.

_dl_do_reloc(), for R_PPC_COPY there is no store. For R_PPC_JMP_SLOT there is a reloc_addr[1] = OPCODE_B(delta) that does not have a PPC_DCBST.

_dl_init_got(), I THINK that the PPC_DCBST(plt); PPC_DCBST(plt+4); PPC_DCBST(plt+8); PPC_SYNC; PPC_ICBI(plt); PPC_ICBI(plt+4); PPC_ICBI(plt+8); PPC_ISYNC; is off a bit. The address range does not match the sum of the plt[] and tramp[] address range.

Jocke

# And then later added the comment:

I think that the tramp[] part should be included in the PPC_DCBST/PPC_ICBI sequence. Then you have to add entries for plt+12 and plt+16. If the tramp[] part should be excluded, then all is well.

Jocke
-rw-r--r-- ldso/ldso/powerpc/elfinterp.c 27
1 files changed, 25 insertions, 2 deletions
diff --git a/ldso/ldso/powerpc/elfinterp.c b/ldso/ldso/powerpc/elfinterp.c
index e3b39de16..93468c4ec 100644
--- a/ldso/ldso/powerpc/elfinterp.c
+++ b/ldso/ldso/powerpc/elfinterp.c
@@ -152,10 +152,14 @@ void _dl_init_got(unsigned long *plt,struct elf_resolve *tpnt)
PPC_DCBST(plt);
PPC_DCBST(plt+4);
PPC_DCBST(plt+8);
+ PPC_DCBST(plt+12);
+ PPC_DCBST(plt+16-1);
PPC_SYNC;
PPC_ICBI(plt);
- PPC_ICBI(plt+4);
- PPC_ICBI(plt+8);
+ PPC_ICBI(plt+4); /* glibc thinks this is not needed */
+ PPC_ICBI(plt+8); /* glibc thinks this is not needed */
+ PPC_ICBI(plt+12); /* glibc thinks this is not needed */
+ PPC_ICBI(plt+16-1);
PPC_ISYNC;
}
@@ -245,7 +249,15 @@ unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
//PPC_SYNC;
//PPC_ICBI(ptr+index);
//PPC_ISYNC;
+
+ /* instructions were modified */
insns[1] = OPCODE_B(delta - 4);
+ PPC_DCBST(insn_addr+1);
+ PPC_SYNC;
+ PPC_ICBI(insn_addr+1);
+ PPC_ISYNC;
+
+ return new_addr;
}
/* instructions were modified */
@@ -344,6 +356,7 @@ _dl_do_lazy_reloc (struct elf_resolve *tpnt, struct dyn_elf *scope,
switch (reloc_type) {
case R_PPC_NONE:
+ return 0;
break;
case R_PPC_JMP_SLOT:
{
@@ -380,8 +393,11 @@ _dl_do_lazy_reloc (struct elf_resolve *tpnt, struct dyn_elf *scope,
/* instructions were modified */
PPC_DCBST(reloc_addr);
+ PPC_DCBST(reloc_addr+1);
PPC_SYNC;
PPC_ICBI(reloc_addr);
+ PPC_ICBI(reloc_addr+1);
+ PPC_ISYNC;
#if defined (__SUPPORT_LD_DEBUG__)
if(_dl_debug_reloc && _dl_debug_detail)
@@ -435,6 +451,7 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
#endif
switch (reloc_type) {
case R_PPC_NONE:
+ return 0;
break;
case R_PPC_REL24:
#if 0
@@ -494,6 +511,10 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
//DPRINTF(" index %x delta %x\n",index,delta);
reloc_addr[0] = OPCODE_LI(11,index*4);
reloc_addr[1] = OPCODE_B(delta);
+
+ /* instructions were modified */
+ PPC_DCBST(reloc_addr+1);
+ PPC_ICBI(reloc_addr+1);
}
}
break;
@@ -503,6 +524,7 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
break;
case R_PPC_COPY:
// handled later
+ return 0;
break;
default:
#if 0
@@ -521,6 +543,7 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
PPC_DCBST(reloc_addr);
PPC_SYNC;
PPC_ICBI(reloc_addr);
+ PPC_ISYNC;
#if defined (__SUPPORT_LD_DEBUG__)
if(_dl_debug_reloc && _dl_debug_detail)