diff options
Diffstat (limited to 'libc')
51 files changed, 5209 insertions, 0 deletions
| diff --git a/libc/string/ia64/Makefile b/libc/string/ia64/Makefile new file mode 100644 index 000000000..0a95346fd --- /dev/null +++ b/libc/string/ia64/Makefile @@ -0,0 +1,13 @@ +# Makefile for uClibc +# +# Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org> +# +# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. +# + +top_srcdir:=../../../ +top_builddir:=../../../ +all: objs +include $(top_builddir)Rules.mak +include ../Makefile.in +include $(top_srcdir)Makerules diff --git a/libc/string/ia64/bcopy.S b/libc/string/ia64/bcopy.S new file mode 100644 index 000000000..c4eb22b1f --- /dev/null +++ b/libc/string/ia64/bcopy.S @@ -0,0 +1,10 @@ +#include "sysdep.h" + +ENTRY(bcopy) +	.regstk 3, 0, 0, 0 +	mov r8 = in0 +	mov in0 = in1 +	;; +	mov in1 = r8 +	br.cond.sptk.many HIDDEN_JUMPTARGET(memmove) +END(bcopy) diff --git a/libc/string/ia64/bzero.S b/libc/string/ia64/bzero.S new file mode 100644 index 000000000..bcca41d5e --- /dev/null +++ b/libc/string/ia64/bzero.S @@ -0,0 +1,315 @@ +/* Optimized version of the standard bzero() function. +   This file is part of the GNU C Library. +   Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc. +   Contributed by Dan Pop for Itanium <Dan.Pop@cern.ch>. +   Rewritten for McKinley by Sverre Jarp, HP Labs/CERN <Sverre.Jarp@cern.ch> + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. 
+ +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* Return: dest + +   Inputs: +        in0:    dest +        in1:    count + +   The algorithm is fairly straightforward: set byte by byte until +   we get to a 16B-aligned address, then loop on 128 B chunks using an +   early store as prefetching, then loop on 32B chunks, then clear remaining +   words, finally clear remaining bytes. +   Since a stf.spill f0 can store 16B in one go, we use this instruction +   to get peak speed.  */ + +#include <sysdep.h> +#undef ret + +#define dest		in0 +#define	cnt		in1 + +#define tmp		r31 +#define save_lc		r30 +#define ptr0		r29 +#define ptr1		r28 +#define ptr2		r27 +#define ptr3		r26 +#define ptr9 		r24 +#define	loopcnt		r23 +#define linecnt		r22 +#define bytecnt		r21 + +// This routine uses only scratch predicate registers (p6 - p15) +#define p_scr		p6	// default register for same-cycle branches +#define p_unalgn	p9 +#define p_y		p11 +#define p_n		p12 +#define p_yy		p13 +#define p_nn		p14 + +#define movi0		mov + +#define MIN1		15 +#define MIN1P1HALF	8 +#define LINE_SIZE	128 +#define LSIZE_SH        7			// shift amount +#define PREF_AHEAD	8 + +#define USE_FLP +#if defined(USE_INT) +#define store		st8 +#define myval		r0 +#elif defined(USE_FLP) +#define store		stf8 +#define myval		f0 +#endif + +.align	64 +ENTRY(bzero) +{ .mmi +	.prologue +	alloc	tmp = ar.pfs, 2, 0, 0, 0 +	lfetch.nt1 [dest] +	.save   ar.lc, save_lc +	movi0	save_lc = ar.lc +} { .mmi +	.body +	mov	ret0 = dest		// return value +	nop.m	0 +	cmp.eq	p_scr, p0 = cnt, r0 +;; } +{ .mmi +	and	ptr2 = -(MIN1+1), dest	// aligned address +	and	tmp = MIN1, dest	// prepare to check for alignment +	tbit.nz p_y, p_n = dest, 0	// Do we have an odd address? 
(M_B_U) +} { .mib +	mov	ptr1 = dest +	nop.i	0 +(p_scr)	br.ret.dpnt.many rp		// return immediately if count = 0 +;; } +{ .mib +	cmp.ne	p_unalgn, p0 = tmp, r0 +} { .mib					// NB: # of bytes to move is 1 +	sub	bytecnt = (MIN1+1), tmp		//     higher than loopcnt +	cmp.gt	p_scr, p0 = 16, cnt		// is it a minimalistic task? +(p_scr)	br.cond.dptk.many .move_bytes_unaligned	// go move just a few (M_B_U) +;; } +{ .mmi +(p_unalgn) add	ptr1 = (MIN1+1), ptr2		// after alignment +(p_unalgn) add	ptr2 = MIN1P1HALF, ptr2		// after alignment +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3	// should we do a st8 ? +;; } +{ .mib +(p_y)	add	cnt = -8, cnt +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2	// should we do a st4 ? +} { .mib +(p_y)	st8	[ptr2] = r0,-4 +(p_n)	add	ptr2 = 4, ptr2 +;; } +{ .mib +(p_yy)	add	cnt = -4, cnt +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1	// should we do a st2 ? +} { .mib +(p_yy)	st4	[ptr2] = r0,-2 +(p_nn)	add	ptr2 = 2, ptr2 +;; } +{ .mmi +	mov	tmp = LINE_SIZE+1		// for compare +(p_y)	add	cnt = -2, cnt +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0	// should we do a st1 ? +} { .mmi +	nop.m	0 +(p_y)	st2	[ptr2] = r0,-1 +(p_n)	add	ptr2 = 1, ptr2 +;; } + +{ .mmi +(p_yy)	st1	[ptr2] = r0 +  	cmp.gt	p_scr, p0 = tmp, cnt		// is it a minimalistic task? 
+} { .mbb +(p_yy)	add	cnt = -1, cnt +(p_scr)	br.cond.dpnt.many .fraction_of_line	// go move just a few +;; } +{ .mib +	nop.m 	0 +	shr.u	linecnt = cnt, LSIZE_SH +	nop.b	0 +;; } + +	.align 32 +.l1b:	// ------------------//  L1B: store ahead into cache lines; fill later +{ .mmi +	and	tmp = -(LINE_SIZE), cnt		// compute end of range +	mov	ptr9 = ptr1			// used for prefetching +	and	cnt = (LINE_SIZE-1), cnt	// remainder +} { .mmi +	mov	loopcnt = PREF_AHEAD-1		// default prefetch loop +	cmp.gt	p_scr, p0 = PREF_AHEAD, linecnt	// check against actual value +;; } +{ .mmi +(p_scr)	add	loopcnt = -1, linecnt +	add	ptr2 = 16, ptr1	// start of stores (beyond prefetch stores) +	add	ptr1 = tmp, ptr1	// first address beyond total range +;; } +{ .mmi +	add	tmp = -1, linecnt	// next loop count +	movi0	ar.lc = loopcnt +;; } +.pref_l1b: +{ .mib +	stf.spill [ptr9] = f0, 128	// Do stores one cache line apart +	nop.i   0 +	br.cloop.dptk.few .pref_l1b +;; } +{ .mmi +	add	ptr0 = 16, ptr2		// Two stores in parallel +	movi0	ar.lc = tmp +;; } +.l1bx: + { .mmi +	stf.spill [ptr2] = f0, 32 +	stf.spill [ptr0] = f0, 32 + ;; } + { .mmi +	stf.spill [ptr2] = f0, 32 +	stf.spill [ptr0] = f0, 32 + ;; } + { .mmi +	stf.spill [ptr2] = f0, 32 +	stf.spill [ptr0] = f0, 64 + 	cmp.lt	p_scr, p0 = ptr9, ptr1	// do we need more prefetching? + ;; } +{ .mmb +	stf.spill [ptr2] = f0, 32 +(p_scr)	stf.spill [ptr9] = f0, 128 +	br.cloop.dptk.few .l1bx +;; } +{ .mib +	cmp.gt  p_scr, p0 = 8, cnt	// just a few bytes left ? 
+(p_scr)	br.cond.dpnt.many  .move_bytes_from_alignment +;; } + +.fraction_of_line: +{ .mib +	add	ptr2 = 16, ptr1 +	shr.u	loopcnt = cnt, 5   	// loopcnt = cnt / 32 +;; } +{ .mib +	cmp.eq	p_scr, p0 = loopcnt, r0 +	add	loopcnt = -1, loopcnt +(p_scr)	br.cond.dpnt.many .store_words +;; } +{ .mib +	and	cnt = 0x1f, cnt		// compute the remaining cnt +	movi0   ar.lc = loopcnt +;; } +	.align 32 +.l2:	// -----------------------------//  L2A:  store 32B in 2 cycles +{ .mmb +	store	[ptr1] = myval, 8 +	store	[ptr2] = myval, 8 +;; } { .mmb +	store	[ptr1] = myval, 24 +	store	[ptr2] = myval, 24 +	br.cloop.dptk.many .l2 +;; } +.store_words: +{ .mib +	cmp.gt	p_scr, p0 = 8, cnt	// just a few bytes left ? +(p_scr)	br.cond.dpnt.many .move_bytes_from_alignment	// Branch +;; } + +{ .mmi +	store	[ptr1] = myval, 8	// store +	cmp.le	p_y, p_n = 16, cnt	// +	add	cnt = -8, cnt		// subtract +;; } +{ .mmi +(p_y)	store	[ptr1] = myval, 8	// store +(p_y)	cmp.le.unc p_yy, p_nn = 16, cnt +(p_y)	add	cnt = -8, cnt		// subtract +;; } +{ .mmi					// store +(p_yy)	store	[ptr1] = myval, 8 +(p_yy)	add	cnt = -8, cnt		// subtract +;; } + +.move_bytes_from_alignment: +{ .mib +	cmp.eq	p_scr, p0 = cnt, r0 +	tbit.nz.unc p_y, p0 = cnt, 2	// should we terminate with a st4 ? +(p_scr)	br.cond.dpnt.few .restore_and_exit +;; } +{ .mib +(p_y)	st4	[ptr1] = r0,4 +	tbit.nz.unc p_yy, p0 = cnt, 1	// should we terminate with a st2 ? +;; } +{ .mib +(p_yy)	st2	[ptr1] = r0,2 +	tbit.nz.unc p_y, p0 = cnt, 0	// should we terminate with a st1 ? 
+;; } + +{ .mib +(p_y)	st1	[ptr1] = r0 +;; } +.restore_and_exit: +{ .mib +	nop.m	0 +	movi0	ar.lc = save_lc +	br.ret.sptk.many rp +;; } + +.move_bytes_unaligned: +{ .mmi +       .pred.rel "mutex",p_y, p_n +       .pred.rel "mutex",p_yy, p_nn +(p_n)	cmp.le  p_yy, p_nn = 4, cnt +(p_y)	cmp.le  p_yy, p_nn = 5, cnt +(p_n)	add	ptr2 = 2, ptr1 +} { .mmi +(p_y)	add	ptr2 = 3, ptr1 +(p_y)	st1	[ptr1] = r0, 1		// fill 1 (odd-aligned) byte +(p_y)	add	cnt = -1, cnt		// [15, 14 (or less) left] +;; } +{ .mmi +(p_yy)	cmp.le.unc p_y, p0 = 8, cnt +	add	ptr3 = ptr1, cnt	// prepare last store +	movi0	ar.lc = save_lc +} { .mmi +(p_yy)	st2	[ptr1] = r0, 4		// fill 2 (aligned) bytes +(p_yy)	st2	[ptr2] = r0, 4		// fill 2 (aligned) bytes +(p_yy)	add	cnt = -4, cnt		// [11, 10 (o less) left] +;; } +{ .mmi +(p_y)	cmp.le.unc p_yy, p0 = 8, cnt +	add	ptr3 = -1, ptr3		// last store +	tbit.nz p_scr, p0 = cnt, 1	// will there be a st2 at the end ? +} { .mmi +(p_y)	st2	[ptr1] = r0, 4		// fill 2 (aligned) bytes +(p_y)	st2	[ptr2] = r0, 4		// fill 2 (aligned) bytes +(p_y)	add	cnt = -4, cnt		// [7, 6 (or less) left] +;; } +{ .mmi +(p_yy)	st2	[ptr1] = r0, 4		// fill 2 (aligned) bytes +(p_yy)	st2	[ptr2] = r0, 4		// fill 2 (aligned) bytes +					// [3, 2 (or less) left] +	tbit.nz p_y, p0 = cnt, 0	// will there be a st1 at the end ? +} { .mmi +(p_yy)	add	cnt = -4, cnt +;; } +{ .mmb +(p_scr)	st2	[ptr1] = r0		// fill 2 (aligned) bytes +(p_y)	st1	[ptr3] = r0		// fill last byte (using ptr3) +	br.ret.sptk.many rp +;; } +END(bzero) diff --git a/libc/string/ia64/memccpy.S b/libc/string/ia64/memccpy.S new file mode 100644 index 000000000..53c43c512 --- /dev/null +++ b/libc/string/ia64/memccpy.S @@ -0,0 +1,213 @@ +/* Optimized version of the memccpy() function. +   This file is part of the GNU C Library. +   Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc. +   Contributed by Dan Pop <Dan.Pop@cern.ch>. 
+ +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* Return: a pointer to the next byte after char in dest or NULL + +   Inputs: +        in0:    dest +        in1:    src +  	in2:	char +        in3:    byte count + +   This implementation assumes little endian mode (UM.be = 0). + +   This implementation assumes that it is safe to do read ahead +   in the src block, without getting beyond its limit.  */ + +#include <sysdep.h> +#undef ret + +#define OP_T_THRES 	16 +#define OPSIZ 		8 + +#define saved_pr	r17 +#define saved_lc	r18 +#define dest		r19 +#define src		r20 +#define len		r21 +#define asrc		r22 +#define tmp		r23 +#define char		r24 +#define charx8		r25 +#define saved_ec	r26 +#define sh2		r28 +#define	sh1		r29 +#define loopcnt		r30 +#define	value		r31 + +#ifdef GAS_ALIGN_BREAKS_UNWIND_INFO +/* Manually force proper loop-alignment.  Note: be sure to +   double-check the code-layout after making any changes to +   this routine! 
*/ +# define ALIGN(n)	{ nop 0 } +#else +# define ALIGN(n)	.align n +#endif + +ENTRY(memccpy) +	.prologue +	alloc 	r2 = ar.pfs, 4, 40 - 4, 0, 40 + +#include "softpipe.h" +	.rotr	r[MEMLAT + 7], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2] +	.rotp	p[MEMLAT + 6 + 1] + +	mov	ret0 = r0		// return NULL if no match +	.save pr, saved_pr +	mov	saved_pr = pr		// save the predicate registers +	mov 	dest = in0		// dest +	.save ar.lc, saved_lc +        mov 	saved_lc = ar.lc	// save the loop counter +        mov 	saved_ec = ar.ec	// save the loop counter +	.body +	mov 	src = in1		// src +	extr.u	char = in2, 0, 8	// char +	mov	len = in3		// len +	sub	tmp = r0, in0		// tmp = -dest +	cmp.ne	p7, p0 = r0, r0		// clear p7 +	;; +	and	loopcnt = 7, tmp	// loopcnt = -dest % 8 +	cmp.ge	p6, p0 = OP_T_THRES, len	// is len <= OP_T_THRES +	mov	ar.ec = 0		// ec not guaranteed zero on entry +(p6)	br.cond.spnt	.cpyfew		// copy byte by byte +	;; +	cmp.eq	p6, p0 = loopcnt, r0 +	mux1	charx8 = char, @brcst +(p6)	br.cond.sptk .dest_aligned +	sub	len = len, loopcnt	// len -= -dest % 8 +	adds	loopcnt = -1, loopcnt	// --loopcnt +	;; +	mov	ar.lc = loopcnt +.l1:					// copy -dest % 8 bytes +	ld1	value = [src], 1	// value = *src++ +	;; +	st1	[dest] = value, 1	// *dest++ = value +	cmp.eq	p6, p0 = value, char +(p6)	br.cond.spnt .foundit +	br.cloop.dptk .l1 +.dest_aligned: +	and	sh1 = 7, src 		// sh1 = src % 8 +	and	tmp = -8, len   	// tmp = len & -OPSIZ +	and	asrc = -8, src		// asrc = src & -OPSIZ  -- align src +	shr.u	loopcnt = len, 3	// loopcnt = len / 8 +	and	len = 7, len ;;		// len = len % 8 +	shl	sh1 = sh1, 3		// sh1 = 8 * (src % 8) +	adds	loopcnt = -1, loopcnt	// --loopcnt +	mov     pr.rot = 1 << 16 ;;	// set rotating predicates +	sub	sh2 = 64, sh1		// sh2 = 64 - sh1 +	mov	ar.lc = loopcnt		// set LC +	cmp.eq  p6, p0 = sh1, r0 	// is the src aligned? 
+(p6)    br.cond.sptk .src_aligned ;; +	add	src = src, tmp		// src += len & -OPSIZ +	mov	ar.ec = MEMLAT + 6 + 1 	// six more passes needed +	ld8	r[1] = [asrc], 8 	// r[1] = w0 +	cmp.ne	p6, p0 = r0, r0	;;	// clear p6 +	ALIGN(32) +.l2: +(p[0])		ld8.s	r[0] = [asrc], 8		// r[0] = w1 +(p[MEMLAT])	shr.u	tmp1[0] = r[1 + MEMLAT], sh1	// tmp1 = w0 >> sh1 +(p[MEMLAT])	shl	tmp2[0] = r[0 + MEMLAT], sh2  	// tmp2 = w1 << sh2 +(p[MEMLAT+4])	xor	tmp3[0] = val[1], charx8 +(p[MEMLAT+5])	czx1.r	pos0[0] = tmp3[1] +(p[MEMLAT+6])	chk.s	r[6 + MEMLAT], .recovery1	// our data isn't +							// valid - rollback! +(p[MEMLAT+6])	cmp.ne	p6, p0 = 8, pos0[1] +(p6)		br.cond.spnt	.gotit +(p[MEMLAT+6])	st8	[dest] = val[3], 8		// store val to dest +(p[MEMLAT+3])	or	val[0] = tmp1[3], tmp2[3] 	// val = tmp1 | tmp2 +		br.ctop.sptk    .l2 +		br.cond.sptk .cpyfew + +.src_aligned: +		cmp.ne  p6, p0 = r0, r0			// clear p6 +		mov     ar.ec = MEMLAT + 2 + 1 ;;	// set EC +.l3: +(p[0])		ld8.s	r[0] = [src], 8 +(p[MEMLAT])	xor	tmp3[0] = r[MEMLAT], charx8 +(p[MEMLAT+1])	czx1.r	pos0[0] = tmp3[1] +(p[MEMLAT+2])	cmp.ne	p7, p0 = 8, pos0[1] +(p[MEMLAT+2])	chk.s	r[MEMLAT+2], .recovery2 +(p7)		br.cond.spnt	.gotit +.back2: +(p[MEMLAT+2])	st8	[dest] = r[MEMLAT+2], 8 +		br.ctop.dptk .l3 +.cpyfew: +	cmp.eq	p6, p0 = len, r0	// is len == 0 ? 
+	adds	len = -1, len		// --len; +(p6)	br.cond.spnt	.restore_and_exit ;; +	mov	ar.lc = len +.l4: +	ld1	value = [src], 1 +	;; +	st1	[dest] = value, 1 +	cmp.eq	p6, p0 = value, char +(p6)	br.cond.spnt .foundit +	br.cloop.dptk	.l4 ;; +.foundit: +(p6)	mov	ret0 = dest +.restore_and_exit: +	mov     pr = saved_pr, -1    	// restore the predicate registers +	mov 	ar.lc = saved_lc	// restore the loop counter +	mov 	ar.ec = saved_ec ;;	// restore the epilog counter +	br.ret.sptk.many b0 +.gotit: +	.pred.rel "mutex" p6, p7 +(p6)	mov	value = val[3]		// if coming from l2 +(p7)	mov	value = r[MEMLAT+2]	// if coming from l3 +	mov	ar.lc = pos0[1] ;; +.l5: +	extr.u	tmp = value, 0, 8 ;; +	st1	[dest] = tmp, 1 +	shr.u	value = value, 8 +	br.cloop.sptk .l5 ;; +	mov 	ret0 = dest +	mov	pr = saved_pr, -1 +	mov	ar.lc = saved_lc +	br.ret.sptk.many b0 + +.recovery1: +	adds	src = -(MEMLAT + 6 + 1) * 8, asrc +	mov	loopcnt = ar.lc +	mov	tmp = ar.ec ;; +	sub	sh1 = (MEMLAT + 6 + 1), tmp +	shr.u	sh2 = sh2, 3 +	;;  +	shl	loopcnt = loopcnt, 3 +	sub	src = src, sh2 +	shl	sh1 = sh1, 3 +	shl	tmp = tmp, 3 +	;; +	add	len = len, loopcnt +	add	src = sh1, src ;; +	add	len = tmp, len +.back1: +	br.cond.sptk .cpyfew + +.recovery2: +	add	tmp = -(MEMLAT + 3) * 8, src +(p7)	br.cond.spnt .gotit +	;; +	ld8	r[MEMLAT+2] = [tmp] ;; +	xor	pos0[1] = r[MEMLAT+2], charx8 ;; +	czx1.r	pos0[1] = pos0[1] ;; +	cmp.ne	p7, p6 = 8, pos0[1] +(p7)	br.cond.spnt .gotit +	br.cond.sptk .back2 +END(memccpy) diff --git a/libc/string/ia64/memchr.S b/libc/string/ia64/memchr.S new file mode 100644 index 000000000..d7742fe8a --- /dev/null +++ b/libc/string/ia64/memchr.S @@ -0,0 +1,133 @@ +/* Optimized version of the standard memchr() function. +   This file is part of the GNU C Library. +   Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc. +   Contributed by Dan Pop <Dan.Pop@cern.ch>. 
+ +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* Return: the address of the first occurrence of chr in str or NULL + +   Inputs: +  	in0:	str +  	in1:	chr +  	in2:	byte count + +   This implementation assumes little endian mode.  For big endian mode, +   the instruction czx1.r should be replaced by czx1.l. + +   The algorithm is fairly straightforward: search byte by byte until +   we get to a word aligned address, then search word by word as much as +   possible; the remaining few bytes are searched one at a time. + +   The word by word search is performed by xor-ing the word with a word +   containing chr in every byte.  If there is a hit, the result will +   contain a zero byte in the corresponding position.  The presence and +   position of that zero byte is detected with a czx instruction. + +   All the loops in this function could have had the internal branch removed +   if br.ctop and br.cloop could be predicated :-(.  
*/ + +#include <sysdep.h> +#undef ret + +#define saved_pr	r15 +#define saved_lc	r16 +#define	chr		r17 +#define len		r18 +#define pos0		r20 +#define val		r21 +#define tmp		r24 +#define chrx8		r25 +#define loopcnt		r30 + +#define str		in0 + +ENTRY(__memchr) +	.prologue +	alloc r2 = ar.pfs, 3, 0, 29, 32 +#include "softpipe.h" +	.rotr	value[MEMLAT+1], addr[MEMLAT+3], aux[2], poschr[2] +	.rotp	p[MEMLAT+3] +	.save ar.lc, saved_lc +        mov 	saved_lc = ar.lc 	// save the loop counter +	.save pr, saved_pr +	mov	saved_pr = pr		// save the predicates +	.body +	mov 	ret0 = str +	and 	tmp = 7, str		// tmp = str % 8 +	cmp.ne	p7, p0 = r0, r0		// clear p7 +	extr.u	chr = in1, 0, 8		// chr = (unsigned char) in1 +	mov	len = in2 +	cmp.gtu	p6, p0 = 16, in2	// use a simple loop for short +(p6)	br.cond.spnt .srchfew ;;	// searches +	sub	loopcnt = 8, tmp	// loopcnt = 8 - tmp +	cmp.eq	p6, p0 = tmp, r0 +(p6)	br.cond.sptk	.str_aligned;; +	sub	len = len, loopcnt +	adds	loopcnt = -1, loopcnt;; +	mov	ar.lc = loopcnt +.l1: +	ld1	val = [ret0], 1 +	;; +	cmp.eq	p6, p0 = val, chr +(p6)	br.cond.spnt	.foundit +	br.cloop.sptk	.l1 ;; +.str_aligned: +	cmp.ne	p6, p0 = r0, r0		// clear p6 +	shr.u	loopcnt = len, 3	// loopcnt = len / 8 +	and 	len = 7, len ;;		// remaining len = len & 7 +	adds	loopcnt = -1, loopcnt +	mov	ar.ec = MEMLAT + 3 +	mux1	chrx8 = chr, @brcst ;;	// get a word full of chr +	mov	ar.lc = loopcnt +	mov	pr.rot = 1 << 16 ;; +.l2: +(p[0])		mov	addr[0] = ret0 +(p[0])		ld8	value[0] = [ret0], 8 +(p[MEMLAT])	xor	aux[0] = value[MEMLAT], chrx8 +(p[MEMLAT+1])	czx1.r	poschr[0] = aux[1] +(p[MEMLAT+2])	cmp.ne	p7, p0 = 8, poschr[1] +(p7)		br.cond.dpnt .foundit +		br.ctop.dptk .l2 +.srchfew: +	adds	loopcnt = -1, len +	cmp.eq	p6, p0 = len, r0 +(p6)	br.cond.spnt .notfound ;; +	mov	ar.lc = loopcnt +.l3: +	ld1	val = [ret0], 1 +	;; +	cmp.eq	p6, p0 = val, chr +(p6)	br.cond.dpnt	.foundit +	br.cloop.sptk	.l3 ;; +.notfound: +	cmp.ne	p6, p0 = r0, r0	// clear p6 (p7 was already 0 when we got here) +	mov	ret0 = 
r0 ;;	// return NULL +.foundit: +	.pred.rel "mutex" p6, p7 +(p6)	adds	ret0 = -1, ret0 		   // if we got here from l1 or l3 +(p7)	add	ret0 = addr[MEMLAT+2], poschr[1]   // if we got here from l2 +	mov	pr = saved_pr, -1 +	mov	ar.lc = saved_lc +	br.ret.sptk.many b0 + +END(__memchr) + +weak_alias (__memchr, memchr) +#if !__BOUNDED_POINTERS__ +weak_alias (__memchr, __ubp_memchr) +#endif +libc_hidden_def (memchr) diff --git a/libc/string/ia64/memcmp.S b/libc/string/ia64/memcmp.S new file mode 100644 index 000000000..997dad9d4 --- /dev/null +++ b/libc/string/ia64/memcmp.S @@ -0,0 +1,165 @@ +/* Optimized version of the standard memcmp() function. +   This file is part of the GNU C Library. +   Copyright (C) 2000, 2001, 2004 Free Software Foundation, Inc. +   Contributed by Dan Pop <Dan.Pop@cern.ch>. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* Return: the result of the comparison + +   Inputs: +        in0:    dest (aka s1) +        in1:    src  (aka s2) +        in2:    byte count + +   In this form, it assumes little endian mode.  
For big endian mode, the +   the two shifts in .l2 must be inverted: + +	shl   	tmp1[0] = r[1 + MEMLAT], sh1   // tmp1 = w0 << sh1 +	shr.u   tmp2[0] = r[0 + MEMLAT], sh2   // tmp2 = w1 >> sh2 + +   and all the mux1 instructions should be replaced by plain mov's.  */ + +#include <sysdep.h> +#undef ret + +#define OP_T_THRES 	16 +#define OPSIZ 		8 +#define MEMLAT		2 + +#define start		r15 +#define saved_pr	r17 +#define saved_lc	r18 +#define dest		r19 +#define src		r20 +#define len		r21 +#define asrc		r22 +#define tmp		r23 +#define value1		r24 +#define value2		r25 +#define sh2		r28 +#define	sh1		r29 +#define loopcnt		r30 + +ENTRY(memcmp) +	.prologue +	alloc 	r2 = ar.pfs, 3, 37, 0, 40 + +	.rotr	r[MEMLAT + 2], q[MEMLAT + 5], tmp1[4], tmp2[4], val[2] +	.rotp	p[MEMLAT + 4 + 1] + +	mov	ret0 = r0		// by default return value = 0 +	.save pr, saved_pr +	mov	saved_pr = pr		// save the predicate registers +	.save ar.lc, saved_lc +        mov 	saved_lc = ar.lc	// save the loop counter +	.body +	mov 	dest = in0		// dest +	mov 	src = in1		// src +	mov	len = in2		// len +	sub	tmp = r0, in0		// tmp = -dest +	;; +	and	loopcnt = 7, tmp		// loopcnt = -dest % 8 +	cmp.ge	p6, p0 = OP_T_THRES, len	// is len <= OP_T_THRES +(p6)	br.cond.spnt	.cmpfew			// compare byte by byte +	;; +	cmp.eq	p6, p0 = loopcnt, r0 +(p6)	br.cond.sptk .dest_aligned +	sub	len = len, loopcnt	// len -= -dest % 8 +	adds	loopcnt = -1, loopcnt	// --loopcnt +	;; +	mov	ar.lc = loopcnt +.l1:					// copy -dest % 8 bytes +	ld1	value1 = [src], 1	// value = *src++ +	ld1	value2 = [dest], 1 +	;; +	cmp.ne	p6, p0 = value1, value2 +(p6)	br.cond.spnt .done +	br.cloop.dptk .l1 +.dest_aligned: +	and	sh1 = 7, src 		// sh1 = src % 8 +	and	tmp = -8, len   	// tmp = len & -OPSIZ +	and	asrc = -8, src		// asrc = src & -OPSIZ  -- align src +	shr.u	loopcnt = len, 3	// loopcnt = len / 8 +	and	len = 7, len ;;		// len = len % 8 +	shl	sh1 = sh1, 3		// sh1 = 8 * (src % 8) +	adds	loopcnt = -1, loopcnt	// --loopcnt +	mov     pr.rot = 1 << 16 ;;	// set 
rotating predicates +	sub	sh2 = 64, sh1		// sh2 = 64 - sh1 +	mov	ar.lc = loopcnt		// set LC +	cmp.eq  p6, p0 = sh1, r0 	// is the src aligned? +(p6)    br.cond.sptk .src_aligned +	add	src = src, tmp		// src += len & -OPSIZ +	mov	ar.ec = MEMLAT + 4 + 1 	// four more passes needed +	ld8	r[1] = [asrc], 8 ;;	// r[1] = w0 +	.align	32 + +// We enter this loop with p6 cleared by the above comparison + +.l2: +(p[0])		ld8	r[0] = [asrc], 8		// r[0] = w1 +(p[0])		ld8	q[0] = [dest], 8 +(p[MEMLAT])	shr.u	tmp1[0] = r[1 + MEMLAT], sh1	// tmp1 = w0 >> sh1 +(p[MEMLAT])	shl	tmp2[0] = r[0 + MEMLAT], sh2  	// tmp2 = w1 << sh2 +(p[MEMLAT+4])	cmp.ne	p6, p0 = q[MEMLAT + 4], val[1] +(p[MEMLAT+3])	or	val[0] = tmp1[3], tmp2[3] 	// val = tmp1 | tmp2 +(p6)		br.cond.spnt .l2exit +		br.ctop.sptk    .l2 +		br.cond.sptk .cmpfew +.l3exit: +	mux1	value1 = r[MEMLAT], @rev +	mux1	value2 = q[MEMLAT], @rev +	cmp.ne	p6, p0 = r0, r0	;;	// clear p6 +.l2exit: +(p6)	mux1	value1 = val[1], @rev +(p6)	mux1	value2 = q[MEMLAT + 4], @rev ;; +	cmp.ltu	p6, p7 = value2, value1 ;; +(p6)	mov	ret0 = -1 +(p7)	mov	ret0 = 1 +	mov     pr = saved_pr, -1    	// restore the predicate registers +	mov 	ar.lc = saved_lc	// restore the loop counter +	br.ret.sptk.many b0 +.src_aligned: +	cmp.ne	p6, p0 = r0, r0		// clear p6 +	mov     ar.ec = MEMLAT + 1 ;;	// set EC +.l3: +(p[0])		ld8	r[0] = [src], 8 +(p[0])		ld8	q[0] = [dest], 8 +(p[MEMLAT])	cmp.ne	p6, p0 = r[MEMLAT], q[MEMLAT] +(p6)		br.cond.spnt .l3exit +		br.ctop.dptk .l3 ;; +.cmpfew: +	cmp.eq	p6, p0 = len, r0	// is len == 0 ? 
+	adds	len = -1, len		// --len; +(p6)	br.cond.spnt	.restore_and_exit ;; +	mov	ar.lc = len +.l4: +	ld1	value1 = [src], 1 +	ld1	value2 = [dest], 1 +	;; +	cmp.ne	p6, p0 = value1, value2 +(p6)	br.cond.spnt	.done +	br.cloop.dptk	.l4 ;; +.done: +(p6)	sub	ret0 = value2, value1	// don't execute it if falling thru +.restore_and_exit: +	mov     pr = saved_pr, -1    	// restore the predicate registers +	mov 	ar.lc = saved_lc	// restore the loop counter +	br.ret.sptk.many b0 +END(memcmp) + +weak_alias (memcmp, bcmp) +libc_hidden_def (memcmp) diff --git a/libc/string/ia64/memcpy.S b/libc/string/ia64/memcpy.S new file mode 100644 index 000000000..34a3706e0 --- /dev/null +++ b/libc/string/ia64/memcpy.S @@ -0,0 +1,436 @@ +/* Optimized version of the standard memcpy() function. +   This file is part of the GNU C Library. +   Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc. +   Contributed by Dan Pop for Itanium <Dan.Pop@cern.ch>. +   Rewritten for McKinley by Sverre Jarp, HP Labs/CERN <Sverre.Jarp@cern.ch> + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* Return: dest + +   Inputs: +        in0:    dest +        in1:    src +        in2:    byte count + +   An assembly implementation of the algorithm used by the generic C +   version from glibc.  
The case when source and dest are aligned is +   treated separately, for extra performance. + +   In this form, memcpy assumes little endian mode.  For big endian mode, +   sh1 must be computed using an extra instruction: sub sh1 = 64, sh1 +   and the order of r[MEMLAT] and r[MEMLAT+1] must be reversed in the +   shrp instruction.  */ + +#define USE_LFETCH +#define USE_FLP +#include <sysdep.h> +#undef ret + +#define LFETCH_DIST     500 + +#define ALIGN_UNROLL_no   4 // no. of elements +#define ALIGN_UNROLL_sh	  2 // (shift amount) + +#define MEMLAT	8 +#define Nrot	((4*(MEMLAT+2) + 7) & ~7) + +#define OP_T_THRES 	16 +#define OPSIZ 		8 + +#define loopcnt		r14 +#define elemcnt		r15 +#define saved_pr	r16 +#define saved_lc	r17 +#define adest		r18 +#define dest		r19 +#define asrc		r20 +#define src		r21 +#define len		r22 +#define tmp2		r23 +#define tmp3		r24 +#define	tmp4		r25 +#define ptable		r26 +#define ploop56		r27 +#define	loopaddr	r28 +#define	sh1		r29 +#define ptr1		r30 +#define ptr2		r31 + +#define movi0 		mov + +#define p_scr		p6 +#define p_xtr		p7 +#define p_nxtr		p8 +#define p_few		p9 + +#if defined(USE_FLP) +#define load		ldf8 +#define store		stf8 +#define tempreg		f6 +#define the_r		fr +#define the_s		fs +#define the_t		ft +#define the_q		fq +#define the_w		fw +#define the_x		fx +#define the_y		fy +#define the_z		fz +#elif defined(USE_INT) +#define load		ld8 +#define store		st8 +#define tempreg		tmp2 +#define the_r		r +#define the_s		s +#define the_t		t +#define the_q		q +#define the_w		w +#define the_x		x +#define the_y		y +#define the_z		z +#endif + +#ifdef GAS_ALIGN_BREAKS_UNWIND_INFO +/* Manually force proper loop-alignment.  Note: be sure to +   double-check the code-layout after making any changes to +   this routine! 
*/ +# define ALIGN(n)	{ nop 0 } +#else +# define ALIGN(n)	.align n +#endif + +#if defined(USE_LFETCH) +#define LOOP(shift)						\ +		ALIGN(32);					\ +.loop##shift##:							\ +{ .mmb								\ +(p[0])	ld8.nt1	r[0] = [asrc], 8 ;				\ +(p[0])	lfetch.nt1 [ptr1], 16 ;					\ +	nop.b 0 ;						\ +} { .mib							\ +(p[MEMLAT+1]) st8 [dest] = tmp3, 8 ;				\ +(p[MEMLAT]) shrp tmp3 = r[MEMLAT], s[MEMLAT+1], shift ;		\ + 	nop.b 0 ;;						\ + } { .mmb							\ +(p[0])	ld8.nt1	s[0] = [asrc], 8 ;				\ +(p[0])	lfetch.nt1	[ptr2], 16 ;				\ +	nop.b 0 ;						\ +} { .mib							\ +(p[MEMLAT+1]) st8 [dest] = tmp4, 8 ;				\ +(p[MEMLAT]) shrp tmp4 = s[MEMLAT], r[MEMLAT], shift ;		\ +	br.ctop.sptk.many .loop##shift 				\ +;; }								\ +{ .mib								\ +	br.cond.sptk.many .copy_bytes ; /* deal with the remaining bytes */  \ +} +#else +#define LOOP(shift)						\ +		ALIGN(32);					\ +.loop##shift##:							\ +{ .mmb								\ +(p[0])	ld8.nt1	r[0] = [asrc], 8 ;				\ +	nop.b 0 ;						\ +} { .mib							\ +(p[MEMLAT+1]) st8 [dest] = tmp3, 8 ;				\ +(p[MEMLAT]) shrp tmp3 = r[MEMLAT], s[MEMLAT+1], shift ;		\ + 	nop.b 0 ;;						\ + } { .mmb							\ +(p[0])	ld8.nt1	s[0] = [asrc], 8 ;				\ +	nop.b 0 ;						\ +} { .mib							\ +(p[MEMLAT+1]) st8 [dest] = tmp4, 8 ;				\ +(p[MEMLAT]) shrp tmp4 = s[MEMLAT], r[MEMLAT], shift ;		\ +	br.ctop.sptk.many .loop##shift 				\ +;; }								\ +{ .mib								\ +	br.cond.sptk.many .copy_bytes ; /* deal with the remaining bytes */  \ +} +#endif + + +ENTRY(memcpy) +{ .mmi +	.prologue +	alloc 	r2 = ar.pfs, 3, Nrot - 3, 0, Nrot +	.rotr	r[MEMLAT+1], s[MEMLAT+2], q[MEMLAT+1], t[MEMLAT+1] +	.rotp	p[MEMLAT+2] +	.rotf	fr[MEMLAT+1], fq[MEMLAT+1], fs[MEMLAT+1], ft[MEMLAT+1] +	mov	ret0 = in0		// return value = dest +	.save   pr, saved_pr +	movi0	saved_pr = pr		// save the predicate registers +} { .mmi +	and	tmp4 = 7, in0 		// check if destination is aligned +	mov 	dest = in0		// dest +	mov 	src = in1		// src +;; } +{ .mii +	cmp.eq	p_scr, p0 = in2, r0	// if (len == 0) +	.save   ar.lc, 
saved_lc +        movi0 	saved_lc = ar.lc	// save the loop counter +	.body +	cmp.ge	p_few, p0 = OP_T_THRES, in2 // is len <= OP_T_THRES +} { .mbb +	mov	len = in2		// len +(p_scr)	br.cond.dpnt.few .restore_and_exit // 	Branch no. 1: return dest +(p_few) br.cond.dpnt.many .copy_bytes	// Branch no. 2: copy byte by byte +;; } +{ .mmi +#if defined(USE_LFETCH) +	lfetch.nt1 [dest]		// prefetch the start of dest +	lfetch.nt1 [src]		// prefetch the start of src +#endif +	shr.u	elemcnt = len, 3	// elemcnt = len / 8 +} { .mib +	cmp.eq	p_scr, p0 = tmp4, r0	// is destination aligned? +	sub	loopcnt = 7, tmp4	// loopcnt = 7 - dest % 8 (alignment bytes - 1) +(p_scr) br.cond.dptk.many .dest_aligned +;; } +{ .mmi +	ld1	tmp2 = [src], 1		// preload the first byte +	sub	len = len, loopcnt, 1	// reduce len by loopcnt + 1 (bytes copied in .l0) +	movi0	ar.lc = loopcnt		// .l0 runs loopcnt + 1 times +} { .mib +	cmp.ne  p_scr, p0 = 0, loopcnt	// avoid loading beyond end-point +;; } + +.l0:	// ---------------------------- // L0: Align dest on 8-byte boundary +{ .mmi +	st1	[dest] = tmp2, 1	// store the byte loaded on the previous pass +(p_scr)	ld1	tmp2 = [src], 1		// load the next byte (skipped on the last pass) +} { .mib +	cmp.lt	p_scr, p0 = 1, loopcnt	// avoid load beyond end-point +	add	loopcnt = -1, loopcnt +	br.cloop.dptk.few .l0		// loop until ar.lc reaches 0 +;; } + +.dest_aligned: +{ .mmi +	and	tmp4 = 7, src		// ready for alignment check +	shr.u	elemcnt = len, 3	// elemcnt = len / 8 +;; } +{ .mib +	cmp.ne	p_scr, p0 = tmp4, r0	// is source also aligned +	tbit.nz p_xtr, p_nxtr = src, 3	// prepare a separate move if src +} { .mib				// is not 16B aligned +	add	ptr2 = LFETCH_DIST, dest	// prefetch address +	add	ptr1 = LFETCH_DIST, src +(p_scr) br.cond.dptk.many .src_not_aligned +;; } + +// The optimal case, when dest, and src are aligned + +.both_aligned: +{ .mmi +	.pred.rel "mutex",p_xtr,p_nxtr +(p_xtr)	cmp.gt  p_scr, p0 = ALIGN_UNROLL_no+1, elemcnt // Need N + 1 to qualify +(p_nxtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no, elemcnt  // Need only N to qualify +	movi0	pr.rot = 1 << 16	// set rotating predicates +} { .mib +(p_scr) br.cond.dpnt.many .copy_full_words +;; } + +{ .mmi +(p_xtr)	load	tempreg = [src], 8 +(p_xtr) add 	elemcnt = -1, elemcnt +	movi0	ar.ec = MEMLAT + 1	// set the 
epilog counter +;; } +{ .mmi +(p_xtr) add	len = -8, len		// account for the "extra" word +	add 	asrc = 16, src 		// one bank apart (for USE_INT) +	shr.u	loopcnt = elemcnt, ALIGN_UNROLL_sh  // cater for unrolling +;;} +{ .mmi +	add	loopcnt = -1, loopcnt +(p_xtr)	store	[dest] = tempreg, 8	// copy the "extra" word +	nop.i	0 +;; } +{ .mib +	add	adest = 16, dest +	movi0	ar.lc = loopcnt 	// set the loop counter +;; } + +#ifdef  GAS_ALIGN_BREAKS_UNWIND_INFO +	{ nop 0 } +#else +	.align	32 +#endif +#if defined(USE_FLP) +.l1: // ------------------------------- // L1: Everything a multiple of 8 +{ .mmi +#if defined(USE_LFETCH) +(p[0])	lfetch.nt1 [ptr2],32 +#endif +(p[0])	ldfp8	the_r[0],the_q[0] = [src], 16 +(p[0])	add	len = -32, len +} {.mmb +(p[MEMLAT]) store [dest] = the_r[MEMLAT], 8 +(p[MEMLAT]) store [adest] = the_s[MEMLAT], 8 +;; } +{ .mmi +#if defined(USE_LFETCH) +(p[0])	lfetch.nt1 [ptr1],32 +#endif +(p[0])	ldfp8	the_s[0], the_t[0] = [src], 16 +} {.mmb +(p[MEMLAT]) store [dest] = the_q[MEMLAT], 24 +(p[MEMLAT]) store [adest] = the_t[MEMLAT], 24 +	br.ctop.dptk.many .l1 +;; } +#elif defined(USE_INT) +.l1: // ------------------------------- // L1: Everything a multiple of 8 +{ .mmi +(p[0])	load	the_r[0] = [src], 8 +(p[0])	load	the_q[0] = [asrc], 8 +(p[0])	add	len = -32, len +} {.mmb +(p[MEMLAT]) store [dest] = the_r[MEMLAT], 8 +(p[MEMLAT]) store [adest] = the_q[MEMLAT], 8 +;; } +{ .mmi +(p[0])	load	the_s[0]  = [src], 24 +(p[0])	load	the_t[0] = [asrc], 24 +} {.mmb +(p[MEMLAT]) store [dest] = the_s[MEMLAT], 24 +(p[MEMLAT]) store [adest] = the_t[MEMLAT], 24 +#if defined(USE_LFETCH) +;; } +{ .mmb +(p[0])	lfetch.nt1 [ptr2],32 +(p[0])	lfetch.nt1 [ptr1],32 +#endif +	br.ctop.dptk.many .l1 +;; } +#endif + +.copy_full_words: +{ .mib +	cmp.gt	p_scr, p0 = 8, len	// fewer than 8 bytes left? +	shr.u	elemcnt = len, 3	// elemcnt = len / 8 +(p_scr) br.cond.dpnt.many .copy_bytes +;; } +{ .mii +	load	tempreg = [src], 8 +	add	loopcnt = -1, elemcnt	// loopcnt = elemcnt - 1 +;; } +{ .mii +	cmp.ne	p_scr, p0 = 0, loopcnt	// avoid load beyond end-point +	mov	ar.lc = loopcnt		// set the loop counter +;; } + +.l2: // 
------------------------------- // L2: Max 4 words copied separately +{ .mmi +	store	[dest] = tempreg, 8 +(p_scr)	load	tempreg = [src], 8	// load the next word (skipped on the last pass) +	add	len = -8, len +} { .mib +	cmp.lt	p_scr, p0 = 1, loopcnt	// avoid load beyond end-point +	add	loopcnt = -1, loopcnt +	br.cloop.dptk.few  .l2 +;; } + +.copy_bytes: +{ .mib +	cmp.eq	p_scr, p0 = len, r0	// is len == 0 ? +	add	loopcnt = -1, len	// loopcnt = len - 1 +(p_scr)	br.cond.spnt	.restore_and_exit +;; } +{ .mii +	ld1	tmp2 = [src], 1 +	movi0	ar.lc = loopcnt +	cmp.ne	p_scr, p0 = 0, loopcnt	// avoid load beyond end-point +;; } + +.l3: // ------------------------------- // L3: Final byte move +{ .mmi +	st1	[dest] = tmp2, 1 +(p_scr)	ld1	tmp2 = [src], 1 +} { .mib +	cmp.lt	p_scr, p0 = 1, loopcnt	// avoid load beyond end-point +	add	loopcnt = -1, loopcnt +	br.cloop.dptk.few  .l3 +;; } + +.restore_and_exit: +{ .mmi +	movi0	pr = saved_pr, -1	// restore the predicate registers +;; } +{ .mib +	movi0	ar.lc = saved_lc	// restore the loop counter +	br.ret.sptk.many b0 +;; } + + +.src_not_aligned: +{ .mmi +	cmp.gt	p_scr, p0 = 16, len +	and	sh1 = 7, src 		// sh1 = src % 8 +	shr.u	loopcnt = len, 4	// loopcnt = len / 16 +} { .mib +	add	tmp4 = @ltoff(.table), gp +	add 	tmp3 = @ltoff(.loop56), gp +(p_scr)	br.cond.dpnt.many .copy_bytes	// do byte by byte if too few +;; } +{ .mmi +	and	asrc = -8, src		// asrc = src & (-8) -- align src for loop +	add 	loopcnt = -1, loopcnt	// loopcnt-- +	shl	sh1 = sh1, 3		// sh1 = 8 * (src % 8) +} { .mmi +	ld8	ptable = [tmp4]		// ptable = &table +	ld8	ploop56 = [tmp3]	// ploop56 = &loop56 +	and	tmp2 = -16, len		// tmp2 = len & (-16) +;; } +{ .mmi +	add	tmp3 = ptable, sh1	// tmp3 = &table + sh1 +	add	src = src, tmp2		// src += len & (-16) +	movi0	ar.lc = loopcnt		// set LC +;; } +{ .mmi +	ld8	tmp4 = [tmp3]		// tmp4 = loop offset +	sub	len = len, tmp2		// len -= len & (-16) +	movi0	ar.ec = MEMLAT + 2 	// one more pass needed +;; } +{ .mmi +	ld8	s[1] = [asrc], 8	// preload +	sub	loopaddr = ploop56,tmp4	// loopaddr = &loop56 
- loop offset +	movi0   pr.rot = 1 << 16	// set rotating predicates +;; } +{ .mib +	nop.m	0 +	movi0	b6 = loopaddr +	br	b6			// jump to the appropriate loop +;; } + +	LOOP(8) +	LOOP(16) +	LOOP(24) +	LOOP(32) +	LOOP(40) +	LOOP(48) +	LOOP(56) +END(memcpy) +libc_hidden_def (memcpy) + +	.rodata +	.align 8 +.table: +	data8	0			// dummy entry (src % 8 == 0 never takes this path) +	data8 	.loop56 - .loop8 +	data8 	.loop56 - .loop16 +	data8 	.loop56 - .loop24 +	data8	.loop56 - .loop32 +	data8	.loop56 - .loop40 +	data8	.loop56 - .loop48 +	data8	.loop56 - .loop56 diff --git a/libc/string/ia64/memmove.S b/libc/string/ia64/memmove.S new file mode 100644 index 000000000..65f5b0383 --- /dev/null +++ b/libc/string/ia64/memmove.S @@ -0,0 +1,251 @@ +/* Optimized version of the standard memmove() function. +   This file is part of the GNU C Library. +   Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc. +   Contributed by Dan Pop <Dan.Pop@cern.ch>. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* Return: dest + +   Inputs: +        in0:    dest +        in1:    src +        in2:    byte count + +   The core of the function is the memcpy implementation used in memcpy.S. +   When bytes have to be copied backwards, only the easy case, when +   all arguments are multiples of 8, is optimised. 
+ +   In this form, it assumes little endian mode.  For big endian mode, +   sh1 must be computed using an extra instruction: sub sh1 = 64, sh1 +   or the UM.be bit should be cleared at the beginning and set at the end.  */ + +#include <sysdep.h> +#undef ret + +#define OP_T_THRES 	16 +#define OPSIZ 		 8 + +#define adest		r15 +#define saved_pr	r17 +#define saved_lc	r18 +#define dest		r19 +#define src		r20 +#define len		r21 +#define asrc		r22 +#define tmp2		r23 +#define tmp3		r24 +#define	tmp4		r25 +#define ptable		r26 +#define ploop56		r27 +#define	loopaddr	r28 +#define	sh1		r29 +#define loopcnt		r30 +#define	value		r31 + +#ifdef GAS_ALIGN_BREAKS_UNWIND_INFO +# define ALIGN(n)	{ nop 0 } +#else +# define ALIGN(n)	.align n +#endif + +#define LOOP(shift)							\ +		ALIGN(32);						\ +.loop##shift##:								\ +(p[0])		ld8	r[0] = [asrc], 8 ;	/* w1 */		\ +(p[MEMLAT+1])	st8	[dest] = value, 8 ;				\ +(p[MEMLAT])	shrp	value = r[MEMLAT], r[MEMLAT+1], shift ;		\ +		nop.b	0 ;						\ +		nop.b	0 ;						\ +		br.ctop.sptk .loop##shift ;				\ +		br.cond.sptk .cpyfew ; /* deal with the remaining bytes */ + +#define MEMLAT	21 +#define Nrot	(((2*MEMLAT+3) + 7) & ~7) + +ENTRY(memmove) +	.prologue +	alloc 	r2 = ar.pfs, 3, Nrot - 3, 0, Nrot +	.rotr	r[MEMLAT + 2], q[MEMLAT + 1] +	.rotp	p[MEMLAT + 2] +	mov	ret0 = in0		// return value = dest +	.save pr, saved_pr +	mov	saved_pr = pr		// save the predicate registers +	.save ar.lc, saved_lc +        mov 	saved_lc = ar.lc	// save the loop counter +	.body +	or	tmp3 = in0, in1 ;;	// tmp3 = dest | src +	or	tmp3 = tmp3, in2	// tmp3 = dest | src | len +	mov 	dest = in0		// dest +	mov 	src = in1		// src +	mov	len = in2		// len +	sub	tmp2 = r0, in0		// tmp2 = -dest +	cmp.eq	p6, p0 = in2, r0	// if (len == 0) +(p6)	br.cond.spnt .restore_and_exit;;// 	return dest; +	and	tmp4 = 7, tmp3 		// tmp4 = (dest | src | len) & 7 +	cmp.le	p6, p0 = dest, src	// if dest <= src it's always safe +(p6)	br.cond.spnt .forward		// to copy forward +	add	tmp3 = src, len;;	// tmp3 = src + len (one byte past the source) +	
cmp.lt	p6, p0 = dest, tmp3	// if dest > src && dest < src + len +(p6)	br.cond.spnt .backward		// we have to copy backward + +.forward: +	shr.u	loopcnt = len, 4 ;;	// loopcnt = len / 16 +	cmp.ne	p6, p0 = tmp4, r0	// if ((dest | src | len) & 7 != 0) +(p6)	br.cond.sptk .next		//	goto next; + +// The optimal case, when dest, src and len are all multiples of 8 + +	and	tmp3 = 0xf, len +	mov	pr.rot = 1 << 16	// set rotating predicates +	mov	ar.ec = MEMLAT + 1 ;;	// set the epilog counter +	cmp.ne	p6, p0 = tmp3, r0	// do we have to copy an extra word? +	adds	loopcnt = -1, loopcnt;;	// --loopcnt +(p6)	ld8	value = [src], 8;; +(p6)	st8	[dest] = value, 8	// copy the "odd" word +	mov	ar.lc = loopcnt 	// set the loop counter +	cmp.eq	p6, p0 = 8, len +(p6)	br.cond.spnt .restore_and_exit;;// the one-word special case +	adds	adest = 8, dest		// set adest one word ahead of dest +	adds	asrc = 8, src ;;	// set asrc one word ahead of src +	nop.b	0			// get the "golden" alignment for +	nop.b	0			// the next loop +.l0:					// pipelined copy, two words (16 bytes) per pass +(p[0])		ld8	r[0] = [src], 16 +(p[0])		ld8	q[0] = [asrc], 16 +(p[MEMLAT])	st8	[dest] = r[MEMLAT], 16 +(p[MEMLAT])	st8	[adest] = q[MEMLAT], 16 +		br.ctop.dptk .l0 ;; + +	mov	pr = saved_pr, -1	// restore the predicate registers +	mov	ar.lc = saved_lc	// restore the loop counter +	br.ret.sptk.many b0 +.next: +	cmp.ge	p6, p0 = OP_T_THRES, len	// is len <= OP_T_THRES +	and	loopcnt = 7, tmp2 		// loopcnt = -dest % 8 +(p6)	br.cond.spnt	.cpyfew			// copy byte by byte +	;; +	cmp.eq	p6, p0 = loopcnt, r0	// is dest already 8-byte aligned? +(p6)	br.cond.sptk	.dest_aligned +	sub	len = len, loopcnt	// len -= -dest % 8 +	adds	loopcnt = -1, loopcnt	// --loopcnt +	;; +	mov	ar.lc = loopcnt +.l1:					// copy -dest % 8 bytes +	ld1	value = [src], 1	// value = *src++ +	;; +	st1	[dest] = value, 1	// *dest++ = value +	br.cloop.dptk .l1 +.dest_aligned: +	and	sh1 = 7, src 		// sh1 = src % 8 +	and	tmp2 = -8, len   	// tmp2 = len & -OPSIZ +	and	asrc = -8, src		// asrc = src & -OPSIZ  -- align src +	shr.u	loopcnt = len, 3	// loopcnt = 
len / 8 +	and	len = 7, len;;		// len = len % 8 +	adds	loopcnt = -1, loopcnt	// --loopcnt +	addl	tmp4 = @ltoff(.table), gp +	addl	tmp3 = @ltoff(.loop56), gp +	mov     ar.ec = MEMLAT + 1	// set EC +	mov     pr.rot = 1 << 16;;	// set rotating predicates +	mov	ar.lc = loopcnt		// set LC +	cmp.eq  p6, p0 = sh1, r0 	// is the src aligned? +(p6)    br.cond.sptk .src_aligned +	add	src = src, tmp2		// src += len & -OPSIZ +	shl	sh1 = sh1, 3		// sh1 = 8 * (src % 8) +	ld8	ploop56 = [tmp3]	// ploop56 = &loop56 +	ld8	ptable = [tmp4];;	// ptable = &table +	add	tmp3 = ptable, sh1;;	// tmp3 = &table + sh1 +	mov	ar.ec = MEMLAT + 1 + 1 // one more pass needed +	ld8	tmp4 = [tmp3];;		// tmp4 = loop offset +	sub	loopaddr = ploop56,tmp4	// loopaddr = &loop56 - loop offset +	ld8	r[1] = [asrc], 8;;	// w0 +	mov	b6 = loopaddr;; +	br	b6			// jump to the appropriate loop + +	LOOP(8) +	LOOP(16) +	LOOP(24) +	LOOP(32) +	LOOP(40) +	LOOP(48) +	LOOP(56) + +.src_aligned: +.l3:					// src and dest both aligned: pipelined word copy +(p[0])		ld8	r[0] = [src], 8 +(p[MEMLAT])	st8	[dest] = r[MEMLAT], 8 +		br.ctop.dptk .l3 +.cpyfew: +	cmp.eq	p6, p0 = len, r0	// is len == 0 ? +	adds	len = -1, len		// --len; +(p6)	br.cond.spnt	.restore_and_exit ;; +	mov	ar.lc = len +.l4:					// copy the trailing bytes one at a time +	ld1	value = [src], 1 +	;; +	st1	[dest] = value, 1 +	br.cloop.dptk	.l4 ;; +.restore_and_exit: +	mov     pr = saved_pr, -1    	// restore the predicate registers +	mov 	ar.lc = saved_lc	// restore the loop counter +	br.ret.sptk.many b0 + +// In the case of a backward copy, optimise only the case when everything +// is a multiple of 8, otherwise copy byte by byte.  The backward copy is +// used only when the blocks are overlapping and dest > src. 
+ +.backward: +	shr.u	loopcnt = len, 3	// loopcnt = len / 8 +	add	src = src, len		// src points one byte past the end +	add	dest = dest, len ;; 	// dest points one byte past the end +	mov	ar.ec = MEMLAT + 1	// set the epilog counter +	mov	pr.rot = 1 << 16	// set rotating predicates +	adds	loopcnt = -1, loopcnt	// --loopcnt +	cmp.ne	p6, p0 = tmp4, r0	// if ((dest | src | len) & 7 != 0) +(p6)	br.cond.sptk .bytecopy ;;	// copy byte by byte backward +	adds	src = -8, src		// src points to the last word +	adds	dest = -8, dest 	// dest points to the last word +	mov	ar.lc = loopcnt;;	// set the loop counter +.l5:					// pipelined backward copy, one word per pass +(p[0])		ld8	r[0] = [src], -8 +(p[MEMLAT])	st8	[dest] = r[MEMLAT], -8 +		br.ctop.dptk .l5 +		br.cond.sptk .restore_and_exit +.bytecopy: +	adds	src = -1, src		// src points to the last byte +	adds	dest = -1, dest		// dest points to the last byte +	adds	loopcnt = -1, len;;	// loopcnt = len - 1 +	mov	ar.lc = loopcnt;;	// set the loop counter +.l6:					// pipelined backward copy, one byte per pass +(p[0])		ld1	r[0] = [src], -1 +(p[MEMLAT])	st1	[dest] = r[MEMLAT], -1 +		br.ctop.dptk .l6 +		br.cond.sptk .restore_and_exit +END(memmove) + +	.rodata +	.align 8 +.table: +	data8	0			// dummy entry +	data8 	.loop56 - .loop8 +	data8 	.loop56 - .loop16 +	data8 	.loop56 - .loop24 +	data8	.loop56 - .loop32 +	data8	.loop56 - .loop40 +	data8	.loop56 - .loop48 +	data8	.loop56 - .loop56 + +libc_hidden_def (memmove) diff --git a/libc/string/ia64/memset.S b/libc/string/ia64/memset.S new file mode 100644 index 000000000..6754cfb88 --- /dev/null +++ b/libc/string/ia64/memset.S @@ -0,0 +1,400 @@ +/* Optimized version of the standard memset() function. +   This file is part of the GNU C Library. +   Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc. +   Contributed by Dan Pop for Itanium <Dan.Pop@cern.ch>. 
+   Rewritten for McKinley by Sverre Jarp, HP Labs/CERN <Sverre.Jarp@cern.ch> + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* Return: dest + +   Inputs: +        in0:    dest +        in1:    value +        in2:    count + +   The algorithm is fairly straightforward: set byte by byte until we +   get to a 16B-aligned address, then loop on 128 B chunks using an +   early store as prefetching, then loop on 32B chunks, then clear remaining +   words, finally clear remaining bytes. +   Since a stf.spill f0 can store 16B in one go, we use this instruction +   to get peak speed when value = 0.  
*/ + +#include <sysdep.h> +#undef ret + +#define dest		in0 +#define value		in1 +#define	cnt		in2 + +#define tmp		r31 +#define save_lc		r30 +#define ptr0		r29 +#define ptr1		r28 +#define ptr2		r27 +#define ptr3		r26 +#define ptr9 		r24 +#define	loopcnt		r23 +#define linecnt		r22 +#define bytecnt		r21 + +#define fvalue		f6 + +// This routine uses only scratch predicate registers (p6 - p15) +#define p_scr		p6			// default register for same-cycle branches +#define p_nz		p7 +#define p_zr		p8 +#define p_unalgn	p9 +#define p_y		p11 +#define p_n		p12 +#define p_yy		p13 +#define p_nn		p14 + +#define movi0		mov + +#define MIN1		15 +#define MIN1P1HALF	8 +#define LINE_SIZE	128 +#define LSIZE_SH        7			// shift amount +#define PREF_AHEAD	8 + +#define USE_FLP +#if defined(USE_INT) +#define store		st8 +#define myval           value +#elif defined(USE_FLP) +#define store		stf8 +#define myval		fvalue +#endif + +.align	64 +ENTRY(memset) +{ .mmi +	.prologue +	alloc	tmp = ar.pfs, 3, 0, 0, 0 +	lfetch.nt1 [dest] +	.save   ar.lc, save_lc +	movi0	save_lc = ar.lc +} { .mmi +	.body +	mov	ret0 = dest		// return value +	cmp.ne	p_nz, p_zr = value, r0	// use stf.spill if value is zero +	cmp.eq	p_scr, p0 = cnt, r0	// is count == 0 ? +;; } +{ .mmi +	and	ptr2 = -(MIN1+1), dest	// aligned address +	and	tmp = MIN1, dest	// prepare to check for alignment +	tbit.nz p_y, p_n = dest, 0	// Do we have an odd address? (M_B_U) +} { .mib +	mov	ptr1 = dest +	mux1	value = value, @brcst	// create 8 identical bytes in word +(p_scr)	br.ret.dpnt.many rp		// return immediately if count = 0 +;; } +{ .mib +	cmp.ne	p_unalgn, p0 = tmp, r0	// is dest not 16B-aligned ? +} { .mib				// NB: # of bytes to move is 1 higher +	sub	bytecnt = (MIN1+1), tmp	//     than loopcnt +	cmp.gt	p_scr, p0 = 16, cnt		// is it a minimalistic task? 
+(p_scr)	br.cond.dptk.many .move_bytes_unaligned	// go move just a few (M_B_U) +;; } +{ .mmi +(p_unalgn) add	ptr1 = (MIN1+1), ptr2		// after alignment +(p_unalgn) add	ptr2 = MIN1P1HALF, ptr2		// after alignment +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3	// should we do a st8 ? +;; } +{ .mib +(p_y)	add	cnt = -8, cnt +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2	// should we do a st4 ? +} { .mib +(p_y)	st8	[ptr2] = value, -4 +(p_n)	add	ptr2 = 4, ptr2 +;; } +{ .mib +(p_yy)	add	cnt = -4, cnt +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1	// should we do a st2 ? +} { .mib +(p_yy)	st4	[ptr2] = value, -2 +(p_nn)	add	ptr2 = 2, ptr2 +;; } +{ .mmi +	mov	tmp = LINE_SIZE+1		// for compare +(p_y)	add	cnt = -2, cnt +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0	// should we do a st1 ? +} { .mmi +	setf.sig fvalue=value			// transfer value to FLP side +(p_y)	st2	[ptr2] = value, -1 +(p_n)	add	ptr2 = 1, ptr2 +;; } + +{ .mmi +(p_yy)	st1	[ptr2] = value +  	cmp.gt	p_scr, p0 = tmp, cnt		// is it a minimalistic task? 
+} { .mbb +(p_yy)	add	cnt = -1, cnt +(p_scr)	br.cond.dpnt.many .fraction_of_line	// go move just a few +;; } + +{ .mib +	nop.m 0 +	shr.u	linecnt = cnt, LSIZE_SH +(p_zr)	br.cond.dptk.many .l1b			// Jump to use stf.spill +;; } + +#ifndef GAS_ALIGN_BREAKS_UNWIND_INFO +	.align 32 // -------- //  L1A: store ahead into cache lines; fill later +#endif +{ .mmi +	and	tmp = -(LINE_SIZE), cnt		// compute end of range +	mov	ptr9 = ptr1			// used for prefetching +	and	cnt = (LINE_SIZE-1), cnt	// remainder +} { .mmi +	mov	loopcnt = PREF_AHEAD-1		// default prefetch loop +	cmp.gt	p_scr, p0 = PREF_AHEAD, linecnt	// check against actual value +;; } +{ .mmi +(p_scr)	add	loopcnt = -1, linecnt		// fewer lines than PREF_AHEAD +	add	ptr2 = 8, ptr1			// start of stores (beyond prefetch stores) +	add	ptr1 = tmp, ptr1		// first address beyond total +;; }						// range +{ .mmi +	add	tmp = -1, linecnt		// next loop count +	movi0	ar.lc = loopcnt +;; } +.pref_l1a: +{ .mib +	store [ptr9] = myval, 128	// Do stores one cache line apart +	nop.i	0 +	br.cloop.dptk.few .pref_l1a +;; } +{ .mmi +	add	ptr0 = 16, ptr2		// Two stores in parallel +	movi0	ar.lc = tmp +;; } +.l1ax:					// fill bytes 8..127 of each line (15 stores) + { .mmi +	store [ptr2] = myval, 8 +	store [ptr0] = myval, 8 + ;; } + { .mmi +	store [ptr2] = myval, 24 +	store [ptr0] = myval, 24 + ;; } + { .mmi +	store [ptr2] = myval, 8 +	store [ptr0] = myval, 8 + ;; } + { .mmi +	store [ptr2] = myval, 24 +	store [ptr0] = myval, 24 + ;; } + { .mmi +	store [ptr2] = myval, 8 +	store [ptr0] = myval, 8 + ;; } + { .mmi +	store [ptr2] = myval, 24 +	store [ptr0] = myval, 24 + ;; } + { .mmi +	store [ptr2] = myval, 8 +	store [ptr0] = myval, 32 + 	cmp.lt	p_scr, p0 = ptr9, ptr1		// do we need more prefetching? + ;; } +{ .mmb +	store [ptr2] = myval, 24 +(p_scr)	store [ptr9] = myval, 128 +	br.cloop.dptk.few .l1ax +;; } +{ .mbb +	cmp.le  p_scr, p0 = 8, cnt		// at least one word left ? +(p_scr) br.cond.dpnt.many  .fraction_of_line	// Branch no. 2 +	br.cond.dpnt.many  .move_bytes_from_alignment	// Branch no. 
3 +;; } + +#ifdef GAS_ALIGN_BREAKS_UNWIND_INFO +	{ nop 0 } +#else +	.align 32 +#endif +.l1b:	// ------------------ //  L1B: store ahead into cache lines; fill later +{ .mmi +	and	tmp = -(LINE_SIZE), cnt		// compute end of range +	mov	ptr9 = ptr1			// used for prefetching +	and	cnt = (LINE_SIZE-1), cnt	// remainder +} { .mmi +	mov	loopcnt = PREF_AHEAD-1		// default prefetch loop +	cmp.gt	p_scr, p0 = PREF_AHEAD, linecnt	// check against actual value +;; } +{ .mmi +(p_scr)	add	loopcnt = -1, linecnt	// fewer lines than PREF_AHEAD +	add	ptr2 = 16, ptr1	// start of stores (beyond prefetch stores) +	add	ptr1 = tmp, ptr1	// first address beyond total range +;; } +{ .mmi +	add	tmp = -1, linecnt	// next loop count +	movi0	ar.lc = loopcnt +;; } +.pref_l1b: +{ .mib +	stf.spill [ptr9] = f0, 128	// Do stores one cache line apart +	nop.i   0 +	br.cloop.dptk.few .pref_l1b +;; } +{ .mmi +	add	ptr0 = 16, ptr2		// Two stores in parallel +	movi0	ar.lc = tmp +;; } +.l1bx:					// fill bytes 16..127 of each line (7 stores) + { .mmi +	stf.spill [ptr2] = f0, 32 +	stf.spill [ptr0] = f0, 32 + ;; } + { .mmi +	stf.spill [ptr2] = f0, 32 +	stf.spill [ptr0] = f0, 32 + ;; } + { .mmi +	stf.spill [ptr2] = f0, 32 +	stf.spill [ptr0] = f0, 64 + 	cmp.lt	p_scr, p0 = ptr9, ptr1	// do we need more prefetching? + ;; } +{ .mmb +	stf.spill [ptr2] = f0, 32 +(p_scr)	stf.spill [ptr9] = f0, 128 +	br.cloop.dptk.few .l1bx +;; } +{ .mib +	cmp.gt  p_scr, p0 = 8, cnt	// just a few bytes left ? 
+(p_scr)	br.cond.dpnt.many  .move_bytes_from_alignment +;; } + +.fraction_of_line: +{ .mib +	add	ptr2 = 16, ptr1 +	shr.u	loopcnt = cnt, 5   	// loopcnt = cnt / 32 +;; } +{ .mib +	cmp.eq	p_scr, p0 = loopcnt, r0	// fewer than 32 bytes left ? +	add	loopcnt = -1, loopcnt +(p_scr)	br.cond.dpnt.many store_words +;; } +{ .mib +	and	cnt = 0x1f, cnt		// compute the remaining cnt +	movi0   ar.lc = loopcnt +;; } +#ifndef GAS_ALIGN_BREAKS_UNWIND_INFO +	.align 32 +#endif +.l2:	// ---------------------------- //  L2:  store 32B in 2 cycles +{ .mmb +	store	[ptr1] = myval, 8 +	store	[ptr2] = myval, 8 +;; } { .mmb +	store	[ptr1] = myval, 24 +	store	[ptr2] = myval, 24 +	br.cloop.dptk.many .l2 +;; } +store_words: +{ .mib +	cmp.gt	p_scr, p0 = 8, cnt		// just a few bytes left ? +(p_scr)	br.cond.dpnt.many .move_bytes_from_alignment	// Branch +;; } + +{ .mmi +	store	[ptr1] = myval, 8		// store +	cmp.le	p_y, p_n = 16, cnt		// room for a second word ? +	add	cnt = -8, cnt			// subtract +;; } +{ .mmi +(p_y)	store	[ptr1] = myval, 8		// store +(p_y)	cmp.le.unc p_yy, p_nn = 16, cnt		// room for a third word ? +(p_y)	add	cnt = -8, cnt			// subtract +;; } +{ .mmi						// store +(p_yy)	store	[ptr1] = myval, 8		// +(p_yy)	add	cnt = -8, cnt			// subtract +;; } + +.move_bytes_from_alignment: +{ .mib +	cmp.eq	p_scr, p0 = cnt, r0 +	tbit.nz.unc p_y, p0 = cnt, 2	// should we terminate with a st4 ? +(p_scr)	br.cond.dpnt.few .restore_and_exit +;; } +{ .mib +(p_y)	st4	[ptr1] = value, 4 +	tbit.nz.unc p_yy, p0 = cnt, 1	// should we terminate with a st2 ? 
+;; } +{ .mib +(p_yy)	st2	[ptr1] = value, 2 +	tbit.nz.unc p_y, p0 = cnt, 0	// should we terminate with a st1 ? +;; } + +{ .mib +(p_y)	st1	[ptr1] = value +;; } +.restore_and_exit: +{ .mib +	nop.m	0 +	movi0	ar.lc = save_lc +	br.ret.sptk.many rp +;; } + +.move_bytes_unaligned: +{ .mmi +       .pred.rel "mutex",p_y, p_n +       .pred.rel "mutex",p_yy, p_nn +(p_n)	cmp.le  p_yy, p_nn = 4, cnt +(p_y)	cmp.le  p_yy, p_nn = 5, cnt +(p_n)	add	ptr2 = 2, ptr1 +} { .mmi +(p_y)	add	ptr2 = 3, ptr1 +(p_y)	st1	[ptr1] = value, 1	// fill 1 (odd-aligned) byte +(p_y)	add	cnt = -1, cnt		// [15, 14 (or less) left] +;; } +{ .mmi +(p_yy)	cmp.le.unc p_y, p0 = 8, cnt +	add	ptr3 = ptr1, cnt	// prepare last store +	movi0	ar.lc = save_lc +} { .mmi +(p_yy)	st2	[ptr1] = value, 4	// fill 2 (aligned) bytes +(p_yy)	st2	[ptr2] = value, 4	// fill 2 (aligned) bytes +(p_yy)	add	cnt = -4, cnt		// [11, 10 (or less) left] +;; } +{ .mmi +(p_y)	cmp.le.unc p_yy, p0 = 8, cnt +	add	ptr3 = -1, ptr3		// last store +	tbit.nz p_scr, p0 = cnt, 1	// will there be a st2 at the end ? +} { .mmi +(p_y)	st2	[ptr1] = value, 4	// fill 2 (aligned) bytes +(p_y)	st2	[ptr2] = value, 4	// fill 2 (aligned) bytes +(p_y)	add	cnt = -4, cnt		// [7, 6 (or less) left] +;; } +{ .mmi +(p_yy)	st2	[ptr1] = value, 4	// fill 2 (aligned) bytes +(p_yy)	st2	[ptr2] = value, 4	// fill 2 (aligned) bytes +					// [3, 2 (or less) left] +	tbit.nz p_y, p0 = cnt, 0	// will there be a st1 at the end ? +} { .mmi +(p_yy)	add	cnt = -4, cnt +;; } +{ .mmb +(p_scr)	st2	[ptr1] = value		// fill 2 (aligned) bytes +(p_y)	st1	[ptr3] = value		// fill last byte (using ptr3) +	br.ret.sptk.many rp +;; } +END(memset) +libc_hidden_def (memset) diff --git a/libc/string/ia64/softpipe.h b/libc/string/ia64/softpipe.h new file mode 100644 index 000000000..cf0eb5355 --- /dev/null +++ b/libc/string/ia64/softpipe.h @@ -0,0 +1,29 @@ +/* This file is part of the GNU C Library. +   Copyright (C) 2000 Free Software Foundation, Inc. 
+ +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* The latency of a memory load assumed by the assembly implementation +   of the mem and str functions.  Since we don't have any clue about +   where the data might be, let's assume it's in the L2 cache.   +   Assuming L3 would be too pessimistic :-) + +   Some functions define MEMLAT as 2, because they expect their data +   to be in the L1D cache.  */ + +#ifndef MEMLAT +# define MEMLAT 6 +#endif diff --git a/libc/string/ia64/strchr.S b/libc/string/ia64/strchr.S new file mode 100644 index 000000000..24001eea6 --- /dev/null +++ b/libc/string/ia64/strchr.S @@ -0,0 +1,112 @@ +/* Optimized version of the standard strchr() function. +   This file is part of the GNU C Library. +   Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc. +   Contributed by Dan Pop <Dan.Pop@cern.ch>. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. 
+ +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* Return: the address of the first occurrence of chr in str or NULL + +   Inputs: +        in0:    str +        in1:    chr + +   A modified version of memchr.S, the search ends when the character is +   found or the terminating null character is encountered. + +   This implementation assumes little endian mode.  For big endian mode, +   the instruction czx1.r should be replaced by czx1.l.  */ + +#include <sysdep.h> +#undef ret + +#define saved_lc	r18 +#define poschr		r19 +#define pos0		r20 +#define val1		r21 +#define val2		r22 +#define tmp		r24 +#define chrx8		r25 +#define loopcnt		r30 + +#define str		in0 +#define chr		in1 + +ENTRY(strchr) +	.prologue +	alloc r2 = ar.pfs, 2, 0, 0, 0 +	.save ar.lc, saved_lc +        mov 	saved_lc = ar.lc 	// save the loop counter +	.body +	mov 	ret0 = str	 +	and 	tmp = 7, str		// tmp = str % 8 +	mux1	chrx8 = chr, @brcst	// chrx8 = chr replicated into all 8 bytes +	extr.u	chr = chr, 0, 8		// retain only the low byte +	cmp.ne	p8, p0 = r0, r0		// clear p8 +	;; +	sub	loopcnt = 8, tmp	// loopcnt = 8 - tmp +	cmp.eq	p6, p0 = tmp, r0 +(p6)	br.cond.sptk	.str_aligned;; +	adds	loopcnt = -1, loopcnt;; +	mov	ar.lc = loopcnt +.l1:					// byte-wise scan up to the next 8-byte boundary +	ld1	val2 = [ret0], 1 +	;; +	cmp.eq	p6, p0 = val2, chr +	cmp.eq	p7, p0 = val2, r0 +(p6)	br.cond.spnt	.restore_and_exit +(p7)	br.cond.spnt	.notfound +	br.cloop.sptk	.l1 +.str_aligned: +	ld8	val1 = [ret0], 8;; +	nop.b	0 +	nop.b 	0 +.l2:	 +	ld8.s	val2 = [ret0], 8	// don't bomb out here +	czx1.r	pos0 = val1	 +	xor	tmp = val1, chrx8	// if val1 contains chr, tmp will +	;;				
// contain a zero in its position +	czx1.r	poschr = tmp +	cmp.ne	p6, p0 = 8, pos0	// p6 = val1 contains a null byte +	;; +	cmp.ne	p7, p0 = 8, poschr	// p7 = val1 contains chr +(p7)	br.cond.spnt .foundit +(p6)	br.cond.spnt .notfound +	chk.s	val2, .recovery		// redo the load if the speculative one failed +.back: +	mov	val1 = val2	 +	br.cond.dptk .l2 +.foundit: +(p6)	cmp.lt	p8, p0 = pos0, poschr	// we found chr and null in the word +(p8)	br.cond.spnt .notfound		// null was found before chr +	add	ret0 = ret0, poschr ;; +	adds	ret0 = -15, ret0 ;;	// should be -16, but we decrement +.restore_and_exit:			// ret0 in the next instruction +	adds	ret0 = -1, ret0		// ret0 was pointing 1 char too far +	mov 	ar.lc = saved_lc	// restore the loop counter +	br.ret.sptk.many b0 +.notfound: +	mov	ret0 = r0		// return NULL if null was found +	mov 	ar.lc = saved_lc	// restore the loop counter +	br.ret.sptk.many b0 +.recovery: +	adds	ret0 = -8, ret0;;	// step back to the word loaded speculatively +	ld8	val2 = [ret0], 8	// bomb out here +	br.cond.sptk	.back +END(strchr) + +weak_alias (strchr, index) +libc_hidden_def (strchr) diff --git a/libc/string/ia64/strcmp.S b/libc/string/ia64/strcmp.S new file mode 100644 index 000000000..180b5524f --- /dev/null +++ b/libc/string/ia64/strcmp.S @@ -0,0 +1,54 @@ +/* Optimized version of the standard strcmp() function. +   This file is part of the GNU C Library. +   Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc. +   Contributed by Dan Pop <Dan.Pop@cern.ch>. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. 
+ +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* Return: the result of the comparison + +   Inputs: +        in0:    s1 +        in1:    s2 + +   Unlike memcmp(), this function is optimized for mismatches within the +   first few characters.  */ + +#include <sysdep.h> +#undef ret + +#define s1		in0 +#define s2		in1 + +#define val1		r15 +#define val2		r16 + + +ENTRY(strcmp) +	alloc	r2 = ar.pfs, 2, 0, 0, 0 +.loop: +	ld1	val1 = [s1], 1 +	ld1	val2 = [s2], 1 +	cmp.eq	p6, p0 = r0, r0		// set p6 +	;; +	cmp.ne.and p6, p0 = val1, r0 +	cmp.ne.and p6, p0 = val2, r0 +	cmp.eq.and p6, p0 = val1, val2 +(p6)	br.cond.sptk .loop +	sub	ret0 = val1, val2 +	br.ret.sptk.many b0 +END(strcmp) +libc_hidden_def (strcmp) diff --git a/libc/string/ia64/strcpy.S b/libc/string/ia64/strcpy.S new file mode 100644 index 000000000..bee5bf7c2 --- /dev/null +++ b/libc/string/ia64/strcpy.S @@ -0,0 +1,145 @@ +/* Optimized version of the standard strcpy() function. +   This file is part of the GNU C Library. +   Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc. +   Contributed by Dan Pop <Dan.Pop@cern.ch>. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. 
+ +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* Return: dest + +   Inputs: +        in0:    dest +        in1:    src + +   In this form, it assumes little endian mode.  For big endian mode, the +   two shifts in .l2 must be inverted: + +	shl   	value = r[1], sh1   	// value = w0 << sh1 +	shr.u   tmp = r[0], sh2   	// tmp = w1 >> sh2 + */ + +#include <sysdep.h> +#undef ret + +#define saved_lc	r15 +#define saved_pr	r16 +#define thresh		r17 +#define dest		r19 +#define src		r20 +#define len		r21 +#define asrc		r22 +#define tmp		r23 +#define pos		r24 +#define w0		r25 +#define w1		r26 +#define c		r27 +#define sh2		r28 +#define	sh1		r29 +#define loopcnt		r30 +#define	value		r31 + +ENTRY(strcpy) +	.prologue +	alloc 	r2 = ar.pfs, 2, 0, 30, 32 + +#define MEMLAT 2 +	.rotr	r[MEMLAT + 2] +	.rotp	p[MEMLAT + 1] + +	mov	ret0 = in0		// return value = dest +	.save pr, saved_pr +	mov	saved_pr = pr           // save the predicate registers +	.save ar.lc, saved_lc +        mov 	saved_lc = ar.lc	// save the loop counter +	.body +	sub	tmp = r0, in0 ;;	// tmp = -dest +	mov 	dest = in0		// dest = running destination pointer +	mov 	src = in1		// src = running source pointer +	and	loopcnt = 7, tmp ;;	// loopcnt = -dest % 8 +	cmp.eq	p6, p0 = loopcnt, r0 +	adds	loopcnt = -1, loopcnt	// --loopcnt +(p6)	br.cond.sptk .dest_aligned ;; +	mov	ar.lc = loopcnt +.l1:					// copy -dest % 8 bytes +	ld1	c = [src], 1		// c = *src++ +	;; +	st1	[dest] = c, 1		// *dest++ = c +	cmp.eq	p6, p0 = c, r0 +(p6)	br.cond.dpnt .restore_and_exit +	br.cloop.dptk .l1 ;; +.dest_aligned: +	and	sh1 = 7, src 		// sh1 = src % 8 +	mov	ar.lc = -1		// "infinite" loop +	and	asrc = -8, src ;;	// asrc = src & -OPSIZ  -- align src +	sub	thresh = 8, sh1		// thresh = 8 - (src % 8) +	mov	pr.rot = 1 << 16	// set rotating predicates +	cmp.ne	p7, p0 = r0, r0		// clear p7 +	shl	sh1 = sh1, 3 ;;		// sh1 = 8 * (src % 8) +	sub	sh2 = 
64, sh1		// sh2 = 64 - sh1 +	cmp.eq  p6, p0 = sh1, r0 	// is the src aligned? +(p6)    br.cond.sptk .src_aligned ;; +	ld8	r[1] = [asrc],8 ;; + +	.align	32 +.l2: +	ld8.s	r[0] = [asrc], 8 +	shr.u	value = r[1], sh1 ;; 	// value = w0 >> sh1 +	czx1.r	pos = value ;;		// do we have an "early" zero +	cmp.lt	p7, p0 = pos, thresh	// in w0 >> sh1? +(p7)	br.cond.dpnt .found0 +	chk.s	r[0], .recovery2	// it is safe to do that only +.back2:					// after the previous test +	shl	tmp = r[0], sh2  	// tmp = w1 << sh2 +	;; +	or	value = value, tmp ;;	// value |= tmp +	czx1.r	pos = value ;; +	cmp.ne	p7, p0 = 8, pos +(p7)	br.cond.dpnt .found0 +	st8	[dest] = value, 8	// store val to dest +	br.ctop.dptk    .l2 ;; +.src_aligned: +.l3: +(p[0])		ld8.s	r[0] = [src], 8 +(p[MEMLAT])	chk.s	r[MEMLAT], .recovery3 +.back3: +(p[MEMLAT])	mov	value = r[MEMLAT] +(p[MEMLAT])	czx1.r	pos = r[MEMLAT] ;; +(p[MEMLAT])	cmp.ne	p7, p0 = 8, pos +(p7)		br.cond.dpnt .found0 +(p[MEMLAT])	st8	[dest] = r[MEMLAT], 8 +		br.ctop.dptk .l3 ;; +.found0: +	mov	ar.lc = pos +.l4: +	extr.u	c = value, 0, 8		// c = value & 0xff +	shr.u	value = value, 8 +	;; +	st1	[dest] = c, 1 +	br.cloop.dptk	.l4 ;; +.restore_and_exit: +	mov 	ar.lc = saved_lc	// restore the loop counter +	mov	pr = saved_pr, -1	// restore the predicate registers +	br.ret.sptk.many b0 +.recovery2: +	add	tmp = -8, asrc ;; +	ld8	r[0] = [tmp] +	br.cond.sptk .back2 +.recovery3: +	add	tmp = -(MEMLAT + 1) * 8, src ;; +	ld8	r[MEMLAT] = [tmp] +	br.cond.sptk .back3 +END(strcpy) +libc_hidden_def (strcpy) diff --git a/libc/string/ia64/strlen.S b/libc/string/ia64/strlen.S new file mode 100644 index 000000000..f5a6ef044 --- /dev/null +++ b/libc/string/ia64/strlen.S @@ -0,0 +1,98 @@ +/* Optimized version of the standard strlen() function. +   This file is part of the GNU C Library. +   Copyright (C) 2000, 2001, 2003, 2005 Free Software Foundation, Inc. +   Contributed by Dan Pop <Dan.Pop@cern.ch>. 
+ +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* Return: the length of the input string + +   Input: +        in0:    str + +   Look for the null character byte by byte, until we reach a word aligned +   address, then search word by word, using the czx instruction.  We're +   also doing one word of read ahead, which could cause problems if the +   null character is on the last word of a page and the next page is not +   mapped in the process address space.  Hence the use of the speculative +   load. + +   This implementation assumes little endian mode.  For big endian mode, +   the instruction czx1.r should be replaced by czx1.l.  
*/ + +#include <sysdep.h> +#undef ret + +#define saved_lc	r18 +#define str		r19 +#define pos0		r20 +#define val1		r21 +#define val2		r22 +#define origadd		r23 +#define tmp		r24 +#define loopcnt		r30 +#define len		ret0 + +ENTRY(strlen) +	.prologue +	alloc r2 = ar.pfs, 1, 0, 0, 0 +	.save ar.lc, saved_lc +        mov 	saved_lc = ar.lc 	// save the loop counter +	.body +	mov 	str = in0	 +	mov 	len = r0		// len = 0 +	and 	tmp = 7, in0		// tmp = str % 8 +	;; +	sub	loopcnt = 8, tmp	// loopcnt = 8 - tmp +	cmp.eq	p6, p0 = tmp, r0 +(p6)	br.cond.sptk	.str_aligned;; +	adds	loopcnt = -1, loopcnt;; +	mov	ar.lc = loopcnt +.l1: +	ld1	val2 = [str], 1 +	;; +	cmp.eq	p6, p0 = val2, r0 +(p6)	br.cond.spnt	.restore_and_exit +	adds	len = 1, len +	br.cloop.dptk	.l1 +.str_aligned: +	mov	origadd = str		// origadd = orig +	ld8	val1 = [str], 8;; +	nop.b	0 +	nop.b 	0 +.l2:	ld8.s	val2 = [str], 8		// don't bomb out here +	czx1.r	pos0 = val1	 +	;; +	cmp.ne	p6, p0 = 8, pos0 +(p6)	br.cond.spnt .foundit +	chk.s	val2, .recovery +.back: +	mov	val1 = val2	 +	br.cond.dptk	.l2 +.foundit: +	sub	tmp = str, origadd	// tmp = crt address - orig +	add	len = len, pos0;; +	add	len = len, tmp;; +	adds	len = -16, len +.restore_and_exit: +	mov ar.lc = saved_lc		// restore the loop counter +	br.ret.sptk.many b0 +.recovery: +	adds	str = -8, str;; +	ld8	val2 = [str], 8		// bomb out here +	br.cond.sptk	.back +END(strlen) +libc_hidden_def (strlen) diff --git a/libc/string/ia64/strncmp.S b/libc/string/ia64/strncmp.S new file mode 100644 index 000000000..915ea5aa2 --- /dev/null +++ b/libc/string/ia64/strncmp.S @@ -0,0 +1,62 @@ +/* Optimized version of the standard strncmp() function. +   This file is part of the GNU C Library. +   Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc. +   Contributed by Dan Pop <Dan.Pop@cern.ch>. 
+ +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* Return: the result of the comparison + +   Inputs: +        in0:    s1 +        in1:    s2 +  	in2:	n + +   Unlike memcmp(), this function is optimized for mismatches within the +   first few characters.  */ + +#include <sysdep.h> +#undef ret + +#define s1		in0 +#define s2		in1 +#define n		in2 + +#define val1		r15 +#define val2		r16 + + +ENTRY(strncmp) +	alloc	r2 = ar.pfs, 3, 0, 0, 0 +	mov	ret0 = r0 +	cmp.eq  p6, p0 = r0, r0		// set p6 +	cmp.eq	p7, p0 = n, r0		// return immediately if n == 0 +(p7)	br.cond.spnt .restore_and_exit ;; +.loop: +	ld1	val1 = [s1], 1 +	ld1	val2 = [s2], 1 +	adds	n = -1, n		// n-- +	;; +	cmp.ne.and p6, p0 = val1, r0 +	cmp.ne.and p6, p0 = val2, r0 +	cmp.ne.and p6, p0 = n, r0 +	cmp.eq.and p6, p0 = val1, val2 +(p6)	br.cond.sptk .loop +	sub	ret0 = val1, val2 +.restore_and_exit: +	br.ret.sptk.many b0 +END(strncmp)	 +libc_hidden_def (strncmp) diff --git a/libc/string/ia64/strncpy.S b/libc/string/ia64/strncpy.S new file mode 100644 index 000000000..0a98ea4db --- /dev/null +++ b/libc/string/ia64/strncpy.S @@ -0,0 +1,232 @@ +/* Optimized version of the standard strncpy() function. +   This file is part of the GNU C Library. 
+   Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc. +   Contributed by Dan Pop <Dan.Pop@cern.ch> +	      and Jakub Jelinek <jakub@redhat.com>. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* Return: dest + +   Inputs: +	in0:    dest +	in1:    src +	in2:	len + +   In this form, it assumes little endian mode. 
+ */ + +#include <sysdep.h> +#undef ret + +#define saved_lc	r15 +#define saved_pr	r16 +#define thresh		r17 +#define dest		r18 +#define dest2		r19 +#define src		r20 +#define len		r21 +#define asrc		r22 +#define tmp		r23 +#define pos		r24 +#define w0		r25 +#define w1		r26 +#define c		r27 +#define sh2		r28 +#define	sh1		r29 +#define loopcnt		r30 +#define	value		r31 + +ENTRY(strncpy) +	.prologue +	alloc 	r2 = ar.pfs, 3, 0, 29, 32 + +#define MEMLAT 2 +	.rotr	r[MEMLAT + 2] +	.rotp	p[MEMLAT + 1] + +	mov	ret0 = in0		// return value = dest +	.save pr, saved_pr +	mov	saved_pr = pr           // save the predicate registers +	.save ar.lc, saved_lc +	mov 	saved_lc = ar.lc	// save the loop counter +	mov	ar.ec = 0		// ec is not guaranteed to +					// be zero upon function entry +	.body +	cmp.geu p6, p5 = 24, in2	// p6 = (len <= 24), p5 = (len > 24) +(p6)	br.cond.spnt .short_len +	sub	tmp = r0, in0 ;;	// tmp = -dest +	mov	len = in2		// len +	mov 	dest = in0		// dest +	mov 	src = in1		// src +	and	tmp = 7, tmp ;;		// tmp = -dest % 8 +	cmp.eq	p6, p7 = tmp, r0 +	adds	loopcnt = -1, tmp	// loopcnt = tmp - 1 +(p6)	br.cond.sptk .dest_aligned ;; +	sub	len = len, tmp		// len -= -dest % 8 +	mov	ar.lc = loopcnt +.l1:					// copy -dest % 8 bytes +(p5)	ld1	c = [src], 1		// c = *src++ +	;; +	st1	[dest] = c, 1		// *dest++ = c +	cmp.ne	p5, p7 = c, r0		// p5 = keep reading src, p7 = a null byte was stored +	br.cloop.dptk .l1 ;; +(p7)	br.cond.dpnt	.found0_align + +.dest_aligned:				// p7 should be cleared here +	shr.u	c = len, 3		// c = len / 8 +	and	sh1 = 7, src 		// sh1 = src % 8 +	and	asrc = -8, src ;;	// asrc = src & -OPSIZ  -- align src +	adds	c = (MEMLAT-1), c	// c = (len / 8) + MEMLAT - 1 +	sub	thresh = 8, sh1 +	mov	pr.rot = 1 << 16	// set rotating predicates +	shl	sh1 = sh1, 3 ;;		// sh1 = 8 * (src % 8) +	mov	ar.lc = c		// ar.lc = (len / 8) + MEMLAT - 1 +	sub	sh2 = 64, sh1		// sh2 = 64 - sh1 +	cmp.eq  p6, p0 = sh1, r0 	// is the src aligned? 
+(p6)    br.cond.sptk .src_aligned +	adds	c = -(MEMLAT-1), c ;;	// c = (len / 8) +	ld8	r[1] = [asrc],8 +	mov	ar.lc = c ;; + +	.align	32 +.l2: +(p6)	st8	[dest] = value, 8	// store val to dest +	ld8.s	r[0] = [asrc], 8 +	shr.u	value = r[1], sh1 ;; 	// value = w0 >> sh1 +	czx1.r	pos = value ;;		// do we have an "early" zero +	cmp.lt	p7, p0 = pos, thresh	// in w0 >> sh1? +	adds	len = -8, len		// len -= 8 +(p7)	br.cond.dpnt .nonalign_found0 +	chk.s	r[0], .recovery2	// it is safe to do that only +.back2:					// after the previous test +	shl	tmp = r[0], sh2  	// tmp = w1 << sh2 +	;; +	or	value = value, tmp ;;	// value |= tmp +	czx1.r	pos = value ;; +	cmp.ne	p7, p6 = 8, pos +(p7)	br.cond.dpnt .nonalign_found0 +	br.ctop.dptk    .l2 ;; +	adds	len = 8, len +	br.cond.sptk	.not_found0 ;; +.nonalign_found0: +	cmp.gtu	p6, p0 = -8, len +(p6)	br.cond.dptk .found0 +	adds	len = 8, len +	br.cond.sptk	.not_found0 ;; + +	.align	32 +.src_aligned: +.l3: +(p[0])		ld8.s	r[0] = [src], 8 +(p[MEMLAT])	chk.s	r[MEMLAT], .recovery3 +.back3: +(p[MEMLAT])	mov	value = r[MEMLAT] +(p[MEMLAT])	czx1.r	pos = r[MEMLAT] ;; +(p[MEMLAT])	cmp.ne	p7, p0 = 8, pos +(p[MEMLAT])	adds	len = -8, len	// len -= 8 +(p7)		br.cond.dpnt .found0 +(p[MEMLAT])	st8	[dest] = r[MEMLAT], 8 +		br.ctop.dptk .l3 ;; + +	chk.s	r[MEMLAT-1], .recovery4 +.back4: +	mov	value = r[MEMLAT-1] + +.not_found0: +	cmp.eq	p5, p6 = len, r0 +	adds	len = -1, len +(p5)	br.cond.dptk	.restore_and_exit ;; +	mov	ar.lc = len +.l4: +(p6)	extr.u	c = value, 0, 8		// c = value & 0xff +(p6)	shr.u	value = value, 8 ;; +	st1	[dest] = c, 1 +	cmp.ne	p6, p0 = c, r0 +	br.cloop.dptk	.l4 +	br.cond.sptk	.restore_and_exit + +.found0_align: +	mov	pos = 0 +	adds	len = -8, len +	mov	value = 0 ;; +.found0: +	shl	tmp = pos, 3 +	shr.u	loopcnt = len, 4	// loopcnt = len / 16 +	mov	c = -1 ;; +	cmp.eq	p6, p0 = loopcnt, r0 +	adds	loopcnt = -1, loopcnt +	shl	c = c, tmp ;; +	and	len = 0xf, len +	andcm	value = value, c +	mov	ar.lc = loopcnt ;; +	cmp.le	p7, p0 = 8, len +	adds	dest2 = 
16, dest +	st8	[dest] = value, 8 +	and	len = 0x7, len +(p6)	br.cond.dpnt	.l6 ;; +.l5: +	st8	[dest] = r0, 16 +	st8	[dest2] = r0, 16 +	br.cloop.dptk	.l5 ;; +.l6: +(p7)	st8	[dest] = r0, 8 +	cmp.eq	p5, p0 = len, r0 +	adds	len = -1, len +(p5)	br.cond.dptk .restore_and_exit ;; +	mov	ar.lc = len ;; +.l7: +	st1	[dest] = r0, 1 +	br.cloop.dptk	.l7 ;; +.restore_and_exit: +	mov 	ar.lc = saved_lc	// restore the loop counter +	mov	pr = saved_pr, -1	// restore the predicate registers +	br.ret.sptk.many b0 + +.short_len: +	cmp.eq	p5, p0 = in2, r0 +	adds	loopcnt = -1, in2 +(p5)	br.cond.spnt .restore_and_exit ;; +	mov	ar.lc = loopcnt		// p6 should be set when we get here +.l8: +(p6)	ld1	c = [in1], 1		// c = *src++ +	;; +	st1	[in0] = c, 1		// *dest++ = c +(p6)	cmp.ne	p6, p0 = c, r0 +	br.cloop.dptk .l8 +	;; +	mov 	ar.lc = saved_lc	// restore the loop counter +	mov	pr = saved_pr, -1	// restore the predicate registers +	br.ret.sptk.many b0 +.recovery2: +	add	c = 8, len +	add	tmp = -8, asrc ;; +	cmp.gtu	p8, p5 = c, thresh ;; +(p8)	ld8	r[0] = [tmp] +(p5)	mov	r[0] = r0 +	br.cond.sptk .back2 +.recovery3: +	add	tmp = -(MEMLAT + 1) * 8, src ;; +	ld8	r[MEMLAT] = [tmp] +	br.cond.sptk .back3 +.recovery4: +	cmp.eq	p5, p6 = len, r0 +	add	tmp = -MEMLAT * 8, src ;; +(p6)	ld8	r[MEMLAT - 1] = [tmp] +(p5)	mov	r[MEMLAT - 1] = r0 +	br.cond.sptk .back4 +END(strncpy) +libc_hidden_def (strncpy) diff --git a/libc/string/ia64/sysdep.h b/libc/string/ia64/sysdep.h new file mode 100644 index 000000000..03e74360d --- /dev/null +++ b/libc/string/ia64/sysdep.h @@ -0,0 +1,168 @@ +/* Copyright (C) 1999, 2000, 2002, 2003, 2004 Free Software Foundation, Inc. +   This file is part of the GNU C Library. +   Written by Jes Sorensen, <Jes.Sorensen@cern.ch>, April 1999. 
+   Based on code originally written by David Mosberger-Tang + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +#ifndef _LINUX_IA64_SYSDEP_H +#define _LINUX_IA64_SYSDEP_H 1 + +#include <features.h> +#include <asm/unistd.h> + +#ifdef __ASSEMBLER__ + +/* Macros to help writing .prologue directives in assembly code.  */ +#define ASM_UNW_PRLG_RP			0x8 +#define ASM_UNW_PRLG_PFS		0x4 +#define ASM_UNW_PRLG_PSP		0x2 +#define ASM_UNW_PRLG_PR			0x1 +#define ASM_UNW_PRLG_GRSAVE(ninputs)	(32+(ninputs)) + +#ifdef	__STDC__ +#define C_LABEL(name)		name##: +#else +#define C_LABEL(name)		name/**/: +#endif + +#define CALL_MCOUNT + +#define ENTRY(name)				\ +	.text;					\ +	.align 32;				\ +	.proc C_SYMBOL_NAME(name);		\ +	.global C_SYMBOL_NAME(name);		\ +	C_LABEL(name)				\ +	CALL_MCOUNT + +#define LEAF(name)				\ +  .text;					\ +  .align 32;					\ +  .proc C_SYMBOL_NAME(name);			\ +  .global name;					\ +  C_LABEL(name) + +/* Mark the end of function SYM.  */ +#undef END +#define END(sym)	.endp C_SYMBOL_NAME(sym) + +/* For Linux we can use the system call table in the header file +	/usr/include/asm/unistd.h +   of the kernel.  But these symbols do not follow the SYS_* syntax +   so we have to redefine the `SYS_ify' macro here.  
*/ +#undef SYS_ify +#ifdef __STDC__ +# define SYS_ify(syscall_name)	__NR_##syscall_name +#else +# define SYS_ify(syscall_name)	__NR_/**/syscall_name +#endif + +/* Linux uses a negative return value to indicate syscall errors, unlike +   most Unices, which use the condition codes' carry flag. + +   Since version 2.1 the return value of a system call might be negative +   even if the call succeeded.  E.g., the `lseek' system call might return +   a large offset.  Therefore we must no longer test for < 0, but test +   for a real error by making sure the value in %d0 is a real error +   number.  Linus said he will make sure that no syscall returns a value +   in -1 .. -4095 as a valid result so we can safely test with -4095. + +   NOTE(review): the paragraph above appears to be inherited from another +   port (it refers to %d0); the PSEUDO macro below instead tests +   r10 == -1 to detect a failed call.  */ + +/* We don't want the label for the error handler to be visible in the symbol +   table when we define it here.  */ +#define SYSCALL_ERROR_LABEL __syscall_error + +/* PSEUDO: emit the entry point, issue the system call, and branch to +   __syscall_error when r10 == -1.  */ +#undef PSEUDO +#define	PSEUDO(name, syscall_name, args)	\ +  ENTRY(name)					\ +    DO_CALL (SYS_ify(syscall_name));		\ +	cmp.eq p6,p0=-1,r10;			\ +(p6)	br.cond.spnt.few __syscall_error; + +#define DO_CALL_VIA_BREAK(num)			\ +	mov r15=num;				\ +	break __BREAK_SYSCALL + +#ifdef IA64_USE_NEW_STUB +# ifdef SHARED +#  define DO_CALL(num)				\ +	.prologue;				\ +	adds r2 = SYSINFO_OFFSET, r13;;		\ +	ld8 r2 = [r2];				\ +	.save ar.pfs, r11;			\ +	mov r11 = ar.pfs;;			\ +	.body;					\ +	mov r15 = num;				\ +	mov b7 = r2;				\ +	br.call.sptk.many b6 = b7;;		\ +	.restore sp;				\ +	mov ar.pfs = r11;			\ +	.prologue;				\ +	.body +# else /* !SHARED */ +#  define DO_CALL(num)				\ +	.prologue;				\ +	mov r15 = num;				\ +	movl r2 = _dl_sysinfo;;			\ +	ld8 r2 = [r2];				\ +	.save ar.pfs, r11;			\ +	mov r11 = ar.pfs;;			\ +	.body;					\ +	mov b7 = r2;				\ +	br.call.sptk.many b6 = b7;;		\ +	.restore sp;				\ +	mov ar.pfs = r11;			\ +	.prologue;				\ +	.body +# endif +#else +# define DO_CALL(num)				DO_CALL_VIA_BREAK(num) +#endif + +#undef PSEUDO_END +#define PSEUDO_END(name)	.endp 
C_SYMBOL_NAME(name); + +#undef PSEUDO_NOERRNO +#define	PSEUDO_NOERRNO(name, syscall_name, args)	\ +  ENTRY(name)						\ +    DO_CALL (SYS_ify(syscall_name)); + +#undef PSEUDO_END_NOERRNO +#define PSEUDO_END_NOERRNO(name)	.endp C_SYMBOL_NAME(name); + +#undef PSEUDO_ERRVAL +#define	PSEUDO_ERRVAL(name, syscall_name, args)	\ +  ENTRY(name)					\ +    DO_CALL (SYS_ify(syscall_name));		\ +	cmp.eq p6,p0=-1,r10;			\ +(p6)	mov r10=r8; + + +#undef PSEUDO_END_ERRVAL +#define PSEUDO_END_ERRVAL(name)	.endp C_SYMBOL_NAME(name); + +/* Redefine END so that it records the symbol size (.size) before +   closing the procedure with .endp.  */ +#undef END +#define END(name)						\ +	.size	C_SYMBOL_NAME(name), . - C_SYMBOL_NAME(name) ;	\ +	.endp	C_SYMBOL_NAME(name) + +#define ret			br.ret.sptk.few b0 +#define ret_NOERRNO		ret +#define ret_ERRVAL		ret + +#endif /* __ASSEMBLER__ */ + +#endif /* linux/ia64/sysdep.h */ diff --git a/libc/sysdeps/linux/ia64/Makefile b/libc/sysdeps/linux/ia64/Makefile new file mode 100644 index 000000000..b1bf1ef10 --- /dev/null +++ b/libc/sysdeps/linux/ia64/Makefile @@ -0,0 +1,15 @@ +# Makefile for uClibc +# +# Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org> +# +# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. +# + +TOPDIR=../../../../ + +top_srcdir=$(TOPDIR) +top_builddir=../../../../ +all: objs +include $(top_builddir)Rules.mak +include Makefile.arch +include $(top_srcdir)Makerules diff --git a/libc/sysdeps/linux/ia64/Makefile.arch b/libc/sysdeps/linux/ia64/Makefile.arch new file mode 100644 index 000000000..2b3212498 --- /dev/null +++ b/libc/sysdeps/linux/ia64/Makefile.arch @@ -0,0 +1,14 @@ +# Makefile for uClibc +# +# Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org> +# +# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. 
+# + +CSRC := __syscall_error.c + +SSRC := \ +	__longjmp.S brk.S bsd-setjmp.S bsd-_setjmp.S clone2.S fork.S \ +	pipe.S setjmp.S syscall.S vfork.S + +include $(top_srcdir)libc/sysdeps/linux/Makefile.commonarch diff --git a/libc/sysdeps/linux/ia64/__longjmp.S b/libc/sysdeps/linux/ia64/__longjmp.S new file mode 100644 index 000000000..90f70e8e0 --- /dev/null +++ b/libc/sysdeps/linux/ia64/__longjmp.S @@ -0,0 +1,160 @@ +/* Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc. +   Contributed by David Mosberger-Tang <davidm@hpl.hp.com>. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA. + +   Note that __sigsetjmp() did NOT flush the register stack.  Instead, +   we do it here since __longjmp() is usually much less frequently +   invoked than __sigsetjmp(). The only difficulty is that __sigsetjmp() +   didn't (and wouldn't be able to) save ar.rnat either.  This is a problem +   because if we're not careful, we could end up loading random NaT bits. +   There are two cases: + +	(i)  ar.bsp < ia64_rse_rnat_addr(jmpbuf.ar_bsp) +		ar.rnat contains the desired bits---preserve ar.rnat +		across loadrs and write to ar.bspstore + +	(ii) ar.bsp >= ia64_rse_rnat_addr(jmpbuf.ar_bsp) +		The desired ar.rnat is stored in +		ia64_rse_rnat_addr(jmpbuf.ar_bsp).  
Load those +		bits into ar.rnat after setting ar.bspstore. */ + +#include "sysdep.h" +#include <features.h> + +#	define	pPos	p6	/* is rotate count positive? */ +#	define	pNeg	p7	/* is rotate count negative? */ + + +	/* __longjmp(__jmp_buf buf, int val) */ + +LEAF(__longjmp) +	alloc r8=ar.pfs,2,1,0,0 +	mov r27=ar.rsc +	add r2=0x98,in0		// r2 <- &jmpbuf.orig_jmp_buf_addr +	;; +	ld8 r8=[r2],-16		// r8 <- orig_jmp_buf_addr +	mov r10=ar.bsp +	and r11=~0x3,r27	// clear ar.rsc.mode +	;; +	flushrs			// flush dirty regs to backing store (must be first in insn grp) +	ld8 r23=[r2],8		// r23 <- jmpbuf.ar_bsp +	sub r8=r8,in0		// r8 <- &orig_jmpbuf - &jmpbuf +	;; +	ld8 r25=[r2]		// r25 <- jmpbuf.ar_unat +	extr.u r8=r8,3,6	// r8 <- (&orig_jmpbuf - &jmpbuf)/8 & 0x3f +	;; +	cmp.lt pNeg,pPos=r8,r0 +	mov r2=in0 +	;; +(pPos)	mov r16=r8 +(pNeg)	add r16=64,r8 +(pPos)	sub r17=64,r8 +(pNeg)	sub r17=r0,r8 +	;; +	mov ar.rsc=r11		// put RSE in enforced lazy mode +	shr.u r8=r25,r16 +	add r3=8,in0		// r3 <- &jmpbuf.r1 +	shl r9=r25,r17 +	;; +	or r25=r8,r9		// r25 <- saved unat rotated right by r16 bits (r16 + r17 == 64) +	;; +	mov r26=ar.rnat +	mov ar.unat=r25		// setup ar.unat (NaT bits for r1, r4-r7, and r12) +	;; +	ld8.fill.nta sp=[r2],16	// r12 (sp) +	ld8.fill.nta gp=[r3],16		// r1 (gp) +	dep r11=-1,r23,3,6	// r11 <- ia64_rse_rnat_addr(jmpbuf.ar_bsp) +	;; +	ld8.nta r16=[r2],16		// caller's unat +	ld8.nta r17=[r3],16		// fpsr +	;; +	ld8.fill.nta r4=[r2],16	// r4 +	ld8.fill.nta r5=[r3],16		// r5 +	cmp.geu p8,p0=r10,r11	// p8 <- (ar.bsp >= ia64_rse_rnat_addr(jmpbuf.ar_bsp)) +	;; +	ld8.fill.nta r6=[r2],16	// r6 +	ld8.fill.nta r7=[r3],16		// r7 +	;; +	mov ar.unat=r16			// restore caller's unat +	mov ar.fpsr=r17			// restore fpsr +	;; +	ld8.nta r16=[r2],16		// b0 +	ld8.nta r17=[r3],16		// b1 +	;; +(p8)	ld8 r26=[r11]		// r26 <- *ia64_rse_rnat_addr(jmpbuf.ar_bsp) +	mov ar.bspstore=r23	// restore ar.bspstore +	;; +	ld8.nta r18=[r2],16		// b2 +	ld8.nta r19=[r3],16		// b3 +	;; +	ld8.nta r20=[r2],16		// b4 +	ld8.nta r21=[r3],16		// b5 +	;; +	ld8.nta r11=[r2],16		// ar.pfs +	ld8.nta 
r22=[r3],56		// ar.lc +	;; +	ld8.nta r24=[r2],32		// pr +	mov b0=r16 +	;; +	ldf.fill.nta f2=[r2],32 +	ldf.fill.nta f3=[r3],32 +	mov b1=r17 +	;; +	ldf.fill.nta f4=[r2],32 +	ldf.fill.nta f5=[r3],32 +	mov b2=r18 +	;; +	ldf.fill.nta f16=[r2],32 +	ldf.fill.nta f17=[r3],32 +	mov b3=r19 +	;; +	ldf.fill.nta f18=[r2],32 +	ldf.fill.nta f19=[r3],32 +	mov b4=r20 +	;; +	ldf.fill.nta f20=[r2],32 +	ldf.fill.nta f21=[r3],32 +	mov b5=r21 +	;; +	ldf.fill.nta f22=[r2],32 +	ldf.fill.nta f23=[r3],32 +	mov ar.lc=r22 +	;; +	ldf.fill.nta f24=[r2],32 +	ldf.fill.nta f25=[r3],32 +	cmp.eq p8,p9=0,in1 +	;; +	ldf.fill.nta f26=[r2],32 +	ldf.fill.nta f27=[r3],32 +	mov ar.pfs=r11 +	;; +	ldf.fill.nta f28=[r2],32 +	ldf.fill.nta f29=[r3],32 +	;; +	ldf.fill.nta f30=[r2] +	ldf.fill.nta f31=[r3] +(p8)	mov r8=1 + +	mov ar.rnat=r26		// restore ar.rnat +	;; +	mov ar.rsc=r27		// restore ar.rsc +(p9)	mov r8=in1 + +	invala			// virt. -> phys. regnum mapping may change +	mov pr=r24,-1 +	ret +END(__longjmp) diff --git a/libc/sysdeps/linux/ia64/__syscall_error.c b/libc/sysdeps/linux/ia64/__syscall_error.c new file mode 100644 index 000000000..910b32cda --- /dev/null +++ b/libc/sysdeps/linux/ia64/__syscall_error.c @@ -0,0 +1,19 @@ +/* Wrapper for setting errno. + * + * Copyright (C) 2000-2006 Erik Andersen <andersen@uclibc.org> + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. + */ + +#include <errno.h> +#include <features.h> + +/* This routine is jumped to by all the syscall handlers, to stash + * an error number into errno.  */ +int __syscall_error(void) attribute_hidden; +int __syscall_error(void) +{ +	register int err_no asm("%r8"); +	__set_errno(-err_no); +	return -1; +} diff --git a/libc/sysdeps/linux/ia64/bits/endian.h b/libc/sysdeps/linux/ia64/bits/endian.h new file mode 100644 index 000000000..98a5e2399 --- /dev/null +++ b/libc/sysdeps/linux/ia64/bits/endian.h @@ -0,0 +1,7 @@ +/* Linux/ia64 is little-endian.  
*/ + +#ifndef _ENDIAN_H +# error "Never use <bits/endian.h> directly; include <endian.h> instead." +#endif + +#define __BYTE_ORDER __LITTLE_ENDIAN diff --git a/libc/sysdeps/linux/ia64/bits/fcntl.h b/libc/sysdeps/linux/ia64/bits/fcntl.h new file mode 100644 index 000000000..d330954d4 --- /dev/null +++ b/libc/sysdeps/linux/ia64/bits/fcntl.h @@ -0,0 +1,183 @@ +/* O_*, F_*, FD_* bit values for Linux/IA64. +   Copyright (C) 1999, 2000, 2004 Free Software Foundation, Inc. +   This file is part of the GNU C Library. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +#ifndef	_FCNTL_H +# error "Never use <bits/fcntl.h> directly; include <fcntl.h> instead." 
+#endif + + +#include <sys/types.h> + +/* open/fcntl - O_SYNC is only implemented on blocks devices and on files +   located on an ext2 file system */ +#define O_ACCMODE	  0003 +#define O_RDONLY	    00 +#define O_WRONLY	    01 +#define O_RDWR		    02 +#define O_CREAT		  0100	/* not fcntl */ +#define O_EXCL		  0200	/* not fcntl */ +#define O_NOCTTY	  0400	/* not fcntl */ +#define O_TRUNC		 01000	/* not fcntl */ +#define O_APPEND	 02000 +#define O_NONBLOCK	 04000 +#define O_NDELAY	O_NONBLOCK +#define O_SYNC		010000 +#define O_FSYNC		O_SYNC +#define O_ASYNC		020000 +#define O_DIRECT	040000 + +#ifdef __USE_GNU +# define O_DIRECTORY	0200000	/* must be a directory */ +# define O_NOFOLLOW	0400000 /* don't follow links */ +# define O_NOATIME	01000000 /* Do not set atime.  */ +#endif + +#ifdef __USE_LARGEFILE64 +/* Not necessary, files are always with 64bit off_t.  */ +# define O_LARGEFILE	0 +#endif + +/* For now Linux has synchronisity options for data and read operations. +   We define the symbols here but let them do the same as O_SYNC since +   this is a superset.	*/ +#if defined __USE_POSIX199309 || defined __USE_UNIX98 +# define O_DSYNC	O_SYNC	/* Synchronize data.  */ +# define O_RSYNC	O_SYNC	/* Synchronize read operations.	 */ +#endif + +/* Values for the second argument to `fcntl'.  */ +#define F_DUPFD		0	/* Duplicate file descriptor.  */ +#define F_GETFD		1	/* Get file descriptor flags.  */ +#define F_SETFD		2	/* Set file descriptor flags.  */ +#define F_GETFL		3	/* Get file status flags.  */ +#define F_SETFL		4	/* Set file status flags.  */ +#define F_GETLK		5	/* Get record locking info.  */ +#define F_SETLK		6	/* Set record locking info (non-blocking).  */ +#define F_SETLKW	7	/* Set record locking info (blocking).	*/ + +/* Not necessary, we always have 64-bit offsets.  */ +#define F_GETLK64	5	/* Get record locking info.  */ +#define F_SETLK64	6	/* Set record locking info (non-blocking).  */ +#define F_SETLKW64	7	/* Set record locking info (blocking).	
*/ + +#if defined __USE_BSD || defined __USE_UNIX98 +# define F_SETOWN	8	/* Set owner of socket (receiver of SIGIO).  */ +# define F_GETOWN	9	/* Get owner of socket (receiver of SIGIO).  */ +#endif + +#ifdef __USE_GNU +# define F_SETSIG	10	/* Set number of signal to be sent.  */ +# define F_GETSIG	11	/* Get number of signal to be sent.  */ +#endif + +#ifdef __USE_GNU +# define F_SETLEASE	1024	/* Set a lease.	 */ +# define F_GETLEASE	1025	/* Enquire what lease is active.  */ +# define F_NOTIFY	1026	/* Request notifications on a directory.	 */ +#endif + +/* For F_[GET|SET]FD.  */ +#define FD_CLOEXEC	1	/* actually anything with low bit set goes */ + +/* For posix fcntl() and `l_type' field of a `struct flock' for lockf().  */ +#define F_RDLCK		0	/* Read lock.  */ +#define F_WRLCK		1	/* Write lock.	*/ +#define F_UNLCK		2	/* Remove lock.	 */ + +/* for old implementation of bsd flock () */ +#define F_EXLCK		4	/* or 3 */ +#define F_SHLCK		8	/* or 4 */ + +#ifdef __USE_BSD +/* Operations for bsd flock(), also used by the kernel implementation */ +# define LOCK_SH	1	/* shared lock */ +# define LOCK_EX	2	/* exclusive lock */ +# define LOCK_NB	4	/* or'd with one of the above to prevent +				   blocking */ +# define LOCK_UN	8	/* remove lock */ +#endif + +#ifdef __USE_GNU +# define LOCK_MAND	32	/* This is a mandatory flock:	*/ +# define LOCK_READ	64	/* ... which allows concurrent read operations.	 */ +# define LOCK_WRITE	128	/* ... which allows concurrent write operations.  */ +# define LOCK_RW	192	/* ... Which allows concurrent read & write operations.	 */ +#endif + + +#ifdef __USE_GNU +/* Types of directory notifications that may be requested with F_NOTIFY.  */ +# define DN_ACCESS	0x00000001	/* File accessed.  */ +# define DN_MODIFY	0x00000002	/* File modified.  */ +# define DN_CREATE	0x00000004	/* File created.  */ +# define DN_DELETE	0x00000008	/* File removed.  */ +# define DN_RENAME	0x00000010	/* File renamed.  */ +# define DN_ATTRIB	0x00000020	/* File changed attributes.  
*/ +# define DN_MULTISHOT	0x80000000	/* Don't remove notifier.  */ +#endif + +/* We don't need to support __USE_FILE_OFFSET64.  */ +struct flock +  { +    short int l_type;	/* Type of lock: F_RDLCK, F_WRLCK, or F_UNLCK.	*/ +    short int l_whence;	/* Where `l_start' is relative to (like `lseek').  */ +    __off_t l_start;	/* Offset where the lock begins.  */ +    __off_t l_len;	/* Size of the locked area; zero means until EOF.  */ +    __pid_t l_pid;	/* Process holding the lock.  */ +  }; + +#ifdef __USE_LARGEFILE64 +struct flock64 +  { +    short int l_type;	/* Type of lock: F_RDLCK, F_WRLCK, or F_UNLCK.	*/ +    short int l_whence;	/* Where `l_start' is relative to (like `lseek').  */ +    __off64_t l_start;	/* Offset where the lock begins.  */ +    __off64_t l_len;	/* Size of the locked area; zero means until EOF.  */ +    __pid_t l_pid;	/* Process holding the lock.  */ +  }; +#endif + + +/* Define some more compatibility macros to be backward compatible with +   BSD systems which did not managed to hide these kernel macros.  */ +#ifdef	__USE_BSD +# define FAPPEND	O_APPEND +# define FFSYNC		O_FSYNC +# define FASYNC		O_ASYNC +# define FNONBLOCK	O_NONBLOCK +# define FNDELAY	O_NDELAY +#endif /* Use BSD.  */ + +/* Advise to `posix_fadvise'.  */ +#ifdef __USE_XOPEN2K +# define POSIX_FADV_NORMAL	0 /* No further special treatment.  */ +# define POSIX_FADV_RANDOM	1 /* Expect random page references.  */ +# define POSIX_FADV_SEQUENTIAL	2 /* Expect sequential page references.	 */ +# define POSIX_FADV_WILLNEED	3 /* Will need these pages.  */ +# define POSIX_FADV_DONTNEED	4 /* Don't need these pages.  */ +# define POSIX_FADV_NOREUSE	5 /* Data will be accessed once.  */ +#endif + +__BEGIN_DECLS + +/* Provide kernel hint to read ahead.  
*/ +extern ssize_t readahead (int __fd, __off64_t __offset, size_t __count) +    __THROW; + +__END_DECLS diff --git a/libc/sysdeps/linux/ia64/bits/kernel_stat.h b/libc/sysdeps/linux/ia64/bits/kernel_stat.h new file mode 100644 index 000000000..b46369c3c --- /dev/null +++ b/libc/sysdeps/linux/ia64/bits/kernel_stat.h @@ -0,0 +1,57 @@ +/* Ripped from linux/include/asm-ia64/stat.h + * and renamed 'struct stat' to 'struct kernel_stat' */ + +#ifndef _ASM_IA64_STAT_H +#define _ASM_IA64_STAT_H + +/* + * Modified 1998, 1999 + *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co + */ + +struct kernel_stat { +	unsigned long	st_dev; +	unsigned long	st_ino; +	unsigned long	st_nlink; +	unsigned int	st_mode; +	unsigned int	st_uid; +	unsigned int	st_gid; +	unsigned int	__pad0; +	unsigned long	st_rdev; +	unsigned long	st_size; +	unsigned long	st_atime; +	unsigned long	st_atime_nsec; +	unsigned long	st_mtime; +	unsigned long	st_mtime_nsec; +	unsigned long	st_ctime; +	unsigned long	st_ctime_nsec; +	unsigned long	st_blksize; +	long		st_blocks; +	unsigned long	__unused[3]; +}; + +#define STAT_HAVE_NSEC 1 + +struct __old_kernel_stat { +	unsigned int	st_dev; +	unsigned int	st_ino; +	unsigned int	st_mode; +	unsigned int	st_nlink; +	unsigned int	st_uid; +	unsigned int	st_gid; +	unsigned int	st_rdev; +	unsigned int	__pad1; +	unsigned long	st_size; +	unsigned long	st_atime; +	unsigned long	st_mtime; +	unsigned long	st_ctime; +	unsigned int	st_blksize; +	int		st_blocks; +	unsigned int	__unused1; +	unsigned int	__unused2; +}; + +/* ia64 stat64 is same as stat */ +#define kernel_stat64 kernel_stat + +#endif /* _ASM_IA64_STAT_H */ diff --git a/libc/sysdeps/linux/ia64/bits/kernel_types.h b/libc/sysdeps/linux/ia64/bits/kernel_types.h new file mode 100644 index 000000000..6449b5e27 --- /dev/null +++ b/libc/sysdeps/linux/ia64/bits/kernel_types.h @@ -0,0 +1,56 @@ +/* Note that we use the exact same include guard #define names + * as asm/posix_types.h.  
This will avoid gratuitous conflicts  + * with the posix_types.h kernel header, and will ensure that  + * our private content, and not the kernel header, will win. + *  -Erik + */ +#ifndef _ASM_IA64_POSIX_TYPES_H +#define _ASM_IA64_POSIX_TYPES_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc.  Also, we cannot + * assume GCC is being used. + * + * Based on <asm-alpha/posix_types.h>. + * + * Modified 1998-2000, 2003 + *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co + */ + +typedef unsigned long	__kernel_ino_t; +typedef unsigned int	__kernel_mode_t; +typedef unsigned int	__kernel_nlink_t; +typedef long		__kernel_off_t; +typedef long long	__kernel_loff_t; +typedef int		__kernel_pid_t; +typedef int		__kernel_ipc_pid_t; +typedef unsigned int	__kernel_uid_t; +typedef unsigned int	__kernel_gid_t; +typedef unsigned long	__kernel_size_t; +typedef long		__kernel_ssize_t; +typedef long		__kernel_ptrdiff_t; +typedef long		__kernel_time_t; +typedef long		__kernel_suseconds_t; +typedef long		__kernel_clock_t; +typedef int		__kernel_timer_t; +typedef int		__kernel_clockid_t; +typedef int		__kernel_daddr_t; +typedef char *		__kernel_caddr_t; +typedef unsigned long	__kernel_sigset_t;	/* at least 32 bits */ +typedef unsigned short	__kernel_uid16_t; +typedef unsigned short	__kernel_gid16_t; + +typedef struct { +	int	val[2]; +} __kernel_fsid_t; + +typedef __kernel_uid_t __kernel_old_uid_t; +typedef __kernel_gid_t __kernel_old_gid_t; +typedef __kernel_uid_t __kernel_uid32_t; +typedef __kernel_gid_t __kernel_gid32_t; + +typedef unsigned int	__kernel_dev_t; +typedef unsigned int	__kernel_old_dev_t; + +#endif /* _ASM_IA64_POSIX_TYPES_H */ diff --git a/libc/sysdeps/linux/ia64/bits/setjmp.h b/libc/sysdeps/linux/ia64/bits/setjmp.h new file mode 100644 index 000000000..76625753d --- /dev/null +++ b/libc/sysdeps/linux/ia64/bits/setjmp.h @@ -0,0 +1,40 @@ +/* Define the machine-dependent type 
`jmp_buf'.  Linux/IA-64 version. +   Copyright (C) 1999, 2000, 2003 Free Software Foundation, Inc. +   This file is part of the GNU C Library. +   Contributed by David Mosberger-Tang <davidm@hpl.hp.com>. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +#ifndef _BITS_SETJMP_H +#define _BITS_SETJMP_H  1 + +#if !defined _SETJMP_H && !defined _PTHREAD_H +# error "Never include <bits/setjmp.h> directly; use <setjmp.h> instead." +#endif + +/* User code must not depend on the internal representation of jmp_buf. */ + +#define _JBLEN	70 + +/* the __jmp_buf element type should be __float80 per ABI... */ +typedef long __jmp_buf[_JBLEN] __attribute__ ((aligned (16))); /* guarantees 128-bit alignment! */ + +/* Test if longjmp to JMPBUF would unwind the frame containing a local +   variable at ADDRESS.  */ +#define _JMPBUF_UNWINDS(_jmpbuf, _address)		\ +     ((void *)(_address) < (void *)(((long *)_jmpbuf)[0])) + +#endif  /* bits/setjmp.h */ diff --git a/libc/sysdeps/linux/ia64/bits/sigaction.h b/libc/sysdeps/linux/ia64/bits/sigaction.h new file mode 100644 index 000000000..11599d520 --- /dev/null +++ b/libc/sysdeps/linux/ia64/bits/sigaction.h @@ -0,0 +1,73 @@ +/* Definitions for Linux/ia64 sigaction. 
+   Copyright (C) 1996, 1997, 2000, 2003 Free Software Foundation, Inc. +   This file is part of the GNU C Library. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +#ifndef _SIGNAL_H +# error "Never include <bits/sigaction.h> directly; use <signal.h> instead." +#endif + +/* Structure describing the action to be taken when a signal arrives.  */ +struct sigaction +  { +    /* Signal handler.  */ +#ifdef __USE_POSIX199309 +    union +      { +	/* Used if SA_SIGINFO is not set.  */ +	__sighandler_t sa_handler; +	/* Used if SA_SIGINFO is set.  */ +	void (*sa_sigaction) (int, siginfo_t *, void *); +      } +    __sigaction_handler; +# define sa_handler	__sigaction_handler.sa_handler +# define sa_sigaction	__sigaction_handler.sa_sigaction +#else +    __sighandler_t sa_handler; +#endif + +    /* Special flags.  */ +    unsigned long int sa_flags; + +    /* Additional set of signals to be blocked.  */ +    __sigset_t sa_mask; +  }; + +/* Bits in `sa_flags'.  */ +#define SA_NOCLDSTOP  0x00000001 /* Don't send SIGCHLD when children stop.  */ +#define SA_NOCLDWAIT  0x00000002 /* Don't create zombie on child death.  
*/ +#define SA_SIGINFO    0x00000004 +#if defined __USE_UNIX98 || defined __USE_MISC +# define SA_ONSTACK   0x08000000 /* Use signal stack by using `sa_restorer'. */ +# define SA_RESTART   0x10000000 /* Restart syscall on signal return.  */ +# define SA_NODEFER   0x40000000 /* Don't automatically block the signal +				    when its handler is being executed.  */ +# define SA_RESETHAND 0x80000000 /* Reset to SIG_DFL on entry to handler.  */ +#endif +#ifdef __USE_MISC +# define SA_INTERRUPT 0x20000000 /* Historic no-op.  */ + +/* Some aliases for the SA_ constants.  */ +# define SA_NOMASK    SA_NODEFER +# define SA_ONESHOT   SA_RESETHAND +# define SA_STACK     SA_ONSTACK +#endif + +/* Values for the HOW argument to `sigprocmask'.  */ +#define SIG_BLOCK          0	/* for blocking signals */ +#define SIG_UNBLOCK        1	/* for unblocking signals */ +#define SIG_SETMASK        2	/* for setting the signal mask */ diff --git a/libc/sysdeps/linux/ia64/bits/sigcontext.h b/libc/sysdeps/linux/ia64/bits/sigcontext.h new file mode 100644 index 000000000..72c60ec24 --- /dev/null +++ b/libc/sysdeps/linux/ia64/bits/sigcontext.h @@ -0,0 +1,79 @@ +/* Copyright (C) 1996, 1997, 1998, 2000, 2001, 2003, 2004 +   Free Software Foundation, Inc. +   This file is part of the GNU C Library. +   Contributed by Jes Sorensen <jes@linuxcare.com>, July 2000 + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. 
+ +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +#if !defined _SIGNAL_H && !defined _SYS_UCONTEXT_H +# error "Never use <bits/sigcontext.h> directly; include <signal.h> instead." +#endif + +#ifndef _BITS_SIGCONTEXT_H +#define _BITS_SIGCONTEXT_H 1 + +#include <bits/sigstack.h> + +struct ia64_fpreg +  { +    union +      { +	unsigned long bits[2]; +      } u; +  } __attribute__ ((aligned (16))); + +struct sigcontext +{ +  unsigned long int sc_flags;	/* see manifest constants below */ +  unsigned long int sc_nat;	/* bit i == 1 iff scratch reg gr[i] is a NaT */ +  stack_t sc_stack;		/* previously active stack */ + +  unsigned long int sc_ip;	/* instruction pointer */ +  unsigned long int sc_cfm;	/* current frame marker */ +  unsigned long int sc_um;	/* user mask bits */ +  unsigned long int sc_ar_rsc;	/* register stack configuration register */ +  unsigned long int sc_ar_bsp;	/* backing store pointer */ +  unsigned long int sc_ar_rnat;	/* RSE NaT collection register */ +  unsigned long int sc_ar_ccv;	/* compare & exchange compare value register */ +  unsigned long int sc_ar_unat;	/* ar.unat of interrupted context */ +  unsigned long int sc_ar_fpsr;	/* floating-point status register */ +  unsigned long int sc_ar_pfs;	/* previous function state */ +  unsigned long int sc_ar_lc;	/* loop count register */ +  unsigned long int sc_pr;	/* predicate registers */ +  unsigned long int sc_br[8];	/* branch registers */ +  unsigned long int sc_gr[32];	/* general registers (static partition) */ +  struct ia64_fpreg sc_fr[128];	/* floating-point registers */ +  unsigned long int sc_rbs_base;/* NULL or new base of sighandler's rbs */ +  unsigned long int sc_loadrs;	/* see description above */ +  unsigned long int sc_ar25;	/* cmp8xchg16 uses this */ +  unsigned long int sc_ar26;	/* rsvd for scratch use 
*/ +  unsigned long int sc_rsvd[12];/* reserved for future use */ + +  /* sc_mask is actually an sigset_t but we don't want to +   * include the kernel headers here. */ +  unsigned long int sc_mask;	/* signal mask to restore after handler returns */ +}; + +/* sc_flag bit definitions. */ +#define IA64_SC_FLAG_ONSTACK_BIT	0	/* is handler running on signal stack? */ +#define IA64_SC_FLAG_IN_SYSCALL_BIT	1	/* did signal interrupt a syscall? */ +#define IA64_SC_FLAG_FPH_VALID_BIT	2	/* is state in f[32]-f[127] valid? */ + +#define IA64_SC_FLAG_ONSTACK		(1 << IA64_SC_FLAG_ONSTACK_BIT) +#define IA64_SC_FLAG_IN_SYSCALL		(1 << IA64_SC_FLAG_IN_SYSCALL_BIT) +#define IA64_SC_FLAG_FPH_VALID		(1 << IA64_SC_FLAG_FPH_VALID_BIT) + +#endif /* _BITS_SIGCONTEXT_H */ diff --git a/libc/sysdeps/linux/ia64/bits/sigstack.h b/libc/sysdeps/linux/ia64/bits/sigstack.h new file mode 100644 index 000000000..c9c9d2fed --- /dev/null +++ b/libc/sysdeps/linux/ia64/bits/sigstack.h @@ -0,0 +1,63 @@ +/* sigstack, sigaltstack definitions. +   Copyright (C) 1998, 2000, 2002, 2003 Free Software Foundation, Inc. +   This file is part of the GNU C Library. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +#ifndef _SIGNAL_H +# error "Never include this file directly.  
Use <signal.h> instead" +#endif + +#ifndef _SIGSTACK_H +#define _SIGSTACK_H	1 + +/* Structure describing a signal stack (obsolete).  */ +struct sigstack +  { +    __ptr_t ss_sp;		/* Signal stack pointer.  */ +    int ss_onstack;		/* Nonzero if executing on this stack.  */ +  }; + + +/* Possible values for `ss_flags.'.  */ +enum +{ +  SS_ONSTACK = 1, +#define SS_ONSTACK	SS_ONSTACK +  SS_DISABLE +#define SS_DISABLE	SS_DISABLE +}; + +/* Minimum stack size for a signal handler. + +   Yes, this should be 131072 but the constant got defined incorrectly +   in the kernel and we have to live with it.  Users should in any case +   use SIGSTKSZ as the size user-supplied buffers should have.  */ +#define MINSIGSTKSZ	131027 + +/* System default stack size.  */ +#define SIGSTKSZ	262144 + + +/* Alternate, preferred interface.  */ +typedef struct sigaltstack +  { +    __ptr_t ss_sp; +    int ss_flags; +    size_t ss_size; +  } stack_t; + +#endif	/* bits/sigstack.h */ diff --git a/libc/sysdeps/linux/ia64/bits/stackinfo.h b/libc/sysdeps/linux/ia64/bits/stackinfo.h new file mode 100644 index 000000000..b7dc5d91d --- /dev/null +++ b/libc/sysdeps/linux/ia64/bits/stackinfo.h @@ -0,0 +1,29 @@ +/* Copyright (C) 2001 Free Software Foundation, Inc. +   This file is part of the GNU C Library. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. 
+ +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* This file contains a bit of information about the stack allocation +   of the processor.  */ + +#ifndef _STACKINFO_H +#define _STACKINFO_H	1 + +/* On IA-64 the stack grows down.  The register stack is of no concern +   here.  */ +#define _STACK_GROWS_DOWN	1 + +#endif	/* stackinfo.h */ diff --git a/libc/sysdeps/linux/ia64/bits/syscalls.h b/libc/sysdeps/linux/ia64/bits/syscalls.h new file mode 100644 index 000000000..591075ab0 --- /dev/null +++ b/libc/sysdeps/linux/ia64/bits/syscalls.h @@ -0,0 +1,198 @@ +/* Copyright (C) 1999, 2000, 2002, 2003, 2004 Free Software Foundation, Inc. +   This file is part of the GNU C Library. +   Written by Jes Sorensen, <Jes.Sorensen@cern.ch>, April 1999. +   Based on code originally written by David Mosberger-Tang + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +#ifndef _BITS_SYSCALLS_H +#define _BITS_SYSCALLS_H + +#ifndef _SYSCALL_H +# error "Never use <bits/syscalls.h> directly; include <sys/syscall.h> instead." 
+#endif + +/* This includes the `__NR_<name>' syscall numbers taken from the Linux kernel + * header files.  It also defines the traditional `SYS_<name>' macros for older + * programs.  */ +#include <bits/sysnum.h> + +#ifndef __set_errno +# define __set_errno(val) (*__errno_location ()) = (val) +#endif + + +#ifndef __ASSEMBLER__ + +#undef IA64_USE_NEW_STUB + +#define SYS_ify(syscall_name)   __NR_##syscall_name + +/* taken from asm-ia64/break.h */ +#define __IA64_BREAK_SYSCALL	"0x100000" + +#define _DO_SYSCALL(name, nr, args...) \ +    LOAD_ARGS_##nr (args) \ +    register long _r8 asm ("r8"); \ +    register long _r10 asm ("r10"); \ +    register long _r15 asm ("r15") = SYS_ify(name); \ +    long _retval; \ +    LOAD_REGS_##nr \ +    __asm __volatile ("break " __IA64_BREAK_SYSCALL ";;\n\t" \ +		: "=r" (_r8), "=r" (_r10), "=r" (_r15) ASM_OUTARGS_##nr \ +		: "2" (_r15) ASM_ARGS_##nr \ +		: "memory" ASM_CLOBBERS_##nr); \ +    _retval = _r8; \ +	if (_r10 == -1) { \ +		__set_errno (_retval); \ +		_retval = -1; \ +	} + +#define LOAD_ARGS_0() +#define LOAD_REGS_0 +#define LOAD_ARGS_1(a1)					\ +  long _arg1 = (long) (a1);				\ +  LOAD_ARGS_0 () +#define LOAD_REGS_1					\ +  register long _out0 asm ("out0") = _arg1;		\ +  LOAD_REGS_0 +#define LOAD_ARGS_2(a1, a2)				\ +  long _arg2 = (long) (a2);				\ +  LOAD_ARGS_1 (a1) +#define LOAD_REGS_2					\ +  register long _out1 asm ("out1") = _arg2;		\ +  LOAD_REGS_1 +#define LOAD_ARGS_3(a1, a2, a3)				\ +  long _arg3 = (long) (a3);				\ +  LOAD_ARGS_2 (a1, a2) +#define LOAD_REGS_3					\ +  register long _out2 asm ("out2") = _arg3;		\ +  LOAD_REGS_2 +#define LOAD_ARGS_4(a1, a2, a3, a4)			\ +  long _arg4 = (long) (a4);				\ +  LOAD_ARGS_3 (a1, a2, a3) +#define LOAD_REGS_4					\ +  register long _out3 asm ("out3") = _arg4;		\ +  LOAD_REGS_3 +#define LOAD_ARGS_5(a1, a2, a3, a4, a5)			\ +  long _arg5 = (long) (a5);				\ +  LOAD_ARGS_4 (a1, a2, a3, a4) +#define LOAD_REGS_5					\ +  register long _out4 asm ("out4") = _arg5;		\ +  
LOAD_REGS_4 +#define LOAD_ARGS_6(a1, a2, a3, a4, a5, a6)		\ +  long _arg6 = (long) (a6);	    			\ +  LOAD_ARGS_5 (a1, a2, a3, a4, a5) +#define LOAD_REGS_6					\ +  register long _out5 asm ("out5") = _arg6;		\ +  LOAD_REGS_5 + +#define ASM_OUTARGS_0 +#define ASM_OUTARGS_1	ASM_OUTARGS_0, "=r" (_out0) +#define ASM_OUTARGS_2	ASM_OUTARGS_1, "=r" (_out1) +#define ASM_OUTARGS_3	ASM_OUTARGS_2, "=r" (_out2) +#define ASM_OUTARGS_4	ASM_OUTARGS_3, "=r" (_out3) +#define ASM_OUTARGS_5	ASM_OUTARGS_4, "=r" (_out4) +#define ASM_OUTARGS_6	ASM_OUTARGS_5, "=r" (_out5) + +#ifdef IA64_USE_NEW_STUB +#define ASM_ARGS_0 +#define ASM_ARGS_1	ASM_ARGS_0, "4" (_out0) +#define ASM_ARGS_2	ASM_ARGS_1, "5" (_out1) +#define ASM_ARGS_3	ASM_ARGS_2, "6" (_out2) +#define ASM_ARGS_4	ASM_ARGS_3, "7" (_out3) +#define ASM_ARGS_5	ASM_ARGS_4, "8" (_out4) +#define ASM_ARGS_6	ASM_ARGS_5, "9" (_out5) +#else +#define ASM_ARGS_0 +#define ASM_ARGS_1	ASM_ARGS_0, "3" (_out0) +#define ASM_ARGS_2	ASM_ARGS_1, "4" (_out1) +#define ASM_ARGS_3	ASM_ARGS_2, "5" (_out2) +#define ASM_ARGS_4	ASM_ARGS_3, "6" (_out3) +#define ASM_ARGS_5	ASM_ARGS_4, "7" (_out4) +#define ASM_ARGS_6	ASM_ARGS_5, "8" (_out5) +#endif + +#define ASM_CLOBBERS_0	ASM_CLOBBERS_1, "out0" +#define ASM_CLOBBERS_1	ASM_CLOBBERS_2, "out1" +#define ASM_CLOBBERS_2	ASM_CLOBBERS_3, "out2" +#define ASM_CLOBBERS_3	ASM_CLOBBERS_4, "out3" +#define ASM_CLOBBERS_4	ASM_CLOBBERS_5, "out4" +#define ASM_CLOBBERS_5	ASM_CLOBBERS_6, "out5" +#define ASM_CLOBBERS_6_COMMON	, "out6", "out7",			\ +  /* Non-stacked integer registers, minus r8, r10, r15.  */		\ +  "r2", "r3", "r9", "r11", "r12", "r13", "r14", "r16", "r17", "r18",	\ +  "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27",	\ +  "r28", "r29", "r30", "r31",						\ +  /* Predicate registers.  */						\ +  "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15",	\ +  /* Non-rotating fp registers.  */					\ +  "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",	\ +  /* Branch registers.  
*/						\ +  "b6" + +#ifdef IA64_USE_NEW_STUB +# define ASM_CLOBBERS_6	ASM_CLOBBERS_6_COMMON +#else +# define ASM_CLOBBERS_6	ASM_CLOBBERS_6_COMMON , "b7" +#endif + + + +#define _syscall0(type,name) \ +type name(void) \ +{ \ +	_DO_SYSCALL(name, 0); return (type) _retval; \ +} + +#define _syscall1(type,name,type1,arg1) \ +type name(type1 arg1) \ +{ \ +	_DO_SYSCALL(name, 1, arg1); return (type) _retval; \ +} + +#define _syscall2(type,name,type1,arg1,type2,arg2) \ +type name(type1 arg1, type2 arg2) \ +{ \ +	_DO_SYSCALL(name, 2, arg1, arg2); return (type) _retval; \ +} + +#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ +type name(type1 arg1, type2 arg2, type3 arg3) \ +{ \ +	_DO_SYSCALL(name, 3, arg1, arg2, arg3); return (type) _retval; \ +} + +#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ +type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ +{ \ +	_DO_SYSCALL(name, 4, arg1, arg2, arg3, arg4); return (type) _retval; \ +} + +#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ +type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ +{ \ +	_DO_SYSCALL(name, 5, arg1, arg2, arg3, arg4, arg5); return (type) _retval; \ +} + +#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5,type6,arg6) \ +type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) \ +{ \ +	_DO_SYSCALL(name, 6, arg1, arg2, arg3, arg4, arg5, arg6); return (type) _retval; \ +} + +#endif /* __ASSEMBLER__ */ +#endif /* _BITS_SYSCALLS_H */ diff --git a/libc/sysdeps/linux/ia64/bits/uClibc_arch_features.h b/libc/sysdeps/linux/ia64/bits/uClibc_arch_features.h new file mode 100644 index 000000000..ebfabce90 --- /dev/null +++ b/libc/sysdeps/linux/ia64/bits/uClibc_arch_features.h @@ -0,0 +1,41 @@ +/* + * Track misc arch-specific features that aren't config options + */ + +#ifndef _BITS_UCLIBC_ARCH_FEATURES_H +#define _BITS_UCLIBC_ARCH_FEATURES_H + +/* instruction 
used when calling abort() to kill yourself */ +#define __UCLIBC_ABORT_INSTRUCTION__ "break 0" + +/* can your target use syscall6() for mmap ? */ +#define __UCLIBC_MMAP_HAS_6_ARGS__ + +/* does your target use syscall4() for truncate64 ? (32bit arches only) */ +#undef __UCLIBC_TRUNCATE64_HAS_4_ARGS__ + +/* does your target have a broken create_module() ? */ +#undef __UCLIBC_BROKEN_CREATE_MODULE__ + +/* does your target prefix all symbols with an _ ? */ +#define __UCLIBC_NO_UNDERSCORES__ + +/* does your target have an asm .set ? */ +#define __UCLIBC_HAVE_ASM_SET_DIRECTIVE__ + +/* define if target doesn't like .global */ +#undef __UCLIBC_ASM_GLOBAL_DIRECTIVE__ + +/* define if target supports .weak */ +#define __UCLIBC_HAVE_ASM_WEAK_DIRECTIVE__ + +/* define if target supports .weakext */ +#undef __UCLIBC_HAVE_ASM_WEAKEXT_DIRECTIVE__ + +/* needed probably only for ppc64 */ +#undef __UCLIBC_HAVE_ASM_GLOBAL_DOT_NAME__ + +/* define if target supports IEEE signed zero floats */ +#define __UCLIBC_HAVE_SIGNED_ZERO__ + +#endif /* _BITS_UCLIBC_ARCH_FEATURES_H */ diff --git a/libc/sysdeps/linux/ia64/bits/wordsize.h b/libc/sysdeps/linux/ia64/bits/wordsize.h new file mode 100644 index 000000000..dd698fa97 --- /dev/null +++ b/libc/sysdeps/linux/ia64/bits/wordsize.h @@ -0,0 +1,19 @@ +/* Copyright (C) 1999 Free Software Foundation, Inc. +   This file is part of the GNU C Library. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. 
+ +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +#define __WORDSIZE	64 diff --git a/libc/sysdeps/linux/ia64/brk.S b/libc/sysdeps/linux/ia64/brk.S new file mode 100644 index 000000000..6d146a579 --- /dev/null +++ b/libc/sysdeps/linux/ia64/brk.S @@ -0,0 +1,52 @@ +/* brk system call for Linux/ia64 +   Copyright (C) 1999,2000,2001,2003 Free Software Foundation, Inc. +   This file is part of the GNU C Library. +   Written by Stephane Eranian <eranian@hpl.hp.com> and +	      Jes Sorensen, <Jes.Sorensen@cern.ch>, April 1999. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  
*/ + +#include "sysdep.h" + +#include <asm/unistd.h> +#include <asm/errno.h> + +	.global __curbrk +	.type __curbrk,@object +	.size __curbrk,8 +	.data +	.align	8 +__curbrk: +	data8	0 + +weak_alias (__curbrk, ___brk_addr) + +LEAF(__brk) +	.regstk 1, 0, 0, 0 +	DO_CALL(__NR_brk) +	cmp.ltu	p6, p0 = ret0, in0 +	addl r9 = @ltoff(__curbrk), gp +	;; +	ld8 r9 = [r9] +(p6) 	mov ret0 = ENOMEM +(p6)	br.cond.spnt.few __syscall_error +	;; +	st8 [r9] = ret0 +	mov ret0 = 0 +	ret +END(__brk) + +weak_alias (__brk, brk) diff --git a/libc/sysdeps/linux/ia64/bsd-_setjmp.S b/libc/sysdeps/linux/ia64/bsd-_setjmp.S new file mode 100644 index 000000000..4e6a2da56 --- /dev/null +++ b/libc/sysdeps/linux/ia64/bsd-_setjmp.S @@ -0,0 +1 @@ +/* _setjmp is in setjmp.S  */ diff --git a/libc/sysdeps/linux/ia64/bsd-setjmp.S b/libc/sysdeps/linux/ia64/bsd-setjmp.S new file mode 100644 index 000000000..1da848d2f --- /dev/null +++ b/libc/sysdeps/linux/ia64/bsd-setjmp.S @@ -0,0 +1 @@ +/* setjmp is in setjmp.S  */ diff --git a/libc/sysdeps/linux/ia64/clone2.S b/libc/sysdeps/linux/ia64/clone2.S new file mode 100644 index 000000000..af621662c --- /dev/null +++ b/libc/sysdeps/linux/ia64/clone2.S @@ -0,0 +1,105 @@ +/* Copyright (C) 2000, 2001, 2003, 2004 Free Software Foundation, Inc. +   This file is part of the GNU C Library. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. 
+ +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +#include "sysdep.h" + +#include "sysdep.h" +#include <asm/errno.h> + + +/* int  __clone2(int (*fn) (void *arg), void *child_stack_base, 	*/ +/* 	         size_t child_stack_size, int flags, void *arg,		*/ +/*	         pid_t *parent_tid, void *tls, pid_t *child_tid)	*/ + +#define CHILD	p8 +#define PARENT	p9 + +ENTRY(__clone2) +	.prologue +	alloc r2=ar.pfs,8,1,6,0 +	cmp.eq p6,p0=0,in0 +	mov r8=EINVAL +	mov out0=in3		/* Flags are first syscall argument.	*/ +	mov out1=in1		/* Stack address.			*/ +(p6)	br.cond.spnt.many __syscall_error +	;; +	mov out2=in2		/* Stack size.				*/ +	mov out3=in5		/* Parent TID Pointer			*/ +	mov out4=in7		/* Child TID Pointer			*/ + 	mov out5=in6		/* TLS pointer				*/ +	/* +	 * clone2() is special: the child cannot execute br.ret right +	 * after the system call returns, because it starts out +	 * executing on an empty stack.  Because of this, we can't use +	 * the new (lightweight) syscall convention here.  Instead, we +	 * just fall back on always using "break". +	 * +	 * Furthermore, since the child starts with an empty stack, we +	 * need to avoid unwinding past invalid memory.  To that end, +	 * we'll pretend now that __clone2() is the end of the +	 * call-chain.  This is wrong for the parent, but only until +	 * it returns from clone2() but it's better than the +	 * alternative. +	 */ +	mov r15=SYS_ify (clone2) +	.save rp, r0 +	break __BREAK_SYSCALL +	.body +        cmp.eq p6,p0=-1,r10 +	cmp.eq CHILD,PARENT=0,r8 /* Are we the child?   
*/ +(p6)	br.cond.spnt.many __syscall_error +	;; +(CHILD)	mov loc0=gp +(PARENT) ret +	;; +#ifdef RESET_PID +	tbit.nz p6,p0=in3,16	/* CLONE_THREAD */ +	tbit.z p7,p10=in3,8	/* CLONE_VM */ +(p6)	br.cond.dptk 1f +	;; +	mov r15=SYS_ify (getpid) +(p10)	addl r8=-1,r0 +(p7)	break __BREAK_SYSCALL +	;; +	add r9=PID,r13 +	add r10=TID,r13 +	;; +	st4 [r9]=r8 +	st4 [r10]=r8 +	;; +#endif +1:	ld8 out1=[in0],8	/* Retrieve code pointer.	*/ +	mov out0=in4		/* Pass proper argument	to fn */ +	;; +	ld8 gp=[in0]		/* Load function gp.		*/ +	mov b6=out1 +	br.call.dptk.many rp=b6	/* Call fn(arg) in the child 	*/ +	;; +	mov out0=r8		/* Argument to _exit		*/ +	mov gp=loc0 +	.globl HIDDEN_JUMPTARGET(_exit) +	br.call.dpnt.many rp=HIDDEN_JUMPTARGET(_exit) +				/* call _exit with result from fn.	*/ +	ret			/* Not reached.		*/ +PSEUDO_END(__clone2) + +/* For now we leave __clone undefined.  This is unlikely to be a	*/ +/* problem, since at least the i386 __clone in glibc always failed	*/ +/* with a 0 sp (eventhough the kernel explicitly handled it).		*/ +/* Thus all such calls needed to pass an explicit sp, and as a result,	*/ +/* would be unlikely to work on ia64.					*/ diff --git a/libc/sysdeps/linux/ia64/crt1.S b/libc/sysdeps/linux/ia64/crt1.S new file mode 100644 index 000000000..774e84ff4 --- /dev/null +++ b/libc/sysdeps/linux/ia64/crt1.S @@ -0,0 +1,129 @@ +/* Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc. +   This file is part of the GNU C Library. +   Written by Jes Sorensen, <Jes.Sorensen@cern.ch>, April 1999. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. 
+ +   In addition to the permissions in the GNU Lesser General Public +   License, the Free Software Foundation gives you unlimited +   permission to link the compiled version of this file with other +   programs, and to distribute those programs without any restriction +   coming from the use of this file. (The GNU Lesser General Public +   License restrictions do apply in other respects; for example, they +   cover modification of the file, and distribution when not linked +   into another program.) + +   Note that people who make modified versions of this file are not +   obligated to grant this special exception for their modified +   versions; it is their choice whether to do so. The GNU Lesser +   General Public License gives permission to release a modified +   version without this exception; this exception also makes it +   possible to release a modified version which carries forward this +   exception. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  
*/ + +#define __ASSEMBLY__ +#include "sysdep.h" + +#include <asm/unistd.h> +#include <asm/fpu.h> + +/* + * Arguments for __uClibc_main: + *	out0:	main + *	out1:	argc + *	out2:	argv + *	out3:	init + *	out4:	fini + *	out5:	rtld_fini + *	out6:	stack_end + */ + +#if defined(__UCLIBC_CTOR_DTOR__) +.type   _init,%function +.type   _fini,%function +#else +.weak   _init +.weak   _fini +#endif + +	.align 32 +	.global _start + +	.proc _start +	.type _start,@function +_start: +	.prologue +	.save rp, r0 +	.body +	.prologue +	{ .mlx +	  alloc r2 = ar.pfs,0,0,7,0 +	  movl r3 = FPSR_DEFAULT +	} +	{ .mlx +	  adds out2 = 16, sp	/* get address of argc value */ +	  movl gp = @gprel(0f) +	  ;; +	} +0:	{ .mmi +	  ld8 out1 = [out2], 8	/* load argc and move out2 to become argv */ +	  mov.m r10 = ar.bsp	/* fetch rbs base address */ +	  mov r9 = ip +	  ;; +	} +	{ .mii +	  mov ar.fpsr = r3 +	  sub gp = r9, gp	/* back-compute gp value */ +	  adds out6 = 16, sp	/* highest non-environment stack address */ +	  ;; +	} +	{ +	  addl r11 = @ltoff(__libc_ia64_register_backing_store_base), gp +	  addl out0 = @ltoff(@fptr(main)), gp +	  addl out3 = @ltoff(@fptr(_init)), gp +	  ;; +	} +	{ .mmi +	  ld8 r3 = [r11]	/* pointer to __libc_ia64_register_backing_store_base */ +	  ld8 out0 = [out0]	/* pointer to `main' function descriptor */ +	  addl out4 = @ltoff(@fptr(_fini)), gp +	  ;; +	} +	{ .mmi +	  ld8 out3 = [out3]	/* pointer to `init' function descriptor */ +	  ld8 out4 = [out4]	/* pointer to `fini' function descriptor */ +	  nop 0 +	} +	.body +	{ .mib +	  st8 [r3] = r10 +	  mov out5 = ret0	/* dynamic linker destructor */ +	  br.call.sptk.few rp = __uClibc_main +	} +	{ .mib +	  break 0	/* break miserably if we ever return */ +	} +	.endp _start + +/* Define a symbol for the first piece of initialized data.  
*/ +	.data +	.globl __data_start +__data_start: +	.long 0 +	.weak data_start +	data_start = __data_start + +	.common __libc_ia64_register_backing_store_base, 8, 8 diff --git a/libc/sysdeps/linux/ia64/crti.S b/libc/sysdeps/linux/ia64/crti.S new file mode 100644 index 000000000..c22bbf194 --- /dev/null +++ b/libc/sysdeps/linux/ia64/crti.S @@ -0,0 +1,36 @@ +/* glibc's sysdeps/ia64/elf/initfini.c used for reference [PROLOG] */ + +	.text +	.section .init +	.global _init# +	.proc _init# +_init: +	.prologue +	.save ar.pfs, r34 +	alloc r34 = ar.pfs, 0, 3, 0, 0 +	.vframe r32 +	mov r32 = r12 +	.save rp, r33 +	mov r33 = b0 +	.body +	adds r12 = -16, r12 +	;; +	.endp _init# + + + +	.section .fini +	.global _fini# +	.proc _fini# +_fini: +	.prologue +	.save ar.pfs, r34 +	alloc r34 = ar.pfs, 0, 3, 0, 0 +	.vframe r32 +	mov r32 = r12 +	.save rp, r33 +	mov r33 = b0 +	.body +	adds r12 = -16, r12 +	;; +	.endp _fini# diff --git a/libc/sysdeps/linux/ia64/crtn.S b/libc/sysdeps/linux/ia64/crtn.S new file mode 100644 index 000000000..5403446b5 --- /dev/null +++ b/libc/sysdeps/linux/ia64/crtn.S @@ -0,0 +1,33 @@ +/* glibc's sysdeps/ia64/elf/initfini.c used for reference [EPILOG] */ + +	.text +	.section .init +	.proc _init# +_init: +	.prologue +	.save ar.pfs, r34 +	.vframe r32 +	.save rp, r33 +	.body +	.regstk 0,2,0,0 +	mov r12 = r32 +	mov ar.pfs = r34 +	mov b0 = r33 +	br.ret.sptk.many b0 +	.endp _init# + + + +	.section .fini +	.proc _fini# +_fini: +	.prologue +	.save ar.pfs, r34 +	.vframe r32 +	.save rp, r33 +	.body +	mov r12 = r32 +	mov ar.pfs = r34 +	mov b0 = r33 +	br.ret.sptk.many b0 +	.endp _fini# diff --git a/libc/sysdeps/linux/ia64/fork.S b/libc/sysdeps/linux/ia64/fork.S new file mode 100644 index 000000000..96d047f3f --- /dev/null +++ b/libc/sysdeps/linux/ia64/fork.S @@ -0,0 +1,41 @@ +/* Copyright (C) 2000, 2002 Free Software Foundation, Inc. +   This file is part of the GNU C Library. 
+ +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + + +#include "sysdep.h" +#define _SIGNAL_H +#include <bits/signum.h> + +/* pid_t fork(void); */ +/* Implemented as a clone system call with parameters SIGCHLD and 0	*/ + +ENTRY(__libc_fork) +	alloc r2=ar.pfs,0,0,2,0 +	mov out0=SIGCHLD	/* Return SIGCHLD when child finishes	*/ +				/* no other clone flags; nothing shared	*/ +	mov out1=0		/* Standard sp value.			*/ +	;; +	DO_CALL (SYS_ify (clone)) +	cmp.eq p6,p0=-1,r10 +(p6)	br.cond.spnt.few __syscall_error +	ret +PSEUDO_END(__libc_fork) + +weak_alias (__libc_fork, __fork) +libc_hidden_def (__fork) +weak_alias (__libc_fork, fork) diff --git a/libc/sysdeps/linux/ia64/pipe.S b/libc/sysdeps/linux/ia64/pipe.S new file mode 100644 index 000000000..a86536b13 --- /dev/null +++ b/libc/sysdeps/linux/ia64/pipe.S @@ -0,0 +1,38 @@ +/* Copyright (C) 1999, 2000, 2002 Free Software Foundation, Inc. +   This file is part of the GNU C Library. +   Contributed by David Mosberger <davidm@hpl.hp.com> + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. 
+ +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* __pipe is a special syscall since it returns two values.  */ + +#include "sysdep.h" + +ENTRY(__pipe) +       .regstk 1,0,0,0 +       DO_CALL (SYS_ify (pipe)) +       cmp.ne p6,p0=-1,r10 +       ;; +(p6)   st4 [in0]=r8,4 +(p6)   mov ret0=0 +       ;; +(p6)   st4 [in0]=r9 +(p6)   ret +       br.cond.spnt.few __syscall_error +PSEUDO_END(__pipe) + +libc_hidden_def (__pipe) +weak_alias (__pipe, pipe) diff --git a/libc/sysdeps/linux/ia64/setjmp.S b/libc/sysdeps/linux/ia64/setjmp.S new file mode 100644 index 000000000..11dc0e62e --- /dev/null +++ b/libc/sysdeps/linux/ia64/setjmp.S @@ -0,0 +1,189 @@ +/* Copyright (C) 1999, 2000, 2001, 2002, 2004, 2005 +   Free Software Foundation, Inc. +   Contributed by David Mosberger-Tang <davidm@hpl.hp.com>. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. 
+ +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA. + +   The layout of the jmp_buf is as follows.  This is subject to change +   and user-code should never depend on the particular layout of +   jmp_buf! + + +  	offset:	description: +	-------	------------ +  	0x000	stack pointer (r12)	; unchangeable (see _JMPBUF_UNWINDS) +  	0x008	r1 (gp) +	0x010	caller's unat +	0x018	fpsr +  	0x020	r4 +  	0x028	r5 +  	0x030	r6 +  	0x038	r7 +  	0x040	rp (b0) +  	0x048	b1 +  	0x050	b2 +  	0x058	b3 +  	0x060	b4 +  	0x068	b5 +  	0x070	ar.pfs +  	0x078	ar.lc +  	0x080	pr +  	0x088	ar.bsp			; unchangeable (see __longjmp.S) +  	0x090	ar.unat +	0x098	&__jmp_buf	; address of the jmpbuf (needed to locate NaT bits in unat) +	0x0a0	 f2 +	0x0b0	 f3 +	0x0c0	 f4 +	0x0d0	 f5 +	0x0e0	f16 +  	0x0f0	f17 +  	0x100	f18 +  	0x110	f19 +  	0x120	f20 +  	0x130	f21 +  	0x140	f22 +  	0x150	f23 +  	0x160	f24 +  	0x170	f25 +  	0x180	f26 +  	0x190	f27 +  	0x1a0	f28 +  	0x1b0	f29 +  	0x1c0	f30 +  	0x1d0	f31 */ + +#include "sysdep.h" +#include <features.h> + +	/* The following two entry points are the traditional entry points: */ + +LEAF(setjmp) +	alloc r8=ar.pfs,2,0,0,0 +	mov in1=1 +	br.cond.sptk.many _GI___sigsetjmp +END(setjmp) + +LEAF(_setjmp) +	alloc r8=ar.pfs,2,0,0,0 +	mov in1=0 +	br.cond.sptk.many _GI___sigsetjmp +END(_setjmp) +libc_hidden_def (_setjmp) + +	/* __sigsetjmp(__jmp_buf buf, int savemask) */ + +ENTRY(__sigsetjmp) +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) +	alloc loc1=ar.pfs,2,5,2,0 +	.save ar.unat, loc2 +	mov loc2=ar.unat +	;; +	mov r17=ar.fpsr +	mov r2=in0 +	add r3=8,in0 +	;; +.mem.offset 8,0;	st8.spill.nta [r2]=sp,16	// r12 (sp) +.mem.offset 0,0;	st8.spill.nta [r3]=gp,16	// r1 (gp) +	;; +	st8.nta [r2]=loc2,16		// save caller's unat +	st8.nta [r3]=r17,16		// save fpsr +	add r8=0xa0,in0 +	;; 
+.mem.offset 8,0;	st8.spill.nta [r2]=r4,16	// r4 +.mem.offset 0,0;	st8.spill.nta [r3]=r5,16	// r5 +	add r9=0xb0,in0 +	;; +	stf.spill.nta [r8]=f2,32 +	stf.spill.nta [r9]=f3,32 +	mov loc0=rp +	.body +	;; +	stf.spill.nta [r8]=f4,32 +	stf.spill.nta [r9]=f5,32 +	mov r17=b1 +	;; +	stf.spill.nta [r8]=f16,32 +	stf.spill.nta [r9]=f17,32 +	mov r18=b2 +	;; +	stf.spill.nta [r8]=f18,32 +	stf.spill.nta [r9]=f19,32 +	mov r19=b3 +	;; +	stf.spill.nta [r8]=f20,32 +	stf.spill.nta [r9]=f21,32 +	mov r20=b4 +	;; +	stf.spill.nta [r8]=f22,32 +	stf.spill.nta [r9]=f23,32 +	mov r21=b5 +	;; +	stf.spill.nta [r8]=f24,32 +	stf.spill.nta [r9]=f25,32 +	mov r22=ar.lc +	;; +	stf.spill.nta [r8]=f26,32 +	stf.spill.nta [r9]=f27,32 +	mov r24=pr +	;; +	stf.spill.nta [r8]=f28,32 +	stf.spill.nta [r9]=f29,32 +	;; +	stf.spill.nta [r8]=f30 +	stf.spill.nta [r9]=f31 + +.mem.offset 8,0;	st8.spill.nta [r2]=r6,16	// r6 +.mem.offset 0,0;	st8.spill.nta [r3]=r7,16	// r7 +	;; +	mov r23=ar.bsp +	mov r25=ar.unat +	mov out0=in0 + +	st8.nta [r2]=loc0,16		// b0 +	st8.nta [r3]=r17,16		// b1 +	mov out1=in1 +	;; +	st8.nta [r2]=r18,16		// b2 +	st8.nta [r3]=r19,16		// b3 +	;; +	st8.nta [r2]=r20,16		// b4 +	st8.nta [r3]=r21,16		// b5 +	;; +	st8.nta [r2]=loc1,16		// ar.pfs +	st8.nta [r3]=r22,16		// ar.lc +	;; +	st8.nta [r2]=r24,16		// pr +	st8.nta [r3]=r23,16		// ar.bsp +	;; +	st8.nta [r2]=r25		// ar.unat +	st8.nta [r3]=in0		// &__jmp_buf +#if defined NOT_IN_libc && defined IS_IN_rtld +	/* In ld.so we never save the signal mask.  
*/ +	;; +#else +	br.call.dpnt.few rp=__sigjmp_save +#endif +.ret0:					// force a new bundle ::q +	mov.m ar.unat=loc2		// restore caller's unat +	mov rp=loc0 +	mov ar.pfs=loc1 +	mov r8=0 +	ret +END(__sigsetjmp) +strong_alias(__sigsetjmp, _GI___sigsetjmp) + +weak_extern(_setjmp) +weak_extern(setjmp) diff --git a/libc/sysdeps/linux/ia64/sys/io.h b/libc/sysdeps/linux/ia64/sys/io.h new file mode 100644 index 000000000..14736ff1c --- /dev/null +++ b/libc/sysdeps/linux/ia64/sys/io.h @@ -0,0 +1,68 @@ +/* Copyright (C) 1999, 2000 Free Software Foundation, Inc. +   This file is part of the GNU C Library. +     Contributed by David Mosberger-Tang <davidm@hpl.hp.com> + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +#ifndef	_SYS_IO_H +#define	_SYS_IO_H	1 + +#include <features.h> + +__BEGIN_DECLS + +/* If TURN_ON is TRUE, request for permission to do direct i/o on the +   port numbers in the range [FROM,FROM+NUM-1].  Otherwise, turn I/O +   permission off for that range.  This call requires root privileges. + +   Portability note: not all Linux platforms support this call.  Most +   platforms based on the PC I/O architecture probably will, however. +   E.g., Linux/Alpha for Alpha PCs supports this.  
*/ +extern int ioperm (unsigned long int __from, unsigned long int __num, +		   int __turn_on); + +/* Set the I/O privilege level to LEVEL.  If LEVEL>3, permission to +   access any I/O port is granted.  This call requires root +   privileges. */ +extern int iopl (int __level); + +extern unsigned int _inb (unsigned long int __port); +extern unsigned int _inb (unsigned long int __port); +extern unsigned int _inw (unsigned long int __port); +extern unsigned int _inl (unsigned long int __port); +extern void _outb (unsigned char __val, unsigned long int __port); +extern void _outw (unsigned short __val, unsigned long int __port); +extern void _outl (unsigned int __val, unsigned long int __port); + +#define inb	_inb +#define inw	_inw +#define inl	_inl +#define outb	_outb +#define outw	_outw +#define outl	_outl + +/* Access PCI space protected from machine checks.  */ +extern int pciconfig_read (unsigned long int __bus, unsigned long int __dfn, +			   unsigned long int __off, unsigned long int __len, +			   unsigned char *__buf); + +extern int pciconfig_write (unsigned long int __bus, unsigned long int __dfn, +			    unsigned long int __off, unsigned long int __len, +			    unsigned char *__buf); + +__END_DECLS + +#endif /* _SYS_IO_H */ diff --git a/libc/sysdeps/linux/ia64/sys/procfs.h b/libc/sysdeps/linux/ia64/sys/procfs.h new file mode 100644 index 000000000..b5196b997 --- /dev/null +++ b/libc/sysdeps/linux/ia64/sys/procfs.h @@ -0,0 +1,130 @@ +/* Copyright (C) 1999, 2000, 2003 Free Software Foundation, Inc. +   This file is part of the GNU C Library. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. 
+ +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +#ifndef _SYS_PROCFS_H +#define _SYS_PROCFS_H	1 + +/* This is somehow modelled after the file of the same name on SysVr4 +   systems.  It provides a definition of the core file format for ELF +   used on Linux.  */ + +#include <features.h> +#include <signal.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/ucontext.h> +#include <sys/user.h> + +__BEGIN_DECLS + +struct elf_siginfo +  { +    int si_signo;			/* Signal number.  */ +    int si_code;			/* Extra code.  */ +    int si_errno;			/* Errno.  */ +  }; + +/* We really need just 72 but let's leave some headroom...  */ +#define ELF_NGREG	128 +/* f0 and f1 could be omitted, but so what...  */ +#define ELF_NFPREG	128 + +typedef unsigned long elf_greg_t; +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct ia64_fpreg elf_fpreg_t; +typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; + +typedef elf_greg_t greg_t; +typedef elf_gregset_t gregset_t; +typedef elf_fpregset_t fpregset_t; +#define NGREG ELF_NGREG + +/* Definitions to generate Intel SVR4-like core files.  These mostly +   have the same names as the SVR4 types with "elf_" tacked on the +   front to prevent clashes with linux definitions, and the typedef +   forms have been avoided.  This is mostly like the SVR4 structure, +   but more Linuxy, with things that Linux does not support and which +   gdb doesn't really use excluded.  Fields present but not used are +   marked with "XXX".  
*/ +struct elf_prstatus +  { +#if 0 +    long int pr_flags;			/* XXX Process flags.  */ +    short int pr_why;			/* XXX Reason for process halt.  */ +    short int pr_what;			/* XXX More detailed reason.  */ +#endif +    struct elf_siginfo pr_info;		/* Info associated with signal.  */ +    short int pr_cursig;		/* Current signal.  */ +    unsigned long int pr_sigpend;	/* Set of pending signals.  */ +    unsigned long int pr_sighold;	/* Set of held signals.  */ +#if 0 +    struct sigaltstack pr_altstack;	/* Alternate stack info.  */ +    struct sigaction pr_action;		/* Signal action for current sig.  */ +#endif +    __pid_t pr_pid; +    __pid_t pr_ppid; +    __pid_t pr_pgrp; +    __pid_t pr_sid; +    struct timeval pr_utime;		/* User time.  */ +    struct timeval pr_stime;		/* System time.  */ +    struct timeval pr_cutime;		/* Cumulative user time.  */ +    struct timeval pr_cstime;		/* Cumulative system time.  */ +#if 0 +    long int pr_instr;			/* Current instruction.  */ +#endif +    elf_gregset_t pr_reg;		/* GP registers.  */ +    int pr_fpvalid;			/* True if math copro being used.  */ +  }; + + +#define ELF_PRARGSZ     (80)    /* Number of chars for args */ + +struct elf_prpsinfo +  { +    char pr_state;			/* Numeric process state.  */ +    char pr_sname;			/* Char for pr_state.  */ +    char pr_zomb;			/* Zombie.  */ +    char pr_nice;			/* Nice val.  */ +    unsigned long int pr_flag;		/* Flags.  */ +    unsigned int pr_uid; +    unsigned int pr_gid; +    int pr_pid, pr_ppid, pr_pgrp, pr_sid; +    /* Lots missing */ +    char pr_fname[16];			/* Filename of executable.  */ +    char pr_psargs[ELF_PRARGSZ];	/* Initial part of arg list.  */ +  }; + +/* Addresses.  */ +typedef void *psaddr_t; + +/* Register sets.  Linux has different names.  */ +typedef gregset_t prgregset_t; +typedef fpregset_t prfpregset_t; + +/* We don't have any differences between processes and threads, +   therefore have only one PID type.  
*/ +typedef __pid_t lwpid_t; + + +typedef struct elf_prstatus prstatus_t; +typedef struct elf_prpsinfo prpsinfo_t; + +__END_DECLS + +#endif	/* sys/procfs.h */ diff --git a/libc/sysdeps/linux/ia64/sys/ptrace.h b/libc/sysdeps/linux/ia64/sys/ptrace.h new file mode 100644 index 000000000..986c4b2d3 --- /dev/null +++ b/libc/sysdeps/linux/ia64/sys/ptrace.h @@ -0,0 +1,135 @@ +/* `ptrace' debugger support interface.  Linux/ia64 version. +   Copyright (C) 2001 Free Software Foundation, Inc. +   This file is part of the GNU C Library. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +#ifndef _SYS_PTRACE_H +#define _SYS_PTRACE_H	1 + +#include <features.h> +#include <sys/ucontext.h> + +__BEGIN_DECLS + +/* Type of the REQUEST argument to `ptrace.'  */ +enum __ptrace_request +{ +  /* Indicate that the process making this request should be traced. +     All signals received by this process can be intercepted by its +     parent, and its parent can use the other `ptrace' requests.  */ +  PTRACE_TRACEME = 0, +#define PT_TRACE_ME PTRACE_TRACEME + +  /* Return the word in the process's text space at address ADDR.  */ +  PTRACE_PEEKTEXT = 1, +#define PT_READ_I PTRACE_PEEKTEXT + +  /* Return the word in the process's data space at address ADDR.  
*/ +  PTRACE_PEEKDATA = 2, +#define PT_READ_D PTRACE_PEEKDATA + +  /* Return the word in the process's user area at offset ADDR.  */ +  PTRACE_PEEKUSER = 3, +#define PT_READ_U PTRACE_PEEKUSER + +  /* Write the word DATA into the process's text space at address ADDR.  */ +  PTRACE_POKETEXT = 4, +#define PT_WRITE_I PTRACE_POKETEXT + +  /* Write the word DATA into the process's data space at address ADDR.  */ +  PTRACE_POKEDATA = 5, +#define PT_WRITE_D PTRACE_POKEDATA + +  /* Write the word DATA into the process's user area at offset ADDR.  */ +  PTRACE_POKEUSER = 6, +#define PT_WRITE_U PTRACE_POKEUSER + +  /* Continue the process.  */ +  PTRACE_CONT = 7, +#define PT_CONTINUE PTRACE_CONT + +  /* Kill the process.  */ +  PTRACE_KILL = 8, +#define PT_KILL PTRACE_KILL + +  /* Single step the process. +     This is not supported on all machines.  */ +  PTRACE_SINGLESTEP = 9, +#define PT_STEP PTRACE_SINGLESTEP + +  /* Execute process until next taken branch.  */ +  PTRACE_SINGLEBLOCK = 12, +#define PT_STEPBLOCK PTRACE_SINGLEBLOCK + +  /* Get siginfo for process.  */ +  PTRACE_GETSIGINFO = 13, +#define PT_GETSIGINFO PTRACE_GETSIGINFO + +  /* Set new siginfo for process.  */ +  PTRACE_SETSIGINFO = 14, +#define PT_SETSIGINFO PTRACE_SETSIGINFO + +  /* Attach to a process that is already running. */ +  PTRACE_ATTACH = 16, +#define PT_ATTACH PTRACE_ATTACH + +  /* Detach from a process attached to with PTRACE_ATTACH.  */ +  PTRACE_DETACH = 17, +#define PT_DETACH PTRACE_DETACH + +  /* Get all registers (pt_all_user_regs) in one shot */ +  PTRACE_GETREGS = 18, +#define PT_GETREGS PTRACE_GETREGS + +  /* Set all registers (pt_all_user_regs) in one shot */ +  PTRACE_SETREGS = 19, +#define PT_SETREGS PTRACE_SETREGS + +  /* Continue and stop at the next (return from) syscall.  */ +  PTRACE_SYSCALL = 24 +#define PT_SYSCALL PTRACE_SYSCALL +}; + +/* pt_all_user_regs is used for PTRACE_GETREGS/PTRACE_SETREGS.  
*/ +struct pt_all_user_regs +  { +    unsigned long nat; +    unsigned long cr_iip; +    unsigned long cfm; +    unsigned long cr_ipsr; +    unsigned long pr; + +    unsigned long gr[32]; +    unsigned long br[8]; +    unsigned long ar[128]; +    struct ia64_fpreg fr[128]; +  }; + +/* Perform process tracing functions.  REQUEST is one of the values +   above, and determines the action to be taken. +   For all requests except PTRACE_TRACEME, PID specifies the process to be +   traced. + +   PID and the other arguments described above for the various requests should +   appear (those that are used for the particular request) as: +     pid_t PID, void *ADDR, int DATA, void *ADDR2 +   after REQUEST.  */ +extern long int ptrace (enum __ptrace_request __request, ...) __THROW; + +__END_DECLS + +#endif /* _SYS_PTRACE_H */ diff --git a/libc/sysdeps/linux/ia64/sys/ucontext.h b/libc/sysdeps/linux/ia64/sys/ucontext.h new file mode 100644 index 000000000..17dc85f99 --- /dev/null +++ b/libc/sysdeps/linux/ia64/sys/ucontext.h @@ -0,0 +1,66 @@ +/* Copyright (C) 1998, 2000, 2001, 2002, 2004 Free Software Foundation, Inc. +   This file is part of the GNU C Library. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  
*/ + +#ifndef _SYS_UCONTEXT_H +#define _SYS_UCONTEXT_H	1 + +#include <features.h> +#include <signal.h> + +#include <bits/sigcontext.h> + +/* + * These are here mostly for backwards compatibility with older Unices. + * IA-64 Linux does not distinguish between "struct sigcontext" and + * "ucontext_t" as all the necessary info is inside the former. + */ + +typedef struct sigcontext mcontext_t; + +#if defined __cplusplus && __GNUC_PREREQ (3, 5) +# define _SC_GR0_OFFSET	\ +	__builtin_offsetof (struct sigcontext, sc_gr[0]) +#elif defined __GNUC__ +# define _SC_GR0_OFFSET	\ +	(((char *) &((struct sigcontext *) 0)->sc_gr[0]) - (char *) 0) +#else +# define _SC_GR0_OFFSET	0xc8	/* pray that this is correct... */ +#endif + +typedef struct ucontext +  { +    union +      { +	mcontext_t _mc; +	struct +	  { +	    unsigned long _pad[_SC_GR0_OFFSET/8]; +	    struct ucontext *_link;	/* this should overlay sc_gr[0] */ +	  } +	_uc; +      } +    _u; +  } +ucontext_t; + +#define uc_mcontext	_u._mc +#define uc_sigmask	_u._mc.sc_mask +#define uc_stack	_u._mc.sc_stack +#define uc_link		_u._uc._link + +#endif /* sys/ucontext.h */ diff --git a/libc/sysdeps/linux/ia64/sys/user.h b/libc/sysdeps/linux/ia64/sys/user.h new file mode 100644 index 000000000..039218761 --- /dev/null +++ b/libc/sysdeps/linux/ia64/sys/user.h @@ -0,0 +1,54 @@ +/* Copyright (C) 2002 Free Software Foundation, Inc. +   This file is part of the GNU C Library. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. 
+ +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +#ifndef _SYS_USER_H +#define _SYS_USER_H	1 + +#include <features.h> +#include <sys/types.h> + +/* This definition comes directly from the kernel headers.  If +   anything changes in them this header has to be changed, too.  */ + + +/* The definition in the kernel has the comment "XXX fix me".  */ +#define EF_SIZE		3072 + +/* With EF_SIZE at 3072, regs has EF_SIZE / 8 + 32 = 416 elements; the remaining members are described field by field.  */ +struct user +{ +  unsigned long int regs[EF_SIZE / 8 + 32];	/* Integer and fp regs.  */ +  size_t u_tsize;				/* Text size (pages).  */ +  size_t u_dsize;				/* Data size (pages).  */ +  size_t u_ssize;				/* Stack size (pages).  */ +  unsigned long int start_code;			/* Text starting address.  */ +  unsigned long int start_data;			/* Data starting address.  */ +  unsigned long int start_stack;		/* Stack starting address.  */ +  long int signal;				/* Signal causing core dump.  */ +  struct regs *u_ar0;				/* Help gdb find registers.  */ +  unsigned long int magic;			/* Identifies a core file.  */ +  char u_comm[32];				/* User command name.  */ +}; +/* NOTE(review): the macros below rely on PAGE_SIZE and on a struct user variable named u, neither of which is defined in this header itself.  */ +#define NBPG			PAGE_SIZE +#define UPAGES			1 +#define HOST_TEXT_START_ADDR	(u.start_code) +#define HOST_DATA_START_ADDR	(u.start_data) +#define HOST_STACK_END_ADDR	(u.start_stack + u.u_ssize * NBPG) + +#endif	/* sys/user.h */ diff --git a/libc/sysdeps/linux/ia64/syscall.S b/libc/sysdeps/linux/ia64/syscall.S new file mode 100644 index 000000000..e4ac834c6 --- /dev/null +++ b/libc/sysdeps/linux/ia64/syscall.S @@ -0,0 +1,30 @@ +/* Copyright (C) 1999, 2000 Free Software Foundation, Inc. +   This file is part of the GNU C Library. +   Contributed by Jes Sorensen <Jes.Sorensen@cern.ch>. 
+ +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +#include "sysdep.h" +/* syscall: the number arrives in r32 and is copied to r15 before the break; afterwards r10 == -1 selects the __syscall_error path and any other value returns directly.  */ +ENTRY(syscall) +	alloc r2=ar.pfs,1,0,8,0	/* frame: 1 input, 0 locals, 8 outputs, 0 rotating */ +	mov r15=r32		/* syscall number (the single input register) */ +	break __BREAK_SYSCALL +	;; +	cmp.ne p6,p0=-1,r10	/* p6 = (r10 != -1), i.e. success; r10 = -1 on error */ +(p6)	ret			/* success: return to the caller */ +	br.cond.spnt.few __syscall_error	/* failure: continue in __syscall_error */ +PSEUDO_END(syscall) diff --git a/libc/sysdeps/linux/ia64/sysdep.h b/libc/sysdeps/linux/ia64/sysdep.h new file mode 100644 index 000000000..03e74360d --- /dev/null +++ b/libc/sysdeps/linux/ia64/sysdep.h @@ -0,0 +1,168 @@ +/* Copyright (C) 1999, 2000, 2002, 2003, 2004 Free Software Foundation, Inc. +   This file is part of the GNU C Library. +   Written by Jes Sorensen, <Jes.Sorensen@cern.ch>, April 1999. +   Based on code originally written by David Mosberger-Tang + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. 
+ +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +#ifndef _LINUX_IA64_SYSDEP_H +#define _LINUX_IA64_SYSDEP_H 1 + +#include <features.h> +#include <asm/unistd.h> + +#ifdef __ASSEMBLER__ + +/* Macros to help writing .prologue directives in assembly code.  */ +#define ASM_UNW_PRLG_RP			0x8 +#define ASM_UNW_PRLG_PFS		0x4 +#define ASM_UNW_PRLG_PSP		0x2 +#define ASM_UNW_PRLG_PR			0x1 +#define ASM_UNW_PRLG_GRSAVE(ninputs)	(32+(ninputs)) +/* C_LABEL(name) yields the label name followed by a colon; the two variants differ only in how the tokens are joined.  */ +#ifdef	__STDC__ +#define C_LABEL(name)		name##: +#else +#define C_LABEL(name)		name/**/: +#endif +/* CALL_MCOUNT is defined empty here, so it contributes nothing to ENTRY.  */ +#define CALL_MCOUNT +/* ENTRY(name): select .text, align to 32, open a .proc region, export the symbol, place its label, then expand CALL_MCOUNT.  */ +#define ENTRY(name)				\ +	.text;					\ +	.align 32;				\ +	.proc C_SYMBOL_NAME(name);		\ +	.global C_SYMBOL_NAME(name);		\ +	C_LABEL(name)				\ +	CALL_MCOUNT +/* LEAF(name): as ENTRY but without CALL_MCOUNT.  NOTE(review): .global is given the bare name here, whereas ENTRY wraps it in C_SYMBOL_NAME.  */ +#define LEAF(name)				\ +  .text;					\ +  .align 32;					\ +  .proc C_SYMBOL_NAME(name);			\ +  .global name;					\ +  C_LABEL(name) + +/* Mark the end of function SYM.  NOTE: a later #undef END / #define END in this header replaces this definition.  */ +#undef END +#define END(sym)	.endp C_SYMBOL_NAME(sym) + +/* For Linux we can use the system call table in the header file +	/usr/include/asm/unistd.h +   of the kernel.  But these symbols do not follow the SYS_* syntax +   so we have to redefine the `SYS_ify' macro here.  */ +#undef SYS_ify +#ifdef __STDC__ +# define SYS_ify(syscall_name)	__NR_##syscall_name +#else +# define SYS_ify(syscall_name)	__NR_/**/syscall_name +#endif + +/* Linux uses a negative return value to indicate syscall errors, unlike +   most Unices, which use the condition codes' carry flag. + +   Since version 2.1 the return value of a system call might be negative +   even if the call succeeded.  E.g., the `lseek' system call might return +   a large offset.  Therefore we must not anymore test for < 0, but test +   for a real error by making sure the value in %d0 is a real error +   number (NOTE(review): %d0 is not an IA-64 register name, so this text looks inherited from another port; the macros in this header detect failure by testing r10 == -1).  
Linus said he will make sure that no syscall returns a value +   in -1 .. -4095 as a valid result so we can safely test with -4095.  */ + +/* We don't want the label for the error handler to be visible in the symbol +   table when we define it here.  */ +#define SYSCALL_ERROR_LABEL __syscall_error +/* PSEUDO(name, syscall_name, args): ENTRY(name), then DO_CALL with the __NR_ number, then a branch to __syscall_error when r10 == -1.  The args parameter is not used in the expansion.  */ +#undef PSEUDO +#define	PSEUDO(name, syscall_name, args)	\ +  ENTRY(name)					\ +    DO_CALL (SYS_ify(syscall_name));		\ +	cmp.eq p6,p0=-1,r10;			\ +(p6)	br.cond.spnt.few __syscall_error; +/* DO_CALL_VIA_BREAK(num): put num in r15 and issue break __BREAK_SYSCALL.  */ +#define DO_CALL_VIA_BREAK(num)			\ +	mov r15=num;				\ +	break __BREAK_SYSCALL +/* DO_CALL(num): with IA64_USE_NEW_STUB it loads a target address (from r13 + SYSINFO_OFFSET when SHARED, otherwise from _dl_sysinfo), keeps ar.pfs in r11 across a br.call through b7, and restores it afterwards; without it DO_CALL is simply DO_CALL_VIA_BREAK.  */ +#ifdef IA64_USE_NEW_STUB +# ifdef SHARED +#  define DO_CALL(num)				\ +	.prologue;				\ +	adds r2 = SYSINFO_OFFSET, r13;;		\ +	ld8 r2 = [r2];				\ +	.save ar.pfs, r11;			\ +	mov r11 = ar.pfs;;			\ +	.body;					\ +	mov r15 = num;				\ +	mov b7 = r2;				\ +	br.call.sptk.many b6 = b7;;		\ +	.restore sp;				\ +	mov ar.pfs = r11;			\ +	.prologue;				\ +	.body +# else /* !SHARED */ +#  define DO_CALL(num)				\ +	.prologue;				\ +	mov r15 = num;				\ +	movl r2 = _dl_sysinfo;;			\ +	ld8 r2 = [r2];				\ +	.save ar.pfs, r11;			\ +	mov r11 = ar.pfs;;			\ +	.body;					\ +	mov b7 = r2;				\ +	br.call.sptk.many b6 = b7;;		\ +	.restore sp;				\ +	mov ar.pfs = r11;			\ +	.prologue;				\ +	.body +# endif +#else +# define DO_CALL(num)				DO_CALL_VIA_BREAK(num) +#endif +/* PSEUDO_END and its _NOERRNO and _ERRVAL variants each just close the .proc region with .endp.  */ +#undef PSEUDO_END +#define PSEUDO_END(name)	.endp C_SYMBOL_NAME(name); +/* PSEUDO_NOERRNO: like PSEUDO but with no error check after the call.  */ +#undef PSEUDO_NOERRNO +#define	PSEUDO_NOERRNO(name, syscall_name, args)	\ +  ENTRY(name)						\ +    DO_CALL (SYS_ify(syscall_name)); + +#undef PSEUDO_END_NOERRNO +#define PSEUDO_END_NOERRNO(name)	.endp C_SYMBOL_NAME(name); +/* PSEUDO_ERRVAL: after the call, copies r8 into r10 when r10 == -1 instead of branching to the error handler.  */ +#undef PSEUDO_ERRVAL +#define	PSEUDO_ERRVAL(name, syscall_name, args)	\ +  ENTRY(name)					\ +    DO_CALL (SYS_ify(syscall_name));		\ +	cmp.eq p6,p0=-1,r10;			\ +(p6)	mov r10=r8; + + +#undef PSEUDO_END_ERRVAL +#define PSEUDO_END_ERRVAL(name)	.endp C_SYMBOL_NAME(name); +/* Final END(name): record the symbol size, then close the .proc region.  This overrides the earlier END definition in this header.  */ +#undef END +#define END(name)						\ +	.size	C_SYMBOL_NAME(name), . 
- C_SYMBOL_NAME(name) ;	\ +	.endp	C_SYMBOL_NAME(name) +/* ret returns through b0; the _NOERRNO and _ERRVAL spellings are plain aliases for it.  */ +#define ret			br.ret.sptk.few b0 +#define ret_NOERRNO		ret +#define ret_ERRVAL		ret + +#endif /* __ASSEMBLER__ */ + +#endif /* linux/ia64/sysdep.h */ diff --git a/libc/sysdeps/linux/ia64/vfork.S b/libc/sysdeps/linux/ia64/vfork.S new file mode 100644 index 000000000..ab29f6287 --- /dev/null +++ b/libc/sysdeps/linux/ia64/vfork.S @@ -0,0 +1,44 @@ +/* Copyright (C) 2000, 2002 Free Software Foundation, Inc. +   This file is part of the GNU C Library. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Lesser General Public +   License as published by the Free Software Foundation; either +   version 2.1 of the License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Lesser General Public License for more details. + +   You should have received a copy of the GNU Lesser General Public +   License along with the GNU C Library; if not, write to the Free +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +   02111-1307 USA.  */ + +/* NOTE(review): _SIGNAL_H is defined just before <bits/signum.h> is included, presumably so that header accepts direct inclusion; confirm.  */ +#include "sysdep.h" +#define _SIGNAL_H +#include <bits/signum.h> + +/* The following are defined in linux/sched.h, which unfortunately	*/ +/* is not safe for inclusion in an assembly file.			*/ +#define CLONE_VM        0x00000100      /* set if VM shared between processes */ +#define CLONE_VFORK     0x00004000      /* set if the parent wants the child to wake it up on mm_release */ + +/* pid_t vfork(void); */ +/* Implemented as __clone_syscall(CLONE_VFORK | CLONE_VM | SIGCHLD, 0)	*/ + +ENTRY(__vfork) +	alloc r2=ar.pfs,0,0,2,0	/* frame: no inputs, no locals, 2 outputs */ +	mov out0=CLONE_VM+CLONE_VFORK+SIGCHLD	/* first argument: the flag word */ +	mov out1=0		/* Standard sp value.			
*/ +	;; +	DO_CALL_VIA_BREAK (SYS_ify (clone))	/* r15 = __NR_clone, then break */ +	cmp.eq p6,p0=-1,r10	/* p6 = (r10 == -1), i.e. failure */ +(p6)	br.cond.spnt.few __syscall_error +	ret +PSEUDO_END(__vfork) +libc_hidden_def (__vfork) + +weak_alias (__vfork, vfork) | 
