ape: add machine specific code for spim

This commit is contained in:
cinap_lenrek 2015-10-04 19:50:24 +02:00
parent d2af6b40af
commit 9e3ef5c777
6 changed files with 1034 additions and 21 deletions

View file

@ -0,0 +1,39 @@
/*
 * memchr(s1, c, n): scan the n bytes starting at s1 for the byte c.
 * Returns in R1 a pointer to the first matching byte, or 0 if none.
 */
TEXT memchr(SB), $0
MOVW R1, 0(FP) /* first argument arrives in R1; store it in its frame slot */
MOVW n+8(FP), R1 /* R1 = n */
MOVW s1+0(FP), R2 /* R2 = scan pointer */
MOVBU c+4(FP), R3 /* R3 = byte to look for */
ADDU R1, R2, R6 /* R6 = s1+n, end of the buffer */
AND $(~1), R1, R5 /* R5 = n rounded down to even */
ADDU R2, R5 /* R5 = end of the two-at-a-time loop */
BEQ R2, R5, lt2 /* fewer than two bytes to scan */
/*
 * two bytes per iteration
 */
l1:
MOVBU 0(R2), R4
MOVBU 1(R2), R7
BEQ R3, R4, eq0 /* first byte matches; R2 still points at it */
ADDU $2, R2
BEQ R3, R7, eq /* second byte matches; it is at R2-1 after the add */
BNE R2, R5, l1
/*
 * remaining bytes, one per iteration
 */
lt2:
BEQ R2, R6, zret
l2:
MOVBU (R2), R4
ADDU $1, R2
BEQ R3, R4, eq
BNE R2, R6, l2
zret:
MOVW R0, R1 /* not found: return 0 */
RET
eq0:
MOVW R2, R1
RET
eq:
SUBU $1,R2, R1 /* pointer was already advanced past the match */
RET

View file

@ -0,0 +1,117 @@
/*
 * memcmp(s1, s2, n): compare n bytes as unsigned values.
 * Returns in R1: 0 when all bytes are equal, 1 when the first
 * differing byte of s1 is larger, -1 when it is smaller.
 */
TEXT memcmp(SB), $0
MOVW R1, 0(FP) /* first argument arrives in R1; store it in its frame slot */
/*
* performance:
* aligned about 1.0us/call and 17.4mb/sec
* unaligned is about 3.1mb/sec
* NOTE(review): these figures presumably describe the word-at-a-time
* path, which is currently bypassed -- confirm before relying on them.
*/
MOVW n+8(FP), R3 /* R3 is count */
MOVW s1+0(FP), R4 /* R4 is pointer1 */
MOVW s2+4(FP), R5 /* R5 is pointer2 */
ADDU R3,R4, R6 /* R6 is end pointer1 */
/*
 * TODO(mischief): fix multibyte copy
 * Until then everything is compared by the byte loop at out;
 * the code between this jump and out is never executed.
 * The word compares are ordered by SGTU on whole words, which
 * the note in the 4-at-a-time loop says only works on big endian.
 */
JMP out
/*
* if not at least 4 chars,
* dont even mess around.
* 3 chars to guarantee any
* rounding up to a word
* boundary and 4 characters
* to get at least maybe one
* full word cmp.
*/
SGT $4,R3, R1
BNE R1, out
/*
* test if both pointers
* are similarly word aligned
*/
XOR R4,R5, R1
AND $3, R1
BNE R1, out
/*
* byte at a time to word align
*/
l1:
AND $3,R4, R1
BEQ R1, l2
MOVBU 0(R4), R8
MOVBU 0(R5), R9
ADDU $1, R4
BNE R8,R9, ne
ADDU $1, R5
JMP l1
/*
* turn R3 into end pointer1-15
* cmp 16 at a time while theres room
*/
l2:
ADDU $-15,R6, R3
l3:
SGTU R3,R4, R1
BEQ R1, l4
MOVW 0(R4), R8
MOVW 0(R5), R9
MOVW 4(R4), R10
BNE R8,R9, ne
MOVW 4(R5), R11
MOVW 8(R4), R8
BNE R10,R11, ne1
MOVW 8(R5), R9
MOVW 12(R4), R10
BNE R8,R9, ne
MOVW 12(R5), R11
ADDU $16, R4
BNE R10,R11, ne1
BNE R8,R9, ne /* R8 and R9 were already found equal above */
ADDU $16, R5
JMP l3
/*
* turn R3 into end pointer1-3
* cmp 4 at a time while theres room
*/
l4:
ADDU $-3,R6, R3
l5:
SGTU R3,R4, R1
BEQ R1, out
MOVW 0(R4), R8
MOVW 0(R5), R9
ADDU $4, R4
BNE R8,R9, ne /* only works because big endian */
ADDU $4, R5
JMP l5
/*
* last loop, cmp byte at a time
*/
out:
SGTU R6,R4, R1 /* R1 = 1 while pointer1 is below the end, else 0 */
BEQ R1, ret /* reached the end: R1 is 0, the "equal" result */
MOVBU 0(R4), R8
MOVBU 0(R5), R9
ADDU $1, R4
BNE R8,R9, ne
ADDU $1, R5
JMP out
/*
 * mismatch held in R10/R11
 */
ne1:
SGTU R10,R11, R1 /* R1 = 1 when R10 > R11 */
BNE R1, ret
MOVW $-1,R1
RET
/*
 * mismatch held in R8/R9
 */
ne:
SGTU R8,R9, R1 /* R1 = 1 when R8 > R9 */
BNE R1, ret
MOVW $-1,R1
ret:
RET
END

View file

@ -1,30 +1,45 @@
# Builds members of the APE library named by LIB below.
# The rules further down take sources from ../mips; the final include
# pulls in the generic system-library rules.
# NOTE(review): this listing appears to contain both the old explicit
# OFILES list and the newer ${...:%.s=%.$O} form -- confirm which is current.
APE=/sys/src/ape
<$APE/config
LIB=/$objtype/lib/ape/libap.a
MIPSSFILES=\
getfcr.s\
main9.s\
main9p.s\
memmove.s\
memset.s\
setjmp.s\
strcmp.s\
strcpy.s\
tas.s\
MIPSCFILES=\
cycles.c\
lock.c\
notetramp.c\
SFILES=\
memchr.s\
memcmp.s\
strchr.s\
vlop.s\
CFILES=\
vlrt.c\
OFILES=\
cycles.$O\
getfcr.$O\
lock.$O\
main9.$O\
main9p.$O\
memchr.$O\
memcmp.$O\
memmove.$O\
memset.$O\
notetramp.$O\
setjmp.$O\
strchr.$O\
strcmp.$O\
strcpy.$O\
tas.$O\
vlop.$O\
vlrt.$O\
${MIPSSFILES:%.s=%.$O}\
${SFILES:%.s=%.$O}\
${MIPSCFILES:%.c=%.$O}\
${CFILES:%.c=%.$O}\
%.$O: ../mips/%.c
$CC -I../mips $CFLAGS ../mips/$stem.c
MIPSS=`{echo $MIPSSFILES | sed 's/\.s//g; s/ /|/g'}
^($MIPSS)\.$O:R: '../mips/\1.s'
$AS $AFLAGS ../mips/$stem1.s
%.$O: ../mips/%.s
$AS -I../mips $AFLAGS ../mips/$stem.s
MIPSC=`{echo $MIPSCFILES | sed 's/\.c//g; s/ /|/g'}
^($MIPSC)\.$O:R: '../mips/\1.c'
$CC $CFLAGS ../mips/$stem1.c
</sys/src/cmd/mksyslib

View file

@ -0,0 +1,63 @@
/*
 * strchr(s, c): return in R1 a pointer to the first byte equal to c
 * in the NUL-terminated string s, or 0 when c does not occur.
 * Searching for c == 0 returns the address of the terminator.
 */
TEXT strchr(SB), $0
	MOVW R1, 0(FP) /* first argument arrives in R1; store it in its frame slot */
	MOVB c+4(FP), R4 /* R4 = c, sign extended like the MOVB loads below */
	MOVW s+0(FP), R3 /* R3 = scan pointer */
	BEQ R4, l2
/*
 * char is not null
 */
l1:
	MOVB (R3), R1
	ADDU $1, R3
	BEQ R1, ret /* hit the terminator: R1 is already 0 */
	BNE R1,R4, l1
	JMP rm1
/*
 * char is null
 * align to word
 */
l2:
	AND $3,R3, R1
	BEQ R1, l3
	MOVB (R3), R1
	ADDU $1, R3
	BNE R1, l2
	JMP rm1
/*
 * word at a time.
 * the bytes must be tested in memory order so the first
 * zero byte wins.  this machine is little endian, so the
 * byte at the lowest address is the least significant
 * byte of the loaded word.
 */
l3:
	MOVW $0xff000000, R6
	MOVW $0x00ff0000, R7
l4:
	MOVW (R3), R5
	ADDU $4, R3
	AND $0xff,R5, R1 /* byte 0 */
	AND $0xff00,R5, R2 /* byte 1 */
	BEQ R1, b0
	AND R7,R5, R1 /* byte 2 */
	BEQ R2, b1
	AND R6,R5, R2 /* byte 3 */
	BEQ R1, b2
	BNE R2, l4
/*
 * R3 has been advanced past the word (or byte);
 * step back to the matching byte.
 */
rm1:
	ADDU $-1,R3, R1
	JMP ret
b2:
	ADDU $-2,R3, R1
	JMP ret
b1:
	ADDU $-3,R3, R1
	JMP ret
b0:
	ADDU $-4,R3, R1
	JMP ret
ret:
	RET

View file

@ -0,0 +1,20 @@
/*
* from https://bitbucket.org/cherry9/plan9-loongson
*
* _mulv: multiply two 64-bit values passed as lo/hi word pairs
* and store the low 64 bits of the product through the pointer
* held in R1 (low word at offset 0, high word at offset 4).
*/
TEXT _mulv(SB), $0
MOVW 8(FP), R2 /* hi1 */
MOVW 4(FP), R3 /* lo1 */
MOVW 16(FP), R4 /* hi2 */
MOVW 12(FP), R5 /* lo2 */
MULU R5, R3 /* lo1*lo2 -> hi:lo*/
MOVW LO, R6
MOVW HI, R7
MULU R3, R4 /* lo1*hi2 -> _:hi */
MOVW LO, R8 /* only the low word of a cross product reaches the result */
ADDU R8, R7
MULU R2, R5 /* hi1*lo2 -> _:hi */
MOVW LO, R8
ADDU R8, R7
MOVW R6, 0(R1) /* lo */
MOVW R7, 4(R1) /* hi */
RET

View file

@ -0,0 +1,759 @@
typedef unsigned long ulong;
typedef unsigned int uint;
typedef unsigned short ushort;
typedef unsigned char uchar;
typedef signed char schar;

/*
 * mask with only bit n-1 set; SIGN(32) is the sign bit of a 32-bit word.
 * the argument is parenthesised so that expressions may be passed.
 */
#define SIGN(n) (1UL<<((n)-1))

/*
 * a 64-bit integer as the compiler passes it to this runtime:
 * two words, low word at the lower address (little endian).
 * the second view names the 16-bit halves of each word; on a
 * little-endian machine the least significant half (ls) comes
 * before the most significant half (ms).
 */
typedef struct Vlong Vlong;
struct Vlong
{
	union
	{
		struct
		{
			ulong lo;
			ulong hi;
		};
		struct
		{
			ushort lols;
			ushort loms;
			ushort hils;
			ushort hims;
		};
	};
};
void abort(void);
/* needed by profiler; can't be profiled. */
#pragma profile off
/*
 * *r = a + b.
 * a carry out of the low word shows up as the low sum
 * wrapping around to a value below a.lo.
 */
void
_addv(Vlong *r, Vlong a, Vlong b)
{
	ulong sumlo, carry;

	sumlo = a.lo + b.lo;
	carry = sumlo < a.lo;
	r->lo = sumlo;
	r->hi = a.hi + b.hi + carry;
}
/*
 * *r = a - b.
 * a borrow from the high word shows up as the low
 * difference wrapping around to a value above a.lo.
 */
void
_subv(Vlong *r, Vlong a, Vlong b)
{
	ulong diflo, borrow;

	diflo = a.lo - b.lo;
	borrow = diflo > a.lo;
	r->lo = diflo;
	r->hi = a.hi - b.hi - borrow;
}
#pragma profile on
/*
 * convert the double d to a 64-bit integer in *y.
 * the double is taken apart through a union: the top 12 bits of
 * the high word are sign and exponent, the remaining 52 bits the
 * fraction.  the magnitude is shifted into place (bits shifted
 * out on the right are dropped) and negated if the sign bit is set.
 */
void
_d2v(Vlong *y, double d)
{
	union { double d; struct Vlong; } bits;
	ulong manthi, mantlo, reslo, reshi;
	int rsh, lsh;

	bits.d = d;
	/* fraction with the implicit leading one put back */
	manthi = (bits.hi & 0xfffff) | 0x100000;
	mantlo = bits.lo;
	/* right shift that puts the mantissa's binary point at bit 0 */
	rsh = 1075 - ((bits.hi >> 20) & 0x7ff);

	reslo = 0;
	reshi = 0;
	if(rsh == 0) {
		reslo = mantlo;
		reshi = manthi;
	} else if(rsh > 0) {
		/* v = (hi||lo) >> rsh */
		if(rsh < 32) {
			reslo = (mantlo >> rsh) | (manthi << (32-rsh));
			reshi = manthi >> rsh;
		} else if(rsh == 32)
			reslo = manthi;
		else if(rsh < 64)
			reslo = manthi >> (rsh-32);
		/* rsh >= 64: every bit is shifted out, result stays zero */
	} else {
		/* v = (hi||lo) << lsh */
		lsh = -rsh;
		if(lsh <= 10) {
			reslo = mantlo << lsh;
			reshi = (manthi << lsh) | (mantlo >> (32-lsh));
		} else {
			/* overflow */
			reshi = d;	/* causes something awful */
		}
	}

	/* two's complement negate for a negative input */
	if(bits.hi & SIGN(32)) {
		if(reslo == 0)
			reshi = -reshi;
		else {
			reslo = -reslo;
			reshi = ~reshi;
		}
	}
	y->hi = reshi;
	y->lo = reslo;
}
/*
 * convert the float f to a 64-bit integer in *y
 * by widening it to double and using _d2v.
 */
void
_f2v(Vlong *y, float f)
{
	double wide;

	wide = f;
	_d2v(y, wide);
}
/*
 * convert the signed 64-bit integer x to double.
 * a negative value is negated first and the unsigned magnitude
 * converted.  the magnitude can be as large as 2^63 (for the most
 * negative value), so its high word must be treated as unsigned;
 * a signed cast would turn 0x80000000 negative and flip the sign
 * of the result.
 */
double
_v2d(Vlong x)
{
	if(x.hi & SIGN(32)) {
		if(x.lo) {
			x.lo = -x.lo;
			x.hi = ~x.hi;
		} else
			x.hi = -x.hi;
		return -(x.hi*4294967296. + x.lo);
	}
	/* sign bit clear: hi is below 2^31, signed and unsigned agree */
	return x.hi*4294967296. + x.lo;
}
/*
 * convert the signed 64-bit integer x to float
 * by way of _v2d.
 */
float
_v2f(Vlong x)
{
	double wide;

	wide = _v2d(x);
	return wide;
}
/*
 * convert the unsigned 64-bit integer x to double:
 * high word scaled by 2^32 plus low word.
 */
double
_uv2d(Vlong x)
{
	double high, low;

	high = x.hi;
	low = x.lo;
	return high*4294967296. + low;
}
/*
 * convert the unsigned 64-bit integer x to float
 * by way of _uv2d.
 */
float
_uv2f(Vlong x)
{
	double wide;

	wide = _uv2d(x);
	return wide;
}
/*
 * *lv += rv for a 64-bit lvalue and a double operand.
 * v2d converts the old value of *lv to double; the sum is
 * converted back with _d2v, stored in *lv and copied to *ret.
 */
void
_vasaddd(Vlong *ret, Vlong *lv, double v2d(Vlong), double rv)
{
	double result;

	result = v2d(*lv) + rv;
	_d2v(lv, result);
	*ret = *lv;
}
/*
 * *lv -= rv for a 64-bit lvalue and a double operand.
 * v2d converts the old value of *lv to double; the difference
 * is converted back with _d2v, stored in *lv and copied to *ret.
 */
void
_vassubd(Vlong *ret, Vlong *lv, double v2d(Vlong), double rv)
{
	double result;

	result = v2d(*lv) - rv;
	_d2v(lv, result);
	*ret = *lv;
}
/*
 * *lv *= rv for a 64-bit lvalue and a double operand.
 * v2d converts the old value of *lv to double; the product
 * is converted back with _d2v, stored in *lv and copied to *ret.
 */
void
_vasmuld(Vlong *ret, Vlong *lv, double v2d(Vlong), double rv)
{
	double result;

	result = v2d(*lv) * rv;
	_d2v(lv, result);
	*ret = *lv;
}
/*
 * *lv /= rv for a 64-bit lvalue and a double operand.
 * v2d converts the old value of *lv to double; the quotient
 * is converted back with _d2v, stored in *lv and copied to *ret.
 */
void
_vasdivd(Vlong *ret, Vlong *lv, double v2d(Vlong), double rv)
{
	double result;

	result = v2d(*lv) / rv;
	_d2v(lv, result);
	*ret = *lv;
}
/*
 * unsigned 64-bit division by shift and subtract.
 * divides num by den; the quotient is stored in *qp and the
 * remainder in *rp, each only when the pointer is non-nil.
 */
static void
dodiv(Vlong num, Vlong den, Vlong *qp, Vlong *rp)
{
ulong numlo, numhi, denhi, denlo, quohi, quolo, t;
int i;
numhi = num.hi;
numlo = num.lo;
denhi = den.hi;
denlo = den.lo;
/*
* get a divide by zero:
* denlo is zero here, so the division below is
* the machine's own divide by zero
*/
if(denlo==0 && denhi==0) {
numlo = numlo / denlo;
}
/*
* set up the divisor and find the number of iterations needed.
* quohi:quolo temporarily holds the value the divisor is shifted
* up towards: the numerator, capped at SIGN(32) in the high word
* so the loop stops before the divisor's top bit is shifted out.
*/
if(numhi >= SIGN(32)) {
quohi = SIGN(32);
quolo = 0;
} else {
quohi = numhi;
quolo = numlo;
}
i = 0;
/* double the divisor until it is no longer below that value */
while(denhi < quohi || (denhi == quohi && denlo < quolo)) {
denhi = (denhi<<1) | (denlo>>31);
denlo <<= 1;
i++;
}
quohi = 0;
quolo = 0;
/*
* i+1 steps: shift the quotient left, subtract the divisor from
* the numerator where it fits (setting the new quotient bit),
* then halve the divisor.
*/
for(; i >= 0; i--) {
quohi = (quohi<<1) | (quolo>>31);
quolo <<= 1;
if(numhi > denhi || (numhi == denhi && numlo >= denlo)) {
t = numlo;
numlo -= denlo;
/* low word wrapped: borrow from the high word */
if(numlo > t)
numhi--;
numhi -= denhi;
quolo |= 1;
}
denlo = (denlo>>1) | (denhi<<31);
denhi >>= 1;
}
/* what is left of the numerator is the remainder */
if(qp) {
qp->lo = quolo;
qp->hi = quohi;
}
if(rp) {
rp->lo = numlo;
rp->hi = numhi;
}
}
/*
 * *q = n / d, unsigned.
 * when both operands fit in one word a single word
 * division is used; otherwise dodiv does the work.
 */
void
_divvu(Vlong *q, Vlong n, Vlong d)
{
	if(n.hi != 0 || d.hi != 0) {
		dodiv(n, d, q, 0);
		return;
	}
	q->hi = 0;
	q->lo = n.lo / d.lo;
}
/*
 * *r = n % d, unsigned.
 * when both operands fit in one word a single word
 * remainder is used; otherwise dodiv does the work.
 */
void
_modvu(Vlong *r, Vlong n, Vlong d)
{
	if(n.hi != 0 || d.hi != 0) {
		dodiv(n, d, 0, r);
		return;
	}
	r->hi = 0;
	r->lo = n.lo % d.lo;
}
/*
 * negate *v in place (two's complement).
 * with a zero low word only the high word is negated;
 * otherwise the low word is negated and the high word inverted.
 */
static void
vneg(Vlong *v)
{
	if(v->lo != 0) {
		v->lo = -v->lo;
		v->hi = ~v->hi;
	} else
		v->hi = -v->hi;
}
/*
 * *q = n / d, signed.
 * when both operands are sign extensions of their low words a
 * single word division is used.  otherwise the operands are made
 * non-negative, divided by dodiv, and the quotient negated when
 * the operand signs differ.
 */
void
_divv(Vlong *q, Vlong n, Vlong d)
{
	long nneg, dneg;

	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
		if(n.lo == SIGN(32) && (long)d.lo == -1) {
			/*
			 * -0x80000000 / -1 overflows a 32-bit signed
			 * divide; the 64-bit quotient is +0x80000000.
			 */
			q->lo = SIGN(32);
			q->hi = 0;
			return;
		}
		q->lo = (long)n.lo / (long)d.lo;
		q->hi = ((long)q->lo) >> 31;
		return;
	}
	nneg = n.hi >> 31;
	if(nneg)
		vneg(&n);
	dneg = d.hi >> 31;
	if(dneg)
		vneg(&d);
	dodiv(n, d, q, 0);
	if(nneg != dneg)
		vneg(q);
}
/*
 * *r = n % d, signed.
 * when both operands are sign extensions of their low words a
 * single word remainder is used.  otherwise the operands are made
 * non-negative, divided by dodiv, and the remainder given the
 * sign of the numerator.
 */
void
_modv(Vlong *r, Vlong n, Vlong d)
{
	long nneg, dneg;

	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
		if(n.lo == SIGN(32) && (long)d.lo == -1) {
			/*
			 * -0x80000000 % -1 overflows a 32-bit signed
			 * divide; the remainder is zero.
			 */
			r->lo = 0;
			r->hi = 0;
			return;
		}
		r->lo = (long)n.lo % (long)d.lo;
		r->hi = ((long)r->lo) >> 31;
		return;
	}
	nneg = n.hi >> 31;
	if(nneg)
		vneg(&n);
	dneg = d.hi >> 31;
	if(dneg)
		vneg(&d);
	dodiv(n, d, 0, r);
	if(nneg)
		vneg(r);
}
/*
 * *r = a >> b, arithmetic (the sign of a.hi is shifted in).
 * counts of 32 and more move the high word into the low word;
 * counts of zero and below return a unchanged.
 */
void
_rshav(Vlong *r, Vlong a, int b)
{
	long high, fill;

	high = a.hi;
	fill = high >> 31;
	if(b >= 64) {
		/* this is illegal re C standard */
		r->hi = fill;
		r->lo = fill;
	} else if(b >= 32) {
		r->hi = fill;
		r->lo = high >> (b-32);
	} else if(b <= 0) {
		r->hi = high;
		r->lo = a.lo;
	} else {
		r->hi = high >> b;
		r->lo = (high << (32-b)) | (a.lo >> b);
	}
}
/*
 * *r = a >> b, logical (zeros are shifted in).
 * counts of 32 and more move the high word into the low word;
 * counts of zero and below return a unchanged.
 */
void
_rshlv(Vlong *r, Vlong a, int b)
{
	ulong high;

	high = a.hi;
	if(b >= 64) {
		/* this is illegal re C standard */
		r->hi = 0;
		r->lo = 0;
	} else if(b >= 32) {
		r->hi = 0;
		r->lo = high >> (b-32);
	} else if(b <= 0) {
		r->hi = high;
		r->lo = a.lo;
	} else {
		r->hi = high >> b;
		r->lo = (high << (32-b)) | (a.lo >> b);
	}
}
/*
 * *r = a << b.
 * counts of 32 and more move the low word into the high word;
 * counts of zero and below return a unchanged.
 */
void
_lshv(Vlong *r, Vlong a, int b)
{
	ulong low;

	low = a.lo;
	if(b >= 64) {
		/* this is illegal re C standard */
		r->lo = 0;
		r->hi = 0;
	} else if(b >= 32) {
		r->lo = 0;
		r->hi = low << (b-32);
	} else if(b <= 0) {
		r->lo = low;
		r->hi = a.hi;
	} else {
		r->lo = low << b;
		r->hi = (low >> (32-b)) | (a.hi << b);
	}
}
/*
 * *r = a & b, word by word.
 */
void
_andv(Vlong *r, Vlong a, Vlong b)
{
	r->lo = a.lo & b.lo;
	r->hi = a.hi & b.hi;
}
/*
 * *r = a | b, word by word.
 */
void
_orv(Vlong *r, Vlong a, Vlong b)
{
	r->lo = a.lo | b.lo;
	r->hi = a.hi | b.hi;
}
/*
 * *r = a ^ b, word by word.
 */
void
_xorv(Vlong *r, Vlong a, Vlong b)
{
	r->lo = a.lo ^ b.lo;
	r->hi = a.hi ^ b.hi;
}
/*
 * post-increment: *l gets the old value of *r,
 * then *r is incremented.
 */
void
_vpp(Vlong *l, Vlong *r)
{
	*l = *r;
	/* a low word that wraps to zero carries into the high word */
	if(++r->lo == 0)
		r->hi++;
}
/*
 * post-decrement: *l gets the old value of *r,
 * then *r is decremented.
 */
void
_vmm(Vlong *l, Vlong *r)
{
	*l = *r;
	/* a zero low word borrows from the high word */
	if(r->lo-- == 0)
		r->hi--;
}
/*
 * pre-increment: *r is incremented,
 * then *l gets the new value.
 */
void
_ppv(Vlong *l, Vlong *r)
{
	/* a low word that wraps to zero carries into the high word */
	if(++r->lo == 0)
		r->hi++;
	*l = *r;
}
/*
 * pre-decrement: *r is decremented,
 * then *l gets the new value.
 */
void
_mmv(Vlong *l, Vlong *r)
{
	/* a zero low word borrows from the high word */
	if(r->lo-- == 0)
		r->hi--;
	*l = *r;
}
/*
 * assignment operator with a 64-bit right operand: lv op= rv.
 * type selects the type of the object lv points at.  its value is
 * widened to 64 bits, fn(&u, value, rv) computes the result, the
 * result is stored back through lv in the object's own type, and
 * the full 64-bit result is returned in *ret.
 * an unknown type calls abort.
 *
 * signed types are widened with an arithmetic shift of the low
 * word taken as a long; shifting the unsigned word itself would
 * leave a high word of 1 instead of all ones for negative values.
 */
void
_vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv)
{
	Vlong t, u;

	u.lo = 0;
	u.hi = 0;
	switch(type) {
	default:
		abort();
		break;
	case 1:	/* schar */
		t.lo = *(schar*)lv;
		t.hi = (long)t.lo >> 31;
		fn(&u, t, rv);
		*(schar*)lv = u.lo;
		break;
	case 2:	/* uchar */
		t.lo = *(uchar*)lv;
		t.hi = 0;
		fn(&u, t, rv);
		*(uchar*)lv = u.lo;
		break;
	case 3:	/* short */
		t.lo = *(short*)lv;
		t.hi = (long)t.lo >> 31;
		fn(&u, t, rv);
		*(short*)lv = u.lo;
		break;
	case 4:	/* ushort */
		t.lo = *(ushort*)lv;
		t.hi = 0;
		fn(&u, t, rv);
		*(ushort*)lv = u.lo;
		break;
	case 9:	/* int */
		t.lo = *(int*)lv;
		t.hi = (long)t.lo >> 31;
		fn(&u, t, rv);
		*(int*)lv = u.lo;
		break;
	case 10:	/* uint */
		t.lo = *(uint*)lv;
		t.hi = 0;
		fn(&u, t, rv);
		*(uint*)lv = u.lo;
		break;
	case 5:	/* long */
		t.lo = *(long*)lv;
		t.hi = (long)t.lo >> 31;
		fn(&u, t, rv);
		*(long*)lv = u.lo;
		break;
	case 6:	/* ulong */
		t.lo = *(ulong*)lv;
		t.hi = 0;
		fn(&u, t, rv);
		*(ulong*)lv = u.lo;
		break;
	case 7:	/* vlong */
	case 8:	/* uvlong */
		fn(&u, *(Vlong*)lv, rv);
		*(Vlong*)lv = u;
		break;
	}
	*ret = u;
}
/*
 * widen the pointer p to 64 bits: the pointer's
 * integer value in the low word, zero in the high word.
 */
void
_p2v(Vlong *ret, void *p)
{
	long bits;

	bits = (ulong)p;
	ret->hi = 0;
	ret->lo = bits;
}
/*
 * widen the signed long sl to 64 bits;
 * the high word is filled with copies of its sign.
 */
void
_sl2v(Vlong *ret, long sl)
{
	ret->lo = sl;
	ret->hi = sl >> 31;
}
/*
 * widen the unsigned long ul to 64 bits;
 * the high word is zero.
 */
void
_ul2v(Vlong *ret, ulong ul)
{
	long low;

	low = ul;
	ret->hi = 0;
	ret->lo = low;
}
/*
 * widen the signed int si to 64 bits;
 * the high word is filled with copies of its sign.
 */
void
_si2v(Vlong *ret, int si)
{
	long wide;

	wide = si;
	ret->hi = wide >> 31;
	ret->lo = wide;
}
/*
 * widen the unsigned int ui to 64 bits;
 * the high word is zero.
 */
void
_ui2v(Vlong *ret, uint ui)
{
	long low;

	low = ui;
	ret->hi = 0;
	ret->lo = low;
}
/*
 * widen a signed 16-bit value to 64 bits.
 * only the low 16 bits of sh are used; they are sign extended
 * by shifting up to the top of the word and back down.
 */
void
_sh2v(Vlong *ret, long sh)
{
	long ext;

	ext = sh << 16;
	ext >>= 16;
	ret->lo = ext;
	ret->hi = ext >> 31;
}
/*
 * widen an unsigned 16-bit value to 64 bits.
 * only the low 16 bits of ul are used; the rest is zero.
 */
void
_uh2v(Vlong *ret, ulong ul)
{
	long low;

	low = ul & 0xffff;
	ret->hi = 0;
	ret->lo = low;
}
/*
 * widen a signed 8-bit value to 64 bits.
 * only the low 8 bits of uc are used; they are sign extended
 * by shifting up to the top of the word and back down.
 */
void
_sc2v(Vlong *ret, long uc)
{
	long ext;

	ext = uc << 24;
	ext >>= 24;
	ret->lo = ext;
	ret->hi = ext >> 31;
}
/*
 * widen an unsigned 8-bit value to 64 bits.
 * only the low 8 bits of ul are used; the rest is zero.
 */
void
_uc2v(Vlong *ret, ulong ul)
{
	long low;

	low = ul & 0xff;
	ret->hi = 0;
	ret->lo = low;
}
/*
 * narrow rv to a signed 8-bit value:
 * the low byte of rv, sign extended to a long.
 */
long
_v2sc(Vlong rv)
{
	long ext;

	ext = rv.lo & 0xff;
	ext <<= 24;
	return ext >> 24;
}
/*
 * narrow rv to an unsigned 8-bit value:
 * the low byte of rv.
 */
long
_v2uc(Vlong rv)
{
	ulong low;

	low = rv.lo & 0xff;
	return low;
}
/*
 * narrow rv to a signed 16-bit value:
 * the low 16 bits of rv, sign extended to a long.
 */
long
_v2sh(Vlong rv)
{
	long ext;

	ext = rv.lo & 0xffff;
	ext <<= 16;
	return ext >> 16;
}
/*
 * narrow rv to an unsigned 16-bit value:
 * the low 16 bits of rv.
 */
long
_v2uh(Vlong rv)
{
	ulong low;

	low = rv.lo & 0xffff;
	return low;
}
/*
 * narrow rv to a signed long: the low word of rv.
 */
long
_v2sl(Vlong rv)
{
	long low;

	low = rv.lo;
	return low;
}
/*
 * narrow rv to an unsigned long: the low word of rv
 * (returned through a long, as declared).
 */
long
_v2ul(Vlong rv)
{
	long low;

	low = rv.lo;
	return low;
}
/*
 * narrow rv to a signed int: the low word of rv.
 */
long
_v2si(Vlong rv)
{
	long low;

	low = rv.lo;
	return low;
}
/*
 * narrow rv to an unsigned int: the low word of rv
 * (returned through a long, as declared).
 */
long
_v2ui(Vlong rv)
{
	long low;

	low = rv.lo;
	return low;
}
/*
 * truth value of rv: 1 when either word is non-zero, else 0.
 */
int
_testv(Vlong rv)
{
	if(rv.lo != 0)
		return 1;
	return rv.hi != 0;
}
/*
 * lv == rv: both words must match.
 */
int
_eqv(Vlong lv, Vlong rv)
{
	if(lv.lo != rv.lo)
		return 0;
	return lv.hi == rv.hi;
}
/*
 * lv != rv: true when either word differs.
 */
int
_nev(Vlong lv, Vlong rv)
{
	if(lv.lo != rv.lo)
		return 1;
	return lv.hi != rv.hi;
}
/*
 * signed lv < rv.
 * differing high words decide, compared as signed;
 * otherwise the low words decide, compared as unsigned.
 */
int
_ltv(Vlong lv, Vlong rv)
{
	if(lv.hi != rv.hi)
		return (long)lv.hi < (long)rv.hi;
	return lv.lo < rv.lo;
}
/*
 * signed lv <= rv.
 * differing high words decide, compared as signed;
 * otherwise the low words decide, compared as unsigned.
 */
int
_lev(Vlong lv, Vlong rv)
{
	if(lv.hi != rv.hi)
		return (long)lv.hi < (long)rv.hi;
	return lv.lo <= rv.lo;
}
/*
 * signed lv > rv.
 * differing high words decide, compared as signed;
 * otherwise the low words decide, compared as unsigned.
 */
int
_gtv(Vlong lv, Vlong rv)
{
	if(lv.hi != rv.hi)
		return (long)lv.hi > (long)rv.hi;
	return lv.lo > rv.lo;
}
/*
 * signed lv >= rv.
 * differing high words decide, compared as signed;
 * otherwise the low words decide, compared as unsigned.
 */
int
_gev(Vlong lv, Vlong rv)
{
	if(lv.hi != rv.hi)
		return (long)lv.hi > (long)rv.hi;
	return lv.lo >= rv.lo;
}
/*
 * unsigned lv < rv.
 * differing high words decide; otherwise the low words decide.
 */
int
_lov(Vlong lv, Vlong rv)
{
	if(lv.hi != rv.hi)
		return lv.hi < rv.hi;
	return lv.lo < rv.lo;
}
/*
 * unsigned lv <= rv.
 * differing high words decide; otherwise the low words decide.
 */
int
_lsv(Vlong lv, Vlong rv)
{
	if(lv.hi != rv.hi)
		return lv.hi < rv.hi;
	return lv.lo <= rv.lo;
}
/*
 * unsigned lv > rv.
 * differing high words decide; otherwise the low words decide.
 */
int
_hiv(Vlong lv, Vlong rv)
{
	if(lv.hi != rv.hi)
		return lv.hi > rv.hi;
	return lv.lo > rv.lo;
}
/*
 * unsigned lv >= rv.
 * differing high words decide; otherwise the low words decide.
 */
int
_hsv(Vlong lv, Vlong rv)
{
	if(lv.hi != rv.hi)
		return lv.hi > rv.hi;
	return lv.lo >= rv.lo;
}