diff --git a/sys/src/ape/lib/ap/spim/memchr.s b/sys/src/ape/lib/ap/spim/memchr.s
new file mode 100644
index 000000000..b244ea279
--- /dev/null
+++ b/sys/src/ape/lib/ap/spim/memchr.s
@@ -0,0 +1,39 @@
+TEXT memchr(SB), $0	/* memchr(s1, c, n): R1 = address of the first byte equal to c among the n bytes at s1, or R0 if there is none */
+	MOVW	R1, 0(FP)	/* store R1 into the s1 slot; the first argument evidently arrives in R1 */
+
+	MOVW	n+8(FP), R1	/* R1 is count */
+	MOVW	s1+0(FP), R2	/* R2 is the scan pointer */
+	MOVBU	c+4(FP), R3	/* R3 is the byte to find, read from the first byte of c's slot */
+	ADDU	R1, R2, R6	/* R6 is end pointer (s1+n) */
+
+	AND	$(~1), R1, R5	/* R5 = n rounded down to an even count */
+	ADDU	R2, R5	/* R5 is the end of the part scanned two bytes at a time */
+	BEQ	R2, R5, lt2	/* fewer than two bytes: skip the pair loop */
+
+l1:	/* pair loop: examine two bytes per iteration */
+	MOVBU	0(R2), R4
+	MOVBU	1(R2), R7
+	BEQ	R3, R4, eq0	/* first byte of the pair matches; R2 still points at it */
+	ADDU	$2, R2
+	BEQ	R3, R7, eq	/* second byte matches; it now sits at R2-1 */
+	BNE	R2, R5, l1
+
+lt2:
+	BEQ	R2, R6, zret	/* nothing left over */
+
+l2:	/* byte loop for whatever the pair loop did not cover */
+	MOVBU	(R2), R4
+	ADDU	$1, R2
+	BEQ	R3, R4, eq
+	BNE	R2, R6, l2
+zret:	/* not found: return R0 (presumably the zero register) */
+	MOVW	R0, R1
+	RET
+
+eq0:	/* match found before R2 was advanced */
+	MOVW	R2, R1
+	RET
+
+eq:	/* match found after R2 was advanced past it */
+	SUBU	$1,R2, R1
+	RET
diff --git a/sys/src/ape/lib/ap/spim/memcmp.s b/sys/src/ape/lib/ap/spim/memcmp.s
new file mode 100644
index 000000000..6fd1fda42
--- /dev/null
+++ b/sys/src/ape/lib/ap/spim/memcmp.s
@@ -0,0 +1,117 @@
+TEXT memcmp(SB), $0	/* memcmp(s1, s2, n): R1 = 0 if the n bytes are equal, otherwise 1 or -1 from an unsigned compare of the first differing pair */
+	MOVW	R1, 0(FP)	/* store R1 into the s1 slot; the first argument evidently arrives in R1 */
+
+/*
+ * performance:
+ *	aligned about 1.0us/call and 17.4mb/sec
+ *	unaligned is about 3.1mb/sec
+ * NOTE(review): these figures describe the word-at-a-time code,
+ * which the unconditional JMP below currently bypasses.
+ */
+
+	MOVW	n+8(FP), R3	/* R3 is count */
+	MOVW	s1+0(FP), R4	/* R4 is pointer1 */
+	MOVW	s2+4(FP), R5	/* R5 is pointer2 */
+	ADDU	R3,R4, R6	/* R6 is end pointer1 */
+
+	/* TODO(mischief): fix multibyte copy */
+	JMP	out	/* unconditional: everything from here down to the out label is unreachable, only the byte loop runs */
+
+/*
+ * if not at least 4 chars,
+ * don't even mess around.
+ * 3 chars to guarantee any
+ * rounding up to a word
+ * boundary and 4 characters
+ * to get at least maybe one
+ * full word cmp.
+ */
+	SGT	$4,R3, R1
+	BNE	R1, out
+
+/*
+ * test if both pointers
+ * are similarly word aligned
+ */
+	XOR	R4,R5, R1
+	AND	$3, R1
+	BNE	R1, out
+
+/*
+ * byte at a time to word align
+ */
+l1:
+	AND	$3,R4, R1
+	BEQ	R1, l2
+	MOVBU	0(R4), R8
+	MOVBU	0(R5), R9
+	ADDU	$1, R4
+	BNE	R8,R9, ne
+	ADDU	$1, R5
+	JMP	l1
+
+/*
+ * turn R3 into end pointer1-15
+ * cmp 16 at a time while there's room
+ */
+l2:
+	ADDU	$-15,R6, R3
+l3:
+	SGTU	R3,R4, R1
+	BEQ	R1, l4
+	MOVW	0(R4), R8
+	MOVW	0(R5), R9
+	MOVW	4(R4), R10
+	BNE	R8,R9, ne
+	MOVW	4(R5), R11
+	MOVW	8(R4), R8
+	BNE	R10,R11, ne1
+	MOVW	8(R5), R9
+	MOVW	12(R4), R10
+	BNE	R8,R9, ne
+	MOVW	12(R5), R11
+	ADDU	$16, R4
+	BNE	R10,R11, ne1
+	BNE	R8,R9, ne
+	ADDU	$16, R5
+	JMP	l3
+
+/*
+ * turn R3 into end pointer1-3
+ * cmp 4 at a time while there's room
+ */
+l4:
+	ADDU	$-3,R6, R3
+l5:
+	SGTU	R3,R4, R1
+	BEQ	R1, out
+	MOVW	0(R4), R8
+	MOVW	0(R5), R9
+	ADDU	$4, R4
+	BNE	R8,R9, ne	/* only works because big endian */
+	ADDU	$4, R5
+	JMP	l5
+
+/*
+ * last loop, cmp byte at a time
+ */
+out:
+	SGTU	R6,R4, R1	/* R1 = 1 while pointer1 is below the end, 0 once done (which is also the "equal" result) */
+	BEQ	R1, ret
+	MOVBU	0(R4), R8
+	MOVBU	0(R5), R9
+	ADDU	$1, R4
+	BNE	R8,R9, ne
+	ADDU	$1, R5
+	JMP	out
+
+ne1:	/* mismatch held in R10/R11 */
+	SGTU	R10,R11, R1	/* R1 = 1 if the s1 value is the larger one */
+	BNE	R1, ret
+	MOVW	$-1,R1
+	RET
+ne:	/* mismatch held in R8/R9 */
+	SGTU	R8,R9, R1	/* R1 = 1 if the s1 value is the larger one */
+	BNE	R1, ret
+	MOVW	$-1,R1
+ret:
+	RET
+	END
diff --git a/sys/src/ape/lib/ap/spim/mkfile b/sys/src/ape/lib/ap/spim/mkfile
index 8fe8ae32e..44aae3750 100644
--- a/sys/src/ape/lib/ap/spim/mkfile
+++ b/sys/src/ape/lib/ap/spim/mkfile
@@ -1,30 +1,45 @@
 APE=/sys/src/ape
 <$APE/config
 LIB=/$objtype/lib/ape/libap.a
+
+MIPSSFILES=\
+	getfcr.s\
+	main9.s\
+	main9p.s\
+	memmove.s\
+	memset.s\
+	setjmp.s\
+	strcmp.s\
+	strcpy.s\
+	tas.s\
+
+MIPSCFILES=\
+	cycles.c\
+	lock.c\
+	notetramp.c\
+
+SFILES=\
+	memchr.s\
+	memcmp.s\
+	strchr.s\
+	vlop.s\
+
+CFILES=\
+	vlrt.c\
+
 OFILES=\
-	cycles.$O\
-	getfcr.$O\
-	lock.$O\
-	main9.$O\
-	main9p.$O\
-	memchr.$O\
-	memcmp.$O\
-	memmove.$O\
-	memset.$O\
-	notetramp.$O\
-	setjmp.$O\
-	strchr.$O\
-	strcmp.$O\
-	strcpy.$O\
-	tas.$O\
-	vlop.$O\
-	vlrt.$O\
+	${MIPSSFILES:%.s=%.$O}\
+	${SFILES:%.s=%.$O}\
+	${MIPSCFILES:%.c=%.$O}\
+	${CFILES:%.c=%.$O}\
 
-%.$O: ../mips/%.c
-	$CC -I../mips $CFLAGS ../mips/$stem.c
+MIPSS=`{echo $MIPSSFILES | sed 's/\.s//g; s/ /|/g'}
+^($MIPSS)\.$O:R: '../mips/\1.s'
+	$AS $AFLAGS ../mips/$stem1.s
 
-%.$O: ../mips/%.s
-	$AS -I../mips $AFLAGS ../mips/$stem.s
+MIPSC=`{echo $MIPSCFILES | sed 's/\.c//g; s/ /|/g'}
+^($MIPSC)\.$O:R: '../mips/\1.c'
+	$CC $CFLAGS ../mips/$stem1.c hi:lo*/
+	MOVW	LO, R6	/* NOTE(review): the patch text before this point (rest of the mkfile hunk and the head of this assembly hunk) is missing as received; restore it from the original commit */
+	MOVW	HI, R7
+	MULU	R3, R4	/* lo1*hi2 -> _:hi */
+	MOVW	LO, R8
+	ADDU	R8, R7
+	MULU	R2, R5	/* hi1*lo2 -> _:hi */
+	MOVW	LO, R8
+	ADDU	R8, R7
+	MOVW	R6, 0(R1)	/* lo */
+	MOVW	R7, 4(R1)	/* hi */
+	RET
diff --git a/sys/src/ape/lib/ap/spim/vlrt.c b/sys/src/ape/lib/ap/spim/vlrt.c
new file mode 100644
index 000000000..f17cda42d
--- /dev/null
+++ b/sys/src/ape/lib/ap/spim/vlrt.c
@@ -0,0 +1,759 @@
+typedef	unsigned long	ulong;
+typedef	unsigned int	uint;
+typedef	unsigned short	ushort;
+typedef	unsigned char	uchar;
+typedef	signed char	schar;
+
+/* mask with only bit n-1 set; SIGN(32) is the sign bit of a 32-bit word */
+#define	SIGN(n)	(1UL<<((n)-1))
+
+/*
+ * A 64-bit integer held as two 32-bit words, low word first.
+ * All routines in this file assume ulong is exactly 32 bits wide.
+ * NOTE(review): with lo stored first, the 16-bit aliases below look
+ * mis-ordered for a little-endian layout (lols would come before loms).
+ * Nothing in the visible code uses them; confirm before relying on them.
+ */
+typedef	struct	Vlong	Vlong;
+struct	Vlong
+{
+	union
+	{
+		struct
+		{
+			ulong	lo;
+			ulong	hi;
+		};
+		struct
+		{
+			ushort	loms;
+			ushort	lols;
+			ushort	hims;
+			ushort	hils;
+		};
+	};
+};
+
+void	abort(void);
+
+/* needed by profiler; can't be profiled. */
+#pragma profile off
+/*
+ * *r = a + b (mod 2^64).
+ * A carry out of the low word shows up as the low sum wrapping below a.lo.
+ */
+void
+_addv(Vlong *r, Vlong a, Vlong b)
+{
+	ulong lo, hi;
+
+	lo = a.lo + b.lo;
+	hi = a.hi + b.hi;
+	if(lo < a.lo)
+		hi++;
+	r->lo = lo;
+	r->hi = hi;
+}
+
+/*
+ * *r = a - b (mod 2^64).
+ * A borrow from the high word shows up as the low difference exceeding a.lo.
+ */
+void
+_subv(Vlong *r, Vlong a, Vlong b)
+{
+	ulong lo, hi;
+
+	lo = a.lo - b.lo;
+	hi = a.hi - b.hi;
+	if(lo > a.lo)
+		hi--;
+	r->lo = lo;
+	r->hi = hi;
+}
+
+#pragma profile on
+
+/*
+ * Convert double d to a signed 64-bit integer in *y, truncating toward zero.
+ * The double is taken apart through a union with the lo/hi words: the low
+ * 20 bits of x.hi plus an implied leading 1 and all of x.lo form the
+ * significand, bits 20..30 of x.hi the biased exponent, bit 31 the sign
+ * (presumably IEEE 754 binary64 with the low word stored first).
+ * sh is how far the significand must be shifted right (or, if negative,
+ * left) to obtain the integer part.  Magnitudes needing a right shift of 64
+ * or more give 0; magnitudes needing a left shift of more than 10 overflow.
+ */
+void
+_d2v(Vlong *y, double d)
+{
+	union { double d; struct Vlong; } x;
+	ulong xhi, xlo, ylo, yhi;
+	int sh;
+
+	x.d = d;
+
+	xhi = (x.hi & 0xfffff) | 0x100000;
+	xlo = x.lo;
+	sh = 1075 - ((x.hi >> 20) & 0x7ff);
+
+	ylo = 0;
+	yhi = 0;
+	if(sh >= 0) {
+		/* v = (hi||lo) >> sh */
+		if(sh < 32) {
+			if(sh == 0) {
+				ylo = xlo;
+				yhi = xhi;
+			} else {
+				ylo = (xlo >> sh) | (xhi << (32-sh));
+				yhi = xhi >> sh;
+			}
+		} else {
+			if(sh == 32) {
+				ylo = xhi;
+			} else
+			if(sh < 64) {
+				ylo = xhi >> (sh-32);
+			}
+		}
+	} else {
+		/* v = (hi||lo) << -sh */
+		sh = -sh;
+		if(sh <= 10) {
+			ylo = xlo << sh;
+			yhi = (xhi << sh) | (xlo >> (32-sh));
+		} else {
+			/* overflow */
+			yhi = d;	/* causes something awful */
+		}
+	}
+	/* negative input: two's-complement negate the 64-bit magnitude */
+	if(x.hi & SIGN(32)) {
+		if(ylo != 0) {
+			ylo = -ylo;
+			yhi = ~yhi;
+		} else
+			yhi = -yhi;
+	}
+
+	y->hi = yhi;
+	y->lo = ylo;
+}
+
+/* Convert float f to a signed 64-bit integer in *y by way of _d2v. */
+void
+_f2v(Vlong *y, float f)
+{
+	_d2v(y, f);
+}
+
+/*
+ * Convert the signed 64-bit integer x to double.
+ * A negative x is negated first and the magnitude converted.  The magnitude
+ * can be as large as 2^63 (hi == 0x80000000 for the most negative value),
+ * so its high word must be read as unsigned: casting it to long made that
+ * one value come out as +2^63 instead of -2^63.
+ */
+double
+_v2d(Vlong x)
+{
+	if(x.hi & SIGN(32)) {
+		if(x.lo) {
+			x.lo = -x.lo;
+			x.hi = ~x.hi;
+		} else
+			x.hi = -x.hi;
+		return -(x.hi*4294967296. + x.lo);
+	}
+	return (long)x.hi*4294967296. + x.lo;
+}
+
+/* Convert the signed 64-bit integer x to float by way of _v2d. */
+float
+_v2f(Vlong x)
+{
+	return _v2d(x);
+}
+
+/* Convert the unsigned 64-bit integer x to double. */
+double
+_uv2d(Vlong x)
+{
+	return x.hi*4294967296. + x.lo;
+}
+
+/* Convert the unsigned 64-bit integer x to float by way of _uv2d. */
+float
+_uv2f(Vlong x)
+{
+	return _uv2d(x);
+}
+
+/*
+ * Compound assignment of a double to a 64-bit lvalue:
+ * *lv = (vlong)(v2d(*lv) OP rv), with the new value also stored in *ret.
+ * v2d is the integer-to-double conversion to use on *lv, supplied by the
+ * caller.  The four routines differ only in OP (+, -, *, /).
+ */
+void
+_vasaddd(Vlong *ret, Vlong *lv, double v2d(Vlong), double rv)
+{
+	_d2v(lv, v2d(*lv)+rv);
+	*ret = *lv;
+}
+void
+_vassubd(Vlong *ret, Vlong *lv, double v2d(Vlong), double rv)
+{
+	_d2v(lv, v2d(*lv)-rv);
+	*ret = *lv;
+}
+void
+_vasmuld(Vlong *ret, Vlong *lv, double v2d(Vlong), double rv)
+{
+	_d2v(lv, v2d(*lv)*rv);
+	*ret = *lv;
+}
+void
+_vasdivd(Vlong *ret, Vlong *lv, double v2d(Vlong), double rv)
+{
+	_d2v(lv, v2d(*lv)/rv);
+	*ret = *lv;
+}
+
+/*
+ * Unsigned 64-bit shift-and-subtract division of num by den.
+ * The quotient is stored through qp and the remainder through rp;
+ * either pointer may be 0 to discard that result.
+ */
+static void
+dodiv(Vlong num, Vlong den, Vlong *qp, Vlong *rp)
+{
+	ulong numlo, numhi, denhi, denlo, quohi, quolo, t;
+	int i;
+
+	numhi = num.hi;
+	numlo = num.lo;
+	denhi = den.hi;
+	denlo = den.lo;
+
+	/*
+	 * get a divide by zero
+	 * NOTE(review): if this 32-bit division does not trap on the
+	 * target, the normalisation loop below never terminates for a
+	 * non-zero numerator -- confirm.
+	 */
+	if(denlo==0 && denhi==0) {
+		numlo = numlo / denlo;
+	}
+
+	/*
+	 * set up the divisor and find the number of iterations needed
+	 * (the target is capped at 2^63 so the shifted divisor cannot
+	 * overflow the 64 bits)
+	 */
+	if(numhi >= SIGN(32)) {
+		quohi = SIGN(32);
+		quolo = 0;
+	} else {
+		quohi = numhi;
+		quolo = numlo;
+	}
+	i = 0;
+	while(denhi < quohi || (denhi == quohi && denlo < quolo)) {
+		denhi = (denhi<<1) | (denlo>>31);
+		denlo <<= 1;
+		i++;
+	}
+
+	/*
+	 * one quotient bit per step: subtract the shifted divisor
+	 * whenever it fits, then shift it back right by one
+	 */
+	quohi = 0;
+	quolo = 0;
+	for(; i >= 0; i--) {
+		quohi = (quohi<<1) | (quolo>>31);
+		quolo <<= 1;
+		if(numhi > denhi || (numhi == denhi && numlo >= denlo)) {
+			t = numlo;
+			numlo -= denlo;
+			if(numlo > t)
+				numhi--;
+			numhi -= denhi;
+			quolo |= 1;
+		}
+		denlo = (denlo>>1) | (denhi<<31);
+		denhi >>= 1;
+	}
+
+	if(qp) {
+		qp->lo = quolo;
+		qp->hi = quohi;
+	}
+	if(rp) {
+		rp->lo = numlo;
+		rp->hi = numhi;
+	}
+}
+
+/*
+ * *q = n / d, unsigned.  When both operands fit in 32 bits a single
+ * 32-bit division is used; otherwise dodiv does the work.
+ */
+void
+_divvu(Vlong *q, Vlong n, Vlong d)
+{
+
+	if(n.hi == 0 && d.hi == 0) {
+		q->hi = 0;
+		q->lo = n.lo / d.lo;
+		return;
+	}
+	dodiv(n, d, q, 0);
+}
+
+/*
+ * *r = n % d, unsigned.  When both operands fit in 32 bits a single
+ * 32-bit remainder is used; otherwise dodiv does the work.
+ */
+void
+_modvu(Vlong *r, Vlong n, Vlong d)
+{
+
+	if(n.hi == 0 && d.hi == 0) {
+		r->hi = 0;
+		r->lo = n.lo % d.lo;
+		return;
+	}
+	dodiv(n, d, 0, r);
+}
+
+/* Two's-complement negate *v in place. */
+static void
+vneg(Vlong *v)
+{
+
+	if(v->lo == 0) {
+		v->hi = -v->hi;
+		return;
+	}
+	v->lo = -v->lo;
+	v->hi = ~v->hi;
+}
+
+void
+_divv(Vlong *q, Vlong n, Vlong d)
+{
+	/*
+	 * *q = n / d, signed, truncating toward zero.
+	 * Fast path: when both operands are sign-extended 32-bit values a
+	 * single 32-bit division suffices.  Otherwise both operands are
+	 * made non-negative, divided unsigned, and the quotient negated
+	 * when the signs differed.
+	 */
+	long nneg, dneg;
+
+	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+		if(n.lo == SIGN(32) && (long)d.lo == -1) {
+			/*
+			 * -2^31 / -1 is +2^31: it does not fit in 32 bits,
+			 * so the 32-bit division would overflow and the
+			 * sign extension below would turn it into -2^31.
+			 */
+			q->lo = SIGN(32);
+			q->hi = 0;
+			return;
+		}
+		q->lo = (long)n.lo / (long)d.lo;
+		q->hi = ((long)q->lo) >> 31;
+		return;
+	}
+	nneg = n.hi >> 31;
+	if(nneg)
+		vneg(&n);
+	dneg = d.hi >> 31;
+	if(dneg)
+		vneg(&d);
+	dodiv(n, d, q, 0);
+	if(nneg != dneg)
+		vneg(q);
+}
+
+/*
+ * *r = n % d, signed; the remainder takes the sign of n.
+ * Same fast path and sign handling as _divv.
+ */
+void
+_modv(Vlong *r, Vlong n, Vlong d)
+{
+	long nneg, dneg;
+
+	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+		if(n.lo == SIGN(32) && (long)d.lo == -1) {
+			/* -2^31 % -1 is 0; the 32-bit operation would overflow */
+			r->lo = 0;
+			r->hi = 0;
+			return;
+		}
+		r->lo = (long)n.lo % (long)d.lo;
+		r->hi = ((long)r->lo) >> 31;
+		return;
+	}
+	nneg = n.hi >> 31;
+	if(nneg)
+		vneg(&n);
+	dneg = d.hi >> 31;
+	if(dneg)
+		vneg(&d);
+	dodiv(n, d, 0, r);
+	if(nneg)
+		vneg(r);
+}
+
+/*
+ * *r = a >> b, arithmetic (the sign bit is replicated).
+ * b <= 0 returns a unchanged; b >= 64 fills every bit with the sign.
+ */
+void
+_rshav(Vlong *r, Vlong a, int b)
+{
+	long t;
+
+	t = a.hi;
+	if(b >= 32) {
+		r->hi = t>>31;
+		if(b >= 64) {
+			/* this is illegal re C standard */
+			r->lo = t>>31;
+			return;
+		}
+		r->lo = t >> (b-32);
+		return;
+	}
+	if(b <= 0) {
+		r->hi = t;
+		r->lo = a.lo;
+		return;
+	}
+	r->hi = t >> b;
+	/* shift the unsigned word: same bits, no left shift of a negative long */
+	r->lo = (a.hi << (32-b)) | (a.lo >> b);
+}
+
+/*
+ * *r = a >> b, logical (zero fill).
+ * b <= 0 returns a unchanged; b >= 64 gives 0.
+ */
+void
+_rshlv(Vlong *r, Vlong a, int b)
+{
+	ulong t;
+
+	t = a.hi;
+	if(b >= 32) {
+		r->hi = 0;
+		if(b >= 64) {
+			/* this is illegal re C standard */
+			r->lo = 0;
+			return;
+		}
+		r->lo = t >> (b-32);
+		return;
+	}
+	if(b <= 0) {
+		r->hi = t;
+		r->lo = a.lo;
+		return;
+	}
+	r->hi = t >> b;
+	r->lo = (t << (32-b)) | (a.lo >> b);
+}
+
+/*
+ * *r = a << b.
+ * b <= 0 returns a unchanged; b >= 64 gives 0.
+ */
+void
+_lshv(Vlong *r, Vlong a, int b)
+{
+	ulong t;
+
+	t = a.lo;
+	if(b >= 32) {
+		r->lo = 0;
+		if(b >= 64) {
+			/* this is illegal re C standard */
+			r->hi = 0;
+			return;
+		}
+		r->hi = t << (b-32);
+		return;
+	}
+	if(b <= 0) {
+		r->lo = t;
+		r->hi = a.hi;
+		return;
+	}
+	r->lo = t << b;
+	r->hi = (t >> (32-b)) | (a.hi << b);
+}
+
+/* *r = a & b, word by word. */
+void
+_andv(Vlong *r, Vlong a, Vlong b)
+{
+	r->hi = a.hi & b.hi;
+	r->lo = a.lo & b.lo;
+}
+
+/* *r = a | b, word by word. */
+void
+_orv(Vlong *r, Vlong a, Vlong b)
+{
+	r->hi = a.hi | b.hi;
+	r->lo = a.lo | b.lo;
+}
+
+/* *r = a ^ b, word by word. */
+void
+_xorv(Vlong *r, Vlong a, Vlong b)
+{
+	r->hi = a.hi ^ b.hi;
+	r->lo = a.lo ^ b.lo;
+}
+
+void
+_vpp(Vlong *l, Vlong *r)
+{
+	/* post-increment: *l = *r, then *r += 1 with carry into hi */
+
+	l->hi = r->hi;
+	l->lo = r->lo;
+	r->lo++;
+	if(r->lo == 0)
+		r->hi++;
+}
+
+/* post-decrement: *l = *r, then *r -= 1 with borrow from hi */
+void
+_vmm(Vlong *l, Vlong *r)
+{
+
+	l->hi = r->hi;
+	l->lo = r->lo;
+	if(r->lo == 0)
+		r->hi--;
+	r->lo--;
+}
+
+/* pre-increment: *r += 1 with carry into hi, then *l = *r */
+void
+_ppv(Vlong *l, Vlong *r)
+{
+
+	r->lo++;
+	if(r->lo == 0)
+		r->hi++;
+	l->hi = r->hi;
+	l->lo = r->lo;
+}
+
+/* pre-decrement: *r -= 1 with borrow from hi, then *l = *r */
+void
+_mmv(Vlong *l, Vlong *r)
+{
+
+	if(r->lo == 0)
+		r->hi--;
+	r->lo--;
+	l->hi = r->hi;
+	l->lo = r->lo;
+}
+
+/*
+ * Compound assignment "lv OP= rv" where rv is 64 bits wide and lv is an
+ * lvalue of the type selected by the type code:
+ *	1 schar, 2 uchar, 3 short, 4 ushort, 5 long, 6 ulong,
+ *	7 vlong, 8 uvlong, 9 int, 10 uint; anything else aborts.
+ * *lv is widened to 64 bits, fn(&u, widened, rv) computes the result, the
+ * result is narrowed back into *lv, and the full 64-bit result is stored
+ * in *ret.
+ * Signed operands are sign-extended.  t.lo is unsigned, so the high word
+ * has to be taken from (long)t.lo >> 31; shifting t.lo itself gave 1
+ * instead of 0xffffffff for negative values.
+ */
+void
+_vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv)
+{
+	Vlong t, u;
+
+	u.lo = 0;
+	u.hi = 0;
+	switch(type) {
+	default:
+		abort();
+		break;
+
+	case 1:	/* schar */
+		t.lo = *(schar*)lv;
+		t.hi = (long)t.lo >> 31;
+		fn(&u, t, rv);
+		*(schar*)lv = u.lo;
+		break;
+
+	case 2:	/* uchar */
+		t.lo = *(uchar*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(uchar*)lv = u.lo;
+		break;
+
+	case 3:	/* short */
+		t.lo = *(short*)lv;
+		t.hi = (long)t.lo >> 31;
+		fn(&u, t, rv);
+		*(short*)lv = u.lo;
+		break;
+
+	case 4:	/* ushort */
+		t.lo = *(ushort*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(ushort*)lv = u.lo;
+		break;
+
+	case 9:	/* int */
+		t.lo = *(int*)lv;
+		t.hi = (long)t.lo >> 31;
+		fn(&u, t, rv);
+		*(int*)lv = u.lo;
+		break;
+
+	case 10:	/* uint */
+		t.lo = *(uint*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(uint*)lv = u.lo;
+		break;
+
+	case 5:	/* long */
+		t.lo = *(long*)lv;
+		t.hi = (long)t.lo >> 31;
+		fn(&u, t, rv);
+		*(long*)lv = u.lo;
+		break;
+
+	case 6:	/* ulong */
+		t.lo = *(ulong*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(ulong*)lv = u.lo;
+		break;
+
+	case 7:	/* vlong */
+	case 8:	/* uvlong */
+		fn(&u, *(Vlong*)lv, rv);
+		*(Vlong*)lv = u;
+		break;
+	}
+	*ret = u;
+}
+
+/*
+ * Widening conversions to 64 bits.  Each stores the low word and either
+ * zero (pointer and unsigned sources) or the replicated sign bit (signed
+ * sources) in the high word.
+ */
+
+/* pointer to vlong, zero-extended */
+void
+_p2v(Vlong *ret, void *p)
+{
+	long t;
+
+	t = (ulong)p;
+	ret->lo = t;
+	ret->hi = 0;
+}
+
+/* long to vlong, sign-extended */
+void
+_sl2v(Vlong *ret, long sl)
+{
+	long t;
+
+	t = sl;
+	ret->lo = t;
+	ret->hi = t >> 31;
+}
+
+/* ulong to vlong, zero-extended */
+void
+_ul2v(Vlong *ret, ulong ul)
+{
+	long t;
+
+	t = ul;
+	ret->lo = t;
+	ret->hi = 0;
+}
+
+/* int to vlong, sign-extended */
+void
+_si2v(Vlong *ret, int si)
+{
+	long t;
+
+	t = si;
+	ret->lo = t;
+	ret->hi = t >> 31;
+}
+
+/* uint to vlong, zero-extended */
+void
+_ui2v(Vlong *ret, uint ui)
+{
+	long t;
+
+	t = ui;
+	ret->lo = t;
+	ret->hi = 0;
+}
+
+/* short (passed in a long) to vlong: sign-extend from bit 15 */
+void
+_sh2v(Vlong *ret, long sh)
+{
+	long t;
+
+	t = (sh << 16) >> 16;
+	ret->lo = t;
+	ret->hi = t >> 31;
+}
+
+/* ushort (passed in a ulong) to vlong: keep the low 16 bits */
+void
+_uh2v(Vlong *ret, ulong ul)
+{
+	long t;
+
+	t = ul & 0xffff;
+	ret->lo = t;
+	ret->hi = 0;
+}
+
+/* schar (passed in a long) to vlong: sign-extend from bit 7 */
+void
+_sc2v(Vlong *ret, long uc)
+{
+	long t;
+
+	t = (uc << 24) >> 24;
+	ret->lo = t;
+	ret->hi = t >> 31;
+}
+
+/* uchar (passed in a ulong) to vlong: keep the low 8 bits */
+void
+_uc2v(Vlong *ret, ulong ul)
+{
+	long t;
+
+	t = ul & 0xff;
+	ret->lo = t;
+	ret->hi = 0;
+}
+
+/*
+ * Narrowing conversions from 64 bits.  Only the low word is used; the
+ * char and short variants additionally truncate to 8 or 16 bits and, for
+ * the signed ones, sign-extend the result back to a long.
+ */
+
+/* vlong to schar: low 8 bits, sign-extended */
+long
+_v2sc(Vlong rv)
+{
+	long t;
+
+	t = rv.lo & 0xff;
+	return (t << 24) >> 24;
+}
+
+/* vlong to uchar: low 8 bits */
+long
+_v2uc(Vlong rv)
+{
+
+	return rv.lo & 0xff;
+}
+
+/* vlong to short: low 16 bits, sign-extended */
+long
+_v2sh(Vlong rv)
+{
+	long t;
+
+	t = rv.lo & 0xffff;
+	return (t << 16) >> 16;
+}
+
+/* vlong to ushort: low 16 bits */
+long
+_v2uh(Vlong rv)
+{
+
+	return rv.lo & 0xffff;
+}
+
+/* vlong to long: the low word */
+long
+_v2sl(Vlong rv)
+{
+
+	return rv.lo;
+}
+
+/* vlong to ulong: the low word */
+long
+_v2ul(Vlong rv)
+{
+
+	return rv.lo;
+}
+
+/* vlong to int: the low word */
+long
+_v2si(Vlong rv)
+{
+
+	return rv.lo;
+}
+
+/* vlong to uint: the low word */
+long
+_v2ui(Vlong rv)
+{
+
+	return rv.lo;
+}
+
+/* truth value of rv: non-zero if any bit is set */
+int
+_testv(Vlong rv)
+{
+	return rv.lo || rv.hi;
+}
+
+/* lv == rv */
+int
+_eqv(Vlong lv, Vlong rv)
+{
+	return lv.lo == rv.lo && lv.hi == rv.hi;
+}
+
+/* lv != rv */
+int
+_nev(Vlong lv, Vlong rv)
+{
+	return lv.lo != rv.lo || lv.hi != rv.hi;
+}
+
+/*
+ * Signed comparisons: the high words are compared as signed longs and,
+ * when they are equal, the low words as unsigned.
+ */
+
+/* signed lv < rv */
+int
+_ltv(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi < (long)rv.hi ||
+		(lv.hi == rv.hi && lv.lo < rv.lo);
+}
+
+/* signed lv <= rv */
+int
+_lev(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi < (long)rv.hi ||
+		(lv.hi == rv.hi && lv.lo <= rv.lo);
+}
+
+/* signed lv > rv */
+int
+_gtv(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi > (long)rv.hi ||
+		(lv.hi == rv.hi && lv.lo > rv.lo);
+}
+
+/* signed lv >= rv */
+int
+_gev(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi > (long)rv.hi ||
+		(lv.hi == rv.hi && lv.lo >= rv.lo);
+}
+
+/*
+ * Unsigned comparisons: both words are compared as unsigned.
+ */
+
+/* unsigned lv < rv */
+int
+_lov(Vlong lv, Vlong rv)
+{
+	return lv.hi < rv.hi ||
+		(lv.hi == rv.hi && lv.lo < rv.lo);
+}
+
+/* unsigned lv <= rv */
+int
+_lsv(Vlong lv, Vlong rv)
+{
+	return lv.hi < rv.hi ||
+		(lv.hi == rv.hi && lv.lo <= rv.lo);
+}
+
+/* unsigned lv > rv */
+int
+_hiv(Vlong lv, Vlong rv)
+{
+	return lv.hi > rv.hi ||
+		(lv.hi == rv.hi && lv.lo > rv.lo);
+}
+
+/* unsigned lv >= rv */
+int
+_hsv(Vlong lv, Vlong rv)
+{
+	return lv.hi > rv.hi ||
+		(lv.hi == rv.hi && lv.lo >= rv.lo);
+}