libc and ape support for amd64

This commit is contained in:
cinap_lenrek 2014-02-01 10:31:41 +01:00
parent d4fb753c9c
commit ed9e9f98e9
51 changed files with 2016 additions and 0 deletions

80
amd64/include/ape/float.h Normal file
View file

@ -0,0 +1,80 @@
#ifndef __FLOAT
#define __FLOAT
/* IEEE, default rounding */
#define FLT_ROUNDS 1
#define FLT_RADIX 2
#define FLT_DIG 6
#define FLT_EPSILON 1.19209290e-07
#define FLT_MANT_DIG 24
#define FLT_MAX 3.40282347e+38
#define FLT_MAX_10_EXP 38
#define FLT_MAX_EXP 128
#define FLT_MIN 1.17549435e-38
#define FLT_MIN_10_EXP -37
#define FLT_MIN_EXP -125
#define DBL_DIG 15
#define DBL_EPSILON 2.2204460492503131e-16
#define DBL_MANT_DIG 53
#define DBL_MAX 1.797693134862315708145e+308
#define DBL_MAX_10_EXP 308
#define DBL_MAX_EXP 1024
#define DBL_MIN 2.225073858507201383090233e-308
#define DBL_MIN_10_EXP -307
#define DBL_MIN_EXP -1021
#define LDBL_MANT_DIG DBL_MANT_DIG
#define LDBL_EPSILON DBL_EPSILON
#define LDBL_DIG DBL_DIG
#define LDBL_MIN_EXP DBL_MIN_EXP
#define LDBL_MIN DBL_MIN
#define LDBL_MIN_10_EXP DBL_MIN_10_EXP
#define LDBL_MAX_EXP DBL_MAX_EXP
#define LDBL_MAX DBL_MAX
#define LDBL_MAX_10_EXP DBL_MAX_10_EXP
typedef union FPdbleword FPdbleword;
union FPdbleword
{
double x;
struct { /* little endian */
long lo;
long hi;
};
};
#ifdef _RESEARCH_SOURCE
/* define stuff needed for floating conversion */
#define IEEE_8087 1
#define Sudden_Underflow 1
#endif
#ifdef _PLAN9_SOURCE
/* MXCSR */
/* fcr */
#define FPFTZ (1<<15) /* amd64 */
#define FPINEX (1<<12)
#define FPUNFL (1<<11)
#define FPOVFL (1<<10)
#define FPZDIV (1<<9)
#define FPDNRM (1<<8) /* amd64 */
#define FPINVAL (1<<7)
#define FPDAZ (1<<6) /* amd64 */
#define FPRNR (0<<13)
#define FPRZ (3<<13)
#define FPRPINF (2<<13)
#define FPRNINF (1<<13)
#define FPRMASK (3<<13)
#define FPPEXT 0
#define FPPSGL 0
#define FPPDBL 0
#define FPPMASK 0
/* fsr */
#define FPAINEX (1<<5)
#define FPAUNFL (1<<4)
#define FPAOVFL (1<<3)
#define FPAZDIV (1<<2)
#define FPADNRM (1<<1) /* not in plan 9 */
#define FPAINVAL (1<<0)
#endif
#endif /* __FLOAT */

View file

@ -0,0 +1,21 @@
#ifndef _SUSV2_SOURCE
#error "inttypes.h is SUSV2"
#endif
#ifndef _INTTYPES_H_
#define _INTTYPES_H_ 1
typedef char int8_t;
typedef short int16_t;
typedef int int32_t;
typedef long long int64_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
typedef long long intptr_t;
typedef unsigned long long uintptr_t;
#endif

78
amd64/include/ape/math.h Normal file
View file

@ -0,0 +1,78 @@
#ifndef __MATH
#define __MATH
#pragma lib "/$M/lib/ape/libap.a"
/* a HUGE_VAL appropriate for IEEE double-precision */
/* the correct value, 1.797693134862316e+308, causes a ken overflow */
#define HUGE_VAL 1.79769313486231e+308
#ifdef __cplusplus
extern "C" {
#endif
extern double acos(double);
extern double asin(double);
extern double atan(double);
extern double atan2(double, double);
extern double cos(double);
extern double hypot(double, double);
extern double sin(double);
extern double tan(double);
extern double cosh(double);
extern double sinh(double);
extern double tanh(double);
extern double exp(double);
extern double frexp(double, int *);
extern double ldexp(double, int);
extern double log(double);
extern double log10(double);
extern double modf(double, double *);
extern double pow(double, double);
extern double sqrt(double);
extern double ceil(double);
extern double fabs(double);
extern double floor(double);
extern double fmod(double, double);
extern double NaN(void);
extern int isNaN(double);
extern double Inf(int);
extern int isInf(double, int);
#ifdef _RESEARCH_SOURCE
/* does >> treat left operand as unsigned ? */
#define Unsigned_Shifts 1
#define M_E 2.7182818284590452354 /* e */
#define M_LOG2E 1.4426950408889634074 /* log 2e */
#define M_LOG10E 0.43429448190325182765 /* log 10e */
#define M_LN2 0.69314718055994530942 /* log e2 */
#define M_LN10 2.30258509299404568402 /* log e10 */
#define M_PI 3.14159265358979323846 /* pi */
#define M_PI_2 1.57079632679489661923 /* pi/2 */
#define M_PI_4 0.78539816339744830962 /* pi/4 */
#define M_1_PI 0.31830988618379067154 /* 1/pi */
#define M_2_PI 0.63661977236758134308 /* 2/pi */
#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */
#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
extern double hypot(double, double);
extern double erf(double);
extern double erfc(double);
extern double j0(double);
extern double y0(double);
extern double j1(double);
extern double y1(double);
extern double jn(int, double);
extern double yn(int, double);
#endif
#ifdef __cplusplus
}
#endif
#define isnan(x) isNaN(x)
#define isinf(x) isInf(x, 0)
#endif /* __MATH */

View file

@ -0,0 +1,18 @@
#ifndef __STDARG
#define __STDARG
typedef char *va_list;
#define va_start(list, start) list = (sizeof(start)<8 ? (char *)((long long *)&(start)+1) : \
(char *)(&(start)+1))
#define va_end(list)
#define va_arg(list, mode)\
((sizeof(mode) == 1)?\
((mode*)(list += 8))[-8]:\
(sizeof(mode) == 2)?\
((mode*)(list += 8))[-4]:\
(sizeof(mode) == 4)?\
((mode*)(list += 8))[-2]:\
((mode*)(list += sizeof(mode)))[-1])
#endif /* __STDARG */

38
amd64/include/ape/ureg.h Normal file
View file

@ -0,0 +1,38 @@
#ifndef __UREG_H
#define __UREG_H
#if !defined(_PLAN9_SOURCE)
This header file is an extension to ANSI/POSIX
#endif
struct Ureg {
unsigned long long ax;
unsigned long long bx;
unsigned long long cx;
unsigned long long dx;
unsigned long long si;
unsigned long long di;
unsigned long long bp;
unsigned long long r8;
unsigned long long r9;
unsigned long long r10;
unsigned long long r11;
unsigned long long r12;
unsigned long long r13;
unsigned long long r14;
unsigned long long r15;
unsigned short ds;
unsigned short es;
unsigned short fs;
unsigned short gs;
unsigned long long type;
unsigned long long error; /* error code (or zero) */
unsigned long long pc; /* pc */
unsigned long long cs; /* old context */
unsigned long long flags; /* old flags */
unsigned long long sp; /* sp */
unsigned long long ss; /* old stack segment */
};
#endif

View file

@ -0,0 +1,3 @@
TEXT getcallerpc(SB), 1, $0
MOVQ -8(RARG), AX
RET

View file

@ -0,0 +1,38 @@
TEXT setfcr(SB), $4
XORL $(0x3F<<7),RARG /* bits are cleared in csr to enable them */
ANDL $0xFFC0, RARG /* just the fcr bits */
WAIT /* is this needed? */
STMXCSR 0(SP)
MOVL 0(SP), AX
ANDL $~0x3F, AX
ORL RARG, AX
MOVL AX, 0(SP)
LDMXCSR 0(SP)
RET
TEXT getfcr(SB), $4
WAIT
STMXCSR 0(SP)
MOVWLZX 0(SP), AX
ANDL $0xFFC0, AX
XORL $(0x3F<<7),AX
RET
TEXT getfsr(SB), $4
WAIT
STMXCSR 0(SP)
MOVL 0(SP), AX
ANDL $0x3F, AX
RET
TEXT setfsr(SB), $4
ANDL $0x3F, RARG
WAIT
STMXCSR 0(SP)
MOVL 0(SP), AX
ANDL $~0x3F, AX
ORL RARG, AX
MOVL AX, 0(SP)
LDMXCSR 0(SP)
RET

View file

@ -0,0 +1,11 @@
extern long __SEEK(long long*, int, long long, int);
long long
_SEEK(int fd, long long o, int p)
{
long long l;
if(__SEEK(&l, fd, o, p) < 0)
l = -1;
return l;
}

View file

@ -0,0 +1,5 @@
TEXT _cycles(SB),1,$0 /* time stamp counter; cycles since power up */
RDTSC
MOVL AX, 0(RARG) /* lo */
MOVL DX, 4(RARG) /* hi */
RET

View file

@ -0,0 +1,26 @@
#define _LOCK_EXTENSION
#include "../plan9/sys9.h"
#include <lock.h>
int tas(int*);
void
lock(Lock *lk)
{
while(tas(&lk->val))
_SLEEP(0);
}
int
canlock(Lock *lk)
{
if(tas(&lk->val))
return 0;
return 1;
}
void
unlock(Lock *lk)
{
lk->val = 0;
}

View file

@ -0,0 +1,26 @@
#define NPRIVATES 16
GLOBL _tos(SB), $8
GLOBL _errnoloc(SB), $8
GLOBL _privates(SB), $8
GLOBL _nprivates(SB), $8
TEXT _main(SB), 1, $(32+NPRIVATES*8)
/* _tos = arg */
MOVQ AX, _tos(SB)
LEAQ 24(SP), AX
MOVQ AX, _errnoloc(SB)
LEAQ 32(SP), AX
MOVQ AX, _privates(SB)
MOVQ $NPRIVATES, _nprivates(SB)
CALL _envsetup(SB)
MOVL inargc-8(FP), RARG
LEAQ inargv+0(FP), AX
MOVQ AX, 8(SP)
MOVQ environ(SB), AX
MOVQ AX, 16(SP)
CALL main(SB)
MOVQ AX, RARG
CALL exit(SB)
RET

View file

@ -0,0 +1,45 @@
#define NPRIVATES 16
GLOBL _tos(SB), $8
GLOBL _privates(SB), $8
GLOBL _nprivates(SB), $8
TEXT _mainp(SB), 1, $(3*8+NPRIVATES*8)
/* _tos = arg */
MOVQ AX, _tos(SB)
LEAQ 8(SP), AX
MOVQ AX, _privates(SB)
MOVQ $NPRIVATES, _nprivates(SB)
/* _profmain(); */
CALL _profmain(SB)
/* _tos->prof.pp = _tos->prof.next; */
MOVQ _tos+0(SB),DX
MOVQ 4(DX),CX
MOVQ CX,(DX)
CALL _envsetup(SB)
/* main(argc, argv, environ); */
MOVL inargc-8(FP), RARG
LEAQ inargv+0(FP), AX
MOVQ AX, 8(SP)
MOVQ environ(SB), AX
MOVQ AX, 16(SP)
CALL main(SB)
loop:
MOVL AX, RARG
CALL exit(SB)
MOVQ $_profin(SB), AX /* force loading of profile */
MOVL $0, AX
JMP loop
TEXT _savearg(SB), 1, $0
RET
TEXT _callpc(SB), 1, $0
MOVQ 8(RARG), AX
RET

View file

@ -0,0 +1,20 @@
APE=/sys/src/ape
objtype=amd64
<$APE/config
LIB=/$objtype/lib/ape/libap.a
OFILES=\
_seek.$O\
cycles.$O\
lock.$O\
main9.$O\
main9p.$O\
notetramp.$O\
setjmp.$O\
strchr.$O\
strlen.$O\
tas.$O\
</sys/src/cmd/mksyslib
CFLAGS=-c -D_POSIX_SOURCE -D_PLAN9_SOURCE

View file

@ -0,0 +1,81 @@
#include "../plan9/lib.h"
#include "../plan9/sys9.h"
#include <signal.h>
#include <setjmp.h>
/* A stack to hold pcs when signals nest */
#define MAXSIGSTACK 20
typedef struct Pcstack Pcstack;
static struct Pcstack {
int sig;
void (*hdlr)(int, char*, Ureg*);
unsigned long long restorepc;
Ureg *u;
} pcstack[MAXSIGSTACK];
static int nstack = 0;
static void notecont(Ureg*, char*);
void
_notetramp(int sig, void (*hdlr)(int, char*, Ureg*), Ureg *u)
{
Pcstack *p;
if(nstack >= MAXSIGSTACK)
_NOTED(1); /* nesting too deep; just do system default */
p = &pcstack[nstack];
p->restorepc = u->pc;
p->sig = sig;
p->hdlr = hdlr;
p->u = u;
nstack++;
u->pc = (unsigned long long) notecont;
_NOTED(2); /* NSAVE: clear note but hold state */
}
static void
notecont(Ureg *u, char *s)
{
Pcstack *p;
void(*f)(int, char*, Ureg*);
p = &pcstack[nstack-1];
f = p->hdlr;
u->pc = p->restorepc;
nstack--;
(*f)(p->sig, s, u);
_NOTED(3); /* NRSTR */
}
#define JMPBUFPC 1
#define JMPBUFSP 0
extern sigset_t _psigblocked;
typedef struct {
sigset_t set;
sigset_t blocked;
unsigned long long jmpbuf[2];
} sigjmp_buf_amd64;
void
siglongjmp(sigjmp_buf j, int ret)
{
struct Ureg *u;
sigjmp_buf_amd64 *jb;
jb = (sigjmp_buf_amd64*)j;
if(jb->set)
_psigblocked = jb->blocked;
if(nstack == 0 || pcstack[nstack-1].u->sp > jb->jmpbuf[JMPBUFSP])
longjmp((void*)jb->jmpbuf, ret);
u = pcstack[nstack-1].u;
nstack--;
u->ax = ret;
if(ret == 0)
u->ax = 1;
u->pc = jb->jmpbuf[JMPBUFPC];
u->sp = jb->jmpbuf[JMPBUFSP] + 8;
_NOTED(3); /* NRSTR */
}

View file

@ -0,0 +1,27 @@
TEXT longjmp(SB), $0
MOVL r+8(FP), AX
CMPL AX, $0
JNE ok /* ansi: "longjmp(0) => longjmp(1)" */
MOVL $1, AX /* bless their pointed heads */
ok:
MOVQ 0(RARG), SP /* restore sp */
MOVQ 8(RARG), BX /* put return pc on the stack */
MOVQ BX, 0(SP)
RET
TEXT setjmp(SB), $0
MOVQ SP, 0(RARG) /* store sp */
MOVQ 0(SP), BX /* store return pc */
MOVQ BX, 8(RARG)
MOVL $0, AX /* return 0 */
RET
TEXT sigsetjmp(SB), $0
MOVL savemask+8(FP), BX
MOVL BX, 0(RARG)
MOVL $_psigblocked(SB), 4(RARG)
MOVQ SP, 8(RARG) /* store sp */
MOVQ 0(SP), BX /* store return pc */
MOVQ BX, 16(RARG)
MOVL $0, AX /* return 0 */
RET

View file

@ -0,0 +1,38 @@
TEXT strchr(SB), $0
MOVQ RARG, DI
MOVB c+8(FP), AX
CMPB AX, $0
JEQ l2 /**/
/*
* char is not null
*/
l1:
MOVB (DI), BX
CMPB BX, $0
JEQ ret0
ADDQ $1, DI
CMPB AX, BX
JNE l1
MOVQ DI, AX
SUBQ $1, AX
RET
/*
* char is null
*/
l2:
MOVQ $-1, CX
CLD
REPN; SCASB
MOVQ DI, AX
SUBQ $1, AX
RET
ret0:
MOVQ $0, AX
RET

View file

@ -0,0 +1,16 @@
TEXT strlen(SB),$0
MOVL $0, AX
MOVQ $-1, CX
CLD
/*
* look for end of string
*/
MOVQ RARG, DI
REPN; SCASB
MOVQ DI, AX
SUBQ RARG, AX
SUBQ $1, AX
RET

View file

@ -0,0 +1,5 @@
TEXT tas(SB),$0
MOVL $0xdeadead,AX
XCHGL AX,(RARG)
RET

View file

@ -0,0 +1,14 @@
#include <u.h>
#include <libc.h>
extern int _seek(vlong*, int, vlong, int);
vlong
seek(int fd, vlong o, int p)
{
vlong l;
if(_seek(&l, fd, o, p) < 0)
l = -1LL;
return l;
}

View file

@ -0,0 +1,4 @@
GLOBL argv0(SB), $8
GLOBL _tos(SB), $8
GLOBL _privates(SB), $8
GLOBL _nprivates(SB), $4

69
sys/src/libc/amd64/atom.s Normal file
View file

@ -0,0 +1,69 @@
TEXT ainc(SB), 1, $0 /* int ainc(int *); */
ainclp:
MOVL (RARG), AX /* exp */
MOVL AX, BX
INCL BX /* new */
LOCK; CMPXCHGL BX, (RARG)
JNZ ainclp
MOVL BX, AX
RET
TEXT adec(SB), 1, $0 /* int adec(int*); */
adeclp:
MOVL (RARG), AX
MOVL AX, BX
DECL BX
LOCK; CMPXCHGL BX, (RARG)
JNZ adeclp
MOVL BX, AX
RET
/*
* int cas32(u32int *p, u32int ov, u32int nv);
* int cas(uint *p, int ov, int nv);
* int casl(ulong *p, ulong ov, ulong nv);
*/
TEXT cas32(SB), 1, $0
TEXT cas(SB), 1, $0
TEXT casul(SB), 1, $0
TEXT casl(SB), 1, $0 /* back compat */
MOVL exp+8(FP), AX
MOVL new+16(FP), BX
LOCK; CMPXCHGL BX, (RARG)
MOVL $1, AX /* use CMOVLEQ etc. here? */
JNZ _cas32r0
_cas32r1:
RET
_cas32r0:
DECL AX
RET
/*
* int cas64(u64int *p, u64int ov, u64int nv);
* int casp(void **p, void *ov, void *nv);
*/
TEXT cas64(SB), 1, $0
TEXT casp(SB), 1, $0
MOVQ exp+8(FP), AX
MOVQ new+16(FP), BX
LOCK; CMPXCHGQ BX, (RARG)
MOVL $1, AX /* use CMOVLEQ etc. here? */
JNZ _cas64r0
_cas64r1:
RET
_cas64r0:
DECL AX
RET
TEXT fas64(SB), 1, $-4
TEXT fasp(SB), 1, $-4
MOVQ p+8(FP), AX
LOCK; XCHGQ AX, (RARG) /* */
RET
TEXT fas32(SB), 1, $-4
MOVL p+8(FP), AX
LOCK; XCHGL AX, (RARG) /* */
RET

View file

@ -0,0 +1,5 @@
TEXT cycles(SB),1,$0 /* time stamp counter; cycles since power up */
RDTSC
MOVL AX, 0(RARG) /* lo */
MOVL DX, 4(RARG) /* hi */
RET

View file

@ -0,0 +1,3 @@
TEXT getcallerpc(SB), 1, $0
MOVQ -8(RARG), AX
RET

View file

@ -0,0 +1,38 @@
TEXT setfcr(SB), $4
XORL $(0x3F<<7),RARG /* bits are cleared in csr to enable them */
ANDL $0xFFC0, RARG /* just the fcr bits */
WAIT /* is this needed? */
STMXCSR 0(SP)
MOVL 0(SP), AX
ANDL $~0x3F, AX
ORL RARG, AX
MOVL AX, 0(SP)
LDMXCSR 0(SP)
RET
TEXT getfcr(SB), $4
WAIT
STMXCSR 0(SP)
MOVWLZX 0(SP), AX
ANDL $0xFFC0, AX
XORL $(0x3F<<7),AX
RET
TEXT getfsr(SB), $4
WAIT
STMXCSR 0(SP)
MOVL 0(SP), AX
ANDL $0x3F, AX
RET
TEXT setfsr(SB), $4
ANDL $0x3F, RARG
WAIT
STMXCSR 0(SP)
MOVL 0(SP), AX
ANDL $~0x3F, AX
ORL RARG, AX
MOVL AX, 0(SP)
LDMXCSR 0(SP)
RET

View file

@ -0,0 +1,19 @@
#define NPRIVATES 16
TEXT _main(SB), 1, $(2*8+NPRIVATES*8)
MOVQ AX, _tos(SB)
LEAQ 16(SP), AX
MOVQ AX, _privates(SB)
MOVL $NPRIVATES, _nprivates(SB)
MOVL inargc-8(FP), RARG
LEAQ inargv+0(FP), AX
MOVQ AX, 8(SP)
CALL main(SB)
loop:
MOVQ $_exits<>(SB), RARG
CALL exits(SB)
JMP loop
DATA _exits<>+0(SB)/4, $"main"
GLOBL _exits<>+0(SB), $5

View file

@ -0,0 +1,41 @@
#define NPRIVATES 16
TEXT _mainp(SB), 1, $(2*8+NPRIVATES*8)
MOVQ AX, _tos(SB) /* _tos = arg */
LEAQ 16(SP), AX
MOVQ AX, _privates(SB)
MOVL $NPRIVATES, _nprivates(SB)
CALL _profmain(SB) /* _profmain(); */
MOVQ _tos+0(SB), DX /* _tos->prof.pp = _tos->prof.next; */
MOVQ 8(DX), CX
MOVQ CX, (DX)
MOVL inargc-8(FP), RARG /* main(argc, argv); */
LEAQ inargv+0(FP), AX
MOVQ AX, 8(SP)
CALL main(SB)
loop:
MOVQ $_exits<>(SB), RARG
CALL exits(SB)
MOVQ $_profin(SB), AX /* force loading of profile */
JMP loop
TEXT _savearg(SB), 1, $0
MOVQ RARG, AX
RET
TEXT _saveret(SB), 1, $0
RET
TEXT _restorearg(SB), 1, $0
RET /* we want RARG in RARG */
TEXT _callpc(SB), 1, $0
MOVQ 8(RARG), AX
RET
DATA _exits<>+0(SB)/4, $"main"
GLOBL _exits<>+0(SB), $5

View file

@ -0,0 +1,58 @@
TEXT memccpy(SB),$0
MOVL n+24(FP), CX
CMPL CX, $0
JEQ none
MOVQ p2+8(FP), DI
MOVBLZX c+16(FP), AX
CLD
/*
* find the character in the second string
*/
REPN; SCASB
JEQ found
/*
* if not found, set count to 'n'
*/
none:
MOVL $0, AX
MOVL n+24(FP), BX
JMP memcpy
/*
* if found, set count to bytes thru character
*/
found:
MOVQ DI, AX
SUBQ p2+8(FP), AX
MOVQ AX, BX
ADDQ RARG, AX
/*
* copy the memory
*/
memcpy:
MOVQ RARG, DI
MOVQ p2+8(FP), SI
/*
* copy whole longs, if aligned
*/
MOVQ DI, DX
ORQ SI, DX
ANDL $3, DX
JNE c3
MOVL BX, CX
SHRQ $2, CX
REP; MOVSL
/*
* copy the rest, by bytes
*/
ANDL $3, BX
c3:
MOVL BX, CX
REP; MOVSB
RET

View file

@ -0,0 +1,23 @@
TEXT memchr(SB),$0
MOVL n+16(FP), CX
CMPL CX, $0
JEQ none
MOVQ RARG, DI
MOVBLZX c+8(FP), AX
CLD
/*
* SCASB is memchr instruction
*/
REPN; SCASB
JEQ found
none:
MOVL $0, AX
RET
found:
MOVQ DI, AX
SUBQ $1, AX
RET

View file

@ -0,0 +1,52 @@
TEXT memcmp(SB),$0
MOVL n+16(FP), BX
CMPL BX, $0
JEQ none
MOVQ RARG, DI
MOVQ p2+8(FP), SI
CLD
MOVQ DI, CX
ORQ SI, CX
ANDL $3, CX
JNE c3
/*
* first by longs
*/
MOVL BX, CX
SHRQ $2, CX
REP; CMPSL
JNE found
/*
* then by bytes
*/
ANDL $3, BX
c3:
MOVL BX, CX
REP; CMPSB
JNE found1
none:
MOVQ $0, AX
RET
/*
* if long found,
* back up and look by bytes
*/
found:
MOVL $4, CX
SUBQ CX, DI
SUBQ CX, SI
REP; CMPSB
found1:
JLS lt
MOVQ $-1, AX
RET
lt:
MOVQ $1, AX
RET

View file

@ -0,0 +1,81 @@
TEXT memcpy(SB), $0
MOVQ RARG, DI
MOVQ DI, AX /* return value */
MOVQ p2+8(FP), SI
MOVL n+16(FP), BX
CMPL BX, $0
JGT _ok
JEQ _return /* nothing to do if n == 0 */
MOVL $0, SI /* fault if n < 0 */
/*
* check and set for backwards:
* (p2 < p1) && ((p2+n) > p1)
*/
_ok:
CMPQ SI, DI
JGT _forward
JEQ _return /* nothing to do if p2 == p1 */
MOVQ SI, DX
ADDQ BX, DX
CMPQ DX, DI
JGT _back
/*
* copy whole longs if aligned
*/
_forward:
CLD
MOVQ SI, DX
ORQ DI, DX
ANDL $3, DX
JNE c3f
MOVQ BX, CX
SHRQ $2, CX
ANDL $3, BX
REP; MOVSL
/*
* copy the rest, by bytes
*/
JEQ _return /* flags set by above ANDL */
c3f:
MOVL BX, CX
REP; MOVSB
RET
/*
* whole thing backwards has
* adjusted addresses
*/
_back:
ADDQ BX, DI
ADDQ BX, SI
STD
SUBQ $4, DI
SUBQ $4, SI
/*
* copy whole longs, if aligned
*/
MOVQ DI, DX
ORQ SI, DX
ANDL $3, DX
JNE c3b
MOVL BX, CX
SHRQ $2, CX
ANDL $3, BX
REP; MOVSL
/*
* copy the rest, by bytes
*/
JEQ _return /* flags set by above ANDL */
c3b:
ADDQ $3, DI
ADDQ $3, SI
MOVL BX, CX
REP; MOVSB
_return:
RET

View file

@ -0,0 +1,81 @@
TEXT memmove(SB), $0
MOVQ RARG, DI
MOVQ DI, AX /* return value */
MOVQ p2+8(FP), SI
MOVL n+16(FP), BX
CMPL BX, $0
JGT _ok
JEQ _return /* nothing to do if n == 0 */
MOVL $0, SI /* fault if n < 0 */
/*
* check and set for backwards:
* (p2 < p1) && ((p2+n) > p1)
*/
_ok:
CMPQ SI, DI
JGT _forward
JEQ _return /* nothing to do if p2 == p1 */
MOVQ SI, DX
ADDQ BX, DX
CMPQ DX, DI
JGT _back
/*
* copy whole longs if aligned
*/
_forward:
CLD
MOVQ SI, DX
ORQ DI, DX
ANDL $3, DX
JNE c3f
MOVQ BX, CX
SHRQ $2, CX
ANDL $3, BX
REP; MOVSL
/*
* copy the rest, by bytes
*/
JEQ _return /* flags set by above ANDL */
c3f:
MOVL BX, CX
REP; MOVSB
RET
/*
* whole thing backwards has
* adjusted addresses
*/
_back:
ADDQ BX, DI
ADDQ BX, SI
STD
SUBQ $4, DI
SUBQ $4, SI
/*
* copy whole longs, if aligned
*/
MOVQ DI, DX
ORQ SI, DX
ANDL $3, DX
JNE c3b
MOVL BX, CX
SHRQ $2, CX
ANDL $3, BX
REP; MOVSL
/*
* copy the rest, by bytes
*/
JEQ _return /* flags set by above ANDL */
c3b:
ADDQ $3, DI
ADDQ $3, SI
MOVL BX, CX
REP; MOVSB
_return:
RET

View file

@ -0,0 +1,41 @@
TEXT memset(SB),$0
CLD
MOVQ RARG, DI
MOVBLZX c+8(FP), AX
MOVL n+16(FP), BX
/*
* if not enough bytes, just set bytes
*/
CMPL BX, $9
JLS c3
/*
* if not aligned, just set bytes
*/
MOVQ RARG, CX
ANDL $3,CX
JNE c3
/*
* build word in AX
*/
MOVB AL, AH
MOVL AX, CX
SHLL $16, CX
ORL CX, AX
/*
* set whole longs
*/
c1:
MOVQ BX, CX
SHRQ $2, CX
ANDL $3, BX
REP; STOSL
/*
* set the rest, by bytes
*/
c3:
MOVL BX, CX
REP; STOSB
ret:
MOVQ RARG,AX
RET

41
sys/src/libc/amd64/mkfile Normal file
View file

@ -0,0 +1,41 @@
objtype=amd64
</$objtype/mkfile
LIB=/$objtype/lib/libc.a
SFILES=\
argv0.s\
atom.s\
cycles.s\
getfcr.s\
main9.s\
main9p.s\
memccpy.s\
memchr.s\
memcmp.s\
memcpy.s\
memmove.s\
memset.s\
muldiv.s\
setjmp.s\
sqrt.s\
strcat.s\
strchr.s\
strcpy.s\
strlen.s\
tas.s\
CFILES=\
_seek.c\
getcallerpc.c\
notejmp.c\
HFILES=/sys/include/libc.h
OFILES=${CFILES:%.c=%.$O} ${SFILES:%.s=%.$O}
UPDATE=mkfile\
$HFILES\
$CFILES\
$SFILES\
</sys/src/cmd/mksyslib

View file

@ -0,0 +1,12 @@
TEXT umuldiv(SB), $0
MOVL RARG, AX
MULL b+8(FP)
DIVL c+16(FP)
RET
TEXT muldiv(SB), $0
MOVL RARG, AX
IMULL b+8(FP)
IDIVL c+16(FP)
RET
END

View file

@ -0,0 +1,16 @@
#include <u.h>
#include <libc.h>
#include <ureg.h>
void
notejmp(void *vr, jmp_buf j, int ret)
{
struct Ureg *r = vr;
r->ax = ret;
if(ret == 0)
r->ax = 1;
r->pc = j[JMPBUFPC];
r->sp = j[JMPBUFSP] + 8;
noted(NCONT);
}

View file

@ -0,0 +1,17 @@
TEXT longjmp(SB), $0
MOVL r+8(FP), AX
CMPL AX, $0
JNE ok /* ansi: "longjmp(0) => longjmp(1)" */
MOVL $1, AX /* bless their pointed heads */
ok:
MOVQ 0(RARG), SP /* restore sp */
MOVQ 8(RARG), BX /* put return pc on the stack */
MOVQ BX, 0(SP)
RET
TEXT setjmp(SB), $0
MOVQ SP, 0(RARG) /* store sp */
MOVQ 0(SP), BX /* store return pc */
MOVQ BX, 8(RARG)
MOVL $0, AX /* return 0 */
RET

View file

@ -0,0 +1,4 @@
TEXT sqrt(SB), $0
MOVSD a+0(FP), X0
SQRTSD X0, X0
RET

View file

@ -0,0 +1,48 @@
TEXT strcat(SB),$0
MOVL $0, AX
MOVQ $-1, CX
CLD
/*
* find length of second string
*/
MOVQ p2+8(FP), DI
REPN; SCASB
MOVQ DI, BX
SUBQ p2+8(FP), BX
/*
* find end of first string
*/
MOVQ RARG, DI
REPN; SCASB
/*
* copy the memory
*/
SUBQ $1, DI
MOVQ p2+8(FP), SI
/*
* copy whole longs, if aligned
*/
MOVQ DI, CX
ORQ SI, CX
ANDL $3, CX
JNE c3
MOVQ BX, CX
SHRQ $2, CX
REP; MOVSL
/*
* copy the rest, by bytes
*/
ANDL $3, BX
c3:
MOVQ BX, CX
REP; MOVSB
MOVQ RARG, AX
RET

View file

@ -0,0 +1,38 @@
TEXT strchr(SB), $0
MOVQ RARG, DI
MOVB c+8(FP), AX
CMPB AX, $0
JEQ l2 /**/
/*
* char is not null
*/
l1:
MOVB (DI), BX
CMPB BX, $0
JEQ ret0
ADDQ $1, DI
CMPB AX, BX
JNE l1
MOVQ DI, AX
SUBQ $1, AX
RET
/*
* char is null
*/
l2:
MOVQ $-1, CX
CLD
REPN; SCASB
MOVQ DI, AX
SUBQ $1, AX
RET
ret0:
MOVQ $0, AX
RET

View file

@ -0,0 +1,40 @@
TEXT strcpy(SB),$0
MOVL $0, AX
MOVQ $-1, CX
CLD
/*
* find end of second string
*/
MOVQ p2+8(FP), DI
REPN; SCASB
MOVQ DI, BX
SUBQ p2+8(FP), BX
/*
* copy the memory
*/
MOVQ RARG, DI
MOVQ p2+8(FP), SI
/*
* copy whole longs, if aligned
*/
MOVQ DI, CX
ORQ SI, CX
ANDL $3, CX
JNE c3
MOVQ BX, CX
SHRQ $2, CX
REP; MOVSL
/*
* copy the rest, by bytes
*/
ANDL $3, BX
c3:
MOVL BX, CX
REP; MOVSB
MOVQ RARG, AX
RET

View file

@ -0,0 +1,16 @@
TEXT strlen(SB),$0
MOVL $0, AX
MOVQ $-1, CX
CLD
/*
* look for end of string
*/
MOVQ RARG, DI
REPN; SCASB
MOVQ DI, AX
SUBQ RARG, AX
SUBQ $1, AX
RET

5
sys/src/libc/amd64/tas.s Normal file
View file

@ -0,0 +1,5 @@
TEXT _tas(SB), 1, $0
MOVL $0xdeaddead,AX
XCHGL AX,(RARG)
RET

View file

@ -0,0 +1,20 @@
objtype=amd64
</$objtype/mkfile
LIB=/$objtype/lib/libmp.a
SFILES=\
mpdigdiv.s\
mpvecadd.s\
mpvecdigmuladd.s\
mpvecdigmulsub.s\
mpvecsub.s\
HFILES=/$objtype/include/u.h /sys/include/mp.h ../port/dat.h
OFILES=${SFILES:%.s=%.$O}
UPDATE=mkfile\
$HFILES\
$SFILES\
</sys/src/cmd/mksyslib

View file

@ -0,0 +1,21 @@
TEXT mpdigdiv(SB),$0
/* MOVL dividend+0(FP),BX */
MOVL 0(RARG),AX
MOVL 4(RARG),DX
MOVL divisor+8(FP),BX
MOVQ quotient+16(FP),DI
XORL CX,CX
CMPL DX,BX /* dividend >= 2^32 * divisor */
JHS _divovfl
CMPL BX,CX /* divisor == 0 */
JE _divovfl
DIVL BX /* AX = DX:AX/BX */
MOVL AX,0(DI)
RET
/* return all 1's */
_divovfl:
NOTL CX
MOVL CX,0(DI)
RET

View file

@ -0,0 +1,54 @@
/*
* mpvecadd(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *sum)
*
* sum[0:alen] = a[0:alen-1] + b[0:blen-1]
*
* prereq: alen >= blen, sum has room for alen+1 digits
*/
TEXT mpvecadd(SB),$0
MOVL alen+8(FP),DX
MOVL blen+24(FP),CX
/* MOVL a+0(FP),SI */
MOVQ RARG, SI
MOVQ b+16(FP),BX
SUBL CX,DX
MOVQ sum+32(FP),DI
XORL BP,BP /* this also sets carry to 0 */
/* skip addition if b is zero */
TESTL CX,CX
JZ _add1
/* sum[0:blen-1],carry = a[0:blen-1] + b[0:blen-1] */
_addloop1:
MOVL (SI)(BP*4), AX
ADCL (BX)(BP*4), AX
MOVL AX,(DI)(BP*4)
INCL BP
LOOP _addloop1
_add1:
/* jump if alen > blen */
INCL DX
MOVL DX,CX
LOOP _addloop2
/* sum[alen] = carry */
_addend:
JC _addcarry
MOVL $0,(DI)(BP*4)
RET
_addcarry:
MOVL $1,(DI)(BP*4)
RET
/* sum[blen:alen-1],carry = a[blen:alen-1] + 0 */
_addloop2:
MOVL (SI)(BP*4),AX
ADCL $0,AX
MOVL AX,(DI)(BP*4)
INCL BP
LOOP _addloop2
JMP _addend

View file

@ -0,0 +1,53 @@
/*
* mpvecdigmul(mpdigit *b, int n, mpdigit m, mpdigit *p)
*
* p += b*m
*
* each step look like:
* hi,lo = m*b[i]
* lo += oldhi + carry
* hi += carry
* p[i] += lo
* oldhi = hi
*
* the registers are:
* hi = DX - constrained by hardware
* lo = AX - constrained by hardware
* b+n = SI - can't be BP
* p+n = DI - can't be BP
* i-n = BP
* m = BX
* oldhi = CX
*
*/
TEXT mpvecdigmuladd(SB),$0
/* MOVQ b+0(FP),SI */
MOVQ RARG,SI
MOVL n+8(FP),CX
MOVL m+16(FP),BX
MOVQ p+24(FP),DI
MOVL CX,BP
NEGQ BP /* BP = -n */
SHLL $2,CX
ADDQ CX,SI /* SI = b + n */
ADDQ CX,DI /* DI = p + n */
XORL CX,CX
_muladdloop:
MOVL (SI)(BP*4),AX /* lo = b[i] */
MULL BX /* hi, lo = b[i] * m */
ADDL CX,AX /* lo += oldhi */
JCC _muladdnocarry1
INCL DX /* hi += carry */
_muladdnocarry1:
ADDL AX,(DI)(BP*4) /* p[i] += lo */
JCC _muladdnocarry2
INCL DX /* hi += carry */
_muladdnocarry2:
MOVL DX,CX /* oldhi = hi */
INCQ BP /* i++ */
JNZ _muladdloop
XORL AX,AX
ADDL CX,(DI)(BP*4) /* p[n] + oldhi */
ADCL AX,AX /* return carry out of p[n] */
RET

View file

@ -0,0 +1,53 @@
/*
* mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p)
*
* p -= b*m
*
* each step look like:
* hi,lo = m*b[i]
* lo += oldhi + carry
* hi += carry
* p[i] += lo
* oldhi = hi
*
* the registers are:
* hi = DX - constrained by hardware
* lo = AX - constrained by hardware
* b = SI - can't be BP
* p = DI - can't be BP
* i = BP
* n = CX - constrained by LOOP instr
* m = BX
* oldhi = R8
*
*/
TEXT mpvecdigmulsub(SB),$0
/* MOVL b+0(FP),SI */
MOVQ RARG,SI
MOVL n+8(FP),CX
MOVL m+16(FP),BX
MOVQ p+24(FP),DI
XORL BP,BP
MOVL BP,R8
_mulsubloop:
MOVL (SI)(BP*4),AX /* lo = b[i] */
MULL BX /* hi, lo = b[i] * m */
ADDL R8,AX /* lo += oldhi */
JCC _mulsubnocarry1
INCL DX /* hi += carry */
_mulsubnocarry1:
SUBL AX,(DI)(BP*4)
JCC _mulsubnocarry2
INCL DX /* hi += carry */
_mulsubnocarry2:
MOVL DX,R8
INCL BP
LOOP _mulsubloop
SUBL R8,(DI)(BP*4)
JCC _mulsubnocarry3
MOVQ $-1,AX
RET
_mulsubnocarry3:
MOVQ $1,AX
RET

View file

@ -0,0 +1,45 @@
/*
* mpvecsub(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *diff)
*
* diff[0:alen-1] = a[0:alen-1] - b[0:blen-1]
*
* prereq: alen >= blen, diff has room for alen digits
*/
TEXT mpvecsub(SB),$0
/* MOVQ a+0(FP),SI */
MOVQ RARG, SI
MOVQ b+16(FP),BX
MOVL alen+8(FP),DX
MOVL blen+24(FP),CX
MOVQ diff+32(FP),DI
SUBL CX,DX
XORL BP,BP /* this also sets carry to 0 */
/* skip subraction if b is zero */
TESTL CX,CX
JZ _sub1
/* diff[0:blen-1],borrow = a[0:blen-1] - b[0:blen-1] */
_subloop1:
MOVL (SI)(BP*4),AX
SBBL (BX)(BP*4),AX
MOVL AX,(DI)(BP*4)
INCL BP
LOOP _subloop1
_sub1:
INCL DX
MOVL DX,CX
LOOP _subloop2
RET
/* diff[blen:alen-1] = a[blen:alen-1] - 0 */
_subloop2:
MOVL (SI)(BP*4),AX
SBBL $0,AX
MOVL AX,(DI)(BP*4)
INCL BP
LOOP _subloop2
RET

View file

@ -0,0 +1,212 @@
/*
* rfc1321 requires that I include this. The code is new. The constants
* all come from the rfc (hence the copyright). We trade a table for the
* macros in rfc. The total size is a lot less. -- presotto
*
* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
* rights reserved.
*
* License to copy and use this software is granted provided that it
* is identified as the "RSA Data Security, Inc. MD5 Message-Digest
* Algorithm" in all material mentioning or referencing this software
* or this function.
*
* License is also granted to make and use derivative works provided
* that such works are identified as "derived from the RSA Data
* Security, Inc. MD5 Message-Digest Algorithm" in all material
* mentioning or referencing the derived work.
*
* RSA Data Security, Inc. makes no representations concerning either
* the merchantability of this software or the suitability of this
* software forany particular purpose. It is provided "as is"
* without express or implied warranty of any kind.
* These notices must be retained in any copies of any part of this
* documentation and/or software.
*/
#define S11 7
#define S12 12
#define S13 17
#define S14 22
#define S21 5
#define S22 9
#define S23 14
#define S24 20
#define S31 4
#define S32 11
#define S33 16
#define S34 23
#define S41 6
#define S42 10
#define S43 15
#define S44 21
/*
* SI is data
* a += FN(B,C,D);
* a += x[sh] + t[sh];
* a = (a << S11) | (a >> (32 - S11));
* a += b;
*/
#define BODY1(off,V,FN,SH,A,B,C,D)\
FN(B,C,D)\
LEAL V(A)(DI*1),A;\
ADDL (off)(BP),A;\
ROLL $SH,A;\
ADDL B,A;\
#define BODY(off,V,FN,SH,A,B,C,D)\
FN(B,C,D)\
LEAL V(A)(DI*1),A;\
ADDL (off)(BP),A;\
ROLL $SH,A;\
ADDL B,A;\
/*
* fn1 = ((c ^ d) & b) ^ d
*/
#define FN1(B,C,D)\
MOVL C,DI;\
XORL D,DI;\
ANDL B,DI;\
XORL D,DI;\
/*
* fn2 = ((b ^ c) & d) ^ c;
*/
#define FN2(B,C,D)\
MOVL B,DI;\
XORL C,DI;\
ANDL D,DI;\
XORL C,DI;\
/*
* fn3 = b ^ c ^ d;
*/
#define FN3(B,C,D)\
MOVL B,DI;\
XORL C,DI;\
XORL D,DI;\
/*
* fn4 = c ^ (b | ~d);
*/
#define FN4(B,C,D)\
MOVL D,DI;\
XORL $-1,DI;\
ORL B,DI;\
XORL C,DI;\
#define LEN 8
#define STATE 16
TEXT _md5block+0(SB), $0
MOVQ RARG,R8
MOVLQZX len+LEN(FP),BX
ADDQ BX,R8
mainloop:
MOVQ state+STATE(FP),SI
MOVL (SI),AX
MOVL 4(SI),BX
MOVL 8(SI),CX
MOVL 12(SI),DX
BODY1( 0*4,0xd76aa478,FN1,S11,AX,BX,CX,DX)
BODY1( 1*4,0xe8c7b756,FN1,S12,DX,AX,BX,CX)
BODY1( 2*4,0x242070db,FN1,S13,CX,DX,AX,BX)
BODY1( 3*4,0xc1bdceee,FN1,S14,BX,CX,DX,AX)
BODY1( 4*4,0xf57c0faf,FN1,S11,AX,BX,CX,DX)
BODY1( 5*4,0x4787c62a,FN1,S12,DX,AX,BX,CX)
BODY1( 6*4,0xa8304613,FN1,S13,CX,DX,AX,BX)
BODY1( 7*4,0xfd469501,FN1,S14,BX,CX,DX,AX)
BODY1( 8*4,0x698098d8,FN1,S11,AX,BX,CX,DX)
BODY1( 9*4,0x8b44f7af,FN1,S12,DX,AX,BX,CX)
BODY1(10*4,0xffff5bb1,FN1,S13,CX,DX,AX,BX)
BODY1(11*4,0x895cd7be,FN1,S14,BX,CX,DX,AX)
BODY1(12*4,0x6b901122,FN1,S11,AX,BX,CX,DX)
BODY1(13*4,0xfd987193,FN1,S12,DX,AX,BX,CX)
BODY1(14*4,0xa679438e,FN1,S13,CX,DX,AX,BX)
BODY1(15*4,0x49b40821,FN1,S14,BX,CX,DX,AX)
BODY( 1*4,0xf61e2562,FN2,S21,AX,BX,CX,DX)
BODY( 6*4,0xc040b340,FN2,S22,DX,AX,BX,CX)
BODY(11*4,0x265e5a51,FN2,S23,CX,DX,AX,BX)
BODY( 0*4,0xe9b6c7aa,FN2,S24,BX,CX,DX,AX)
BODY( 5*4,0xd62f105d,FN2,S21,AX,BX,CX,DX)
BODY(10*4,0x02441453,FN2,S22,DX,AX,BX,CX)
BODY(15*4,0xd8a1e681,FN2,S23,CX,DX,AX,BX)
BODY( 4*4,0xe7d3fbc8,FN2,S24,BX,CX,DX,AX)
BODY( 9*4,0x21e1cde6,FN2,S21,AX,BX,CX,DX)
BODY(14*4,0xc33707d6,FN2,S22,DX,AX,BX,CX)
BODY( 3*4,0xf4d50d87,FN2,S23,CX,DX,AX,BX)
BODY( 8*4,0x455a14ed,FN2,S24,BX,CX,DX,AX)
BODY(13*4,0xa9e3e905,FN2,S21,AX,BX,CX,DX)
BODY( 2*4,0xfcefa3f8,FN2,S22,DX,AX,BX,CX)
BODY( 7*4,0x676f02d9,FN2,S23,CX,DX,AX,BX)
BODY(12*4,0x8d2a4c8a,FN2,S24,BX,CX,DX,AX)
BODY( 5*4,0xfffa3942,FN3,S31,AX,BX,CX,DX)
BODY( 8*4,0x8771f681,FN3,S32,DX,AX,BX,CX)
BODY(11*4,0x6d9d6122,FN3,S33,CX,DX,AX,BX)
BODY(14*4,0xfde5380c,FN3,S34,BX,CX,DX,AX)
BODY( 1*4,0xa4beea44,FN3,S31,AX,BX,CX,DX)
BODY( 4*4,0x4bdecfa9,FN3,S32,DX,AX,BX,CX)
BODY( 7*4,0xf6bb4b60,FN3,S33,CX,DX,AX,BX)
BODY(10*4,0xbebfbc70,FN3,S34,BX,CX,DX,AX)
BODY(13*4,0x289b7ec6,FN3,S31,AX,BX,CX,DX)
BODY( 0*4,0xeaa127fa,FN3,S32,DX,AX,BX,CX)
BODY( 3*4,0xd4ef3085,FN3,S33,CX,DX,AX,BX)
BODY( 6*4,0x04881d05,FN3,S34,BX,CX,DX,AX)
BODY( 9*4,0xd9d4d039,FN3,S31,AX,BX,CX,DX)
BODY(12*4,0xe6db99e5,FN3,S32,DX,AX,BX,CX)
BODY(15*4,0x1fa27cf8,FN3,S33,CX,DX,AX,BX)
BODY( 2*4,0xc4ac5665,FN3,S34,BX,CX,DX,AX)
BODY( 0*4,0xf4292244,FN4,S41,AX,BX,CX,DX)
BODY( 7*4,0x432aff97,FN4,S42,DX,AX,BX,CX)
BODY(14*4,0xab9423a7,FN4,S43,CX,DX,AX,BX)
BODY( 5*4,0xfc93a039,FN4,S44,BX,CX,DX,AX)
BODY(12*4,0x655b59c3,FN4,S41,AX,BX,CX,DX)
BODY( 3*4,0x8f0ccc92,FN4,S42,DX,AX,BX,CX)
BODY(10*4,0xffeff47d,FN4,S43,CX,DX,AX,BX)
BODY( 1*4,0x85845dd1,FN4,S44,BX,CX,DX,AX)
BODY( 8*4,0x6fa87e4f,FN4,S41,AX,BX,CX,DX)
BODY(15*4,0xfe2ce6e0,FN4,S42,DX,AX,BX,CX)
BODY( 6*4,0xa3014314,FN4,S43,CX,DX,AX,BX)
BODY(13*4,0x4e0811a1,FN4,S44,BX,CX,DX,AX)
BODY( 4*4,0xf7537e82,FN4,S41,AX,BX,CX,DX)
BODY(11*4,0xbd3af235,FN4,S42,DX,AX,BX,CX)
BODY( 2*4,0x2ad7d2bb,FN4,S43,CX,DX,AX,BX)
BODY( 9*4,0xeb86d391,FN4,S44,BX,CX,DX,AX)
ADDQ $(16*4),BP
MOVQ state+STATE(FP),DI
ADDL AX,0(DI)
ADDL BX,4(DI)
ADDL CX,8(DI)
ADDL DX,12(DI)
CMPQ BP,R8
JCS mainloop
RET

View file

@ -0,0 +1,19 @@
objtype=amd64
</$objtype/mkfile
LIB=/$objtype/lib/libsec.a
FILES=\
md5block\
sha1block\
HFILES=/sys/include/libsec.h
SFILES=${FILES:%=%.s}
OFILES=${FILES:%=%.$O}
UPDATE=mkfile\
$HFILES\
$SFILES\
</sys/src/cmd/mksyslib

View file

@ -0,0 +1,197 @@
/* x = (wp[off-f] ^ wp[off-8] ^ wp[off-14] ^ wp[off-16]) <<< 1;
* wp[off] = x;
* x += A <<< 5;
* E += 0xca62c1d6 + x;
* x = FN(B,C,D);
* E += x;
* B >>> 2
*/
#define BSWAPDI BYTE $0x0f; BYTE $0xcf;
#define BODY(off,FN,V,A,B,C,D,E)\
MOVL (off-64)(BP),DI;\
XORL (off-56)(BP),DI;\
XORL (off-32)(BP),DI;\
XORL (off-12)(BP),DI;\
ROLL $1,DI;\
MOVL DI,off(BP);\
LEAL V(DI)(E*1),E;\
MOVL A,DI;\
ROLL $5,DI;\
ADDL DI,E;\
FN(B,C,D)\
ADDL DI,E;\
RORL $2,B;\
#define BODY0(off,FN,V,A,B,C,D,E)\
MOVLQZX off(BX),DI;\
BSWAPDI;\
MOVL DI,off(BP);\
LEAL V(DI)(E*1),E;\
MOVL A,DI;\
ROLL $5,DI;\
ADDL DI,E;\
FN(B,C,D)\
ADDL DI,E;\
RORL $2,B;\
/*
* fn1 = (((C^D)&B)^D);
*/
#define FN1(B,C,D)\
MOVL C,DI;\
XORL D,DI;\
ANDL B,DI;\
XORL D,DI;\
/*
* fn24 = B ^ C ^ D
*/
#define FN24(B,C,D)\
MOVL B,DI;\
XORL C,DI;\
XORL D,DI;\
/*
* fn3 = ((B ^ C) & (D ^= B)) ^ B
* D ^= B to restore D
*/
#define FN3(B,C,D)\
MOVL B,DI;\
XORL C,DI;\
XORL B,D;\
ANDL D,DI;\
XORL B,DI;\
XORL B,D;\
/*
* stack offsets
* void sha1block(uchar *DATA, int LEN, ulong *STATE)
*/
#define DATA 0
#define LEN 8
#define STATE 16
/*
* stack offsets for locals
* ulong w[80];
* uchar *edata;
* ulong *w15, *w40, *w60, *w80;
* register local
* ulong *wp = BP
* ulong a = eax, b = ebx, c = ecx, d = edx, e = esi
* ulong tmp = edi
*/
#define Rpdata R8
#define WARRAY (-8-(80*4))
#define TMP1 (-16-(80*4))
#define TMP2 (-24-(80*4))
#define W15 (-32-(80*4))
#define W40 (-40-(80*4))
#define W60 (-48-(80*4))
#define W80 (-56-(80*4))
#define EDATA (-64-(80*4))
TEXT _sha1block+0(SB),$384
MOVQ RARG, Rpdata
MOVLQZX len+LEN(FP),BX
ADDQ BX, RARG
MOVQ RARG,edata+EDATA(SP)
LEAQ aw15+(WARRAY+15*4)(SP),DI
MOVQ DI,w15+W15(SP)
LEAQ aw40+(WARRAY+40*4)(SP),DX
MOVQ DX,w40+W40(SP)
LEAQ aw60+(WARRAY+60*4)(SP),CX
MOVQ CX,w60+W60(SP)
LEAQ aw80+(WARRAY+80*4)(SP),DI
MOVQ DI,w80+W80(SP)
mainloop:
LEAQ warray+WARRAY(SP),BP
MOVQ state+STATE(FP),DI
MOVL (DI),AX
MOVL 4(DI),BX
MOVL BX,tmp1+TMP1(SP)
MOVL 8(DI),CX
MOVL 12(DI),DX
MOVL 16(DI),SI
MOVQ Rpdata,BX
loop1:
BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI)
MOVL SI,tmp2+TMP2(SP)
BODY0(4,FN1,0x5a827999,SI,AX,tmp1+TMP1(SP),CX,DX)
MOVL tmp1+TMP1(SP),SI
BODY0(8,FN1,0x5a827999,DX,tmp2+TMP2(SP),AX,SI,CX)
BODY0(12,FN1,0x5a827999,CX,DX,tmp2+TMP2(SP),AX,SI)
MOVL SI,tmp1+TMP1(SP)
BODY0(16,FN1,0x5a827999,SI,CX,DX,tmp2+TMP2(SP),AX)
MOVL tmp2+TMP2(SP),SI
ADDQ $20,BX
ADDQ $20,BP
CMPQ BP,w15+W15(SP)
JCS loop1
BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI)
ADDQ $4,BX
MOVQ BX,R8
MOVQ tmp1+TMP1(SP),BX
BODY(4,FN1,0x5a827999,SI,AX,BX,CX,DX)
BODY(8,FN1,0x5a827999,DX,SI,AX,BX,CX)
BODY(12,FN1,0x5a827999,CX,DX,SI,AX,BX)
BODY(16,FN1,0x5a827999,BX,CX,DX,SI,AX)
ADDQ $20,BP
loop2:
BODY(0,FN24,0x6ed9eba1,AX,BX,CX,DX,SI)
BODY(4,FN24,0x6ed9eba1,SI,AX,BX,CX,DX)
BODY(8,FN24,0x6ed9eba1,DX,SI,AX,BX,CX)
BODY(12,FN24,0x6ed9eba1,CX,DX,SI,AX,BX)
BODY(16,FN24,0x6ed9eba1,BX,CX,DX,SI,AX)
ADDQ $20,BP
CMPQ BP,w40+W40(SP)
JCS loop2
loop3:
BODY(0,FN3,0x8f1bbcdc,AX,BX,CX,DX,SI)
BODY(4,FN3,0x8f1bbcdc,SI,AX,BX,CX,DX)
BODY(8,FN3,0x8f1bbcdc,DX,SI,AX,BX,CX)
BODY(12,FN3,0x8f1bbcdc,CX,DX,SI,AX,BX)
BODY(16,FN3,0x8f1bbcdc,BX,CX,DX,SI,AX)
ADDQ $20,BP
CMPQ BP,w60+W60(SP)
JCS loop3
loop4:
BODY(0,FN24,0xca62c1d6,AX,BX,CX,DX,SI)
BODY(4,FN24,0xca62c1d6,SI,AX,BX,CX,DX)
BODY(8,FN24,0xca62c1d6,DX,SI,AX,BX,CX)
BODY(12,FN24,0xca62c1d6,CX,DX,SI,AX,BX)
BODY(16,FN24,0xca62c1d6,BX,CX,DX,SI,AX)
ADDQ $20,BP
CMPQ BP,w80+W80(SP)
JCS loop4
MOVQ state+STATE(FP),DI
ADDL AX,0(DI)
ADDL BX,4(DI)
ADDL CX,8(DI)
ADDL DX,12(DI)
ADDL SI,16(DI)
MOVQ edata+EDATA(SP),DI
CMPQ Rpdata,DI
JCS mainloop
RET
END