plan9fox/sys/src/9/pc/main.c
2018-02-18 19:56:01 +01:00

757 lines
15 KiB
C

#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "tos.h"
#include "ureg.h"
#include "init.h"
#include "pool.h"
#include "reboot.h"
/* Mach structure used by the code below for per-machine state (machno, pdb, gdt, ...) */
Mach *m;
/* configuration record; mach0init() and confinit() fill in fields such as nmach, npage and nproc */
Conf conf;
/* when nonzero, main() calls bootlinks() and pcimatch() in place of links() */
int delaylink;
/* not referenced in the part of the file visible here */
int idle_spin;
/* optional hook: main() calls it only when it is non-nil */
extern void (*i8237alloc)(void);
/* called from main() right after archinit() */
extern void bootscreeninit(void);
/* not referenced in the part of the file visible here */
extern void multibootdebug(void);
/*
 * Boot-time initialisation.  Runs the individual init routines in a
 * fixed order, creates the first process with userinit() and then
 * calls schedinit().  The order of the calls is significant; do not
 * rearrange them without checking each routine's prerequisites.
 */
void
main(void)
{
mach0init();
bootargsinit();
ioinit();
i8250console();
quotefmtinstall();
screeninit();
print("\nPlan 9\n");
trapinit0();
i8253init();
cpuidentify();
meminit();
confinit();
xinit();
archinit();
bootscreeninit();
/* i8237alloc is an optional hook and may be nil */
if(i8237alloc != nil)
i8237alloc();
trapinit();
printinit();
cpuidprint();
mmuinit();
if(arch->intrinit) /* launches other processors on an mp */
arch->intrinit();
timersinit();
mathinit();
/* clockenable is an optional arch hook */
if(arch->clockenable)
arch->clockenable();
procinit0();
initseg();
/* delaylink selects bootlinks()+pcimatch() instead of links() */
if(delaylink){
bootlinks();
pcimatch(0, 0, 0);
}else
links();
chandevreset();
pageinit();
/* queue the first process, then enter the scheduler */
userinit();
schedinit();
}
/*
 * Prepare the Mach structure of processor 0: record a single
 * processor in conf, register CPU0MACH as MACHP(0), install the
 * page directory and descriptor table addresses, run the common
 * machinit() and mark machine 0 active.
 */
void
mach0init(void)
{
	conf.nmach = 1;
	MACHP(0) = (Mach*)CPU0MACH;

	/* machinit() carries pdb and gdt across its memset, so set them first */
	m->gdt = (Segdesc*)CPU0GDT;
	m->pdb = (ulong*)CPU0PDB;
	machinit();

	active.exiting = 0;
	active.machs[0] = 1;
}
/*
 * Reset the current Mach structure to zero while keeping the three
 * fields that identify the processor (machno, pdb, gdt), then set
 * the few non-zero defaults.
 */
void
machinit(void)
{
	Segdesc *keepgdt;
	ulong *keeppdb;
	int keepno;

	/* remember what has to survive the wipe */
	keepno = m->machno;
	keeppdb = m->pdb;
	keepgdt = m->gdt;

	memset(m, 0, sizeof(*m));

	m->gdt = keepgdt;
	m->pdb = keeppdb;
	m->machno = keepno;
	m->perf.period = 1;

	/*
	 * Polled uart output at boot needs some delay constant
	 * before the real one is known.  100000 should do for a
	 * while; cpuidentify will calculate the real value later.
	 */
	m->loopconst = 100000;
}
/*
 * Body of the first process (userinit() points sched.pc here).
 * Sets up the root and current directory, publishes a few
 * environment variables, starts the alarm kproc, builds the
 * initial argument vector on the user stack and enters user mode.
 */
void
init0(void)
{
	char **sp, buf[2*KNAMELEN];

	up->nerrlab = 0;
	spllo();

	/*
	 * These are o.k. because rootinit is null.
	 * Then early kproc's will have a root and dot.
	 */
	up->slash = namec("#/", Atodir, 0, 0);
	pathclose(up->slash->path);
	up->slash->path = newpath("/");
	up->dot = cclone(up->slash);

	chandevinit();

	/* environment setup is best effort: an error just skips the rest of it */
	if(!waserror()){
		snprint(buf, sizeof(buf), "%s %s", arch->id, conffile);
		ksetenv("terminal", buf, 0);
		ksetenv("cputype", "386", 0);
		ksetenv("service", cpuserver? "cpu": "terminal", 0);
		setconfenv();
		poperror();
	}
	kproc("alarm", alarmkproc, 0);

	/*
	 * Four pointer slots below the Tos structure (less 8 bytes):
	 * sp[0], sp[2] and sp[3] are nil, sp[1] points at the string
	 * "boot", which is stored right after the four slots.
	 */
	sp = (char**)(USTKTOP - sizeof(Tos) - 8 - sizeof(sp[0])*4);
	sp[0] = nil;
	sp[1] = (char*)&sp[4];
	strcpy(sp[1], "boot");
	sp[2] = nil;
	sp[3] = nil;

	touser(sp);
}
/*
 * Build the first process by hand and make it runnable.
 * The process gets fresh groups (pgrp, egrp, fgrp, rgrp), the name
 * "*init*", a kernel stack that starts executing init0(), a one-page
 * zeroed user stack at the top of the stack segment and a one-page
 * text segment at UTZERO holding a copy of initcode.
 */
void
userinit(void)
{
void *v;
Proc *p;
Segment *s;
Page *pg;
p = newproc();
p->pgrp = newpgrp();
p->egrp = smalloc(sizeof(Egrp));
p->egrp->ref = 1;
p->fgrp = dupfgrp(nil);
p->rgrp = newrgrp();
p->procmode = 0640;
/* eve starts out as the empty string and is also the owner of this process */
kstrdup(&eve, "");
kstrdup(&p->text, "*init*");
kstrdup(&p->user, eve);
procsetup(p);
/*
 * Kernel Stack
 *
 * N.B. make sure there's enough space for syscall to check
 * for valid args and
 * 4 bytes for gotolabel's return PC
 */
p->sched.pc = (ulong)init0;
p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Sargs)+BY2WD);
/*
 * User Stack
 *
 * N.B. cannot call newpage() with clear=1, because pc kmap
 * requires up != nil. use tmpmap instead.
 */
s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BY2PG);
p->seg[SSEG] = s;
pg = newpage(0, 0, USTKTOP-BY2PG);
segpage(s, pg);
/* zero the page through a temporary mapping */
v = tmpmap(pg);
memset(v, 0, BY2PG);
tmpunmap(v);
/*
 * Text
 */
s = newseg(SG_TEXT, UTZERO, 1);
s->flushme++;
p->seg[TSEG] = s;
pg = newpage(0, 0, UTZERO);
pg->txtflush = ~0;
segpage(s, pg);
/* zero the page, then copy initcode to its start */
v = tmpmap(pg);
memset(v, 0, BY2PG);
memmove(v, initcode, sizeof initcode);
tmpunmap(v);
ready(p);
}
/*
 * Derive the sizing fields of conf (npage, nproc, nimage, nswap,
 * nswppo, upages, ialloc) and the limits of the mainmem and imagmem
 * pools from the amount of memory in conf.mem[] and from the
 * configuration variables "service", "*kernelpercent" and
 * "*imagemaxmb".
 */
void
confinit(void)
{
char *p;
int i, userpcnt;
ulong kpages;
/* "service" may override the compiled-in cpuserver setting */
if(p = getconf("service")){
if(strcmp(p, "cpu") == 0)
cpuserver = 1;
else if(strcmp(p,"terminal") == 0)
cpuserver = 0;
}
/* userpcnt of 0 means "not specified"; defaults are chosen below */
if(p = getconf("*kernelpercent"))
userpcnt = 100 - strtol(p, 0, 0);
else
userpcnt = 0;
/* total number of pages over all memory banks */
conf.npage = 0;
for(i=0; i<nelem(conf.mem); i++)
conf.npage += conf.mem[i].npage;
/* 100 processes plus 5 per MB, tripled on a cpu server, capped at 2000 */
conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
if(cpuserver)
conf.nproc *= 3;
if(conf.nproc > 2000)
conf.nproc = 2000;
conf.nimage = 200;
conf.nswap = conf.nproc*80;
conf.nswppo = 4096;
if(cpuserver) {
/* anything below 10% user memory is replaced by the default of 70% */
if(userpcnt < 10)
userpcnt = 70;
kpages = conf.npage - (conf.npage*userpcnt)/100;
conf.nimage = conf.nproc;
/*
 * Hack for the big boys. Only good while physmem < 4GB.
 * Give the kernel fixed max + enough to allocate the
 * page pool.
 * This is an overestimate as conf.upages < conf.npage.
 * The patch of nimage is a band-aid, scanning the whole
 * page list in imagereclaim just takes too long.
 */
if(getconf("*imagemaxmb") == 0)
if(kpages > (64*MB + conf.npage*sizeof(Page))/BY2PG){
kpages = (64*MB + conf.npage*sizeof(Page))/BY2PG;
kpages += (conf.nproc*KSTACK)/BY2PG;
}
} else {
/* terminal defaults: 50% user memory below 16MB, otherwise 60% */
if(userpcnt < 10) {
if(conf.npage*BY2PG < 16*MB)
userpcnt = 50;
else
userpcnt = 60;
}
kpages = conf.npage - (conf.npage*userpcnt)/100;
/*
 * Make sure terminals with low memory get at least
 * 4MB on the first Image chunk allocation.
 */
if(conf.npage*BY2PG < 16*MB)
imagmem->minarena = 4*MB;
}
/*
 * can't go past the end of virtual memory
 * (ulong)-KZERO is 2^32 - KZERO
 */
if(kpages > ((ulong)-KZERO)/BY2PG)
kpages = ((ulong)-KZERO)/BY2PG;
conf.upages = conf.npage - kpages;
conf.ialloc = (kpages/2)*BY2PG;
/*
 * Guess how much is taken by the large permanent
 * datastructures. Mntcache and Mntrpc are not accounted for.
 * From here on kpages is a byte count, not a page count.
 */
kpages *= BY2PG;
kpages -= conf.upages*sizeof(Page)
+ conf.nproc*sizeof(Proc)
+ conf.nimage*sizeof(Image)
+ conf.nswap
+ conf.nswppo*sizeof(Page*);
mainmem->maxsize = kpages;
/*
 * the dynamic allocation will balance the load properly,
 * hopefully. be careful with 32-bit overflow.
 */
imagmem->maxsize = kpages - (kpages/10);
/* "*imagemaxmb" overrides the image pool limit, clamped to the main pool limit */
if(p = getconf("*imagemaxmb")){
imagmem->maxsize = strtol(p, nil, 0)*MB;
if(imagmem->maxsize > mainmem->maxsize)
imagmem->maxsize = mainmem->maxsize;
}
}
/*
 * We keep the FPsave structure in SSE (FXSAVE) format; for a legacy
 * x87 FPU the FXSAVE / FXRSTOR instructions are emulated in software.
 */
/*
 * Save the x87 state with fpx87save0() and then rewrite the saved
 * image in place so that it uses the fxsave field names and layout
 * (fsw, ftw, fop, fpuip, cs, fpudp, ds, xregs).
 *
 * NOTE(review): the in-place conversion and the high-to-low copy
 * order below suggest that the x87 fields (regs, tag, status, ...)
 * and the fxsave fields share storage in FPsave -- confirm against
 * the FPsave declaration before changing the order of any statement.
 */
void
fpx87save(FPsave *fps)
{
ushort tag;
fpx87save0(fps);
/*
 * convert x87 tag word to fxsave tag byte:
 * 00, 01, 10 -> 1, 11 -> 0
 * The first step leaves one bit per register in the even bit
 * positions; the following steps pack those 8 bits together.
 */
tag = ~fps->tag;
tag = (tag | (tag >> 1)) & 0x5555;
tag = (tag | (tag >> 1)) & 0x3333;
tag = (tag | (tag >> 2)) & 0x0F0F;
tag = (tag | (tag >> 4)) & 0x00FF;
/* NOP fps->fcw = fps->control; */
fps->fsw = fps->status;
fps->ftw = tag;
fps->fop = fps->opcode;
fps->fpuip = fps->pc;
fps->cs = fps->selector;
fps->fpudp = fps->operand;
fps->ds = fps->oselector;
/*
 * MOVA copies one 10-byte register image from s to d, highest
 * bytes first.  Source registers are 10 bytes apart (decimal
 * offsets into regs), destinations 16 bytes apart (hex offsets
 * into xregs).  Registers are moved from the last to the first.
 */
#define MOVA(d,s) \
*((ushort*)(d+8)) = *((ushort*)(s+8)), \
*((ulong*)(d+4)) = *((ulong*)(s+4)), \
*((ulong*)(d)) = *((ulong*)(s))
MOVA(fps->xregs+0x70, fps->regs+70);
MOVA(fps->xregs+0x60, fps->regs+60);
MOVA(fps->xregs+0x50, fps->regs+50);
MOVA(fps->xregs+0x40, fps->regs+40);
MOVA(fps->xregs+0x30, fps->regs+30);
MOVA(fps->xregs+0x20, fps->regs+20);
MOVA(fps->xregs+0x10, fps->regs+10);
MOVA(fps->xregs+0x00, fps->regs+00);
#undef MOVA
/* CLR6 zeroes the 6 bytes that follow the 10-byte image in each 16-byte slot */
#define CLR6(d) \
*((ulong*)(d)) = 0, \
*((ushort*)(d+4)) = 0
CLR6(fps->xregs+0x70+10);
CLR6(fps->xregs+0x60+10);
CLR6(fps->xregs+0x50+10);
CLR6(fps->xregs+0x40+10);
CLR6(fps->xregs+0x30+10);
CLR6(fps->xregs+0x20+10);
CLR6(fps->xregs+0x10+10);
CLR6(fps->xregs+0x00+10);
#undef CLR6
fps->rsrvd1 = fps->rsrvd2 = fps->mxcsr = fps->mxcsr_mask = 0;
}
/*
 * Inverse of fpx87save(): rewrite an fxsave-format image in place
 * into the x87 field layout (regs, tag, status, pc, ...) and load
 * it with fpx87restore0().
 *
 * NOTE(review): as in fpx87save(), the copy order below looks chosen
 * for fields that share storage -- confirm against the FPsave
 * declaration before reordering anything.
 */
void
fpx87restore(FPsave *fps)
{
ushort msk, tos, tag, *reg;
/* convert fxsave tag byte to x87 tag word */
tag = 0;
/*
 * Bits 11-13 of fsw are subtracted from 7 to get the xregs slot
 * that goes with tag bit 7; tos is decremented (and masked with 7
 * on use) as msk walks down to bit 0.
 */
tos = 7 - ((fps->fsw >> 11) & 7);
for(msk = 0x80; msk != 0; tos--, msk >>= 1){
tag <<= 2;
if((fps->ftw & msk) != 0){
/* non-empty register: classify it from its contents; reg[4] holds sign and exponent */
reg = (ushort*)&fps->xregs[(tos & 7) << 4];
switch(reg[4] & 0x7fff){
case 0x0000:
if((reg[0] | reg[1] | reg[2] | reg[3]) == 0){
tag |= 1; /* 01 zero */
break;
}
/* no break */
case 0x7fff:
tag |= 2; /* 10 special */
break;
default:
if((reg[3] & 0x8000) == 0)
break; /* 00 valid */
tag |= 2; /* 10 special */
break;
}
}else{
tag |= 3; /* 11 empty */
}
}
/*
 * MOVA copies one 10-byte register image from s to d, lowest
 * bytes first.  Sources are 16 bytes apart in xregs, destinations
 * 10 bytes apart in regs; registers are moved from first to last.
 */
#define MOVA(d,s) \
*((ulong*)(d)) = *((ulong*)(s)), \
*((ulong*)(d+4)) = *((ulong*)(s+4)), \
*((ushort*)(d+8)) = *((ushort*)(s+8))
MOVA(fps->regs+00, fps->xregs+0x00);
MOVA(fps->regs+10, fps->xregs+0x10);
MOVA(fps->regs+20, fps->xregs+0x20);
MOVA(fps->regs+30, fps->xregs+0x30);
MOVA(fps->regs+40, fps->xregs+0x40);
MOVA(fps->regs+50, fps->xregs+0x50);
MOVA(fps->regs+60, fps->xregs+0x60);
MOVA(fps->regs+70, fps->xregs+0x70);
#undef MOVA
/* the scalar fields are moved in the reverse order of fpx87save() */
fps->oselector = fps->ds;
fps->operand = fps->fpudp;
fps->opcode = fps->fop & 0x7ff;
fps->selector = fps->cs;
fps->pc = fps->fpuip;
fps->tag = tag;
fps->status = fps->fsw;
/* NOP fps->control = fps->fcw; */
fps->r1 = fps->r2 = fps->r3 = fps->r4 = 0;
fpx87restore0(fps);
}
/*
 * Messages for floating point exceptions, indexed by the bit number
 * in the status value that mathnote() tests ((1<<i) & status).
 * Entry 0 is nil because bit 0 is decoded separately in mathnote().
 */
static char* mathmsg[] =
{
nil, /* handled below */
"denormalized operand",
"division by zero",
"numeric overflow",
"numeric underflow",
"precision loss",
};
/*
 * Post a "sys: fp: ..." debug note to the current process describing
 * the floating point exception recorded in status; pc is reported
 * as fppc in the note text.
 *
 * Some attention should probably be paid here to the
 * exception masks and error summary.
 */
static void
mathnote(ulong status, ulong pc)
{
	char note[ERRMAX], *why;
	int bit;

	if(status & 0x01){
		/* bit 0 takes precedence over every other bit */
		if((status & 0x40) == 0)
			why = "invalid operation";
		else if(status & 0x200)
			why = "stack overflow";
		else
			why = "stack underflow";
	}else{
		/* otherwise report the lowest of bits 1..5 that is set */
		why = "unknown exception";
		for(bit = 1; bit <= 5; bit++)
			if(status & (1<<bit)){
				why = mathmsg[bit];
				break;
			}
	}

	snprint(note, sizeof note, "sys: fp: %s fppc=0x%lux status=0x%lux",
		why, pc, status);
	postnote(up, 1, note, NDebug);
}
/*
 * math coprocessor error
 *
 * Handler registered by mathinit(): saves the floating point state
 * of the current process, marks it inactive and posts a note built
 * from the saved status word and instruction pointer.
 */
static void
matherror(Ureg*, void*)
{
/*
 * a write cycle to port 0xF0 clears the interrupt latch attached
 * to the error# line from the 387
 */
if(!(m->cpuiddx & Fpuonchip))
outb(0xF0, 0xFF);
/*
 * get floating point state to check out error
 */
fpsave(up->fpsave);
up->fpstate = FPinactive;
mathnote(up->fpsave->fsw, up->fpsave->fpuip);
}
/*
 * SIMD error
 *
 * Handler registered by mathinit(): saves the floating point state
 * of the current process, marks it inactive and posts a note built
 * from the low six bits of the saved mxcsr and the trap pc.
 */
static void
simderror(Ureg *ureg, void*)
{
fpsave(up->fpsave);
up->fpstate = FPinactive;
mathnote(up->fpsave->mxcsr & 0x3f, ureg->pc);
}
/*
 * math coprocessor emulation fault
 *
 * Taken when the current process touches the FPU while it is not
 * active for that process.  Depending on up->fpstate the FPU is
 * either initialised for first use or reloaded from the saved
 * state; being here with the state already active is fatal.
 */
static void
mathemu(Ureg *ureg, void*)
{
	ulong fsw, fcw;

	if(up->fpstate & FPillegal){
		/* someone did floating point in a note handler */
		postnote(up, 1, "sys: floating point in note handler", NDebug);
		return;
	}

	switch(up->fpstate){
	case FPinit:
		/* first use: fresh FPU state and a save area */
		fpinit();
		if(fpsave == fpssesave)
			ldmxcsr(0x1f80);	/* no simd exceptions on 386 */
		while(up->fpsave == nil)
			up->fpsave = mallocalign(sizeof(FPsave), FPalign, 0, 0);
		up->fpstate = FPactive;
		break;

	case FPinactive:
		/*
		 * Before restoring the state, check for any pending
		 * exceptions, there's no way to restore the state without
		 * generating an unmasked exception.
		 * More attention should probably be paid here to the
		 * exception masks and error summary.
		 */
		fsw = up->fpsave->fsw;
		fcw = up->fpsave->fcw;
		if(((fsw & ~fcw) & 0x07F) == 0){
			fprestore(up->fpsave);
			up->fpstate = FPactive;
		}else
			mathnote(fsw, up->fpsave->fpuip);
		break;

	case FPactive:
		panic("math emu pid %ld %s pc 0x%lux",
			up->pid, up->text, ureg->pc);
		break;
	}
}
/*
 * math coprocessor segment overrun
 *
 * Handler registered by mathinit(): terminates the current process
 * with the exit string "math overrun".
 */
static void
mathover(Ureg*, void*)
{
pexit("math overrun", 0);
}
/*
 * Register the floating point trap handlers defined in this file:
 * matherror on VectorCERR, mathemu on VectorCNA, mathover on
 * VectorCSO and simderror on VectorSIMD.
 */
void
mathinit(void)
{
trapenable(VectorCERR, matherror, 0, "matherror");
/* on family 3 processors matherror is additionally hooked to IRQ13 */
if(X86FAMILY(m->cpuidax) == 3)
intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror");
trapenable(VectorCNA, mathemu, 0, "mathemu");
trapenable(VectorCSO, mathover, 0, "mathover");
trapenable(VectorSIMD, simderror, 0, "simderror");
}
/*
 * Set up the machine-dependent state of a new process: floating
 * point not yet used, cycle accounting started, and no private
 * descriptors or debug registers.
 */
void
procsetup(Proc *p)
{
	p->fpstate = FPinit;
	fpoff();

	/* pcycles starts as the negated entry time */
	cycles(&p->kentry);
	p->pcycles = -p->kentry;

	p->nldt = 0;
	p->ldt = nil;
	memset(p->dr, 0, sizeof p->dr);
	memset(p->gdt, 0, sizeof p->gdt);
}
/*
 * Copy the machine-dependent state of the current process (up)
 * into the new process p: cycle accounting, user descriptors, the
 * local descriptor table and the floating point state.  Debug
 * registers are not inherited.
 */
void
procfork(Proc *p)
{
	int s, fpst;

	p->kentry = up->kentry;
	p->pcycles = -p->kentry;

	/* inherit user descriptors */
	memmove(p->gdt, up->gdt, sizeof(p->gdt));

	/* copy local descriptor table */
	if(up->nldt > 0 && up->ldt != nil){
		p->ldt = smalloc(sizeof(Segdesc) * up->nldt);
		memmove(p->ldt, up->ldt, sizeof(Segdesc) * up->nldt);
		p->nldt = up->nldt;
	}

	/* save floating point state */
	s = splhi();
	fpst = up->fpstate & ~FPillegal;
	if(fpst == FPactive){
		/* flush the live registers so the parent's save area is current */
		fpsave(up->fpsave);
		up->fpstate = FPinactive;
	}
	if(fpst == FPactive || fpst == FPinactive){
		/* the child gets its own copy of the saved state */
		while(p->fpsave == nil)
			p->fpsave = mallocalign(sizeof(FPsave), FPalign, 0, 0);
		memmove(p->fpsave, up->fpsave, sizeof(FPsave));
		p->fpstate = FPinactive;
	}

	/* clear debug registers */
	memset(p->dr, 0, sizeof(p->dr));
	splx(s);
}
/*
 * Restore the mach dependent part of the process state:
 * debug registers, vmx state and, except for kernel processes,
 * the cycle accounting.
 */
void
procrestore(Proc *p)
{
	uvlong now;

	/* debug registers are only loaded when the process has dr7 set */
	if(p->dr[7] != 0){
		m->dr7 = p->dr[7];
		putdr(p->dr);
	}

	if(p->vmx != nil)
		vmxprocrestore(p);

	/* kernel processes (p->kp) skip the cycle accounting */
	if(!p->kp){
		cycles(&now);
		p->kentry += now;
		p->pcycles -= now;
	}
}
/*
 * Save the mach dependent part of the process state:
 * debug registers are disabled, cycle accounting is updated,
 * an active FPU state is saved (or discarded for a dying process)
 * and the TLB is flushed.
 */
void
procsave(Proc *p)
{
	uvlong now;

	/* we could just always putdr7(0) but accessing DR7 might be slow in a VM */
	if(m->dr7 != 0){
		m->dr7 = 0;
		putdr7(0);
	}

	cycles(&now);
	p->kentry -= now;
	p->pcycles += now;

	if(p->fpstate == FPactive){
		if(p->state != Moribund){
			/*
			 * Fpsave() stores without handling pending
			 * unmasked exceptions. Postnote() can't be called
			 * here as sleep() already has up->rlock, so
			 * the handling of pending exceptions is delayed
			 * until the process runs again and generates an
			 * emulation fault to activate the FPU.
			 */
			fpsave(p->fpsave);
		}else{
			/* the process is going away; its FPU state is not kept */
			fpclear();
		}
		p->fpstate = FPinactive;
	}

	/*
	 * While this processor is in the scheduler, the process could run
	 * on another processor and exit, returning the page tables to
	 * the free list where they could be reallocated and overwritten.
	 * When this processor eventually has to get an entry from the
	 * trashed page tables it will crash.
	 *
	 * If there's only one processor, this can't happen.
	 * You might think it would be a win not to do this in that case,
	 * especially on VMware, but it turns out not to matter.
	 */
	mmuflushtlb(PADDR(m->pdb));
}
/*
 * Shut the system down and jump into a new kernel image.
 *
 * entry, code and size are handed to the reboot trampoline: entry
 * with its top four address bits cleared, code as a physical
 * address (PADDR) and size unchanged.  The trampoline itself is
 * rebootcode, copied to REBOOTADDR.  Does not return.
 */
void
reboot(void *entry, void *code, ulong size)
{
void (*f)(ulong, ulong, ulong);
ulong *pdb;
writeconf();
vmxshutdown();
/*
 * the boot processor is cpu0. execute this function on it
 * so that the new kernel has the same cpu0. this only matters
 * because the hardware has a notion of which processor was the
 * boot processor and we look at it at start up.
 */
if (m->machno != 0) {
procwired(up, 0);
sched();
}
cpushutdown();
splhi();
/* turn off buffered serial console */
serialoq = nil;
/* shutdown devices */
chandevshutdown();
arch->introff();
/*
 * Modify the machine page table to directly map the low 4MB of memory
 * This allows the reboot code to turn off the page mapping
 */
pdb = m->pdb;
pdb[PDX(0)] = pdb[PDX(KZERO)];
mmuflushtlb(PADDR(pdb));
/* setup reboot trampoline function */
f = (void*)REBOOTADDR;
memmove(f, rebootcode, sizeof(rebootcode));
/* off we go - never to return */
coherence();
(*f)((ulong)entry & ~0xF0000000UL, PADDR(code), size);
}
/*
 * Shut the processors down with cpushutdown() and reset the machine
 * through the architecture's reset hook.  The integer argument is
 * unnamed and unused.
 */
void
exit(int)
{
cpushutdown();
arch->reset();
}