add experimental pc64 kernel

This commit is contained in:
cinap_lenrek 2014-02-01 10:25:10 +01:00
parent 28ad4e6616
commit 56343cafcf
15 changed files with 5385 additions and 6 deletions

View file

@ -0,0 +1,169 @@
/*
* Start an Application Processor. This must be placed on a 4KB boundary
* somewhere in the 1st MB of conventional memory (APBOOTSTRAP). However,
* due to some shortcuts below it's restricted further to within the 1st
* 64KB. The AP starts in real-mode, with
* CS selector set to the startup memory address/16;
* CS base set to startup memory address;
* CS limit set to 64KB;
* CPL and IP set to 0.
*/
#include "mem.h"
/*
 * Hand-assembled instruction encodings, emitted as raw bytes.
 * Judging by where they are used below, the r-prefixed macros are for
 * the 16-bit (MODE $16) code and pFARJMP32 for the 32-bit code
 * (presumably r = real mode, p = protected mode).
 * rFARJMP32 and rMOVAX are not used by the code visible in this file.
 */
#define NOP BYTE $0x90 /* NOP */
/* opcode 0xEA followed by a 32-bit offset o and a 16-bit selector s */
#define pFARJMP32(s, o) BYTE $0xea; /* far jmp ptr32:16 */ \
LONG $o; WORD $s
/* opcode 0xEA followed by a 16-bit offset o and a 16-bit selector s: 5 bytes */
#define rFARJMP16(s, o) BYTE $0xea; /* far jump ptr16:16 */ \
WORD $o; WORD $s;
/* 0x66 operand-size prefix in front of the 32-bit form */
#define rFARJMP32(s, o) BYTE $0x66; /* far jump ptr32:16 */ \
pFARJMP32(s, o)
/* 0F 01 /2 with ModRM 0x16: the operand is a 16-bit absolute address */
#define rLGDT(gdtptr) BYTE $0x0f; /* LGDT */ \
BYTE $0x01; BYTE $0x16; \
WORD $gdtptr
/* opcode 0xB8 followed by a 16-bit immediate */
#define rMOVAX(i) BYTE $0xb8; /* i -> AX */ \
WORD $i;
/* short jump with displacement 0, i.e. to the next instruction */
#define DELAY BYTE $0xEB; /* JMP .+2 */ \
BYTE $0x00
/*
 * Entry point, executed in 16-bit mode.
 * The far jump uses selector 0, so CS becomes 0 and the 16-bit offsets
 * used from here on are plain physical addresses below 64KB.
 * The jump is 5 bytes long; the three NOPs pad it to 8 bytes so that
 * the data slots that follow start at APBOOTSTRAP+0x08.
 */
MODE $16
TEXT apbootstrap(SB), 1, $-4
rFARJMP16(0, _apbootstrap-KZERO(SB))
NOP; NOP; NOP;
/*
 * Parameter slots at fixed offsets from APBOOTSTRAP, one quad each.
 * They are read by the code below:
 *	_apvector	address called at the end of _ap64;
 *	_appml4		loaded into CR3 by _ap32;
 *	_apapic		passed as the argument to the _apvector call;
 *	_apmach		becomes m (RMACH) and the base of the stack.
 * Presumably they are filled in by the processor that starts the AP;
 * the code doing so is not in this file.
 */
TEXT _apvector(SB), 1, $-4 /* address APBOOTSTRAP+0x08 */
QUAD $0
TEXT _appml4(SB), 1, $-4 /* address APBOOTSTRAP+0x10 */
QUAD $0
TEXT _apapic(SB), 1, $-4 /* address APBOOTSTRAP+0x18 */
QUAD $0
TEXT _apmach(SB), 1, $-4 /* address APBOOTSTRAP+0x20 */
QUAD $0
/*
 * Still in 16-bit mode: make DS match CS, load the temporary GDT,
 * set the protection-enable bit in CR0 and far-jump into the 32-bit
 * code through descriptor 3 of _gdt (the 32-bit exec segment).
 */
TEXT _apbootstrap(SB), 1, $-4
MOVW CS, AX
MOVW AX, DS /* initialise DS */
rLGDT(_gdtptr32p<>-KZERO(SB)) /* load a basic gdt */
MOVL CR0, AX
ORL $1, AX /* CR0 bit 0: protection enable */
MOVL AX, CR0 /* turn on protected mode */
DELAY /* JMP .+2 */
rFARJMP16(SELECTOR(3, SELGDT, 0), _ap32-KZERO(SB))
/*
 * Enable and activate Long Mode. From the manual:
 * make sure Page Size Extensions are off, and Page Global
 * Extensions and Physical Address Extensions are on in CR4;
 * set Long Mode Enable in the Extended Feature Enable MSR;
 * set Paging Enable in CR0;
 * make an inter-segment jump to the Long Mode code.
 * It's all in 32-bit mode until the jump is made.
 */
MODE $32
TEXT _ap32(SB), 1, $-4
MOVW $SELECTOR(2, SELGDT, 0), AX /* descriptor 2 of _gdt: 32-bit data */
MOVW AX, DS
MOVW AX, ES
MOVW AX, FS
MOVW AX, GS
MOVW AX, SS
MOVL _appml4-KZERO(SB), AX /* physical address of PML4 */
MOVL AX, CR3 /* load the mmu */
DELAY
MOVL CR4, AX
ANDL $~0x00000010, AX /* Page Size */
ORL $0x000000A0, AX /* Page Global, Phys. Address */
MOVL AX, CR4
MOVL $0xc0000080, CX /* Extended Feature Enable */
RDMSR
ORL $0x00000100, AX /* Long Mode Enable */
WRMSR
MOVL CR0, DX
ANDL $~0x6000000a, DX /* clear bits 30, 29, 3, 1: CD, NW, TS, MP */
ORL $0x80010000, DX /* Paging Enable, Write Protect */
MOVL DX, CR0
/* still running at physical addresses, hence the -KZERO target */
pFARJMP32(SELECTOR(KESEG, SELGDT, 0), _ap64-KZERO(SB))
/*
 * Long mode. Welcome to 2003.
 * Reload the GDT through its kernel virtual address;
 * clear the segment registers and the LDT;
 * set up m, up and the stack from _apmach;
 * clear the flags and call the C startup code whose
 * address is in _apvector, with _apapic as argument.
 *
 * NOTE(review): this comment used to list "jump out of the identity
 * map space" and "zap the identity map" and named m->splpc as the
 * call target. None of that is done by the code below; confirm it is
 * handled by the called function.
 */
MODE $64
TEXT _ap64(SB), 1, $-4
MOVQ $_gdtptr64v<>(SB), AX
MOVL (AX), GDTR
XORQ AX, AX
MOVW AX, DS /* not used in long mode */
MOVW AX, ES /* not used in long mode */
MOVW AX, FS
MOVW AX, GS
MOVW AX, SS /* not used in long mode */
MOVW AX, LDTR
MOVQ _apmach(SB), SP
MOVQ AX, RUSER /* up = 0; */
MOVQ SP, RMACH /* m = apmach */
ADDQ $MACHSIZE, SP /* stack starts at the top of the MACHSIZE area */
PUSHQ AX /* clear flags */
POPFQ
MOVQ _apvector(SB), AX
MOVQ _apapic(SB), RARG
PUSHQ RARG /* presumably the stack slot the callee expects for its argument */
CALL *AX
/* only reached if the called function returns */
_halt:
HLT
JMP _halt
/*
 * Temporary GDT used only while the AP climbs from real mode to long
 * mode: four 8-byte descriptors, each written as two LONGs.
 * Index 1 matches KESEG in mem.h; indices 2 and 3 are referenced by
 * number from _ap32 and _apbootstrap and do not follow the kernel's
 * KDSEG/UE32SEG meaning.
 */
TEXT _gdt<>(SB), 1, $-4
/* null descriptor */
LONG $0
LONG $0
/* (KESEG) 64 bit long mode exec segment */
LONG $(0xFFFF)
LONG $(SEGL|SEGG|SEGP|(0xF<<16)|SEGPL(0)|SEGEXEC|SEGR)
/* 32 bit data segment descriptor for 4 gigabytes (PL 0) */
LONG $(0xFFFF)
LONG $(SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(0)|SEGDATA|SEGW)
/* 32 bit exec segment descriptor for 4 gigabytes (PL 0) */
LONG $(0xFFFF)
LONG $(SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(0)|SEGEXEC|SEGR)
/*
 * GDT pointer operands: a 16-bit limit (4 descriptors * 8 bytes - 1)
 * followed by the table base.
 *	_gdtptr32p	32-bit physical base, loaded by _apbootstrap;
 *	_gdtptr64p	64-bit physical base, not referenced in this file;
 *	_gdtptr64v	64-bit kernel virtual base, loaded by _ap64.
 */
TEXT _gdtptr32p<>(SB), 1, $-4
WORD $(4*8-1)
LONG $_gdt<>-KZERO(SB)
TEXT _gdtptr64p<>(SB), 1, $-4
WORD $(4*8-1)
QUAD $_gdt<>-KZERO(SB)
TEXT _gdtptr64v<>(SB), 1, $-4
WORD $(4*8-1)
QUAD $_gdt<>(SB)

360
sys/src/9/pc64/dat.h Normal file
View file

@ -0,0 +1,360 @@
typedef struct BIOS32si BIOS32si;
typedef struct BIOS32ci BIOS32ci;
typedef struct Conf Conf;
typedef struct Confmem Confmem;
typedef union FPsave FPsave;
typedef struct Fxsave Fxsave;
typedef struct FPstate FPstate;
typedef struct ISAConf ISAConf;
typedef struct Label Label;
typedef struct Lock Lock;
typedef struct MMU MMU;
typedef struct Mach Mach;
typedef struct Notsave Notsave;
typedef struct PCArch PCArch;
typedef struct Pcidev Pcidev;
typedef struct PCMmap PCMmap;
typedef struct PCMslot PCMslot;
typedef struct Page Page;
typedef struct PMMU PMMU;
typedef struct Proc Proc;
typedef struct Segdesc Segdesc;
typedef vlong Tval;
typedef struct Ureg Ureg;
typedef struct Vctl Vctl;
#pragma incomplete BIOS32si
#pragma incomplete Pcidev
#pragma incomplete Ureg
#define MAXSYSARG 5 /* for mount(fd, afd, mpt, flag, arg) */
/*
* parameters for sysproc.c
*/
#define AOUT_MAGIC (S_MAGIC)
/*
 * Kernel lock. The routines that operate on it are not part of this
 * chunk, so the per-field notes below are assumptions to confirm.
 */
struct Lock
{
ulong key; /* presumably the lock word itself - TODO confirm */
ulong sr; /* presumably saved processor state for ilock - TODO confirm */
uintptr pc; /* presumably pc of the current holder - TODO confirm */
Proc *p; /* presumably the holding process - TODO confirm */
Mach *m; /* presumably the holding processor - TODO confirm */
ushort isilock;
long lockcycles;
};
/*
 * A saved stack pointer and program counter.
 * userinit() in main.c seeds Proc.sched with init0 and the top of the
 * new process's kernel stack; Mach.sched is the scheduler's own label.
 */
struct Label
{
uintptr sp;
uintptr pc;
};
/*
* FPsave.status
*/
enum
{
/* this is a state */
FPinit= 0,
FPactive= 1,
FPinactive= 2,
/* the following is a bit that can be or'd into the state */
FPillegal= 0x100,
};
/*
 * the FP regs must be stored here, not somewhere pointed to from here.
 * port code assumes this.
 *
 * Fxsave is the memory image handed to _fxsave and _fxrstor
 * (see fpssesave and fpsserestore in main.c); the fields add up to
 * 512 bytes.
 */
struct Fxsave {
u16int fcw; /* x87 control word */
u16int fsw; /* x87 status word */
u8int ftw; /* x87 tag word */
u8int zero; /* 0 */
u16int fop; /* last x87 opcode */
u64int rip; /* last x87 instruction pointer */
u64int rdp; /* last x87 data pointer */
u32int mxcsr; /* SSE (128-bit media) control and status */
u32int mxcsrmask; /* supported mxcsr bits */
uchar st[128]; /* shared 64-bit media and x87 regs */
uchar xmm[256]; /* 128-bit media regs */
uchar ign[96]; /* reserved, ignored */
};
/*
 * Per-process FP save area. It is 15 bytes larger than Fxsave so that
 * fpssesave/fpsserestore can round the address up to FPalign (16)
 * and still have room for a full 512-byte image.
 */
union FPsave {
uchar align[512+15];
Fxsave;
};
/*
 * One bank of physical memory; Conf.mem holds up to four.
 * confinit() sums npage over all banks to get conf.npage.
 */
struct Confmem
{
uintptr base; /* presumably physical base address - TODO confirm */
ulong npage; /* number of pages in the bank */
uintptr kbase; /* presumably kernel virtual range of the bank - TODO confirm */
uintptr klimit;
};
/*
 * System-wide configuration. Most sizing fields (nproc, nimage,
 * nswap, nswppo, npage, upages, ialloc) are filled in by confinit()
 * in main.c.
 */
struct Conf
{
ulong nmach; /* processors */
ulong nproc; /* processes */
ulong monitor; /* has monitor? */
Confmem mem[4]; /* physical memory */
ulong npage; /* total physical pages of memory */
ulong upages; /* user page pool */
ulong nimage; /* number of page cache image headers */
ulong nswap; /* number of swap pages */
int nswppo; /* max # of pageouts per segment pass */
ulong copymode; /* 0 is copy on write, 1 is copy on reference */
ulong ialloc; /* max interrupt time allocation in bytes */
ulong pipeqsize; /* size in bytes of pipe queues */
int nuart; /* number of uart devices */
};
/*
 * One 8-byte segment descriptor as two 32-bit words, the same shape
 * as the pairs of LONGs in apbootstrap.s's _gdt. Mach.gdt points to
 * an array of these.
 */
struct Segdesc
{
u32int d0;
u32int d1;
};
/*
 * MMU structure for PDP, PD, PT pages.
 * Instances are chained through next, both on a process's list
 * (PMMU.mmuhead/mmutail) and on the per-processor free list
 * (Mach.mmufree).
 */
struct MMU
{
MMU *next;
uintptr *page; /* presumably the kernel address of the table page - TODO confirm */
int index; /* presumably the entry index in the parent table - TODO confirm */
int level; /* presumably the page table level - TODO confirm */
};
/*
 * MMU stuff in proc: the list of MMU structures owned by a process.
 */
#define NCOLOR 1
struct PMMU
{
MMU *mmuhead; /* first MMU structure of the process */
MMU *mmutail; /* last MMU structure of the process */
int mmucount; /* presumably the length of the list - TODO confirm */
};
/*
 * things saved in the Proc structure during a notify
 * (by their names: the flags, CS and SS values; the code that uses
 * them is not in this chunk)
 */
struct Notsave
{
ulong svflags;
ulong svcs;
ulong svss;
};
#include "../port/portdat.h"
/*
 * Task state segment as laid out in memory.
 * The underscore-wrapped names encode the byte offset of the reserved
 * areas (0, 28 and 92). The 64-bit stack pointers are declared as
 * pairs (or a run) of u32int, which keeps every field at its required
 * offset without padding; iomap ends up at offset 102.
 */
typedef struct {
u32int _0_;
u32int rsp0[2];
u32int rsp1[2];
u32int rsp2[2];
u32int _28_[2];
u32int ist[14]; /* seven 64-bit interrupt stack pointers */
u16int _92_[5];
u16int iomap;
} Tss;
/*
 * Per-processor state. The first three fields are at offsets known to
 * assembly code and must not move.
 * The structure sits at the bottom of a MACHSIZE area; the AP startup
 * code (apbootstrap.s) points SP at the top of that area, so the
 * processor's boot stack grows down towards stack[].
 * machinit() zeroes everything except machno, pml4 and gdt.
 */
struct Mach
{
int machno; /* physical id of processor (KNOWN TO ASSEMBLY) */
uintptr splpc; /* pc of last caller to splhi (KNOWN TO ASSEMBLY) */
Proc* proc; /* current process on this processor (KNOWN TO ASSEMBLY) */
u64int* pml4; /* pml4 base for this processor (va) */
Tss* tss; /* tss for this processor */
Segdesc *gdt; /* gdt for this processor */
u64int mmumap[4]; /* bitmap of pml4 entries for zapping */
MMU* mmufree; /* freelist for MMU structures */
int mmucount; /* number of MMU structures in freelist */
int kmapindex; /* next KMAP page index for use */
ulong ticks; /* of the clock since boot time */
Label sched; /* scheduler wakeup */
Lock alarmlock; /* access to alarm list */
void* alarm; /* alarms bound to this clock */
int inclockintr;
Proc* readied; /* for runproc */
ulong schedticks; /* next forced context switch */
int tlbfault;
int tlbpurge;
int pfault;
int cs;
int syscall;
int load;
int intr;
int flushmmu; /* make current proc flush its mmu state */
int ilockdepth;
Perf perf; /* performance counters */
ulong spuriousintr;
int lastintr;
int loopconst; /* delay loop constant; machinit defaults it to 100000 */
int cpumhz;
uvlong cyclefreq; /* Frequency of user readable cycle counter */
uvlong cpuhz;
int cpuidax;
int cpuidcx;
int cpuiddx;
char cpuidid[16];
char* cpuidtype;
int havetsc;
int havepge;
uvlong tscticks;
int pdballoc;
int pdbfree;
vlong mtrrcap;
vlong mtrrdef;
vlong mtrrfix[11];
vlong mtrrvar[32]; /* 256 max. */
uintptr stack[1]; /* must stay last: start of the stack area */
};
/*
 * KMap the structure
 * A KMap is an opaque pointer here; VA() turns it into the mapped
 * address simply by casting (userinit() uses the kmap() result
 * directly as a pointer).
 */
typedef void KMap;
#define VA(k) ((void*)k)
/*
 * Global processor bookkeeping, protected by the embedded Lock.
 * mach0init() sets machs to 1 (bit 0, the boot processor) and main()
 * sets thunderbirdsarego just before entering the scheduler.
 */
struct
{
Lock;
int machs; /* bitmap of active CPUs */
int exiting; /* shutdown */
int ispanic; /* shutdown in response to a panic */
int thunderbirdsarego; /* lets the added processors continue to schedinit */
}active;
/*
 * routines for things outside the PC model, like power management
 *
 * The table in use is reached through the global arch. main() treats
 * intrinit and clockenable as optional (it checks them for nil before
 * calling); init0() uses id to build the "terminal" environment
 * variable.
 */
struct PCArch
{
char* id;
int (*ident)(void); /* this should be in the model */
void (*reset)(void); /* this should be in the model */
int (*serialpower)(int); /* 1 == on, 0 == off */
int (*modempower)(int); /* 1 == on, 0 == off */
void (*intrinit)(void);
int (*intrenable)(Vctl*);
int (*intrvecno)(int);
int (*intrdisable)(int);
void (*introff)(void);
void (*intron)(void);
void (*clockenable)(void);
uvlong (*fastclock)(uvlong*);
void (*timerset)(uvlong);
};
/*
 * cpuid instruction result register bits (function 1).
 * The values are plain bit masks to be tested against Mach.cpuidcx
 * or Mach.cpuiddx.
 */
enum {
/* cx */
Monitor = 1<<3,
/* dx */
Fpuonchip = 1<<0,
Vmex = 1<<1, /* virtual-mode extensions */
Pse = 1<<3, /* page size extensions */
Tsc = 1<<4, /* time-stamp counter */
Cpumsr = 1<<5, /* model-specific registers, rdmsr/wrmsr */
Pae = 1<<6, /* physical-addr extensions */
Mce = 1<<7, /* machine-check exception */
Cmpxchg8b = 1<<8,
Cpuapic = 1<<9,
Mtrr = 1<<12, /* memory-type range regs. */
Pge = 1<<13, /* page global extension */
Pse2 = 1<<17, /* more page size extensions */
Clflush = 1<<19,
Acpif = 1<<22, /* therm control msr */
Mmx = 1<<23,
Fxsr = 1<<24, /* have SSE FXSAVE/FXRSTOR */
Sse = 1<<25, /* thus sfence instr. */
Sse2 = 1<<26, /* thus mfence & lfence instr.s */
/*
 * NOTE(review): RDRAND is reported in cx (ECX bit 30), not dx, so
 * this constant belongs with Monitor above; make sure users test it
 * against cpuidcx.
 */
Rdrnd = 1<<30, /* RDRAND support bit */
};
/*
 * Model specific register numbers, for rdmsr/wrmsr.
 * Efer is the register the AP bootstrap code writes (as the literal
 * 0xc0000080) to set Long Mode Enable.
 */
enum { /* MSRs */
PerfEvtbase = 0xc0010000, /* Performance Event Select */
PerfCtrbase = 0xc0010004, /* Performance Counters */
Efer = 0xc0000080, /* Extended Feature Enable */
Star = 0xc0000081, /* Legacy Target IP and [CS]S */
Lstar = 0xc0000082, /* Long Mode Target IP */
Cstar = 0xc0000083, /* Compatibility Target IP */
Sfmask = 0xc0000084, /* SYSCALL Flags Mask */
FSbase = 0xc0000100, /* 64-bit FS Base Address */
GSbase = 0xc0000101, /* 64-bit GS Base Address */
KernelGSbase = 0xc0000102, /* SWAPGS instruction */
};
/*
 * a parsed plan9.ini line
 *
 * Filled in by isaconfig() in main.c: the value of the variable
 * "<class><ctlrno>" is split into at most NISAOPT tokens kept in opt[],
 * and the type=, port=, irq=, dma=, mem=, size= and freq= tokens are
 * decoded into the fields of the same name. Fields without a matching
 * token are left as the caller set them (only type is reset, to "").
 */
#define NISAOPT 8
struct ISAConf {
char *type;
ulong port;
int irq;
ulong dma;
ulong mem;
ulong size;
ulong freq;
int nopt; /* number of valid entries in opt[] */
char *opt[NISAOPT]; /* the raw tokens, pointing into the config text */
};
extern PCArch *arch; /* PC architecture */
Mach* machp[MAXMACH];
#define MACHP(n) (machp[n])
extern register Mach* m; /* R15 */
extern register Proc* up; /* R14 */
/*
 * hardware info about a device
 */
/* one I/O port range: first port and number of ports (per the field names) */
typedef struct {
ulong port;
int size;
} Devport;
struct DevConf
{
ulong intnum; /* interrupt number */
char *type; /* card type, malloced */
int nports; /* Number of ports */
Devport *ports; /* The ports themselves */
};
/*
 * Register image exchanged with a BIOS32 service; passed to
 * bios32call() and bios32ci() (declared in fns.h).
 */
typedef struct BIOS32ci { /* BIOS32 Calling Interface */
u32int eax;
u32int ebx;
u32int ecx;
u32int edx;
u32int esi;
u32int edi;
} BIOS32ci;

189
sys/src/9/pc64/fns.h Normal file
View file

@ -0,0 +1,189 @@
#include "../port/portfns.h"
void aamloop(int);
Dirtab* addarchfile(char*, int, long(*)(Chan*,void*,long,vlong), long(*)(Chan*,void*,long,vlong));
void archinit(void);
int bios32call(BIOS32ci*, u16int[3]);
int bios32ci(BIOS32si*, BIOS32ci*);
void bios32close(BIOS32si*);
BIOS32si* bios32open(char*);
void bootargs(void*);
uintptr cankaddr(uintptr);
int checksum(void *, int);
void clockintr(Ureg*, void*);
int (*cmpswap)(long*, long, long);
int cmpswap486(long*, long, long);
void (*coherence)(void);
void cpuid(int, ulong regs[]);
int cpuidentify(void);
void cpuidprint(void);
void (*cycles)(uvlong*);
void delay(int);
void* dmabva(int);
int dmacount(int);
int dmadone(int);
void dmaend(int);
int dmainit(int, int);
/*
 * NOTE(review): by position these look like the flag values for the
 * last argument of dmasetup() - confirm against the DMA driver.
 */
#define DMAWRITE 0
#define DMAREAD 1
#define DMALOOP 2
long dmasetup(int, void*, long, int);
#define evenaddr(x) /* x86 doesn't care */
void (*fprestore)(FPsave*);
void (*fpsave)(FPsave*);
void fpsserestore(FPsave*);
void fpssesave(FPsave*);
void fpx87restore(FPsave*);
void fpx87save(FPsave*);
u64int getcr0(void);
u64int getcr2(void);
u64int getcr3(void);
u64int getcr4(void);
char* getconf(char*);
void guesscpuhz(int);
void halt(void);
void mwait(void*);
int i8042auxcmd(int);
int i8042auxcmds(uchar*, int);
void i8042auxenable(void (*)(int, int));
void i8042reset(void);
void i8250console(void);
void* i8250alloc(int, int, int);
void i8253enable(void);
void i8253init(void);
void i8253reset(void);
uvlong i8253read(uvlong*);
void i8253timerset(uvlong);
int i8259disable(int);
int i8259enable(Vctl*);
void i8259init(void);
int i8259isr(int);
void i8259on(void);
void i8259off(void);
int i8259vecno(int);
void idle(void);
void idlehands(void);
int inb(int);
void insb(int, void*, int);
ushort ins(int);
void inss(int, void*, int);
ulong inl(int);
void insl(int, void*, int);
int intrdisable(int, void (*)(Ureg *, void *), void*, int, char*);
void intrenable(int, void (*)(Ureg*, void*), void*, int, char*);
void introff(void);
void intron(void);
void invlpg(uintptr);
void iofree(int);
void ioinit(void);
int iounused(int, int);
int ioalloc(int, int, int, char*);
int ioreserve(int, int, int, char*);
int iprint(char*, ...);
int isaconfig(char*, int, ISAConf*);
void* kaddr(uintptr);
void kbdenable(void);
void kbdinit(void);
KMap* kmap(Page*);
void kunmap(KMap*);
#define kmapinval()
void lgdt(void*);
void lidt(void*);
void links(void);
void ltr(ulong);
void mach0init(void);
void mathinit(void);
void mb386(void);
void mb586(void);
void meminit(void);
void memorysummary(void);
void mfence(void);
/* flush the TLB by reloading CR3 with its current value (used by procsave) */
#define mmuflushtlb() putcr3(getcr3())
void mmuinit(void);
uintptr *mmuwalk(uintptr*, uintptr, int, int);
int mtrr(uvlong, uvlong, char *);
void mtrrclock(void);
int mtrrprint(char *, long);
uchar nvramread(int);
void nvramwrite(int, uchar);
void outb(int, int);
void outsb(int, void*, int);
void outs(int, ushort);
void outss(int, void*, int);
void outl(int, ulong);
void outsl(int, void*, int);
uintptr paddr(void*);
ulong pcibarsize(Pcidev*, int);
void pcibussize(Pcidev*, ulong*, ulong*);
int pcicfgr8(Pcidev*, int);
int pcicfgr16(Pcidev*, int);
int pcicfgr32(Pcidev*, int);
void pcicfgw8(Pcidev*, int, int);
void pcicfgw16(Pcidev*, int, int);
void pcicfgw32(Pcidev*, int, int);
void pciclrbme(Pcidev*);
void pciclrioe(Pcidev*);
void pciclrmwi(Pcidev*);
int pcigetpms(Pcidev*);
void pcihinv(Pcidev*);
uchar pciipin(Pcidev*, uchar);
Pcidev* pcimatch(Pcidev*, int, int);
Pcidev* pcimatchtbdf(int);
int pcicap(Pcidev*, int);
int pcihtcap(Pcidev*, int);
void pcireset(void);
int pciscan(int, Pcidev**);
void pcisetbme(Pcidev*);
void pcisetioe(Pcidev*);
void pcisetmwi(Pcidev*);
int pcisetpms(Pcidev*, int);
void pcmcisread(PCMslot*);
int pcmcistuple(int, int, int, void*, int);
PCMmap* pcmmap(int, ulong, int, int);
int pcmspecial(char*, ISAConf*);
int (*_pcmspecial)(char *, ISAConf *);
void pcmspecialclose(int);
void (*_pcmspecialclose)(int);
void pcmunmap(int, PCMmap*);
void pmap(uintptr *, uintptr, uintptr, int);
void procrestore(Proc*);
void procsave(Proc*);
void procsetup(Proc*);
void procfork(Proc*);
void putcr0(u64int);
void putcr3(u64int);
void putcr4(u64int);
void* rampage(void);
int rdmsr(int, vlong*);
void realmode(Ureg*);
void screeninit(void);
void (*screenputs)(char*, int);
void* sigsearch(char*);
void syncclock(void);
void syscallentry(void);
void touser(void*);
void trapenable(int, void (*)(Ureg*, void*), void*, char*);
void trapinit(void);
void trapinit0(void);
int tas(void*);
uvlong tscticks(uvlong*);
uintptr umbmalloc(uintptr, int, int);
void umbfree(uintptr, int);
uintptr umbrwmalloc(uintptr, int, int);
void umbrwfree(uintptr, int);
uintptr upaalloc(int, int);
void upafree(uintptr, int);
void upareserve(uintptr, int);
void vectortable(void);
void* vmap(uintptr, int);
int vmapsync(uintptr);
void vunmap(void*, int);
void wbinvd(void);
int wrmsr(int, vlong);
int xchgw(ushort*, int);
void rdrandbuf(void*, ulong);
/* true if the saved CS in ur has requested privilege level 3 (user mode) */
#define userureg(ur) (((ur)->cs & 3) == 3)
/*
 * Push a new error label for the current process and save the context
 * in it; the value is whatever setlabel() returns.
 */
#define waserror() (up->nerrlab++, setlabel(&up->errlab[up->nerrlab-1]))
/* function-call forms of the address conversions */
#define KADDR(a) kaddr(a)
#define PADDR(a) paddr((void*)(a))

1050
sys/src/9/pc64/l.s Normal file

File diff suppressed because it is too large Load diff

742
sys/src/9/pc64/main.c Normal file
View file

@ -0,0 +1,742 @@
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "tos.h"
#include "ureg.h"
#include "init.h"
#include "pool.h"
/*
 * Where configuration info is left for the loaded programme.
 * This will turn into a structure as more is done by the boot loader
 * (e.g. why parse the .ini file twice?).
 * There are 3584 bytes available at CONFADDR.
 *
 * The first BOOTLINELEN bytes hold the boot line (examined by
 * bootargs); the remaining BOOTARGSLEN bytes hold the plan9.ini text
 * (parsed by options).
 */
#define BOOTLINE ((char*)CONFADDR)
#define BOOTLINELEN 64
#define BOOTARGS ((char*)(CONFADDR+BOOTLINELEN))
#define BOOTARGSLEN (4096-0x200-BOOTLINELEN)
#define MAXCONF 64
Conf conf;
/* name/value pairs found by options(); both point into the BOOTARGS text */
char *confname[MAXCONF];
char *confval[MAXCONF];
int nconf; /* number of valid entries in confname/confval */
int delaylink; /* when set, main() calls bootlinks() and pcimatch() instead of links() */
uchar *sp; /* user stack of init proc */
extern void (*i8237alloc)(void);
/*
 * Parse the plan9.ini text the boot loader left at BOOTARGS into the
 * confname[]/confval[] tables.
 *
 * The text is edited in place: it is forcibly terminated, carriage
 * returns are dropped, tabs become spaces, and each "name=value" line
 * is split at the first '='. Lines starting with '#' and lines without
 * '=' are ignored. At most MAXCONF lines are considered.
 */
static void
options(void)
{
	char *args, *src, *dst, *eq, *lines[MAXCONF];
	long nlines, k;

	// multibootargs();

	/* where b.com leaves its config; make sure it is terminated */
	args = BOOTARGS;
	args[BOOTARGSLEN-1] = 0;

	/* compact the text in place: drop '\r', turn '\t' into ' ' */
	dst = args;
	src = args;
	while(*src != 0){
		if(*src != '\r'){
			if(*src == '\t')
				*src = ' ';
			*dst++ = *src;
		}
		src++;
	}
	*dst = 0;

	nlines = getfields(args, lines, MAXCONF, 1, "\n");
	for(k = 0; k < nlines; k++){
		if(*lines[k] == '#')
			continue;
		eq = strchr(lines[k], '=');
		if(eq == nil)
			continue;
		*eq = '\0';
		confname[nconf] = lines[k];
		confval[nconf] = eq+1;
		nconf++;
	}
}
/*
 * Look up a configuration variable by name, ignoring case.
 * Returns the value of the first match in the table built by
 * options(), or 0 if there is none.
 */
char*
getconf(char *name)
{
	int k;

	k = 0;
	while(k < nconf){
		if(cistrcmp(confname[k], name) == 0)
			return confval[k];
		k++;
	}
	return 0;
}
/*
 * Derive the sizing parameters in conf from the amount of physical
 * memory found (conf.mem[]) and from the *kernelpercent and
 * *imagemaxmb configuration variables, then set the limits of the
 * main and image memory pools.
 */
void
confinit(void)
{
char *p;
int i, userpcnt;
ulong kpages;
/* *kernelpercent gives the kernel's share; 0 means "choose a default below" */
if(p = getconf("*kernelpercent"))
userpcnt = 100 - strtol(p, 0, 0);
else
userpcnt = 0;
conf.npage = 0;
for(i=0; i<nelem(conf.mem); i++)
conf.npage += conf.mem[i].npage;
/* 100 processes plus 5 per MB, tripled on cpu servers, capped at 2000 */
conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
if(cpuserver)
conf.nproc *= 3;
if(conf.nproc > 2000)
conf.nproc = 2000;
conf.nimage = 200;
conf.nswap = conf.nproc*80;
conf.nswppo = 4096;
if(cpuserver) {
/* a user share below 10% is replaced by the cpu server default of 70% */
if(userpcnt < 10)
userpcnt = 70;
kpages = conf.npage - (conf.npage*userpcnt)/100;
/*
* Hack for the big boys. Only good while physmem < 4GB.
* Give the kernel fixed max + enough to allocate the
* page pool.
* This is an overestimate as conf.upages < conf.npages.
* The patch of nimage is a band-aid, scanning the whole
* page list in imagereclaim just takes too long.
*/
if(getconf("*imagemaxmb") == 0)
if(kpages > (64*MB + conf.npage*sizeof(Page))/BY2PG){
kpages = (64*MB + conf.npage*sizeof(Page))/BY2PG;
conf.nimage = 2000;
kpages += (conf.nproc*KSTACK)/BY2PG;
}
} else {
/* terminal defaults: 50% user below 16MB of memory, else 60% */
if(userpcnt < 10) {
if(conf.npage*BY2PG < 16*MB)
userpcnt = 50;
else
userpcnt = 60;
}
kpages = conf.npage - (conf.npage*userpcnt)/100;
/*
* Make sure terminals with low memory get at least
* 4MB on the first Image chunk allocation.
*/
if(conf.npage*BY2PG < 16*MB)
imagmem->minarena = 4*MB;
}
/*
* can't go past the end of virtual memory.
*/
if(kpages > ((uintptr)-KZERO)/BY2PG)
kpages = ((uintptr)-KZERO)/BY2PG;
conf.upages = conf.npage - kpages;
conf.ialloc = (kpages/2)*BY2PG;
/*
* Guess how much is taken by the large permanent
* datastructures. Mntcache and Mntrpc are not accounted for
* (probably ~300KB).
*/
/* from here on kpages is a byte count, still held in a ulong */
kpages *= BY2PG;
kpages -= conf.upages*sizeof(Page)
+ conf.nproc*sizeof(Proc)
+ conf.nimage*sizeof(Image)
+ conf.nswap
+ conf.nswppo*sizeof(Page*);
mainmem->maxsize = kpages;
/*
* the dynamic allocation will balance the load properly,
* hopefully. be careful with 32-bit overflow.
*/
imagmem->maxsize = kpages - (kpages/10);
/* an explicit *imagemaxmb overrides the 90% default but is capped at the main pool size */
if(p = getconf("*imagemaxmb")){
imagmem->maxsize = strtol(p, nil, 0)*MB;
if(imagmem->maxsize > mainmem->maxsize)
imagmem->maxsize = mainmem->maxsize;
}
}
/*
 * Reset the current processor's Mach structure.
 * Everything is zeroed except machno, pml4 and gdt, which were set up
 * before the call and are carried over.
 */
void
machinit(void)
{
	Segdesc *savedgdt;
	uintptr *savedpml4;
	int id;

	savedgdt = m->gdt;
	savedpml4 = m->pml4;
	id = m->machno;

	memset(m, 0, sizeof(*m));

	m->gdt = savedgdt;
	m->pml4 = savedpml4;
	m->machno = id;
	m->perf.period = 1;

	/*
	 * For polled uart output at boot, need
	 * a default delay constant. 100000 should
	 * be enough for a while. Cpuidentify will
	 * calculate the real value later.
	 */
	m->loopconst = 100000;
}
/*
 * Set up the Mach structure of the boot processor at its fixed
 * addresses (CPU0MACH, CPU0PML4, CPU0GDT from mem.h) and mark it as
 * the only active processor.
 * machno, pml4 and gdt are assigned before machinit() because
 * machinit() preserves exactly those three fields across its memset.
 */
void
mach0init(void)
{
conf.nmach = 1;
MACHP(0) = (Mach*)CPU0MACH;
m->machno = 0;
m->pml4 = (u64int*)CPU0PML4;
m->gdt = (Segdesc*)CPU0GDT;
machinit();
active.machs = 1;
active.exiting = 0;
}
/*
 * Copy the string p, including its terminating NUL, onto the stack
 * being built through the global sp (which grows downwards) and
 * return the address of the copy.
 */
uchar *
pusharg(char *p)
{
	int len;

	len = strlen(p) + 1;
	sp -= len;
	memmove(sp, p, len);
	return sp;
}
/*
 * Build the argument vector of the first user process in the stack
 * page mapped at base. userinit() passes the kernel mapping of the
 * page that will appear at USTKTOP-BY2PG in user space, so every
 * pointer stored for user consumption is relocated by the difference
 * between the two addresses.
 *
 * The arguments are "boot" followed, depending on the boot line, by a
 * local disk specification ("fd..." or "sd...") or by "-n" ("ether...").
 * On return the global sp holds the initial user stack pointer.
 */
void
bootargs(void *base)
{
	int i, ac;
	uchar *av[32];
	uchar **lsp;
	char *cp = BOOTLINE;
	char buf[64];

	/* leave room for the Tos structure at the top of the page */
	sp = (uchar*)base + BY2PG - sizeof(Tos);

	ac = 0;
	av[ac++] = pusharg("boot");

	/* when boot is changed to only use rc, this code can go away */
	cp[BOOTLINELEN-1] = 0;
	buf[0] = 0;
	if(strncmp(cp, "fd", 2) == 0){
		/* bounded formatting, like the other users of buf-sized arrays in this file */
		snprint(buf, sizeof buf, "local!#f/fd%lddisk", strtol(cp+2, 0, 0));
		av[ac++] = pusharg(buf);
	} else if(strncmp(cp, "sd", 2) == 0){
		snprint(buf, sizeof buf, "local!#S/sd%c%c/fs", *(cp+2), *(cp+3));
		av[ac++] = pusharg(buf);
	} else if(strncmp(cp, "ether", 5) == 0)
		av[ac++] = pusharg("-n");

	/* 8 byte word align stack */
	sp = (uchar*)((uintptr)sp & ~7);

	/* build argc, argv on stack */
	sp -= (ac+1)*sizeof(sp);
	lsp = (uchar**)sp;
	for(i = 0; i < ac; i++)
		lsp[i] = av[i] + ((uintptr)(USTKTOP - BY2PG) - (uintptr)base);
	lsp[i] = 0;

	/* convert sp itself to a user address */
	sp += (uintptr)(USTKTOP - BY2PG) - (uintptr)base;
	/* one more word below argv; presumably the slot the user startup code expects - TODO confirm */
	sp -= BY2WD;
}
/*
 * Kernel-side start of the first process (userinit() points its
 * scheduler label here). Gives the process a root and current
 * directory, initialises the devices, exports the configuration as
 * environment variables, starts the alarm kproc and finally enters
 * user mode on the stack prepared by bootargs().
 */
void
init0(void)
{
int i;
char buf[2*KNAMELEN];
up->nerrlab = 0;
spllo();
/*
* These are o.k. because rootinit is null.
* Then early kproc's will have a root and dot.
*/
up->slash = namec("#/", Atodir, 0, 0);
pathclose(up->slash->path);
up->slash->path = newpath("/");
up->dot = cclone(up->slash);
chandevinit();
/* environment setup is best effort: an error simply skips the rest of it */
if(!waserror()){
snprint(buf, sizeof(buf), "%s %s", arch->id, conffile);
ksetenv("terminal", buf, 0);
ksetenv("cputype", "amd64", 0);
if(cpuserver)
ksetenv("service", "cpu", 0);
else
ksetenv("service", "terminal", 0);
/*
* every configuration variable is set with a last argument of 1;
* those not starting with '*' are also set with 0 (the meaning of
* that argument is defined by ksetenv, outside this chunk)
*/
for(i = 0; i < nconf; i++){
if(confname[i][0] != '*')
ksetenv(confname[i], confval[i], 0);
ksetenv(confname[i], confval[i], 1);
}
poperror();
}
kproc("alarm", alarmkproc, 0);
touser(sp);
}
/*
 * Create the first process by hand: allocate its groups, point its
 * scheduler label at init0, give it a one-page user stack (filled in
 * by bootargs) and a one-page text segment holding initcode, and make
 * it ready to run.
 */
void
userinit(void)
{
void *v;
Proc *p;
Segment *s;
Page *pg;
p = newproc();
p->pgrp = newpgrp();
p->egrp = smalloc(sizeof(Egrp));
p->egrp->ref = 1;
p->fgrp = dupfgrp(nil);
p->rgrp = newrgrp();
p->procmode = 0640;
kstrdup(&eve, "");
kstrdup(&p->text, "*init*");
kstrdup(&p->user, eve);
procsetup(p);
/*
* Kernel Stack
*
* N.B. make sure there's enough space for syscall to check
* for valid args and
* 8 bytes for gotolabel's return PC
*/
p->sched.pc = (uintptr)init0;
p->sched.sp = (uintptr)p->kstack+KSTACK-(sizeof(Sargs)+BY2WD);
/*
* User Stack
*/
s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BY2PG);
p->seg[SSEG] = s;
/* only the top page is populated; bootargs writes it through the kernel mapping */
pg = newpage(0, 0, USTKTOP-BY2PG);
v = kmap(pg);
memset(v, 0, BY2PG);
segpage(s, pg);
bootargs(v);
kunmap(v);
/*
* Text
*/
s = newseg(SG_TEXT, UTZERO, 1);
s->flushme++;
p->seg[TSEG] = s;
pg = newpage(0, 0, UTZERO);
memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl));
segpage(s, pg);
v = kmap(pg);
memset(v, 0, BY2PG);
memmove(v, initcode, sizeof initcode);
kunmap(v);
ready(p);
}
/*
 * C entry point of the kernel on the boot processor.
 * Runs the initialisation steps in dependency order, creates the
 * first process and enters the scheduler; schedinit() is not expected
 * to return.
 */
void
main()
{
mach0init();
options();
ioinit();
// i8250console();
quotefmtinstall();
screeninit();
trapinit0();
kbdinit();
i8253init();
cpuidentify();
meminit();
/* confinit sizes everything from the memory banks, so it follows meminit */
confinit();
archinit();
xinit();
/* i8237alloc is an optional hook */
if(i8237alloc != nil)
i8237alloc();
trapinit();
printinit();
cpuidprint();
mmuinit();
if(arch->intrinit)
arch->intrinit();
timersinit();
mathinit();
kbdenable();
if(arch->clockenable)
arch->clockenable();
procinit0();
initseg();
if(delaylink){
bootlinks();
pcimatch(0, 0, 0);
}else
links();
conf.monitor = 1;
chandevreset();
pageinit();
swapinit();
userinit();
/* release any other processors waiting to enter the scheduler */
active.thunderbirdsarego = 1;
schedinit();
}
/*
 * Placeholder shutdown: announce the exit, raise the processor
 * priority with splhi() and spin forever. The argument is ignored.
 */
void
exit(int)
{
	print("exit\n");
	splhi();
	while(1)
		;
}
/*
 * Rebooting into a new kernel is not implemented in this
 * experimental port: all three arguments are ignored and the
 * machine simply stops in exit().
 */
void
reboot(void*, void*, ulong)
{
exit(0);
}
/*
 * Called when the processor has nothing to do; just halts via halt().
 */
void
idlehands(void)
{
halt();
}
/*
* SIMD Floating Point.
* Assembler support to get at the individual instructions
* is in l.s.
* There are opportunities to be lazier about saving and
* restoring the state and allocating the storage needed.
*/
extern void _clts(void);
extern void _fldcw(u16int);
extern void _fnclex(void);
extern void _fninit(void);
extern void _fxrstor(Fxsave*);
extern void _fxsave(Fxsave*);
extern void _fwait(void);
extern void _ldmxcsr(u32int);
extern void _stts(void);
/*
 * not used, AMD64 mandated SSE
 *
 * Both routines are empty; they exist only because fns.h declares
 * them. The SSE versions below do the actual work.
 */
void
fpx87save(FPsave*)
{
}
void
fpx87restore(FPsave*)
{
}
/*
 * Save the FPU/SSE state into *fps with FXSAVE and then set the
 * task-switched flag via _stts().
 * The image is written at the first FPalign-aligned address inside
 * *fps and, if that is not the start of *fps, moved down afterwards
 * so the saved state always begins at fps.
 */
void
fpssesave(FPsave *fps)
{
	Fxsave *aligned;

	aligned = (Fxsave*)ROUND(((uintptr)fps), FPalign);
	_fxsave(aligned);
	_stts();
	if(aligned == (Fxsave*)fps)
		return;
	memmove((Fxsave*)fps, aligned, sizeof(Fxsave));
}
/*
 * Reload the FPU/SSE state saved by fpssesave().
 * The image stored at the start of *fps is first moved up to the
 * FPalign-aligned address inside *fps (if the two differ), then the
 * task-switched flag is cleared and the state loaded with FXRSTOR.
 */
void
fpsserestore(FPsave *fps)
{
	Fxsave *aligned;

	aligned = (Fxsave*)ROUND(((uintptr)fps), FPalign);
	if(aligned != (Fxsave*)fps)
		memmove(aligned, (Fxsave*)fps, sizeof(Fxsave));
	_clts();
	_fxrstor(aligned);
}
/*
 * Messages for the x87 exception flags, indexed by bit number in the
 * status word. Bit 0 (invalid operation) needs further decoding and
 * is handled separately in mathnote.
 */
static char* mathmsg[] =
{
nil, /* handled below */
"denormalized operand",
"division by zero",
"numeric overflow",
"numeric underflow",
"precision loss",
};
/*
 * Post a "sys: fp:" note to the current process describing the
 * floating point exception recorded in the x87 status word.
 *
 * Bit 0 (invalid operation) takes precedence; with bit 6 set it is a
 * stack fault and bit 9 tells overflow from underflow. Otherwise the
 * lowest set bit among bits 1-5 selects the message from mathmsg[].
 *
 * Some attention should probably be paid here to the
 * exception masks and error summary.
 */
static void
mathnote(ulong status, uintptr pc)
{
	char note[ERRMAX], *msg;
	int bit;

	if(status & 0x01){
		if(!(status & 0x40))
			msg = "invalid operation";
		else if(status & 0x200)
			msg = "stack overflow";
		else
			msg = "stack underflow";
	}else{
		msg = "unknown exception";
		for(bit = 1; bit <= 5; bit++){
			if(status & (1<<bit)){
				msg = mathmsg[bit];
				break;
			}
		}
	}
	snprint(note, sizeof note, "sys: fp: %s fppc=%#p status=0x%lux",
		msg, pc, status);
	postnote(up, 1, note, NDebug);
}
/*
 * math coprocessor error
 *
 * Trap handler registered by mathinit for VectorCERR (and IRQ13 on
 * family 3 processors). Both arguments are unused.
 */
static void
matherror(Ureg*, void*)
{
/*
* Save FPU state to check out the error.
*/
fpsave(&up->fpsave);
up->fpstate = FPinactive;
/* report using the status word and instruction pointer just saved */
mathnote(up->fpsave.fsw, up->fpsave.rip);
}
/*
 * math coprocessor emulation fault
 *
 * Trap handler registered by mathinit for VectorCNA. It is how a
 * process gets the FPU: the state is initialised on first use
 * (FPinit) or restored from Proc.fpsave (FPinactive).
 */
static void
mathemu(Ureg *ureg, void*)
{
ulong status, control;
if(up->fpstate & FPillegal){
/* someone did floating point in a note handler */
postnote(up, 1, "sys: floating point in note handler", NDebug);
return;
}
switch(up->fpstate){
case FPinit:
/*
* A process tries to use the FPU for the
* first time and generates a 'device not available'
* exception.
* Turn the FPU on and initialise it for use.
* Set the precision and mask the exceptions
* we don't care about from the generic Mach value.
*/
_clts();
_fninit();
_fwait();
_fldcw(0x0232);
/*
* TODO: sse exceptions
* _ldmxcsr(m->mxcsr);
*
*/
up->fpstate = FPactive;
break;
case FPinactive:
/*
* Before restoring the state, check for any pending
* exceptions, there's no way to restore the state without
* generating an unmasked exception.
* More attention should probably be paid here to the
* exception masks and error summary.
*/
status = up->fpsave.fsw;
control = up->fpsave.fcw;
/* a flag set in the status word whose mask bit is clear in the control word */
if((status & ~control) & 0x07F){
mathnote(status, up->fpsave.rip);
break;
}
fprestore(&up->fpsave);
up->fpstate = FPactive;
break;
case FPactive:
/* the FPU is supposed to be usable already, so this trap should not happen */
panic("math emu pid %ld %s pc %#p",
up->pid, up->text, ureg->pc);
break;
}
}
/*
 * math coprocessor segment overrun
 *
 * Trap handler registered by mathinit for VectorCSO; terminates the
 * current process. Both arguments are unused.
 */
static void
mathover(Ureg*, void*)
{
pexit("math overrun", 0);
}
/*
 * Install the floating point trap handlers defined above.
 */
void
mathinit(void)
{
trapenable(VectorCERR, matherror, 0, "matherror");
/* family 3 processors get the error handler on IRQ13 as well */
if(X86FAMILY(m->cpuidax) == 3)
intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror");
trapenable(VectorCNA, mathemu, 0, "mathemu");
trapenable(VectorCSO, mathover, 0, "mathover");
}
/*
 * Initialise the machine dependent part of a new process: no FPU
 * state yet (the first FPU use will trap to mathemu, since _stts()
 * sets the task-switched flag) and cycle accounting started from the
 * current cycle counter.
 */
void
procsetup(Proc *p)
{
p->fpstate = FPinit;
_stts();
cycles(&p->kentry);
p->pcycles = -p->kentry;
}
/*
 * Machine dependent part of fork: the child p inherits the parent's
 * (up's) cycle accounting origin and a copy of its floating point
 * state, if it has any.
 */
void
procfork(Proc *p)
{
int s;
p->kentry = up->kentry;
p->pcycles = -p->kentry;
/* save floating point state */
s = splhi();
switch(up->fpstate & ~FPillegal){
case FPactive:
/* the state is live in the FPU: save it to memory first */
fpsave(&up->fpsave);
up->fpstate = FPinactive;
/* fall through: the parent's state is now in up->fpsave */
case FPinactive:
p->fpsave = up->fpsave;
p->fpstate = FPinactive;
}
splx(s);
}
/*
 * Called when p is about to run again: the inverse of the cycle
 * accounting done in procsave(). Processes with kp set are left
 * alone.
 */
void
procrestore(Proc *p)
{
	uvlong now;

	if(!p->kp){
		cycles(&now);
		p->kentry += now;
		p->pcycles -= now;
	}
}
/*
 * Called when p stops running on this processor: update its cycle
 * accounting, put away its floating point state if it is live in the
 * FPU, and flush the TLB.
 */
void
procsave(Proc *p)
{
uvlong t;
cycles(&t);
p->kentry -= t;
p->pcycles += t;
if(p->fpstate == FPactive){
if(p->state == Moribund){
/* the process is dying: clear pending exceptions instead of saving */
_clts();
_fnclex();
_stts();
}
else{
/*
* Fpsave() stores without handling pending
* unmasked exceptions. Postnote() can't be called
* here as sleep() already has up->rlock, so
* the handling of pending exceptions is delayed
* until the process runs again and generates an
* emulation fault to activate the FPU.
*/
fpsave(&p->fpsave);
}
p->fpstate = FPinactive;
}
/*
* While this processor is in the scheduler, the process could run
* on another processor and exit, returning the page tables to
* the free list where they could be reallocated and overwritten.
* When this processor eventually has to get an entry from the
* trashed page tables it will crash.
*
* If there's only one processor, this can't happen.
* You might think it would be a win not to do this in that case,
* especially on VMware, but it turns out not to matter.
*/
mmuflushtlb();
}
/*
 * Look up the configuration variable "<class><ctlrno>" (for example
 * "ether0") and decode its value into *isa.
 *
 * The value is split into at most NISAOPT tokens, which are kept in
 * isa->opt[]; tokens of the form type=, port=, irq=, dma=, mem=,
 * size= and freq= (matched ignoring case) also set the field of the
 * same name. isa->type is reset to ""; other fields keep the caller's
 * values unless a token sets them.
 *
 * Returns 1 if the variable exists, 0 otherwise (*isa is untouched).
 */
int
isaconfig(char *class, int ctlrno, ISAConf *isa)
{
	char key[32], *val, *opt;
	int k;

	snprint(key, sizeof key, "%s%d", class, ctlrno);
	val = getconf(key);
	if(val == nil)
		return 0;

	isa->type = "";
	isa->nopt = tokenize(val, isa->opt, NISAOPT);
	for(k = 0; k < isa->nopt; k++){
		opt = isa->opt[k];
		if(cistrncmp(opt, "type=", 5) == 0){
			isa->type = opt + 5;
			continue;
		}
		if(cistrncmp(opt, "port=", 5) == 0){
			isa->port = strtoul(opt+5, &opt, 0);
			continue;
		}
		if(cistrncmp(opt, "irq=", 4) == 0){
			isa->irq = strtoul(opt+4, &opt, 0);
			continue;
		}
		if(cistrncmp(opt, "dma=", 4) == 0){
			isa->dma = strtoul(opt+4, &opt, 0);
			continue;
		}
		if(cistrncmp(opt, "mem=", 4) == 0){
			isa->mem = strtoul(opt+4, &opt, 0);
			continue;
		}
		if(cistrncmp(opt, "size=", 5) == 0){
			isa->size = strtoul(opt+5, &opt, 0);
			continue;
		}
		if(cistrncmp(opt, "freq=", 5) == 0)
			isa->freq = strtoul(opt+5, &opt, 0);
	}
	return 1;
}

164
sys/src/9/pc64/mem.h Normal file
View file

@ -0,0 +1,164 @@
/*
* Memory and machine-specific definitions. Used in C and assembler.
*/
/*
 * KiB, MiB and GiB are 32-bit unsigned constants: a product such as
 * 4*GiB wraps around unless the other operand is 64 bits wide.
 */
#define KiB 1024u /* Kibi 0x0000000000000400 */
#define MiB 1048576u /* Mebi 0x0000000000100000 */
#define GiB 1073741824u /* Gibi 0x0000000040000000 */
#define TiB 1099511627776ull /* Tebi 0x0000010000000000 */
#define PiB 1125899906842624ull /* Pebi 0x0004000000000000 */
#define EiB 1152921504606846976ull /* Exbi 0x1000000000000000 */
/* MIN and MAX evaluate one of their arguments twice: no side effects in arguments */
#define MIN(a, b) ((a) < (b)? (a): (b))
#define MAX(a, b) ((a) > (b)? (a): (b))
/* true if p is a multiple of a; a must be a power of two */
#define ALIGNED(p, a) (!(((uintptr)(p)) & ((a)-1)))
/*
* Sizes
*/
#define BI2BY 8 /* bits per byte */
/* NOTE(review): BI2WD is 32 while BY2WD is 8 bytes (64 bits) - confirm which one users expect */
#define BI2WD 32 /* bits per word */
#define BY2WD 8 /* bytes per word */
#define BY2V 8 /* bytes per double word */
#define BY2PG (0x1000ull) /* bytes per page */
#define WD2PG (BY2PG/BY2WD) /* words per page */
#define BY2XPG (2*MiB) /* bytes per big page */
#define PGSHIFT 12 /* log(BY2PG) */
/* round s up to a multiple of sz; sz must be a power of two */
#define ROUND(s, sz) (((s)+((sz)-1))&~((sz)-1))
#define PGROUND(s) ROUND(s, BY2PG)
#define BLOCKALIGN 8
#define FPalign 16 /* alignment of the FXSAVE area, see fpssesave in main.c */
#define MAXMACH 32 /* max # cpus system can run */
#define KSTACK (16*KiB) /* Size of Proc kernel stack */
/*
 * Time
 *
 * HZ is the clock interrupt rate. MS2HZ is the length of one tick in
 * milliseconds, i.e. 1000/HZ (10 with HZ == 100).
 */
#define HZ (100) /* clock frequency */
#define MS2HZ (1000/HZ) /* millisec per clock tick */
#define TK2SEC(t) ((t)/HZ) /* ticks to seconds */
/*
* Address spaces. User:
*/
#define UTZERO (0x0000000000200000ull) /* first address in user text */
#define TSTKTOP (0x00007ffffffff000ull) /* top of the temporary stack area, one page below 2^47 */
#define USTKSIZE (16*MiB) /* size of user stack */
#define USTKTOP (TSTKTOP-USTKSIZE) /* end of new stack in sysexec */
/*
 * Address spaces. Kernel, sorted by address.
 *
 * The three areas are adjacent: KMAP+KMAPSIZE == VMAP and
 * VMAP+VMAPSIZE == KZERO.
 * GiB is a 32-bit unsigned constant, so KMAPSIZE forces 64-bit
 * arithmetic; a plain 512*GiB wraps around to 0.
 */
#define KZERO (0xffffffff80000000ull) /* 2GB identity map of lower 2GB ram */
#define KTZERO (KZERO+1*MiB+64*KiB)
#define VMAP (0xffffffff00000000ull) /* 2GB identity map of upper 2GB ram */
#define VMAPSIZE (2*GiB)
#define KMAP (0xffffff7f00000000ull)
#define KMAPSIZE (512ull*GiB)
/*
* Fundamental addresses - bottom 64kB saved for return to real mode
*
* All are kernel virtual addresses (KZERO plus the physical address)
* except REBOOTADDR, which is physical.
*/
#define CONFADDR (KZERO+0x1200ull) /* info passed from boot loader */
#define APBOOTSTRAP (KZERO+0x3000ull) /* AP bootstrap code */
#define IDTADDR (KZERO+0x10000ull) /* idt */
#define REBOOTADDR (0x11000) /* reboot code - physical address */
#define CPU0PML4 (KZERO+0x13000ull) /* bootstrap processor PML4, see mach0init */
#define CPU0GDT (KZERO+0x17000ull) /* bootstrap processor GDT */
#define CPU0MACH (KZERO+0x18000ull) /* Mach for bootstrap processor */
#define CPU0END (CPU0MACH+MACHSIZE)
#define MACHSIZE (2*KSTACK) /* Mach structure plus the processor's boot stack */
#define INIMAP (8*MiB) /* 4 big pages; size of initial map in l.s */
/*
 * known x86 segments (in GDT) and their selectors
 */
#define NULLSEG 0 /* null segment */
#define KESEG 1 /* kernel executable */
#define KDSEG 2 /* kernel data */
#define UE32SEG 3 /* user executable 32bit */
#define UDSEG 4 /* user data/stack */
#define UESEG 5 /* user executable 64bit */
#define TSSSEG 8 /* task segment (two descriptors) */
#define NGDT 10 /* number of GDT entries required */
#define SELGDT (0<<2) /* selector is in gdt */
#define SELLDT (1<<2) /* selector is in ldt */
/* build a selector from table index i, table indicator t and privilege level p */
#define SELECTOR(i, t, p) (((i)<<3) | (t) | (p))
#define NULLSEL SELECTOR(NULLSEG, SELGDT, 0)
#define KESEL SELECTOR(KESEG, SELGDT, 0)
#define UE32SEL SELECTOR(UE32SEG, SELGDT, 3)
#define UDSEL SELECTOR(UDSEG, SELGDT, 3)
#define UESEL SELECTOR(UESEG, SELGDT, 3)
#define TSSSEL SELECTOR(TSSSEG, SELGDT, 0)
/*
 * fields in segment descriptors
 * (bit positions within the second 32-bit word of a descriptor,
 * the d1 member as used by mmu.c)
 */
#define SEGDATA (0x10<<8) /* data/stack segment */
#define SEGEXEC (0x18<<8) /* executable segment */
#define SEGTSS (0x9<<8) /* TSS segment */
#define SEGCG (0x0C<<8) /* call gate */
#define SEGIG (0x0E<<8) /* interrupt gate */
#define SEGTG (0x0F<<8) /* trap gate */
#define SEGLDT (0x02<<8) /* local descriptor table */
#define SEGTYPE (0x1F<<8)
#define SEGP (1<<15) /* segment present */
#define SEGPL(x) ((x)<<13) /* priority level */
#define SEGB (1<<22) /* granularity 1==4k (for expand-down) */
#define SEGD (1<<22) /* default 1==32bit (for code) */
#define SEGE (1<<10) /* expand down */
#define SEGW (1<<9) /* writable (for data/stack) */
#define SEGR (1<<9) /* readable (for code) */
#define SEGL (1<<21) /* 64 bit */
#define SEGG (1<<23) /* granularity 1==4k (for other) */
/*
 * virtual MMU
 */
#define PTEMAPMEM (1024*1024)
#define PTEPERTAB (PTEMAPMEM/BY2PG)
#define SEGMAPSIZE 1984
#define SSEGMAPSIZE 16
/* clear the low (within-page) bits of x, leaving the page-aligned address */
#define PPN(x) ((x)&~((uintptr)BY2PG-1))
/*
 * physical MMU
 * (flag bits of a page-table entry)
 */
#define PTEVALID (1ull<<0)
#define PTEWT (1ull<<3)
#define PTEUNCACHED (1ull<<4)
#define PTEWRITE (1ull<<1)
#define PTERONLY (0ull<<1)
#define PTEKERNEL (0ull<<2)
#define PTEUSER (1ull<<2)
#define PTESIZE (1ull<<7) /* entry maps a big page; mmuwalk stops at such an entry */
#define PTEGLOBAL (1ull<<8)
/*
 * Hierarchical Page Tables.
 * For example, traditional IA-32 paging structures have 2 levels,
 * level 1 is the PD, and level 0 the PT pages; with IA-32e paging,
 * level 3 is the PML4(!), level 2 the PDP, level 1 the PD,
 * and level 0 the PT pages. The PTLX macro gives an index into the
 * page-table page at level 'l' for the virtual address 'v'.
 * PGLSZ(l) is the number of bytes mapped by one entry at level 'l'
 * (PGLSZ(0) == 4KiB, PGLSZ(1) == 2MiB).
 */
#define PTSZ (4*KiB) /* page table page size */
#define PTSHIFT 9 /* log2 of the number of entries in a page-table page */
#define PTLX(v, l) (((v)>>(((l)*PTSHIFT)+PGSHIFT)) & ((1<<PTSHIFT)-1))
#define PGLSZ(l) (1ull<<(((l)*PTSHIFT)+PGSHIFT))
#define getpgcolor(a) 0
#define RMACH R15 /* m-> */
#define RUSER R14 /* up-> */

720
sys/src/9/pc64/memory.c Normal file
View file

@ -0,0 +1,720 @@
/*
* Size memory and create the kernel page-tables on the fly while doing so.
* Called from main(), this code should only be run by the bootstrap processor.
*
* MemMin is what the bootstrap code in l.s has already mapped;
*/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "ureg.h"
#define MEMDEBUG 0 /* non-zero: meminit prints the maps via memdebug() */
/* region types passed to map(), plus local constants */
enum {
MemUPA = 0, /* unbacked physical address */
MemRAM = 1, /* physical memory */
MemUMB = 2, /* upper memory block (<16MB) */
MemReserved = 3,
NMemType = 4,
KB = 1024,
MemMin = INIMAP, /* physical memory already mapped by l.s */
};
/* one free range; an entry with size 0 terminates a map */
typedef struct Map Map;
struct Map {
uintptr size;
uintptr addr;
};
/* a named, locked table of free ranges kept sorted by address */
typedef struct RMap RMap;
struct RMap {
char* name;
Map* map; /* first entry */
Map* mapend; /* last slot; mapfree refuses to insert at or beyond it */
Lock;
};
/*
 * Memory allocation tracking.
 * Each RMap's mapend points at the last array slot, which is left
 * free so that a zero-size terminator always exists.
 */
static Map mapupa[16];
static RMap rmapupa = {
"unallocated unbacked physical memory",
mapupa,
&mapupa[nelem(mapupa)-1],
};
static Map mapram[16];
static RMap rmapram = {
"physical memory",
mapram,
&mapram[nelem(mapram)-1],
};
static Map mapumb[64];
static RMap rmapumb = {
"upper memory block",
mapumb,
&mapumb[nelem(mapumb)-1],
};
static Map mapumbrw[16];
static RMap rmapumbrw = {
"UMB device memory",
mapumbrw,
&mapumbrw[nelem(mapumbrw)-1],
};
/*
 * Print the name of rmap followed by one line per free range:
 * start address, end address and (size).  The walk stops at the
 * first entry whose size is zero.
 */
void
mapprint(RMap *rmap)
{
	Map *e;

	print("%s\n", rmap->name);
	e = rmap->map;
	while(e->size != 0){
		print("\t%#p %#p (%#p)\n", e->addr, e->addr+e->size, e->size);
		e++;
	}
}
/*
 * Print three 16-bit values assembled from pairs of NVRAM bytes
 * (0x17/0x18, 0x30/0x31, 0x15/0x16), then dump all four
 * allocation maps.
 */
void
memdebug(void)
{
ulong maxpa, maxpa1, maxpa2;
maxpa = (nvramread(0x18)<<8)|nvramread(0x17);
maxpa1 = (nvramread(0x31)<<8)|nvramread(0x30);
maxpa2 = (nvramread(0x16)<<8)|nvramread(0x15);
/* maxpa is printed both raw and scaled as MB+maxpa*KB */
print("maxpa = %luX -> %luX, maxpa1 = %luX maxpa2 = %luX\n",
maxpa, MB+maxpa*KB, maxpa1, maxpa2);
mapprint(&rmapram);
mapprint(&rmapumb);
mapprint(&rmapumbrw);
mapprint(&rmapupa);
}
/*
 * Return the range [addr, addr+size) to rmap.
 * The table is kept sorted by address; the range is merged with
 * the preceding and/or following entry when it abuts them,
 * otherwise a new entry is inserted.  If the table is full the
 * range is dropped with a "losing" message.
 */
void
mapfree(RMap* rmap, uintptr addr, uintptr size)
{
Map *mp;
uintptr t;
if(size <= 0)
return;
lock(rmap);
/* find the first entry above addr, or the zero-size terminator */
for(mp = rmap->map; mp->addr <= addr && mp->size; mp++)
;
if(mp > rmap->map && (mp-1)->addr+(mp-1)->size == addr){
/* abuts the end of the previous entry: grow it */
(mp-1)->size += size;
if(addr+size == mp->addr){
/* also reaches the next entry: absorb it and shift the rest down */
(mp-1)->size += mp->size;
while(mp->size){
mp++;
(mp-1)->addr = mp->addr;
(mp-1)->size = mp->size;
}
}
}
else{
if(addr+size == mp->addr && mp->size){
/* abuts the start of the next entry: grow it downwards */
mp->addr -= size;
mp->size += size;
}
else do{
/*
 * insert here by swapping (addr, size) with each
 * following entry in turn until the terminator
 * (size 0) has been moved up one slot
 */
if(mp >= rmap->mapend){
print("mapfree: %s: losing %#p, %#p\n",
rmap->name, addr, size);
break;
}
t = mp->addr;
mp->addr = addr;
addr = t;
t = mp->size;
mp->size = size;
mp++;
}while(size = t);
}
unlock(rmap);
}
/*
 * Allocate size bytes from rmap.
 * If addr is non-zero the allocation must start at addr (before
 * alignment is applied); otherwise the first entry that fits is used.
 * If align is positive the start is rounded up to a multiple of align.
 * Any gap left in front of the allocation is given back with mapfree.
 * Returns the start address, or 0 if nothing suitable was found.
 */
uintptr
mapalloc(RMap* rmap, uintptr addr, int size, int align)
{
	Map *mp;
	uintptr maddr, oaddr;

	lock(rmap);
	for(mp = rmap->map; mp->size; mp++){
		maddr = mp->addr;

		if(addr){
			/*
			 * A specific address range has been given:
			 *   if the current map entry is greater then
			 *   the address is not in the map;
			 *   if the current map entry does not overlap
			 *   the beginning of the requested range then
			 *   continue on to the next map entry;
			 *   if the current map entry does not entirely
			 *   contain the requested range then the range
			 *   is not in the map.
			 */
			if(maddr > addr)
				break;
			if(mp->size < addr - maddr)	/* maddr+mp->size < addr, but no overflow */
				continue;
			if(addr - maddr > mp->size - size)	/* addr+size > maddr+mp->size, but no overflow */
				break;
			maddr = addr;
		}

		if(align > 0)
			maddr = ((maddr+align-1)/align)*align;
		/*
		 * Rounding up may have moved maddr past the end of this
		 * entry.  The unsigned subtraction below would then wrap
		 * to a huge value, pass the size check and corrupt the
		 * entry, so reject that case first.
		 */
		if(maddr - mp->addr > mp->size)
			continue;
		if(mp->addr+mp->size-maddr < size)
			continue;

		/* carve [maddr, maddr+size) out; the entry keeps what follows it */
		oaddr = mp->addr;
		mp->addr = maddr+size;
		mp->size -= maddr-oaddr+size;
		if(mp->size == 0){
			/* entry used up: shift the remaining entries down */
			do{
				mp++;
				(mp-1)->addr = mp->addr;
			}while((mp-1)->size = mp->size);
		}

		unlock(rmap);
		/* give back the part in front of the allocation */
		if(oaddr != maddr)
			mapfree(rmap, oaddr, maddr-oaddr);

		return maddr;
	}
	unlock(rmap);

	return 0;
}
/*
 * Take one page-aligned page straight from the ram map for use as a
 * page table; mmuwalk calls this before mmuinit has run (during
 * e820scan).  Returns the kernel address of the page, or nil when
 * the ram map cannot supply one.
 */
void*
rampage(void)
{
	uintptr pa;

	pa = mapalloc(&rmapram, 0, BY2PG, BY2PG);
	return pa != 0 ? KADDR(pa) : nil;
}
/*
 * Remove the ranges named by the "umbexclude" configuration
 * variable from the UMB map.  The value is a comma separated list
 * of start-end pairs (end inclusive), parsed with strtoul base 0.
 * Parsing stops at the first malformed entry.
 */
static void
umbexclude(void)
{
int size;
ulong addr;
char *op, *p, *rptr;
if((p = getconf("umbexclude")) == nil)
return;
while(p && *p != '\0' && *p != '\n'){
op = p;
addr = strtoul(p, &rptr, 0);
if(rptr == nil || rptr == p || *rptr != '-'){
print("umbexclude: invalid argument <%s>\n", op);
break;
}
p = rptr+1;
/* end address is inclusive, hence the +1 */
size = strtoul(p, &rptr, 0) - addr + 1;
if(size <= 0){
print("umbexclude: bad range <%s>\n", op);
break;
}
/* terminate this entry in place and step past the comma */
if(rptr != nil && *rptr == ',')
*rptr++ = '\0';
p = rptr;
/* allocating the range takes it out of the free map */
mapalloc(&rmapumb, addr, size, 0);
}
}
/*
 * Probe 0xD0000-0xF0000 and record unused 2KB pieces in the UMB
 * map and writable pieces in the UMB read-write map, then apply
 * the umbexclude configuration.
 */
static void
umbscan(void)
{
uchar *p;
/*
 * Scan the Upper Memory Blocks (0xA0000->0xF0000) for pieces
 * which aren't used; they can be used later for devices which
 * want to allocate some virtual address space.
 * Check for two things:
 * 1) device BIOS ROM. This should start with a two-byte header
 * of 0x55 0xAA, followed by a byte giving the size of the ROM
 * in 512-byte chunks. These ROM's must start on a 2KB boundary.
 * 2) device memory. This is read-write.
 * There are some assumptions: there's VGA memory at 0xA0000 and
 * the VGA BIOS ROM is at 0xC0000. Also, if there's no ROM signature
 * at 0xE0000 then the whole 64KB up to 0xF0000 is theoretically up
 * for grabs; check anyway.
 */
p = KADDR(0xD0000);
while(p < (uchar*)KADDR(0xE0000)){
/*
 * Test for 0x55 0xAA before poking obtrusively,
 * some machines (e.g. Thinkpad X20) seem to map
 * something dynamic here (cardbus?) causing weird
 * problems if it is changed.
 */
if(p[0] == 0x55 && p[1] == 0xAA){
/* skip the ROM: p[2] is its length in 512-byte units */
p += p[2]*512;
continue;
}
/* write a pattern at both ends of the 2KB piece and read it back */
p[0] = 0xCC;
p[2*KB-1] = 0xCC;
if(p[0] != 0xCC || p[2*KB-1] != 0xCC){
/* not writable: try again with a ROM-like header */
p[0] = 0x55;
p[1] = 0xAA;
p[2] = 4;
if(p[0] == 0x55 && p[1] == 0xAA){
p += p[2]*512;
continue;
}
/* reads back as 0xFF 0xFF: record the piece as free UMB space */
if(p[0] == 0xFF && p[1] == 0xFF)
mapfree(&rmapumb, PADDR(p), 2*KB);
}
else
mapfree(&rmapumbrw, PADDR(p), 2*KB);
p += 2*KB;
}
p = KADDR(0xE0000);
if(p[0] != 0x55 || p[1] != 0xAA){
p[0] = 0xCC;
p[64*KB-1] = 0xCC;
/* free the 64KB only if neither end kept the pattern */
if(p[0] != 0xCC && p[64*KB-1] != 0xCC)
mapfree(&rmapumb, PADDR(p), 64*KB);
}
umbexclude();
}
/*
 * Sum the n bytes starting at v in an 8-bit accumulator and return
 * the result (0-255).  A non-positive n gives 0.
 */
int
checksum(void *v, int n)
{
	uchar *b, sum;
	int i;

	b = v;
	sum = 0;
	for(i = 0; i < n; i++)
		sum += b[i];
	return sum;
}
/*
 * Look for signature at every 16-byte step in the len bytes
 * starting at addr.  A candidate is examined only while the
 * signature ends strictly before addr+len.  Returns a pointer to
 * the first match, or nil.
 */
static void*
sigscan(uchar* addr, int len, char* signature)
{
	uchar *cur, *limit;
	int n;

	n = strlen(signature);
	limit = addr+len;
	cur = addr;
	while(cur+n < limit){
		if(memcmp(cur, signature, n) == 0)
			return cur;
		cur += 16;
	}
	return nil;
}
/*
 * Search the conventional firmware areas for a table that starts
 * with signature.  Returns a pointer to it, or nil if none is found.
 */
void*
sigsearch(char* signature)
{
uintptr p;
uchar *bda;
void *r;
/*
 * Search for the data structure:
 * 1) within the first KiB of the Extended BIOS Data Area (EBDA), or
 * 2) within the last KiB of system base memory if the EBDA segment
 * is undefined, or
 * 3) within the BIOS ROM address space between 0xf0000 and 0xfffff
 * (but will actually check 0xe0000 to 0xfffff).
 */
bda = KADDR(0x400);
/* the EBDA segment is consulted only when "EISA" is found at 0xfffd9 */
if(memcmp(KADDR(0xfffd9), "EISA", 4) == 0){
if((p = (bda[0x0f]<<8)|bda[0x0e]) != 0){
if((r = sigscan(KADDR(p<<4), 1024, signature)) != nil)
return r;
}
}
/* 16-bit value at bda+0x13, in KiB; scan the KiB just below it */
if((p = ((bda[0x14]<<8)|bda[0x13])*1024) != 0){
if((r = sigscan(KADDR(p-1024), 1024, signature)) != nil)
return r;
}
/* hack for virtualbox: look in KiB below 0xa0000 */
if((r = sigscan(KADDR(0xa0000-1024), 1024, signature)) != nil)
return r;
return sigscan(KADDR(0xe0000), 0x20000, signature);
}
/*
 * Give the memory between the page-rounded end of the kernel image
 * and MemMin to the ram map and zero it.  Panics if the kernel
 * extends beyond MemMin.
 * (The only call visible here, in meminit, is commented out.)
 */
static void
lowraminit(void)
{
uintptr pa, x;
/*
 * x is the physical address of the first page after the kernel
 * image; pa is the end of the range mapped by l.s (MemMin).
 */
x = PADDR(PGROUND((uintptr)end));
pa = MemMin;
if(x > pa)
panic("kernel too big");
mapfree(&rmapram, x, pa-x);
memset(KADDR(x), 0, pa-x); /* keep us honest */
}
/* one entry of the e820 memory map as parsed by e820scan */
typedef struct Emap Emap;
struct Emap
{
int type; /* 1 is treated as RAM, anything else as reserved */
uvlong base; /* first address of the range */
uvlong top; /* address just past the range */
};
static Emap emap[128];
int nemap; /* number of valid entries in emap */
/*
 * qsort comparison for Emap entries: order by top address,
 * breaking ties by base address.
 */
static int
emapcmp(const void *va, const void *vb)
{
	Emap *x, *y;

	x = (Emap*)va;
	y = (Emap*)vb;
	if(x->top != y->top)
		return x->top < y->top ? -1 : 1;
	if(x->base != y->base)
		return x->base < y->base ? -1 : 1;
	return 0;
}
/*
 * Record the physical range [base, base+len) of the given type
 * (MemRAM, MemUMB, MemUPA or MemReserved) in the matching
 * allocation map and, for RAM and UMB below -KZERO, map it at
 * base+KZERO in the kernel page tables.  Ranges are split
 * recursively so that the final case never straddles MemMin,
 * 16MB, CPU0END or the kernel image.
 */
static void
map(uintptr base, uintptr len, int type)
{
	uintptr e, n, *pte, flags, maxkpa;

	/*
	 * Split any call crossing MemMin to make below simpler.
	 */
	if(base < MemMin && len > MemMin-base){
		n = MemMin - base;
		map(base, n, type);
		map(MemMin, len-n, type);
	}

	/*
	 * Let lowraminit and umbscan hash out the low MemMin.
	 */
	if(base < MemMin)
		return;

	/*
	 * Any non-memory below 16*MB is used as upper mem blocks.
	 */
	if(type == MemUPA && base < 16*MB && len > 16*MB-base){
		map(base, 16*MB-base, MemUMB);
		map(16*MB, len-(16*MB-base), MemUPA);
		return;
	}

	/*
	 * Memory below CPU0END is reserved for the kernel
	 * and already mapped.
	 */
	if(base < PADDR(CPU0END)){
		n = PADDR(CPU0END) - base;
		if(len <= n)
			return;
		map(PADDR(CPU0END), len-n, type);
		return;
	}

	/*
	 * Memory between KTZERO and end is the kernel itself
	 * and is already mapped.
	 */
	if(base < PADDR(KTZERO) && len > PADDR(KTZERO)-base){
		map(base, PADDR(KTZERO)-base, type);
		return;
	}
	if(PADDR(KTZERO) <= base && base < PADDR(PGROUND((uintptr)end))){
		/*
		 * n is the part of the range that lies inside the kernel
		 * image, i.e. the distance from base to the end of the
		 * image, not the absolute end address.
		 */
		n = PADDR(PGROUND((uintptr)end)) - base;
		if(len <= n)
			return;
		map(PADDR(PGROUND((uintptr)end)), len-n, type);
		return;
	}

	/*
	 * Now we have a simple case.
	 */
	switch(type){
	case MemRAM:
		mapfree(&rmapram, base, len);
		flags = PTEWRITE|PTEVALID;
		break;
	case MemUMB:
		mapfree(&rmapumb, base, len);
		flags = PTEWRITE|PTEUNCACHED|PTEVALID;
		break;
	case MemUPA:
		mapfree(&rmapupa, base, len);
		flags = 0;
		break;
	default:
	case MemReserved:
		flags = 0;
		break;
	}

	/*
	 * bottom MemMin is already mapped - just twiddle flags.
	 * (not currently used - see above)
	 */
	if(base < MemMin){
		e = base+len;
		base &= ~((uintptr)PGLSZ(1)-1);
		for(; base<e; base+=PGLSZ(1)){
			pte = mmuwalk(m->pml4, base+KZERO, 1, 0);
			if(pte != 0 && *pte & PTEVALID)
				*pte |= flags;
		}
		return;
	}

	if(flags){
		/* only physical addresses below -KZERO fit in the KZERO window */
		maxkpa = -KZERO;
		if(base >= maxkpa)
			return;
		if(len > maxkpa-base)
			len = maxkpa - base;
		pmap(m->pml4, base|flags, base+KZERO, len);
	}
}
/*
 * Parse the e820 memory map passed by the boot loader in the
 * "*e820" (or "e820") configuration variable and hand every range,
 * together with the gaps between ranges, to map().
 * Returns -1 if the variable is absent or yields no entries, else 0.
 */
static int
e820scan(void)
{
uintptr base, len, last;
Emap *e;
char *s;
int i;
/* passed by bootloader */
if((s = getconf("*e820")) == nil)
if((s = getconf("e820")) == nil)
return -1;
/*
 * Entries are space separated hexadecimal "base top" pairs,
 * optionally preceded by a single digit type and a space.
 */
nemap = 0;
while(nemap < nelem(emap)){
while(*s == ' ')
s++;
if(*s == 0)
break;
e = emap + nemap;
e->type = 1;
if(s[1] == ' '){ /* new format */
e->type = s[0] - '0';
s += 2;
}
e->base = strtoull(s, &s, 16);
if(*s != ' ')
break;
e->top = strtoull(s, &s, 16);
if(*s != ' ' && *s != 0)
break;
/* keep only non-empty ranges */
if(e->base < e->top)
nemap++;
}
if(nemap == 0)
return -1;
qsort(emap, nemap, sizeof emap[0], emapcmp);
/* last is the end of the address space handled so far */
last = 0;
for(i=0; i<nemap; i++){
e = &emap[i];
/*
 * pull out the info but only about the low 32 bits...
 * NOTE(review): base, len and last are uintptr here, so the
 * "low 32 bits" remark may be left over from the 32-bit kernel.
 */
if(e->top <= last)
continue;
/* clip entries that overlap what has already been handled */
if(e->base < last)
base = last;
else
base = e->base;
len = e->top - base;
/*
 * If the map skips addresses, mark them available.
 */
if(last < base)
map(last, base-last, MemUPA);
map(base, len, (e->type == 1) ? MemRAM : MemReserved);
last = base + len;
/* wrapped around the top of the address space */
if(last == 0)
break;
}
/* everything above the last entry is unbacked address space */
if(last != 0)
map(last, -last, MemUPA);
return 0;
}
/*
 * Size memory: scan the upper memory blocks and the e820 map, then
 * copy the resulting ram map into conf.mem.  Ram map entries that
 * do not fit in conf.mem are reported as lost.
 */
void
meminit(void)
{
int i;
Map *mp;
Confmem *cm;
uintptr lost;
umbscan();
// lowraminit();
e820scan();
/*
 * Set the conf entries describing banks of allocatable memory.
 */
for(i=0; i<nelem(mapram) && i<nelem(conf.mem); i++){
mp = &rmapram.map[i];
cm = &conf.mem[i];
cm->base = mp->addr;
cm->npage = mp->size/BY2PG;
}
/* i continues from the loop above: count what did not fit */
lost = 0;
for(; i<nelem(mapram); i++)
lost += rmapram.map[i].size;
if(lost)
print("meminit - lost %llud bytes\n", lost);
if(MEMDEBUG)
memdebug();
}
/*
 * Allocate size bytes from the upper memory block map, at addr if
 * addr is non-zero.  Returns the kernel virtual address of the
 * allocation, or 0 on failure.
 */
uintptr
umbmalloc(uintptr addr, int size, int align)
{
	uintptr pa;

	pa = mapalloc(&rmapumb, addr, size, align);
	if(pa == 0)
		return 0;
	return (uintptr)KADDR(pa);
}
/*
 * Return size bytes at kernel virtual address addr to the
 * upper memory block map.
 */
void
umbfree(uintptr addr, int size)
{
mapfree(&rmapumb, PADDR(addr), size);
}
/*
 * Allocate size bytes of writable upper memory.  The read-write
 * map is tried first; failing that, space is taken from the plain
 * UMB map and kept only if a write to its first and last byte
 * reads back.  Returns a kernel virtual address, or 0.
 */
uintptr
umbrwmalloc(uintptr addr, int size, int align)
{
uintptr a;
uchar *p;
if(a = mapalloc(&rmapumbrw, addr, size, align))
return (uintptr)KADDR(a);
/*
 * Perhaps the memory wasn't visible before
 * the interface is initialised, so try again.
 */
if((a = umbmalloc(addr, size, align)) == 0)
return 0;
/* a is already a kernel virtual address here */
p = (uchar*)a;
p[0] = 0xCC;
p[size-1] = 0xCC;
if(p[0] == 0xCC && p[size-1] == 0xCC)
return a;
umbfree(a, size);
return 0;
}
/*
 * Return size bytes at kernel virtual address addr to the
 * UMB read-write map.
 */
void
umbrwfree(uintptr addr, int size)
{
mapfree(&rmapumbrw, PADDR(addr), size);
}
/*
 * Hand out physical address space that is backed by nothing, for
 * use when configuring devices.  Only the address range is
 * reserved: nothing is mapped into virtual memory, so the caller
 * must use vmap for that.  On failure a message and the current
 * map are printed and 0 is returned.
 */
uintptr
upaalloc(int size, int align)
{
	uintptr pa;

	pa = mapalloc(&rmapupa, 0, size, align);
	if(pa != 0)
		return pa;
	print("out of physical address space allocating %d\n", size);
	mapprint(&rmapupa);
	return 0;
}
/*
 * Return size bytes of physical address space at pa to the
 * unbacked physical address map.
 */
void
upafree(uintptr pa, int size)
{
mapfree(&rmapupa, pa, size);
}
/*
 * Take [pa, pa+size) out of the unbacked physical address map.
 * Failure is silent: with the E820 map in use, some of the regions
 * claimed by pci devices may already have been reserved.  If the
 * allocator returned some other address, it is given back.
 */
void
upareserve(uintptr pa, int size)
{
	uintptr got;

	got = mapalloc(&rmapupa, pa, size, 0);
	if(got == pa || got == 0)
		return;
	// print("upareserve: cannot reserve pa=%#p size=%d\n", pa, size);
	mapfree(&rmapupa, got, size);
}
/* public entry point that prints the memory maps via memdebug */
void
memorysummary(void)
{
memdebug();
}

149
sys/src/9/pc64/mkfile Normal file
View file

@ -0,0 +1,149 @@
# mkfile for the pc64 (amd64) kernel.
# KTZERO and APBOOTSTRAP are link addresses passed to $LD with -T;
# they have the same values as the macros of the same name in mem.h.
# PORT lists objects built from ../port, OBJ everything linked into
# the kernel, LIB the libraries; ETHER, AUDIO, VGA and SDEV are object
# names derived from driver sources in ../pc, used in the dependency
# lines further down.
CONF=pc64
CONFLIST=pc64
objtype=amd64
</$objtype/mkfile
p=9
KTZERO=0xffffffff80110000
APBOOTSTRAP=0xffffffff80003000
DEVS=`{rc ../port/mkdevlist $CONF}
PORT=\
alarm.$O\
alloc.$O\
allocb.$O\
auth.$O\
cache.$O\
chan.$O\
dev.$O\
edf.$O\
fault.$O\
page.$O\
parse.$O\
pgrp.$O\
portclock.$O\
print.$O\
proc.$O\
qio.$O\
qlock.$O\
rdb.$O\
rebootcmd.$O\
segment.$O\
swap.$O\
syscallfmt.$O\
sysfile.$O\
sysproc.$O\
taslock.$O\
tod.$O\
xalloc.$O\
random.$O\
OBJ=\
l.$O\
cga.$O\
i8253.$O\
i8259.$O\
main.$O\
memory.$O\
mmu.$O\
trap.$O\
$CONF.root.$O\
$CONF.rootc.$O\
$DEVS\
$PORT\
LIB=\
/$objtype/lib/libmemlayer.a\
/$objtype/lib/libmemdraw.a\
/$objtype/lib/libdraw.a\
/$objtype/lib/libip.a\
/$objtype/lib/libsec.a\
/$objtype/lib/libmp.a\
/$objtype/lib/libc.a\
/$objtype/lib/libfis.a\
/$objtype/lib/libaml.a\
ETHER=`{cd ../pc; echo devether.c ether*.c | sed 's/\.c/.'$O'/g'}
AUDIO=`{cd ../pc; echo devaudio.c audio*.c | sed 's/\.c/.'$O'/g'}
VGA=`{cd ../pc; echo devvga.c screen.c vga*.c | sed 's/\.c/.'$O'/g'}
SDEV=`{cd ../pc; echo devsd.c sd*.c | sed 's/\.c/.'$O'/g'}
# link the kernel (9pc64) with its text at $KTZERO
$p$CONF: $CONF.c $OBJ $LIB
$CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c
$LD -o $target -T$KTZERO -R4096 -l $OBJ $CONF.$O $LIB
size $target
# copy the kernel to /$objtype and to each system in $EXTRACOPIES
install:V: $p$CONF
cp $p$CONF /$objtype/
for(i in $EXTRACOPIES)
import $i / /n/$i && cp $p$CONF $p$CONF.gz /n/$i/$objtype/
# copies generated by the rule below
PCHEADERS=wifi.h uncached.h usbehci.h screen.h etherif.h mp.h io.h
&.h: ../pc/&.h
cp $prereq .
# objects whose source lives in ../pc are compiled from there
PCFILES=`{../port/mkfilelist ../pc}
^($PCFILES)\.$O:R: '../pc/\1.c'
$CC $CFLAGS -I. -. ../pc/$stem1.c
<../boot/bootmkfile
<../port/portmkfile
<|../port/mkbootrules $CONF
# header dependencies
l.$O apbootstrap.$O: mem.h
$ETHER: etherif.h ../port/netif.h
$AUDIO: ../port/audioif.h
ether8003.$O ether8390.$O: ether8390.h
etheryuk.$O: yukdump.h
$VGA mouse.$O: screen.h /sys/include/memdraw.h
vgavesa.$O: /386/include/ureg.h
archmp.$O mp.$O: apbootstrap.h
apic.$O archmp.$O mp.$O: mp.h
squidboy.$O: mp.h
$SDEV: ../port/sd.h
sdiahci.$O: ahci.h
devaoe.$O sdaoe.$O: ../port/aoe.h
main.$O: init.h
devusb.$O usbuhci.$O usbohci.$O usbehci.$O: ../port/usb.h
usbehci.$O: usbehci.h uncached.h
trap.$O: /sys/include/tos.h
etheriwl.$O: wifi.h
etherrt2860.$O: wifi.h
wifi.$O: wifi.h
# init.h: the first user program, linked and dumped as a C byte array
init.h:D: ../port/initcode.c ../pc/init9.c
$CC ../port/initcode.c
$CC ../pc/init9.c
$LD -l -R1 -s -o init.out init9.$O initcode.$O /$objtype/lib/libc.a
{echo 'uchar initcode[]={'
xd -1x <init.out |
sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
echo '};'} > init.h
# apbootstrap.h: the AP startup code linked at $APBOOTSTRAP and dumped
# as a C byte array; dd skips the first 40 bytes of the linker output
# (presumably the a.out header - confirm)
apbootstrap.h: apbootstrap.s
$AS apbootstrap.s
$LD -l -R1 -s -o apbootstrap.out -T$APBOOTSTRAP apbootstrap.$O
{echo 'uchar apbootstrap[]={'
dd -if apbootstrap.out -bs 1 -iseek 40 |
xd -1x |
sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
echo '};'} > $target
sd53c8xx.i: sd53c8xx.n
aux/na $prereq > $target
acid:V:
$CC -a -w -I. -. ../pc/i8253.c>acid
# remove kernels, boot files and the generated or copied headers
%.clean:V:
rm -f $stem.c [9bz]$stem [9bz]$stem.gz boot$stem.* apbootstrap.h init.h $PCHEADERS

505
sys/src/9/pc64/mmu.c Normal file
View file

@ -0,0 +1,505 @@
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
/*
 * Simple segment descriptors with no translation.
 * Each macro expands to a Segdesc initialiser { d0, d1 } for
 * privilege level p.  EXECSEGM sets SEGL (64 bit code); the *32*
 * variants set a 0xFFFFF limit with 4k granularity.
 */
#define EXECSEGM(p) { 0, SEGL|SEGP|SEGPL(p)|SEGEXEC }
#define DATASEGM(p) { 0, SEGB|SEGG|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define EXEC32SEGM(p) { 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define DATA32SEGM(p) { 0xFFFF, SEGB|SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
/*
 * Template GDT; mmuinit copies it into each processor's own GDT
 * and then fills in the two TSS descriptors.
 */
Segdesc gdt[NGDT] =
{
[NULLSEG] { 0, 0}, /* null descriptor */
[KESEG] EXECSEGM(0), /* kernel code */
[KDSEG] DATASEGM(0), /* kernel data */
[UE32SEG] EXEC32SEGM(3), /* user code 32 bit*/
[UDSEG] DATA32SEGM(3), /* user data/stack */
[UESEG] EXECSEGM(3), /* user code */
};
/* set by mmuinit; until then mmuwalk takes page-table pages from rampage() */
static int didmmuinit = 0;
/* level */
enum {
PML4E = 2,
PDPE = 1,
PDE = 0,
MAPBITS = 8*sizeof(m->mmumap[0]), /* bits in one word of m->mmumap */
};
/*
 * Build a descriptor-table pointer (16-bit limit immediately
 * followed by a 64-bit base) on the stack and pass its address
 * to load (lgdt or lidt in this file).
 */
static void
loadptr(u16int lim, uintptr off, void (*load)(void*))
{
u64int b[2], *o;
u16int *s;
/* the limit occupies the last two bytes of b[0], directly before the base in b[1] */
o = &b[1];
s = ((u16int*)o)-1;
*s = lim;
*o = off;
(*load)(s);
}
/*
 * Store stack in the rsp0, rsp1 and rsp2 slots of this processor's
 * Tss (each kept as a low/high pair of words) and flush the TLB.
 */
static void
taskswitch(uintptr stack)
{
	Tss *t;
	u32int lo;
	uintptr hi;

	lo = (u32int)stack;
	hi = stack >> 32;

	t = m->tss;
	t->rsp0[0] = lo;
	t->rsp0[1] = hi;
	t->rsp1[0] = lo;
	t->rsp1[1] = hi;
	t->rsp2[0] = lo;
	t->rsp2[1] = hi;
	mmuflushtlb();
}
/*
 * Per-processor MMU setup: allocate and fill the Tss, build and
 * load this processor's GDT, load the IDT and task register, and
 * program the fs/gs base and SYSCALL model specific registers.
 */
void
mmuinit(void)
{
uintptr x;
vlong v;
int i;
didmmuinit = 1;
/* zap double map done by l.s */
m->pml4[0] = 0;
m->pml4[512] = 0;
m->tss = mallocz(sizeof(Tss), 1);
if(m->tss == nil)
panic("mmuinit: no memory for Tss");
m->tss->iomap = 0xDFFF;
/* every ist pair (low word, high word) gets the top of the Mach stack */
for(i=0; i<14; i+=2){
x = (uintptr)m + MACHSIZE;
m->tss->ist[i] = x;
m->tss->ist[i+1] = x>>32;
}
/*
 * We used to keep the GDT in the Mach structure, but it
 * turns out that that slows down access to the rest of the
 * page. Since the Mach structure is accessed quite often,
 * it pays off anywhere from a factor of 1.25 to 2 on real
 * hardware to separate them (the AMDs are more sensitive
 * than Intels in this regard). Under VMware it pays off
 * a factor of about 10 to 100.
 */
memmove(m->gdt, gdt, sizeof gdt);
/* the TSS descriptor takes two GDT slots: the second holds the high 32 bits of the base */
x = (uintptr)m->tss;
m->gdt[TSSSEG+0].d0 = (x<<16)|(sizeof(Tss)-1);
m->gdt[TSSSEG+0].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;
m->gdt[TSSSEG+1].d0 = x>>32;
m->gdt[TSSSEG+1].d1 = 0;
loadptr(sizeof(gdt)-1, (uintptr)m->gdt, lgdt);
loadptr(sizeof(Segdesc)*512-1, (uintptr)IDTADDR, lidt);
taskswitch((uintptr)m + MACHSIZE);
ltr(TSSSEL);
wrmsr(0xc0000100, 0ull); /* 64 bit fsbase */
wrmsr(0xc0000101, (uvlong)&machp[m->machno]); /* 64 bit gsbase */
wrmsr(0xc0000102, 0ull); /* kernel gs base */
/* enable syscall extension */
rdmsr(0xc0000080, &v);
v |= 1ull;
wrmsr(0xc0000080, v);
/* IA32_STAR */
wrmsr(0xc0000081, ((uvlong)UE32SEL << 48) | ((uvlong)KESEL << 32));
/* IA32_LSTAR */
wrmsr(0xc0000082, (uvlong)syscallentry);
/* SYSCALL flags mask */
wrmsr(0xc0000084, 0x200);
}
/*
 * These could go back to being macros once the kernel is debugged,
 * but the extra checking is nice to have.
 *
 * kaddr converts physical address pa to its address in the KZERO
 * window; it panics when pa is greater than -KZERO.
 */
void*
kaddr(uintptr pa)
{
if(pa > (uintptr)-KZERO)
panic("kaddr: pa=%#p pc=%#p", pa, getcallerpc(&pa));
return (void*)(pa+KZERO);
}
/*
 * Convert kernel virtual address v to a physical address by
 * subtracting the base of the window it lies in (KZERO, else
 * VMAP).  Panics for addresses below VMAP.
 */
uintptr
paddr(void *v)
{
uintptr va;
va = (uintptr)v;
if(va >= KZERO)
return va-KZERO;
if(va >= VMAP)
return va-VMAP;
panic("paddr: va=%#p pc=%#p", va, getcallerpc(&v));
return 0;
}
/*
 * Take one MMU descriptor (with its page-table page) from this
 * processor's free list, refilling the list with 256 fresh
 * descriptors when it is empty.  Panics if memory runs out.
 */
static MMU*
mmualloc(void)
{
MMU *p;
int i, n;
p = m->mmufree;
if(p == nil){
n = 256;
p = malloc(n * sizeof(MMU));
if(p == nil)
panic("mmualloc: out of memory for MMU");
/* one contiguous allocation supplies the pages of all n descriptors */
p->page = mallocalign(n * PTSZ, BY2PG, 0, 0);
if(p->page == nil)
panic("mmualloc: out of memory for MMU pages");
/* chain the descriptors; page is a uintptr*, so 1<<PTSHIFT entries is one PTSZ page */
for(i=1; i<n; i++){
p[i].page = p[i-1].page + (1<<PTSHIFT);
p[i-1].next = &p[i];
}
m->mmucount += n;
}
m->mmucount--;
m->mmufree = p->next;
p->next = nil;
return p;
}
/*
 * Walk the page tables rooted at table (a PML4) and return a
 * pointer to the entry for va at the given level (0 is the PT,
 * 1 the PD, 2 the PDP).  Returns 0 if the walk meets a big page
 * (PTESIZE) entry, or a missing table when create is zero.
 *
 * With create set, missing tables are allocated on the way down.
 * Tables for addresses below VMAP come from mmualloc and are put on
 * the current process' MMU list: PML4 level entries at the head,
 * lower levels at the tail.  Tables for addresses at or above VMAP
 * come from mallocalign, or from rampage before mmuinit has run.
 */
uintptr*
mmuwalk(uintptr* table, uintptr va, int level, int create)
{
	uintptr pte, *page;
	int i, x;
	MMU *p;

	x = PTLX(va, 3);
	for(i = 2; i >= level; i--){
		pte = table[x];
		if(pte & PTEVALID){
			if(pte & PTESIZE)
				return 0;
			table = KADDR(PPN(pte));
		} else {
			if(!create)
				return 0;
			pte = PTEWRITE|PTEVALID;
			if(va < VMAP){
				if(va < TSTKTOP)
					pte |= PTEUSER;
				p = mmualloc();
				p->index = x;
				p->level = i;
				if(i == PML4E){
					/* PML4 entries linked to head */
					p->next = up->mmuhead;
					if(p->next == nil)
						up->mmutail = p;
					up->mmuhead = p;
					if(p->index <= PTLX(TSTKTOP, 3))
						m->mmumap[p->index/MAPBITS] |= 1ull<<(p->index%MAPBITS);
				} else {
					/*
					 * PDP and PD entries linked to tail; the tail
					 * must advance or the next entry would
					 * overwrite this link and drop p from the list.
					 */
					up->mmutail->next = p;
					up->mmutail = p;
				}
				/* mmualloc debited m->mmucount; mmufree credits this back */
				up->mmucount++;
				page = p->page;
			} else if(didmmuinit) {
				page = mallocalign(PTSZ, BY2PG, 0, 0);
			} else
				page = rampage();
			memset(page, 0, PTSZ);
			table[x] = PADDR(page) | pte;
			table = page;
		}
		x = PTLX(va, i);
	}
	return &table[x];
}
/*
 * Number of entries from the one for va up to the end of its
 * page-table page at the given level.
 */
static int
ptecount(uintptr va, int level)
{
	uintptr off;

	/* offset of va within the span covered by one whole table */
	off = va & (PGLSZ(level+1)-1);
	return (1<<PTSHIFT) - off/PGLSZ(level);
}
/*
 * Map size bytes of physical memory at kernel virtual address va
 * in the page tables rooted at pml4.  The low (within-page) bits
 * of pa carry the PTE flags.  2MB pages are used where size and
 * the alignment of va allow.  Panics for a non-positive size or a
 * va below VMAP.
 */
void
pmap(uintptr *pml4, uintptr pa, uintptr va, int size)
{
uintptr *pte, *ptee, flags;
int z, l;
if((size <= 0) || va < VMAP)
panic("pmap: pa=%#p va=%#p size=%d", pa, va, size);
/* split the argument into page address and flag bits */
flags = pa;
pa = PPN(pa);
flags -= pa;
if(va >= KZERO)
flags |= PTEGLOBAL;
while(size > 0){
if(size >= PGLSZ(1) && (va % PGLSZ(1)) == 0)
flags |= PTESIZE;
/* l is the level to fill: 1 for big pages, else 0; z the bytes per entry */
l = (flags & PTESIZE) != 0;
z = PGLSZ(l);
pte = mmuwalk(pml4, va, l, 1);
if(pte == 0){
/*
 * mmuwalk returns 0 when it meets a big page above level l.
 * If so, back up to the start of that big page and redo
 * the mapping with big pages.
 */
pte = mmuwalk(pml4, va, ++l, 0);
if(pte && (*pte & PTESIZE)){
flags |= PTESIZE;
z = va & PGLSZ(l)-1;
va -= z;
pa -= z;
size += z;
continue;
}
panic("pmap: pa=%#p va=%#p size=%d", pa, va, size);
}
/* fill entries up to the end of this page-table page */
ptee = pte + ptecount(va, l);
while(size > 0 && pte < ptee){
*pte++ = pa | flags;
pa += z;
va += z;
size -= z;
}
}
}
/*
 * Clear the PML4 entries that the current address space put in
 * this processor's page table: the KMAP slot, the slots for
 * UTZERO and TSTKTOP, and every slot recorded in m->mmumap,
 * whose words are cleared as they are processed.
 */
static void
mmuzap(void)
{
	uintptr *pte;
	u64int w;
	int i, x;

	pte = m->pml4;
	pte[PTLX(KMAP, 3)] = 0;

	/* common case */
	pte[PTLX(UTZERO, 3)] = 0;
	pte[PTLX(TSTKTOP, 3)] = 0;
	m->mmumap[PTLX(UTZERO, 3)/MAPBITS] &= ~(1ull<<(PTLX(UTZERO, 3)%MAPBITS));
	m->mmumap[PTLX(TSTKTOP, 3)/MAPBITS] &= ~(1ull<<(PTLX(TSTKTOP, 3)%MAPBITS));

	/* each bitmap word covers MAPBITS consecutive PML4 slots */
	for(i = 0; i < nelem(m->mmumap); pte += MAPBITS, i++){
		w = m->mmumap[i];
		if(w == 0)
			continue;
		x = 0;
		do {
			if(w & 1)
				pte[x] = 0;
			x++;
			/* consume one bit of the word; the loop ends when none are left */
			w >>= 1;
		} while(w);
		m->mmumap[i] = 0;
	}
}
/*
 * Move the whole MMU list of proc onto the front of this
 * processor's free list and transfer its count.  Nothing is done
 * if proc has no list.
 */
static void
mmufree(Proc *proc)
{
	MMU *tail;

	tail = proc->mmutail;
	if(tail == nil)
		return;

	tail->next = m->mmufree;
	m->mmufree = proc->mmuhead;
	proc->mmutail = nil;
	proc->mmuhead = nil;
	m->mmucount += proc->mmucount;
	proc->mmucount = 0;
}
/*
 * Discard the current process' page tables: mark them stale and
 * let mmuswitch release and rebuild them, at high priority level.
 */
void
flushmmu(void)
{
int x;
x = splhi();
up->newtlb = 1;
mmuswitch(up);
splx(x);
}
/*
 * Install the address space of proc on this processor: clear the
 * previous PML4 entries, release proc's tables if they were marked
 * stale, plug proc's PML4 level tables into the PML4 and point the
 * Tss at proc's kernel stack.
 */
void
mmuswitch(Proc *proc)
{
uintptr pte;
MMU *p;
mmuzap();
if(proc->newtlb){
mmufree(proc);
proc->newtlb = 0;
}
/* PML4 level entries are kept at the head of the list */
for(p = proc->mmuhead; p && p->level==PML4E; p = p->next){
pte = PADDR(p->page) | PTEWRITE|PTEVALID;
if(p->index <= PTLX(TSTKTOP, 3)){
/* remember the slot so that mmuzap can clear it */
m->mmumap[p->index/MAPBITS] |= 1ull<<(p->index%MAPBITS);
pte |= PTEUSER;
}
m->pml4[p->index] = pte;
}
taskswitch((uintptr)proc->kstack+KSTACK);
}
/*
 * Release the page tables of proc: clear its entries from this
 * processor's PML4, put its MMU list on the free list and point
 * the Tss at the top of the Mach stack.
 */
void
mmurelease(Proc *proc)
{
mmuzap();
mmufree(proc);
taskswitch((uintptr)m+MACHSIZE);
}
/*
 * Enter a user mapping of va to pa in the current page tables,
 * creating intermediate tables as needed.  PTEVALID and PTEUSER
 * are or'ed into pa; any other flag bits must already be in pa.
 * If a valid entry was replaced, its TLB entry is invalidated.
 */
void
putmmu(uintptr va, uintptr pa, Page *)
{
uintptr *pte, old;
int x;
x = splhi();
pte = mmuwalk(m->pml4, va, 0, 1);
if(pte == 0){
panic("putmmu: bug: va=%#p pa=%#p", va, pa);
return;
}
old = *pte;
*pte = pa | PTEVALID|PTEUSER;
splx(x);
if(old & PTEVALID)
invlpg(va);
}
/* no-op here: the arguments are only marked as used */
void
checkmmu(uintptr va, uintptr pa)
{
USED(va, pa);
}
/*
 * Number of bytes starting at physical address pa that can be
 * reached through the KZERO window; 0 if pa lies at or beyond
 * its end (-KZERO).
 */
uintptr
cankaddr(uintptr pa)
{
	uintptr top;

	top = -KZERO;
	return pa < top ? top - pa : 0;
}
/* no-op here: the arguments are only marked as used */
void
countpagerefs(ulong *ref, int print)
{
USED(ref, print);
}
/*
 * Return a kernel address for page.  Pages reachable through the
 * KZERO window use it directly; others get a temporary mapping in
 * one of 1<<PTSHIFT per-processor slots at KMAP, used round-robin.
 * Panics if the chosen slot is still mapped.
 */
KMap*
kmap(Page *page)
{
uintptr *pte, pa, va;
int x;
pa = page->pa;
if(cankaddr(pa) != 0)
return (KMap*)KADDR(pa);
x = splhi();
va = KMAP + ((uintptr)m->kmapindex << PGSHIFT);
pte = mmuwalk(m->pml4, va, 0, 1);
if(pte == 0 || *pte & PTEVALID)
panic("kmap: pa=%#p va=%#p", pa, va);
*pte = pa | PTEWRITE|PTEVALID;
m->kmapindex = (m->kmapindex + 1) % (1<<PTSHIFT);
/* the TLB is flushed each time the slot index wraps to 0 */
if(m->kmapindex == 0)
mmuflushtlb();
splx(x);
return (KMap*)va;
}
/*
 * Undo kmap.  Addresses in the KZERO window need no work; for a
 * KMAP slot the page-table entry is cleared.  Panics if the slot
 * is not mapped.
 */
void
kunmap(KMap *k)
{
uintptr *pte, va;
int x;
va = (uintptr)k;
if(va >= KZERO)
return;
x = splhi();
pte = mmuwalk(m->pml4, va, 0, 0);
if(pte == 0 || (*pte & PTEVALID) == 0)
panic("kunmap: va=%#p", va);
*pte = 0;
splx(x);
}
/*
 * Add a device mapping to the vmap range.
 * Maps size bytes at physical address pa uncached and writable and
 * returns the corresponding kernel virtual address.  Panics for a
 * non-positive size or a pa above 32 bits.
 * NOTE(review): VMAP+2GB equals KZERO, so for pa at or above -KZERO
 * the address pa+VMAP falls inside the KZERO window - confirm this
 * is the intended layout.
 */
void*
vmap(uintptr pa, int size)
{
uintptr va;
int o;
if(size <= 0 || pa & ~0xffffffffull)
panic("vmap: pa=%#p size=%d pc=%#p", pa, size, getcallerpc(&pa));
/* use the KZERO window when the whole range fits in it */
if(cankaddr(pa) >= size)
va = pa+KZERO;
else
va = pa+VMAP;
/*
 * might be asking for less than a page.
 */
o = pa & (BY2PG-1);
pa -= o;
va -= o;
size += o;
pmap(m->pml4, pa | PTEUNCACHED|PTEWRITE|PTEVALID, va, size);
return (void*)(va+o);
}
/* nothing is unmapped; the address is only checked to be a kernel address */
void
vunmap(void *v, int)
{
paddr(v); /* will panic on error */
}
/*
 * vmapsync() is currently unused as the VMAP and KZERO PDPs
 * are shared between processors. (see mpstartap)
 *
 * Copy the mapping for va from processor 0's page tables into
 * this processor's, trying level 0 and then level 1.  Returns 1
 * if an entry was found, 0 otherwise.
 */
int
vmapsync(uintptr va)
{
uintptr *pte1, *pte2;
int level;
if(va < VMAP || m->machno == 0)
return 0;
for(level=0; level<2; level++){
pte1 = mmuwalk(MACHP(0)->pml4, va, level, 0);
if(pte1 && *pte1 & PTEVALID){
pte2 = mmuwalk(m->pml4, va, level, 1);
if(pte2 == 0)
break;
/* same entry when the tables are shared: nothing to copy */
if(pte1 != pte2)
*pte2 = *pte1;
return 1;
}
}
return 0;
}

153
sys/src/9/pc64/pc64 Normal file
View file

@ -0,0 +1,153 @@
# pc64 - amd64 pc terminal with local disk
dev
root
cons
arch
pnp pci
env
pipe
proc
mnt
srv
shr
dup
rtc
ssl
tls
cap
kprof
fs
ether netif
ip arp chandial ip ipv6 ipaux iproute netlog ethermedium nullmedium pktmedium inferno
draw screen vga vgax swcursor
mouse mouse
kbd
vga
sd
# floppy dma
# aoe
# lpt
audio dma
# pccard
# i82365 cis
uart
usb
link
# segdesc
# devpccard
# devi82365
# cputemp
# apm apmjump
# ether2000 ether8390
# ether2114x pci
# ether589 etherelnk3
# ether79c970 pci
# ether8003 ether8390
# ether8139 pci
# ether8169 pci ethermii
# should be obsoleted by igbe
# ether82543gc pci
# ether82557 pci
ether82563 pci
# ether82598 pci
# ether83815 pci
# etherbcm pci
# etherdp83820 pci
# etherec2t ether8390
# etherelnk3 pci
# etherga620 pci
# etherigbe pci ethermii
# ethervgbe pci ethermii
# ethervt6102 pci ethermii
# ethervt6105m pci ethermii
# ethersink
# ethersmc devi82365 cis
# etheryuk pci
# etherwavelan wavelan devi82365 cis pci
etheriwl pci wifi
# etherrt2860 pci wifi
ethermedium
# pcmciamodem
netdevmedium
loopbackmedium
usbuhci
# usbohci
usbehci usbehcipc
# audiosb16 dma
# audioac97 audioac97mix
audiohda
misc
archacpi mp apic squidboy
archmp mp apic squidboy
mtrr
# sdaoe
# sdide pci sdscsi
# sd53c8xx pci sdscsi
# sdmylex pci sdscsi
# sdiahci pci sdscsi led
# sdodin pci sdscsi led
# sdvirtio pci sdscsi
# sdmmc pci pmmc
# sdloop
# uarti8250
# uartisa
# uartpci pci
# vga3dfx +cur
# vgaark2000pv +cur
# vgabt485 =cur
# vgaclgd542x +cur
# vgaclgd546x +cur
# vgact65545 +cur
# vgacyber938x +cur
# vgaet4000 +cur
# vgageode +cur
# vgahiqvideo +cur
# vgai81x +cur
# vgamach64xx +cur
# vgamga2164w +cur
# vgamga4xx +cur
# vganeomagic +cur
# vganvidia +cur
# vgaradeon +cur
# vgargb524 =cur
# vgas3 +cur vgasavage
# vgat2r4 +cur
# vgatvp3020 =cur
# vgatvp3026 =cur
vgavesa
# vgavmware +cur
ip
tcp
udp
rudp
ipifc
icmp
icmp6
gre
ipmux
esp
il
port
int cpuserver = 0;
boot boot
tcp
local
bootdir
boot$CONF.out boot
/$objtype/bin/paqfs
/$objtype/bin/auth/factotum
bootfs.paq

113
sys/src/9/pc64/squidboy.c Normal file
View file

@ -0,0 +1,113 @@
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "ureg.h"
#include "mp.h"
extern void checkmtrr(void);
/*
 * First C code run by an application processor; its address is
 * stored in the AP bootstrap area by mpstartap.  Initialises the
 * processor, marks its apic online, joins the set of active
 * processors and waits for active.thunderbirdsarego before
 * entering the scheduler.
 */
static void
squidboy(Apic* apic)
{
machinit();
mmuinit();
cpuidentify();
cpuidprint();
checkmtrr();
/* mpstartap polls this flag */
apic->online = 1;
coherence();
lapicinit(apic);
lapiconline();
syncclock();
timersinit();
lock(&active);
active.machs |= 1<<m->machno;
unlock(&active);
while(!active.thunderbirdsarego)
microdelay(100);
schedinit();
}
/*
 * Start the application processor described by apic: allocate its
 * page tables, GDT page and Mach structure, fill in the parameter
 * block of the AP bootstrap code, set the warm-reset vector and
 * send the startup IPI, then wait up to 1000 polls of 10ms for the
 * processor to report itself online.
 */
void
mpstartap(Apic* apic)
{
uintptr *apbootp, *pml4, *pdp0;
Segdesc *gdt;
Mach *mach;
uchar *p;
int i;
/*
 * Initialise the AP page-tables and Mach structure.
 * Xspanalloc will panic if an allocation can't be made.
 */
/* one allocation, carved into PML4, PDP, GDT page and Mach in that order */
p = xspanalloc(2*PTSZ + BY2PG + MACHSIZE, BY2PG, 0);
pml4 = (uintptr*)p;
p += PTSZ;
pdp0 = (uintptr*)p;
p += PTSZ;
gdt = (Segdesc*)p;
p += BY2PG;
mach = (Mach*)p;
memset(pml4, 0, PTSZ);
memset(pdp0, 0, PTSZ);
memset(gdt, 0, BY2PG);
memset(mach, 0, MACHSIZE);
mach->machno = apic->machno;
mach->pml4 = pml4;
mach->gdt = gdt; /* filled by mmuinit */
MACHP(mach->machno) = mach;
/*
 * map KZERO (note that we share the KZERO (and VMAP)
 * PDP between processors.
 */
pml4[PTLX(KZERO, 3)] = MACHP(0)->pml4[PTLX(KZERO, 3)];
/* double map */
/* (the start of the KZERO window also appears at virtual address 0; mmuinit zaps it) */
pml4[0] = PADDR(pdp0) | PTEWRITE|PTEVALID;
pdp0[0] = *mmuwalk(pml4, KZERO, 2, 0);
/*
 * Tell the AP where its kernel vector and pdb are.
 * The offsets are known in the AP bootstrap code.
 */
apbootp = (uintptr*)(APBOOTSTRAP+0x08);
apbootp[0] = (uintptr)squidboy; /* assembler jumps here eventually */
apbootp[1] = (uintptr)PADDR(pml4);
apbootp[2] = (uintptr)apic;
apbootp[3] = (uintptr)mach;
/*
 * Universal Startup Algorithm.
 */
p = KADDR(0x467); /* warm-reset vector */
/* two bytes of offset followed by two bytes of segment */
*p++ = PADDR(APBOOTSTRAP);
*p++ = PADDR(APBOOTSTRAP)>>8;
i = (PADDR(APBOOTSTRAP) & ~0xFFFF)/16;
/* code assumes i==0 */
if(i != 0)
print("mp: bad APBOOTSTRAP\n");
*p++ = i;
*p = i>>8;
coherence();
nvramwrite(0x0F, 0x0A); /* shutdown code: warm reset upon init ipi */
lapicstartap(apic, PADDR(APBOOTSTRAP));
/* squidboy sets apic->online once the AP is running */
for(i = 0; i < 1000; i++){
if(apic->online)
break;
delay(10);
}
nvramwrite(0x0F, 0x00);
}

1065
sys/src/9/pc64/trap.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -601,15 +601,15 @@ consread(Chan *c, void *buf, long n, vlong off)
case Qswap:
snprint(tmp, sizeof tmp,
"%lud memory\n"
"%d pagesize\n"
"%llud memory\n"
"%llud pagesize\n"
"%lud kernel\n"
"%lud/%lud user\n"
"%lud/%lud swap\n"
"%lud/%lud kernel malloc\n"
"%lud/%lud kernel draw\n",
conf.npage*BY2PG,
BY2PG,
(uvlong)conf.npage*BY2PG,
(uvlong)BY2PG,
conf.npage-conf.upages,
palloc.user-palloc.freecount, palloc.user,
conf.nswap-swapalloc.free, conf.nswap,

View file

@ -109,7 +109,7 @@ END{
printf "\t%slink();\n", link[i];
printf "}\n\n";
if(narch || objtype == "386" || objtype == "amd64"){
if(narch || objtype ~ "(386|amd64)"){
for(i = 0; i < narch; i++)
printf "extern PCArch %s;\n", arch[i];
printf "PCArch* knownarch[] = {\n";

View file

@ -40,7 +40,7 @@ END{
x = ""
for(i in obj)
x = x i "\n";
if(objtype ~ "386" && obj["pci" "'.$O'"])
if((objtype ~ "386" || objtype ~ "amd64") && obj["pci" "'.$O'"])
x = x "bios32'.$O' \n";
printf x;
}' $*