plan9fox/sys/src/9/pc/mmu.c

/*
* Memory mappings. Life was easier when 2G of memory was enough.
*
* The kernel memory starts at KZERO, with the text loaded at KZERO+1M
* (9load sits under 1M during the load). The memory from KZERO to the
* top of memory is mapped 1-1 with physical memory, starting at physical
* address 0. All kernel memory and data structures (i.e., the entries stored
* into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000,
* then the kernel can only have 256MB of memory for itself.
*
* The 256M below KZERO comprises three parts. The lowest 4M is the
* virtual page table, a virtual address representation of the current
* page table tree. The second 4M is used for temporary per-process
* mappings managed by kmap and kunmap. The remaining 248M is used
* for global (shared by all procs and all processors) device memory
* mappings and managed by vmap and vunmap. The total amount (256M)
* could probably be reduced somewhat if desired. The largest device
* mapping is that of the video card, and even though modern video cards
* have embarrassing amounts of memory, the video drivers only use one
* frame buffer worth (at most 16M). Each is described in more detail below.
*
* The VPT is a 4M frame constructed by inserting the pdb into itself.
* This short-circuits one level of the page tables, with the result that
* the contents of second-level page tables can be accessed at VPT.
* We use the VPT to edit the page tables (see mmu) after inserting them
* into the page directory. It is a convenient mechanism for mapping what
* might be otherwise-inaccessible pages. The idea was borrowed from
* the Exokernel.
*
* The VPT doesn't solve all our problems, because we still need to
* prepare page directories before we can install them. For that, we
* use tmpmap/tmpunmap, which map a single page at TMPADDR.
*/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
/*
* Simple segment descriptors with no translation.
*/
#define DATASEGM(p) { 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define EXECSEGM(p) { 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define EXEC16SEGM(p) { 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define TSSSEGM(b,p) { ((b)<<16)|sizeof(Tss),\
((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }
Segdesc gdt[NGDT] =
{
[NULLSEG] { 0, 0}, /* null descriptor */
[KDSEG] DATASEGM(0), /* kernel data/stack */
[KESEG] EXECSEGM(0), /* kernel code */
[UDSEG] DATASEGM(3), /* user data/stack */
[UESEG] EXECSEGM(3), /* user code */
[TSSSEG] TSSSEGM(0,0), /* tss segment */
[KESEG16] EXEC16SEGM(0), /* kernel code 16-bit */
};
static void taskswitch(ulong, ulong);
static void memglobal(void);
#define vpt ((ulong*)VPT)
#define VPTX(va) (((ulong)(va))>>12)
#define vpd (vpt+VPTX(VPT))
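/*
 * A short example of how the recursive mapping is used (a sketch;
 * checkmmu below does exactly this): because pdb[PDX(VPT)] points
 * back at the pdb itself, the pte mapping any virtual address va in
 * the current address space can be read as vpt[VPTX(va)], and the
 * pde for va as vpd[PDX(va)].  So a quick "is va mapped?" test is:
 *
 *	if((vpd[PDX(va)]&PTEVALID) && (vpt[VPTX(va)]&PTEVALID))
 *		...va has a valid mapping...
 */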
enum {
/* PAT entry used for write combining */
PATWC = 7,
};
void
mmuinit(void)
{
ulong x, *p;
ushort ptr[3];
vlong v;
if(0) print("vpt=%#.8ux vpd=%#p kmap=%#.8ux\n",
VPT, vpd, KMAP);
memglobal();
m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID;
m->tss = mallocz(sizeof(Tss), 1);
if(m->tss == nil)
panic("mmuinit: no memory for Tss");
m->tss->iomap = 0xDFFF<<16;
/*
* We used to keep the GDT in the Mach structure, but it
* turns out that that slows down access to the rest of the
* page. Since the Mach structure is accessed quite often,
* it pays off anywhere from a factor of 1.25 to 2 on real
* hardware to separate them (the AMDs are more sensitive
* than Intels in this regard). Under VMware it pays off
* a factor of about 10 to 100.
*/
memmove(m->gdt, gdt, sizeof gdt);
x = (ulong)m->tss;
m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;
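	/*
	 * The operand of lgdt/lidt is a 6-byte pseudo-descriptor:
	 * a 16-bit limit followed by a 32-bit linear base address,
	 * which is how ptr[0..2] is laid out below.
	 */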
ptr[0] = sizeof(gdt)-1;
x = (ulong)m->gdt;
ptr[1] = x & 0xFFFF;
ptr[2] = (x>>16) & 0xFFFF;
lgdt(ptr);
ptr[0] = sizeof(Segdesc)*256-1;
x = IDTADDR;
ptr[1] = x & 0xFFFF;
ptr[2] = (x>>16) & 0xFFFF;
lidt(ptr);
/* make kernel text unwritable */
for(x = KTZERO; x < (ulong)etext; x += BY2PG){
p = mmuwalk(m->pdb, x, 2, 0);
if(p == nil)
panic("mmuinit");
*p &= ~PTEWRITE;
}
taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
ltr(TSSSEL);
	/* program PAT entry PATWC (7) in IA32_PAT (MSR 0x277) as write combining (memory type 0x01) */
if((MACHP(0)->cpuiddx & Pat) != 0
&& rdmsr(0x277, &v) != -1){
v &= ~(255LL<<(PATWC*8));
v |= 1LL<<(PATWC*8); /* WC */
wrmsr(0x277, v);
}
}
/*
* On processors that support it, we set the PTEGLOBAL bit in
* page table and page directory entries that map kernel memory.
* Doing this tells the processor not to bother flushing them
* from the TLB when doing the TLB flush associated with a
* context switch (write to CR3). Since kernel memory mappings
* are never removed, this is safe. (If we ever remove kernel memory
* mappings, we can do a full flush by turning off the PGE bit in CR4,
* writing to CR3, and then turning the PGE bit back on.)
*
* See also mmukmap below.
*
* Processor support for the PTEGLOBAL bit is enabled in devarch.c.
*/
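/*
 * A sketch of that full flush (not needed now, since kernel mappings
 * are never removed), assuming the getcr4/putcr4 helpers from l.s:
 *
 *	x = getcr4();
 *	putcr4(x & ~0x80);	(clear CR4.PGE, bit 7, so global entries become flushable)
 *	putcr3(getcr3());	(flush the TLB, including the formerly global entries)
 *	putcr4(x);		(turn global pages back on)
 */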
static void
memglobal(void)
{
int i, j;
ulong *pde, *pte;
/* only need to do this once, on bootstrap processor */
if(m->machno != 0)
return;
if(!m->havepge)
return;
pde = m->pdb;
for(i=PDX(KZERO); i<1024; i++){
if(pde[i] & PTEVALID){
pde[i] |= PTEGLOBAL;
if(!(pde[i] & PTESIZE)){
pte = KADDR(pde[i]&~(BY2PG-1));
for(j=0; j<1024; j++)
if(pte[j] & PTEVALID)
pte[j] |= PTEGLOBAL;
}
}
}
}
/*
* Flush all the user-space and device-mapping mmu info
* for this process, because something has been deleted.
* It will be paged back in on demand.
*/
void
flushmmu(void)
{
int s;
s = splhi();
up->newtlb = 1;
mmuswitch(up);
splx(s);
}
/*
* Flush a single page mapping from the tlb.
*/
void
flushpg(ulong va)
{
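	/* invlpg first appeared on the 486 (family 4); older processors must reload cr3, flushing the whole TLB */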
if(m->cpuidfamily >= 4)
invlpg(va);
else
putcr3(getcr3());
}
/*
* Allocate a new page for a page directory.
* We keep a small cache of pre-initialized
* page directories in each mach.
*/
static Page*
mmupdballoc(void)
{
int s;
Page *page;
ulong *pdb;
s = splhi();
m->pdballoc++;
if(m->pdbpool == 0){
spllo();
page = newpage(0, 0, 0);
page->va = (ulong)vpd;
splhi();
pdb = tmpmap(page);
memmove(pdb, m->pdb, BY2PG);
pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID; /* set up VPT */
tmpunmap(pdb);
}else{
page = m->pdbpool;
m->pdbpool = page->next;
m->pdbcnt--;
}
splx(s);
return page;
}
static void
mmupdbfree(Proc *proc, Page *p)
{
if(islo())
panic("mmupdbfree: islo");
m->pdbfree++;
if(m->pdbcnt >= 10){
p->next = proc->mmufree;
proc->mmufree = p;
}else{
p->next = m->pdbpool;
m->pdbpool = p;
m->pdbcnt++;
}
}
/*
* A user-space memory segment has been deleted, or the
* process is exiting. Clear all the pde entries for user-space
* memory mappings and device mappings. Any entries that
* are needed will be paged back in as necessary.
*/
static void
mmuptefree(Proc* proc)
{
int s;
ulong *pdb;
Page **last, *page;
if(proc->mmupdb == nil || proc->mmuused == nil)
return;
s = splhi();
pdb = tmpmap(proc->mmupdb);
last = &proc->mmuused;
for(page = *last; page; page = page->next){
pdb[page->daddr] = 0;
last = &page->next;
}
tmpunmap(pdb);
splx(s);
*last = proc->mmufree;
proc->mmufree = proc->mmuused;
proc->mmuused = 0;
}
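/*
 * Record in the TSS the kernel stack to use on traps from the less
 * privileged rings, then load the given page directory into cr3.
 */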
static void
taskswitch(ulong pdb, ulong stack)
{
Tss *tss;
tss = m->tss;
tss->ss0 = KDSEL;
tss->esp0 = stack;
tss->ss1 = KDSEL;
tss->esp1 = stack;
tss->ss2 = KDSEL;
tss->esp2 = stack;
putcr3(pdb);
}
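/*
 * Switch to proc's address space: free stale user mappings if
 * flushmmu asked for it (newtlb), refresh the MACHADDR pde in the
 * process pdb, load its cr3 and kernel stack, and install the
 * per-process GDT and LDT entries.
 */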
void
mmuswitch(Proc* proc)
{
ulong *pdb;
ulong x;
int n;
if(proc->newtlb){
mmuptefree(proc);
proc->newtlb = 0;
}
if(proc->mmupdb != nil){
pdb = tmpmap(proc->mmupdb);
pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
tmpunmap(pdb);
taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK));
}else
taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));
memmove(&m->gdt[PROCSEG0], proc->gdt, sizeof(proc->gdt));
if((x = (ulong)proc->ldt) && (n = proc->nldt) > 0){
m->gdt[LDTSEG].d0 = (x<<16)|((n * sizeof(Segdesc)) - 1);
m->gdt[LDTSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGLDT|SEGPL(0)|SEGP;
lldt(LDTSEL);
} else
lldt(NULLSEL);
}
/*
* Release any pages allocated for a page directory base or page-tables
* for this process:
* switch to the prototype pdb for this processor (m->pdb);
* call mmuptefree() to place all pages used for page-tables (proc->mmuused)
* onto the process' free list (proc->mmufree). This has the side-effect of
* cleaning any user entries in the pdb (proc->mmupdb);
* if there's a pdb put it in the cache of pre-initialised pdb's
* for this processor (m->pdbpool) or on the process' free list;
* finally, place any pages freed back into the free pool (palloc).
* This routine is only called from schedinit() with palloc locked.
*/
void
mmurelease(Proc* proc)
{
Page *page, *next;
ulong *pdb;
if(islo())
panic("mmurelease: islo");
taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
if(proc->kmaptable != nil){
if(proc->mmupdb == nil)
panic("mmurelease: no mmupdb");
if(--proc->kmaptable->ref != 0)
panic("mmurelease: kmap ref %ld", proc->kmaptable->ref);
if(proc->nkmap)
panic("mmurelease: nkmap %d", proc->nkmap);
/*
* remove kmaptable from pdb before putting pdb up for reuse.
*/
pdb = tmpmap(proc->mmupdb);
if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa)
panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux",
pdb[PDX(KMAP)], proc->kmaptable->pa);
pdb[PDX(KMAP)] = 0;
tmpunmap(pdb);
/*
* move kmaptable to free list.
*/
pagechainhead(proc->kmaptable);
proc->kmaptable = nil;
}
if(proc->mmupdb != nil){
mmuptefree(proc);
mmupdbfree(proc, proc->mmupdb);
proc->mmupdb = nil;
}
for(page = proc->mmufree; page != nil; page = next){
next = page->next;
if(--page->ref != 0)
panic("mmurelease: page->ref %ld", page->ref);
pagechainhead(page);
}
if(proc->mmufree != nil)
pagechaindone();
proc->mmufree = nil;
if(proc->ldt != nil){
free(proc->ldt);
proc->ldt = nil;
proc->nldt = 0;
}
}
/*
* Allocate and install pdb for the current process.
*/
static void
upallocpdb(void)
{
int s;
ulong *pdb;
Page *page;
if(up->mmupdb != nil)
return;
page = mmupdballoc();
s = splhi();
if(up->mmupdb != nil){
/*
* Perhaps we got an interrupt while
* mmupdballoc was sleeping and that
* interrupt allocated an mmupdb?
* Seems unlikely.
*/
mmupdbfree(up, page);
splx(s);
return;
}
pdb = tmpmap(page);
pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
tmpunmap(pdb);
up->mmupdb = page;
putcr3(up->mmupdb->pa);
splx(s);
}
/*
* Update the mmu in response to a user fault. pa may have PTEWRITE set.
*/
void
putmmu(uintptr va, uintptr pa, Page*)
{
int old, s;
Page *page;
if(up->mmupdb == nil)
upallocpdb();
/*
* We should be able to get through this with interrupts
* turned on (if we get interrupted we'll just pick up
* where we left off) but we get many faults accessing
* vpt[] near the end of this function, and they always happen
* after the process has been switched out and then
* switched back, usually many times in a row (perhaps
* it cannot switch back successfully for some reason).
*
* In any event, I'm tired of searching for this bug.
* Turn off interrupts during putmmu even though
* we shouldn't need to. - rsc
*/
s = splhi();
if(!(vpd[PDX(va)]&PTEVALID)){
if(up->mmufree == 0){
spllo();
page = newpage(0, 0, 0);
splhi();
}
else{
page = up->mmufree;
up->mmufree = page->next;
}
vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
/* page is now mapped into the VPT - clear it */
memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG);
page->daddr = PDX(va);
page->next = up->mmuused;
up->mmuused = page;
}
old = vpt[VPTX(va)];
vpt[VPTX(va)] = pa|PTEUSER|PTEVALID;
if(old&PTEVALID)
flushpg(va);
if(getcr3() != up->mmupdb->pa)
print("bad cr3 %#.8lux %#.8lux\n", getcr3(), up->mmupdb->pa);
splx(s);
}
/*
* Double-check the user MMU.
* Error checking only.
*/
void
checkmmu(uintptr va, uintptr pa)
{
if(up->mmupdb == 0)
return;
if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID))
return;
if(PPN(vpt[VPTX(va)]) != pa)
print("%ld %s: va=%#p pa=%#p pte=%#08lux\n",
up->pid, up->text,
va, pa, vpt[VPTX(va)]);
}
/*
* Walk the page-table pointed to by pdb and return a pointer
* to the entry for virtual address va at the requested level.
* If the entry is invalid and create isn't requested then bail
* out early. Otherwise, for the 2nd level walk, allocate a new
* page-table page and register it in the 1st level. This is used
* only to edit kernel mappings, which use pages from kernel memory,
* so it's okay to use KADDR to look at the tables.
*/
ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
ulong *table;
void *map;
table = &pdb[PDX(va)];
if(!(*table & PTEVALID) && create == 0)
return 0;
switch(level){
default:
return 0;
case 1:
return table;
case 2:
if(*table & PTESIZE)
panic("mmuwalk2: va %luX entry %luX", va, *table);
if(!(*table & PTEVALID)){
/*
* Have to call low-level allocator from
* memory.c if we haven't set up the xalloc
* tables yet.
*/
if(conf.mem[0].npage != 0)
map = xspanalloc(BY2PG, BY2PG, 0);
else
map = rampage();
if(map == nil)
panic("mmuwalk xspanalloc failed");
*table = PADDR(map)|PTEWRITE|PTEVALID;
}
table = KADDR(PPN(*table));
return &table[PTX(va)];
}
}
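/*
 * For example, mmuinit above uses mmuwalk(m->pdb, x, 2, 0) to find the
 * level-2 (pte) entry for each page of kernel text so it can clear
 * PTEWRITE, and pdbmap below uses it with create set to build new
 * kernel mappings.
 */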
/*
* Device mappings are shared by all procs and processors and
* live in the virtual range VMAP to VMAP+VMAPSIZE. The master
* copy of the mappings is stored in mach0->pdb, and they are
* paged in from there as necessary by vmapsync during faults.
*/
static Lock vmaplock;
static int findhole(ulong *a, int n, int count);
static ulong vmapalloc(ulong size);
static void pdbunmap(ulong*, ulong, int);
/*
* Add a device mapping to the vmap range.
*/
void*
vmap(ulong pa, int size)
{
int osize;
ulong o, va;
/*
* might be asking for less than a page.
*/
osize = size;
o = pa & (BY2PG-1);
pa -= o;
size += o;
size = ROUND(size, BY2PG);
if(pa == 0){
print("vmap pa=0 pc=%#p\n", getcallerpc(&pa));
return nil;
}
ilock(&vmaplock);
if((va = vmapalloc(size)) == 0
|| pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){
iunlock(&vmaplock);
return 0;
}
iunlock(&vmaplock);
/* avoid trap on local processor
for(i=0; i<size; i+=4*MB)
vmapsync(va+i);
*/
USED(osize);
// print(" vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o);
return (void*)(va + o);
}
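/*
 * Typical use in a driver (a sketch; the Pcidev fields are the usual
 * ones from io.h and the BAR chosen is illustrative):
 *
 *	regs = vmap(pcidev->mem[0].bar & ~0x0F, pcidev->mem[0].size);
 *	if(regs == nil)
 *		...fail...
 *	...use regs...
 *	vunmap(regs, pcidev->mem[0].size);
 */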
static int
findhole(ulong *a, int n, int count)
{
int have, i;
have = 0;
for(i=0; i<n; i++){
if(a[i] == 0)
have++;
else
have = 0;
if(have >= count)
return i+1 - have;
}
return -1;
}
/*
* Look for free space in the vmap.
*/
static ulong
vmapalloc(ulong size)
{
int i, n, o;
ulong *vpdb;
int vpdbsize;
vpdb = &MACHP(0)->pdb[PDX(VMAP)];
vpdbsize = VMAPSIZE/(4*MB);
if(size >= 4*MB){
n = (size+4*MB-1) / (4*MB);
if((o = findhole(vpdb, vpdbsize, n)) != -1)
return VMAP + o*4*MB;
return 0;
}
n = (size+BY2PG-1) / BY2PG;
for(i=0; i<vpdbsize; i++)
if((vpdb[i]&PTEVALID) && !(vpdb[i]&PTESIZE))
if((o = findhole(KADDR(PPN(vpdb[i])), WD2PG, n)) != -1)
return VMAP + i*4*MB + o*BY2PG;
if((o = findhole(vpdb, vpdbsize, 1)) != -1)
return VMAP + o*4*MB;
/*
* could span page directory entries, but not worth the trouble.
* not going to be very much contention.
*/
return 0;
}
/*
* Remove a device mapping from the vmap range.
* Since pdbunmap does not remove page tables, just entries,
* the call need not be interlocked with vmap.
*/
void
vunmap(void *v, int size)
{
int i;
ulong va, o;
Mach *nm;
Proc *p;
/*
* might not be aligned
*/
va = (ulong)v;
o = va&(BY2PG-1);
va -= o;
size += o;
size = ROUND(size, BY2PG);
if(size < 0 || va < VMAP || va+size > VMAP+VMAPSIZE)
panic("vunmap va=%#.8lux size=%#x pc=%#.8lux",
va, size, getcallerpc(&v));
pdbunmap(MACHP(0)->pdb, va, size);
/*
* Flush mapping from all the tlbs and copied pdbs.
* This can be (and is) slow, since it is called only rarely.
* It is possible for vunmap to be called with up == nil,
* e.g. from the reset/init driver routines during system
* boot. In that case it suffices to flush the MACH(0) TLB
* and return.
*/
if(up == nil){
putcr3(PADDR(MACHP(0)->pdb));
return;
}
for(i=0; i<conf.nproc; i++){
p = proctab(i);
if(p->state == Dead)
continue;
if(p != up)
p->newtlb = 1;
}
for(i=0; i<conf.nmach; i++){
nm = MACHP(i);
if(nm != m)
nm->flushmmu = 1;
}
flushmmu();
for(i=0; i<conf.nmach; i++){
nm = MACHP(i);
if(nm != m)
while(active.machs[nm->machno] && nm->flushmmu)
;
}
}
/*
* Add kernel mappings for pa -> va for a section of size bytes.
*/
int
pdbmap(ulong *pdb, ulong pa, ulong va, int size)
{
int pse;
ulong pgsz, *pte, *table;
ulong flag, off;
flag = pa&0xFFF;
pa &= ~0xFFF;
if((MACHP(0)->cpuiddx & Pse) && (getcr4() & 0x10))
pse = 1;
else
pse = 0;
for(off=0; off<size; off+=pgsz){
table = &pdb[PDX(va+off)];
if((*table&PTEVALID) && (*table&PTESIZE))
panic("vmap: va=%#.8lux pa=%#.8lux pde=%#.8lux",
va+off, pa+off, *table);
/*
* Check if it can be mapped using a 4MB page:
* va, pa aligned and size >= 4MB and processor can do it.
*/
if(pse && (pa+off)%(4*MB) == 0 && (va+off)%(4*MB) == 0 && (size-off) >= 4*MB){
*table = (pa+off)|flag|PTESIZE|PTEVALID;
pgsz = 4*MB;
}else{
pte = mmuwalk(pdb, va+off, 2, 1);
if(*pte&PTEVALID)
panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux",
va+off, pa+off, *pte);
*pte = (pa+off)|flag|PTEVALID;
pgsz = BY2PG;
}
}
return 0;
}
/*
* Remove mappings. Must already exist, for sanity.
* Only used for kernel mappings, so okay to use KADDR.
*/
static void
pdbunmap(ulong *pdb, ulong va, int size)
{
ulong vae;
ulong *table;
vae = va+size;
while(va < vae){
table = &pdb[PDX(va)];
if(!(*table & PTEVALID))
panic("vunmap: not mapped");
if(*table & PTESIZE){
if(va & 4*MB-1)
panic("vunmap: misaligned: %#p", va);
*table = 0;
va += 4*MB;
continue;
}
table = KADDR(PPN(*table));
if(!(table[PTX(va)] & PTEVALID))
panic("vunmap: not mapped");
table[PTX(va)] = 0;
va += BY2PG;
}
}
/*
* Handle a fault by bringing vmap up to date.
 * We only copy pdb entries, and since they never go away,
 * no locking is needed.
*/
int
vmapsync(ulong va)
{
ulong entry, *table;
if(va < VMAP || va >= VMAP+VMAPSIZE)
return 0;
entry = MACHP(0)->pdb[PDX(va)];
if(!(entry&PTEVALID))
return 0;
if(!(entry&PTESIZE)){
/* make sure entry will help the fault */
table = KADDR(PPN(entry));
if(!(table[PTX(va)]&PTEVALID))
return 0;
}
vpd[PDX(va)] = entry;
/*
* TLB doesn't cache negative results, so no flush needed.
*/
return 1;
}
/*
* KMap is used to map individual pages into virtual memory.
* It is rare to have more than a few KMaps at a time (in the
* absence of interrupts, only two at a time are ever used,
* but interrupts can stack). The mappings are local to a process,
* so we can use the same range of virtual address space for
* all processes without any coordination.
*/
#define kpt (vpt+VPTX(KMAP))
#define NKPT (KMAPSIZE/BY2PG)
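/*
 * Typical use, as in port/page.c (a sketch; VA() from dat.h turns
 * the KMap back into a usable address):
 *
 *	k = kmap(page);
 *	memset((void*)VA(k), 0, BY2PG);
 *	kunmap(k);
 */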
KMap*
kmap(Page *page)
{
int i, o, s;
if(up == nil)
panic("kmap: up=0 pc=%#.8lux", getcallerpc(&page));
if(up->mmupdb == nil)
upallocpdb();
if(up->nkmap < 0)
panic("kmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);
/*
* Splhi shouldn't be necessary here, but paranoia reigns.
* See comment in putmmu above.
*/
s = splhi();
up->nkmap++;
if(!(vpd[PDX(KMAP)]&PTEVALID)){
/* allocate page directory */
if(KMAPSIZE > BY2XPG)
panic("bad kmapsize");
if(up->kmaptable != nil)
panic("kmaptable");
spllo();
up->kmaptable = newpage(0, 0, 0);
splhi();
vpd[PDX(KMAP)] = up->kmaptable->pa|PTEWRITE|PTEVALID;
flushpg((ulong)kpt);
memset(kpt, 0, BY2PG);
kpt[0] = page->pa|PTEWRITE|PTEVALID;
up->lastkmap = 0;
splx(s);
return (KMap*)KMAP;
}
if(up->kmaptable == nil)
panic("no kmaptable");
o = up->lastkmap+1;
for(i=0; i<NKPT; i++){
if(kpt[(i+o)%NKPT] == 0){
o = (i+o)%NKPT;
kpt[o] = page->pa|PTEWRITE|PTEVALID;
up->lastkmap = o;
splx(s);
return (KMap*)(KMAP+o*BY2PG);
}
}
panic("out of kmap");
return nil;
}
void
kunmap(KMap *k)
{
ulong va;
va = (ulong)k;
if(up->mmupdb == nil || !(vpd[PDX(KMAP)]&PTEVALID))
panic("kunmap: no kmaps");
if(va < KMAP || va >= KMAP+KMAPSIZE)
panic("kunmap: bad address %#.8lux pc=%#p", va, getcallerpc(&k));
if(!(vpt[VPTX(va)]&PTEVALID))
panic("kunmap: not mapped %#.8lux pc=%#p", va, getcallerpc(&k));
up->nkmap--;
if(up->nkmap < 0)
panic("kunmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);
vpt[VPTX(va)] = 0;
flushpg(va);
}
/*
* Temporary one-page mapping used to edit page directories.
*
* The fasttmp #define controls whether the code optimizes
* the case where the page is already mapped in the physical
* memory window.
*/
#define fasttmp 1
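/*
 * Typical use, as in mmupdballoc above (a sketch):
 *
 *	pdb = tmpmap(page);
 *	memmove(pdb, m->pdb, BY2PG);
 *	tmpunmap(pdb);
 */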
void*
tmpmap(Page *p)
{
ulong i;
ulong *entry;
if(islo())
		panic("tmpmap: islo");
if(fasttmp && p->pa < -KZERO)
return KADDR(p->pa);
/*
* PDX(TMPADDR) == PDX(MACHADDR), so this
* entry is private to the processor and shared
* between up->mmupdb (if any) and m->pdb.
*/
entry = &vpt[VPTX(TMPADDR)];
if(!(*entry&PTEVALID)){
for(i=KZERO; i<=CPU0MACH; i+=BY2PG)
print("%#p: *%#p=%#p (vpt=%#p index=%#p)\n", i, &vpt[VPTX(i)], vpt[VPTX(i)], vpt, VPTX(i));
panic("tmpmap: no entry");
}
if(PPN(*entry) != PPN(TMPADDR-KZERO))
panic("tmpmap: already mapped entry=%#.8lux", *entry);
*entry = p->pa|PTEWRITE|PTEVALID;
flushpg(TMPADDR);
return (void*)TMPADDR;
}
void
tmpunmap(void *v)
{
ulong *entry;
if(islo())
		panic("tmpunmap: islo");
if(fasttmp && (ulong)v >= KZERO && v != (void*)TMPADDR)
return;
if(v != (void*)TMPADDR)
panic("tmpunmap: bad address");
entry = &vpt[VPTX(TMPADDR)];
if(!(*entry&PTEVALID) || PPN(*entry) == PPN(PADDR(TMPADDR)))
		panic("tmpunmap: not mapped entry=%#.8lux", *entry);
*entry = PPN(TMPADDR-KZERO)|PTEWRITE|PTEVALID;
flushpg(TMPADDR);
}
/*
* These could go back to being macros once the kernel is debugged,
* but the extra checking is nice to have.
*/
void*
kaddr(ulong pa)
{
if(pa >= (ulong)-KZERO)
panic("kaddr: pa=%#.8lux", pa);
return (void*)(pa+KZERO);
}
ulong
paddr(void *v)
{
ulong va;
va = (ulong)v;
if(va < KZERO)
panic("paddr: va=%#.8lux pc=%#p", va, getcallerpc(&v));
return va-KZERO;
}
/*
* More debugging.
*/
void
countpagerefs(ulong *ref, int print)
{
int i, n;
Mach *mm;
Page *pg;
Proc *p;
n = 0;
for(i=0; i<conf.nproc; i++){
p = proctab(i);
if(p->mmupdb){
if(print){
if(ref[pagenumber(p->mmupdb)])
iprint("page %#.8lux is proc %d (pid %lud) pdb\n",
p->mmupdb->pa, i, p->pid);
continue;
}
if(ref[pagenumber(p->mmupdb)]++ == 0)
n++;
else
iprint("page %#.8lux is proc %d (pid %lud) pdb but has other refs!\n",
p->mmupdb->pa, i, p->pid);
}
if(p->kmaptable){
if(print){
if(ref[pagenumber(p->kmaptable)])
iprint("page %#.8lux is proc %d (pid %lud) kmaptable\n",
p->kmaptable->pa, i, p->pid);
continue;
}
if(ref[pagenumber(p->kmaptable)]++ == 0)
n++;
else
iprint("page %#.8lux is proc %d (pid %lud) kmaptable but has other refs!\n",
p->kmaptable->pa, i, p->pid);
}
for(pg=p->mmuused; pg; pg=pg->next){
if(print){
if(ref[pagenumber(pg)])
iprint("page %#.8lux is on proc %d (pid %lud) mmuused\n",
pg->pa, i, p->pid);
continue;
}
if(ref[pagenumber(pg)]++ == 0)
n++;
else
iprint("page %#.8lux is on proc %d (pid %lud) mmuused but has other refs!\n",
pg->pa, i, p->pid);
}
for(pg=p->mmufree; pg; pg=pg->next){
if(print){
if(ref[pagenumber(pg)])
iprint("page %#.8lux is on proc %d (pid %lud) mmufree\n",
pg->pa, i, p->pid);
continue;
}
if(ref[pagenumber(pg)]++ == 0)
n++;
else
iprint("page %#.8lux is on proc %d (pid %lud) mmufree but has other refs!\n",
pg->pa, i, p->pid);
}
}
if(!print)
iprint("%d pages in proc mmu\n", n);
n = 0;
for(i=0; i<conf.nmach; i++){
mm = MACHP(i);
for(pg=mm->pdbpool; pg; pg=pg->next){
if(print){
if(ref[pagenumber(pg)])
iprint("page %#.8lux is in cpu%d pdbpool\n",
pg->pa, i);
continue;
}
if(ref[pagenumber(pg)]++ == 0)
n++;
else
iprint("page %#.8lux is in cpu%d pdbpool but has other refs!\n",
pg->pa, i);
}
}
if(!print){
iprint("%d pages in mach pdbpools\n", n);
for(i=0; i<conf.nmach; i++)
iprint("cpu%d: %d pdballoc, %d pdbfree\n",
i, MACHP(i)->pdballoc, MACHP(i)->pdbfree);
}
}
void
checkfault(ulong, ulong)
{
}
/*
* Return the number of bytes that can be accessed via KADDR(pa).
* If pa is not a valid argument to KADDR, return 0.
*/
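/*
 * For example, with KZERO at 0xF0000000 the KZERO window covers
 * -KZERO = 0x10000000 bytes (256MB) of physical memory, so
 * cankaddr(0x00400000) returns 0x0FC00000, and cankaddr of anything
 * at or above 256MB returns 0.
 */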
ulong
cankaddr(ulong pa)
{
if(pa >= -KZERO)
return 0;
return -KZERO - pa;
}
/*
 * Mark the pages covering [a, a+n) as write combining (used for the framebuffer).
*/
void
patwc(void *a, int n)
{
ulong *pte, mask, attr, va;
vlong v;
int z;
/* check if pat is usable */
if((MACHP(0)->cpuiddx & Pat) == 0
|| rdmsr(0x277, &v) == -1
|| ((v >> PATWC*8) & 7) != 1)
return;
/* set the bits for all pages in range */
for(va = (ulong)a; n > 0; n -= z, va += z){
pte = mmuwalk(MACHP(0)->pdb, va, 1, 0);
if(pte && (*pte & (PTEVALID|PTESIZE)) == (PTEVALID|PTESIZE)){
z = 4*MB - (va & (4*MB-1));
mask = 3<<3 | 1<<12;
} else {
pte = mmuwalk(MACHP(0)->pdb, va, 2, 0);
if(pte == 0 || (*pte & PTEVALID) == 0)
panic("patwc: va=%#p", va);
z = BY2PG - (va & (BY2PG-1));
mask = 3<<3 | 1<<7;
}
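		/*
		 * PAT index bits in an entry: PWT is bit 3, PCD is bit 4, and
		 * the high PAT bit is bit 7 in a 4K pte but bit 12 in a 4M pde.
		 * attr spreads PATWC (7 = 111b) across bits 3, 4, 7 and 12;
		 * mask picks the ones that apply to this entry.
		 */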
attr = (((PATWC&3)<<3) | ((PATWC&4)<<5) | ((PATWC&4)<<10));
*pte = (*pte & ~mask) | (attr & mask);
}
}