bcm64: switch to 64k page size

This commit is contained in:
cinap_lenrek 2019-05-17 18:35:14 +02:00
parent e0c19ae048
commit 5c5c1b6666
6 changed files with 90 additions and 82 deletions

View file

@ -115,7 +115,7 @@ struct Conf
*/
/*
 * Page table pointers kept in struct MMMU.
 * NOTE(review): this text is a commit view without +/- markers; the
 * hunk replaces one field, so mmul1 is presumably the removed line
 * and mmutop its replacement -- confirm against the checked-out file.
 */
struct MMMU
{
PTE* mmul1; /* l1 for this processor */
PTE* mmutop; /* first level user page table */
};
/*
@ -245,8 +245,6 @@ struct Soc { /* SoC dependent configuration */
uintptr busdram;
uintptr busio;
uintptr armlocal;
u32int l1ptedramattrs;
u32int l2ptedramattrs;
};
extern Soc soc;

View file

@ -195,8 +195,8 @@ TEXT mmuenable<>(SB), 1, $-4
/* T0SZ */ | (64-EVASHIFT)<<0 )
MOV $TCRINIT, R1
MRS ID_AA64MMFR0_EL1, R2
ANDW $0xF, R2 // IPS
ADD R2<<32, R1
ANDW $0x7, R2 // PARange
ADD R2<<32, R1 // IPS
MSR R1, TCR_EL1
ISB $SY

View file

@ -17,11 +17,11 @@
* 16K 32M 64G 128T
* 64K 512M 4T -
*/
#define PGSHIFT 12 /* log(BY2PG) */
#define PGSHIFT 16 /* log(BY2PG) */
#define BY2PG (1ULL<<PGSHIFT) /* bytes per page */
/* effective virtual address space */
#define EVASHIFT 36
#define EVASHIFT 33
#define EVAMASK ((1ULL<<EVASHIFT)-1)
#define PTSHIFT (PGSHIFT-3)
@ -31,8 +31,8 @@
#define PTL1X(v, l) (L1TABLEX(v, l) | PTLX(v, l))
#define L1TABLEX(v, l) (L1TABLE(v, l) << PTSHIFT)
#define L1TABLES HOWMANY(-KZERO, PGLSZ(2))
#define L1TABLE(v, l) (L1TABLES-1 - ((PTLX(v, 2) % L1TABLES) >> (((l)-1)*PTSHIFT)) + (l)-1)
#define L1TABLES HOWMANY(-KSEG0, PGLSZ(2))
#define L1TABLE(v, l) (L1TABLES - ((PTLX(v, 2) % L1TABLES) >> (((l)-1)*PTSHIFT)) + (l)-1)
#define L1TOPSIZE (1ULL << (EVASHIFT - PTLEVELS*PTSHIFT))
#define MAXMACH 4 /* max # cpus system can run */
@ -42,14 +42,12 @@
#define STACKALIGN(sp) ((sp) & ~7) /* bug: assure with alloc */
#define TRAPFRAMESIZE (38*8)
/*
* Address spaces.
* KTZERO is used by kprof and dumpstack (if any).
*
* KZERO is mapped to physical 0 (start of ram).
*/
#define KZERO 0xFFFFFFFF80000000ULL /* kernel address space */
#define KSEG0 (0xFFFFFFFF00000000ULL)
#define VIRTIO (0xFFFFFFFF3F000000ULL) /* i/o registers */
#define ARMLOCAL (0xFFFFFFFF40000000ULL)
#define KZERO (0xFFFFFFFF80000000ULL) /* kernel address space */
#define FRAMEBUFFER (0xFFFFFFFFC0000000ULL|PTEWT)
#define VGPIO 0 /* virtual gpio for pi3 ACT LED */
#define SPINTABLE (KZERO+0xd8)
#define CONFADDR (KZERO+0x100)
@ -57,16 +55,12 @@
#define VCBUFFER (KZERO+0x3400) /* videocore mailbox buffer */
#define L1 (L1TOP-L1SIZE)
#define L1SIZE ((L1TABLES+PTLEVELS-3)*BY2PG)
#define L1SIZE ((L1TABLES+PTLEVELS-2)*BY2PG)
#define L1TOP ((MACHADDR(MAXMACH-1)-L1TOPSIZE)&-BY2PG)
#define MACHADDR(n) (KTZERO-((n)+1)*MACHSIZE)
#define KTZERO (KZERO+0x80000) /* kernel text start */
#define FRAMEBUFFER (0xFFFFFFFFC0000000ULL | PTEWT)
#define VIRTIO 0xFFFFFFFFE0000000ULL /* i/o registers */
#define ARMLOCAL (VIRTIO+IOSIZE)
#define VGPIO 0 /* virtual gpio for pi3 ACT LED */
#define UZERO 0ULL /* user segment */
#define UTZERO (UZERO+0x10000) /* user text start */

View file

@ -71,11 +71,11 @@ LIB=\
$p$CONF:DQ: $CONF.c $OBJ $LIB mkfile
$CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c
echo '# linking raw kernel' # H6: no headers, data segment aligned
$LD -l -o $target -H6 -R4096 -T$loadaddr $OBJ $CONF.$O $LIB
$LD -l -o $target -H6 -R0x10000 -T$loadaddr $OBJ $CONF.$O $LIB
s$p$CONF:DQ: $CONF.$O $OBJ $LIB
echo '# linking kernel with symbols'
$LD -l -o $target -R4096 -T$loadaddr $OBJ $CONF.$O $LIB
$LD -l -o $target -R0x10000 -T$loadaddr $OBJ $CONF.$O $LIB
size $target
$p$CONF.gz:D: $p$CONF

View file

@ -8,45 +8,65 @@
/*
 * mmu0init fills the table at l1 with the initial kernel mappings:
 * DRAM starting at PHYSDRAM is entered both at KZERO and at its
 * physical address, soc.physio is entered at VIRTIO and soc.armlocal
 * at ARMLOCAL. Level-1 entries are blocks; a remainder smaller than
 * PGLSZ(1) is entered as level-0 pages instead. Table entries for
 * levels 2 and 3 are added when PTLEVELS is large enough.
 *
 * NOTE(review): this text is a commit view without +/- markers, so
 * lines removed by the commit and their replacements both appear
 * below (e.g. the two declarations of va, pa, pe and the repeated
 * loop heads); confirm against the checked-out file before relying
 * on any single line.
 */
void
mmu0init(uintptr *l1)
{
uintptr va, pa, pe;
uintptr va, pa, pe, attr;
/* 0 identity map */
/* KZERO */
/* attributes shared by the DRAM entries that use attr below */
attr = PTEWRITE | PTEAF | PTEKERNEL | PTESH(SHARE_INNER);
pe = PHYSDRAM + soc.dramsize;
/* clamp the end of DRAM to (uintptr)-KZERO */
if(pe > (uintptr)-KZERO)
pe = (uintptr)-KZERO;
for(pa = PHYSDRAM; pa < pe; pa += PGLSZ(1))
l1[PTL1X(pa, 1)] = pa | PTEVALID | PTEBLOCK | PTEWRITE | PTEAF
| PTEKERNEL | PTESH(SHARE_INNER);
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){
/* less than one level-1 block left: point both entries at l1 and finish with level-0 pages */
if(pe - pa < PGLSZ(1)){
l1[PTL1X(va, 1)] = (uintptr)l1 | PTEVALID | PTETABLE;
l1[PTL1X(pa, 1)] = (uintptr)l1 | PTEVALID | PTETABLE;
for(; pa < pe; pa += PGLSZ(0), va += PGLSZ(0))
l1[PTLX(va, 0)] = pa | PTEVALID | PTEPAGE | attr;
break;
}
/* same block entered at the KZERO address and at the physical address */
l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr;
l1[PTL1X(pa, 1)] = pa | PTEVALID | PTEBLOCK | attr;
}
pe = (uintptr)-KZERO; /* populate top levels for mmukmap() */
if(PTLEVELS > 2)
for(pa = PHYSDRAM; pa < pe; pa += PGLSZ(2))
l1[PTL1X(pa, 2)] = (uintptr)&l1[L1TABLEX(pa, 1)] | PTEVALID | PTETABLE;
if(PTLEVELS > 3)
for(pa = PHYSDRAM; pa < pe; pa += PGLSZ(3))
l1[PTL1X(pa, 3)] = (uintptr)&l1[L1TABLEX(pa, 2)] | PTEVALID | PTETABLE;
/* KZERO */
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1))
l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | PTEWRITE | PTEAF
| PTEKERNEL | PTESH(SHARE_INNER);
if(PTLEVELS > 2)
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2))
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2)){
l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE;
l1[PTL1X(pa, 2)] = (uintptr)&l1[L1TABLEX(pa, 1)] | PTEVALID | PTETABLE;
}
if(PTLEVELS > 3)
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3))
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3)){
l1[PTL1X(va, 3)] = (uintptr)&l1[L1TABLEX(va, 2)] | PTEVALID | PTETABLE;
l1[PTL1X(pa, 3)] = (uintptr)&l1[L1TABLEX(pa, 2)] | PTEVALID | PTETABLE;
}
/* VIRTIO */
pe = -VIRTIO + soc.physio;
for(pa = soc.physio, va = VIRTIO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1))
l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | PTEWRITE | PTEAF
| PTEKERNEL | PTESH(SHARE_OUTER) | PTEDEVICE;
/* device attributes; attr is not reassigned before the ARMLOCAL entries below */
attr = PTEWRITE | PTEAF | PTEKERNEL | PTESH(SHARE_OUTER) | PTEDEVICE;
pe = soc.physio + IOSIZE;
for(pa = soc.physio, va = VIRTIO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){
/* less than one level-1 block left: finish with level-0 pages */
if(pe - pa < PGLSZ(1)){
l1[PTL1X(va, 1)] = (uintptr)l1 | PTEVALID | PTETABLE;
for(; pa < pe; pa += PGLSZ(0), va += PGLSZ(0))
l1[PTLX(va, 0)] = pa | PTEVALID | PTEPAGE | attr;
break;
}
l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr;
}
if(PTLEVELS > 2)
for(pa = soc.physio, va = VIRTIO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2))
l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE;
if(PTLEVELS > 3)
for(pa = soc.physio, va = VIRTIO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3))
l1[PTL1X(va, 3)] = (uintptr)&l1[L1TABLEX(va, 2)] | PTEVALID | PTETABLE;
/* ARMLOCAL */
/* covers MB bytes starting at soc.armlocal */
pe = soc.armlocal + MB;
for(pa = soc.armlocal, va = ARMLOCAL; pa < pe; pa += PGLSZ(1), va += PGLSZ(1))
l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr;
if(PTLEVELS > 2)
for(pa = soc.armlocal, va = ARMLOCAL; pa < pe; pa += PGLSZ(2), va += PGLSZ(2))
l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE;
if(PTLEVELS > 3)
for(pa = soc.armlocal, va = ARMLOCAL; pa < pe; pa += PGLSZ(3), va += PGLSZ(3))
l1[PTL1X(va, 3)] = (uintptr)&l1[L1TABLEX(va, 2)] | PTEVALID | PTETABLE;
}
void
@ -57,21 +77,17 @@ mmu0clear(uintptr *l1)
pe = PHYSDRAM + soc.dramsize;
if(pe > (uintptr)-KZERO)
pe = (uintptr)-KZERO;
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1))
if(PTL1X(pa, 1) != PTL1X(va, 1))
l1[PTL1X(pa, 1)] = 0;
}
if(PTLEVELS > 2)
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2)){
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2))
if(PTL1X(pa, 2) != PTL1X(va, 2))
l1[PTL1X(pa, 2)] = 0;
}
if(PTLEVELS > 3)
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3)){
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3))
if(PTL1X(pa, 3) != PTL1X(va, 3))
l1[PTL1X(pa, 3)] = 0;
}
}
void
@ -85,32 +101,24 @@ mmuidmap(uintptr *l1)
pe = PHYSDRAM + soc.dramsize;
if(pe > (uintptr)-KZERO)
pe = (uintptr)-KZERO;
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){
if(PTL1X(pa, 1) != PTL1X(va, 1))
l1[PTL1X(pa, 1)] = pa | PTEVALID | PTEBLOCK | PTEWRITE | PTEAF
| PTEKERNEL | PTESH(SHARE_INNER);
}
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1))
l1[PTL1X(pa, 1)] = l1[PTL1X(va, 1)];
if(PTLEVELS > 2)
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2)){
if(PTL1X(pa, 2) != PTL1X(va, 2))
l1[PTL1X(pa, 2)] = PADDR(&l1[L1TABLEX(pa, 1)]) | PTEVALID | PTETABLE;
}
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2))
l1[PTL1X(pa, 2)] = l1[PTL1X(va, 2)];
if(PTLEVELS > 3)
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3)){
if(PTL1X(pa, 3) != PTL1X(va, 3))
l1[PTL1X(pa, 3)] = PADDR(&l1[L1TABLEX(pa, 2)]) | PTEVALID | PTETABLE;
}
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3))
l1[PTL1X(pa, 3)] = l1[PTL1X(va, 3)];
setttbr(PADDR(&l1[L1TABLEX(0, PTLEVELS-1)]));
}
/*
 * mmu1init allocates a page-aligned (BY2PG) table with mallocalign,
 * panics if the allocation fails, zeroes it, stores it in m and then
 * calls mmuswitch(nil).
 *
 * NOTE(review): this text is a commit view without +/- markers; the
 * mmul1 statements (L1SIZE+L1TOPSIZE bytes) and the mmutop statements
 * (L1TOPSIZE bytes) are presumably the removed and the added version
 * of the same allocation -- confirm against the checked-out file.
 */
void
mmu1init(void)
{
m->mmul1 = mallocalign(L1SIZE+L1TOPSIZE, BY2PG, L1SIZE, 0);
if(m->mmul1 == nil)
panic("mmu1init: no memory for mmul1");
memset(m->mmul1, 0, L1SIZE+L1TOPSIZE);
m->mmutop = mallocalign(L1TOPSIZE, BY2PG, 0, 0);
if(m->mmutop == nil)
panic("mmu1init: no memory for mmutop");
memset(m->mmutop, 0, L1TOPSIZE);
mmuswitch(nil);
}
@ -187,7 +195,7 @@ mmuwalk(uintptr va, int level)
int i, x;
x = PTLX(va, PTLEVELS-1);
table = &m->mmul1[L1TABLEX(va, PTLEVELS-1)];
table = m->mmutop;
for(i = PTLEVELS-2; i >= level; i--){
pte = table[x];
if(pte & PTEVALID) {
@ -262,12 +270,12 @@ putasid(Proc *p)
{
/*
* Prevent the following scenario:
* pX sleeps on cpuA, leaving its page tables in mmul1
* pX sleeps on cpuA, leaving its page tables in mmutop
* pX wakes up on cpuB, and exits, freeing its page tables
* pY on cpuB allocates a freed page table page and overwrites with data
* cpuA takes an interrupt, and is now running with bad page tables
* In theory this shouldn't hurt because only user address space tables
* are affected, and mmuswitch will clear mmul1 before a user process is
* are affected, and mmuswitch will clear mmutop before a user process is
* dispatched. But empirically it correlates with weird problems, eg
* resetting of the core clock at 0x4000001C which confuses local timers.
*/
@ -287,7 +295,6 @@ putmmu(uintptr va, uintptr pa, Page *pg)
s = splhi();
while((pte = mmuwalk(va, 0)) == nil){
spllo();
assert(up->mmufree == nil);
up->mmufree = newpage(0, nil, 0);
splhi();
}
@ -330,10 +337,10 @@ mmuswitch(Proc *p)
Page *t;
for(va = UZERO; va < USTKTOP; va += PGLSZ(PTLEVELS-1))
m->mmul1[PTL1X(va, PTLEVELS-1)] = 0;
m->mmutop[PTLX(va, PTLEVELS-1)] = 0;
if(p == nil){
setttbr(PADDR(&m->mmul1[L1TABLEX(0, PTLEVELS-1)]));
setttbr(PADDR(m->mmutop));
return;
}
@ -344,13 +351,13 @@ mmuswitch(Proc *p)
for(t = p->mmuhead[PTLEVELS-1]; t != nil; t = t->next){
va = t->va;
m->mmul1[PTL1X(va, PTLEVELS-1)] = t->pa | PTEVALID | PTETABLE;
m->mmutop[PTLX(va, PTLEVELS-1)] = t->pa | PTEVALID | PTETABLE;
}
if(allocasid(p))
flushasid((uvlong)p->asid<<48);
setttbr((uvlong)p->asid<<48 | PADDR(&m->mmul1[L1TABLEX(0, PTLEVELS-1)]));
setttbr((uvlong)p->asid<<48 | PADDR(m->mmutop));
}
void

View file

@ -37,7 +37,7 @@ rebootcmd(int argc, char *argv[])
{
Chan *c;
Exec exec;
ulong magic, text, rtext, entry, data, size;
ulong magic, text, rtext, entry, data, size, align;
uchar *p;
if(argc == 0)
@ -68,8 +68,17 @@ rebootcmd(int argc, char *argv[])
if(magic & HDR_MAGIC)
readn(c, &exec, 8);
switch(magic){
case R_MAGIC:
align = 0x10000; /* 64k segment alignment for arm64 */
break;
default:
align = BY2PG;
break;
}
/* round text out to page boundary */
rtext = PGROUND(entry+text)-entry;
rtext = ROUND(entry+text, align)-entry;
size = rtext + data;
p = malloc(size);
if(p == nil)