diff --git a/sys/src/9/bcm64/dat.h b/sys/src/9/bcm64/dat.h index 7a1747029..82fe13f09 100644 --- a/sys/src/9/bcm64/dat.h +++ b/sys/src/9/bcm64/dat.h @@ -115,7 +115,7 @@ struct Conf */ struct MMMU { - PTE* mmul1; /* l1 for this processor */ + PTE* mmutop; /* first level user page table */ }; /* @@ -245,8 +245,6 @@ struct Soc { /* SoC dependent configuration */ uintptr busdram; uintptr busio; uintptr armlocal; - u32int l1ptedramattrs; - u32int l2ptedramattrs; }; extern Soc soc; diff --git a/sys/src/9/bcm64/l.s b/sys/src/9/bcm64/l.s index 2270fbb8d..c40dd8fc1 100644 --- a/sys/src/9/bcm64/l.s +++ b/sys/src/9/bcm64/l.s @@ -195,8 +195,8 @@ TEXT mmuenable<>(SB), 1, $-4 /* T0SZ */ | (64-EVASHIFT)<<0 ) MOV $TCRINIT, R1 MRS ID_AA64MMFR0_EL1, R2 - ANDW $0xF, R2 // IPS - ADD R2<<32, R1 + ANDW $0x7, R2 // PARange + ADD R2<<32, R1 // IPS MSR R1, TCR_EL1 ISB $SY diff --git a/sys/src/9/bcm64/mem.h b/sys/src/9/bcm64/mem.h index d4b55f556..46fad55aa 100644 --- a/sys/src/9/bcm64/mem.h +++ b/sys/src/9/bcm64/mem.h @@ -17,11 +17,11 @@ * 16K 32M 64G 128T * 64K 512M 4T - */ -#define PGSHIFT 12 /* log(BY2PG) */ +#define PGSHIFT 16 /* log(BY2PG) */ #define BY2PG (1ULL<> (((l)-1)*PTSHIFT)) + (l)-1) +#define L1TABLES HOWMANY(-KSEG0, PGLSZ(2)) +#define L1TABLE(v, l) (L1TABLES - ((PTLX(v, 2) % L1TABLES) >> (((l)-1)*PTSHIFT)) + (l)-1) #define L1TOPSIZE (1ULL << (EVASHIFT - PTLEVELS*PTSHIFT)) #define MAXMACH 4 /* max # cpus system can run */ @@ -42,14 +42,12 @@ #define STACKALIGN(sp) ((sp) & ~7) /* bug: assure with alloc */ #define TRAPFRAMESIZE (38*8) -/* - * Address spaces. - * KTZERO is used by kprof and dumpstack (if any). - * - * KZERO is mapped to physical 0 (start of ram). - */ - -#define KZERO 0xFFFFFFFF80000000ULL /* kernel address space */ +#define KSEG0 (0xFFFFFFFF00000000ULL) +#define VIRTIO (0xFFFFFFFF3F000000ULL) /* i/o registers */ +#define ARMLOCAL (0xFFFFFFFF40000000ULL) +#define KZERO (0xFFFFFFFF80000000ULL) /* kernel address space */ +#define FRAMEBUFFER (0xFFFFFFFFC0000000ULL|PTEWT) +#define VGPIO 0 /* virtual gpio for pi3 ACT LED */ #define SPINTABLE (KZERO+0xd8) #define CONFADDR (KZERO+0x100) @@ -57,16 +55,12 @@ #define VCBUFFER (KZERO+0x3400) /* videocore mailbox buffer */ #define L1 (L1TOP-L1SIZE) -#define L1SIZE ((L1TABLES+PTLEVELS-3)*BY2PG) +#define L1SIZE ((L1TABLES+PTLEVELS-2)*BY2PG) #define L1TOP ((MACHADDR(MAXMACH-1)-L1TOPSIZE)&-BY2PG) #define MACHADDR(n) (KTZERO-((n)+1)*MACHSIZE) #define KTZERO (KZERO+0x80000) /* kernel text start */ -#define FRAMEBUFFER (0xFFFFFFFFC0000000ULL | PTEWT) -#define VIRTIO 0xFFFFFFFFE0000000ULL /* i/o registers */ -#define ARMLOCAL (VIRTIO+IOSIZE) -#define VGPIO 0 /* virtual gpio for pi3 ACT LED */ #define UZERO 0ULL /* user segment */ #define UTZERO (UZERO+0x10000) /* user text start */ diff --git a/sys/src/9/bcm64/mkfile b/sys/src/9/bcm64/mkfile index ad216131e..757024058 100644 --- a/sys/src/9/bcm64/mkfile +++ b/sys/src/9/bcm64/mkfile @@ -71,11 +71,11 @@ LIB=\ $p$CONF:DQ: $CONF.c $OBJ $LIB mkfile $CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c echo '# linking raw kernel' # H6: no headers, data segment aligned - $LD -l -o $target -H6 -R4096 -T$loadaddr $OBJ $CONF.$O $LIB + $LD -l -o $target -H6 -R0x10000 -T$loadaddr $OBJ $CONF.$O $LIB s$p$CONF:DQ: $CONF.$O $OBJ $LIB echo '# linking kernel with symbols' - $LD -l -o $target -R4096 -T$loadaddr $OBJ $CONF.$O $LIB + $LD -l -o $target -R0x10000 -T$loadaddr $OBJ $CONF.$O $LIB size $target $p$CONF.gz:D: $p$CONF diff --git a/sys/src/9/bcm64/mmu.c b/sys/src/9/bcm64/mmu.c index 29313d3ac..b71d6097b 100644 --- a/sys/src/9/bcm64/mmu.c +++ b/sys/src/9/bcm64/mmu.c @@ -8,45 +8,65 @@ void mmu0init(uintptr *l1) { - uintptr va, pa, pe; + uintptr va, pa, pe, attr; - /* 0 identity map */ + /* KZERO */ + attr = PTEWRITE | PTEAF | PTEKERNEL | PTESH(SHARE_INNER); pe = PHYSDRAM + soc.dramsize; if(pe > (uintptr)-KZERO) pe = (uintptr)-KZERO; - - for(pa = PHYSDRAM; pa < pe; pa += PGLSZ(1)) - l1[PTL1X(pa, 1)] = pa | PTEVALID | PTEBLOCK | PTEWRITE | PTEAF - | PTEKERNEL | PTESH(SHARE_INNER); + for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){ + if(pe - pa < PGLSZ(1)){ + l1[PTL1X(va, 1)] = (uintptr)l1 | PTEVALID | PTETABLE; + l1[PTL1X(pa, 1)] = (uintptr)l1 | PTEVALID | PTETABLE; + for(; pa < pe; pa += PGLSZ(0), va += PGLSZ(0)) + l1[PTLX(va, 0)] = pa | PTEVALID | PTEPAGE | attr; + break; + } + l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr; + l1[PTL1X(pa, 1)] = pa | PTEVALID | PTEBLOCK | attr; + } + pe = (uintptr)-KZERO; /* populate top levels for mmukmap() */ if(PTLEVELS > 2) - for(pa = PHYSDRAM; pa < pe; pa += PGLSZ(2)) - l1[PTL1X(pa, 2)] = (uintptr)&l1[L1TABLEX(pa, 1)] | PTEVALID | PTETABLE; - if(PTLEVELS > 3) - for(pa = PHYSDRAM; pa < pe; pa += PGLSZ(3)) - l1[PTL1X(pa, 3)] = (uintptr)&l1[L1TABLEX(pa, 2)] | PTEVALID | PTETABLE; - - /* KZERO */ - for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)) - l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | PTEWRITE | PTEAF - | PTEKERNEL | PTESH(SHARE_INNER); - if(PTLEVELS > 2) - for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2)) + for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2)){ l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE; + l1[PTL1X(pa, 2)] = (uintptr)&l1[L1TABLEX(pa, 1)] | PTEVALID | PTETABLE; + } if(PTLEVELS > 3) - for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3)) + for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3)){ l1[PTL1X(va, 3)] = (uintptr)&l1[L1TABLEX(va, 2)] | PTEVALID | PTETABLE; + l1[PTL1X(pa, 3)] = (uintptr)&l1[L1TABLEX(pa, 2)] | PTEVALID | PTETABLE; + } /* VIRTIO */ - pe = -VIRTIO + soc.physio; - for(pa = soc.physio, va = VIRTIO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)) - l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | PTEWRITE | PTEAF - | PTEKERNEL | PTESH(SHARE_OUTER) | PTEDEVICE; + attr = PTEWRITE | PTEAF | PTEKERNEL | PTESH(SHARE_OUTER) | PTEDEVICE; + pe = soc.physio + IOSIZE; + for(pa = soc.physio, va = VIRTIO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){ + if(pe - pa < PGLSZ(1)){ + l1[PTL1X(va, 1)] = (uintptr)l1 | PTEVALID | PTETABLE; + for(; pa < pe; pa += PGLSZ(0), va += PGLSZ(0)) + l1[PTLX(va, 0)] = pa | PTEVALID | PTEPAGE | attr; + break; + } + l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr; + } if(PTLEVELS > 2) for(pa = soc.physio, va = VIRTIO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2)) l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE; if(PTLEVELS > 3) for(pa = soc.physio, va = VIRTIO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3)) l1[PTL1X(va, 3)] = (uintptr)&l1[L1TABLEX(va, 2)] | PTEVALID | PTETABLE; + + /* ARMLOCAL */ + pe = soc.armlocal + MB; + for(pa = soc.armlocal, va = ARMLOCAL; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)) + l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr; + if(PTLEVELS > 2) + for(pa = soc.armlocal, va = ARMLOCAL; pa < pe; pa += PGLSZ(2), va += PGLSZ(2)) + l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE; + if(PTLEVELS > 3) + for(pa = soc.armlocal, va = ARMLOCAL; pa < pe; pa += PGLSZ(3), va += PGLSZ(3)) + l1[PTL1X(va, 3)] = (uintptr)&l1[L1TABLEX(va, 2)] | PTEVALID | PTETABLE; } void @@ -57,21 +77,17 @@ mmu0clear(uintptr *l1) pe = PHYSDRAM + soc.dramsize; if(pe > (uintptr)-KZERO) pe = (uintptr)-KZERO; - - for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){ + for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)) if(PTL1X(pa, 1) != PTL1X(va, 1)) l1[PTL1X(pa, 1)] = 0; - } if(PTLEVELS > 2) - for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2)){ + for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2)) if(PTL1X(pa, 2) != PTL1X(va, 2)) l1[PTL1X(pa, 2)] = 0; - } if(PTLEVELS > 3) - for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3)){ + for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3)) if(PTL1X(pa, 3) != PTL1X(va, 3)) l1[PTL1X(pa, 3)] = 0; - } } void @@ -85,32 +101,24 @@ mmuidmap(uintptr *l1) pe = PHYSDRAM + soc.dramsize; if(pe > (uintptr)-KZERO) pe = (uintptr)-KZERO; - - for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){ - if(PTL1X(pa, 1) != PTL1X(va, 1)) - l1[PTL1X(pa, 1)] = pa | PTEVALID | PTEBLOCK | PTEWRITE | PTEAF - | PTEKERNEL | PTESH(SHARE_INNER); - } + for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)) + l1[PTL1X(pa, 1)] = l1[PTL1X(va, 1)]; if(PTLEVELS > 2) - for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2)){ - if(PTL1X(pa, 2) != PTL1X(va, 2)) - l1[PTL1X(pa, 2)] = PADDR(&l1[L1TABLEX(pa, 1)]) | PTEVALID | PTETABLE; - } + for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2)) + l1[PTL1X(pa, 2)] = l1[PTL1X(va, 2)]; if(PTLEVELS > 3) - for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3)){ - if(PTL1X(pa, 3) != PTL1X(va, 3)) - l1[PTL1X(pa, 3)] = PADDR(&l1[L1TABLEX(pa, 2)]) | PTEVALID | PTETABLE; - } + for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3)) + l1[PTL1X(pa, 3)] = l1[PTL1X(va, 3)]; setttbr(PADDR(&l1[L1TABLEX(0, PTLEVELS-1)])); } void mmu1init(void) { - m->mmul1 = mallocalign(L1SIZE+L1TOPSIZE, BY2PG, L1SIZE, 0); - if(m->mmul1 == nil) - panic("mmu1init: no memory for mmul1"); - memset(m->mmul1, 0, L1SIZE+L1TOPSIZE); + m->mmutop = mallocalign(L1TOPSIZE, BY2PG, 0, 0); + if(m->mmutop == nil) + panic("mmu1init: no memory for mmutop"); + memset(m->mmutop, 0, L1TOPSIZE); mmuswitch(nil); } @@ -187,7 +195,7 @@ mmuwalk(uintptr va, int level) int i, x; x = PTLX(va, PTLEVELS-1); - table = &m->mmul1[L1TABLEX(va, PTLEVELS-1)]; + table = m->mmutop; for(i = PTLEVELS-2; i >= level; i--){ pte = table[x]; if(pte & PTEVALID) { @@ -262,12 +270,12 @@ putasid(Proc *p) { /* * Prevent the following scenario: - * pX sleeps on cpuA, leaving its page tables in mmul1 + * pX sleeps on cpuA, leaving its page tables in mmutop * pX wakes up on cpuB, and exits, freeing its page tables * pY on cpuB allocates a freed page table page and overwrites with data * cpuA takes an interrupt, and is now running with bad page tables * In theory this shouldn't hurt because only user address space tables - * are affected, and mmuswitch will clear mmul1 before a user process is + * are affected, and mmuswitch will clear mmutop before a user process is * dispatched. But empirically it correlates with weird problems, eg * resetting of the core clock at 0x4000001C which confuses local timers. */ @@ -287,7 +295,6 @@ putmmu(uintptr va, uintptr pa, Page *pg) s = splhi(); while((pte = mmuwalk(va, 0)) == nil){ spllo(); - assert(up->mmufree == nil); up->mmufree = newpage(0, nil, 0); splhi(); } @@ -330,10 +337,10 @@ mmuswitch(Proc *p) Page *t; for(va = UZERO; va < USTKTOP; va += PGLSZ(PTLEVELS-1)) - m->mmul1[PTL1X(va, PTLEVELS-1)] = 0; + m->mmutop[PTLX(va, PTLEVELS-1)] = 0; if(p == nil){ - setttbr(PADDR(&m->mmul1[L1TABLEX(0, PTLEVELS-1)])); + setttbr(PADDR(m->mmutop)); return; } @@ -344,13 +351,13 @@ mmuswitch(Proc *p) for(t = p->mmuhead[PTLEVELS-1]; t != nil; t = t->next){ va = t->va; - m->mmul1[PTL1X(va, PTLEVELS-1)] = t->pa | PTEVALID | PTETABLE; + m->mmutop[PTLX(va, PTLEVELS-1)] = t->pa | PTEVALID | PTETABLE; } if(allocasid(p)) flushasid((uvlong)p->asid<<48); - setttbr((uvlong)p->asid<<48 | PADDR(&m->mmul1[L1TABLEX(0, PTLEVELS-1)])); + setttbr((uvlong)p->asid<<48 | PADDR(m->mmutop)); } void diff --git a/sys/src/9/port/rebootcmd.c b/sys/src/9/port/rebootcmd.c index e130dc516..517d25a03 100644 --- a/sys/src/9/port/rebootcmd.c +++ b/sys/src/9/port/rebootcmd.c @@ -37,7 +37,7 @@ rebootcmd(int argc, char *argv[]) { Chan *c; Exec exec; - ulong magic, text, rtext, entry, data, size; + ulong magic, text, rtext, entry, data, size, align; uchar *p; if(argc == 0) @@ -68,8 +68,17 @@ rebootcmd(int argc, char *argv[]) if(magic & HDR_MAGIC) readn(c, &exec, 8); + switch(magic){ + case R_MAGIC: + align = 0x10000; /* 64k segment alignment for arm64 */ + break; + default: + align = BY2PG; + break; + } + /* round text out to page boundary */ - rtext = PGROUND(entry+text)-entry; + rtext = ROUND(entry+text, align)-entry; size = rtext + data; p = malloc(size); if(p == nil)