plan9fox/sys/src/9/teg2/cache-l2-pl310.c
2013-01-26 17:33:21 +01:00

457 lines
8.7 KiB
C

/*
* PL310 level 2 cache (non-architectural bag on the side)
*
* guaranteed to work incorrectly with default settings; must set Sharovr.
*
* clean & invalidate (wbinv) is buggy, so we work around erratum 588369
* by disabling write-back and cache line-fill before, and restoring after.
*/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "../port/error.h"
#include "arm.h"
/* number of ways selected by the associativity bit of the given block's auxctl */
#define NWAYS(l2p)	(((l2p)->auxctl & Assoc16way) != 0? 16: 8)
/* the PL310 register block, at the address held in soc.l2cache */
#define L2P		((L2pl310 *)soc.l2cache)
/* fixed sizes used when choosing the cache geometry */
enum {
	L2size		= 1<<20,	/* 1MB, according to the tegra 2 manual */
	Wayszgran	= 16 * KiB,	/* way sizes come in multiples of this */
};
typedef struct L2pl310 L2pl310;
typedef struct Pl310op Pl310op;
/*
 * one group of four consecutive cache-maintenance registers.
 * L2pl310 embeds one of these each for invalidate, clean and
 * clean-and-invalidate.  in this file, writing a physical address
 * to pa operates on a single line and writing a mask of ways to
 * way operates on whole ways.
 */
struct Pl310op {
/* operate on the cache line containing this physical address */
ulong pa;
/* unused slot between pa and indexway */
ulong _pad;
/* not used in this file; absent from the invalidate group */
ulong indexway;
/* mask of ways to operate on; polled until the requested bits read zero */
ulong way;
};
/*
 * PL310 register block, laid out by byte offset from the controller's
 * base address (assumes ulong is 4 bytes, so Pl310op is 16).
 * each _padN array skips the registers between two named ones, so its
 * size must be (offset of the next register) - (end of the previous one).
 *
 * cleaninv starts at 0x7f0 and therefore ends at 0x800; the pad that
 * follows it has to be measured from 0x800 or filtstart, filtend and
 * debug all land 0x30 bytes past their intended offsets.
 */
struct L2pl310 {
	ulong	id;			/* 0x000 */
	ulong	type;			/* 0x004 */
	uchar	_pad0[0x100 - 0x8];
	ulong	ctl;			/* 0x100 */
	ulong	auxctl;			/* 0x104 */
	uchar	_pad1[0x730 - 0x108];	/* boring regs */
	ulong	sync;			/* 0x730 */
	uchar	_pad2[0x740 - 0x734];
	ulong	r3p0sync;		/* 0x740: workaround for r3p0 bug */
	uchar	_pad3[0x770 - 0x744];
	Pl310op	inv;			/* 0x770: inv.indexway doesn't exist */
	uchar	_pad4[0x7b0 - 0x780];
	Pl310op	clean;			/* 0x7b0 */
	uchar	_pad5[0x7f0 - 0x7c0];
	Pl310op	cleaninv;		/* 0x7f0 */
	uchar	_pad6[0xc00 - 0x800];
	ulong	filtstart;		/* 0xc00 */
	ulong	filtend;		/* 0xc04 */
	uchar	_pad7[0xf40 - 0xc08];
	ulong	debug;			/* 0xf40 */
	/* ... */
};
/* bit definitions for the ctl, auxctl and debug registers */
enum {
	/* ctl register */
	L2enable	= 1<<0,

	/* auxctl register */
	Ipref		= 0x20000000,	/* bit 29: prefetch enables */
	Dpref		= 0x10000000,	/* bit 28 */
	Mbo		= 0x02000000,	/* bit 25 */
	Sharovr		= 0x00400000,	/* bit 22: shared attribute override (i.e., work right!) */
	Parity		= 0x00200000,	/* bit 21 */
	Waycfgshift	= 17,		/* position of the way-size field */
	Waycfgmask	= 0x7,		/* the way-size field is 3 bits wide */
	Assoc16way	= 0x00010000,	/* bit 16: set for 16 ways, clear for 8 */
	/*
	 * bit 0: optimisation to zero whole cache lines; must also be
	 * enabled in the a9(?!).  set CpAClwr0line on all cpus first.
	 */
	Fullline0	= 0x00000001,

	/* debug register */
	Wt		= 0x2,		/* write-through, not write-back */
	Nolinefill	= 0x1,
	Basecfg		= Nolinefill | Wt,
};
/* guards the PL310 registers; taken with ilock (see getlock) */
static Lock l2lock;
static int disallowed; /* by user: *l2off= in plan9.ini */
/* set to 1 by l2pl310on once the cache is enabled, cleared by l2pl310off */
static int l2ison;
/*
 * non-zero while a whole-cache (by-way) operation is in progress.
 * written with l2lock held; awaitbgop polls it in a loop.
 * NOTE(review): it is also polled without the lock held and is not
 * volatile; confirm the compiler in use reloads it on every iteration.
 */
static int bg_op_running;
/* mask with one bit per configured way; set by l2pl310init via MASK(nways) */
static ulong waysmask;
/* forward declaration; the operations table is initialised at the end of the file */
static Cacheimpl l2cacheimpl;
/*
 * busy-wait until no whole-cache operation is marked as running.
 */
static void
awaitbgop(void)
{
	while (bg_op_running != 0)
		continue;
}
/*
 * acquire l2lock once no whole-cache operation is running.
 * the flag is checked twice: first before taking the lock, so that
 * most of the waiting happens at normal priority level, and again
 * afterwards, now with the lock held.
 */
static void
getlock(void)
{
	awaitbgop();
	ilock(&l2lock);
	awaitbgop();
}
static void
l2pl310sync(void)
{
L2P->sync = 0;
coherence();
}
/*
 * call this first to set sets/ways configuration.
 *
 * does nothing but set `disallowed' if *l2off is present in the
 * configuration, and nothing at all if the cache is already on or
 * has already been configured.  otherwise it installs l2cacheimpl
 * as l2cache, asks for 16-way associativity, derives the way size
 * from L2size, writes the result to auxctl and invalidates the cache.
 */
void
l2pl310init(void)
{
	int waysz, nways;
	ulong new;
	L2pl310 *l2p = L2P;
	static int configed;

	if (getconf("*l2off") != nil) {
		disallowed = 1;
		return;
	}
	if (l2ison || configed)
		return;
	l2cache = &l2cacheimpl;
	cachedwb();

	/*
	 * default config is:
	 * l2: ext unified, 8 ways 512 sets 32 bytes/line => 128KB
	 * but the tegra 2 manual says there's 1MB available.
	 * ways or way-size may be fixed by hardware; the only way to tell
	 * is to try to change the setting and read it back.
	 */
	l2pl310sync();
	/*
	 * l2pl310inv only operates on the ways named in waysmask, which
	 * is still zero the first time through here.  seed it from the
	 * current associativity so that this invalidate actually selects
	 * some ways instead of none.
	 */
	waysmask = MASK(NWAYS(l2p));
	l2cache->inv();

	/* figure out number of ways */
	l2pl310sync();
	nways = NWAYS(l2p);
	if (!(l2p->auxctl & Assoc16way)) {
		/* was set for 8 ways; ask for 16 and see what we get */
		l2p->auxctl |= Assoc16way;
		coherence();
		l2pl310sync();
		nways = NWAYS(l2p);
	}
	waysmask = MASK(nways);

	/* figure out way size (and thus number of sets) */
	waysz = L2size / nways;
	/* the field holds log2(way size / Wayszgran) + 1 */
	new = l2p->auxctl & ~(Waycfgmask << Waycfgshift) |
		(log2(waysz / Wayszgran) + 1) << Waycfgshift;
	l2p->auxctl = new;
	coherence();
	l2pl310sync();
	l2cache->inv();

	/* the hardware may have refused the new geometry; report it */
	if (l2p->auxctl != new)
		iprint("l2 config %#8.8lux didn't stick; is now %#8.8lux\n",
			new, l2p->auxctl);
	configed = 1;
}
/*
 * describe the l2 cache in *cp.
 * *cp is zeroed first and left that way if the cache is not on.
 * the number of ways and sets are read back from auxctl.
 */
void
l2pl310info(Memcache *cp)
{
	int waycfg;
	ulong waybytes;
	L2pl310 *regs = L2P;

	memset(cp, 0, sizeof *cp);
	if (l2ison == 0)
		return;

	l2pl310init();
	assert((regs->id >> 24) == 'A');

	cp->level = 2;
	cp->type = Unified;
	cp->external = Extcache;
	cp->setsways = Cara | Cawa | Cawt | Cawb;
	cp->l1ip = 3<<14;		/* PIPT */
	/* bag on the side */
	cp->waysh = 0;
	cp->setsh = 0;
	cp->linelen = CACHELINESZ;
	cp->log2linelen = log2(CACHELINESZ);
	cp->nways = NWAYS(regs);

	/*
	 * the way-size field holds log2(way size / Wayszgran) + 1;
	 * a field value of 0 is treated the same as 1.
	 */
	waycfg = (regs->auxctl >> Waycfgshift) & Waycfgmask;
	if (waycfg > 0)
		waycfg--;
	waybytes = (1 << waycfg) * Wayszgran;
	cp->nsets = waybytes / CACHELINESZ;
}
void
l2pl310on(void)
{
ulong ctl;
L2pl310 *l2p = L2P;
if (getconf("*l2off") != nil) {
// iprint("l2 cache (pl310) disabled\n");
disallowed = 1;
return;
}
if (l2ison)
return;
l2pl310init();
l2cache->inv();
/*
* drain l1. can't turn it off (which would make locks not work)
* because doing so makes references below to the l2 registers wedge
* the system.
*/
cacheuwbinv();
cacheiinv();
/*
* this is only called once, on cpu0 at startup,
* so we don't need locks here.
* must do all configuration before enabling l2 cache.
*/
l2p->filtend = 0;
coherence();
l2p->filtstart = 0; /* no enable bit */
l2p->debug = 0; /* write-back, line fills allowed */
coherence();
ctl = l2p->auxctl;
/* don't change number of sets & ways, but reset all else. */
ctl &= Waycfgmask << Waycfgshift | Assoc16way;
ctl |= Sharovr; /* actually work correctly for a change */
ctl |= Mbo | Ipref | Dpref | Parity | Fullline0;
l2p->auxctl = ctl;
coherence();
l2p->ctl |= L2enable;
coherence();
l2ison = 1;
// iprint("l2 cache (pl310) now on\n");
}
void
l2pl310off(void)
{
if (!l2ison)
return;
l2cache->wbinv();
getlock();
L2P->ctl &= ~L2enable;
coherence();
l2ison = 0;
iunlock(&l2lock);
}
/*
 * apply a by-physical-address operation to every cache line that
 * overlaps [ava, ava+len): the physical address of each line is
 * written to *reg in turn, followed by a sync.
 * does nothing if the cache is disallowed or off; panics if len is
 * negative.  callers in this file hold l2lock.
 */
static void
applyrange(ulong *reg, void *ava, int len)
{
	uintptr line, limit;

	if (disallowed || !l2ison)
		return;
	if (len < 0)
		panic("l2cache*se called with negative length");

	limit = (uintptr)ava + len;
	/* start from the beginning of the line containing ava */
	line = (uintptr)ava & ~(CACHELINESZ-1);
	while (line < limit) {
		*reg = PADDR(line);
		line += CACHELINESZ;
	}
	l2pl310sync();
}
void
l2pl310invse(void *va, int bytes)
{
uintptr start, end;
L2pl310 *l2p = L2P;
/*
* if start & end addresses are not on cache-line boundaries,
* flush first & last cachelines before invalidating.
*/
start = (uintptr)va;
end = start + bytes;
getlock();
if (start % CACHELINESZ != 0) {
// iprint("l2pl310invse: unaligned start %#p from %#p\n", start,
// getcallerpc(&va));
applyrange(&l2p->clean.pa, va, 1);
}
if (end % CACHELINESZ != 0) {
// iprint("l2pl310invse: unaligned end %#p from %#p\n", end,
// getcallerpc(&va));
applyrange(&l2p->clean.pa, (char *)va + bytes, 1);
}
applyrange(&l2p->inv.pa, va, bytes);
iunlock(&l2lock);
}
void
l2pl310wbse(void *va, int bytes)
{
getlock();
applyrange(&L2P->clean.pa, va, bytes);
iunlock(&l2lock);
}
/*
* assume that ldrex/strex (thus locks) won't work when Wt in is effect,
* so don't manipulate locks between setting and clearing Wt.
*/
void
l2pl310wbinvse(void *va, int bytes)
{
int odb;
L2pl310 *l2p = L2P;
if (!l2ison)
return;
getlock();
applyrange(&l2p->clean.pa, va, bytes); /* paranoia */
odb = l2p->debug;
l2p->debug |= Wt | Nolinefill; /* erratum workaround */
coherence();
applyrange(&l2p->cleaninv.pa, va, bytes);
l2p->debug = odb;
iunlock(&l2lock);
}
/*
* we want to wait for completion at normal PL.
* if waiting is interrupted, interrupt code that calls
* these ops could deadlock on a uniprocessor, so we only
* give up l2lock before waiting on multiprocessors.
* in this port, only cpu 0 gets interrupts other than local timer ones.
*/
void
l2pl310inv(void)
{
L2pl310 *l2p = L2P;
if (disallowed)
return;
getlock();
bg_op_running = 1;
l2p->inv.way = waysmask;
coherence();
if (conf.nmach > 1)
iunlock(&l2lock);
while (l2p->inv.way & waysmask)
;
if (conf.nmach > 1)
ilock(&l2lock);
l2pl310sync();
bg_op_running = 0;
iunlock(&l2lock);
}
/*
* maximum time seen is 2542µs, typical is 625µs.
*/
void
l2pl310wb(void)
{
L2pl310 *l2p = L2P;
if (disallowed || !l2ison)
return;
getlock();
bg_op_running = 1;
l2p->clean.way = waysmask;
coherence();
if (conf.nmach > 1)
iunlock(&l2lock);
while (l2p->clean.way & waysmask)
;
if (conf.nmach > 1)
ilock(&l2lock);
l2pl310sync();
bg_op_running = 0;
iunlock(&l2lock);
}
/*
 * write back and invalidate every configured way of the cache.
 * does nothing if the cache is disallowed or off.
 * the debug register is switched to write-through with line fills
 * disabled around the clean & invalidate, then restored.
 */
void
l2pl310wbinv(void)
{
int odb;
L2pl310 *l2p = L2P;
if (disallowed || !l2ison)
return;
l2pl310wb(); /* paranoia */
getlock();
/* make other callers of getlock wait until this operation is finished */
bg_op_running = 1;
/* remember the debug settings so they can be put back afterwards */
odb = l2p->debug;
l2p->debug |= Wt | Nolinefill; /* erratum workaround */
coherence();
/* start the by-way clean & invalidate of all configured ways */
l2p->cleaninv.way = waysmask;
coherence();
/*
 * on multiprocessors, give up the lock while polling.
 * NOTE(review): this releases and retakes l2lock while Wt|Nolinefill
 * is still set in the debug register; confirm that locks are
 * reliable in that state.
 */
if (conf.nmach > 1)
iunlock(&l2lock);
/* poll until none of the requested way bits read back as set */
while (l2p->cleaninv.way & waysmask)
;
if (conf.nmach > 1)
ilock(&l2lock);
l2pl310sync();
/* restore the saved debug settings */
l2p->debug = odb;
bg_op_running = 0;
iunlock(&l2lock);
}
/*
 * table of PL310 cache operations.  l2pl310init points l2cache at
 * this table, and the functions in this file then call through it
 * (e.g. l2cache->inv()).
 */
static Cacheimpl l2cacheimpl = {
/* query and control */
.info = l2pl310info,
.on = l2pl310on,
.off = l2pl310off,
/* whole-cache operations */
.inv = l2pl310inv,
.wb = l2pl310wb,
.wbinv = l2pl310wbinv,
/* operations on an address range (start address, length in bytes) */
.invse = l2pl310invse,
.wbse = l2pl310wbse,
.wbinvse= l2pl310wbinvse,
};