plan9fox/sys/src/9/teg2/archtegra.c
cinap_lenrek e4ce6aadac kernel: handle tos and per process pcycle counters in port/
we might as well handle the per process cycle
counter in the portable part instead of duplicating the code
in every arch and have inconsistent implementations.

we now have a portable kenter() and kexit() function,
that is meant to be used in trap/syscall from user,
which updates the counters.

some kernels missed initializing Mach.cyclefreq.
2020-12-20 22:34:41 +01:00

868 lines
16 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* nvidia tegra 2 architecture-specific stuff
*/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "../port/error.h"
#include "io.h"
#include "arm.h"
#include "../port/netif.h"
#include "../port/etherif.h"
#include "../port/flashif.h"
/*
 * limits used by this file, plus the debugging switch.
 */
enum {
/* hardware limits imposed by register contents or layouts */
Maxcpus = 4,	/* bounds the per-cpu shifts of Clkrst bits (stopcpu, unfreeze) */
Maxflowcpus = 2,	/* Flow has registers for cpu0 and cpu1 only */
Debug = 0,	/* non-zero enables l1diag and extra iprints in cpustart */
};
/* short names for the structures defined below in this file */
typedef struct Clkrst Clkrst;
typedef struct Diag Diag;
typedef struct Flow Flow;
typedef struct Scu Scu;
typedef struct Power Power;
/*
 * clock and reset controller registers, overlaid on soc.clkrst.
 * the _padN members exist only to hold the following registers at
 * their offsets; each pad's size is written as (next offset - current
 * offset).  offsets in the comments assume a 4-byte ulong.
 * pad names are unique (the name _pad1 was previously used twice).
 */
struct Clkrst {
	ulong	rstsrc;		/* 0x00 */
	ulong	rstdevl;
	ulong	rstdevh;
	ulong	rstdevu;

	ulong	clkoutl;	/* 0x10 */
	ulong	clkouth;
	ulong	clkoutu;

	uchar	_pad0[0x24-0x1c];
	ulong	supcclkdiv;	/* 0x24: super cclk divider */
	ulong	_pad1;
	ulong	supsclkdiv;	/* 0x2c: super sclk divider */

	uchar	_pad2[0x4c-0x30];
	ulong	clkcpu;		/* 0x4c */

	uchar	_pad3[0xe0-0x50];
	ulong	pllxbase;	/* 0xe0: pllx controls CPU clock speed */
	ulong	pllxmisc;
	ulong	pllebase;	/* plle is dedicated to pcie */
	ulong	pllemisc;

	uchar	_pad4[0x340-0xf0];
	ulong	cpuset;		/* 0x340 */
	ulong	cpuclr;
};
/*
 * bit definitions for Clkrst registers.
 */
enum {
/* rstsrc bits */
Wdcpurst = 1<<0,
Wdcoprst = 1<<1,
Wdsysrst = 1<<2,
Wdsel = 1<<4, /* tmr1 or tmr2? */
Wdena = 1<<5,
/* devl bits */
Sysreset = 1<<2,	/* set in rstdevl by archreboot */
/* clkcpu bits */
Cpu1stop = 1<<9,
Cpu0stop = 1<<8,
/*
 * cpuset and cpuclr bits.  stopcpu and unfreeze shift the Cpu0*
 * values left by the cpu number to select another cpu.
 */
Cpu1dbgreset = 1<<13,
Cpu0dbgreset = 1<<12,
Cpu1wdreset = 1<<9,
Cpu0wdreset = 1<<8,
Cpu1dereset = 1<<5,
Cpu0dereset = 1<<4,
Cpu1reset = 1<<1,
Cpu0reset = 1<<0,
};
/*
 * power management registers, overlaid on soc.power (see cpustart).
 * the members are consecutive ulongs, so their order fixes the
 * register offsets; do not reorder.
 */
struct Power {
ulong ctl; /* mainly for rtc clock signals */
ulong secregdis;
ulong swrst;
ulong wakevmask;
ulong waklvl;
ulong waksts;
ulong swwaksts;
ulong dpdpadsovr; /* deep power down pads override */
ulong dpdsample;
ulong dpden;
ulong gatetimroff;
ulong gatetimron;
ulong toggle;
ulong unclamp;
ulong gatests; /* ro */
ulong goodtmr;
ulong blinktmr;
ulong noiopwr;
ulong detect;
ulong detlatch;
ulong scratch[24];
ulong secscratch[6];
ulong cpupwrgoodtmr;
ulong cpupwrofftmr;
ulong pgmask[2];
ulong autowaklvl;
ulong autowaklvlmask;
ulong wakdelay;
ulong detval;
ulong ddr;
ulong usbdebdel; /* usb de-bounce delay */
ulong usbao;
ulong cryptoop;
ulong pllpwb0ovr;
ulong scratch24[42-24+1];	/* presumably scratch registers 24 through 42 */
ulong boundoutmirr[3];
ulong sys33ven;
ulong boundoutmirracc;
ulong gate;
};
/*
 * values for Power registers.
 */
enum {
/* toggle bits */
Start = 1<<8,
/* partition ids */
Partpcie= 3,
Partl2 = 4,
};
/*
 * scu registers, overlaid on soc.scu.
 * as decoded by getncpus and dumpscustate: the low 2 bits of cfg
 * hold the number of cpus minus one, and bits 4-7 are printed as
 * the smp cpu bit map.
 */
struct Scu {
ulong ctl;
ulong cfg; /* ro */
ulong cpupwrsts;
ulong inval;
uchar _pad0[0x40-0x10];
ulong filtstart;
ulong filtend;
uchar _pad1[0x50-0x48];
ulong accctl; /* initially 0 */
ulong nsaccctl;
};
/*
 * bit definitions for the Scu ctl register.
 */
enum {
/* ctl bits */
Scuenable = 1<<0,
Filter = 1<<1,
Scuparity = 1<<2,
Specfill = 1<<3, /* only for PL310 */
Allport0 = 1<<4,
Standby = 1<<5,
Icstandby = 1<<6,
};
/*
 * flow controller registers, overlaid on soc.flow.
 * cpu0, cpu1 and the coprocessor (cop; the arm7 avp, per stopcpu)
 * each have a halt register and a control register.
 */
struct Flow {
ulong haltcpu0;
ulong haltcop;
ulong cpu0;
ulong cop;
ulong xrq;
ulong haltcpu1;
ulong cpu1;
};
/*
 * bit definitions for Flow registers.
 */
enum {
/* haltcpu* bits */
Stop = 2<<29,	/* written to haltcop and haltcpu1 by stopcpu */
/* cpu* bits */
Event = 1<<14, /* w1c */
Waitwfebitsshift = 4,
Waitwfebitsmask = MASK(2),
Eventenable = 1<<1,
Cpuenable = 1<<0,
};
/*
 * state shared by all cpus during the l1 cache coherence test
 * (l1diag and pass1).
 * c0 and c1 presumably keep the state between them on cache lines of
 * its own -- TODO confirm against Cacheline's definition.
 */
struct Diag {
Cacheline c0;
Lock;	/* unnamed member: l1diag and pass1 pass the Diag itself to ilock/iunlock */
Ref cnt;	/* counter the cpus contend on in pass1 */
Ref sync;	/* barrier counter used via synccpus */
Cacheline c1;
};
extern ulong testmem;	/* defined elsewhere; its address is passed to memdiag in cpustart */
/*
* number of cpus available. contrast with conf.nmach, which is number
* of running cpus.
* zero until getncpus has computed it.
*/
int navailcpus;
/*
 * cpustart polls l1ptstable.word (for up to 5 seconds) and only then
 * calls mmuinit to update its l1 page table from cpu0's.
 */
Isolated l1ptstable;
/*
 * addresses of the soc's register blocks and devices.
 * the functions in this file cast several of these to the register
 * structures above (clkrst, power, scu, flow) or to ulong*
 * (exceptvec).
 */
Soc soc = {
.clkrst = 0x60006000, /* clock & reset signals */
.power = 0x7000e400,
.exceptvec = PHYSEVP, /* undocumented magic */
.sema = 0x60001000,
.l2cache= PHYSL2BAG, /* pl310 bag on the side */
.flow = 0x60007000,
/* 4 non-gic controllers */
// .intr = { 0x60004000, 0x60004100, 0x60004200, 0x60004300, },
/* private memory region */
.scu = 0x50040000,
/* we got this address from the `cortex-a series programmer's guide'. */
.intr = 0x50040100, /* per-cpu interface */
.glbtmr = 0x50040200,
.loctmr = 0x50040600,
.intrdist=0x50041000,
.uart = { 0x70006000, 0x70006040,
0x70006200, 0x70006300, 0x70006400, },
.rtc = 0x7000e000,
.tmr = { 0x60005000, 0x60005008, 0x60005050, 0x60005058, },
.µs = 0x60005010,
.pci = 0x80000000,
.ether = 0xa0024000,
.nand = 0x70008000,
.nor = 0x70009000, /* also VIRTNOR */
.ehci = P2VAHB(0xc5000000), /* 1st of 3 */
.ide = P2VAHB(0xc3000000),
.gpio = { 0x6000d000, 0x6000d080, 0x6000d100, 0x6000d180,
0x6000d200, 0x6000d280, 0x6000d300, },
.spi = { 0x7000d400, 0x7000d600, 0x7000d800, 0x7000da00, },
.twsi = 0x7000c000,
.mmc = { P2VAHB(0xc8000000), P2VAHB(0xc8000200),
P2VAHB(0xc8000400), P2VAHB(0xc8000600), },
};
static volatile Diag diag;	/* the single Diag shared by all cpus in l1diag */
static int missed;	/* number of addresses that failed probeaddr in missing() */
/*
 * print the pllx and plle base/misc registers and the two super clock
 * dividers on the console.  this only reads the registers; it does
 * not change any clock setting.
 */
void
dumpcpuclks(void)
{
	Clkrst *cr;

	cr = (Clkrst *)soc.clkrst;
	iprint("pllx base %#lux misc %#lux\n", cr->pllxbase, cr->pllxmisc);
	iprint("plle base %#lux misc %#lux\n", cr->pllebase, cr->pllemisc);
	iprint("super cclk divider %#lux\n", cr->supcclkdiv);
	iprint("super sclk divider %#lux\n", cr->supsclkdiv);
}
/*
 * name for a device id.  the argument is unnamed and ignored:
 * the same constant string is always returned.
 */
static char *
devidstr(ulong)
{
return "ARM Cortex-A9";
}
/*
 * intentionally empty.
 * presumably exists to satisfy a link-table entry from the kernel
 * configuration -- TODO confirm.
 */
void
archtegralink(void)
{
}
/*
 * format the cpu's name and revision, decoded from the main id
 * register, into buf (which holds size bytes) and return buf.
 * the top byte of the register must be 'A'.
 */
char *
cputype2name(char *buf, int size)
{
	ulong r, major, minor;

	r = cpidget();			/* main id register */
	assert((r >> 24) == 'A');
	major = (r >> 20) & MASK(4);
	minor = r & MASK(4);
	seprint(buf, buf + size, "Cortex-A9 r%ldp%ld", major, minor);
	return buf;
}
/*
 * apply cortex-a9 errata workarounds, chosen by the cpu's major and
 * minor revision, by setting bits in the `diagnostic register'.
 */
static void
errata(void)
{
	ulong r, major, minor, diagreg;

	r = cpidget();			/* main id register */
	assert((r >> 24) == 'A');
	minor = r & MASK(4);
	major = (r >> 20) & MASK(4);

	/* this is an undocumented `diagnostic register' that linux knows */
	diagreg = cprdsc(0, CpDTLB, 0, 1);
	if (major < 2 || (major == 2 && minor <= 2))
		diagreg |= 1<<4;			/* 742230 */
	if (major == 2 && minor <= 2)
		diagreg |= 1<<6 | 1<<12 | 1<<22;	/* 743622, 2×742231 */
	if (major < 3)
		diagreg |= 1<<11;			/* 751472 */
	cpwrsc(0, CpDTLB, 0, 1, diagreg);
}
void
archconfinit(void)
{
char *p;
ulong hz;
assert(m != nil);
m->cpuhz = 1000 * Mhz; /* trimslice speed */
p = getconf("*cpumhz");
if (p) {
hz = atoi(p) * Mhz;
if (hz >= 100*Mhz && hz <= 3600UL*Mhz)
m->cpuhz = hz;
}
m->cyclefreq = m->cpuhz;
m->delayloop = m->cpuhz/2000; /* initial estimate */
errata();
}
/*
 * describe ethernet controller ctlrno in *ether.
 * only controller 0, the pci-e rtl8169, exists: returns 1 after
 * filling in *ether for it, and -1 (leaving *ether alone) otherwise.
 */
int
archether(unsigned ctlrno, Ether *ether)
{
	if (ctlrno != 0)
		return -1;

	ether->type = "rtl8169";	/* pci-e ether */
	ether->ctlrno = ctlrno;
	ether->irq = Pcieirq;		/* non-msi pci-e intr */
	ether->nopt = 0;
	ether->mbps = 1000;
	return 1;
}
void
dumpscustate(void)
{
Scu *scu = (Scu *)soc.scu;
print("cpu%d scu: accctl %#lux\n", m->machno, scu->accctl);
print("cpu%d scu: smp cpu bit map %#lo for %ld cpus; ", m->machno,
(scu->cfg >> 4) & MASK(4), (scu->cfg & MASK(2)) + 1);
print("cpus' power %#lux\n", scu->cpupwrsts);
}
void
scuon(void)
{
Scu *scu = (Scu *)soc.scu;
if (scu->ctl & Scuenable)
return;
scu->inval = MASK(16);
coherence();
scu->ctl = Scuparity | Scuenable | Specfill;
coherence();
}
int
getncpus(void)
{
int n;
char *p;
Scu *scu;
if (navailcpus == 0) {
scu = (Scu *)soc.scu;
navailcpus = (scu->cfg & MASK(2)) + 1;
if (navailcpus > MAXMACH)
navailcpus = MAXMACH;
p = getconf("*ncpu");
if (p && *p) {
n = atoi(p);
if (n > 0 && n < navailcpus)
navailcpus = n;
}
}
return navailcpus;
}
/*
 * print this cpu's number, clock rate in MHz, type and byte order
 * on the console.
 */
void
cpuidprint(void)
{
	char name[64];
	char *endian;

	cputype2name(name, sizeof name);
	delay(50);				/* let uart catch up */
	endian = getpsr() & PsrBigend? "big": "little";
	iprint("cpu%d: %lldMHz ARM %s %s-endian\n",
		m->machno, m->cpuhz / Mhz, name, endian);
}
/*
 * take all devices out of reset, enable all clock outputs, then
 * write the watchdog reset bits to rstsrc.  each group of register
 * writes is followed by coherence().
 */
static void
clockson(void)
{
Clkrst *clk = (Clkrst *)soc.clkrst;
/* enable all by clearing resets */
clk->rstdevl = clk->rstdevh = clk->rstdevu = 0;
coherence();
clk->clkoutl = clk->clkouth = clk->clkoutu = ~0; /* enable all clocks */
coherence();
/*
 * presumably arms watchdog resets of the cpu, coprocessor and
 * system -- TODO confirm against the tegra 2 manual.
 */
clk->rstsrc = Wdcpurst | Wdcoprst | Wdsysrst | Wdena;
coherence();
}
/*
 * stop the given cpu: mark it off and stopped, halt the avp
 * coprocessor, halt the cpu via the flow controller, then assert its
 * reset bits in the clock/reset controller.  cpu0 may not be stopped;
 * a request to do so is reported and ignored.
 *
 * we could be shutting down ourself (if cpu == m->machno), so take care.
 *
 * NOTE(review): cpu is passed to machoff and shifted into
 * active.stopped before the asserts further down that bound it.
 */
void
stopcpu(uint cpu)
{
Flow *flow = (Flow *)soc.flow;
Clkrst *clk = (Clkrst *)soc.clkrst;
if (cpu == 0) {
iprint("stopcpu: may not stop cpu0\n");
return;
}
machoff(cpu);
lock(&active);
active.stopped |= 1 << cpu;
unlock(&active);
l1cache->wb();
/* shut down arm7 avp coproc so it can't cause mischief. */
/* could try watchdog without stopping avp. */
flow->haltcop = Stop;
coherence();
flow->cop = 0; /* no Cpuenable */
coherence();
delay(10);
assert(cpu < Maxflowcpus);
/* cpu is non-zero here (see the early return), so these select the cpu1 registers */
*(cpu == 0? &flow->haltcpu0: &flow->haltcpu1) = Stop;
coherence();
*(cpu == 0? &flow->cpu0: &flow->cpu1) = 0; /* no Cpuenable */
coherence();
delay(10);
/* cold reset */
assert(cpu < Maxcpus);
/* the Cpu0* bits shifted left by cpu select that cpu's reset bits */
clk->cpuset = (Cpu0reset | Cpu0dbgreset | Cpu0dereset) << cpu;
coherence();
delay(1);
l1cache->wb();
}
/*
 * barrier: count this cpu in on *cntp, then spin until the count
 * has reached at least n.
 */
static void
synccpus(Ref *cntp, int n)
{
	incref(cntp);
	for (;;)
		if (cntp->ref >= n)
			break;
	/* all cpus should now be here */
}
/*
 * one pass of the l1 coherence test, run by every cpu at once.
 * each cpu repeatedly increments and then decrements the shared
 * counter dp->cnt; once all cpus have finished, the counter must be
 * back at zero, otherwise updates were lost and we panic.
 * cpu0 prints the pass number.
 *
 * the loop body pairs incref with decref: with two increfs the
 * counter could never return to zero and the check below would
 * always panic.
 */
static void
pass1(int pass, volatile Diag *dp)
{
	int i;

	if(m->machno == 0)
		iprint(" %d", pass);
	for (i = 1000*1000; --i > 0; ) {
		incref(&dp->cnt);
		decref(&dp->cnt);
	}

	synccpus(&dp->sync, navailcpus);
	/* all cpus are now here */

	ilock(dp);
	if(dp->cnt.ref != 0)
		panic("cpu%d: diag: failed w count %ld", m->machno, dp->cnt.ref);
	iunlock(dp);

	synccpus(&dp->sync, 2 * navailcpus);
	/* all cpus are now here */
	/* undo this cpu's two increments of the barrier counter */
	decref(&dp->sync);
	decref(&dp->sync);
}
/*
* try to confirm coherence of l1 caches.
* assume that all available cpus will be started.
*
* does nothing unless Debug is non-zero.  every cpu is expected to
* call this; they rendezvous via synccpus on diag.sync, run three
* rounds of pass1, and check that the counters ended up sane.
* each rendezvous is a pair of synccpus calls (to navailcpus, then
* to 2*navailcpus) followed by two decrefs to undo this cpu's two
* increments.
*
* NOTE(review): a cpu that leaves the second synccpus and decrements
* dp->sync before a slower cpu has observed the count at
* 2*navailcpus would appear to leave that cpu spinning -- confirm.
*/
void
l1diag(void)
{
int pass;
volatile Diag *dp;
if (!Debug)
return;
l1cache->wb();
/*
* synchronise and print
*/
dp = &diag;
ilock(dp);
if (m->machno == 0)
iprint("l1: waiting for %d cpus... ", navailcpus);
iunlock(dp);
synccpus(&dp->sync, navailcpus);
ilock(dp);
if (m->machno == 0)
iprint("cache coherency pass");
iunlock(dp);
synccpus(&dp->sync, 2 * navailcpus);
decref(&dp->sync);
decref(&dp->sync);
/*
* cpus contend
*/
for (pass = 0; pass < 3; pass++)
pass1(pass, dp);
/*
* synchronise and check sanity
*/
synccpus(&dp->sync, navailcpus);
if(dp->sync.ref < navailcpus || dp->sync.ref >= 2 * navailcpus)
panic("cpu%d: diag: failed w dp->sync %ld", m->machno,
dp->sync.ref);
if(dp->cnt.ref != 0)
panic("cpu%d: diag: failed w dp->cnt %ld", m->machno,
dp->cnt.ref);
ilock(dp);
iprint(" cpu%d ok", m->machno);
iunlock(dp);
synccpus(&dp->sync, 2 * navailcpus);
decref(&dp->sync);
decref(&dp->sync);
l1cache->wb();
/*
* all done, print
*/
ilock(dp);
if (m->machno == 0)
iprint("\n");
iunlock(dp);
}
/*
 * let the given cpu run: clear its stop bit in clkcpu, take it out
 * of reset via cpuclr, then zero its flow-controller control and
 * halt registers.  cpu must be below Maxflowcpus.
 */
static void
unfreeze(uint cpu)
{
Clkrst *clk = (Clkrst *)soc.clkrst;
Flow *flow = (Flow *)soc.flow;
assert(cpu < Maxcpus);
/* the Cpu0* bits shifted left by cpu select that cpu's bits */
clk->clkcpu &= ~(Cpu0stop << cpu);
coherence();
/* out of reset */
clk->cpuclr = (Cpu0reset | Cpu0wdreset | Cpu0dbgreset | Cpu0dereset) <<
cpu;
coherence();
assert(cpu < Maxflowcpus);
*(cpu == 0? &flow->cpu0: &flow->cpu1) = 0;
coherence();
*(cpu == 0? &flow->haltcpu0: &flow->haltcpu1) = 0; /* normal operat'n */
coherence();
}
/*
* this is all a bit magic. the soc.exceptvec register is effectively
* undocumented. we had to look at linux and experiment, alas. this is the
* sort of thing that should be standardised as part of the cortex mpcore spec.
* even intel document their equivalent procedure.
*
* start the given cpu executing at _vrst and wait up to about 2
* seconds for it to report in by writing its cpu number to the
* exception-vector word (see cpustart).
* returns 0 on success.  returns -1 if fewer than 2 cpus are
* available, if cpu is this cpu or out of range, or if the cpu did
* not report in (in which case it is stopped again).
*/
int
startcpu(uint cpu)
{
int i, r;
ulong oldvec, rstaddr;
ulong *evp = (ulong *)soc.exceptvec; /* magic */
r = 0;
if (getncpus() < 2 || cpu == m->machno ||
cpu >= MAXMACH || cpu >= navailcpus)
return -1;
oldvec = *evp;	/* saved so it can be restored below */
l1cache->wb(); /* start next cpu w same view of ram */
*evp = rstaddr = PADDR(_vrst); /* will start cpu executing at _vrst */
coherence();
l1cache->wb();
unfreeze(cpu);
/* poll once per millisecond until the word no longer holds rstaddr */
for (i = 2000; i > 0 && *evp == rstaddr; i--)
delay(1);
/* timed out, or the word holds something other than the cpu's number */
if (i <= 0 || *evp != cpu) {
iprint("cpu%d: didn't start!\n", cpu);
stopcpu(cpu); /* make sure it's stopped */
r = -1;
}
*evp = oldvec;
return r;
}
/*
 * check the security state: panic if the low bit of the value
 * returned by getscr is set (i.e., we are running non-secure), and
 * report a non-zero debug enable register.
 */
static void
cksecure(void)
{
	ulong dbgen;
	extern ulong getdebug(void);

	if (getscr() & 1)
		panic("cpu%d: running non-secure", m->machno);

	dbgen = getdebug();
	if (dbgen != 0)
		iprint("cpu%d: debug enable reg %#lux\n", m->machno, dbgen);
}
/*
 * set the smp and maintenance-broadcast bits in the auxiliary
 * control register (cortex-a9 model-specific configuration).
 * returns the register's previous value.
 */
ulong
smpon(void)
{
	ulong prev;

	prev = getauxctl();
	putauxctl(prev | CpACsmp | CpACmaintbcast);
	return prev;
}
void
cortexa9cachecfg(void)
{
/* cortex-a9 model-specific configuration */
putauxctl(getauxctl() | CpACparity | CpAClwr0line | CpACl2pref);
}
/*
* called on a cpu other than 0 from cpureset in l.s,
* from _vrst in lexception.s.
* mmu and l1 (and system-wide l2) caches and coherency (smpon) are on,
* but interrupts are disabled.
* our mmu is using an exact copy of cpu0's l1 page table
* as it was after userinit ran.
*
* does not return normally: it ends by calling schedinit and panics
* if that returns.
*/
void
cpustart(void)
{
int ms;
ulong *evp;
Power *pwr;
up = nil;
/* this cpu is already marked in active.machs, so we have been here before */
if (active.machs[m->machno]) {
serialputc('?');
serialputc('r');
panic("cpu%d: resetting after start", m->machno);
}
assert(m->machno != 0);
errata();
cortexa9cachecfg();
memdiag(&testmem);
machinit(); /* bumps nmach, adds bit to machs */
machoff(m->machno); /* not ready to go yet */
/* clock signals and scu are system-wide and already on */
clockshutdown(); /* kill any watch-dog timer */
trapinit();
clockinit(); /* sets loop delay */
timersinit();
cpuidprint();
/*
* notify cpu0 that we're up so it can proceed to l1diag.
* startcpu, on the cpu that started us, is polling this word for
* our cpu number.
*/
evp = (ulong *)soc.exceptvec; /* magic */
*evp = m->machno;
coherence();
l1diag(); /* contend with other cpus to verify sanity */
/*
* pwr->noiopwr == 0
* pwr->detect == 0x1ff (default, all disabled)
*/
pwr = (Power *)soc.power;
assert(pwr->gatests == MASK(7)); /* everything has power */
/*
* 8169 has to initialise before we get past this, thus cpu0
* has to schedule processes first.
*/
if (Debug)
iprint("cpu%d: waiting for 8169\n", m->machno);
/*
* poll l1ptstable.word every 10ms for up to 5 seconds, invalidating
* our cached copy of it before each re-read.
*/
for (ms = 0; !l1ptstable.word && ms < 5000; ms += 10) {
delay(10);
cachedinvse(&l1ptstable.word, sizeof l1ptstable.word);
}
if (!l1ptstable.word)
iprint("cpu%d: 8169 unreasonably slow; proceeding\n", m->machno);
/* now safe to copy cpu0's l1 pt in mmuinit */
mmuinit(); /* update our l1 pt from cpu0's */
fpon();
machon(m->machno); /* now ready to go and be scheduled */
if (Debug)
iprint("cpu%d: scheding\n", m->machno);
schedinit();
panic("cpu%d: schedinit returned", m->machno);
}
/*
 * interrupt handler registered by archreset for Cpu0irq and Cpu1irq;
 * mainly used to break out of wfi.  reports the interrupt and, on any
 * cpu but cpu0, passes ureg to clockprod.
 */
void
sgintr(Ureg *ureg, void *)
{
	iprint("cpu%d: got sgi\n", m->machno);
	if (m->machno == 0)
		return;
	/* try to prod cpu1 into life when it gets stuck */
	clockprod(ureg);
}
/*
 * one-time architecture initialisation: the first call sets
 * provisional clock values, turns the clocks on, runs archconfinit,
 * enables floating point and installs sgintr for Cpu0irq and Cpu1irq.
 * later calls return immediately.
 * panics if irqtooearly is set when the interrupts are to be enabled.
 */
void
archreset(void)
{
static int beenhere;
if (beenhere)
return;
beenhere = 1;
/* conservative temporary values until archconfinit runs */
m->cpuhz = 1000 * Mhz; /* trimslice speed */
m->delayloop = m->cpuhz/2000; /* initial estimate */
prcachecfg();
clockson();
/* all partitions were powered up by u-boot, so needn't do anything */
archconfinit();
// resetusb();
fpon();
if (irqtooearly)
panic("archreset: too early for irqenable");
irqenable(Cpu0irq, sgintr, nil, "cpu0");
irqenable(Cpu1irq, sgintr, nil, "cpu1");
/* ... */
}
/*
 * reset the system by setting Sysreset in the clock/reset
 * controller's rstdevl register.  must be called on cpu0.
 * if the reset has not happened after half a second, print a dot
 * every second, forever.
 */
void
archreboot(void)
{
Clkrst *clk = (Clkrst *)soc.clkrst;
assert(m->machno == 0);
iprint("archreboot: reset!\n");
delay(20);
clk->rstdevl |= Sysreset;
coherence();
delay(500);
/* shouldn't get here */
splhi();
iprint("awaiting reset");
for(;;) {
delay(1000);
print(".");
}
}
/* keyboard initialisation: intentionally empty, nothing is done here */
void
kbdinit(void)
{
}
/*
 * report the device called name if its address addr does not respond
 * to probeaddr, and count it in missed.  reports are accumulated on
 * the console as a list: the first is introduced by "missing:" and
 * later ones are separated by a comma and a new line.
 * a zero address is reported separately and is not counted.
 */
static void
missing(ulong addr, char *name)
{
	static int firstmiss = 1;

	if (addr == 0) {
		iprint("address zero for %s\n", name);
		return;
	}
	if (probeaddr(addr) < 0) {
		missed++;
		if (!firstmiss)
			iprint(",\n\t");
		else {
			iprint("missing:");
			firstmiss = 0;
		}
		iprint(" %s at %#lux", name, addr);
	}
}
/*
 * verify that all the necessary device registers are accessible.
 * probes dram and the registers of the devices listed below via
 * missing(), and ends the list of reports with a newline if any
 * were missing.
 */
void
chkmissing(void)
{
delay(10);
missing(KZERO, "dram");
missing(soc.intr, "intr ctlr");
missing(soc.intrdist, "intr distrib");
missing(soc.tmr[0], "tegra timer1");
missing(soc.uart[0], "console uart");
missing(soc.pci, "pcie");
missing(soc.ether, "ether8169");
missing(soc.µs, "µs counter");
if (missed)
iprint("\n");
delay(10);
}
/*
 * flash write-protect hook: intentionally empty.
 * both arguments are unnamed and ignored.
 */
void
archflashwp(Flash*, int)
{
}
/*
* for ../port/devflash.c:/^flashreset
* retrieve flash type, virtual base and length and return 0;
* return -1 on error (no flash)
*
* only bank 0 is accepted; any other bank returns -1.
* NOTE(review): for bank 0 the panic below comes first, so (if panic
* does not return) the assignments after it and the `return 0' are
* never reached; they are left over from another board.
*/
int
archflashreset(int bank, Flash *f)
{
if(bank != 0)
return -1;
panic("archflashreset: rewrite for nor & nand flash on ts");
/*
* this is set up for the igepv2 board.
*/
f->type = "onenand";
f->addr = (void*)VIRTNOR; /* mapped here by archreset */
f->size = 0; /* done by probe */
f->width = 1;
f->interleave = 0;
return 0;
}