diff --git a/sys/src/9/pc/sdnvme.c b/sys/src/9/pc/sdnvme.c new file mode 100644 index 000000000..c8605c0d2 --- /dev/null +++ b/sys/src/9/pc/sdnvme.c @@ -0,0 +1,663 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "../port/error.h" + +#include "../port/sd.h" + +typedef struct WS WS; +typedef struct CQ CQ; +typedef struct SQ SQ; +typedef struct Ctlr Ctlr; + +/* + * WS: wait state for one in-flight command. + * qcmd stores the WS in a slot of the submission queue's wait array (link points at that slot); + * nvmeintr copies cdw0 and status out of the completion entry, clears the slot and wakes *sleep. + */ +struct WS +{ + u32int cdw0; + ushort status; + Rendez *sleep; + WS **link; + SQ *queue; +}; + +/* CQ: completion queue; nvmeintr indexes base in units of 4 u32int per entry, using head & mask. */ +struct CQ +{ + u32int head; + u32int mask; + u32int shift; + u32int *base; + Ctlr *ctlr; +}; + +/* SQ: submission queue; qcmd indexes base in units of 16 u32int per entry, using tail & mask; wait has one WS slot per entry. */ +struct SQ +{ + u32int tail; + u32int mask; + u32int shift; + u32int *base; + WS **wait; + Ctlr *ctlr; +}; + +/* + * NOTE(review): part of this struct and of the code that follows it (up to the middle of qcmd) + * appears to be missing from this copy of the patch; the text jumps from the mps comment + * straight into the body of qcmd. Restore it from the original patch before applying. + */ +struct Ctlr +{ + QLock; + + Lock intr; + u32int ints; + u32int irqc[2]; + + Pcidev *pci; + u32int *reg; + + u64int cap; + uchar *ident; + u32int *nsid; + int nnsid; + + u32int mps; /* mps = 1<sq[1+m->machno]; + } else { + qlock(ctlr); + sq = &ctlr->sq[0]; + } + ws->sleep = &up->sleep; + ws->queue = sq; + ws->link = &sq->wait[sq->tail & sq->mask]; + while(*ws->link != nil){ + sched(); + if(!adm){ + /* should be very rare */ + goto Retry; + } + } + *ws->link = ws; + + e = &sq->base[((cid = sq->tail++) & sq->mask)<<4]; + e[0] = opc | cid<<16; + e[1] = nsid; + e[2] = 0; + e[3] = 0; + if(mptr != nil){ + pa = PADDR(mptr); + e[4] = pa; + e[5] = pa>>32; + } else { + e[4] = 0; + e[5] = 0; + } + if(len > 0){ + pa = PADDR(data); + e[6] = pa; + e[7] = pa>>32; + if(len > ctlr->mps - (pa & ctlr->mps-1)) + pa += ctlr->mps - (pa & ctlr->mps-1); + else + pa = 0; + } else { + e[6] = 0; + e[7] = 0; + pa = 0; + } + e[8] = pa; + e[9] = pa>>32; + return e; +} + +/* + * nvmeintr: interrupt handler; arg is the Ctlr. Also called directly from wcmd as a polling fallback. + * Does nothing while ctlr->ints is 0. Otherwise, under the intr lock, it writes ctlr->ints to IntMs + * (presumably masking those vectors - confirm), walks the completion queues from last to first + * skipping unallocated ones, hands each new entry to its waiter, writes the new head to the + * queue's doorbell register, and finally writes ctlr->ints to IntMc (presumably unmasking). + * Prints a diagnostic when the low two bits of CSts are not 1. + */ +static void +nvmeintr(Ureg *, void *arg) +{ + u32int phaseshift, *e; + WS *ws, **wp; + Ctlr *ctlr; + SQ *sq; + CQ *cq; + + ctlr = arg; + if(ctlr->ints == 0) + return; + + ilock(&ctlr->intr); + ctlr->reg[IntMs] = ctlr->ints; + for(cq = &ctlr->cq[nelem(ctlr->cq)-1]; cq >= ctlr->cq; cq--){ + 
if(cq->base == nil) + continue; + phaseshift = 16 - cq->shift; + /* consume entries while bit 16 of e[3] differs from bit cq->shift of head (presumably the phase tag - confirm) */ + for(;; cq->head++){ + e = &cq->base[(cq->head & cq->mask)<<2]; + if(((e[3] ^ (cq->head << phaseshift)) & 0x10000) == 0) + break; + + if(0) iprint("nvmeintr: cq%d [%.4ux] %.8ux %.8ux %.8ux %.8ux\n", + (int)(cq - ctlr->cq), cq->head & cq->mask, + e[0], e[1], e[2], e[3]); + + /* e[2]>>16 selects the submission queue and e[3] & mask the wait slot (presumably queue id and command id - confirm) */ + sq = &ctlr->sq[e[2] >> 16]; + wp = &sq->wait[e[3] & sq->mask]; + if((ws = *wp) != nil && ws->link == wp){ + Rendez *z = ws->sleep; + ws->cdw0 = e[0]; + ws->status = e[3]>>17; + *wp = nil; + wakeup(z); + } + } + ctlr->reg[DBell + ((cq-ctlr->cq)*2+1 << ctlr->dstrd)] = cq->head & cq->mask; + } + if((ctlr->reg[CSts] & 3) != 1) + iprint("nvmeintr: fatal controller error\n"); + ctlr->reg[IntMc] = ctlr->ints; + iunlock(&ctlr->intr); +} + +/* wdone: sleep condition for wcmd; true once the wait slot no longer points at this WS (nvmeintr clears it on completion). */ +static int +wdone(void *arg) +{ + WS *ws = arg; + return *ws->link != ws; +} + +/* + * wcmd: submit the command prepared by qcmd and wait for its completion; returns ws->status. + * Writes the queue tail to the submission doorbell after coherence(), then calls spllo() for a + * per-cpu queue or qunlock() for the admin queue (presumably undoing what qcmd did; the start + * of qcmd is not visible in this copy). The while(waserror()) loop re-arms itself on every error + * so the function keeps waiting; presumably because ws lives on the caller's stack and must not + * be abandoned while nvmeintr can still reach it - confirm. + * After a first 5 ms sleep it polls nvmeintr by hand every 10 ms until wdone holds. + */ +static u32int +wcmd(WS *ws) +{ + SQ *sq = ws->queue; + Ctlr *ctlr = sq->ctlr; + + coherence(); + ctlr->reg[DBell + ((sq-ctlr->sq)*2+0 << ctlr->dstrd)] = sq->tail & sq->mask; + if(sq > ctlr->sq) { + assert(sq == &ctlr->sq[1+m->machno]); + spllo(); + } else + qunlock(sq->ctlr); + while(waserror()) + ; + tsleep(ws->sleep, wdone, ws, 5); + while(!wdone(ws)){ + nvmeintr(nil, ctlr); + tsleep(ws->sleep, wdone, ws, 10); + } + poperror(); + return ws->status; +} + +/* + * checkstatus: return quietly when status is 0; otherwise format "info: status hex" into + * up->genbuf and raise it with error(). + * NOTE(review): unlike the other helpers in this file this function is not declared static. + */ +void +checkstatus(u32int status, char *info) +{ + if(status == 0) + return; + snprint(up->genbuf, sizeof(up->genbuf), "%s: status %ux", info, status); + error(up->genbuf); +} + +/* + * nvmebio: read or write count sectors starting at lba, to or from buffer a; returns the number + * of bytes transferred. lun is unused. + * Each command covers at most m sectors, where m is what fits between p and the end of the + * following memory page (qcmd fills in one data pointer plus one next-page pointer). + * Opcode 0x01 is used for write and 0x02 for read; a nonzero completion status is raised as an + * error by checkstatus. + */ +static long +nvmebio(SDunit *u, int lun, int write, void *a, long count, uvlong lba) +{ + u32int nsid, s, n, m, *e; + Ctlr *ctlr; + uchar *p; + WS ws; + + USED(lun); + + ctlr = u->dev->ctlr; + nsid = ctlr->nsid[u->subno]; + s = u->secsize; + p = a; + while(count > 0){ + m = (2*ctlr->mps - ((uintptr)p & ctlr->mps-1)) / s; + if((n = count) > m) + n = m; + e = qcmd(&ws, ctlr, 0, write ? 
0x01 : 0x02, nsid, nil, p, n*s); + e[10] = lba; + e[11] = lba>>32; + e[12] = n-1; + e[13] = (count>n)<<6; /* sequential request */ + e[14] = 0; + e[15] = 0; + checkstatus(wcmd(&ws), write ? "write" : "read"); + p += n*s; + count -= n; + lba += n; + } + return p - (uchar*)a; +} + +/* + * nvmerio: handle a SCSI-style request. Commands 0x35 and 0x91 are acknowledged with SDok + * without doing any work (presumably the cache-flush opcodes - confirm); other commands go + * through sdfakescsi and sdfakescsirw, and reads or writes are carried out by nvmebio. + * NOTE(review): nvmebio can raise an error through checkstatus; it is not caught here. + */ +static int +nvmerio(SDreq *r) +{ + int i, count, rw; + uvlong lba; + SDunit *u; + + u = r->unit; + if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91) + return sdsetsense(r, SDok, 0, 0, 0); + if((i = sdfakescsi(r)) != SDnostatus) + return r->status = i; + if((i = sdfakescsirw(r, &lba, &count, &rw)) != SDnostatus) + return i; + r->rlen = nvmebio(u, r->lun, rw == SDwrite, r->data, count, lba); + return r->status = SDok; +} + +/* nvmeverify: a unit is accepted when its subno is below the number of namespace ids counted by identify. */ +static int +nvmeverify(SDunit *u) +{ + Ctlr *ctlr = u->dev->ctlr; + return u->subno < ctlr->nnsid; +} + +/* + * nvmeonline: fetch the identify data for the unit's namespace (admin opcode 0x06, e[10] = 0) + * and fill in u->sectors, u->secsize and u->inquiry. + * Returns 1 when u->sectors is already set, 0 when the buffer cannot be allocated or the + * command fails, and 2 after a fresh successful identify. + * The sector count is the little-endian 64-bit value in bytes 0..7; the sector size is 1 shifted + * by bits 16..23 of the 32-bit format entry at offset 128 + 4*(info[26]&15). + * NOTE(review): p[3]<<24 is evaluated before the 64-bit terms are or-ed in; if that shift is + * done as a signed int, a count with bit 31 set would sign-extend into the upper half. + * Confirm against the compiler's promotion rules or cast the low word to u32int. + */ +static int +nvmeonline(SDunit *u) +{ + u32int *e, lbaf; + uchar *info, *p; + Ctlr *ctlr; + WS ws; + + if(u->sectors != 0) + return 1; + + ctlr = u->dev->ctlr; + if((info = mallocalign(0x1000, ctlr->mps, 0, 0)) == nil) + return 0; + + e = qcmd(&ws, ctlr, 1, 0x06, ctlr->nsid[u->subno], nil, info, 0x1000); + e[10] = 0; // identify namespace + if(wcmd(&ws) != 0){ + free(info); + return 0; + } + p = info; + u->sectors = p[0] | p[1]<<8 | p[2]<<16 | p[3]<<24 + | (u64int)p[4]<<32 + | (u64int)p[5]<<40 + | (u64int)p[6]<<48 + | (u64int)p[7]<<56; + p = &info[128 + 4*(info[26]&15)]; + lbaf = p[0] | p[1]<<8 | p[2]<<16 | p[3]<<24; + u->secsize = 1<<((lbaf>>16)&0xFF); + free(info); + + memset(u->inquiry, 0, sizeof u->inquiry); + u->inquiry[2] = 2; + u->inquiry[3] = 2; + u->inquiry[4] = sizeof u->inquiry - 4; + memmove(u->inquiry+8, ctlr->ident+24, 20); + + return 2; +} + +/* + * nvmerctl: format the ctl text for a unit into p (at most l bytes) and return its length; + * returns 0 when there is no controller or no identify data yet. + * Prints 20 bytes at ident+24 as model, 10 bytes at ident+4 as serial, 6 bytes at ident+64 as + * firm, followed by the geometry line. + */ +static int +nvmerctl(SDunit *u, char *p, int l) +{ + Ctlr *ctlr; + char *e, *s; + + if((ctlr = u->dev->ctlr) == nil || ctlr->ident == nil) + return 0; + + e = p+l; + s = p; + + p = seprint(p, e, "model\t%.20s\n", (char*)ctlr->ident+24); + p = seprint(p, e, "serial\t%.10s\n", (char*)ctlr->ident+4); + p = seprint(p, e, 
"firm\t%.6s\n", (char*)ctlr->ident+64); + p = seprint(p, e, "geometry %llud %lud\n", u->sectors, u->secsize); + + return p-s; +} + +/* + * cqalloc: initialise cq for ctlr and allocate its entry memory; raises Enomem on failure. + * NOTE(review): several expressions in this function are truncated in this copy of the patch + * (the mask, the allocation size, the memset length and the return statement); restore them + * from the original before applying. + */ +static void* +cqalloc(Ctlr *ctlr, CQ *cq, u32int lgsize) +{ + cq->ctlr = ctlr; + cq->head = 0; + cq->shift = lgsize-4; + cq->mask = (1<shift)-1; + if((cq->base = mallocalign(1<mps, 0, 0)) == nil) + error(Enomem); + memset(cq->base, 0, 1<base; +} + +/* + * sqalloc: initialise sq for ctlr, allocate its entry memory and its array of wait slots; + * raises Enomem on failure. + * NOTE(review): truncated in this copy in the same places as cqalloc. + */ +static void* +sqalloc(Ctlr *ctlr, SQ *sq, u32int lgsize) +{ + sq->ctlr = ctlr; + sq->tail = 0; + sq->shift = lgsize-6; + sq->mask = (1<shift)-1; + if((sq->base = mallocalign(1<mps, 0, 0)) == nil) + error(Enomem); + if((sq->wait = mallocz(sizeof(WS*)*(sq->mask+1), 1)) == nil) + error(Enomem); + memset(sq->base, 0, 1<base; +} + +/* + * setupqueues: create the I/O completion queue (admin opcode 0x05) and one submission queue + * per cpu (admin opcode 0x01, queue ids 1..conf.nmach), then add the completion queue's bit to + * ctlr->ints and write it to IntMc. Failures are raised through checkstatus, cqalloc or sqalloc. + * NOTE(review): the sizing loop and the start of the completion queue setup are truncated in + * this copy of the patch; restore them from the original before applying. + */ +static void +setupqueues(Ctlr *ctlr) +{ + u32int lgsize, *e; + CQ *cq; + SQ *sq; + WS ws; + int i; + + /* Overkill */ + lgsize = 12-6+4; + while(lgsize < 16+4 && lgsize < ctlr->mpsshift && 1<cq[1]; + cqalloc(ctlr, cq, lgsize); + e = qcmd(&ws, ctlr, 1, 0x05, ~0, nil, cq->base, 1<cq) | cq->mask<<16; + e[11] = 3; /* IEN | PC */ + checkstatus(wcmd(&ws), "create completion queue"); + + /* SQID[1..nmach]: submission queue per cpu */ + for(i=1; i<=conf.nmach; i++){ + sq = &ctlr->sq[i]; + sqalloc(ctlr, sq, 12); + e = qcmd(&ws, ctlr, 1, 0x01, ~0, nil, sq->base, 0x1000); + e[10] = i | sq->mask<<16; + e[11] = (cq - ctlr->cq)<<16 | 1; /* CQID<<16 | PC */ + checkstatus(wcmd(&ws), "create submission queue"); + } + + ilock(&ctlr->intr); + ctlr->ints |= 1<<(cq - ctlr->cq); + ctlr->reg[IntMc] = ctlr->ints; + iunlock(&ctlr->intr); +} + +/* + * identify: allocate the 4K ident and nsid buffers if they do not exist yet, fill ident with the + * identify-controller data (e[10] = 1) and nsid with the namespace list (e[10] = 2), then set + * nnsid to the number of leading nonzero ids, at most 1024. + * Raises Enomem or a checkstatus error on failure. + */ +static void +identify(Ctlr *ctlr) +{ + u32int *e; + WS ws; + + if(ctlr->ident == nil) + if((ctlr->ident = mallocalign(0x1000, ctlr->mps, 0, 0)) == nil) + error(Enomem); + if(ctlr->nsid == nil) + if((ctlr->nsid = mallocalign(0x1000, ctlr->mps, 0, 0)) == nil) + error(Enomem); + + e = qcmd(&ws, ctlr, 1, 0x06, ~0, nil, ctlr->ident, 0x1000); + e[10] = 1; // identify controller + checkstatus(wcmd(&ws), "identify controller"); + + e = qcmd(&ws, ctlr, 1, 0x06, 
0, nil, ctlr->nsid, 0x1000); + e[10] = 2; // namespace list + checkstatus(wcmd(&ws), "namespace list"); + + ctlr->nnsid = 0; + while(ctlr->nnsid < 1024 && ctlr->nsid[ctlr->nnsid] != 0) + ctlr->nnsid++; +} + +/* + * nvmedisable: shut the controller down and release everything nvmeenable set up; always + * returns 1. Masks all interrupts, clears CCfg, polls CSts bit 0 up to 10 times at 100 ms + * intervals, removes the interrupt handler, turns off bus mastering, then frees and zeroes the + * queues and the ident and nsid buffers. + * NOTE(review): the result of the CSts wait is not checked, so the queue memory is freed even + * if the controller never reported that it stopped. + * NOTE(review): the bounds of the two free loops are truncated in this copy of the patch. + */ +static int +nvmedisable(SDev *sd) +{ + char name[32]; + Ctlr *ctlr; + int i; + + ctlr = sd->ctlr; + + /* mask interrupts */ + ilock(&ctlr->intr); + ctlr->ints = 0; + ctlr->reg[IntMs] = ~ctlr->ints; + iunlock(&ctlr->intr); + + /* disable controller */ + ctlr->reg[CCfg] = 0; + + for(i = 0; i < 10; i++){ + if((ctlr->reg[CSts] & 1) == 0) + break; + tsleep(&up->sleep, return0, nil, 100); + } + + snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name); + intrdisable(ctlr->pci->intl, nvmeintr, ctlr, ctlr->pci->tbdf, name); + + pciclrbme(ctlr->pci); /* dma disable */ + + for(i=0; isq); i++){ + free(ctlr->sq[i].base); + free(ctlr->sq[i].wait); + } + for(i=0; icq); i++) + free(ctlr->cq[i].base); + + memset(ctlr->sq, 0, sizeof(ctlr->sq)); + memset(ctlr->cq, 0, sizeof(ctlr->cq)); + + free(ctlr->ident); + ctlr->ident = nil; + free(ctlr->nsid); + ctlr->nsid = nil; + ctlr->nnsid = 0; + + return 1; +} + +/* + * nvmeenable: bring the controller up; returns 1 on success and 0 on failure. + * Installs nvmeintr, allocates the admin completion and submission queues and programs their + * physical addresses and sizes, enables bus mastering, sets ctlr->ints to 1 and writes it to + * IntMc, writes CCfg, then polls CSts every 500 ms, (cap>>24 & 255)+1 times at most, until its + * low two bits read 1. Finishes with identify and setupqueues. + * Any error raised along the way is printed, the controller is torn down with nvmedisable and + * sd->nunit is set to 0. + */ +static int +nvmeenable(SDev *sd) +{ + char name[32]; + Ctlr *ctlr; + u64int pa; + int to; + + ctlr = sd->ctlr; + + snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name); + intrenable(ctlr->pci->intl, nvmeintr, ctlr, ctlr->pci->tbdf, name); + + if(waserror()){ + print("%s: %s\n", name, up->errstr); + nvmedisable(sd); + sd->nunit = 0; /* hack: prevent further probing */ + return 0; + } + + pa = PADDR(cqalloc(ctlr, &ctlr->cq[0], ctlr->mpsshift)); + ctlr->reg[ACQBase0] = pa; + ctlr->reg[ACQBase1] = pa>>32; + + pa = PADDR(sqalloc(ctlr, &ctlr->sq[0], ctlr->mpsshift)); + ctlr->reg[ASQBase0] = pa; + ctlr->reg[ASQBase1] = pa>>32; + + ctlr->reg[AQAttr] = ctlr->sq[0].mask | ctlr->cq[0].mask<<16; + + /* dma enable */ + pcisetbme(ctlr->pci); + + /* enable interrupt */ + ilock(&ctlr->intr); + ctlr->ints = 1; + ctlr->reg[IntMc] = ctlr->ints; + iunlock(&ctlr->intr); + + /* 
enable controller */ + ctlr->reg[CCfg] = 1 | (ctlr->mpsshift-12)<<7 | 6<<16 | 4<<20; + + for(to = (ctlr->cap>>24) & 255; to >= 0; to--){ + tsleep(&up->sleep, return0, nil, 500); + if((ctlr->reg[CSts] & 3) == 1) + goto Ready; + } + if(ctlr->reg[CSts] & 2) + error("fatal controller status during initialization"); + error("controller initialization timeout"); +Ready: + identify(ctlr); + setupqueues(ctlr); + + poperror(); + + return 1; +} + +/* + * nvmepnpctlrs: scan the PCI bus for devices with class bytes 1/8/2 and a nonempty BAR 0 and + * return them as a linked list of Ctlr (nil when none is usable). + * For each one it maps BAR 0, reads the 64-bit cap value, masks all interrupts, clears CCfg, + * rejects controllers without cap bit 37, and derives dstrd and the page size: mpsshift starts + * at cap bits 48..51 and is raised towards cap bits 52..55 but not past 16 (64K). + * NOTE(review): only some Ctlr fields are assigned here; the list terminator (next of the last + * element) and the queue arrays rely on malloc returning zeroed memory - confirm. + * NOTE(review): the message below contains the word "commactlr", which looks like a damaged + * "command"; it is left untouched here because it is a runtime string. + */ +static Ctlr* +nvmepnpctlrs(void) +{ + Ctlr *ctlr, *h, *t; + Pcidev *p; + int i; + + h = t = nil; + for(p = nil; p = pcimatch(p, 0, 0);){ + if(p->ccrb != 1 || p->ccru != 8 || p->ccrp != 2) + continue; + if(p->mem[0].size == 0) + continue; + if((ctlr = malloc(sizeof(*ctlr))) == nil){ + print("nvme: no memory for Ctlr\n"); + break; + } + ctlr->pci = p; + ctlr->reg = vmap(p->mem[0].bar & ~0xF, p->mem[0].size); + if(ctlr->reg == nil){ + print("nvme: can't vmap bar0\n"); + Bad: + if(ctlr->reg != nil) + vunmap(ctlr->reg, p->mem[0].size); + free(ctlr); + continue; + } + ctlr->cap = ctlr->reg[Cap0]; + ctlr->cap |= (u64int)ctlr->reg[Cap1]<<32; + + /* mask interrupts */ + ctlr->ints = 0; + ctlr->reg[IntMs] = ~ctlr->ints; + + /* disable controller */ + ctlr->reg[CCfg] = 0; + + if((ctlr->cap&(1ULL<<37)) == 0){ + print("nvme: doesnt support NVM commactlr set: %ux\n", + (u32int)(ctlr->cap>>37) & 0xFF); + goto Bad; + } + + /* use 64K page size when possible */ + ctlr->dstrd = (ctlr->cap >> 32) & 15; + for(i = (ctlr->cap >> 48) & 15; i < ((ctlr->cap >> 52) & 15); i++){ + if(i >= 16-12) /* 64K */ + break; + } + ctlr->mpsshift = i+12; + ctlr->mps = 1 << ctlr->mpsshift; + + if(h == nil) + h = ctlr; + else + t->next = ctlr; + t = ctlr; + } + + return h; +} + +SDifc sdnvmeifc; + +/* + * nvmepnp: wrap every controller found by nvmepnpctlrs in an SDev and return the list. + * Device ids are handed out starting at 'N'; nunit is set to 1024, the same bound identify + * uses when counting namespace ids. Stops early if an SDev cannot be allocated. + */ +static SDev* +nvmepnp(void) +{ + SDev *s, *h, *t; + Ctlr *ctlr; + int id; + + h = t = nil; + + id = 'N'; + for(ctlr = nvmepnpctlrs(); ctlr != nil; ctlr = ctlr->next){ + if((s = malloc(sizeof(*s))) == nil) + break; + s->ctlr = ctlr; + s->idno = id++; + s->ifc = &sdnvmeifc; + 
s->nunit = 1024; + if(h) + t->next = s; + else + h = s; + t = s; + } + + return h; +} + +/* sdnvmeifc: the entry points this driver provides to the sd layer; nil entries are not implemented here. */ +SDifc sdnvmeifc = { + "nvme", /* name */ + + nvmepnp, /* pnp */ + nil, /* legacy */ + nvmeenable, /* enable */ + nvmedisable, /* disable */ + + nvmeverify, /* verify */ + nvmeonline, /* online */ + nvmerio, /* rio */ + nvmerctl, /* rctl */ + nil, /* wctl */ + + nvmebio, /* bio */ + nil, /* probe */ + nil, /* clear */ + nil, /* rtopctl */ + nil, /* wtopctl */ +};