plan9fox/sys/src/9/pc/ethervirtio.c
cinap_lenrek a0bb446d75 ethervirtio: fix queue notifications and interrupt flags, avoid useless notifications
bug: Rnointerrupt was used on Vqueue.used.flags instead of
Vqueue.avail.flags.

introduce vqnotify() function that notifies the device
about available ring advancement.

avoid queue notifications, which can be slow, by
checking the Unonotify flag in Vqueue.used.flags.

keep track of the number of notifications in the queue.
2014-12-09 03:23:53 +01:00

672 lines
12 KiB
C

#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "../port/error.h"
#include "../port/netif.h"
#include "etherif.h"
/*
* virtio ethernet driver
* http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
*/
/*
 * Short type names for the structures defined below, declared up
 * front so the structures can refer to each other (for example
 * Ctlr contains Vqueues and a pointer to the next Ctlr).
 */
typedef struct Vring Vring;
typedef struct Vdesc Vdesc;
typedef struct Vused Vused;
typedef struct Vheader Vheader;
typedef struct Vqueue Vqueue;
typedef struct Ctlr Ctlr;
/*
 * Driver constants: device status bits, register offsets (always
 * used as ctlr->port+Qxxx), feature bits, ring and descriptor flags,
 * structure sizes used for queue layout, queue indices and
 * control queue class/command codes.
 */
enum {
/* §2.1 Device Status Field (bits written to Qstatus) */
Sacknowledge = 1,
Sdriver = 2,
Sdriverok = 4,
Sfeatureok = 8,
Sfailed = 128,
/* §4.1.4.8 Legacy Interfaces: A Note on PCI Device Layout */
/* register offsets relative to the controller's I/O port base */
Qdevfeat = 0,	/* read with inl() in pciprobe() */
Qdrvfeat = 4,	/* written with outl() in pciprobe() */
Qaddr = 8,	/* receives PADDR(desc)/VBY2PG of the selected queue */
Qsize = 12,	/* size of the selected queue, read with ins() */
Qselect = 14,	/* selects the queue that Qsize/Qaddr refer to */
Qnotify = 16,	/* queue index is written here by vqnotify() */
Qstatus = 18,	/* device status, see S* bits above */
Qisr = 19,	/* bit 0 is tested in interrupt() */
Qmac = 20,	/* Eaddrlen bytes of MAC address, see reset() */
Qnetstatus = 26,	/* only read (in ifstat) when Fstatus was offered */
/* flags in Qnetstatus */
Nlinkup = (1<<0),
Nannounce = (1<<1),
/* feature bits */
Fmac = (1<<5),
Fstatus = (1<<16),
Fctrlvq = (1<<17),
Fctrlrx = (1<<18),
/* vring used flags (checked on Vqueue.used->flags in vqnotify) */
Unonotify = 1,
/* vring avail flags (set/cleared on Vqueue.avail->flags) */
Rnointerrupt = 1,
/* descriptor flags */
Dnext = 1,	/* Vdesc.next names the following descriptor of the chain */
Dwrite = 2,	/* set on the receive buffers and on the vctlcmd ack byte */
Dindirect = 4,	/* not used in this file */
/* struct sizes, used for the layout arithmetic in queuesize()/initqueue() */
VringSize = 4,
VdescSize = 16,
VusedSize = 8,
VheaderSize = 10,
/* §4.1.5.1.4.1 says pages are 4096 bytes
* for the purposes of the driver.
*/
VBY2PG = 4096,
/* rounds s with ROUND() (defined elsewhere) to the virtio page size */
#define VPGROUND(s) ROUND(s, VBY2PG)
/* indices into Ctlr.queue[] */
Vrxq = 0,
Vtxq = 1,
Vctlq = 2,
/* class/cmd for Vctlq (first and second header byte built by vctlcmd) */
CtrlRx = 0x00,
CmdPromisc = 0x00,
CmdAllmulti = 0x01,
CtrlMac = 0x01,
CmdMacTableSet = 0x00,
CtrlVlan= 0x02,
CmdVlanAdd = 0x00,
CmdVlanDel = 0x01,
};
/*
 * Header shared by the avail and the used ring (Vqueue.avail and
 * Vqueue.used both point at one of these). Two u16int fields,
 * matching VringSize (4).
 */
struct Vring
{
u16int flags;	/* Rnointerrupt on the avail ring, Unonotify on the used ring */
u16int idx;	/* free-running ring index; masked with qmask before indexing */
};
/*
 * One entry of a virtqueue descriptor table (Vqueue.desc).
 * The field sizes add up to VdescSize (16).
 */
struct Vdesc
{
u64int addr;	/* buffer address; always filled in from PADDR() in this file */
u32int len;	/* buffer length in bytes */
u16int flags;	/* combination of Dnext, Dwrite, Dindirect */
u16int next;	/* index of the next descriptor when Dnext is set */
};
/*
 * One entry of the used ring (Vqueue.usedent), read by the driver
 * after used->idx has advanced. Field sizes add up to VusedSize (8).
 */
struct Vused
{
u32int id;	/* descriptor index; rxproc/txproc map it to a block slot with (id & qmask) >> 1 */
u32int len;	/* byte count reported for the chain; rxproc uses it to set the block's write pointer */
};
/*
 * Per-packet header placed in the first descriptor of every rx and
 * tx chain. This file never accesses the fields individually; it only
 * allocates VheaderSize (10) bytes, which is the sum of the field
 * sizes below.
 * NOTE(review): presumably mirrors the legacy virtio-net header of the
 * specification; confirm the field meanings there before using them.
 */
struct Vheader
{
u8int flags;
u8int segtype;
u16int hlen;
u16int seglen;
u16int csumstart;
u16int csumend;
};
/* §2.4 Virtqueues */
/*
 * Driver-side state of one virtqueue. The pointers are set up by
 * initqueue() and all point into a single allocation: descriptor
 * table, avail ring, then (on the next VBY2PG boundary) the used ring.
 */
struct Vqueue
{
/* the queue itself is the Rendez: procs sleep(q, ...), interrupt() does wakeup(q) */
Rendez;
uint qsize;	/* number of descriptors, as read from Qsize */
uint qmask;	/* qsize - 1, for wrapping ring indices */
Vdesc *desc;	/* descriptor table, qsize entries */
Vring *avail;	/* avail ring header (written by the driver) */
u16int *availent;	/* avail ring entries, qsize of them */
u16int *availevent;	/* u16int following the avail entries; only assigned in this file */
Vring *used;	/* used ring header (idx/flags are read by the driver) */
Vused *usedent;	/* used ring entries, qsize of them */
u16int *usedevent;	/* u16int following the used entries; only assigned in this file */
u16int lastused;	/* value of used->idx up to which entries have been processed */
uint nintr;	/* count of interrupts that found work on this queue */
uint nnote;	/* count of notifications sent by vqnotify() */
};
/*
 * Per-device state. Controllers are created by pciprobe() and kept
 * on a singly linked list headed by ctlrhead.
 */
struct Ctlr {
Lock;	/* held by attach() while testing/setting attached */
QLock ctllock;	/* serializes commands on the control queue (vctlcmd) */
int attached;	/* set once attach() has run */
int port;	/* I/O port base taken from PCI BAR 0 */
Pcidev *pcidev;
Ctlr *next;	/* next probed controller */
int active;	/* set when reset() has handed this controller to an Ether */
int id;	/* (did<<16)|vid of the PCI device */
int typ;	/* the typ argument pciprobe() matched against */
ulong feat;	/* feature bits read from Qdevfeat */
int nqueue;	/* number of queues successfully initialized by pciprobe() */
/* virtioether has 3 queues: rx, tx and ctl */
Vqueue queue[3];
};
/* list of controllers found by pciprobe(); filled in by the first reset() call */
static Ctlr *ctlrhead;
/*
 * Condition function for sleep() on a virtqueue.
 * Returns 1 when used->idx differs from lastused, i.e. there are
 * used ring entries the driver has not processed yet, else 0.
 */
static int
vhasroom(void *v)
{
	Vqueue *vq;

	vq = v;
	if(vq->used->idx == vq->lastused)
		return 0;
	return 1;
}
/*
 * Notify the device that the avail ring of queue number x has been
 * advanced. Nothing is written while the device has Unonotify set in
 * used->flags; notifications actually sent are counted in nnote.
 */
static void
vqnotify(Ctlr *ctlr, int x)
{
	Vqueue *vq;

	/* barrier first, then sample the flags and possibly notify */
	coherence();
	vq = &ctlr->queue[x];
	if((vq->used->flags & Unonotify) == 0){
		vq->nnote++;
		outs(ctlr->port+Qnotify, x);
	}
}
/*
 * Transmit kernel process, started by attach() with the Ether as
 * argument. It takes Blocks from edev->oq, posts each one on the tx
 * virtqueue and frees Blocks once the device has returned them on
 * the used ring. The proc exits when qbread() returns nil.
 */
static void
txproc(void *v)
{
Vheader *header;
Block **blocks;
Ether *edev;
Ctlr *ctlr;
Vqueue *q;
Vused *u;
Block *b;
int i, j;
edev = v;
ctlr = edev->ctlr;
q = &ctlr->queue[Vtxq];
/* one header buffer is shared by all chains; blocks[] has one slot per chain */
header = smalloc(VheaderSize);
blocks = smalloc(sizeof(Block*) * (q->qsize/2));
/*
 * Descriptors are used in pairs: 2*i points at the header and links
 * to 2*i+1, which will carry the packet data. Avail ring positions
 * i and i+qsize/2 both name chain head 2*i, so a ring position maps
 * to a slot with (idx & (qmask >> 1)).
 */
for(i = 0; i < q->qsize/2; i++){
j = i << 1;
q->desc[j].addr = PADDR(header);
q->desc[j].len = VheaderSize;
q->desc[j].next = j | 1;
q->desc[j].flags = Dnext;
q->availent[i] = q->availent[i + q->qsize/2] = j;
j |= 1;
q->desc[j].next = 0;
q->desc[j].flags = 0;
}
/* clear the flag initqueue() set, so interrupt() gets to wake us */
q->avail->flags &= ~Rnointerrupt;
/* NOTE(review): presumably re-arms the error label so that an error raised below restarts the loop instead of killing the proc */
while(waserror())
;
while((b = qbread(edev->oq, 1000000)) != nil){
for(;;){
/* retire completed packets */
while((i = q->lastused) != q->used->idx){
u = &q->usedent[i & q->qmask];
/* descriptor index -> slot in blocks[] */
i = (u->id & q->qmask) >> 1;
/* used entry for a slot that holds no block: stop retiring */
if(blocks[i] == nil)
break;
freeb(blocks[i]);
blocks[i] = nil;
q->lastused++;
}
/* have free slot? */
i = q->avail->idx & (q->qmask >> 1);
if(blocks[i] == nil)
break;
/* ring full, wait and retry */
if(!vhasroom(q))
sleep(q, vhasroom, q);
}
/* slot is free, fill in descriptor */
blocks[i] = b;
j = (i << 1) | 1;
q->desc[j].addr = PADDR(b->rp);
q->desc[j].len = BLEN(b);
/* descriptor contents must be written before the index is advanced */
coherence();
q->avail->idx++;
vqnotify(ctlr, Vtxq);
}
pexit("ether out queue closed", 1);
}
static void
rxproc(void *v)
{
Vheader *header;
Block **blocks;
Ether *edev;
Ctlr *ctlr;
Vqueue *q;
Vused *u;
Block *b;
int i, j;
edev = v;
ctlr = edev->ctlr;
q = &ctlr->queue[Vrxq];
header = smalloc(VheaderSize);
blocks = smalloc(sizeof(Block*) * (q->qsize/2));
for(i = 0; i < q->qsize/2; i++){
j = i << 1;
q->desc[j].addr = PADDR(header);
q->desc[j].len = VheaderSize;
q->desc[j].next = j | 1;
q->desc[j].flags = Dwrite|Dnext;
q->availent[i] = q->availent[i + q->qsize/2] = j;
j |= 1;
q->desc[j].next = 0;
q->desc[j].flags = Dwrite;
}
q->avail->flags &= ~Rnointerrupt;
while(waserror())
;
for(;;){
/* replenish receive ring */
do {
i = q->avail->idx & (q->qmask >> 1);
if(blocks[i] != nil)
break;
if((b = iallocb(ETHERMAXTU)) == nil)
break;
blocks[i] = b;
j = (i << 1) | 1;
q->desc[j].addr = PADDR(b->rp);
q->desc[j].len = BALLOC(b);
coherence();
q->avail->idx++;
} while(q->avail->idx != q->used->idx);
vqnotify(ctlr, Vrxq);
/* wait for any packets to complete */
if(!vhasroom(q))
sleep(q, vhasroom, q);
/* retire completed packets */
while((i = q->lastused) != q->used->idx) {
u = &q->usedent[i & q->qmask];
i = (u->id & q->qmask) >> 1;
if((b = blocks[i]) == nil)
break;
blocks[i] = nil;
b->wp = b->rp + u->len;
etheriq(edev, b, 1);
q->lastused++;
}
}
}
/*
 * Issue one command on the control virtqueue and wait for the device
 * to complete it.
 *
 * class, cmd: the two header bytes (CtrlXxx / CmdXxx).
 * data, ndata: command payload.
 *
 * Returns -1 when the control queue has fewer than 3 descriptors,
 * otherwise the ack byte written by the device; a non-zero ack is
 * also reported with print(). Commands are serialized by
 * ctlr->ctllock.
 */
static int
vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata)
{
	uchar req[2], status[1];
	Ctlr *c;
	Vqueue *vq;
	Vdesc *chain;
	int slot;

	c = edev->ctlr;
	vq = &c->queue[Vctlq];
	if(vq->qsize < 3)
		return -1;

	qlock(&c->ctllock);
	while(waserror())
		;

	/* preset the ack to a non-zero value before the device overwrites it */
	status[0] = 0x55;
	req[0] = class;
	req[1] = cmd;

	/* three descriptor chain: header -> payload -> ack (device writable) */
	chain = &vq->desc[0];

	chain[0].addr = PADDR(req);
	chain[0].len = sizeof(req);
	chain[0].next = 1;
	chain[0].flags = Dnext;

	chain[1].addr = PADDR(data);
	chain[1].len = ndata;
	chain[1].next = 2;
	chain[1].flags = Dnext;

	chain[2].addr = PADDR(status);
	chain[2].len = sizeof(status);
	chain[2].next = 0;
	chain[2].flags = Dwrite;

	/* publish chain head 0 in the next avail slot */
	slot = vq->avail->idx & vq->qmask;
	vq->availent[slot] = 0;
	coherence();

	/* want an interrupt for this command only */
	vq->avail->flags &= ~Rnointerrupt;
	vq->avail->idx++;
	vqnotify(c, Vctlq);
	while(!vhasroom(vq))
		sleep(vq, vhasroom, vq);
	vq->lastused = vq->used->idx;
	vq->avail->flags |= Rnointerrupt;

	qunlock(&c->ctllock);
	poperror();

	if(status[0] != 0)
		print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, status[0]);
	return status[0];
}
/*
 * Interrupt handler, arg is the Ether. When bit 0 of the Qisr
 * register is set, every initialized queue with unprocessed used
 * ring entries has its nintr count bumped and its sleeper woken.
 */
static void
interrupt(Ureg*, void* arg)
{
	Ether *edev;
	Ctlr *ctlr;
	Vqueue *vq;

	edev = arg;
	ctlr = edev->ctlr;
	if((inb(ctlr->port+Qisr) & 1) == 0)
		return;
	for(vq = ctlr->queue; vq < ctlr->queue + ctlr->nqueue; vq++){
		if(!vhasroom(vq))
			continue;
		vq->nintr++;
		wakeup(vq);
	}
}
/*
 * Attach routine of the Ether interface. The first call sets
 * Sdriverok in the device status register and starts the receive and
 * transmit kernel processes; later calls do nothing.
 */
static void
attach(Ether* edev)
{
	char name[KNAMELEN];
	Ctlr* ctlr;

	ctlr = edev->ctlr;

	lock(ctlr);
	if(ctlr->attached){
		unlock(ctlr);
		return;
	}
	ctlr->attached = 1;

	/* ready to go */
	outb(ctlr->port+Qstatus, inb(ctlr->port+Qstatus) | Sdriverok);
	unlock(ctlr);

	/*
	 * start kprocs; done after dropping the spin lock because
	 * creating a process may allocate and block. The attached
	 * flag set above already guarantees this runs only once.
	 */
	snprint(name, sizeof name, "#l%drx", edev->ctlrno);
	kproc(name, rxproc, edev);
	snprint(name, sizeof name, "#l%dtx", edev->ctlrno);
	kproc(name, txproc, edev);
}
/*
 * ifstat routine of the Ether interface: formats the feature bits,
 * device status, optional net status and one line of counters per
 * queue into a READSTR sized buffer and returns the part selected by
 * offset/n through readstr().
 */
static long
ifstat(Ether *edev, void *a, long n, ulong offset)
{
	Ctlr *ctlr;
	Vqueue *vq;
	char *buf;
	int qi, used;

	ctlr = edev->ctlr;
	buf = smalloc(READSTR);

	used = 0;
	used += snprint(buf+used, READSTR-used, "devfeat %32.32lub\n", ctlr->feat);
	used += snprint(buf+used, READSTR-used, "drvfeat %32.32lub\n", inl(ctlr->port+Qdrvfeat));
	used += snprint(buf+used, READSTR-used, "devstatus %8.8ub\n", inb(ctlr->port+Qstatus));
	/* the net status register is only read when the device offered Fstatus */
	if((ctlr->feat & Fstatus) != 0)
		used += snprint(buf+used, READSTR-used, "netstatus %8.8ub\n", inb(ctlr->port+Qnetstatus));

	qi = 0;
	while(qi < ctlr->nqueue){
		vq = &ctlr->queue[qi];
		used += snprint(buf+used, READSTR-used,
			"vq%d %#p size %d avail->idx %d used->idx %d lastused %hud nintr %ud nnote %ud\n",
			qi, vq, vq->qsize, vq->avail->idx, vq->used->idx, vq->lastused, vq->nintr, vq->nnote);
		qi++;
	}

	n = readstr(offset, a, n, buf);
	free(buf);
	return n;
}
/*
 * shutdown routine of the Ether interface: writes 0 to the device
 * status register.
 */
static void
shutdown(Ether* edev)
{
	Ctlr *ctlr;

	ctlr = edev->ctlr;
	outb(ctlr->port+Qstatus, 0);
}
/*
 * promiscuous routine of the Ether interface (arg is the Ether):
 * sends CtrlRx/CmdPromisc with a single byte that is 1 when on is
 * non-zero and 0 otherwise.
 */
static void
promiscuous(void *arg, int on)
{
	Ether *ether;
	uchar enable[1];

	ether = arg;
	enable[0] = on ? 1 : 0;
	vctlcmd(ether, CtrlRx, CmdPromisc, enable, sizeof(enable));
}
/*
 * multicast routine of the Ether interface (arg is the Ether; the
 * address and add/remove arguments are ignored): sends
 * CtrlRx/CmdAllmulti with a byte that is 1 while edev->nmaddr is
 * greater than zero and 0 otherwise.
 */
static void
multicast(void *arg, uchar*, int)
{
	Ether *ether;
	uchar enable[1];

	ether = arg;
	enable[0] = ether->nmaddr > 0 ? 1 : 0;
	vctlcmd(ether, CtrlRx, CmdAllmulti, enable, sizeof(enable));
}
/* §2.4.2 Legacy Interfaces: A Note on Virtqueue Layout */
/*
 * Number of bytes needed for a virtqueue with size entries: the
 * descriptor table plus avail ring (3+size u16ints) rounded with
 * VPGROUND, followed by the used ring (3 u16ints plus size used
 * entries), rounded likewise.
 */
static ulong
queuesize(ulong size)
{
	ulong drvpart, devpart;

	drvpart = VdescSize*size + sizeof(u16int)*(3+size);
	devpart = sizeof(u16int)*3 + VusedSize*size;
	return VPGROUND(drvpart) + VPGROUND(devpart);
}
/*
 * Allocate the memory for a virtqueue with size entries and set up
 * the pointers in q: descriptor table, avail ring header, avail
 * entries and event word, then, starting on the next VBY2PG boundary,
 * the used ring header, used entries and event word.
 * All ring indices start at 0 and Rnointerrupt is set in the avail
 * flags until a user of the queue clears it.
 *
 * size must be a power of two no larger than 32768 (asserted).
 * Returns 0 on success, -1 when the allocation fails.
 */
static int
initqueue(Vqueue *q, int size)
{
	uchar *p;

	/* §2.4: Queue Size value is always a power of 2 and <= 32768 */
	assert(!(size & (size - 1)) && size <= 32768);

	p = mallocalign(queuesize(size), VBY2PG, 0, 0);
	if(p == nil){
		/* nothing was allocated, so there is nothing to release */
		print("ethervirtio: no memory for Vqueue\n");
		return -1;
	}

	q->desc = (void*)p;
	p += VdescSize*size;
	q->avail = (void*)p;
	p += VringSize;
	q->availent = (void*)p;
	p += sizeof(u16int)*size;
	q->availevent = (void*)p;
	p += sizeof(u16int);

	/* the used ring starts on its own virtio page */
	p = (uchar*)VPGROUND((uintptr)p);
	q->used = (void*)p;
	p += VringSize;
	q->usedent = (void*)p;
	p += VusedSize*size;
	q->usedevent = (void*)p;

	q->qsize = size;
	q->qmask = q->qsize - 1;

	q->lastused = q->avail->idx = q->used->idx = 0;

	q->avail->flags |= Rnointerrupt;

	return 0;
}
static Ctlr*
pciprobe(int typ)
{
Ctlr *c, *h, *t;
Pcidev *p;
int n, i;
h = t = nil;
/* §4.1.2 PCI Device Discovery */
for(p = nil; p = pcimatch(p, 0, 0);){
if(p->vid != 0x1AF4)
continue;
/* the two possible DIDs for virtio-net */
if(p->did != 0x1000 && p->did != 0x1041)
continue;
/* non-transitional devices will have a revision > 0 */
if(p->rid != 0)
continue;
/* non-transitional device will have typ+0x40 */
if(pcicfgr16(p, 0x2E) != typ)
continue;
if((c = mallocz(sizeof(Ctlr), 1)) == nil){
print("ethervirtio: no memory for Ctlr\n");
break;
}
c->port = p->mem[0].bar & ~0x1;
if(ioalloc(c->port, p->mem[0].size, 0, "ethervirtio") < 0){
print("ethervirtio: port %ux in use\n", c->port);
free(c);
continue;
}
c->typ = typ;
c->pcidev = p;
c->id = (p->did<<16)|p->vid;
/* §3.1.2 Legacy Device Initialization */
outb(c->port+Qstatus, 0);
outb(c->port+Qstatus, Sacknowledge|Sdriver);
/* negotiate feature bits */
c->feat = inl(c->port+Qdevfeat);
outl(c->port+Qdrvfeat, c->feat & (Fmac|Fstatus|Fctrlvq|Fctrlrx));
/* §4.1.5.1.4 Virtqueue Configuration */
for(i=0; i<nelem(c->queue); i++){
outs(c->port+Qselect, i);
n = ins(c->port+Qsize);
if(n == 0 || (n & (n-1)) != 0)
break;
if(initqueue(&c->queue[i], n) < 0)
break;
coherence();
outl(c->port+Qaddr, PADDR(c->queue[i].desc)/VBY2PG);
}
c->nqueue = i;
if(h == nil)
h = c;
else
t->next = c;
t = c;
}
return h;
}
/*
 * Ether reset routine registered by ethervirtiolink().
 * Probes the PCI bus on first use, claims the first controller that
 * is not yet active and matches edev->port (0 matches any), fills in
 * the Ether's port, irq, tbdf and link parameters, settles the MAC
 * address and installs the driver's callbacks.
 * Returns 0 on success, -1 when no controller is available.
 */
static int
reset(Ether* edev)
{
	static uchar zeros[Eaddrlen];
	Ctlr *ctlr;
	int i, fromdev;

	if(ctlrhead == nil)
		ctlrhead = pciprobe(1);

	ctlr = ctlrhead;
	while(ctlr != nil){
		if(!ctlr->active && (edev->port == 0 || edev->port == ctlr->port))
			break;
		ctlr = ctlr->next;
	}
	if(ctlr == nil)
		return -1;
	ctlr->active = 1;

	edev->ctlr = ctlr;
	edev->port = ctlr->port;
	edev->irq = ctlr->pcidev->intl;
	edev->tbdf = ctlr->pcidev->tbdf;
	edev->mbps = 1000;
	edev->link = 1;

	/*
	 * Take the address from the device when it offers Fmac and
	 * none has been configured; otherwise write the configured
	 * address to the device.
	 */
	fromdev = (ctlr->feat & Fmac) != 0 && memcmp(edev->ea, zeros, Eaddrlen) == 0;
	if(fromdev){
		for(i = 0; i < Eaddrlen; i++)
			edev->ea[i] = inb(ctlr->port+Qmac+i);
	} else {
		for(i = 0; i < Eaddrlen; i++)
			outb(ctlr->port+Qmac+i, edev->ea[i]);
	}

	edev->arg = edev;
	edev->attach = attach;
	edev->shutdown = shutdown;
	edev->interrupt = interrupt;
	edev->ifstat = ifstat;

	/* rx mode control needs both the control queue and the rx commands */
	if((ctlr->feat & (Fctrlvq|Fctrlrx)) == (Fctrlvq|Fctrlrx)){
		edev->multicast = multicast;
		edev->promiscuous = promiscuous;
	}

	return 0;
}
/*
 * Link-time entry point of the driver: registers reset() with
 * addethercard() under the card name "virtio".
 */
void
ethervirtiolink(void)
{
addethercard("virtio", reset);
}