![cinap_lenrek](/assets/img/avatar_default.png)
The new interface uses pci capability structures to locate the registers in a rather fine granular way making it more complicated as they can be located anywhere in any pci bar at any offset. As far as i can see, qemu (6.0.50) never uses i/o bars in non-legacy mode, so only mmio is implemented for now. The previous virtio drivers implemented the legacy interface only which uses i/o ports for all register accesses. This is still the preferred method (and also qemu default) as it is easier to emulate and most likely faster. However, some vps providers like vultr force the legacy interface to disabled with qemu -device option "disable-legacy=on" resulting on a system without a disk and ethernet.
692 lines
12 KiB
C
692 lines
12 KiB
C
/*
|
|
* virtio ethernet driver implementing the legacy interface:
|
|
* http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
|
|
*/
|
|
#include "u.h"
|
|
#include "../port/lib.h"
|
|
#include "mem.h"
|
|
#include "dat.h"
|
|
#include "fns.h"
|
|
#include "io.h"
|
|
#include "../port/pci.h"
|
|
#include "../port/error.h"
|
|
#include "../port/netif.h"
|
|
#include "../port/etherif.h"
|
|
|
|
typedef struct Vring Vring;
|
|
typedef struct Vdesc Vdesc;
|
|
typedef struct Vused Vused;
|
|
typedef struct Vheader Vheader;
|
|
typedef struct Vqueue Vqueue;
|
|
typedef struct Ctlr Ctlr;
|
|
|
|
enum {
|
|
/* §2.1 Device Status Field */
|
|
Sacknowledge = 1,
|
|
Sdriver = 2,
|
|
Sdriverok = 4,
|
|
Sfeatureok = 8,
|
|
Sfailed = 128,
|
|
|
|
/* §4.1.4.8 Legacy Interfaces: A Note on PCI Device Layout */
|
|
Qdevfeat = 0,
|
|
Qdrvfeat = 4,
|
|
Qaddr = 8,
|
|
Qsize = 12,
|
|
Qselect = 14,
|
|
Qnotify = 16,
|
|
Qstatus = 18,
|
|
Qisr = 19,
|
|
Qmac = 20,
|
|
Qnetstatus = 26,
|
|
|
|
/* flags in Qnetstatus */
|
|
Nlinkup = (1<<0),
|
|
Nannounce = (1<<1),
|
|
|
|
/* feature bits */
|
|
Fmac = (1<<5),
|
|
Fstatus = (1<<16),
|
|
Fctrlvq = (1<<17),
|
|
Fctrlrx = (1<<18),
|
|
|
|
/* vring used flags */
|
|
Unonotify = 1,
|
|
/* vring avail flags */
|
|
Rnointerrupt = 1,
|
|
|
|
/* descriptor flags */
|
|
Dnext = 1,
|
|
Dwrite = 2,
|
|
Dindirect = 4,
|
|
|
|
/* struct sizes */
|
|
VringSize = 4,
|
|
VdescSize = 16,
|
|
VusedSize = 8,
|
|
VheaderSize = 10,
|
|
|
|
/* §4.1.5.1.4.1 says pages are 4096 bytes
|
|
* for the purposes of the driver.
|
|
*/
|
|
VBY2PG = 4096,
|
|
#define VPGROUND(s) ROUND(s, VBY2PG)
|
|
|
|
Vrxq = 0,
|
|
Vtxq = 1,
|
|
Vctlq = 2,
|
|
|
|
/* class/cmd for Vctlq */
|
|
CtrlRx = 0x00,
|
|
CmdPromisc = 0x00,
|
|
CmdAllmulti = 0x01,
|
|
CtrlMac = 0x01,
|
|
CmdMacTableSet = 0x00,
|
|
CtrlVlan= 0x02,
|
|
CmdVlanAdd = 0x00,
|
|
CmdVlanDel = 0x01,
|
|
};
|
|
|
|
struct Vring
|
|
{
|
|
u16int flags;
|
|
u16int idx;
|
|
};
|
|
|
|
struct Vdesc
|
|
{
|
|
u64int addr;
|
|
u32int len;
|
|
u16int flags;
|
|
u16int next;
|
|
};
|
|
|
|
struct Vused
|
|
{
|
|
u32int id;
|
|
u32int len;
|
|
};
|
|
|
|
struct Vheader
|
|
{
|
|
u8int flags;
|
|
u8int segtype;
|
|
u16int hlen;
|
|
u16int seglen;
|
|
u16int csumstart;
|
|
u16int csumend;
|
|
};
|
|
|
|
/* §2.4 Virtqueues */
|
|
struct Vqueue
|
|
{
|
|
Rendez;
|
|
|
|
uint qsize;
|
|
uint qmask;
|
|
|
|
Vdesc *desc;
|
|
|
|
Vring *avail;
|
|
u16int *availent;
|
|
u16int *availevent;
|
|
|
|
Vring *used;
|
|
Vused *usedent;
|
|
u16int *usedevent;
|
|
u16int lastused;
|
|
|
|
uint nintr;
|
|
uint nnote;
|
|
};
|
|
|
|
struct Ctlr {
|
|
Lock;
|
|
|
|
QLock ctllock;
|
|
|
|
int attached;
|
|
|
|
int port;
|
|
Pcidev *pcidev;
|
|
Ctlr *next;
|
|
int active;
|
|
int id;
|
|
int typ;
|
|
ulong feat;
|
|
int nqueue;
|
|
|
|
/* virtioether has 3 queues: rx, tx and ctl */
|
|
Vqueue queue[3];
|
|
};
|
|
|
|
static Ctlr *ctlrhead;
|
|
|
|
static int
|
|
vhasroom(void *v)
|
|
{
|
|
Vqueue *q = v;
|
|
return q->lastused != q->used->idx;
|
|
}
|
|
|
|
static void
|
|
vqnotify(Ctlr *ctlr, int x)
|
|
{
|
|
Vqueue *q;
|
|
|
|
coherence();
|
|
q = &ctlr->queue[x];
|
|
if(q->used->flags & Unonotify)
|
|
return;
|
|
q->nnote++;
|
|
outs(ctlr->port+Qnotify, x);
|
|
}
|
|
|
|
static void
|
|
txproc(void *v)
|
|
{
|
|
Vheader *header;
|
|
Block **blocks;
|
|
Ether *edev;
|
|
Ctlr *ctlr;
|
|
Vqueue *q;
|
|
Vused *u;
|
|
Block *b;
|
|
int i, j;
|
|
|
|
edev = v;
|
|
ctlr = edev->ctlr;
|
|
q = &ctlr->queue[Vtxq];
|
|
|
|
header = smalloc(VheaderSize);
|
|
blocks = smalloc(sizeof(Block*) * (q->qsize/2));
|
|
|
|
for(i = 0; i < q->qsize/2; i++){
|
|
j = i << 1;
|
|
q->desc[j].addr = PADDR(header);
|
|
q->desc[j].len = VheaderSize;
|
|
q->desc[j].next = j | 1;
|
|
q->desc[j].flags = Dnext;
|
|
|
|
q->availent[i] = q->availent[i + q->qsize/2] = j;
|
|
|
|
j |= 1;
|
|
q->desc[j].next = 0;
|
|
q->desc[j].flags = 0;
|
|
}
|
|
|
|
q->avail->flags &= ~Rnointerrupt;
|
|
|
|
while(waserror())
|
|
;
|
|
|
|
while((b = qbread(edev->oq, 1000000)) != nil){
|
|
for(;;){
|
|
/* retire completed packets */
|
|
while((i = q->lastused) != q->used->idx){
|
|
u = &q->usedent[i & q->qmask];
|
|
i = (u->id & q->qmask) >> 1;
|
|
if(blocks[i] == nil)
|
|
break;
|
|
freeb(blocks[i]);
|
|
blocks[i] = nil;
|
|
q->lastused++;
|
|
}
|
|
|
|
/* have free slot? */
|
|
i = q->avail->idx & (q->qmask >> 1);
|
|
if(blocks[i] == nil)
|
|
break;
|
|
|
|
/* ring full, wait and retry */
|
|
if(!vhasroom(q))
|
|
sleep(q, vhasroom, q);
|
|
}
|
|
|
|
/* slot is free, fill in descriptor */
|
|
blocks[i] = b;
|
|
j = (i << 1) | 1;
|
|
q->desc[j].addr = PADDR(b->rp);
|
|
q->desc[j].len = BLEN(b);
|
|
coherence();
|
|
q->avail->idx++;
|
|
vqnotify(ctlr, Vtxq);
|
|
}
|
|
|
|
pexit("ether out queue closed", 1);
|
|
}
|
|
|
|
static void
|
|
rxproc(void *v)
|
|
{
|
|
Vheader *header;
|
|
Block **blocks;
|
|
Ether *edev;
|
|
Ctlr *ctlr;
|
|
Vqueue *q;
|
|
Vused *u;
|
|
Block *b;
|
|
int i, j;
|
|
|
|
edev = v;
|
|
ctlr = edev->ctlr;
|
|
q = &ctlr->queue[Vrxq];
|
|
|
|
header = smalloc(VheaderSize);
|
|
blocks = smalloc(sizeof(Block*) * (q->qsize/2));
|
|
|
|
for(i = 0; i < q->qsize/2; i++){
|
|
j = i << 1;
|
|
q->desc[j].addr = PADDR(header);
|
|
q->desc[j].len = VheaderSize;
|
|
q->desc[j].next = j | 1;
|
|
q->desc[j].flags = Dwrite|Dnext;
|
|
|
|
q->availent[i] = q->availent[i + q->qsize/2] = j;
|
|
|
|
j |= 1;
|
|
q->desc[j].next = 0;
|
|
q->desc[j].flags = Dwrite;
|
|
}
|
|
|
|
q->avail->flags &= ~Rnointerrupt;
|
|
|
|
while(waserror())
|
|
;
|
|
|
|
for(;;){
|
|
/* replenish receive ring */
|
|
do {
|
|
i = q->avail->idx & (q->qmask >> 1);
|
|
if(blocks[i] != nil)
|
|
break;
|
|
if((b = iallocb(ETHERMAXTU)) == nil)
|
|
break;
|
|
blocks[i] = b;
|
|
j = (i << 1) | 1;
|
|
q->desc[j].addr = PADDR(b->rp);
|
|
q->desc[j].len = BALLOC(b);
|
|
coherence();
|
|
q->avail->idx++;
|
|
} while(q->avail->idx != q->used->idx);
|
|
vqnotify(ctlr, Vrxq);
|
|
|
|
/* wait for any packets to complete */
|
|
if(!vhasroom(q))
|
|
sleep(q, vhasroom, q);
|
|
|
|
/* retire completed packets */
|
|
while((i = q->lastused) != q->used->idx) {
|
|
u = &q->usedent[i & q->qmask];
|
|
i = (u->id & q->qmask) >> 1;
|
|
if((b = blocks[i]) == nil)
|
|
break;
|
|
|
|
blocks[i] = nil;
|
|
|
|
b->wp = b->rp + u->len - VheaderSize;
|
|
etheriq(edev, b);
|
|
q->lastused++;
|
|
}
|
|
}
|
|
}
|
|
|
|
static int
|
|
vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata)
|
|
{
|
|
uchar hdr[2], ack[1];
|
|
Ctlr *ctlr;
|
|
Vqueue *q;
|
|
Vdesc *d;
|
|
int i;
|
|
|
|
ctlr = edev->ctlr;
|
|
q = &ctlr->queue[Vctlq];
|
|
if(q->qsize < 3)
|
|
return -1;
|
|
|
|
qlock(&ctlr->ctllock);
|
|
while(waserror())
|
|
;
|
|
|
|
ack[0] = 0x55;
|
|
hdr[0] = class;
|
|
hdr[1] = cmd;
|
|
|
|
d = &q->desc[0];
|
|
d->addr = PADDR(hdr);
|
|
d->len = sizeof(hdr);
|
|
d->next = 1;
|
|
d->flags = Dnext;
|
|
d++;
|
|
d->addr = PADDR(data);
|
|
d->len = ndata;
|
|
d->next = 2;
|
|
d->flags = Dnext;
|
|
d++;
|
|
d->addr = PADDR(ack);
|
|
d->len = sizeof(ack);
|
|
d->next = 0;
|
|
d->flags = Dwrite;
|
|
|
|
i = q->avail->idx & q->qmask;
|
|
q->availent[i] = 0;
|
|
coherence();
|
|
|
|
q->avail->flags &= ~Rnointerrupt;
|
|
q->avail->idx++;
|
|
vqnotify(ctlr, Vctlq);
|
|
while(!vhasroom(q))
|
|
sleep(q, vhasroom, q);
|
|
q->lastused = q->used->idx;
|
|
q->avail->flags |= Rnointerrupt;
|
|
|
|
qunlock(&ctlr->ctllock);
|
|
poperror();
|
|
|
|
if(ack[0] != 0)
|
|
print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, ack[0]);
|
|
|
|
return ack[0];
|
|
}
|
|
|
|
static void
|
|
interrupt(Ureg*, void* arg)
|
|
{
|
|
Ether *edev;
|
|
Ctlr *ctlr;
|
|
Vqueue *q;
|
|
int i;
|
|
|
|
edev = arg;
|
|
ctlr = edev->ctlr;
|
|
if(inb(ctlr->port+Qisr) & 1){
|
|
for(i = 0; i < ctlr->nqueue; i++){
|
|
q = &ctlr->queue[i];
|
|
if(vhasroom(q)){
|
|
q->nintr++;
|
|
wakeup(q);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
attach(Ether* edev)
|
|
{
|
|
char name[KNAMELEN];
|
|
Ctlr* ctlr;
|
|
|
|
ctlr = edev->ctlr;
|
|
lock(ctlr);
|
|
if(ctlr->attached){
|
|
unlock(ctlr);
|
|
return;
|
|
}
|
|
ctlr->attached = 1;
|
|
unlock(ctlr);
|
|
|
|
/* ready to go */
|
|
outb(ctlr->port+Qstatus, inb(ctlr->port+Qstatus) | Sdriverok);
|
|
|
|
/* start kprocs */
|
|
snprint(name, sizeof name, "#l%drx", edev->ctlrno);
|
|
kproc(name, rxproc, edev);
|
|
snprint(name, sizeof name, "#l%dtx", edev->ctlrno);
|
|
kproc(name, txproc, edev);
|
|
}
|
|
|
|
static long
|
|
ifstat(Ether *edev, void *a, long n, ulong offset)
|
|
{
|
|
int i, l;
|
|
char *p;
|
|
Ctlr *ctlr;
|
|
Vqueue *q;
|
|
|
|
ctlr = edev->ctlr;
|
|
|
|
p = smalloc(READSTR);
|
|
|
|
l = snprint(p, READSTR, "devfeat %32.32lub\n", ctlr->feat);
|
|
l += snprint(p+l, READSTR-l, "drvfeat %32.32lub\n", inl(ctlr->port+Qdrvfeat));
|
|
l += snprint(p+l, READSTR-l, "devstatus %8.8ub\n", inb(ctlr->port+Qstatus));
|
|
if(ctlr->feat & Fstatus)
|
|
l += snprint(p+l, READSTR-l, "netstatus %8.8ub\n", inb(ctlr->port+Qnetstatus));
|
|
|
|
for(i = 0; i < ctlr->nqueue; i++){
|
|
q = &ctlr->queue[i];
|
|
l += snprint(p+l, READSTR-l,
|
|
"vq%d %#p size %d avail->idx %d used->idx %d lastused %hud nintr %ud nnote %ud\n",
|
|
i, q, q->qsize, q->avail->idx, q->used->idx, q->lastused, q->nintr, q->nnote);
|
|
}
|
|
|
|
n = readstr(offset, a, n, p);
|
|
free(p);
|
|
|
|
return n;
|
|
}
|
|
|
|
static void
|
|
shutdown(Ether* edev)
|
|
{
|
|
Ctlr *ctlr = edev->ctlr;
|
|
outb(ctlr->port+Qstatus, 0);
|
|
pciclrbme(ctlr->pcidev);
|
|
}
|
|
|
|
static void
|
|
promiscuous(void *arg, int on)
|
|
{
|
|
Ether *edev = arg;
|
|
uchar b[1];
|
|
|
|
b[0] = on != 0;
|
|
vctlcmd(edev, CtrlRx, CmdPromisc, b, sizeof(b));
|
|
}
|
|
|
|
static void
|
|
multicast(void *arg, uchar*, int)
|
|
{
|
|
Ether *edev = arg;
|
|
uchar b[1];
|
|
|
|
b[0] = edev->nmaddr > 0;
|
|
vctlcmd(edev, CtrlRx, CmdAllmulti, b, sizeof(b));
|
|
}
|
|
|
|
/* §2.4.2 Legacy Interfaces: A Note on Virtqueue Layout */
|
|
static ulong
|
|
queuesize(ulong size)
|
|
{
|
|
return VPGROUND(VdescSize*size + sizeof(u16int)*(3+size))
|
|
+ VPGROUND(sizeof(u16int)*3 + VusedSize*size);
|
|
}
|
|
|
|
static int
|
|
initqueue(Vqueue *q, int size)
|
|
{
|
|
uchar *p;
|
|
|
|
/* §2.4: Queue Size value is always a power of 2 and <= 32768 */
|
|
assert(!(size & (size - 1)) && size <= 32768);
|
|
|
|
p = mallocalign(queuesize(size), VBY2PG, 0, 0);
|
|
if(p == nil){
|
|
print("ethervirtio: no memory for Vqueue\n");
|
|
free(p);
|
|
return -1;
|
|
}
|
|
|
|
q->desc = (void*)p;
|
|
p += VdescSize*size;
|
|
q->avail = (void*)p;
|
|
p += VringSize;
|
|
q->availent = (void*)p;
|
|
p += sizeof(u16int)*size;
|
|
q->availevent = (void*)p;
|
|
p += sizeof(u16int);
|
|
|
|
p = (uchar*)VPGROUND((uintptr)p);
|
|
q->used = (void*)p;
|
|
p += VringSize;
|
|
q->usedent = (void*)p;
|
|
p += VusedSize*size;
|
|
q->usedevent = (void*)p;
|
|
|
|
q->qsize = size;
|
|
q->qmask = q->qsize - 1;
|
|
|
|
q->lastused = q->avail->idx = q->used->idx = 0;
|
|
|
|
q->avail->flags |= Rnointerrupt;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static Ctlr*
|
|
pciprobe(int typ)
|
|
{
|
|
Ctlr *c, *h, *t;
|
|
Pcidev *p;
|
|
int n, i;
|
|
|
|
h = t = nil;
|
|
|
|
/* §4.1.2 PCI Device Discovery */
|
|
for(p = nil; p = pcimatch(p, 0x1AF4, 0);){
|
|
/* the two possible DIDs for virtio-net */
|
|
if(p->did != 0x1000 && p->did != 0x1041)
|
|
continue;
|
|
/*
|
|
* non-transitional devices will have a revision > 0,
|
|
* these are handled by ethervirtio10 driver.
|
|
*/
|
|
if(p->rid != 0)
|
|
continue;
|
|
/* first membar needs to be I/O */
|
|
if((p->mem[0].bar & 1) == 0)
|
|
continue;
|
|
/* non-transitional device will have typ+0x40 */
|
|
if(pcicfgr16(p, 0x2E) != typ)
|
|
continue;
|
|
if((c = mallocz(sizeof(Ctlr), 1)) == nil){
|
|
print("ethervirtio: no memory for Ctlr\n");
|
|
break;
|
|
}
|
|
c->port = p->mem[0].bar & ~3;
|
|
if(ioalloc(c->port, p->mem[0].size, 0, "ethervirtio") < 0){
|
|
print("ethervirtio: port %ux in use\n", c->port);
|
|
free(c);
|
|
continue;
|
|
}
|
|
|
|
c->typ = typ;
|
|
c->pcidev = p;
|
|
pcienable(p);
|
|
c->id = (p->did<<16)|p->vid;
|
|
|
|
/* §3.1.2 Legacy Device Initialization */
|
|
outb(c->port+Qstatus, 0);
|
|
while(inb(c->port+Qstatus) != 0)
|
|
delay(1);
|
|
outb(c->port+Qstatus, Sacknowledge|Sdriver);
|
|
|
|
/* negotiate feature bits */
|
|
c->feat = inl(c->port+Qdevfeat);
|
|
outl(c->port+Qdrvfeat, c->feat & (Fmac|Fstatus|Fctrlvq|Fctrlrx));
|
|
|
|
/* §4.1.5.1.4 Virtqueue Configuration */
|
|
for(i=0; i<nelem(c->queue); i++){
|
|
outs(c->port+Qselect, i);
|
|
n = ins(c->port+Qsize);
|
|
if(n == 0 || (n & (n-1)) != 0){
|
|
if(i < 2)
|
|
print("ethervirtio: queue %d has invalid size %d\n", i, n);
|
|
break;
|
|
}
|
|
if(initqueue(&c->queue[i], n) < 0)
|
|
break;
|
|
coherence();
|
|
outl(c->port+Qaddr, PADDR(c->queue[i].desc)/VBY2PG);
|
|
}
|
|
if(i < 2){
|
|
print("ethervirtio: no queues\n");
|
|
pcidisable(p);
|
|
free(c);
|
|
continue;
|
|
}
|
|
c->nqueue = i;
|
|
|
|
if(h == nil)
|
|
h = c;
|
|
else
|
|
t->next = c;
|
|
t = c;
|
|
}
|
|
|
|
return h;
|
|
}
|
|
|
|
|
|
static int
|
|
reset(Ether* edev)
|
|
{
|
|
static uchar zeros[Eaddrlen];
|
|
Ctlr *ctlr;
|
|
int i;
|
|
|
|
if(ctlrhead == nil)
|
|
ctlrhead = pciprobe(1);
|
|
|
|
for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){
|
|
if(ctlr->active)
|
|
continue;
|
|
if(edev->port == 0 || edev->port == ctlr->port){
|
|
ctlr->active = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if(ctlr == nil)
|
|
return -1;
|
|
|
|
edev->ctlr = ctlr;
|
|
edev->port = ctlr->port;
|
|
edev->irq = ctlr->pcidev->intl;
|
|
edev->tbdf = ctlr->pcidev->tbdf;
|
|
edev->mbps = 1000;
|
|
edev->link = 1;
|
|
|
|
if((ctlr->feat & Fmac) != 0 && memcmp(edev->ea, zeros, Eaddrlen) == 0){
|
|
for(i = 0; i < Eaddrlen; i++)
|
|
edev->ea[i] = inb(ctlr->port+Qmac+i);
|
|
} else {
|
|
for(i = 0; i < Eaddrlen; i++)
|
|
outb(ctlr->port+Qmac+i, edev->ea[i]);
|
|
}
|
|
|
|
edev->arg = edev;
|
|
|
|
edev->attach = attach;
|
|
edev->shutdown = shutdown;
|
|
edev->ifstat = ifstat;
|
|
|
|
if((ctlr->feat & (Fctrlvq|Fctrlrx)) == (Fctrlvq|Fctrlrx)){
|
|
edev->multicast = multicast;
|
|
edev->promiscuous = promiscuous;
|
|
}
|
|
|
|
pcisetbme(ctlr->pcidev);
|
|
intrenable(edev->irq, interrupt, edev, edev->tbdf, edev->name);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
ethervirtiolink(void)
|
|
{
|
|
addethercard("virtio", reset);
|
|
}
|
|
|