virtio: add non-legacy virtio 1.0 drivers for disk and ethernet

The new interface uses pci capability structures to locate the
registers in a rather fine-grained way, which makes it more complicated
as they can be located in any pci bar at any offset.

As far as I can see, qemu (6.0.50) never uses i/o bars in
non-legacy mode, so only mmio is implemented for now.

The previous virtio drivers implemented the legacy interface only
which uses i/o ports for all register accesses. This is still
the preferred method (and also qemu default) as it is easier to
emulate and most likely faster.

However, some vps providers like vultr force the legacy interface
to be disabled with the qemu -device option "disable-legacy=on", resulting
in a system without a disk and ethernet.
This commit is contained in:
cinap_lenrek 2021-07-11 11:24:13 +00:00
parent c3589ef3cf
commit f58d99aa7a
6 changed files with 1617 additions and 9 deletions

View file

@ -1,3 +1,7 @@
/*
* virtio ethernet driver implementing the legacy interface:
* http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
*/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@ -9,11 +13,6 @@
#include "../port/netif.h"
#include "../port/etherif.h"
/*
* virtio ethernet driver
* http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
*/
typedef struct Vring Vring;
typedef struct Vdesc Vdesc;
typedef struct Vused Vused;
@ -555,13 +554,14 @@ pciprobe(int typ)
h = t = nil;
/* §4.1.2 PCI Device Discovery */
for(p = nil; p = pcimatch(p, 0, 0);){
if(p->vid != 0x1AF4)
continue;
for(p = nil; p = pcimatch(p, 0x1AF4, 0);){
/* the two possible DIDs for virtio-net */
if(p->did != 0x1000 && p->did != 0x1041)
continue;
/* non-transitional devices will have a revision > 0 */
/*
* non-transitional devices will have a revision > 0,
* these are handled by ethervirtio10 driver.
*/
if(p->rid != 0)
continue;
/* first membar needs to be I/O */
@ -588,6 +588,8 @@ pciprobe(int typ)
/* §3.1.2 Legacy Device Initialization */
outb(c->port+Qstatus, 0);
while(inb(c->port+Qstatus) != 0)
delay(1);
outb(c->port+Qstatus, Sacknowledge|Sdriver);
/* negotiate feature bits */

View file

@ -0,0 +1,790 @@
/*
* virtio 1.0 ethernet driver
* http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
*
* In contrast to ethervirtio.c, this driver handles the non-legacy
* interface for virtio ethernet which uses mmio for all register accesses
* and requires an elaborate pci capability structure dance to get working.
*
* It is kind of pointless as it is most likely slower than
* port i/o (harder to emulate on the pc platform).
*
* The reason why this driver is needed is that vultr sets the
* disable-legacy=on option in the -device parameter for qemu
* on their hypervisor.
*/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "../port/pci.h"
#include "../port/error.h"
#include "../port/netif.h"
#include "../port/etherif.h"
/* short type names for the structures declared below */
typedef struct Vconfig Vconfig;
typedef struct Vnetcfg Vnetcfg;
typedef struct Vring Vring;
typedef struct Vdesc Vdesc;
typedef struct Vused Vused;
typedef struct Vheader Vheader;
typedef struct Vqueue Vqueue;
typedef struct Ctlr Ctlr;
/*
 * Constants used by this driver. The § references are to the
 * virtio 1.0 specification named in the file header.
 */
enum {
/* §2.1 Device Status Field */
Sacknowledge = 1,
Sdriver = 2,
Sdriverok = 4,
Sfeatureok = 8,
Sfailed = 128,
/* flags in Qnetstatus */
Nlinkup = (1<<0),
Nannounce = (1<<1),
/* feat[0] bits */
Fmac = 1<<5,
Fstatus = 1<<16,
Fctrlvq = 1<<17,
Fctrlrx = 1<<18,
/* feat[1] bits */
/* feature bit 32 overall, i.e. bit 0 of the second 32-bit feature word */
Fversion1 = 1<<(32-32),
/* vring used flags */
Unonotify = 1,
/* vring avail flags */
Rnointerrupt = 1,
/* descriptor flags */
Dnext = 1,
Dwrite = 2,
Dindirect = 4,
/* struct sizes */
/* byte sizes used for ring allocation and pointer arithmetic instead of sizeof */
VringSize = 4,
VdescSize = 16,
VusedSize = 8,
/*
 * NOTE(review): the Vheader fields declared in this file add up
 * to 10 bytes; the 2 extra bytes are presumably the num_buffers
 * field of the virtio 1.0 header -- confirm against the spec.
 */
VheaderSize = 12,
/* indices into Ctlr.queue[] */
Vrxq = 0,
Vtxq = 1,
Vctlq = 2,
/* class/cmd for Vctlq */
CtrlRx = 0x00,
CmdPromisc = 0x00,
CmdAllmulti = 0x01,
CtrlMac = 0x01,
CmdMacTableSet = 0x00,
CtrlVlan= 0x02,
CmdVlanAdd = 0x00,
CmdVlanDel = 0x01,
};
/*
 * Register block reached through the virtio capability of type 1
 * (see pciprobe). The fields are read and written in place through
 * the mapping, so their order and widths are the register layout.
 */
struct Vconfig {
/* selects which 32-bit word of the device features devfeat shows */
u32int devfeatsel;
u32int devfeat;
/* selects which 32-bit word of the driver features drvfeat writes */
u32int drvfeatsel;
u32int drvfeat;
u16int msixcfg;
u16int nqueues;
/* device status, a combination of the S* bits */
u8int status;
u8int cfggen;
/* selects the queue that the queue* fields below refer to */
u16int queuesel;
u16int queuesize;
u16int queuemsixvect;
u16int queueenable;
/* multiplied by Ctlr.notifyoffmult to locate the queue's notify register */
u16int queuenotifyoff;
/* physical addresses of the descriptor table, avail ring and used ring */
u64int queuedesc;
u64int queueavail;
u64int queueused;
};
/*
 * Device specific configuration, mapped from the virtio capability
 * of type 4 in pciprobe. reset() accesses the first Eaddrlen bytes
 * (mac0..mac2) bytewise as the MAC address; the other fields are
 * not used in this file.
 */
struct Vnetcfg
{
u16int mac0;
u16int mac1;
u16int mac2;
u16int status;
u16int maxqueuepairs;
u16int mtu;
};
/*
 * Header shared by the avail and the used ring (Vqueue.avail and
 * Vqueue.used both point at one); the ring entries follow it.
 */
struct Vring
{
u16int flags;
/* free running index; users mask it with qmask to get a ring slot */
u16int idx;
};
/* One entry of a queue's descriptor table (Vqueue.desc). */
struct Vdesc
{
/* physical address of the buffer (set with PADDR) */
u64int addr;
/* buffer length in bytes */
u32int len;
/* D* descriptor flags */
u16int flags;
/* index of the next descriptor in the chain when Dnext is set */
u16int next;
};
/* One entry of the used ring (Vqueue.usedent), filled in by the device. */
struct Vused
{
/* head descriptor index of the completed chain */
u32int id;
/* byte count reported by the device; rxproc subtracts VheaderSize to get the packet length */
u32int len;
};
/*
 * Per-packet header placed in front of the packet data.
 * txproc and rxproc each allocate a single VheaderSize byte header
 * with smalloc and point every even descriptor at it; no field of
 * it is written by the code in this file.
 */
struct Vheader
{
u8int flags;
u8int segtype;
u16int hlen;
u16int seglen;
u16int csumstart;
u16int csumend;
};
/* Driver side state of one virtqueue. */
struct Vqueue
{
/* embedded so that sleep() and wakeup() can be called on the queue itself */
Rendez;
/* number of descriptors (a power of two, checked in pciprobe) and qsize-1 */
uint qsize;
uint qmask;
/* descriptor table */
Vdesc *desc;
/* avail ring: header, qsize entries, trailing event word (see initqueue) */
Vring *avail;
u16int *availent;
u16int *availevent;
/* used ring: header, qsize entries, trailing event word (see initqueue) */
Vring *used;
Vused *usedent;
u16int *usedevent;
/* value of used->idx up to which completions have been retired */
u16int lastused;
/* statistics reported by ifstat: interrupts seen and notifications sent */
uint nintr;
uint nnote;
/* notify register */
void *notify;
};
/* Per-device state; one Ctlr is allocated by pciprobe for each usable device. */
struct Ctlr {
/* embedded lock taken with ilock(ctlr) in attach */
Lock;
/* serializes commands on the control queue (vctlcmd) */
QLock ctllock;
/* set once attach has enabled the device */
int attached;
/* registers */
Vconfig *cfg;
Vnetcfg *dev;
u8int *isr;
u8int *notify;
u32int notifyoffmult;
/* base address of the BAR holding the common config; matched against edev->port in reset */
uvlong port;
Pcidev *pcidev;
/* next controller in the list built by pciprobe */
Ctlr *next;
/* set by reset once the controller is claimed by an Ether */
int active;
/* device feature words: feat[0] is bits 0-31, feat[1] bits 32-63 */
ulong feat[2];
/* number of queues successfully initialized in pciprobe */
int nqueue;
/* virtioether has 3 queues: rx, tx and ctl */
Vqueue queue[3];
};
/* head of the controller list, filled in by the first reset() call */
static Ctlr *ctlrhead;
/*
 * Sleep condition for a queue: non-zero when the device has moved
 * used->idx past the last completion the driver retired.
 */
static int
vhasroom(void *v)
{
	Vqueue *vq;

	vq = v;
	if(vq->lastused == vq->used->idx)
		return 0;
	return 1;
}
/*
 * Tell the device that queue x has new avail entries, unless the
 * device set Unonotify in the used ring flags. The queue index is
 * written as a 16-bit value to the queue's notify register.
 */
static void
vqnotify(Ctlr *ctlr, int x)
{
	Vqueue *vq;

	/* make earlier ring updates visible before looking at the flags */
	coherence();
	vq = &ctlr->queue[x];
	if((vq->used->flags & Unonotify) == 0){
		vq->nnote++;
		*((u16int*)vq->notify) = x;
	}
}
/*
 * Transmit kproc, started from attach with the Ether as argument.
 * Takes blocks from edev->oq and queues them on the tx virtqueue.
 * Descriptors are used in fixed pairs: even descriptor j carries
 * the shared packet header, odd descriptor j|1 the packet data.
 */
static void
txproc(void *v)
{
Vheader *header;
Block **blocks;
Ether *edev;
Ctlr *ctlr;
Vqueue *q;
Vused *u;
Block *b;
int i, j;
edev = v;
ctlr = edev->ctlr;
q = &ctlr->queue[Vtxq];
/* one header buffer is shared by all pairs; blocks[] holds the Block in flight for each pair */
header = smalloc(VheaderSize);
blocks = smalloc(sizeof(Block*) * (q->qsize/2));
for(i = 0; i < q->qsize/2; i++){
j = i << 1;
q->desc[j].addr = PADDR(header);
q->desc[j].len = VheaderSize;
q->desc[j].next = j | 1;
q->desc[j].flags = Dnext;
/* both halves of the avail ring name pair i as head */
q->availent[i] = q->availent[i + q->qsize/2] = j;
j |= 1;
q->desc[j].next = 0;
q->desc[j].flags = 0;
}
/* ask for completion interrupts on this queue */
q->avail->flags &= ~Rnointerrupt;
/* after an error, install the error label again and carry on */
while(waserror())
;
while((b = qbread(edev->oq, 1000000)) != nil){
for(;;){
/* retire completed packets */
while((i = q->lastused) != q->used->idx){
u = &q->usedent[i & q->qmask];
/* the used id is a head descriptor; halve it to get the pair index */
i = (u->id & q->qmask) >> 1;
if(blocks[i] == nil)
break;
freeb(blocks[i]);
blocks[i] = nil;
q->lastused++;
}
/* have free slot? */
i = q->avail->idx & (q->qmask >> 1);
if(blocks[i] == nil)
break;
/* ring full, wait and retry */
if(!vhasroom(q))
sleep(q, vhasroom, q);
}
/* slot is free, fill in descriptor */
blocks[i] = b;
j = (i << 1) | 1;
q->desc[j].addr = PADDR(b->rp);
q->desc[j].len = BLEN(b);
/* descriptor must be written before the device can see the new avail index */
coherence();
q->avail->idx++;
vqnotify(ctlr, Vtxq);
}
pexit("ether out queue closed", 1);
}
/*
 * Receive kproc, started from attach with the Ether as argument.
 * Keeps the rx virtqueue stocked with empty blocks and passes
 * filled blocks to etheriq. Descriptors are used in fixed pairs:
 * even descriptor j receives the header, odd descriptor j|1 the
 * packet data; both are marked Dwrite for the device to fill.
 */
static void
rxproc(void *v)
{
Vheader *header;
Block **blocks;
Ether *edev;
Ctlr *ctlr;
Vqueue *q;
Vused *u;
Block *b;
int i, j;
edev = v;
ctlr = edev->ctlr;
q = &ctlr->queue[Vrxq];
/* one header buffer is shared by all pairs; blocks[] holds the Block posted for each pair */
header = smalloc(VheaderSize);
blocks = smalloc(sizeof(Block*) * (q->qsize/2));
for(i = 0; i < q->qsize/2; i++){
j = i << 1;
q->desc[j].addr = PADDR(header);
q->desc[j].len = VheaderSize;
q->desc[j].next = j | 1;
q->desc[j].flags = Dwrite|Dnext;
/* both halves of the avail ring name pair i as head */
q->availent[i] = q->availent[i + q->qsize/2] = j;
j |= 1;
q->desc[j].next = 0;
q->desc[j].flags = Dwrite;
}
/* ask for completion interrupts on this queue */
q->avail->flags &= ~Rnointerrupt;
/* after an error, install the error label again and carry on */
while(waserror())
;
for(;;){
/* replenish receive ring */
do {
i = q->avail->idx & (q->qmask >> 1);
/* stop when the next pair still has a block posted or no memory is available */
if(blocks[i] != nil)
break;
if((b = iallocb(ETHERMAXTU)) == nil)
break;
blocks[i] = b;
j = (i << 1) | 1;
q->desc[j].addr = PADDR(b->rp);
q->desc[j].len = BALLOC(b);
/* descriptor must be written before the device can see the new avail index */
coherence();
q->avail->idx++;
} while(q->avail->idx != q->used->idx);
vqnotify(ctlr, Vrxq);
/* wait for any packets to complete */
if(!vhasroom(q))
sleep(q, vhasroom, q);
/* retire completed packets */
while((i = q->lastused) != q->used->idx) {
u = &q->usedent[i & q->qmask];
/* the used id is a head descriptor; halve it to get the pair index */
i = (u->id & q->qmask) >> 1;
if((b = blocks[i]) == nil)
break;
blocks[i] = nil;
/* u->len includes the header, which went into the separate header buffer */
b->wp = b->rp + u->len - VheaderSize;
etheriq(edev, b);
q->lastused++;
}
}
}
/*
 * Send one command on the control queue and wait for the answer.
 * class and cmd form the 2 byte command header, data/ndata is the
 * command payload. Uses descriptors 0-2 of the control queue as a
 * chain header -> data -> ack, serialized by ctlr->ctllock.
 * Returns -1 when the control queue has fewer than 3 descriptors,
 * otherwise the ack byte; a non-zero ack is also printed.
 */
static int
vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata)
{
uchar hdr[2], ack[1];
Ctlr *ctlr;
Vqueue *q;
Vdesc *d;
int i;
ctlr = edev->ctlr;
q = &ctlr->queue[Vctlq];
if(q->qsize < 3)
return -1;
qlock(&ctlr->ctllock);
/* after an error, install the error label again and carry on */
while(waserror())
;
/* preset ack to a non-zero value so an untouched ack is not taken for success */
ack[0] = 0x55;
hdr[0] = class;
hdr[1] = cmd;
d = &q->desc[0];
d->addr = PADDR(hdr);
d->len = sizeof(hdr);
d->next = 1;
d->flags = Dnext;
d++;
d->addr = PADDR(data);
d->len = ndata;
d->next = 2;
d->flags = Dnext;
d++;
/* the last descriptor is the only one the device writes */
d->addr = PADDR(ack);
d->len = sizeof(ack);
d->next = 0;
d->flags = Dwrite;
i = q->avail->idx & q->qmask;
q->availent[i] = 0;
coherence();
/* interrupts are enabled on this queue only while a command is outstanding */
q->avail->flags &= ~Rnointerrupt;
q->avail->idx++;
vqnotify(ctlr, Vctlq);
while(!vhasroom(q))
sleep(q, vhasroom, q);
q->lastused = q->used->idx;
q->avail->flags |= Rnointerrupt;
qunlock(&ctlr->ctllock);
poperror();
if(ack[0] != 0)
print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, ack[0]);
return ack[0];
}
/*
 * Interrupt handler registered in reset with the Ether as argument.
 * When bit 0 of the ISR register is set, wakes up every queue that
 * has unretired used entries and counts the interrupt for it.
 */
static void
interrupt(Ureg*, void* arg)
{
	Ether *edev;
	Ctlr *ctlr;
	Vqueue *vq;
	int qi;

	edev = arg;
	ctlr = edev->ctlr;
	if((*ctlr->isr & 1) == 0)
		return;
	for(qi = 0; qi < ctlr->nqueue; qi++){
		vq = &ctlr->queue[qi];
		if(!vhasroom(vq))
			continue;
		vq->nintr++;
		wakeup(vq);
	}
}
/*
 * Ether attach hook. On the first call it enables the virtqueues
 * set up by pciprobe, tells the device that the driver is ready
 * and starts the rx and tx kprocs; later calls return immediately.
 */
static void
attach(Ether* edev)
{
	char name[KNAMELEN];
	Ctlr* ctlr;
	int i;

	ctlr = edev->ctlr;
	ilock(ctlr);
	if(ctlr->attached){
		iunlock(ctlr);
		return;
	}
	ctlr->attached = 1;

	/*
	 * enable the queues first: the virtio 1.0 initialization
	 * sequence completes all virtqueue setup before DRIVER_OK
	 * is set, after which the device is live.
	 */
	for(i = 0; i < ctlr->nqueue; i++){
		ctlr->cfg->queuesel = i;
		ctlr->cfg->queueenable = 1;
	}
	coherence();

	/* driver is ready */
	ctlr->cfg->status |= Sdriverok;

	iunlock(ctlr);

	/* start kprocs */
	snprint(name, sizeof name, "#l%drx", edev->ctlrno);
	kproc(name, rxproc, edev);
	snprint(name, sizeof name, "#l%dtx", edev->ctlrno);
	kproc(name, txproc, edev);
}
/*
 * Ether ifstat hook: formats the device feature words, the status
 * register and per-queue counters into a READSTR sized buffer and
 * returns the part selected by offset/n through readstr.
 */
static long
ifstat(Ether *edev, void *a, long n, ulong offset)
{
	Ctlr *ctlr;
	Vqueue *vq;
	char *buf;
	int len, qi;

	ctlr = edev->ctlr;
	buf = smalloc(READSTR);

	len = snprint(buf, READSTR, "devfeat %32.32lub %32.32lub\n", ctlr->feat[1], ctlr->feat[0]);
	len += snprint(buf+len, READSTR-len, "devstatus %8.8ub\n", ctlr->cfg->status);

	qi = 0;
	while(qi < ctlr->nqueue){
		vq = &ctlr->queue[qi];
		len += snprint(buf+len, READSTR-len,
			"vq%d %#p size %d avail->idx %d used->idx %d lastused %hud nintr %ud nnote %ud\n",
			qi, vq, vq->qsize, vq->avail->idx, vq->used->idx, vq->lastused, vq->nintr, vq->nnote);
		qi++;
	}

	n = readstr(offset, a, n, buf);
	free(buf);
	return n;
}
/*
 * Ether shutdown hook: writes 0 to the device status register
 * (the same write pciprobe uses as its device reset) and then
 * clears PCI bus mastering for the device.
 */
static void
shutdown(Ether* edev)
{
Ctlr *ctlr = edev->ctlr;
coherence();
ctlr->cfg->status = 0;
coherence();
pciclrbme(ctlr->pcidev);
}
/*
 * Ether promiscuous hook: sends CtrlRx/CmdPromisc on the control
 * queue with a one byte payload of 1 (on) or 0 (off).
 */
static void
promiscuous(void *arg, int on)
{
	Ether *edev;
	uchar flag[1];

	edev = arg;
	flag[0] = on ? 1 : 0;
	vctlcmd(edev, CtrlRx, CmdPromisc, flag, sizeof(flag));
}
/*
 * Ether multicast hook: the address and on/off arguments are
 * ignored; all-multicast reception is switched on through
 * CtrlRx/CmdAllmulti whenever edev->nmaddr is greater than zero
 * and off otherwise.
 */
static void
multicast(void *arg, uchar*, int)
{
	Ether *edev;
	uchar flag[1];

	edev = arg;
	flag[0] = (edev->nmaddr > 0) ? 1 : 0;
	vctlcmd(edev, CtrlRx, CmdAllmulti, flag, sizeof(flag));
}
/*
 * Allocate the descriptor table, avail ring and used ring for a
 * queue of size entries and initialize the Vqueue fields.
 * Interrupts for the queue start out suppressed (Rnointerrupt).
 * Returns 0 on success; on allocation failure everything allocated
 * so far is freed and -1 is returned.
 */
static int
initqueue(Vqueue *q, int size)
{
	uchar *ring;

	q->desc = mallocalign(VdescSize*size, 16, 0, 0);
	if(q->desc == nil)
		return -1;

	/* avail ring: header, size 16-bit entries, trailing event word */
	ring = mallocalign(VringSize + 2*size + 2, 2, 0, 0);
	if(ring == nil)
		goto Nodesc;
	q->avail = (void*)ring;
	q->availent = (void*)(ring + VringSize);
	q->availevent = (void*)(ring + VringSize + sizeof(u16int)*size);

	/* used ring: header, size Vused entries, trailing event word */
	ring = mallocalign(VringSize + VusedSize*size + 2, 4, 0, 0);
	if(ring == nil){
		free(q->avail);
		q->avail = nil;
		goto Nodesc;
	}
	q->used = (void*)ring;
	q->usedent = (void*)(ring + VringSize);
	q->usedevent = (void*)(ring + VringSize + VusedSize*size);

	q->qsize = size;
	q->qmask = q->qsize - 1;
	q->lastused = q->avail->idx = q->used->idx = 0;
	q->avail->flags |= Rnointerrupt;
	return 0;

Nodesc:
	free(q->desc);
	q->desc = nil;
	return -1;
}
/*
 * Callback for pcienumcaps (see virtiocap). Returns 0 for a
 * capability with id 9 whose type byte at off+3 equals typ and
 * whose BAR index at off+4 names a valid, non-empty memory BAR;
 * returns 1 for anything else.
 * NOTE(review): 9 is presumably the PCI vendor specific capability
 * id that virtio uses -- confirm against the spec.
 */
static int
matchvirtiocfgcap(Pcidev *p, int cap, int off, int typ)
{
	int bar;

	if(cap != 9)
		return 1;
	if(pcicfgr8(p, off+3) != typ)
		return 1;

	/* skip invalid or non memory bars */
	bar = pcicfgr8(p, off+4);
	if(bar < 0 || bar >= nelem(p->mem))
		return 1;
	if(p->mem[bar].size == 0)
		return 1;
	if((p->mem[bar].bar & 3) != 0)
		return 1;
	return 0;
}
/*
 * Look up the virtio capability of the given type on device p by
 * running matchvirtiocfgcap over its capabilities. The result of
 * pcienumcaps is returned unchanged; callers treat a negative value
 * as "not found" and otherwise use it as the config space offset.
 */
static int
virtiocap(Pcidev *p, int typ)
{
	int off;

	off = pcienumcaps(p, matchvirtiocfgcap, typ);
	return off;
}
/*
 * Map the register region described by the virtio capability at
 * config space offset cap. The capability supplies the BAR index
 * (cap+4), the offset within the BAR (cap+8) and the region length
 * (cap+12). With size <= 0 the whole region is mapped; otherwise
 * size bytes are mapped, provided the region is at least that long.
 * Returns nil when cap is negative, the region is too short or it
 * does not fit inside the BAR.
 */
static void*
virtiomapregs(Pcidev *p, int cap, int size)
{
	uvlong addr;
	int bar, len;

	if(cap < 0)
		return nil;

	bar = pcicfgr8(p, cap+4) % nelem(p->mem);
	addr = pcicfgr32(p, cap+8);
	len = pcicfgr32(p, cap+12);

	if(size > 0 && len < size)
		return nil;
	if(size <= 0)
		size = len;
	if(addr+len > p->mem[bar].size)
		return nil;

	/* strip the low flag bits of the BAR to get its base address */
	addr += p->mem[bar].bar & ~0xFULL;
	return vmap(addr, size);
}
static Ctlr*
pciprobe(void)
{
Ctlr *c, *h, *t;
Pcidev *p;
Vconfig *cfg;
int bar, cap, n, i;
h = t = nil;
/* §4.1.2 PCI Device Discovery */
for(p = nil; p = pcimatch(p, 0x1AF4, 0x1041);){
/* non-transitional devices will have a revision > 0 */
if(p->rid == 0)
continue;
if((cap = virtiocap(p, 1)) < 0)
continue;
bar = pcicfgr8(p, cap+4) % nelem(p->mem);
cfg = virtiomapregs(p, cap, sizeof(Vconfig));
if(cfg == nil)
continue;
if((c = mallocz(sizeof(Ctlr), 1)) == nil){
print("ethervirtio: no memory for Ctlr\n");
break;
}
c->cfg = cfg;
c->pcidev = p;
c->port = p->mem[bar].bar & ~0xFULL;
pcienable(p);
c->dev = virtiomapregs(p, virtiocap(p, 4), sizeof(Vnetcfg));
if(c->dev == nil)
goto Baddev;
c->isr = virtiomapregs(p, virtiocap(p, 3), 0);
if(c->isr == nil)
goto Baddev;
cap = virtiocap(p, 2);
c->notify = virtiomapregs(p, cap, 0);
if(c->notify == nil)
goto Baddev;
c->notifyoffmult = pcicfgr32(p, cap+16);
/* device reset */
coherence();
cfg->status = 0;
while(cfg->status != 0)
delay(1);
cfg->status = Sacknowledge|Sdriver;
/* negotiate feature bits */
cfg->devfeatsel = 1;
c->feat[1] = cfg->devfeat;
cfg->devfeatsel = 0;
c->feat[0] = cfg->devfeat;
cfg->drvfeatsel = 1;
cfg->drvfeat = c->feat[1] & Fversion1;
cfg->drvfeatsel = 0;
cfg->drvfeat = c->feat[0] & (Fmac|Fctrlvq|Fctrlrx);
for(i=0; i<nelem(c->queue); i++){
cfg->queuesel = i;
n = cfg->queuesize;
if(n == 0 || (n & (n-1)) != 0){
if(i < 2)
print("ethervirtio: queue %d has invalid size %d\n", i, n);
break;
}
if(initqueue(&c->queue[i], n) < 0)
break;
c->queue[i].notify = c->notify + c->notifyoffmult * cfg->queuenotifyoff;
coherence();
cfg->queuedesc = PADDR(c->queue[i].desc);
cfg->queueavail = PADDR(c->queue[i].avail);
cfg->queueused = PADDR(c->queue[i].used);
}
if(i < 2){
print("ethervirtio: no queues\n");
Baddev:
pcidisable(p);
/* TODO, vunmap */
free(c);
continue;
}
c->nqueue = i;
if(h == nil)
h = c;
else
t->next = c;
t = c;
}
return h;
}
/*
 * Ether reset hook registered by ethervirtio10link. Probes PCI
 * while the controller list is empty, claims the first inactive
 * controller whose port matches edev->port (0 matches any), fills
 * in edev, sets up the MAC address and enables bus mastering and
 * the interrupt. Returns 0 on success, -1 if no controller is free.
 */
static int
reset(Ether* edev)
{
static uchar zeros[Eaddrlen];
Ctlr *ctlr;
int i;
if(ctlrhead == nil)
ctlrhead = pciprobe();
for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){
if(ctlr->active)
continue;
if(edev->port == 0 || edev->port == ctlr->port){
ctlr->active = 1;
break;
}
}
if(ctlr == nil)
return -1;
edev->ctlr = ctlr;
edev->port = ctlr->port;
edev->irq = ctlr->pcidev->intl;
edev->tbdf = ctlr->pcidev->tbdf;
edev->mbps = 1000;
edev->link = 1;
/*
 * take the MAC from the device when it offers Fmac and none is
 * configured; otherwise write the configured address into the
 * device configuration registers.
 */
if((ctlr->feat[0] & Fmac) != 0 && memcmp(edev->ea, zeros, Eaddrlen) == 0){
for(i = 0; i < Eaddrlen; i++)
edev->ea[i] = ((uchar*)ctlr->dev)[i];
} else {
for(i = 0; i < Eaddrlen; i++)
((uchar*)ctlr->dev)[i] = edev->ea[i];
}
edev->arg = edev;
edev->attach = attach;
edev->shutdown = shutdown;
edev->ifstat = ifstat;
/* multicast and promiscuous need the control queue with rx mode commands */
if((ctlr->feat[0] & (Fctrlvq|Fctrlrx)) == (Fctrlvq|Fctrlrx)){
edev->multicast = multicast;
edev->promiscuous = promiscuous;
}
pcisetbme(ctlr->pcidev);
intrenable(edev->irq, interrupt, edev, edev->tbdf, edev->name);
return 0;
}
/*
 * Link hook: registers this driver's reset routine with the
 * ethernet layer under the card type name "virtio10".
 */
void
ethervirtio10link(void)
{
	addethercard("virtio10", reset);
}

View file

@ -80,6 +80,7 @@ link
etherwpi pci wifi
etherrt2860 pci wifi
ethervirtio pci
ethervirtio10 pci
ethermedium
pcmciamodem
netdevmedium
@ -108,6 +109,7 @@ misc
sdiahci pci sdscsi led
sdodin pci sdscsi led
sdvirtio pci sdscsi
sdvirtio10 pci sdscsi
sdmmc pci pmmc
sdnvme pci
sdloop

View file

@ -1,3 +1,7 @@
/*
* virtio ethernet driver implementing the legacy interface:
* http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
*/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"

808
sys/src/9/pc/sdvirtio10.c Normal file
View file

@ -0,0 +1,808 @@
/*
* virtio 1.0 disk driver
* http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
*
* In contrast to sdvirtio.c, this driver handles the non-legacy
* interface for virtio disk which uses mmio for all register accesses
* and requires an elaborate pci capability structure dance to get working.
*
* It is kind of pointless as it is most likely slower than
* port i/o (harder to emulate on the pc platform).
*
* The reason why this driver is needed is that vultr sets the
* disable-legacy=on option in the -device parameter for qemu
* on their hypervisor.
*/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "../port/pci.h"
#include "ureg.h"
#include "../port/error.h"
#include "../port/sd.h"
/* short type names for the structures declared below */
typedef struct Vscsidev Vscsidev;
typedef struct Vblkdev Vblkdev;
typedef struct Vconfig Vconfig;
typedef struct Vring Vring;
typedef struct Vdesc Vdesc;
typedef struct Vused Vused;
typedef struct Vqueue Vqueue;
typedef struct Vdev Vdev;
/* device types */
/* added to 0x1040 to form the PCI device id matched in viopnpdevs */
enum {
TypBlk = 2,
TypSCSI = 8,
};
/* status flags */
/* bits of the Vconfig status register */
enum {
Acknowledge = 1,
Driver = 2,
DriverOk = 4,
Failed = 0x80,
};
/* descriptor flags */
enum {
Next = 1,
Write = 2,
Indirect = 4,
};
/* struct sizes */
/* byte size of the Vring header, used for ring layout in mkvqueue */
enum {
VringSize = 4,
};
/* upper bounds for the cdb and sense areas of the buffers built in vioscsireq; checked in viopnp */
enum {
CDBSIZE = 32,
SENSESIZE = 96,
};
/*
 * Device specific configuration of a virtio scsi device, mapped
 * from the virtio capability of type 4 in viopnp. Only sense_size,
 * cdb_size and max_target are read by the code in this file.
 */
struct Vscsidev
{
u32int num_queues;
u32int seg_max;
u32int max_sectors;
u32int cmd_per_lun;
u32int event_info_size;
/* size of the sense area in responses; must not exceed SENSESIZE */
u32int sense_size;
/* size of the cdb area in requests; must not exceed CDBSIZE */
u32int cdb_size;
u16int max_channel;
/* used as the number of units of the controller; 0 makes viopnp skip the device */
u16int max_target;
u32int max_lun;
};
/*
 * Device specific configuration of a virtio block device, mapped
 * from the virtio capability of type 4 in viopnp.
 */
struct Vblkdev
{
/* vioonline uses this as the number of 512 byte sectors */
u64int capacity;
};
/*
 * Register block reached through the virtio capability of type 1
 * (see viopnpdevs). The fields are read and written in place
 * through the mapping, so their order and widths are the register
 * layout.
 */
struct Vconfig {
/* selects which 32-bit word of the device features devfeat shows */
u32int devfeatsel;
u32int devfeat;
/* selects which 32-bit word of the driver features drvfeat writes */
u32int drvfeatsel;
u32int drvfeat;
u16int msixcfg;
u16int nqueues;
/* device status, a combination of the status flags */
u8int status;
u8int cfggen;
/* selects the queue that the queue* fields below refer to */
u16int queuesel;
u16int queuesize;
u16int queuemsixvect;
u16int queueenable;
/* multiplied by Vdev.notifyoffmult to locate the queue's notify register */
u16int queuenotifyoff;
/* physical addresses of the descriptor table, avail ring and used ring */
u64int queuedesc;
u64int queueavail;
u64int queueused;
};
/*
 * Header shared by the avail and the used ring (Vqueue.avail and
 * Vqueue.used both point at one); the ring entries follow it.
 */
struct Vring
{
u16int flags;
/* free running index; users mask it with size-1 to get a ring slot */
u16int idx;
};
/* One entry of a queue's descriptor table (Vqueue.desc). */
struct Vdesc
{
/* physical address of the buffer (set with PADDR) */
u64int addr;
/* buffer length in bytes */
u32int len;
/* descriptor flags: Next, Write, Indirect */
u16int flags;
/* next descriptor in the chain; also links the free list while unused */
u16int next;
};
/* One entry of the used ring (Vqueue.usedent), filled in by the device. */
struct Vused
{
/* head descriptor index of the completed chain; vqinterrupt uses it to index rock[] */
u32int id;
u32int len;
};
/* Driver side state of one virtqueue, allocated by mkvqueue. */
struct Vqueue
{
/* embedded lock protecting the free list and ring updates (ilock(q)) */
Lock;
/* owning device */
Vdev *dev;
/* this queue's notify register */
void *notify;
/* queue number, written to the notify register */
int idx;
/* number of descriptors (a power of two, checked in viopnpdevs) */
int size;
/* index of the first free descriptor and the number of free descriptors */
int free;
int nfree;
Vdesc *desc;
Vring *avail;
u16int *availent;
u16int *availevent;
Vring *used;
Vused *usedent;
u16int *usedevent;
/* value of used->idx up to which completions have been processed */
u16int lastused;
/* one slot per descriptor: the waiter's struct Rock, stored under the chain's head index */
void *rock[];
};
/* Per-device state; one Vdev is allocated by viopnpdevs for each device found. */
struct Vdev
{
/* TypBlk or TypSCSI */
int typ;
Pcidev *pci;
/* base address of the BAR holding the common config registers */
uvlong port;
/* device feature words: feat[0] is bits 0-31, feat[1] bits 32-63 */
ulong feat[2];
/* number of entries of queue[] that were set up */
int nqueue;
Vqueue *queue[16];
void *dev; /* device specific config (for scsi) */
/* registers */
Vconfig *cfg;
u8int *isr;
u8int *notify;
u32int notifyoffmult;
/* next device in the list built by viopnpdevs */
Vdev *next;
};
/*
 * Allocate a Vqueue with size descriptors. The descriptor table
 * and avail ring share one page aligned area; the used ring starts
 * on the following page boundary of the same allocation. All
 * descriptors are put on the free list.
 * Returns nil (after printing a message) when memory is short.
 */
static Vqueue*
mkvqueue(int size)
{
	Vqueue *vq;
	uchar *base, *p;
	int i, head;

	vq = malloc(sizeof(*vq) + sizeof(void*)*size);
	base = mallocalign(
		PGROUND(sizeof(Vdesc)*size +
			VringSize +
			sizeof(u16int)*size +
			sizeof(u16int)) +
		PGROUND(VringSize +
			sizeof(Vused)*size +
			sizeof(u16int)),
		BY2PG, 0, 0);
	if(vq == nil || base == nil){
		print("virtio: no memory for Vqueue\n");
		free(base);
		free(vq);
		return nil;
	}

	/* descriptor table followed by the avail ring */
	p = base;
	vq->desc = (void*)p;
	p += sizeof(Vdesc)*size;
	vq->avail = (void*)p;
	p += VringSize;
	vq->availent = (void*)p;
	p += sizeof(u16int)*size;
	vq->availevent = (void*)p;
	p += sizeof(u16int);

	/* used ring on the next page boundary */
	p = (uchar*)PGROUND((uintptr)p);
	vq->used = (void*)p;
	p += VringSize;
	vq->usedent = (void*)p;
	p += sizeof(Vused)*size;
	vq->usedevent = (void*)p;

	/* chain all descriptors into the free list, last one first */
	vq->nfree = vq->size = size;
	head = -1;
	for(i = 0; i < size; i++){
		vq->desc[i].next = head;
		head = i;
	}
	vq->free = head;

	return vq;
}
/*
 * Callback for pcienumcaps (see virtiocap). Returns 0 for a
 * capability with id 9 whose type byte at off+3 equals typ and
 * whose BAR index at off+4 names a valid, non-empty memory BAR;
 * returns 1 for anything else.
 * NOTE(review): 9 is presumably the PCI vendor specific capability
 * id that virtio uses -- confirm against the spec.
 */
static int
matchvirtiocfgcap(Pcidev *p, int cap, int off, int typ)
{
	int bar;

	if(cap != 9)
		return 1;
	if(pcicfgr8(p, off+3) != typ)
		return 1;

	/* skip invalid or non memory bars */
	bar = pcicfgr8(p, off+4);
	if(bar < 0 || bar >= nelem(p->mem))
		return 1;
	if(p->mem[bar].size == 0)
		return 1;
	if((p->mem[bar].bar & 3) != 0)
		return 1;
	return 0;
}
/*
 * Look up the virtio capability of the given type on device p by
 * running matchvirtiocfgcap over its capabilities. The result of
 * pcienumcaps is returned unchanged; callers treat a negative value
 * as "not found" and otherwise use it as the config space offset.
 */
static int
virtiocap(Pcidev *p, int typ)
{
	int off;

	off = pcienumcaps(p, matchvirtiocfgcap, typ);
	return off;
}
/*
 * Map the register region described by the virtio capability at
 * config space offset cap. The capability supplies the BAR index
 * (cap+4), the offset within the BAR (cap+8) and the region length
 * (cap+12). With size <= 0 the whole region is mapped; otherwise
 * size bytes are mapped, provided the region is at least that long.
 * Returns nil when cap is negative, the region is too short or it
 * does not fit inside the BAR.
 */
static void*
virtiomapregs(Pcidev *p, int cap, int size)
{
	uvlong addr;
	int bar, len;

	if(cap < 0)
		return nil;

	bar = pcicfgr8(p, cap+4) % nelem(p->mem);
	addr = pcicfgr32(p, cap+8);
	len = pcicfgr32(p, cap+12);

	if(size > 0 && len < size)
		return nil;
	if(size <= 0)
		size = len;
	if(addr+len > p->mem[bar].size)
		return nil;

	/* strip the low flag bits of the BAR to get its base address */
	addr += p->mem[bar].bar & ~0xFULL;
	return vmap(addr, size);
}
static Vdev*
viopnpdevs(int typ)
{
Vdev *vd, *h, *t;
Vconfig *cfg;
Vqueue *q;
Pcidev *p;
int cap, bar;
int n, i;
h = t = nil;
for(p = nil; p = pcimatch(p, 0x1AF4, 0x1040+typ);){
if(p->rid == 0)
continue;
if((cap = virtiocap(p, 1)) < 0)
continue;
bar = pcicfgr8(p, cap+4) % nelem(p->mem);
cfg = virtiomapregs(p, cap, sizeof(Vconfig));
if(cfg == nil)
continue;
if((vd = malloc(sizeof(*vd))) == nil){
print("virtio: no memory for Vdev\n");
break;
}
vd->port = p->mem[bar].bar & ~0xFULL;
vd->typ = typ;
vd->pci = p;
vd->cfg = cfg;
pcienable(p);
vd->isr = virtiomapregs(p, virtiocap(p, 3), 0);
if(vd->isr == nil){
Baddev:
pcidisable(p);
/* TODO: vunmap */
free(vd);
continue;
}
cap = virtiocap(p, 2);
vd->notify = virtiomapregs(p, cap, 0);
if(vd->notify == nil)
goto Baddev;
vd->notifyoffmult = pcicfgr32(p, cap+16);
/* reset */
cfg->status = 0;
while(cfg->status != 0)
delay(1);
cfg->status = Acknowledge|Driver;
/* negotiate feature bits */
cfg->devfeatsel = 1;
vd->feat[1] = cfg->devfeat;
cfg->devfeatsel = 0;
vd->feat[0] = cfg->devfeat;
cfg->drvfeatsel = 1;
cfg->drvfeat = vd->feat[1] & 1;
cfg->drvfeatsel = 0;
cfg->drvfeat = 0;
for(i=0; i<nelem(vd->queue); i++){
cfg->queuesel = i;
n = cfg->queuesize;
if(n == 0 || (n & (n-1)) != 0)
break;
if((q = mkvqueue(n)) == nil)
break;
q->notify = vd->notify + vd->notifyoffmult * cfg->queuenotifyoff;
q->dev = vd;
q->idx = i;
vd->queue[i] = q;
coherence();
cfg->queuedesc = PADDR(q->desc);
cfg->queueavail = PADDR(q->avail);
cfg->queueused = PADDR(q->used);
}
vd->nqueue = i;
if(h == nil)
h = vd;
else
t->next = vd;
t = vd;
}
return h;
}
/*
 * Completion record for one outstanding request. It lives on the
 * stack of the process waiting in vqio and is referenced from
 * Vqueue.rock[] under the request's head descriptor index.
 */
struct Rock {
/* set to 1 by vqinterrupt when the request has completed */
int done;
/* where the waiting process sleeps; woken by vqinterrupt */
Rendez *sleep;
};
/*
 * Process all new entries of the used ring of q: mark the waiting
 * request (if any) as done and wake its process, then return the
 * descriptors of the completed chain to the free list.
 * Takes the queue lock itself; called from viointerrupt and, as a
 * poll after a timeout, from vqio.
 */
static void
vqinterrupt(Vqueue *q)
{
int id, free, m;
struct Rock *r;
Rendez *z;
m = q->size-1;
ilock(q);
while((q->lastused ^ q->used->idx) & m){
id = q->usedent[q->lastused++ & m].id;
if(r = q->rock[id]){
q->rock[id] = nil;
/* read sleep before setting done: the Rock may be gone once done is set */
z = r->sleep;
r->done = 1; /* hands off */
if(z != nil)
wakeup(z);
}
/* walk the chain from its head, pushing each descriptor on the free list */
do {
free = id;
id = q->desc[free].next;
q->desc[free].next = q->free;
q->free = free;
q->nfree++;
} while(q->desc[free].flags & Next);
}
iunlock(q);
}
/*
 * Interrupt handler registered in vioenable with the Vdev as
 * argument. When bit 0 of the ISR register is set, processes the
 * request queue: queue 2 for scsi devices, queue 0 otherwise.
 */
static void
viointerrupt(Ureg *, void *arg)
{
	Vdev *vd;
	int qi;

	vd = arg;
	if((vd->isr[0] & 1) == 0)
		return;
	qi = 0;
	if(vd->typ == TypSCSI)
		qi = 2;
	vqinterrupt(vd->queue[qi]);
}
/* Sleep condition used by vqio: the done flag of the struct Rock passed as arg. */
static int
viodone(void *arg)
{
	struct Rock *r;

	r = arg;
	return r->done;
}
/*
 * Submit the descriptor chain starting at head on q and wait until
 * the device has completed it. Must be called with q locked (the
 * callers ilock(q) while building the chain); the lock is released
 * here once the chain is on the avail ring.
 */
static void
vqio(Vqueue *q, int head)
{
struct Rock rock;
rock.done = 0;
rock.sleep = &up->sleep;
q->rock[head] = &rock;
q->availent[q->avail->idx & (q->size-1)] = head;
/* the avail entry must be visible before the index moves */
coherence();
q->avail->idx++;
iunlock(q);
/* notify the device unless it set flag bit 0 in the used ring */
if((q->used->flags & 1) == 0)
*((u16int*)q->notify) = q->idx;
while(!rock.done){
/* after an error from tsleep, install the error label again and retry */
while(waserror())
;
tsleep(rock.sleep, viodone, &rock, 1000);
poperror();
/* not done after the timeout: poll the used ring ourselves */
if(!rock.done)
vqinterrupt(q);
}
}
/*
 * Issue one virtio block request on queue 0 and wait for it.
 * typ is stored in the request header; a, when not nil, is the data
 * buffer of count*secsize bytes for sector lba. With typ == 0 the
 * data descriptor is marked Write so the device fills the buffer;
 * any other typ makes it device-readable. The chain is
 * header -> [data] -> status.
 * Returns the status byte written by the device; it is preset to
 * 0xFF, so 0 is only returned if the device stored it.
 */
static int
vioblkreq(Vdev *vd, int typ, void *a, long count, long secsize, uvlong lba)
{
	int need, free, head;
	Vqueue *q;
	Vdesc *d;

	u8int status;
	struct Vioblkreqhdr {
		u32int	typ;
		u32int	prio;
		u64int	lba;
	} req;

	need = 2;
	if(a != nil)
		need = 3;

	status = -1;
	req.typ = typ;
	req.prio = 0;
	req.lba = lba;

	q = vd->queue[0];
	ilock(q);
	while(q->nfree < need){
		iunlock(q);

		/*
		 * poperror only when no error was raised: an error
		 * from tsleep has already removed the error label,
		 * popping again would drop the caller's label.
		 */
		if(!waserror()){
			tsleep(&up->sleep, return0, 0, 500);
			poperror();
		}

		ilock(q);
	}

	/* take the descriptors off the free list, head first */
	head = free = q->free;

	d = &q->desc[free]; free = d->next;
	d->addr = PADDR(&req);
	d->len = sizeof(req);
	d->flags = Next;

	if(a != nil){
		d = &q->desc[free]; free = d->next;
		d->addr = PADDR(a);
		d->len = secsize*count;
		d->flags = typ ? Next : (Write|Next);
	}

	d = &q->desc[free]; free = d->next;
	d->addr = PADDR(&status);
	d->len = sizeof(status);
	d->flags = Write;

	q->free = free;
	q->nfree -= need;

	/* queue io, unlock and wait for completion */
	vqio(q, head);

	return status;
}
/*
 * Execute the scsi request r on queue 2 of a virtio scsi device and
 * wait for it. The chain is request -> [data out] -> response ->
 * [data in]; the data descriptor is present only when r->dlen > 0
 * and its position depends on r->write.
 * On return r->status, r->rlen and, when the device reported sense
 * data, r->sense and SDvalidsense are filled in from the response.
 * Returns r->status.
 */
static int
vioscsireq(SDreq *r)
{
	u8int resp[4+4+2+2+SENSESIZE];
	u8int req[8+8+3+CDBSIZE];
	int free, head;
	u32int len;
	Vqueue *q;
	Vdesc *d;
	Vdev *vd;
	SDunit *u;
	Vscsidev *scsi;

	u = r->unit;
	vd = u->dev->ctlr;
	scsi = vd->dev;

	memset(resp, 0, sizeof(resp));
	memset(req, 0, sizeof(req));
	/* request header: address bytes, then the SDreq pointer as tag, then the cdb at offset 8+8+3 */
	req[0] = 1;
	req[1] = u->subno;
	req[2] = r->lun>>8;
	req[3] = r->lun&0xFF;
	*(u64int*)(&req[8]) = (uintptr)r;

	memmove(&req[8+8+3], r->cmd, r->clen);

	q = vd->queue[2];
	ilock(q);
	while(q->nfree < 3){
		iunlock(q);

		/*
		 * poperror only when no error was raised: an error
		 * from tsleep has already removed the error label,
		 * popping again would drop the caller's label.
		 */
		if(!waserror()){
			tsleep(&up->sleep, return0, 0, 500);
			poperror();
		}

		ilock(q);
	}

	/* take the descriptors off the free list, head first */
	head = free = q->free;

	d = &q->desc[free]; free = d->next;
	d->addr = PADDR(req);
	d->len = 8+8+3+scsi->cdb_size;
	d->flags = Next;

	if(r->write && r->dlen > 0){
		d = &q->desc[free]; free = d->next;
		d->addr = PADDR(r->data);
		d->len = r->dlen;
		d->flags = Next;
	}

	d = &q->desc[free]; free = d->next;
	d->addr = PADDR(resp);
	d->len = 4+4+2+2+scsi->sense_size;
	d->flags = Write;

	if(!r->write && r->dlen > 0){
		d->flags |= Next;

		d = &q->desc[free]; free = d->next;
		d->addr = PADDR(r->data);
		d->len = r->dlen;
		d->flags = Write;
	}

	q->free = free;
	q->nfree -= 2 + (r->dlen > 0);

	/* queue io, unlock and wait for completion */
	vqio(q, head);

	/* response+status */
	r->status = resp[10];
	if(resp[11] != 0)
		r->status = SDcheck;

	/* sense_len */
	len = *((u32int*)&resp[0]);
	if(len > 0){
		if(len > sizeof(r->sense))
			len = sizeof(r->sense);
		memmove(r->sense, &resp[4+4+2+2], len);
		r->flags |= SDvalidsense;
	}

	/* data residue */
	len = *((u32int*)&resp[4]);
	if(len > r->dlen)
		r->rlen = 0;
	else
		r->rlen = r->dlen - len;

	return r->status;
}
/*
 * SDifc bio entry: transfer count sectors starting at lba between
 * the unit and buffer a. Scsi devices are handed to scsibio; block
 * devices are served with vioblkreq in pieces of at most 32
 * sectors. Raises Eio when a piece fails; otherwise returns the
 * number of bytes transferred.
 */
static long
viobio(SDunit *u, int lun, int write, void *a, long count, uvlong lba)
{
	long secsize, nsec, limit, done;
	Vdev *vd;

	vd = u->dev->ctlr;
	if(vd->typ == TypSCSI)
		return scsibio(u, lun, write, a, count, lba);

	limit = 32;
	secsize = u->secsize;
	for(done = 0; count > 0; done += nsec*secsize, count -= nsec, lba += nsec){
		nsec = count;
		if(nsec > limit)
			nsec = limit;
		if(vioblkreq(vd, write != 0, (uchar*)a + done, nsec, secsize, lba) != 0)
			error(Eio);
	}
	return done;
}
/*
 * SDifc rio entry: execute the scsi request r. Scsi devices get the
 * request passed through by vioscsireq. For block devices the
 * commands 0x35 and 0x91 are turned into a block request of type 4
 * without data, other commands go through sdfakescsi and
 * sdfakescsirw, and reads and writes end up in viobio.
 */
static int
viorio(SDreq *r)
{
	int i, count, rw;
	uvlong lba;
	SDunit *u;
	Vdev *vd;

	u = r->unit;
	vd = u->dev->ctlr;
	if(vd->typ == TypSCSI)
		return vioscsireq(r);

	switch(r->cmd[0]){
	case 0x35:
	case 0x91:
		if(vioblkreq(vd, 4, nil, 0, 0, 0) != 0)
			return sdsetsense(r, SDcheck, 3, 0xc, 2);
		return sdsetsense(r, SDok, 0, 0, 0);
	}

	i = sdfakescsi(r);
	if(i != SDnostatus)
		return r->status = i;
	i = sdfakescsirw(r, &lba, &count, &rw);
	if(i != SDnostatus)
		return i;

	r->rlen = viobio(u, r->lun, rw == SDwrite, r->data, count, lba);
	return r->status = SDok;
}
/*
 * SDifc online entry. Scsi devices are handed to scsionline. For
 * block devices the capacity is read from the device config; when
 * it differs from u->sectors the unit geometry is updated (512 byte
 * sectors) and 2 is returned, otherwise 1.
 */
static int
vioonline(SDunit *u)
{
	Vblkdev *blk;
	uvlong nsec;
	Vdev *vd;

	vd = u->dev->ctlr;
	if(vd->typ == TypSCSI)
		return scsionline(u);

	blk = vd->dev;
	nsec = blk->capacity;
	if(u->sectors == nsec)
		return 1;
	u->sectors = nsec;
	u->secsize = 512;
	return 2;
}
/*
 * SDifc verify entry: scsi devices are handed to scsiverify, block
 * devices always verify successfully (1).
 */
static int
vioverify(SDunit *u)
{
	Vdev *vd;

	vd = u->dev->ctlr;
	if(vd->typ != TypSCSI)
		return 1;
	return scsiverify(u);
}
SDifc sdvirtio10ifc;

/*
 * SDifc enable entry: turns on bus mastering, installs the
 * interrupt handler, enables the virtqueues set up by viopnpdevs
 * and finally tells the device that the driver is ready.
 * Always returns 1.
 */
static int
vioenable(SDev *sd)
{
	char name[32];
	Vdev *vd;
	int i;

	vd = sd->ctlr;
	pcisetbme(vd->pci);
	snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
	intrenable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, name);

	/*
	 * enable the queues first: the virtio 1.0 initialization
	 * sequence completes all virtqueue setup before DRIVER_OK
	 * is set, after which the device is live.
	 */
	for(i = 0; i < vd->nqueue; i++){
		vd->cfg->queuesel = i;
		vd->cfg->queueenable = 1;
	}
	coherence();
	vd->cfg->status |= DriverOk;
	return 1;
}
/*
 * SDifc disable entry: removes the interrupt handler installed by
 * vioenable (the same name string is rebuilt for intrdisable) and
 * turns off bus mastering. Always returns 1.
 */
static int
viodisable(SDev *sd)
{
	char intrname[32];
	Pcidev *pci;
	Vdev *vd;

	vd = sd->ctlr;
	pci = vd->pci;
	snprint(intrname, sizeof(intrname), "%s (%s)", sd->name, sd->ifc->name);
	intrdisable(pci->intl, viointerrupt, vd, pci->tbdf, intrname);
	pciclrbme(pci);
	return 1;
}
/*
 * SDifc pnp entry: builds the list of SDev controllers. Block
 * devices come first with ids starting at 'F' and one unit each;
 * scsi devices follow with ids starting at '0' and max_target
 * units. The device specific config region (capability type 4) is
 * mapped here and stored in vd->dev.
 */
static SDev*
viopnp(void)
{
SDev *s, *h, *t;
Vdev *vd;
int id;
h = t = nil;
id = 'F';
for(vd = viopnpdevs(TypBlk); vd; vd = vd->next){
/* a block device needs at least its request queue */
if(vd->nqueue == 0)
continue;
if((vd->dev = virtiomapregs(vd->pci, virtiocap(vd->pci, 4), sizeof(Vblkdev))) == nil)
break;
if((s = malloc(sizeof(*s))) == nil)
break;
s->ctlr = vd;
s->idno = id++;
s->ifc = &sdvirtio10ifc;
s->nunit = 1;
if(h)
t->next = s;
else
h = s;
t = s;
}
id = '0';
for(vd = viopnpdevs(TypSCSI); vd; vd = vd->next){
Vscsidev *scsi;
/* requests go to queue 2 (see vioscsireq), so three queues are needed */
if(vd->nqueue < 3)
continue;
if((scsi = virtiomapregs(vd->pci, virtiocap(vd->pci, 4), sizeof(Vscsidev))) == nil)
break;
if(scsi->max_target == 0){
vunmap(scsi, sizeof(Vscsidev));
continue;
}
/* vioscsireq builds its buffers on the stack with these fixed limits */
if((scsi->cdb_size > CDBSIZE) || (scsi->sense_size > SENSESIZE)){
print("sdvirtio: cdb %ud or sense size %ud too big\n",
scsi->cdb_size, scsi->sense_size);
vunmap(scsi, sizeof(Vscsidev));
continue;
}
vd->dev = scsi;
if((s = malloc(sizeof(*s))) == nil)
break;
s->ctlr = vd;
s->idno = id++;
s->ifc = &sdvirtio10ifc;
s->nunit = scsi->max_target;
if(h)
t->next = s;
else
h = s;
t = s;
}
return h;
}
/*
 * Interface table of this driver; entries left nil are not
 * implemented here.
 */
SDifc sdvirtio10ifc = {
"virtio10", /* name */
viopnp, /* pnp */
nil, /* legacy */
vioenable, /* enable */
viodisable, /* disable */
vioverify, /* verify */
vioonline, /* online */
viorio, /* rio */
nil, /* rctl */
nil, /* wctl */
viobio, /* bio */
nil, /* probe */
nil, /* clear */
nil, /* rtopctl */
nil, /* wtopctl */
};

View file

@ -78,6 +78,7 @@ link
etherwpi pci wifi
etherrt2860 pci wifi
ethervirtio pci
ethervirtio10 pci
ethermedium
# pcmciamodem
netdevmedium
@ -105,6 +106,7 @@ misc
sdiahci pci sdscsi led
# sdodin pci sdscsi led
sdvirtio pci sdscsi
sdvirtio10 pci sdscsi
sdmmc pci pmmc
sdnvme pci
sdloop