reactos/drivers/network/dd/netkvm/virtio/VirtIORing.c
2020-04-23 16:33:09 +03:00

562 lines
18 KiB
C

/*
* Virtio ring manipulation routines
*
* Copyright 2017 Red Hat, Inc.
*
* Authors:
* Ladi Prosek <lprosek@redhat.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met :
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and / or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of their contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "osdep.h"
#include "virtio_pci.h"
#include "VirtIO.h"
#include "kdebugprint.h"
#include "virtio_ring.h"
#include "windows/virtio_ring_allocation.h"
/* Maps a free-running ring index i onto a slot of a ring with num entries.
 * The mask only wraps correctly when num is a power of 2. */
#define DESC_INDEX(num, i) ((i) & ((num) - 1))
/* This marks a buffer as continuing via the next field. */
#define VIRTQ_DESC_F_NEXT 1
/* This marks a buffer as write-only (otherwise read-only). */
#define VIRTQ_DESC_F_WRITE 2
/* This means the buffer contains a list of buffer descriptors. */
#define VIRTQ_DESC_F_INDIRECT 4
/* The Host uses this in used->flags to advise the Guest: don't kick me when
 * you add a buffer. It's unreliable, so it's simply an optimization. Guest
 * will still kick if it's out of buffers. */
#define VIRTQ_USED_F_NO_NOTIFY 1
/* The Guest uses this in avail->flags to advise the Host: don't interrupt me
 * when you consume a buffer. It's unreliable, so it's simply an
 * optimization. */
#define VIRTQ_AVAIL_F_NO_INTERRUPT 1
#pragma warning (push)
#pragma warning (disable:4200)
#include <pshpack1.h>
/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */
struct vring_desc {
/* Address (guest-physical). */
__virtio64 addr;
/* Length of the buffer; for an indirect descriptor this is the byte size
 * of the indirect descriptor table. */
__virtio32 len;
/* Combination of the VIRTQ_DESC_F_* bits defined above. */
__virtio16 flags;
/* Index of the next descriptor in the chain when VIRTQ_DESC_F_NEXT is set.
 * We chain unused descriptors via this, too. */
__virtio16 next;
};
/* Available ring: descriptor chain heads published by the driver. */
struct vring_avail {
/* VIRTQ_AVAIL_F_* bits. */
__virtio16 flags;
/* Free-running index; it is advanced after a chain head has been stored
 * in ring[]. */
__virtio16 idx;
/* Heads of available descriptor chains, addressed with DESC_INDEX().
 * The element just past the last slot (ring[num]) is accessed through
 * vring_used_event(). */
__virtio16 ring[];
};
/* One entry of the used ring.
 * u32 is used here for ids for padding reasons. */
struct vring_used_elem {
/* Index of start of used descriptor chain. */
__virtio32 id;
/* Total length of the descriptor chain which was used (written to) */
__virtio32 len;
};
/* Used ring: descriptor chains returned by the device. */
struct vring_used {
/* VIRTQ_USED_F_* bits. */
__virtio16 flags;
/* Free-running index; the driver compares it with its own last_used
 * counter to detect newly returned buffers. */
__virtio16 idx;
/* Returned chains, addressed with DESC_INDEX(). The 16-bit value located
 * right after the last entry is accessed through vring_avail_event(). */
struct vring_used_elem ring[];
};
#include <poppack.h>
/* Alignment requirements for vring elements.
* When using pre-virtio 1.0 layout, these fall out naturally.
*/
#define VRING_AVAIL_ALIGN_SIZE 2
#define VRING_USED_ALIGN_SIZE 4
#define VRING_DESC_ALIGN_SIZE 16
/* The standard layout for the ring is a continuous chunk of memory which looks
* like this. We assume num is a power of 2.
*
* struct vring
* {
* // The actual descriptors (16 bytes each)
* struct vring_desc desc[num];
*
* // A ring of available descriptor heads with free-running index.
* __virtio16 avail_flags;
* __virtio16 avail_idx;
* __virtio16 available[num];
* __virtio16 used_event_idx;
*
* // Padding to the next align boundary.
* char pad[];
*
* // A ring of used descriptor heads with free-running index.
* __virtio16 used_flags;
* __virtio16 used_idx;
* struct vring_used_elem used[num];
* __virtio16 avail_event_idx;
* };
*/
/* We publish the used event index at the end of the available ring, and vice
* versa. They are at the end for backwards compatibility. */
/* Driver-side view of one split ring: the queue size plus pointers to the
 * three areas of the ring memory. The pointers are filled in by vring_init(). */
struct vring {
/* Number of descriptors / ring slots. */
unsigned int num;
/* Descriptor table (start of the ring memory). */
struct vring_desc *desc;
/* Available ring, located right after the descriptor table. */
struct vring_avail *avail;
/* Used ring, located at the next aligned address after the available ring. */
struct vring_used *used;
};
/* Lvalue for the used event index: the 16-bit slot that follows the last
 * entry of the available ring. */
#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
/* Lvalue for the avail event index: the 16-bit slot that follows the last
 * entry of the used ring. */
#define vring_avail_event(vr) (*(__virtio16 *)&(vr)->used->ring[(vr)->num])
static inline void vring_init(struct vring *vr, unsigned int num, void *p,
unsigned long align)
{
vr->num = num;
vr->desc = (struct vring_desc *)p;
vr->avail = (struct vring_avail *)((__u8 *)p + num * sizeof(struct vring_desc));
vr->used = (struct vring_used *)(((ULONG_PTR)&vr->avail->ring[num] + sizeof(__virtio16)
+ align - 1) & ~((ULONG_PTR)align - 1));
}
/* Returns the number of bytes of ring memory needed for a split ring with
 * num entries whose used ring is aligned to align bytes:
 * descriptor table + available ring (flags, idx, num slots, event slot),
 * rounded up to align, followed by the used ring (flags, idx, event slot,
 * num elements). */
static inline unsigned vring_size_split(unsigned int num, unsigned long align)
{
    size_t desc_bytes = sizeof(struct vring_desc) * num;
    size_t avail_bytes = sizeof(__virtio16) * (3 + num);
    size_t used_bytes = sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num;
    size_t aligned_head;

    /* The mask is built in unsigned long, exactly as before; C4319 warns
     * about it being zero-extended to a wider type. */
#pragma warning (push)
#pragma warning (disable:4319)
    aligned_head = (desc_bytes + avail_bytes + align - 1) & ~(align - 1);
#pragma warning(pop)

    return (unsigned)(aligned_head + used_bytes);
}
/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX.
 *
 * Given the event_idx value published by the other side, and an index that
 * has just been advanced from old to new_idx, returns non-zero if an event
 * should be triggered, i.e. if event_idx lies in the window [old, new_idx)
 * using 16-bit wrap-around arithmetic. */
static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
{
    /* Note: Xen has similar logic for notification hold-off
     * in include/xen/interface/io/ring.h with req_event and req_prod
     * corresponding to event_idx + 1 and new_idx respectively.
     * Note also that req_event and req_prod in Xen start at 1,
     * event indexes in virtio start at 0. */
    __u16 past_event = (__u16)(new_idx - event_idx - 1);
    __u16 advanced_by = (__u16)(new_idx - old);

    return past_event < advanced_by;
}
/* Control block of a split virtqueue: the generic virtqueue header followed
 * by the driver-side bookkeeping for the ring. */
struct virtqueue_split {
/* Generic virtqueue; it is the first member so that splitvq() can cast a
 * struct virtqueue pointer back to the containing structure. */
struct virtqueue vq;
/* Pointers into the ring memory. */
struct vring vring;
/* Driver-side copy of the available ring flags and index; values are
 * updated here and then written out to vring.avail. */
struct {
u16 flags;
u16 idx;
} master_vring_avail;
/* Number of descriptors currently on the free list. */
unsigned int num_unused;
/* Number of buffers added since the counter was last reset by a kick. */
unsigned int num_added_since_kick;
/* Head of the free descriptor list (linked through vring_desc.next). */
u16 first_unused;
/* Free-running index of the next used ring entry to be consumed. */
u16 last_used;
/* Per-descriptor opaque pointers, indexed by the head descriptor of a
 * chain; NULL for descriptors that do not head an outstanding buffer. */
void *opaque[];
};
/* Casts a generic virtqueue pointer to the split virtqueue containing it. */
#define splitvq(vq) ((struct virtqueue_split *)vq)
#pragma warning (pop)
/* Pops the head of the free descriptor list and returns its index.
 * The caller must make sure that at least one descriptor is free. */
static inline u16 get_unused_desc(struct virtqueue_split *vq)
{
    u16 head = vq->first_unused;

    ASSERT(vq->num_unused > 0);

    /* Unused descriptors are linked through their next field */
    vq->num_unused--;
    vq->first_unused = vq->vring.desc[head].next;

    return head;
}
/* Returns the whole descriptor chain starting at index idx to the free list
 * and clears the opaque pointer associated with the chain head. */
static inline void put_unused_desc_chain(struct virtqueue_split *vq, u16 idx)
{
    struct vring_desc *table = vq->vring.desc;
    u16 tail = idx;

    vq->opaque[idx] = NULL;

    /* Walk to the last descriptor of the chain, counting the ones passed */
    for (; table[tail].flags & VIRTQ_DESC_F_NEXT; tail = table[tail].next) {
        vq->num_unused++;
    }

    /* Splice the chain in front of the current free list */
    table[tail].flags = VIRTQ_DESC_F_NEXT;
    table[tail].next = vq->first_unused;
    vq->num_unused++;
    vq->first_unused = idx;
}
/* Adds a buffer to a virtqueue, returns 0 on success, negative number on error.
 *
 * The buffer is described by sg[]: the first "out" entries are readable by
 * the device, the following "in" entries are writable by the device.
 * If an indirect page is supplied and the buffer has more than one entry, the
 * entries are written into the indirect page and a single ring descriptor is
 * consumed; otherwise one ring descriptor per entry is consumed and -ENOSPC
 * is returned when not enough descriptors are free.
 *
 * NOTE(review): out + in == 0 is not rejected; the direct path then reads
 * sg[0] and consumes one descriptor - callers presumably never pass it.
 * NOTE(review): the indirect path does not check that out + in entries fit
 * into the indirect page - presumably guaranteed by the caller. */
static int virtqueue_add_buf_split(
struct virtqueue *_vq, /* the queue */
struct scatterlist sg[], /* sg array of length out + in */
unsigned int out, /* number of driver->device buffer descriptors in sg */
unsigned int in, /* number of device->driver buffer descriptors in sg */
void *opaque, /* later returned from virtqueue_get_buf */
void *va_indirect, /* VA of the indirect page or NULL */
ULONGLONG phys_indirect) /* PA of the indirect page or 0 */
{
struct virtqueue_split *vq = splitvq(_vq);
struct vring *vring = &vq->vring;
unsigned int i;
u16 idx;
if (va_indirect && (out + in) > 1 && vq->num_unused > 0) {
/* Use one indirect descriptor */
struct vring_desc *desc = (struct vring_desc *)va_indirect;
/* Build the descriptor table inside the indirect page; entries are
 * chained in array order and the device-writable ones get F_WRITE */
for (i = 0; i < out + in; i++) {
desc[i].flags = (i < out ? 0 : VIRTQ_DESC_F_WRITE);
desc[i].flags |= VIRTQ_DESC_F_NEXT;
desc[i].addr = sg[i].physAddr.QuadPart;
desc[i].len = sg[i].length;
desc[i].next = (u16)i + 1;
}
/* Terminate the chain at the last entry */
desc[i - 1].flags &= ~VIRTQ_DESC_F_NEXT;
/* A single ring descriptor points at the indirect table */
idx = get_unused_desc(vq);
vq->vring.desc[idx].flags = VIRTQ_DESC_F_INDIRECT;
vq->vring.desc[idx].addr = phys_indirect;
vq->vring.desc[idx].len = i * sizeof(struct vring_desc);
vq->opaque[idx] = opaque;
} else {
u16 last_idx;
/* Use out + in regular descriptors */
if (out + in > vq->num_unused) {
return -ENOSPC;
}
/* First descriptor */
idx = last_idx = get_unused_desc(vq);
vq->opaque[idx] = opaque;
vring->desc[idx].addr = sg[0].physAddr.QuadPart;
vring->desc[idx].len = sg[0].length;
vring->desc[idx].flags = VIRTQ_DESC_F_NEXT;
if (out == 0) {
vring->desc[idx].flags |= VIRTQ_DESC_F_WRITE;
}
/* first_unused is the descriptor the next get_unused_desc() call returns */
vring->desc[idx].next = vq->first_unused;
/* The rest of descriptors */
for (i = 1; i < out + in; i++) {
last_idx = get_unused_desc(vq);
vring->desc[last_idx].addr = sg[i].physAddr.QuadPart;
vring->desc[last_idx].len = sg[i].length;
vring->desc[last_idx].flags = VIRTQ_DESC_F_NEXT;
if (i >= out) {
vring->desc[last_idx].flags |= VIRTQ_DESC_F_WRITE;
}
vring->desc[last_idx].next = vq->first_unused;
}
/* Terminate the chain at the last descriptor taken */
vring->desc[last_idx].flags &= ~VIRTQ_DESC_F_NEXT;
}
/* Write the first descriptor into the available ring */
vring->avail->ring[DESC_INDEX(vring->num, vq->master_vring_avail.idx)] = idx;
/* The ring slot is stored before the barrier, the new index after it */
KeMemoryBarrier();
vring->avail->idx = ++vq->master_vring_avail.idx;
vq->num_added_since_kick++;
return 0;
}
/* Gets the opaque pointer associated with a returned buffer, or NULL if no buffer is available.
 *
 * When a buffer is returned, *len receives the length reported in the used
 * ring entry and the buffer's descriptors go back to the free list; *len is
 * left untouched when NULL is returned. len itself must not be NULL.
 *
 * NOTE(review): the descriptor id read from the used ring is not range
 * checked before it is used as an index - presumably the device is trusted. */
static void *virtqueue_get_buf_split(
struct virtqueue *_vq, /* the queue */
unsigned int *len) /* number of bytes returned by the device */
{
struct virtqueue_split *vq = splitvq(_vq);
void *opaque;
u16 idx;
if (vq->last_used == (int)vq->vring.used->idx) {
/* No descriptor index in the used ring */
return NULL;
}
/* The used index is read before the barrier, the ring entry after it */
KeMemoryBarrier();
idx = DESC_INDEX(vq->vring.num, vq->last_used);
*len = vq->vring.used->ring[idx].len;
/* Get the first used descriptor */
idx = (u16)vq->vring.used->ring[idx].id;
opaque = vq->opaque[idx];
/* Put all descriptors back to the free list */
put_unused_desc_chain(vq, idx);
vq->last_used++;
/* With event suppression and interrupts enabled, publish the new position
 * in the used event slot */
if (_vq->vdev->event_suppression_enabled && virtqueue_is_interrupt_enabled(_vq)) {
vring_used_event(&vq->vring) = vq->last_used;
KeMemoryBarrier();
}
ASSERT(opaque != NULL);
return opaque;
}
/* Returns true if at least one returned buffer is available, false otherwise */
static BOOLEAN virtqueue_has_buf_split(struct virtqueue *_vq)
{
struct virtqueue_split *vq = splitvq(_vq);
return (vq->last_used != vq->vring.used->idx);
}
/* Returns true if the device should be notified, false otherwise */
static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
{
struct virtqueue_split *vq = splitvq(_vq);
bool wrap_around;
u16 old, new;
KeMemoryBarrier();
wrap_around = (vq->num_added_since_kick >= (1 << 16));
old = (u16)(vq->master_vring_avail.idx - vq->num_added_since_kick);
new = vq->master_vring_avail.idx;
vq->num_added_since_kick = 0;
if (_vq->vdev->event_suppression_enabled) {
return wrap_around || (bool)vring_need_event(vring_avail_event(&vq->vring), new, old);
} else {
return !(vq->vring.used->flags & VIRTQ_USED_F_NO_NOTIFY);
}
}
/* Notifies the device even if it's not necessary according to the event suppression logic */
static void virtqueue_kick_always_split(struct virtqueue *_vq)
{
struct virtqueue_split *vq = splitvq(_vq);
KeMemoryBarrier();
vq->num_added_since_kick = 0;
virtqueue_notify(_vq);
}
/* Enables interrupts on a virtqueue and returns false if the queue has at least one returned
* buffer available to be fetched by virtqueue_get_buf, true otherwise */
static bool virtqueue_enable_cb_split(struct virtqueue *_vq)
{
struct virtqueue_split *vq = splitvq(_vq);
if (!virtqueue_is_interrupt_enabled(_vq)) {
vq->master_vring_avail.flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;
if (!_vq->vdev->event_suppression_enabled)
{
vq->vring.avail->flags = vq->master_vring_avail.flags;
}
}
vring_used_event(&vq->vring) = vq->last_used;
KeMemoryBarrier();
return (vq->last_used == vq->vring.used->idx);
}
/* Enables interrupts on a virtqueue, asking for one only after ~3/4 of the
 * currently pushed buffers have been returned.
 * Returns false if more than that many buffers are already waiting to be
 * fetched (the caller should process the queue now), true otherwise. */
static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
{
    struct virtqueue_split *vq = splitvq(_vq);
    u16 bufs;

    if (!virtqueue_is_interrupt_enabled(_vq)) {
        vq->master_vring_avail.flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;
        /* The flags are only written to the ring when event suppression
         * is not in use */
        if (!_vq->vdev->event_suppression_enabled)
        {
            vq->vring.avail->flags = vq->master_vring_avail.flags;
        }
    }

    /* Note that 3/4 is an arbitrary threshold */
    bufs = (u16)(vq->master_vring_avail.idx - vq->last_used) * 3 / 4;
    vring_used_event(&vq->vring) = vq->last_used + bufs;
    KeMemoryBarrier();

    /* Both indices are free-running 16-bit counters. The subtraction is done
     * in int after integer promotion, so the result has to be truncated back
     * to 16 bits; otherwise it turns negative once used->idx has wrapped
     * around and last_used has not, and the comparison is always true. */
    return ((u16)(vq->vring.used->idx - vq->last_used) <= bufs);
}
/* Disables interrupts on a virtqueue. Does nothing if they are already
 * disabled. */
static void virtqueue_disable_cb_split(struct virtqueue *_vq)
{
    struct virtqueue_split *vq = splitvq(_vq);

    if (!virtqueue_is_interrupt_enabled(_vq)) {
        return;
    }

    vq->master_vring_avail.flags |= VIRTQ_AVAIL_F_NO_INTERRUPT;

    /* The flags are only written to the ring when event suppression is not
     * in use */
    if (_vq->vdev->event_suppression_enabled) {
        return;
    }
    vq->vring.avail->flags = vq->master_vring_avail.flags;
}
/* Returns true if interrupts are enabled on a virtqueue, false otherwise */
static BOOLEAN virtqueue_is_interrupt_enabled_split(struct virtqueue *_vq)
{
struct virtqueue_split *vq = splitvq(_vq);
return !(vq->master_vring_avail.flags & VIRTQ_AVAIL_F_NO_INTERRUPT);
}
/* Re-initializes an already initialized virtqueue: the ring memory is zeroed
 * and the control block is rebuilt in place with the same index, size, device
 * and notification callback. */
static void virtqueue_shutdown_split(struct virtqueue *_vq)
{
    struct virtqueue_split *vq = splitvq(_vq);
    void *ring_mem = vq->vring.desc;
    unsigned int qsize = vq->vring.num;
    unsigned int vring_align;

    /* The ring alignment depends on whether vdev->addr is set */
    if (_vq->vdev->addr) {
        vring_align = PAGE_SIZE;
    } else {
        vring_align = SMP_CACHE_BYTES;
    }

    RtlZeroMemory(ring_mem, vring_size_split(qsize, vring_align));

    (void)vring_new_virtqueue_split(
        _vq->index,
        qsize,
        vring_align,
        _vq->vdev,
        ring_mem,
        _vq->notification_cb,
        vq);
}
/* Gets the opaque pointer associated with a not-yet-returned buffer, or NULL if no buffer is available
* to aid drivers with cleaning up all data on virtqueue shutdown */
static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
{
struct virtqueue_split *vq = splitvq(_vq);
u16 idx;
void *opaque = NULL;
for (idx = 0; idx < (u16)vq->vring.num; idx++) {
opaque = vq->opaque[idx];
if (opaque) {
put_unused_desc_chain(vq, idx);
vq->vring.avail->idx = --vq->master_vring_avail.idx;
break;
}
}
return opaque;
}
/* Computes how many bytes a virtqueue control block needs for a queue of
 * qsize entries. For a split queue this is the virtqueue_split structure plus
 * one opaque pointer per descriptor; packed queues are delegated to
 * vring_control_block_size_packed(). */
unsigned int vring_control_block_size(u16 qsize, bool packed)
{
    if (!packed) {
        return (unsigned int)(sizeof(struct virtqueue_split) + sizeof(void *) * qsize);
    }
    return vring_control_block_size_packed(qsize);
}
/* Initializes a new split virtqueue using already allocated memory.
 *
 * The control block is zeroed, the ring pointers are set up over "pages",
 * all descriptors are linked into the free list and the split-ring
 * implementations of the virtqueue operations are installed.
 *
 * Returns the generic virtqueue embedded in the control block, or NULL if
 * num is zero or not a power of 2; nothing is written in that case. */
struct virtqueue *vring_new_virtqueue_split(
    unsigned int index,                 /* virtqueue index */
    unsigned int num,                   /* virtqueue size (always a power of 2) */
    unsigned int vring_align,           /* vring alignment requirement */
    VirtIODevice *vdev,                 /* the virtio device owning the queue */
    void *pages,                        /* vring memory */
    void(*notify)(struct virtqueue *),  /* notification callback */
    void *control)                      /* virtqueue memory */
{
    struct virtqueue_split *vq = splitvq(control);
    u16 i;

    /* Zero has to be rejected explicitly: it passes the mask test below, and
     * "num - 1" would then wrap around so that the free list loop never
     * terminates. */
    if (num == 0 || DESC_INDEX(num, num) != 0) {
        DPrintf(0, "Virtqueue length %u is not a power of 2\n", num);
        return NULL;
    }

    RtlZeroMemory(vq, sizeof(*vq) + num * sizeof(void *));

    vring_init(&vq->vring, num, pages, vring_align);
    vq->vq.vdev = vdev;
    vq->vq.notification_cb = notify;
    vq->vq.index = index;

    /* Build a linked list of unused descriptors */
    vq->num_unused = num;
    vq->first_unused = 0;
    for (i = 0; i < num - 1; i++) {
        vq->vring.desc[i].flags = VIRTQ_DESC_F_NEXT;
        vq->vring.desc[i].next = i + 1;
    }

    vq->vq.avail_va = vq->vring.avail;
    vq->vq.used_va = vq->vring.used;

    /* Install the split-ring operations */
    vq->vq.add_buf = virtqueue_add_buf_split;
    vq->vq.detach_unused_buf = virtqueue_detach_unused_buf_split;
    vq->vq.disable_cb = virtqueue_disable_cb_split;
    vq->vq.enable_cb = virtqueue_enable_cb_split;
    vq->vq.enable_cb_delayed = virtqueue_enable_cb_delayed_split;
    vq->vq.get_buf = virtqueue_get_buf_split;
    vq->vq.has_buf = virtqueue_has_buf_split;
    vq->vq.is_interrupt_enabled = virtqueue_is_interrupt_enabled_split;
    vq->vq.kick_always = virtqueue_kick_always_split;
    vq->vq.kick_prepare = virtqueue_kick_prepare_split;
    vq->vq.shutdown = virtqueue_shutdown_split;

    return &vq->vq;
}
/* Negotiates virtio transport features: every bit in the transport feature
 * range is cleared from *features except the ones this implementation
 * accepts (indirect descriptors, event index, version 1). */
void vring_transport_features(
    VirtIODevice *vdev,
    u64 *features) /* points to device features on entry and driver accepted features on return */
{
    unsigned int bit;

    for (bit = VIRTIO_TRANSPORT_F_START; bit < VIRTIO_TRANSPORT_F_END; bit++) {
        /* Accepted transport features are left as offered */
        if (bit == VIRTIO_RING_F_INDIRECT_DESC ||
            bit == VIRTIO_RING_F_EVENT_IDX ||
            bit == VIRTIO_F_VERSION_1) {
            continue;
        }
        virtio_feature_disable(*features, bit);
    }
}
/* Returns the max number of scatter-gather elements that fit in one indirect
 * page, i.e. the number of descriptors of an indirect table that is PAGE_SIZE
 * bytes long. */
u32 virtio_get_indirect_page_capacity(void)
{
    return PAGE_SIZE / sizeof(struct vring_desc);
}
/* Returns the number of bytes of ring memory needed for a queue of num
 * entries with the given alignment, dispatching to the packed or split
 * layout calculation. */
unsigned long vring_size(unsigned int num, unsigned long align, bool packed)
{
    if (!packed) {
        return vring_size_split(num, align);
    }
    return vring_size_packed(num, align);
}