// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <[email protected]>
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Next - Where next entry will be written.
 * Prev - "Next" value when event triggered previously.
 * Event - Peer requested event after writing this entry.
 */
static inline bool need_event(unsigned short event,
			      unsigned short next,
			      unsigned short prev)
{
	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
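
/* Worked example: with prev = 10 and next = 12, entries 10 and 11 have just
 * been written. A requested event index of 10 gives (12 - 10 - 1) = 1 <
 * (12 - 10) = 2, so an event is due; an index of 12 makes the left side wrap
 * to 65535, so no event is sent. The unsigned short casts keep this window
 * comparison correct across 16-bit index wrap-around.
 */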

/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
#define DESC_HW 0x1
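
/* Barrier pairing map, as labelled in the functions below:
 *   A: smp_release() in add_inbuf()  pairs with  smp_acquire() in use_buf(),
 *      so buffer addr/len are visible before the host sees DESC_HW set.
 *   B: smp_release() in use_buf()    pairs with  smp_acquire() in get_buf(),
 *      so the written len is visible before the guest sees DESC_HW clear.
 *   C: smp_mb() in kick_available()  pairs with  smp_mb() in enable_kick().
 *   D: smp_mb() in call_used()       pairs with  smp_mb() in enable_call().
 */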

struct desc {
	unsigned short flags;
	unsigned short index;
	unsigned len;
	unsigned long long addr;
};
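
/* Note: on common 64-bit ABIs this struct is 16 bytes (2 + 2 + 4 + 8), so
 * the page-aligned ring allocated below packs 256 descriptors per 4 KiB page.
 */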

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80

/* Mostly read */
struct event {
	unsigned short kick_index;
	unsigned char reserved0[HOST_GUEST_PADDING - 2];
	unsigned short call_index;
	unsigned char reserved1[HOST_GUEST_PADDING - 2];
};

struct data {
	void *buf; /* descriptor is writeable, we can't get buf from there */
	void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
	unsigned avail_idx;
	unsigned last_used_idx;
	unsigned num_free;
	unsigned kicked_avail_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

struct host {
	/* we do not need to track last avail index
	 * unless we have more than one in flight.
	 */
	unsigned used_idx;
	unsigned called_used_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
	int ret;
	int i;

	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
	if (ret) {
		perror("Unable to allocate ring buffer.\n");
		exit(3);
	}
	event = calloc(1, sizeof(*event));
	if (!event) {
		perror("Unable to allocate event buffer.\n");
		exit(3);
	}
	guest.avail_idx = 0;
	guest.kicked_avail_idx = -1;
	guest.last_used_idx = 0;
	host.used_idx = 0;
	host.called_used_idx = -1;
	for (i = 0; i < ring_size; ++i) {
		struct desc desc = {
			.index = i,
		};
		ring[i] = desc;
	}
	guest.num_free = ring_size;
	data = calloc(ring_size, sizeof(*data));
	if (!data) {
		perror("Unable to allocate data buffer.\n");
		exit(3);
	}
}
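
/* ring_size is supplied by the test harness via main.h; the (ring_size - 1)
 * masking used throughout assumes it is a power of two.
 */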

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
	unsigned head, index;

	if (!guest.num_free)
		return -1;

	guest.num_free--;
	head = (ring_size - 1) & (guest.avail_idx++);

	/* Start with a write. On MESI architectures this helps
	 * avoid a shared state with consumer that is polling this descriptor.
	 */
	ring[head].addr = (unsigned long)(void *)buf;
	ring[head].len = len;
	/* read below might bypass write above. That is OK because it's just an
	 * optimization. If this happens, we will get the cache line in a
	 * shared state which is unfortunate, but probably not worth it to
	 * add an explicit full barrier to avoid this.
	 */
	barrier();
	index = ring[head].index;
	data[index].buf = buf;
	data[index].data = datap;
	/* Barrier A (for pairing) */
	smp_release();
	ring[head].flags = DESC_HW;

	return 0;
}
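
/* Illustrative guest-side flow (hypothetical; the real driver loop lives in
 * the test harness, and process() is a placeholder):
 *
 *	while (add_inbuf(len, buf, token) == 0)
 *		;
 *	kick_available();
 *	...
 *	while (!used_empty()) {
 *		token = get_buf(&len, &buf);
 *		process(token, buf, len);
 *	}
 */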

void *get_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;
	unsigned index;
	void *datap;

	if (ring[head].flags & DESC_HW)
		return NULL;
	/* Barrier B (for pairing) */
	smp_acquire();
	*lenp = ring[head].len;
	index = ring[head].index & (ring_size - 1);
	datap = data[index].data;
	*bufp = data[index].buf;
	data[index].buf = NULL;
	data[index].data = NULL;
	guest.num_free++;
	guest.last_used_idx++;
	return datap;
}

bool used_empty()
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;

	return (ring[head].flags & DESC_HW);
}

void disable_call()
{
	/* Doing nothing to disable calls might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_call()
{
	event->call_index = guest.last_used_idx;
	/* Flush call index write */
	/* Barrier D (for pairing) */
	smp_mb();
	return used_empty();
}
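
/* Note that enable_call() returns a fresh used_empty() check taken after the
 * barrier: a descriptor may have been completed between the last poll and the
 * call_index update, and no call is guaranteed for it, so a false return
 * tells the caller to keep processing instead of sleeping.
 */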

void kick_available(void)
{
	bool need;

	/* Flush in previous flags write */
	/* Barrier C (for pairing) */
	smp_mb();
	need = need_event(event->kick_index,
			  guest.avail_idx,
			  guest.kicked_avail_idx);

	guest.kicked_avail_idx = guest.avail_idx;
	if (need)
		kick();
}

/* host side */
void disable_kick()
{
	/* Doing nothing to disable kicks might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_kick()
{
	event->kick_index = host.used_idx;
	/* Barrier C (for pairing) */
	smp_mb();
	return avail_empty();
}

bool avail_empty()
{
	unsigned head = (ring_size - 1) & host.used_idx;

	return !(ring[head].flags & DESC_HW);
}

bool use_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & host.used_idx;

	if (!(ring[head].flags & DESC_HW))
		return false;

	/* make sure length read below is not speculated */
	/* Barrier A (for pairing) */
	smp_acquire();

	/* simple in-order completion: we don't need
	 * to touch index at all. This also means we
	 * can just modify the descriptor in-place.
	 */
	ring[head].len--;
	/* Make sure len is valid before flags.
	 * Note: alternative is to write len and flags in one access -
	 * possible on 64 bit architectures but wmb is free on Intel anyway
	 * so I have no way to test whether it's a gain.
	 */
	/* Barrier B (for pairing) */
	smp_release();
	ring[head].flags = 0;
	host.used_idx++;
	return true;
}
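
/* The len decrement above presumably stands in for the host writing back the
 * number of bytes it consumed; the guest only copies the value out in
 * get_buf(), so any write that exercises the len field works for the
 * benchmark.
 */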

void call_used(void)
{
	bool need;

	/* Flush in previous flags write */
	/* Barrier D (for pairing) */
	smp_mb();

	need = need_event(event->call_index,
			  host.used_idx,
			  host.called_used_idx);

	host.called_used_idx = host.used_idx;

	if (need)
		call();
}