2 * Copyright (C) 2016 Red Hat, Inc.
3 * Author: Michael S. Tsirkin <mst@redhat.com>
4 * This work is licensed under the terms of the GNU GPL, version 2.
6 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
7 * signalling, unconditionally.
15 /* Next - Where next entry will be written.
16 * Prev - "Next" value when event triggered previously.
17 * Event - Peer requested event after writing this entry.
19 static inline bool need_event(unsigned short event,
23 return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
27 * Guest adds descriptors with unique index values and DESC_HW in flags.
28 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
29 * Flags are always set last.
37 unsigned long long addr;
40 /* how much padding is needed to avoid false cache sharing */
41 #define HOST_GUEST_PADDING 0x80
45 unsigned short kick_index;
46 unsigned char reserved0[HOST_GUEST_PADDING - 2];
47 unsigned short call_index;
48 unsigned char reserved1[HOST_GUEST_PADDING - 2];
52 void *buf; /* descriptor is writeable, we can't get buf from there */
61 unsigned last_used_idx;
63 unsigned kicked_avail_idx;
64 unsigned char reserved[HOST_GUEST_PADDING - 12];
68 /* we do not need to track last avail index
69 * unless we have more than one in flight.
72 unsigned called_used_idx;
73 unsigned char reserved[HOST_GUEST_PADDING - 4];
76 /* implemented by ring */
82 ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
84 perror("Unable to allocate ring buffer.\n");
87 event = malloc(sizeof *event);
89 perror("Unable to allocate event buffer.\n");
92 memset(event, 0, sizeof *event);
94 guest.kicked_avail_idx = -1;
95 guest.last_used_idx = 0;
97 host.called_used_idx = -1;
98 for (i = 0; i < ring_size; ++i) {
104 guest.num_free = ring_size;
105 data = malloc(ring_size * sizeof *data);
107 perror("Unable to allocate data buffer.\n");
110 memset(data, 0, ring_size * sizeof *data);
114 int add_inbuf(unsigned len, void *buf, void *datap)
116 unsigned head, index;
122 head = (ring_size - 1) & (guest.avail_idx++);
124 /* Start with a write. On MESI architectures this helps
125 * avoid a shared state with consumer that is polling this descriptor.
127 ring[head].addr = (unsigned long)(void*)buf;
128 ring[head].len = len;
129 /* The read below might bypass the write above. That is OK because the
130 * write is just an optimization. If this happens, we will get the cache
131 * line in a shared state, which is unfortunate, but it is probably not
132 * worth adding an explicit full barrier to avoid this.
135 index = ring[head].index;
136 data[index].buf = buf;
137 data[index].data = datap;
138 /* Barrier A (for pairing) */
140 ring[head].flags = DESC_HW;
145 void *get_buf(unsigned *lenp, void **bufp)
147 unsigned head = (ring_size - 1) & guest.last_used_idx;
151 if (ring[head].flags & DESC_HW)
153 /* Barrier B (for pairing) */
155 *lenp = ring[head].len;
156 index = ring[head].index & (ring_size - 1);
157 datap = data[index].data;
158 *bufp = data[index].buf;
159 data[index].buf = NULL;
160 data[index].data = NULL;
162 guest.last_used_idx++;
168 unsigned head = (ring_size - 1) & guest.last_used_idx;
170 return (ring[head].flags & DESC_HW);
175 /* Doing nothing to disable calls might cause
176 * extra interrupts, but reduces the number of cache misses.
182 event->call_index = guest.last_used_idx;
183 /* Flush call index write */
184 /* Barrier D (for pairing) */
189 void kick_available(void)
191 /* Order the previous flags write before the kick_index read below */
192 /* Barrier C (for pairing) */
194 if (!need_event(event->kick_index,
196 guest.kicked_avail_idx))
199 guest.kicked_avail_idx = guest.avail_idx;
206 /* Doing nothing to disable kicks might cause
207 * extra interrupts, but reduces the number of cache misses.
213 event->kick_index = host.used_idx;
214 /* Barrier C (for pairing) */
216 return avail_empty();
221 unsigned head = (ring_size - 1) & host.used_idx;
223 return !(ring[head].flags & DESC_HW);
226 bool use_buf(unsigned *lenp, void **bufp)
228 unsigned head = (ring_size - 1) & host.used_idx;
230 if (!(ring[head].flags & DESC_HW))
233 /* make sure length read below is not speculated */
234 /* Barrier A (for pairing) */
237 /* simple in-order completion: we don't need
238 * to touch index at all. This also means we
239 * can just modify the descriptor in-place.
242 /* Make sure len is valid before flags.
243 * Note: an alternative is to write len and flags in one access. This is
244 * possible on 64-bit architectures, but wmb is free on Intel anyway,
245 * so I have no way to test whether it's a gain.
247 /* Barrier B (for pairing) */
249 ring[head].flags = 0;
256 /* Order the previous flags write before the call_index read below */
257 /* Barrier D (for pairing) */
259 if (!need_event(event->call_index,
261 host.called_used_idx))
264 host.called_used_idx = host.used_idx;