Skip to content

Commit 62cba22

Browse files
committed
feat: add support for VIRTIO_NET_F_MRG_RXBUF to virtio-net
Now virtio-net device can split incoming packets across multiple descriptor chains if VIRTIO_NET_F_MRG_RXBUF is enabled by the guest. Signed-off-by: Egor Lazarchuk <[email protected]>
1 parent 44fef9f commit 62cba22

File tree

2 files changed

+218
-100
lines changed

2 files changed

+218
-100
lines changed

src/vmm/src/devices/virtio/net/device.rs

Lines changed: 199 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,22 @@
99
use std::io::Read;
1010
use std::mem;
1111
use std::net::Ipv4Addr;
12+
use std::num::Wrapping;
1213
use std::sync::{Arc, Mutex};
1314

1415
use libc::EAGAIN;
15-
use log::{error, warn};
16+
use log::error;
1617
use utils::eventfd::EventFd;
1718
use utils::net::mac::MacAddr;
18-
use utils::u64_to_usize;
19-
use vm_memory::GuestMemoryError;
19+
use utils::{u64_to_usize, usize_to_u64};
20+
use vm_memory::{GuestAddress, GuestMemory, GuestMemoryError};
2021

2122
use crate::devices::virtio::device::{DeviceState, IrqTrigger, IrqType, VirtioDevice};
2223
use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1;
2324
use crate::devices::virtio::gen::virtio_net::{
2425
virtio_net_hdr_v1, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4,
2526
VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_TSO4,
26-
VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC,
27+
VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF,
2728
};
2829
use crate::devices::virtio::gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
2930
use crate::devices::virtio::iovec::IoVecBuffer;
@@ -32,7 +33,7 @@ use crate::devices::virtio::net::tap::Tap;
3233
use crate::devices::virtio::net::{
3334
gen, NetError, NetQueue, MAX_BUFFER_SIZE, NET_QUEUE_SIZES, RX_INDEX, TX_INDEX,
3435
};
35-
use crate::devices::virtio::queue::{DescriptorChain, Queue};
36+
use crate::devices::virtio::queue::{Queue, UsedElement};
3637
use crate::devices::virtio::{ActivateError, TYPE_NET};
3738
use crate::devices::{report_net_event_fail, DeviceError};
3839
use crate::dumbo::pdu::arp::ETH_IPV4_FRAME_LEN;
@@ -47,14 +48,14 @@ const FRAME_HEADER_MAX_LEN: usize = PAYLOAD_OFFSET + ETH_IPV4_FRAME_LEN;
4748

4849
#[derive(Debug, thiserror::Error, displaydoc::Display)]
4950
enum FrontendError {
50-
/// Add user.
51-
AddUsed,
52-
/// Descriptor chain too mall.
53-
DescriptorChainTooSmall,
5451
/// Empty queue.
5552
EmptyQueue,
5653
/// Guest memory error: {0}
5754
GuestMemory(GuestMemoryError),
55+
/// Attempt to write an empty packet.
56+
AttemptToWriteEmptyPacket,
57+
/// Attempt to use more descriptor chains (heads) than is allowed.
58+
MaxHeadsUsed,
5859
/// Read only descriptor.
5960
ReadOnlyDescriptor,
6061
}
@@ -103,6 +104,20 @@ pub struct ConfigSpace {
103104
// SAFETY: `ConfigSpace` contains only PODs in `repr(C)` or `repr(transparent)`, without padding.
104105
unsafe impl ByteValued for ConfigSpace {}
105106

107+
// This struct contains information about partially
108+
// written packet.
109+
#[derive(Debug)]
110+
struct PartialWrite {
111+
// Amount of bytes written so far.
112+
bytes_written: usize,
113+
// Amount of descriptor heads used for the packet.
114+
used_heads: u16,
115+
// Guest address of the first buffer used for the packet.
116+
// This will be used to set number of descriptors heads used
117+
// to store the whole packet.
118+
packet_start_addr: GuestAddress,
119+
}
120+
106121
/// VirtIO network device.
107122
///
108123
/// It emulates a network device able to exchange L2 frames between the guest
@@ -127,6 +142,7 @@ pub struct Net {
127142

128143
rx_bytes_read: usize,
129144
rx_frame_buf: [u8; MAX_BUFFER_SIZE],
145+
rx_partial_write: Option<PartialWrite>,
130146

131147
tx_frame_headers: [u8; frame_hdr_len()],
132148

@@ -161,6 +177,7 @@ impl Net {
161177
| 1 << VIRTIO_NET_F_HOST_TSO4
162178
| 1 << VIRTIO_NET_F_HOST_TSO6
163179
| 1 << VIRTIO_NET_F_HOST_UFO
180+
| 1 << VIRTIO_NET_F_MRG_RXBUF
164181
| 1 << VIRTIO_F_VERSION_1
165182
| 1 << VIRTIO_RING_F_EVENT_IDX;
166183

@@ -191,6 +208,7 @@ impl Net {
191208
rx_deferred_frame: false,
192209
rx_bytes_read: 0,
193210
rx_frame_buf: [0u8; MAX_BUFFER_SIZE],
211+
rx_partial_write: None,
194212
tx_frame_headers: [0u8; frame_hdr_len()],
195213
irq_trigger: IrqTrigger::new().map_err(NetError::EventFd)?,
196214
config_space,
@@ -319,7 +337,17 @@ impl Net {
319337
}
320338

321339
// Attempt frame delivery.
322-
let success = self.write_frame_to_guest();
340+
let success = loop {
341+
// We retry writing a frame if there were internal errors.
342+
// Each new write will use new descriptor chains up to the
343+
// point of consuming all available descriptors, if they are
344+
// all bad.
345+
match self.write_frame_to_guest() {
346+
Ok(()) => break true,
347+
Err(FrontendError::EmptyQueue) => break false,
348+
_ => (),
349+
};
350+
};
323351

324352
// Undo the tokens consumption if guest delivery failed.
325353
if !success {
@@ -330,108 +358,186 @@ impl Net {
330358
success
331359
}
332360

333-
/// Write a slice in a descriptor chain
334-
///
335-
/// # Errors
336-
///
337-
/// Returns an error if the descriptor chain is too short or
338-
/// an inappropriate (read only) descriptor is found in the chain
339-
fn write_to_descriptor_chain(
340-
mem: &GuestMemoryMmap,
341-
data: &[u8],
342-
head: DescriptorChain,
343-
net_metrics: &NetDeviceMetrics,
344-
) -> Result<(), FrontendError> {
345-
let mut chunk = data;
346-
let mut next_descriptor = Some(head);
361+
/// Write packet contained in the internal buffer into guest provided
362+
/// descriptor chains.
363+
fn write_frame_to_guest(&mut self) -> Result<(), FrontendError> {
364+
// This is safe since we checked in the event handler that the device is activated.
365+
let mem = self.device_state.mem().unwrap();
366+
367+
if self.queues[RX_INDEX].is_empty(mem) {
368+
self.metrics.no_rx_avail_buffer.inc();
369+
return Err(FrontendError::EmptyQueue);
370+
}
371+
372+
let next_used = self.queues[RX_INDEX].next_used;
373+
let actual_size = self.queues[RX_INDEX].actual_size();
374+
375+
let (mut slice, mut packet_start_addr, mut used_heads) =
376+
if let Some(pw) = &self.rx_partial_write {
377+
(
378+
&self.rx_frame_buf[pw.bytes_written..self.rx_bytes_read],
379+
Some(pw.packet_start_addr),
380+
pw.used_heads,
381+
)
382+
} else {
383+
(&self.rx_frame_buf[..self.rx_bytes_read], None, 0)
384+
};
347385

348-
while let Some(descriptor) = &next_descriptor {
349-
if !descriptor.is_write_only() {
350-
return Err(FrontendError::ReadOnlyDescriptor);
386+
let max_used_heads = if self.has_feature(u64::from(VIRTIO_NET_F_MRG_RXBUF)) {
387+
// There is no real limit on how many heads we can use, but we will
388+
// never use more than the queue has.
389+
u16::MAX
390+
} else {
391+
// Without VIRTIO_NET_F_MRG_RXBUF only 1 head can be used for the packet.
392+
1
393+
};
394+
395+
let mut error = None;
396+
while !slice.is_empty() && error.is_none() {
397+
if used_heads == max_used_heads {
398+
error = Some(FrontendError::MaxHeadsUsed);
399+
break;
351400
}
352401

353-
let len = std::cmp::min(chunk.len(), descriptor.len as usize);
354-
match mem.write_slice(&chunk[..len], descriptor.addr) {
355-
Ok(()) => {
356-
net_metrics.rx_count.inc();
357-
chunk = &chunk[len..];
402+
let Some(head_desc) = self.queues[RX_INDEX].pop_or_enable_notification(mem) else {
403+
break;
404+
};
405+
406+
let head_desc_index = head_desc.index;
407+
let mut desc_len = 0;
408+
409+
// If this is the first head of the packet, save it for later.
410+
if packet_start_addr.is_none() {
411+
packet_start_addr = Some(head_desc.addr);
412+
}
413+
414+
// Write to the descriptor chain as much as possible.
415+
let mut desc = Some(head_desc);
416+
while !slice.is_empty() && desc.is_some() {
417+
let d = desc.unwrap();
418+
419+
if !d.is_write_only() {
420+
error = Some(FrontendError::ReadOnlyDescriptor);
421+
break;
358422
}
359-
Err(err) => {
360-
error!("Failed to write slice: {:?}", err);
423+
let len = slice.len().min(d.len as usize);
424+
if let Err(err) = mem.write_slice(&slice[..len], d.addr) {
361425
if let GuestMemoryError::PartialBuffer { .. } = err {
362-
net_metrics.rx_partial_writes.inc();
426+
self.metrics.rx_partial_writes.inc();
363427
}
364-
return Err(FrontendError::GuestMemory(err));
428+
error = Some(FrontendError::GuestMemory(err));
429+
break;
430+
} else {
431+
desc_len += len;
432+
slice = &slice[len..];
365433
}
366-
}
367434

368-
// If chunk is empty we are done here.
369-
if chunk.is_empty() {
370-
let len = data.len() as u64;
371-
net_metrics.rx_bytes_count.add(len);
372-
net_metrics.rx_packets_count.inc();
373-
return Ok(());
435+
desc = d.next_descriptor();
374436
}
375437

376-
next_descriptor = descriptor.next_descriptor();
438+
// At this point descriptor chain was processed.
439+
// We add it to the used_ring.
440+
let next_used_index = (next_used + Wrapping(used_heads)).0 % actual_size;
441+
let used_element = UsedElement {
442+
id: u32::from(head_desc_index),
443+
len: u32::try_from(desc_len).unwrap(),
444+
};
445+
// We don't update queues internals just yet.
446+
// This is done to prevent giving information to the guest
447+
// about descriptor heads we used for partialy written packets.
448+
// SAFETY:
449+
// This should never panic as we provide index in
450+
// correct bounds.
451+
self.queues[RX_INDEX]
452+
.write_used_ring(mem, next_used_index, used_element)
453+
.unwrap();
454+
455+
used_heads += 1;
377456
}
378457

379-
warn!("Receiving buffer is too small to hold frame of current size");
380-
Err(FrontendError::DescriptorChainTooSmall)
381-
}
458+
let packet_start_addr =
459+
packet_start_addr.ok_or(FrontendError::AttemptToWriteEmptyPacket)?;
382460

383-
// Copies a single frame from `self.rx_frame_buf` into the guest.
384-
fn do_write_frame_to_guest(&mut self) -> Result<(), FrontendError> {
385-
// This is safe since we checked in the event handler that the device is activated.
386-
let mem = self.device_state.mem().unwrap();
461+
let mut end_packet_processing = || {
462+
// Update queues internals as this is the end of this
463+
// packet being written.
464+
self.queues[RX_INDEX].next_used += Wrapping(used_heads);
465+
self.queues[RX_INDEX].num_added += Wrapping(used_heads);
387466

388-
let queue = &mut self.queues[RX_INDEX];
389-
let head_descriptor = queue.pop_or_enable_notification(mem).ok_or_else(|| {
390-
self.metrics.no_rx_avail_buffer.inc();
391-
FrontendError::EmptyQueue
392-
})?;
393-
let head_index = head_descriptor.index;
467+
// Update used ring with what we used to process the packet
468+
self.queues[RX_INDEX].set_used_ring_idx((next_used + Wrapping(used_heads)).0, mem);
394469

395-
let result = Self::write_to_descriptor_chain(
396-
mem,
397-
&self.rx_frame_buf[..self.rx_bytes_read],
398-
head_descriptor,
399-
&self.metrics,
400-
);
401-
// Mark the descriptor chain as used. If an error occurred, skip the descriptor chain.
402-
let used_len = if result.is_err() {
403-
self.metrics.rx_fails.inc();
404-
0
405-
} else {
406-
// Safe to unwrap because a frame must be smaller than 2^16 bytes.
407-
u32::try_from(self.rx_bytes_read).unwrap()
470+
// Clear partial write info if there was one
471+
self.rx_partial_write = None;
408472
};
409-
queue.add_used(mem, head_index, used_len).map_err(|err| {
410-
error!("Failed to add available descriptor {}: {}", head_index, err);
411-
FrontendError::AddUsed
412-
})?;
413473

414-
result
415-
}
474+
if let Some(err) = error {
475+
// There was an error during writing.
476+
end_packet_processing();
416477

417-
// Copies a single frame from `self.rx_frame_buf` into the guest. In case of an error retries
418-
// the operation if possible. Returns true if the operation was successfull.
419-
fn write_frame_to_guest(&mut self) -> bool {
420-
let max_iterations = self.queues[RX_INDEX].actual_size();
421-
for _ in 0..max_iterations {
422-
match self.do_write_frame_to_guest() {
423-
Ok(()) => return true,
424-
Err(FrontendError::EmptyQueue) | Err(FrontendError::AddUsed) => {
425-
return false;
426-
}
427-
Err(_) => {
428-
// retry
429-
continue;
430-
}
478+
self.metrics.rx_fails.inc();
479+
480+
// `next_used` is pointing at the first descriptor used to process the packet.
481+
// We used `used_heads` descriptors to process the packet. Go over all of them
482+
// and overwrite them with 0 len to discard them.
483+
for i in 0..used_heads {
484+
let next_used_index = (next_used + Wrapping(i)).0 % actual_size;
485+
486+
// SAFETY:
487+
// This should never panic as we provide index in
488+
// correct bounds.
489+
let mut used_element = self.queues[RX_INDEX]
490+
.read_used_ring(mem, next_used_index)
491+
.unwrap();
492+
used_element.len = 0;
493+
self.queues[RX_INDEX]
494+
.write_used_ring(mem, next_used_index, used_element)
495+
.unwrap();
431496
}
432-
}
433497

434-
false
498+
Err(err)
499+
} else if slice.is_empty() {
500+
// Packet was fully written.
501+
end_packet_processing();
502+
503+
self.metrics
504+
.rx_bytes_count
505+
.add(usize_to_u64(self.rx_bytes_read));
506+
self.metrics.rx_packets_count.inc();
507+
508+
// Update number of descriptor heads used to store a packet.
509+
// SAFETY:
510+
// The packet_start_addr is valid guest address and we check
511+
// memory boundaries.
512+
#[allow(clippy::transmute_ptr_to_ref)]
513+
let header: &mut virtio_net_hdr_v1 = unsafe {
514+
let header_slice = mem
515+
.get_slice(packet_start_addr, std::mem::size_of::<virtio_net_hdr_v1>())
516+
.map_err(FrontendError::GuestMemory)?;
517+
std::mem::transmute(header_slice.ptr_guard_mut().as_ptr())
518+
};
519+
header.num_buffers = used_heads;
520+
521+
Ok(())
522+
} else {
523+
// Packet could not be fully written to the guest
524+
// Save necessary info to use it during next invocation.
525+
self.metrics.rx_partial_writes.inc();
526+
527+
if let Some(pw) = &mut self.rx_partial_write {
528+
pw.bytes_written = self.rx_bytes_read - slice.len();
529+
pw.used_heads = used_heads;
530+
} else {
531+
let pw = PartialWrite {
532+
bytes_written: self.rx_bytes_read - slice.len(),
533+
used_heads,
534+
packet_start_addr,
535+
};
536+
self.rx_partial_write = Some(pw);
537+
}
538+
539+
Err(FrontendError::EmptyQueue)
540+
}
435541
}
436542

437543
// Tries to detour the frame to MMDS and if MMDS doesn't accept it, sends it on the host TAP.
@@ -1027,6 +1133,7 @@ pub mod tests {
10271133
| 1 << VIRTIO_NET_F_HOST_TSO4
10281134
| 1 << VIRTIO_NET_F_HOST_TSO6
10291135
| 1 << VIRTIO_NET_F_HOST_UFO
1136+
| 1 << VIRTIO_NET_F_MRG_RXBUF
10301137
| 1 << VIRTIO_F_VERSION_1
10311138
| 1 << VIRTIO_RING_F_EVENT_IDX;
10321139

0 commit comments

Comments
 (0)