@@ -9,21 +9,22 @@
 use std::io::Read;
 use std::mem;
 use std::net::Ipv4Addr;
+use std::num::Wrapping;
 use std::sync::{Arc, Mutex};
 
 use libc::EAGAIN;
-use log::{error, warn};
+use log::error;
 use utils::eventfd::EventFd;
 use utils::net::mac::MacAddr;
-use utils::u64_to_usize;
-use vm_memory::GuestMemoryError;
+use utils::{u64_to_usize, usize_to_u64};
+use vm_memory::{GuestAddress, GuestMemory, GuestMemoryError};
 
 use crate::devices::virtio::device::{DeviceState, IrqTrigger, IrqType, VirtioDevice};
 use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1;
 use crate::devices::virtio::gen::virtio_net::{
     virtio_net_hdr_v1, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4,
     VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_TSO4,
-    VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC,
+    VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF,
 };
 use crate::devices::virtio::gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
 use crate::devices::virtio::iovec::IoVecBuffer;
@@ -32,7 +33,7 @@ use crate::devices::virtio::net::tap::Tap;
 use crate::devices::virtio::net::{
     gen, NetError, NetQueue, MAX_BUFFER_SIZE, NET_QUEUE_SIZES, RX_INDEX, TX_INDEX,
 };
-use crate::devices::virtio::queue::{DescriptorChain, Queue};
+use crate::devices::virtio::queue::{Queue, UsedElement};
 use crate::devices::virtio::{ActivateError, TYPE_NET};
 use crate::devices::{report_net_event_fail, DeviceError};
 use crate::dumbo::pdu::arp::ETH_IPV4_FRAME_LEN;
@@ -47,14 +48,14 @@ const FRAME_HEADER_MAX_LEN: usize = PAYLOAD_OFFSET + ETH_IPV4_FRAME_LEN;
 
 #[derive(Debug, thiserror::Error, displaydoc::Display)]
 enum FrontendError {
-    /// Add user.
-    AddUsed,
-    /// Descriptor chain too mall.
-    DescriptorChainTooSmall,
     /// Empty queue.
     EmptyQueue,
     /// Guest memory error: {0}
     GuestMemory(GuestMemoryError),
+    /// Attempt to write an empty packet.
+    AttemptToWriteEmptyPacket,
+    /// Attempt to use more descriptor chains (heads) than allowed.
+    MaxHeadsUsed,
     /// Read only descriptor.
     ReadOnlyDescriptor,
 }
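
The reworked error set doubles as the RX path's retry policy: the delivery loop added further below treats only `EmptyQueue` as terminal for the current attempt, while any other variant makes it retry with fresh descriptor chains. A minimal sketch of that mapping (the helper name is illustrative, not part of the patch):

    fn should_retry(err: &FrontendError) -> bool {
        // Only a drained RX queue stops delivery; other errors discard the
        // bad descriptor chains and retry with new ones.
        !matches!(err, FrontendError::EmptyQueue)
    }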
@@ -103,6 +104,20 @@ pub struct ConfigSpace {
 // SAFETY: `ConfigSpace` contains only PODs in `repr(C)` or `repr(transparent)`, without padding.
 unsafe impl ByteValued for ConfigSpace {}
 
+// This struct contains information about a partially
+// written packet.
+#[derive(Debug)]
+struct PartialWrite {
+    // Number of bytes written so far.
+    bytes_written: usize,
+    // Number of descriptor heads used for the packet.
+    used_heads: u16,
+    // Guest address of the first buffer used for the packet.
+    // This will be used to set the number of descriptor heads
+    // used to store the whole packet.
+    packet_start_addr: GuestAddress,
+}
+
 /// VirtIO network device.
 ///
 /// It emulates a network device able to exchange L2 frames between the guest
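
To make the resume path concrete, here is a self-contained sketch (assuming `rx_frame_buf` and `rx_bytes_read` stand in for the fields of `Net`) of how the saved state selects the bytes still to be written on the next invocation, mirroring the tuple construction in `write_frame_to_guest` below:

    fn remaining_bytes<'a>(
        rx_frame_buf: &'a [u8],
        rx_bytes_read: usize,
        partial: Option<&PartialWrite>,
    ) -> &'a [u8] {
        match partial {
            // Resume after the bytes already copied into earlier descriptor chains.
            Some(pw) => &rx_frame_buf[pw.bytes_written..rx_bytes_read],
            // Fresh packet: start from the beginning of the frame.
            None => &rx_frame_buf[..rx_bytes_read],
        }
    }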
@@ -127,6 +142,7 @@ pub struct Net {
 
     rx_bytes_read: usize,
     rx_frame_buf: [u8; MAX_BUFFER_SIZE],
+    rx_partial_write: Option<PartialWrite>,
 
     tx_frame_headers: [u8; frame_hdr_len()],
 
@@ -161,6 +177,7 @@ impl Net {
             | 1 << VIRTIO_NET_F_HOST_TSO4
             | 1 << VIRTIO_NET_F_HOST_TSO6
             | 1 << VIRTIO_NET_F_HOST_UFO
+            | 1 << VIRTIO_NET_F_MRG_RXBUF
             | 1 << VIRTIO_F_VERSION_1
             | 1 << VIRTIO_RING_F_EVENT_IDX;
 
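
For reference, the `has_feature` check used later reduces to a single-bit test on this `u64` feature word; a minimal sketch (the free function is illustrative only):

    fn offers_mrg_rxbuf(avail_features: u64) -> bool {
        // Each VIRTIO feature is one bit in the 64-bit feature word.
        avail_features & (1u64 << VIRTIO_NET_F_MRG_RXBUF) != 0
    }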
@@ -191,6 +208,7 @@ impl Net {
             rx_deferred_frame: false,
             rx_bytes_read: 0,
             rx_frame_buf: [0u8; MAX_BUFFER_SIZE],
+            rx_partial_write: None,
             tx_frame_headers: [0u8; frame_hdr_len()],
             irq_trigger: IrqTrigger::new().map_err(NetError::EventFd)?,
             config_space,
@@ -319,7 +337,17 @@ impl Net {
         }
 
         // Attempt frame delivery.
-        let success = self.write_frame_to_guest();
+        let success = loop {
+            // We retry writing a frame if there were internal errors.
+            // Each new write will use new descriptor chains, up to the
+            // point of consuming all available descriptors if they are
+            // all bad.
+            match self.write_frame_to_guest() {
+                Ok(()) => break true,
+                Err(FrontendError::EmptyQueue) => break false,
+                _ => (),
+            };
+        };
 
         // Undo the tokens consumption if guest delivery failed.
         if !success {
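
Note that this loop is guaranteed to terminate: each failed `write_frame_to_guest` call discards the descriptor chains it touched, so the device either delivers the frame or drains the RX queue and hits the `EmptyQueue` arm.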
@@ -330,108 +358,186 @@ impl Net {
         success
     }
 
-    /// Write a slice in a descriptor chain
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if the descriptor chain is too short or
-    /// an inappropriate (read only) descriptor is found in the chain
-    fn write_to_descriptor_chain(
-        mem: &GuestMemoryMmap,
-        data: &[u8],
-        head: DescriptorChain,
-        net_metrics: &NetDeviceMetrics,
-    ) -> Result<(), FrontendError> {
-        let mut chunk = data;
-        let mut next_descriptor = Some(head);
+    /// Write the packet contained in the internal buffer into guest-provided
+    /// descriptor chains.
+    fn write_frame_to_guest(&mut self) -> Result<(), FrontendError> {
+        // This is safe since we checked in the event handler that the device is activated.
+        let mem = self.device_state.mem().unwrap();
+
+        if self.queues[RX_INDEX].is_empty(mem) {
+            self.metrics.no_rx_avail_buffer.inc();
+            return Err(FrontendError::EmptyQueue);
+        }
+
+        let next_used = self.queues[RX_INDEX].next_used;
+        let actual_size = self.queues[RX_INDEX].actual_size();
+
+        let (mut slice, mut packet_start_addr, mut used_heads) =
+            if let Some(pw) = &self.rx_partial_write {
+                (
+                    &self.rx_frame_buf[pw.bytes_written..self.rx_bytes_read],
+                    Some(pw.packet_start_addr),
+                    pw.used_heads,
+                )
+            } else {
+                (&self.rx_frame_buf[..self.rx_bytes_read], None, 0)
+            };
 
-        while let Some(descriptor) = &next_descriptor {
-            if !descriptor.is_write_only() {
-                return Err(FrontendError::ReadOnlyDescriptor);
+        let max_used_heads = if self.has_feature(u64::from(VIRTIO_NET_F_MRG_RXBUF)) {
+            // There is no real limit on how many heads we can use, but we will
+            // never use more than the queue has.
+            u16::MAX
+        } else {
+            // Without VIRTIO_NET_F_MRG_RXBUF only 1 head can be used for the packet.
+            1
+        };
+
+        let mut error = None;
+        while !slice.is_empty() && error.is_none() {
+            if used_heads == max_used_heads {
+                error = Some(FrontendError::MaxHeadsUsed);
+                break;
             }
 
-            let len = std::cmp::min(chunk.len(), descriptor.len as usize);
-            match mem.write_slice(&chunk[..len], descriptor.addr) {
-                Ok(()) => {
-                    net_metrics.rx_count.inc();
-                    chunk = &chunk[len..];
+            let Some(head_desc) = self.queues[RX_INDEX].pop_or_enable_notification(mem) else {
+                break;
+            };
+
+            let head_desc_index = head_desc.index;
+            let mut desc_len = 0;
+
+            // If this is the first head of the packet, save it for later.
+            if packet_start_addr.is_none() {
+                packet_start_addr = Some(head_desc.addr);
+            }
+
+            // Write to the descriptor chain as much as possible.
+            let mut desc = Some(head_desc);
+            while !slice.is_empty() && desc.is_some() {
+                let d = desc.unwrap();
+
+                if !d.is_write_only() {
+                    error = Some(FrontendError::ReadOnlyDescriptor);
+                    break;
                 }
-                Err(err) => {
-                    error!("Failed to write slice: {:?}", err);
+                let len = slice.len().min(d.len as usize);
+                if let Err(err) = mem.write_slice(&slice[..len], d.addr) {
                     if let GuestMemoryError::PartialBuffer { .. } = err {
-                        net_metrics.rx_partial_writes.inc();
+                        self.metrics.rx_partial_writes.inc();
                     }
-                    return Err(FrontendError::GuestMemory(err));
+                    error = Some(FrontendError::GuestMemory(err));
+                    break;
+                } else {
+                    desc_len += len;
+                    slice = &slice[len..];
                 }
-            }
 
-            // If chunk is empty we are done here.
-            if chunk.is_empty() {
-                let len = data.len() as u64;
-                net_metrics.rx_bytes_count.add(len);
-                net_metrics.rx_packets_count.inc();
-                return Ok(());
+                desc = d.next_descriptor();
             }
 
-            next_descriptor = descriptor.next_descriptor();
+            // At this point the descriptor chain was processed.
+            // We add it to the used ring.
+            let next_used_index = (next_used + Wrapping(used_heads)).0 % actual_size;
+            let used_element = UsedElement {
+                id: u32::from(head_desc_index),
+                len: u32::try_from(desc_len).unwrap(),
+            };
+            // We don't update the queue's internals just yet.
+            // This is done to prevent giving the guest information about
+            // descriptor heads we used for partially written packets.
+            // SAFETY:
+            // This should never panic as we provide an index within
+            // correct bounds.
+            self.queues[RX_INDEX]
+                .write_used_ring(mem, next_used_index, used_element)
+                .unwrap();
+
+            used_heads += 1;
         }
 
-        warn!("Receiving buffer is too small to hold frame of current size");
-        Err(FrontendError::DescriptorChainTooSmall)
-    }
+        let packet_start_addr =
+            packet_start_addr.ok_or(FrontendError::AttemptToWriteEmptyPacket)?;
 
-    // Copies a single frame from `self.rx_frame_buf` into the guest.
-    fn do_write_frame_to_guest(&mut self) -> Result<(), FrontendError> {
-        // This is safe since we checked in the event handler that the device is activated.
-        let mem = self.device_state.mem().unwrap();
+        let mut end_packet_processing = || {
+            // Update the queue's internals as this is the end of this
+            // packet being written.
+            self.queues[RX_INDEX].next_used += Wrapping(used_heads);
+            self.queues[RX_INDEX].num_added += Wrapping(used_heads);
 
-        let queue = &mut self.queues[RX_INDEX];
-        let head_descriptor = queue.pop_or_enable_notification(mem).ok_or_else(|| {
-            self.metrics.no_rx_avail_buffer.inc();
-            FrontendError::EmptyQueue
-        })?;
-        let head_index = head_descriptor.index;
+            // Update the used ring with what we used to process the packet.
+            self.queues[RX_INDEX].set_used_ring_idx((next_used + Wrapping(used_heads)).0, mem);
 
-        let result = Self::write_to_descriptor_chain(
-            mem,
-            &self.rx_frame_buf[..self.rx_bytes_read],
-            head_descriptor,
-            &self.metrics,
-        );
-        // Mark the descriptor chain as used. If an error occurred, skip the descriptor chain.
-        let used_len = if result.is_err() {
-            self.metrics.rx_fails.inc();
-            0
-        } else {
-            // Safe to unwrap because a frame must be smaller than 2^16 bytes.
-            u32::try_from(self.rx_bytes_read).unwrap()
+            // Clear the partial write info if there was one.
+            self.rx_partial_write = None;
         };
-        queue.add_used(mem, head_index, used_len).map_err(|err| {
-            error!("Failed to add available descriptor {}: {}", head_index, err);
-            FrontendError::AddUsed
-        })?;
 
-        result
-    }
+        if let Some(err) = error {
+            // There was an error during writing.
+            end_packet_processing();
 
-    // Copies a single frame from `self.rx_frame_buf` into the guest. In case of an error retries
-    // the operation if possible. Returns true if the operation was successfull.
-    fn write_frame_to_guest(&mut self) -> bool {
-        let max_iterations = self.queues[RX_INDEX].actual_size();
-        for _ in 0..max_iterations {
-            match self.do_write_frame_to_guest() {
-                Ok(()) => return true,
-                Err(FrontendError::EmptyQueue) | Err(FrontendError::AddUsed) => {
-                    return false;
-                }
-                Err(_) => {
-                    // retry
-                    continue;
-                }
+            self.metrics.rx_fails.inc();
+
+            // `next_used` is pointing at the first descriptor used to process the packet.
+            // We used `used_heads` descriptors to process the packet. Go over all of them
+            // and overwrite them with 0 len to discard them.
+            for i in 0..used_heads {
+                let next_used_index = (next_used + Wrapping(i)).0 % actual_size;
+
+                // SAFETY:
+                // This should never panic as we provide an index within
+                // correct bounds.
+                let mut used_element = self.queues[RX_INDEX]
+                    .read_used_ring(mem, next_used_index)
+                    .unwrap();
+                used_element.len = 0;
+                self.queues[RX_INDEX]
+                    .write_used_ring(mem, next_used_index, used_element)
+                    .unwrap();
             }
-        }
 
-        false
+            Err(err)
+        } else if slice.is_empty() {
+            // The packet was fully written.
+            end_packet_processing();
+
+            self.metrics
+                .rx_bytes_count
+                .add(usize_to_u64(self.rx_bytes_read));
+            self.metrics.rx_packets_count.inc();
+
+            // Update the number of descriptor heads used to store the packet.
+            // SAFETY:
+            // The packet_start_addr is a valid guest address and we check
+            // memory boundaries.
+            #[allow(clippy::transmute_ptr_to_ref)]
+            let header: &mut virtio_net_hdr_v1 = unsafe {
+                let header_slice = mem
+                    .get_slice(packet_start_addr, std::mem::size_of::<virtio_net_hdr_v1>())
+                    .map_err(FrontendError::GuestMemory)?;
+                std::mem::transmute(header_slice.ptr_guard_mut().as_ptr())
+            };
+            header.num_buffers = used_heads;
+
+            Ok(())
+        } else {
+            // The packet could not be fully written to the guest.
+            // Save the necessary info to use it during the next invocation.
+            self.metrics.rx_partial_writes.inc();
+
+            if let Some(pw) = &mut self.rx_partial_write {
+                pw.bytes_written = self.rx_bytes_read - slice.len();
+                pw.used_heads = used_heads;
+            } else {
+                let pw = PartialWrite {
+                    bytes_written: self.rx_bytes_read - slice.len(),
+                    used_heads,
+                    packet_start_addr,
+                };
+                self.rx_partial_write = Some(pw);
+            }
+
+            Err(FrontendError::EmptyQueue)
+        }
     }
 
     // Tries to detour the frame to MMDS and if MMDS doesn't accept it, sends it on the host TAP.
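
The used-ring bookkeeping above hinges on `next_used` being a free-running `Wrapping<u16>` counter, with the concrete ring slot obtained modulo the queue's actual size. A self-contained sketch of that arithmetic:

    use std::num::Wrapping;

    // Ring slot for the `offset`-th head of the current packet, mirroring
    // `(next_used + Wrapping(used_heads)).0 % actual_size` in the code above.
    fn used_ring_slot(next_used: Wrapping<u16>, offset: u16, actual_size: u16) -> u16 {
        (next_used + Wrapping(offset)).0 % actual_size
    }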
@@ -1027,6 +1133,7 @@ pub mod tests {
             | 1 << VIRTIO_NET_F_HOST_TSO4
             | 1 << VIRTIO_NET_F_HOST_TSO6
             | 1 << VIRTIO_NET_F_HOST_UFO
+            | 1 << VIRTIO_NET_F_MRG_RXBUF
             | 1 << VIRTIO_F_VERSION_1
             | 1 << VIRTIO_RING_F_EVENT_IDX;