@@ -209,7 +209,12 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
209209
210210 spin_lock_irqsave (& ndlp -> lock , iflags );
211211 ndlp -> nlp_flag |= NLP_IN_DEV_LOSS ;
212- ndlp -> nlp_flag &= ~NLP_NPR_2B_DISC ;
212+
213+ /* If there is a PLOGI in progress, and we are in a
214+ * NLP_NPR_2B_DISC state, don't turn off the flag.
215+ */
216+ if (ndlp -> nlp_state != NLP_STE_PLOGI_ISSUE )
217+ ndlp -> nlp_flag &= ~NLP_NPR_2B_DISC ;
213218
214219 /*
215220 * The backend does not expect any more calls associated with this
@@ -340,6 +345,37 @@ static void lpfc_check_inactive_vmid(struct lpfc_hba *phba)
340345 lpfc_destroy_vport_work_array (phba , vports );
341346}
342347
348+ /**
349+ * lpfc_check_nlp_post_devloss - Re-take the ndlp reference dropped by devloss
350+ * @vport: Pointer to vport object (read here only for the log message).
351+ * @ndlp: Pointer to remote node object whose save_flags are examined.
352+ *
353+ * If NLP_IN_RECOV_POST_DEV_LOSS is set in ndlp->save_flags (marked by
354+ * lpfc_dev_loss_tmo_handler for a fabric node still in recovery), clear it and
355+ * call lpfc_nlp_get to offset that handler's lpfc_nlp_put; else do nothing.
356+ **/
357+ void
358+ lpfc_check_nlp_post_devloss (struct lpfc_vport * vport ,
359+ struct lpfc_nodelist * ndlp )
360+ {
361+ unsigned long iflags ;
362+
363+ spin_lock_irqsave (& ndlp -> lock , iflags ); /* save_flags is tested and cleared under ndlp->lock */
364+ if (ndlp -> save_flags & NLP_IN_RECOV_POST_DEV_LOSS ) {
365+ ndlp -> save_flags &= ~NLP_IN_RECOV_POST_DEV_LOSS ; /* cleared before the get, so a later caller will not see the mark again */
366+ spin_unlock_irqrestore (& ndlp -> lock , iflags ); /* lock is dropped across the get and the log call */
367+ lpfc_nlp_get (ndlp ); /* return value is not checked here */
368+ lpfc_printf_vlog (vport , KERN_INFO , LOG_DISCOVERY | LOG_NODE ,
369+ "8438 Devloss timeout reversed on DID x%x "
370+ "refcnt %d ndlp %p flag x%x "
371+ "port_state = x%x\n" ,
372+ ndlp -> nlp_DID , kref_read (& ndlp -> kref ), ndlp ,
373+ ndlp -> nlp_flag , vport -> port_state );
374+ spin_lock_irqsave (& ndlp -> lock , iflags ); /* re-acquire so the single unlock below is balanced on both paths */
375+ }
376+ spin_unlock_irqrestore (& ndlp -> lock , iflags );
377+ }
378+
343379/**
344380 * lpfc_dev_loss_tmo_handler - Remote node devloss timeout handler
345381 * @ndlp: Pointer to remote node object.
@@ -358,6 +394,8 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
358394 uint8_t * name ;
359395 int warn_on = 0 ;
360396 int fcf_inuse = 0 ;
397+ bool recovering = false;
398+ struct fc_vport * fc_vport = NULL ;
361399 unsigned long iflags ;
362400
363401 vport = ndlp -> vport ;
@@ -394,6 +432,64 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
394432
395433 /* Fabric nodes are done. */
396434 if (ndlp -> nlp_type & NLP_FABRIC ) {
435+ spin_lock_irqsave (& ndlp -> lock , iflags );
436+ /* In massive vport configuration settings, it's possible
437+ * dev_loss_tmo fired during node recovery. So, check if
438+ * fabric nodes are in discovery states outstanding.
439+ */
440+ switch (ndlp -> nlp_DID ) {
441+ case Fabric_DID :
442+ fc_vport = vport -> fc_vport ;
443+ if (fc_vport &&
444+ fc_vport -> vport_state == FC_VPORT_INITIALIZING )
445+ recovering = true;
446+ break ;
447+ case Fabric_Cntl_DID :
448+ if (ndlp -> nlp_flag & NLP_REG_LOGIN_SEND )
449+ recovering = true;
450+ break ;
451+ case FDMI_DID :
452+ fallthrough ;
453+ case NameServer_DID :
454+ if (ndlp -> nlp_state >= NLP_STE_PLOGI_ISSUE &&
455+ ndlp -> nlp_state <= NLP_STE_REG_LOGIN_ISSUE )
456+ recovering = true;
457+ break ;
458+ }
459+ spin_unlock_irqrestore (& ndlp -> lock , iflags );
460+
461+ /* Mark an NLP_IN_RECOV_POST_DEV_LOSS flag to know if reversing
462+ * the following lpfc_nlp_put is necessary after fabric node is
463+ * recovered.
464+ */
465+ if (recovering ) {
466+ lpfc_printf_vlog (vport , KERN_INFO ,
467+ LOG_DISCOVERY | LOG_NODE ,
468+ "8436 Devloss timeout marked on "
469+ "DID x%x refcnt %d ndlp %p "
470+ "flag x%x port_state = x%x\n" ,
471+ ndlp -> nlp_DID , kref_read (& ndlp -> kref ),
472+ ndlp , ndlp -> nlp_flag ,
473+ vport -> port_state );
474+ spin_lock_irqsave (& ndlp -> lock , iflags );
475+ ndlp -> save_flags |= NLP_IN_RECOV_POST_DEV_LOSS ;
476+ spin_unlock_irqrestore (& ndlp -> lock , iflags );
477+ } else if (ndlp -> nlp_state == NLP_STE_UNMAPPED_NODE ) {
478+ /* Fabric node fully recovered before this dev_loss_tmo
479+ * queue work is processed. Thus, ignore the
480+ * dev_loss_tmo event.
481+ */
482+ lpfc_printf_vlog (vport , KERN_INFO ,
483+ LOG_DISCOVERY | LOG_NODE ,
484+ "8437 Devloss timeout ignored on "
485+ "DID x%x refcnt %d ndlp %p "
486+ "flag x%x port_state = x%x\n" ,
487+ ndlp -> nlp_DID , kref_read (& ndlp -> kref ),
488+ ndlp , ndlp -> nlp_flag ,
489+ vport -> port_state );
490+ return fcf_inuse ;
491+ }
492+
397493 lpfc_nlp_put (ndlp );
398494 return fcf_inuse ;
399495 }
@@ -423,6 +519,14 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
423519 ndlp -> nlp_state , ndlp -> nlp_rpi );
424520 }
425521
522+ /* If we are devloss, but we are in the process of rediscovering the
523+ * ndlp, don't issue a NLP_EVT_DEVICE_RM event.
524+ */
525+ if (ndlp -> nlp_state >= NLP_STE_PLOGI_ISSUE &&
526+ ndlp -> nlp_state <= NLP_STE_PRLI_ISSUE ) {
527+ return fcf_inuse ;
528+ }
529+
426530 if (!(ndlp -> fc4_xpt_flags & NVME_XPT_REGD ))
427531 lpfc_disc_state_machine (vport , ndlp , NULL , NLP_EVT_DEVICE_RM );
428532
@@ -4363,6 +4467,8 @@ lpfc_mbx_cmpl_fc_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
43634467 goto out ;
43644468 }
43654469
4470+ lpfc_check_nlp_post_devloss (vport , ndlp );
4471+
43664472 if (phba -> sli_rev < LPFC_SLI_REV4 )
43674473 ndlp -> nlp_rpi = mb -> un .varWords [0 ];
43684474
@@ -4540,9 +4646,10 @@ lpfc_nlp_counters(struct lpfc_vport *vport, int state, int count)
45404646void
45414647lpfc_nlp_reg_node (struct lpfc_vport * vport , struct lpfc_nodelist * ndlp )
45424648{
4543-
45444649 unsigned long iflags ;
45454650
4651+ lpfc_check_nlp_post_devloss (vport , ndlp );
4652+
45464653 spin_lock_irqsave (& ndlp -> lock , iflags );
45474654 if (ndlp -> fc4_xpt_flags & NLP_XPT_REGD ) {
45484655 /* Already registered with backend, trigger rescan */
0 commit comments