Skip to content

Commit a1ee556

Browse files
ellemoutonViktorT-11
authored andcommitted
gbn: move resending syncing logic into struct
This commit moves the logic for awaiting the parties to be synced after resending the queue into the a separate syncer struct. This is done to make the resending logic more readable easier to reason about.
1 parent bfe91e0 commit a1ee556

File tree

3 files changed

+312
-269
lines changed

3 files changed

+312
-269
lines changed

gbn/queue.go

Lines changed: 19 additions & 259 deletions
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,6 @@ import (
77
"github.com/btcsuite/btclog"
88
)
99

10-
const (
11-
// awaitingTimeoutMultiplier defines the multiplier we use when
12-
// multiplying the resend timeout during a resend catch up, resulting in
13-
// duration we wait for the resend catch up to complete before timing
14-
// out.
15-
// We set this to 3X the resend timeout. The reason we wait exactly 3X
16-
// the resend timeout is that we expect that the max time correct
17-
// behavior would take, would be:
18-
// * 1X the resendTimeout for the time it would take for the party
19-
// respond with an ACK for the last packet in the resend queue, i.e. the
20-
// awaitedACK.
21-
// * 1X the resendTimeout while waiting in proceedAfterTime before
22-
// sending the awaitedACKSignal.
23-
// * 1X extra resendTimeout as buffer, to ensure that we have enough
24-
// time to process the ACKS/NACKS by other party + some extra margin.
25-
awaitingTimeoutMultiplier = 3
26-
)
27-
2810
type queueCfg struct {
2911
// s is the maximum sequence number used to label packets. Packets
3012
// are labelled with incrementing sequence numbers modulo s.
@@ -69,41 +51,7 @@ type queue struct {
6951
// topMtx is used to guard sequenceTop.
7052
topMtx sync.RWMutex
7153

72-
// awaitedACK defines the sequence number for the last packet in the
73-
// resend queue. If we receive an ACK for this sequence number during
74-
// the resend catch up, we wait for the duration of the resend timeout,
75-
// and then proceed to send new packets, unless we receive the
76-
// awaitedNACK during the wait time. If that happens, we will proceed
77-
// send new packets as soon as we have processed the NACK.
78-
awaitedACK uint8
79-
80-
// awaitedNACK defines the sequence number that in case we get a NACK
81-
// with that sequence number during the resend catch up, we'd consider
82-
// the catch up to be complete and we can proceed to send new packets.
83-
awaitedNACK uint8
84-
85-
// awaitingCatchUp is set to true if we are awaiting a catch up after we
86-
// have resent the queue.
87-
awaitingCatchUp bool
88-
89-
// awaitingCatchUpMu must be held when accessing or mutating the values
90-
// of awaitedACK, awaitedNACK and awaitingCatchUp.
91-
awaitingCatchUpMu sync.RWMutex
92-
93-
// awaitedACKSignal is used to signal that we have received the awaited
94-
// ACK after resending the queue, and have waited for the duration of
95-
// the resend timeout. Once this signal is received, we can proceed to
96-
// send new packets.
97-
awaitedACKSignal chan struct{}
98-
99-
// awaitedNACKSignal is used to signal that we have received the awaited
100-
// NACK after resending the queue. Once this signal is received, we can
101-
// proceed to send new packets.
102-
awaitedNACKSignal chan struct{}
103-
104-
// caughtUpSignal is used to signal that we have caught up after
105-
// awaiting the catch up after resending the queue.
106-
caughtUpSignal chan struct{}
54+
syncer *syncer
10755

10856
lastResend time.Time
10957

@@ -116,14 +64,15 @@ func newQueue(cfg *queueCfg) *queue {
11664
cfg.log = log
11765
}
11866

119-
return &queue{
120-
cfg: cfg,
121-
content: make([]*PacketData, cfg.s),
122-
awaitedACKSignal: make(chan struct{}, 1),
123-
awaitedNACKSignal: make(chan struct{}, 1),
124-
caughtUpSignal: make(chan struct{}, 1),
125-
quit: make(chan struct{}),
67+
q := &queue{
68+
cfg: cfg,
69+
content: make([]*PacketData, cfg.s),
70+
quit: make(chan struct{}),
12671
}
72+
73+
q.syncer = newSyncer(cfg.s, cfg.log, cfg.timeout, q.quit)
74+
75+
return q
12776
}
12877

12978
func (q *queue) stop() {
@@ -155,63 +104,9 @@ func (q *queue) addPacket(packet *PacketData) {
155104
q.sequenceTop = (q.sequenceTop + 1) % q.cfg.s
156105
}
157106

158-
// resend resends the current contents of the queue, by invoking the callback
159-
// for each packet that needs to be resent, and then awaits that we either
160-
// receive the expected ACK or NACK after resending the queue, before returning.
161-
//
162-
// To understand why we need to await the awaited ACK/NACK after resending the
163-
// queue, it ensures that we don't end up in a situation where we resend the
164-
// queue over and over again due to latency and delayed NACKs by the other
165-
// party.
166-
//
167-
// Consider the following scenario:
168-
// 1.
169-
// Alice sends packets 1, 2, 3 & 4 to Bob.
170-
// 2.
171-
// Bob receives packets 1, 2, 3 & 4, and sends back the respective ACKs.
172-
// 3.
173-
// Alice receives ACKs for packets 1 & 2, but due to latency the ACKs for
174-
// packets 3 & 4 are delayed and aren't received until Alice resend timeout
175-
// has passed, which leads to Alice resending packets 3 & 4. Alice will after
176-
// that receive the delayed ACKs for packets 3 & 4, but will consider that as
177-
// the ACKs for the resent packets, and not the original packets which they were
178-
// actually sent for. If we didn't wait after resending the queue, Alice would
179-
// then proceed to send more packets (5 & 6).
180-
// 4.
181-
// When Bob receives the resent packets 3 & 4, Bob will respond with NACK 5. Due
182-
// to latency, the packets 5 & 6 that Alice sent in step (3) above will then be
183-
// received by Bob, and be processed as the correct response to the NACK 5. Bob
184-
// will after that await packet 7.
185-
// 5.
186-
// Alice will receive the NACK 5, and now resend packets 5 & 6. But as Bob is
187-
// now awaiting packet 7, this send will lead to a NACK 7. But due to latency,
188-
// if Alice doesn't wait resending the queue, Alice will proceed to send new
189-
// packet(s) before receiving the NACK 7.
190-
// 6.
191-
// This resend loop would continue indefinitely, so we need to ensure that Alice
192-
// waits after she has resent the queue, to ensure that she doesn't proceed to
193-
// send new packets before she is sure that both parties are in sync.
194-
//
195-
// To ensure that we are in sync, after we have resent the queue, we will await
196-
// that we either:
197-
// 1. Receive a NACK for the sequence number succeeding the last packet in the
198-
// resent queue i.e. in step (3) above, that would be NACK 5.
199-
// OR
200-
// 2. Receive an ACK for the last packet in the resent queue i.e. in step (3)
201-
// above, that would be ACK 4. After we receive the expected ACK, we will then
202-
// wait for the duration of the resend timeout before continuing. The reason why
203-
// we wait for the resend timeout before continuing, is that the ACKs we are
204-
// getting after a resend, could be delayed ACKs for the original packets we
205-
// sent, and not ACKs for the resent packets. In step (3) above, the ACKs for
206-
// packets 3 & 4 that Alice received were delayed ACKs for the original packets.
207-
// If Alice would have immediately continued to send new packets (5 & 6) after
208-
// receiving the ACK 4, she would have then received the NACK 5 from Bob which
209-
// was the actual response to the resent queue. But as Alice had already
210-
// continued to send packets 5 & 6 when receiving the NACK 5, the resend queue
211-
// response to that NACK would cause the resend loop to continue indefinitely.
212-
//
213-
// When either of the 2 conditions above are met, we will consider both parties
214-
// to be in sync, and we can proceed to send new packets.
107+
// resend resends the current contents of the queue. It allows some time for the
108+
// two parties to be seen as synced; this may fail in which case the caller is
109+
// expected to call resend again.
215110
func (q *queue) resend() error {
216111
if time.Since(q.lastResend) < q.cfg.timeout {
217112
q.cfg.log.Tracef("Resent the queue recently.")
@@ -223,10 +118,6 @@ func (q *queue) resend() error {
223118
return nil
224119
}
225120

226-
q.lastResend = time.Now()
227-
228-
q.awaitingCatchUpMu.Lock()
229-
230121
q.baseMtx.RLock()
231122
base := q.sequenceBase
232123
q.baseMtx.RUnlock()
@@ -236,35 +127,20 @@ func (q *queue) resend() error {
236127
q.topMtx.RUnlock()
237128

238129
if base == top {
239-
q.awaitingCatchUpMu.Unlock()
130+
q.syncer.reset()
240131

241132
return nil
242133
}
243134

244135
// Prepare the queue for awaiting the resend catch up.
245-
q.awaitedACK = (q.cfg.s + top - 1) % q.cfg.s
246-
q.awaitedNACK = top
247-
248-
q.cfg.log.Tracef("Set awaitedACK to %d & awaitedNACK to %d",
249-
q.awaitedACK, q.awaitedNACK)
250-
251-
q.awaitingCatchUp = true
252-
253-
// Drain the caughtUpSignal channel, in case no proceedAfterTime
254-
// func was executed after the last resend catch up.
255-
select {
256-
case <-q.caughtUpSignal:
257-
default:
258-
}
136+
q.syncer.initResendUpTo(top)
259137

260138
q.cfg.log.Tracef("Resending the queue")
261139

262140
for base != top {
263141
packet := q.content[base]
264142

265143
if err := q.cfg.sendPkt(packet); err != nil {
266-
q.awaitingCatchUpMu.Unlock()
267-
268144
return err
269145
}
270146

@@ -273,61 +149,12 @@ func (q *queue) resend() error {
273149
q.cfg.log.Tracef("Resent %d", packet.Seq)
274150
}
275151

276-
// We hold the awaitingCatchUpMu mutex for the duration of the resend to
277-
// ensure that we don't process the delayed ACKs for the packets we are
278-
// resending, during the resend. If that would happen, we would start
279-
// the "proceedAfterTime" function timeout while still resending
280-
// packets. That could mean that the NACK that the resent packets will
281-
// trigger, might be received after the timeout has passed. That would
282-
// cause the resend loop to trigger once more.
283-
q.awaitingCatchUpMu.Unlock()
284-
285-
// Then await until we know that both parties are in sync.
286-
q.awaitCatchUp()
152+
// Then wait until we know that both parties are in sync.
153+
q.syncer.waitForSync()
287154

288155
return nil
289156
}
290157

291-
// awaitCatchUp awaits that we either receive the awaited ACK or NACK signal
292-
// before returning. If we don't receive the awaited ACK or NACK signal before
293-
// 3X the resend timeout, the function will also return.
294-
// See the docs for the resend function for more details on why we need to await
295-
// the awaited ACK or NACK signal.
296-
func (q *queue) awaitCatchUp() {
297-
q.cfg.log.Tracef("Awaiting catchup after resending the queue")
298-
299-
select {
300-
case <-q.quit:
301-
return
302-
case <-q.awaitedACKSignal:
303-
q.cfg.log.Tracef("Got awaitedACKSignal")
304-
case <-q.awaitedNACKSignal:
305-
q.cfg.log.Tracef("Got awaitedNACKSignal")
306-
case <-time.After(q.cfg.timeout * awaitingTimeoutMultiplier):
307-
q.cfg.log.Tracef("Timed out while awaiting catchup")
308-
309-
q.awaitingCatchUpMu.Lock()
310-
q.awaitingCatchUp = false
311-
312-
// Drain both the ACK & NACK signal channels.
313-
select {
314-
case <-q.awaitedACKSignal:
315-
default:
316-
}
317-
318-
select {
319-
case <-q.awaitedNACKSignal:
320-
default:
321-
}
322-
323-
q.awaitingCatchUpMu.Unlock()
324-
}
325-
326-
// Send a caughtUpSignal to indicate that we have caught up after
327-
// resending the queue.
328-
q.caughtUpSignal <- struct{}{}
329-
}
330-
331158
// processACK processes an incoming ACK of a given sequence number. The function
332159
// returns true if the passed seq is an ACK for a packet we have sent but not
333160
// yet received an ACK for.
@@ -340,20 +167,7 @@ func (q *queue) processACK(seq uint8) bool {
340167
return false
341168
}
342169

343-
// If we are awaiting a catch up, and the ACK is the awaited ACK, we
344-
// start the proceedAfterTime function in a goroutine, which will send
345-
// an awaitedACKSignal if we're still awaiting the resend catch up when
346-
// the resend timeout has expired.
347-
q.awaitingCatchUpMu.RLock()
348-
if seq == q.awaitedACK && q.awaitingCatchUp {
349-
q.cfg.log.Tracef("Got awaited ACK")
350-
351-
// We start the proceedAfterTime function in a goroutine, as we
352-
// don't want to block the processing of other NACKs/ACKs while
353-
// we're waiting for the resend timeout to expire.
354-
go q.proceedAfterTime()
355-
}
356-
q.awaitingCatchUpMu.RUnlock()
170+
q.syncer.processAck(seq)
357171

358172
q.baseMtx.Lock()
359173
defer q.baseMtx.Unlock()
@@ -406,9 +220,6 @@ func (q *queue) processACK(seq uint8) bool {
406220
// the NACK sequence number. This equivalent to receiving the ACKs for the
407221
// packets before the NACK sequence number.
408222
func (q *queue) processNACK(seq uint8) (bool, bool) {
409-
q.awaitingCatchUpMu.Lock()
410-
defer q.awaitingCatchUpMu.Unlock()
411-
412223
q.baseMtx.Lock()
413224
defer q.baseMtx.Unlock()
414225

@@ -417,27 +228,9 @@ func (q *queue) processNACK(seq uint8) (bool, bool) {
417228

418229
q.cfg.log.Tracef("Received NACK %d", seq)
419230

420-
bumped := false
421-
422-
if q.awaitingCatchUp && seq == q.awaitedNACK {
423-
q.cfg.log.Tracef("Sending awaitedNACKSignal")
424-
q.awaitedNACKSignal <- struct{}{}
425-
426-
q.awaitingCatchUp = false
427-
428-
// In case the awaitedNACK is the same as sequenceTop, we can
429-
// bump the base to be equal to sequenceTop, without triggering
430-
// a new resend.
431-
if seq == q.sequenceTop && q.sequenceBase != q.sequenceTop {
432-
q.sequenceBase = q.sequenceTop
433-
434-
bumped = true
435-
}
231+
var bumped bool
436232

437-
// If we receive the awaited NACK, we shouldn't trigger a new
438-
// resend, as we can now proceed to send new packets.
439-
return false, bumped
440-
}
233+
q.syncer.processNack(seq)
441234

442235
// If the NACK is the same as sequenceTop, and we weren't awaiting this
443236
// NACK as part of the resend catch up, it probably means that queue
@@ -474,39 +267,6 @@ func (q *queue) processNACK(seq uint8) (bool, bool) {
474267
return true, bumped
475268
}
476269

477-
// proceedAfterTime will wait for the resendTimeout and then send an
478-
// awaitedACKSignal, if we're still awaiting the resend catch up.
479-
func (q *queue) proceedAfterTime() {
480-
// We await for the duration of the resendTimeout before sending the
481-
// awaitedACKSignal, as that's the time we'd expect it to take for the
482-
// other party to respond with a NACK, if the resent last packet in the
483-
// queue would lead to a NACK. If we receive the awaitedNACKSignal
484-
// before the timeout, we will receive the caughtUpSignal, and we can
485-
// stop the execution early.
486-
select {
487-
case <-q.quit:
488-
return
489-
case <-q.caughtUpSignal:
490-
q.cfg.log.Tracef("Already caught up.")
491-
492-
return
493-
case <-time.After(q.cfg.timeout):
494-
q.awaitingCatchUpMu.Lock()
495-
496-
if q.awaitingCatchUp {
497-
q.cfg.log.Tracef("Sending awaitedACKSignal")
498-
q.awaitedACKSignal <- struct{}{}
499-
500-
q.awaitingCatchUp = false
501-
} else {
502-
q.cfg.log.Tracef("Ending proceedAfterTime without any " +
503-
"action")
504-
}
505-
506-
q.awaitingCatchUpMu.Unlock()
507-
}
508-
}
509-
510270
// containsSequence is used to determine if a number, seq, is between two other
511271
// numbers, base and top, where all the numbers lie in a finite field (modulo
512272
// space) s.

gbn/queue_test.go

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package gbn
22

33
import (
4-
"errors"
54
"sync"
65
"testing"
76
"time"
@@ -142,15 +141,8 @@ func resend(t *testing.T, q *queue, wg *sync.WaitGroup) {
142141

143142
// We also ensure that the above goroutine is has started the resend
144143
// before this function returns.
145-
err := wait.NoError(func() error {
146-
q.awaitingCatchUpMu.Lock()
147-
defer q.awaitingCatchUpMu.Unlock()
148-
149-
if !q.awaitingCatchUp {
150-
return errors.New("Hasn't resent yet")
151-
}
152-
153-
return nil
144+
err := wait.Predicate(func() bool {
145+
return q.syncer.getState() == syncStateResending
154146
}, time.Second)
155147
require.NoError(t, err)
156148
}

0 commit comments

Comments
 (0)