Skip to content

Commit 0e50a40

Browse files
committed
Add simplified integration test case.
Signed-off-by: Steve Simpson <[email protected]>
1 parent a301d2d commit 0e50a40

File tree

1 file changed

+85
-10
lines changed

1 file changed

+85
-10
lines changed

integration/integration_memberlist_single_binary_test.go

Lines changed: 85 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package integration
55
import (
66
"crypto/x509"
77
"crypto/x509/pkix"
8+
"fmt"
89
"os"
910
"path/filepath"
1011
"testing"
@@ -109,16 +110,16 @@ func testSingleBinaryEnv(t *testing.T, tlsEnabled bool) {
109110

110111
func newSingleBinary(name string, servername string, join string) *e2ecortex.CortexService {
111112
flags := map[string]string{
112-
"-ingester.final-sleep": "0s",
113-
"-ingester.join-after": "0s", // join quickly
114-
"-ingester.min-ready-duration": "0s",
115-
"-ingester.concurrent-flushes": "10",
116-
"-ingester.max-transfer-retries": "0", // disable
117-
"-ingester.num-tokens": "512",
118-
"-ingester.observe-period": "5s", // to avoid conflicts in tokens
119-
"-ring.store": "memberlist",
120-
"-memberlist.bind-port": "8000",
121-
"-memberlist.pullpush-interval": "3s", // speed up state convergence to make test faster and avoid flakiness
113+
"-ingester.final-sleep": "0s",
114+
"-ingester.join-after": "0s", // join quickly
115+
"-ingester.min-ready-duration": "0s",
116+
"-ingester.concurrent-flushes": "10",
117+
"-ingester.max-transfer-retries": "0", // disable
118+
"-ingester.num-tokens": "512",
119+
"-ingester.observe-period": "5s", // to avoid conflicts in tokens
120+
"-ring.store": "memberlist",
121+
"-memberlist.bind-port": "8000",
122+
"-memberlist.left-ingesters-timeout": "600s", // effectively disable
122123
}
123124

124125
if join != "" {
@@ -145,3 +146,77 @@ func newSingleBinary(name string, servername string, join string) *e2ecortex.Cor
145146
serv.SetBackoff(backOff)
146147
return serv
147148
}
149+
150+
func TestSingleBinaryWithMemberlistScaling(t *testing.T) {
151+
s, err := e2e.NewScenario(networkName)
152+
require.NoError(t, err)
153+
defer s.Close()
154+
155+
dynamo := e2edb.NewDynamoDB()
156+
require.NoError(t, s.StartAndWaitReady(dynamo))
157+
require.NoError(t, writeFileToSharedDir(s, cortexSchemaConfigFile, []byte(cortexSchemaConfigYaml)))
158+
159+
// Scale up instances. These numbers seem enough to reliably reproduce some unwanted
160+
// consequences of slow propagation, such as missing tombstones.
161+
162+
maxCortex := 20
163+
minCortex := 3
164+
instances := make([]*e2ecortex.CortexService, 0)
165+
166+
for i := 0; i < maxCortex; i++ {
167+
name := fmt.Sprintf("cortex-%d", i+1)
168+
join := ""
169+
if i > 0 {
170+
join = fmt.Sprintf("%s-cortex-1:8000", networkName)
171+
}
172+
c := newSingleBinary(name, "", join)
173+
require.NoError(t, s.StartAndWaitReady(c))
174+
instances = append(instances, c)
175+
}
176+
177+
// Sanity check the ring membership and give each instance time to see every other instance.
178+
179+
for _, c := range instances {
180+
require.NoError(t, c.WaitSumMetrics(e2e.Equals(float64(maxCortex)), "cortex_ring_members"))
181+
require.NoError(t, c.WaitSumMetrics(e2e.Equals(0), "memberlist_client_kv_store_value_tombstones"))
182+
}
183+
184+
// Scale down as fast as possible but cleanly, in order to send out tombstones.
185+
186+
for len(instances) > minCortex {
187+
i := len(instances) - 1
188+
require.NoError(t, s.Stop(instances[i]))
189+
instances = instances[:i]
190+
}
191+
192+
// If all is working as expected, then tombstones should have propagated easily within this time period.
193+
// The logging is mildly spammy, but it has proven extremely useful for debugging convergence cases.
194+
// We don't use WaitSumMetrics [over all instances] here so we can log the per-instance metrics.
195+
196+
expectedRingMembers := float64(minCortex)
197+
expectedTombstones := float64(maxCortex - minCortex)
198+
199+
require.Eventually(t, func() bool {
200+
ok := true
201+
for _, c := range instances {
202+
metrics, err := c.SumMetrics([]string{
203+
"cortex_ring_members", "memberlist_client_kv_store_value_tombstones",
204+
})
205+
require.NoError(t, err)
206+
t.Logf("%s: cortex_ring_members=%f memberlist_client_kv_store_value_tombstones=%f\n",
207+
c.Name(), metrics[0], metrics[1])
208+
209+
// Don't short circuit the check, so we log the state for all instances.
210+
if metrics[0] != expectedRingMembers {
211+
ok = false
212+
}
213+
if metrics[1] != expectedTombstones {
214+
ok = false
215+
}
216+
217+
}
218+
return ok
219+
}, 30*time.Second, 2*time.Second,
220+
"expected all instances to have %f ring members and %f tombstones",
221+
expectedRingMembers, expectedTombstones)
222+
}

0 commit comments

Comments
 (0)