From 10a799651cca92351c0323abd74dbf7647fafbc8 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 24 Mar 2020 07:12:17 -0400 Subject: [PATCH 1/2] Sentinel: remove Thread.Sleep and throws We were throwing in event handlers...that's not good. Also post-thread sleep in timer callbacks. --- .../ConnectionMultiplexer.cs | 156 +++++++++--------- 1 file changed, 74 insertions(+), 82 deletions(-) diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.cs b/src/StackExchange.Redis/ConnectionMultiplexer.cs index edbc50529..f8e4dd7d4 100644 --- a/src/StackExchange.Redis/ConnectionMultiplexer.cs +++ b/src/StackExchange.Redis/ConnectionMultiplexer.cs @@ -2194,9 +2194,6 @@ internal void InitializeSentinel(LogProxy logProxy) } } - private const string FAILED_CONFIGURE_MASTER_FOR_SERVICE_MSG = - "Failed connecting to configured master for service: {0}, after {1} retries within interval {2}"; - /// /// Returns a managed connection to the master server indicated by /// the ServiceName in the config. @@ -2217,19 +2214,27 @@ public ConnectionMultiplexer GetSentinelMasterConnection(ConfigurationOptions co return sentinelConnectionChildren[config.ServiceName]; } - // Clear out the endpoints - config.EndPoints.Clear(); - - // Get an initial endpoint - var msg = string.Format(FAILED_CONFIGURE_MASTER_FOR_SERVICE_MSG, config.ServiceName, 5, 10); - EndPoint initialMasterEndPoint = Retry(5, 10, () => GetConfiguredMasterForService(config.ServiceName), msg); + // Get an initial endpoint - try twice + EndPoint newMasterEndPoint = GetConfiguredMasterForService(config.ServiceName) + ?? GetConfiguredMasterForService(config.ServiceName); - //SHADI: do - //{ - // initialMasterEndPoint = GetConfiguredMasterForService(config.ServiceName); - //} while(initialMasterEndPoint == null); + if (newMasterEndPoint == null) + { + throw new RedisConnectionException(ConnectionFailureType.UnableToConnect, + $"Sentinel: Failed connecting to configured master for service: {config.ServiceName}"); + } - config.EndPoints.Add(initialMasterEndPoint); + // Replace the master endpoint, if we found another one + // If not, assume the last state is the best we have and minimize the race + if (config.EndPoints.Count == 1) + { + config.EndPoints[0] = newMasterEndPoint; + } + else + { + config.EndPoints.Clear(); + config.EndPoints.Add(newMasterEndPoint); + } ConnectionMultiplexer connection = Connect(config, log); @@ -2259,35 +2264,37 @@ internal void OnManagedConnectionRestored(object sender, ConnectionFailedEventAr connection.sentinelMasterReconnectTimer.Dispose(); connection.sentinelMasterReconnectTimer = null; } - - // Run a switch to make sure we have update-to-date - // information about which master we should connect to - SwitchMaster(e.EndPoint, connection); - try { - // Verify that the reconnected endpoint is a master, - // and the correct one otherwise we should reconnect - if (connection.GetServer(e.EndPoint).IsSlave || e.EndPoint != connection.currentSentinelMasterEndPoint) + + // Run a switch to make sure we have update-to-date + // information about which master we should connect to + SwitchMaster(e.EndPoint, connection); + + try + { + // Verify that the reconnected endpoint is a master, + // and the correct one otherwise we should reconnect + if (connection.GetServer(e.EndPoint).IsSlave || e.EndPoint != connection.currentSentinelMasterEndPoint) + { + // This isn't a master, so try connecting again + SwitchMaster(e.EndPoint, connection); + } + } + catch (Exception) { - // Wait for things to smooth out - Thread.Sleep(200); + // If we get here it means that we tried to reconnect to a server that is no longer + // considered a master by Sentinel and was removed from the list of endpoints. - // This isn't a master, so try connecting again + // If we caught an exception, we may have gotten a stale endpoint + // we are not aware of, so retry SwitchMaster(e.EndPoint, connection); } } catch (Exception) { - // If we get here it means that we tried to reconnect to a server that is no longer - // considered a master by Sentinel and was removed from the list of endpoints. - - // Wait for things to smooth out - Thread.Sleep(200); - - // If we caught an exception, we may have gotten a stale endpoint - // we are not aware of, so retry - SwitchMaster(e.EndPoint, connection); + // Log, but don't throw in an event handler + // TODO: Log via new event handler? a la ConnectionFailed? } } @@ -2301,7 +2308,15 @@ internal void OnManagedConnectionFailed(object sender, ConnectionFailedEventArgs { connection.sentinelMasterReconnectTimer = new Timer((_) => { - SwitchMaster(e.EndPoint, connection); + try + { + // Attempt, but do not fail here + SwitchMaster(e.EndPoint, connection); + } + catch (Exception) + { + + } }, null, TimeSpan.FromSeconds(0), TimeSpan.FromSeconds(1)); } } @@ -2321,11 +2336,11 @@ internal EndPoint GetConfiguredMasterForService(string serviceName) => internal EndPoint currentSentinelMasterEndPoint; /// - /// Switches the SentinelMasterConnection over to a new master. + /// Switches the SentinelMasterConnection over to a new master. /// - /// the endpoing responsible for the switch - /// the connection that should be switched over to a new master endpoint - /// log output + /// The endpoing responsible for the switch + /// The connection that should be switched over to a new master endpoint + /// Log to write to, if any internal void SwitchMaster(EndPoint switchBlame, ConnectionMultiplexer connection, TextWriter log = null) { if (log == null) log = TextWriter.Null; @@ -2334,56 +2349,33 @@ internal void SwitchMaster(EndPoint switchBlame, ConnectionMultiplexer connectio { string serviceName = connection.RawConfig.ServiceName; - // Get new master - var msg = string.Format(FAILED_CONFIGURE_MASTER_FOR_SERVICE_MSG, serviceName, 5, 10); - EndPoint masterEndPoint = Retry(5, 10, () => GetConfiguredMasterForService(serviceName), msg); - - //Shadi: do - //{ - // masterEndPoint = GetConfiguredMasterForService(serviceName); - //} while(masterEndPoint == null); - - connection.currentSentinelMasterEndPoint = masterEndPoint; + // Get new master - try twice + EndPoint newMasterEndPoint = GetConfiguredMasterForService(serviceName) + ?? GetConfiguredMasterForService(serviceName); - if (!connection.servers.Contains(masterEndPoint)) + if (newMasterEndPoint == null) { - connection.RawConfig.EndPoints.Clear(); - connection.servers.Clear(); - //connection._serverSnapshot = new ServerEndPoint[0]; - connection.RawConfig.EndPoints.Add(masterEndPoint); - Trace(string.Format("Switching master to {0}", masterEndPoint)); - // Trigger a reconfigure - connection.ReconfigureAsync(false, false, logProxy, switchBlame, string.Format("master switch {0}", serviceName), false, CommandFlags.PreferMaster).Wait(); + throw new RedisConnectionException(ConnectionFailureType.UnableToConnect, + $"Sentinel: Failed connecting to switch master for service: {serviceName}"); } - UpdateSentinelAddressList(serviceName); - } - } - - /// - /// retry mechanism that executing func t times to get a non-null result within a constant interval between retries. - /// if t exceeds the times parameter without success, it will throw an exception with a descriptive message - /// - /// A generic result type expected to return - /// retries times trying to get a result before throw exception - /// interval between retries in millisconds - /// delegate repeatedly runs until getting the desired result - /// message to be used within NullReferenceException that should be thrown in case - /// there is no result after n retries - /// object of type T - private T Retry(int times, int interval, Func func, string message) - { - for (var t = 0; t < times; t++) - { - var result = func(); - if (result != null) + if (newMasterEndPoint != null) { - return result; + connection.currentSentinelMasterEndPoint = newMasterEndPoint; + + if (!connection.servers.Contains(newMasterEndPoint)) + { + connection.RawConfig.EndPoints.Clear(); + connection.servers.Clear(); + connection.RawConfig.EndPoints.Add(newMasterEndPoint); + Trace(string.Format("Switching master to {0}", newMasterEndPoint)); + // Trigger a reconfigure + connection.ReconfigureAsync(false, false, logProxy, switchBlame, string.Format("master switch {0}", serviceName), false, CommandFlags.PreferMaster).Wait(); + } + + UpdateSentinelAddressList(serviceName); } - Thread.Sleep(interval); } - - throw new NullReferenceException(message); } internal void UpdateSentinelAddressList(string serviceName) From a241223f4061474ab38b56fb7eaf0502e0150679 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 24 Mar 2020 15:16:46 -0400 Subject: [PATCH 2/2] Moving 2 master failover races to local DEBUG builds --- tests/StackExchange.Redis.Tests/Sentinel.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/StackExchange.Redis.Tests/Sentinel.cs b/tests/StackExchange.Redis.Tests/Sentinel.cs index aec7380ab..e3613babc 100644 --- a/tests/StackExchange.Redis.Tests/Sentinel.cs +++ b/tests/StackExchange.Redis.Tests/Sentinel.cs @@ -403,6 +403,7 @@ public async Task SentinelFailoverAsyncTest() } } +#if DEBUG [Fact] public async Task GetSentinelMasterConnectionFailoverTest() { @@ -467,6 +468,7 @@ await UntilCondition(TimeSpan.FromSeconds(10), () => { var conn1 = Conn.GetSentinelMasterConnection(ServiceOptions); Assert.NotEqual(endpoint, conn1.currentSentinelMasterEndPoint.ToString()); } +#endif [Fact] public async Task GetSentinelMasterConnectionWriteReadFailover()