From aae8dbdc15ec0f10efaa98c87384d2c2a069b936 Mon Sep 17 00:00:00 2001 From: Shannon Date: Tue, 10 Dec 2019 15:44:47 +0100 Subject: [PATCH] Updates to allow the sql main dom lock to be able to wait until the previous maindom has fully unwound before it can be taken over. --- src/Umbraco.Core/Runtime/MainDom.cs | 13 +- src/Umbraco.Core/Runtime/SqlMainDomLock.cs | 204 +++++++++++++++++++-- 2 files changed, 195 insertions(+), 22 deletions(-) diff --git a/src/Umbraco.Core/Runtime/MainDom.cs b/src/Umbraco.Core/Runtime/MainDom.cs index c6ee819cda..85cf5ad6a9 100644 --- a/src/Umbraco.Core/Runtime/MainDom.cs +++ b/src/Umbraco.Core/Runtime/MainDom.cs @@ -144,9 +144,16 @@ namespace Umbraco.Core.Runtime // Get the lock _mainDomLock.AcquireLockAsync(LockTimeoutMilliseconds).Wait(); - // Listen for the signal from another AppDomain coming online to release the lock - _mainDomLock.ListenAsync() - .ContinueWith(_ => OnSignal("signal")); + try + { + // Listen for the signal from another AppDomain coming online to release the lock + _mainDomLock.ListenAsync() + .ContinueWith(_ => OnSignal("signal")); + } + catch (OperationCanceledException) + { + // the waiting task could be canceled if this appdomain is naturally shutting down, we'll just swallow this exception + } _logger.Info("Acquired."); return true; diff --git a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs index 28449fe889..c077572f85 100644 --- a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs +++ b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs @@ -1,6 +1,7 @@ using System; using System.Data; using System.Data.SqlClient; +using System.Diagnostics; using System.Linq; using System.Threading; using System.Threading.Tasks; @@ -14,22 +15,22 @@ namespace Umbraco.Core.Runtime { internal class SqlMainDomLock : IMainDomLock { - private string _appDomainId; + private string _lockId; private const string MainDomKey = "Umbraco.Core.Runtime.SqlMainDom"; private readonly ILogger _logger; private IUmbracoDatabase _db; private CancellationTokenSource _cancellationTokenSource = new CancellationTokenSource(); private SqlServerSyntaxProvider _sqlServerSyntax = new SqlServerSyntaxProvider(); + private bool _mainDomChanging = false; public SqlMainDomLock(ILogger logger) { - _appDomainId = AppDomain.CurrentDomain.Id.ToString(); + // unique id for our appdomain, this is more unique than the appdomain id which is just an INT counter to its safer + _lockId = Guid.NewGuid().ToString(); _logger = logger; } - - - public Task AcquireLockAsync(int millisecondsTimeout) + public async Task AcquireLockAsync(int millisecondsTimeout) { var factory = new UmbracoDatabaseFactory( Constants.System.UmbracoConnectionName, @@ -38,6 +39,8 @@ namespace Umbraco.Core.Runtime _db = factory.CreateDatabase(); + var tempId = Guid.NewGuid().ToString(); + try { _db.BeginTransaction(IsolationLevel.ReadCommitted); @@ -51,18 +54,29 @@ namespace Umbraco.Core.Runtime { if (IsLockTimeoutException(ex)) { - return Task.FromResult(false); + return false; } // unexpected throw; } - InsertLockRecord(); + var result = InsertLockRecord(tempId); //we change the row to a random Id to signal other MainDom to shutdown + if (result == RecordPersistenceType.Insert) + { + // if we've inserted, then there was no MainDom so we can instantly acquire - return Task.FromResult(true); + // TODO: see the other TODO, could we just delete the row and that would indicate that we + // are MainDom? then we don't leave any orphan rows behind. + + InsertLockRecord(_lockId); // so update with our appdomain id + return true; + } + + // if we've updated, this means there is an active MainDom, now we need to wait to + // for the current MainDom to shutdown which also requires releasing our write lock } - catch(Exception) + catch (Exception) { _db.AbortTransaction(); @@ -73,6 +87,8 @@ namespace Umbraco.Core.Runtime { _db.CompleteTransaction(); } + + return await WaitForExistingAsync(tempId, millisecondsTimeout); } public Task ListenAsync() @@ -96,9 +112,14 @@ namespace Umbraco.Core.Runtime // get a read lock _sqlServerSyntax.ReadLock(_db, Constants.Locks.MainDom); - if (!IsStillMainDom()) + // TODO: We could in theory just check if the main dom row doesn't exist, that could indicate that + // we are still the maindom. An empty value might be better because then we won't have any orphan rows + // if the app is terminated. Could that work? + + if (!IsMainDomValue(_lockId)) { - // we are no longer main dom, exit + // we are no longer main dom, another one has come online, exit + _mainDomChanging = true; return; } } @@ -119,26 +140,134 @@ namespace Umbraco.Core.Runtime } /// - /// Inserts or updates the key/value row to check if the current appdomain is registerd as the maindom + /// Wait for any existing MainDom to release so we can continue booting /// - private void InsertLockRecord() + /// + /// + /// + private Task WaitForExistingAsync(string tempId, int millisecondsTimeout) { - _db.InsertOrUpdate(new KeyValueDto + var updatedTempId = tempId + "_updated"; + + return Task.Run(() => + { + var watch = new Stopwatch(); + watch.Start(); + while(true) + { + // poll very often, we need to take over as fast as we can + Thread.Sleep(100); + + try + { + _db.BeginTransaction(IsolationLevel.ReadCommitted); + + // get a read lock + _sqlServerSyntax.ReadLock(_db, Constants.Locks.MainDom); + + var mainDomRows = _db.Fetch("SELECT * FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey }); + + if (mainDomRows.Count == 0 || mainDomRows[0].Value == updatedTempId) + { + // the other main dom has updated our record + // Or the other maindom shutdown super fast and just deleted the record + // which indicates that we + // can acquire it and it has shutdown. + + _sqlServerSyntax.WriteLock(_db, Constants.Locks.MainDom); + + // so now we update the row with our appdomain id + InsertLockRecord(_lockId); + + return true; + } + else if (mainDomRows.Count == 1 && mainDomRows[0].Value.EndsWith("_updated")) + { + // in this case, there is a suffixed _updated value but it's not for our ID which means + // another new AppDomain has come online and is wanting to take over. In that case, we will not + // acquire. + + return false; + } + } + catch (Exception ex) + { + _db.AbortTransaction(); + + if (IsLockTimeoutException(ex)) + { + return false; + } + + throw; + } + finally + { + _db.CompleteTransaction(); + } + + if (watch.ElapsedMilliseconds >= millisecondsTimeout) + { + // if the timeout has elapsed, it either means that the other main dom is taking too long to shutdown, + // or it could mean that the previous appdomain was terminated and didn't clear out the main dom SQL row + // and it's just been left as an orphan row. + // There's really know way of knowing unless we are constantly updating the row for the current maindom + // which isn't ideal. + // So... we're going to 'just' take over, if the writelock works then we'll assume we're ok + + + try + { + _db.BeginTransaction(IsolationLevel.ReadCommitted); + + _sqlServerSyntax.WriteLock(_db, Constants.Locks.MainDom); + + // so now we update the row with our appdomain id + InsertLockRecord(_lockId); + return true; + } + catch (Exception ex) + { + _db.AbortTransaction(); + + if (IsLockTimeoutException(ex)) + { + // something is wrong, we cannot acquire, not much we can do + return false; + } + + throw; + } + finally + { + _db.CompleteTransaction(); + } + } + } + }, _cancellationTokenSource.Token); + } + + /// + /// Inserts or updates the key/value row + /// + private RecordPersistenceType InsertLockRecord(string id) + { + return _db.InsertOrUpdate(new KeyValueDto { Key = MainDomKey, - Value = _appDomainId, + Value = id, Updated = DateTime.Now }); } /// - /// Checks if the DB row value is our current appdomain value + /// Checks if the DB row value is equals the value /// /// - private bool IsStillMainDom() + private bool IsMainDomValue(string val) { return _db.ExecuteScalar("SELECT COUNT(*) FROM umbracoKeyValue WHERE [key] = @key AND [value] = @val", - new { key = MainDomKey, val = _appDomainId }) == 1; + new { key = MainDomKey, val = val }) == 1; } /// @@ -157,9 +286,46 @@ namespace Umbraco.Core.Runtime { if (disposing) { - _db.Dispose(); + // capture locally just in case in some strange way a sub task somehow updates this + var mainDomChanging = _mainDomChanging; + + // immediately cancel all sub-tasks, we don't want them to keep querying _cancellationTokenSource.Cancel(); _cancellationTokenSource.Dispose(); + + try + { + _db.BeginTransaction(IsolationLevel.ReadCommitted); + + // get a write lock + _sqlServerSyntax.WriteLock(_db, Constants.Locks.MainDom); + + // When we are disposed, it means we have released the MainDom lock + // and called all MainDom release callbacks, in this case + // if another maindom is actually coming online we need + // to signal to the MainDom coming online that we have shutdown. + // To do that, we update the existing main dom DB record with a suffixed "_updated" string. + // Otherwise, if we are just shutting down, we want to just delete the row. + if (mainDomChanging) + { + _db.Execute("UPDATE umbracoKeyValue SET [value] = [value] + '_updated' WHERE [key] = @key", new { key = MainDomKey }); + } + else + { + _db.Execute("DELETE FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey }); + } + } + catch (Exception) + { + _db.AbortTransaction(); + throw; + } + finally + { + _db.CompleteTransaction(); + + _db.Dispose(); + } } disposedValue = true;