From 1513a1254902dd755a2876321b8d50bbcf5ced48 Mon Sep 17 00:00:00 2001 From: Shannon Date: Tue, 10 Dec 2019 13:33:59 +0100 Subject: [PATCH 01/23] Introduce a new IMainDomLock and both default and sql implementations --- .../Migrations/Install/DatabaseDataCreator.cs | 2 + .../Migrations/Upgrade/UmbracoPlan.cs | 1 + .../Upgrade/V_8_6_0/AddMainDomLock.cs | 16 ++ ...AddPropertyTypeValidationMessageColumns.cs | 1 + .../Persistence/Constants-Locks.cs | 5 + .../SqlSyntax/SqlServerSyntaxProvider.cs | 7 +- src/Umbraco.Core/Runtime/CoreRuntime.cs | 2 +- src/Umbraco.Core/{ => Runtime}/IMainDom.cs | 1 + src/Umbraco.Core/Runtime/IMainDomLock.cs | 30 +++ src/Umbraco.Core/{ => Runtime}/MainDom.cs | 97 ++++------ .../Runtime/MainDomSemaphoreLock.cs | 92 +++++++++ src/Umbraco.Core/Runtime/SqlMainDomLock.cs | 178 ++++++++++++++++++ src/Umbraco.Core/Scoping/IScopeProvider.cs | 2 +- src/Umbraco.Core/Umbraco.Core.csproj | 8 +- .../XmlPublishedSnapshotService.cs | 1 + .../LegacyXmlPublishedCache/XmlStore.cs | 1 + 16 files changed, 375 insertions(+), 69 deletions(-) create mode 100644 src/Umbraco.Core/Migrations/Upgrade/V_8_6_0/AddMainDomLock.cs rename src/Umbraco.Core/{ => Runtime}/IMainDom.cs (96%) create mode 100644 src/Umbraco.Core/Runtime/IMainDomLock.cs rename src/Umbraco.Core/{ => Runtime}/MainDom.cs (77%) create mode 100644 src/Umbraco.Core/Runtime/MainDomSemaphoreLock.cs create mode 100644 src/Umbraco.Core/Runtime/SqlMainDomLock.cs diff --git a/src/Umbraco.Core/Migrations/Install/DatabaseDataCreator.cs b/src/Umbraco.Core/Migrations/Install/DatabaseDataCreator.cs index 94d8cfbc62..f6daf180b7 100644 --- a/src/Umbraco.Core/Migrations/Install/DatabaseDataCreator.cs +++ b/src/Umbraco.Core/Migrations/Install/DatabaseDataCreator.cs @@ -151,6 +151,8 @@ namespace Umbraco.Core.Migrations.Install _database.Insert(Constants.DatabaseSchema.Tables.Lock, "id", false, new LockDto { Id = Constants.Locks.Domains, Name = "Domains" }); _database.Insert(Constants.DatabaseSchema.Tables.Lock, "id", false, new LockDto { Id = Constants.Locks.KeyValues, Name = "KeyValues" }); _database.Insert(Constants.DatabaseSchema.Tables.Lock, "id", false, new LockDto { Id = Constants.Locks.Languages, Name = "Languages" }); + + _database.Insert(Constants.DatabaseSchema.Tables.Lock, "id", false, new LockDto { Id = Constants.Locks.MainDom, Name = "MainDom" }); } private void CreateContentTypeData() diff --git a/src/Umbraco.Core/Migrations/Upgrade/UmbracoPlan.cs b/src/Umbraco.Core/Migrations/Upgrade/UmbracoPlan.cs index 223603be14..6164f828f0 100644 --- a/src/Umbraco.Core/Migrations/Upgrade/UmbracoPlan.cs +++ b/src/Umbraco.Core/Migrations/Upgrade/UmbracoPlan.cs @@ -185,6 +185,7 @@ namespace Umbraco.Core.Migrations.Upgrade // to 8.6.0 To("{3D67D2C8-5E65-47D0-A9E1-DC2EE0779D6B}"); + To("{2AB29964-02A1-474D-BD6B-72148D2A53A2}"); //FINAL } diff --git a/src/Umbraco.Core/Migrations/Upgrade/V_8_6_0/AddMainDomLock.cs b/src/Umbraco.Core/Migrations/Upgrade/V_8_6_0/AddMainDomLock.cs new file mode 100644 index 0000000000..6ca493ac7e --- /dev/null +++ b/src/Umbraco.Core/Migrations/Upgrade/V_8_6_0/AddMainDomLock.cs @@ -0,0 +1,16 @@ +using Umbraco.Core.Persistence.Dtos; + +namespace Umbraco.Core.Migrations.Upgrade.V_8_6_0 +{ + public class AddMainDomLock : MigrationBase + { + public AddMainDomLock(IMigrationContext context) + : base(context) + { } + + public override void Migrate() + { + Database.Insert(Constants.DatabaseSchema.Tables.Lock, "id", false, new LockDto { Id = Constants.Locks.MainDom, Name = "MainDom" }); + } + } +} diff --git a/src/Umbraco.Core/Migrations/Upgrade/V_8_6_0/AddPropertyTypeValidationMessageColumns.cs b/src/Umbraco.Core/Migrations/Upgrade/V_8_6_0/AddPropertyTypeValidationMessageColumns.cs index 30eb30109e..f44695da69 100644 --- a/src/Umbraco.Core/Migrations/Upgrade/V_8_6_0/AddPropertyTypeValidationMessageColumns.cs +++ b/src/Umbraco.Core/Migrations/Upgrade/V_8_6_0/AddPropertyTypeValidationMessageColumns.cs @@ -3,6 +3,7 @@ using Umbraco.Core.Persistence.Dtos; namespace Umbraco.Core.Migrations.Upgrade.V_8_6_0 { + public class AddPropertyTypeValidationMessageColumns : MigrationBase { public AddPropertyTypeValidationMessageColumns(IMigrationContext context) diff --git a/src/Umbraco.Core/Persistence/Constants-Locks.cs b/src/Umbraco.Core/Persistence/Constants-Locks.cs index 1dcd2408e7..e64f40ced7 100644 --- a/src/Umbraco.Core/Persistence/Constants-Locks.cs +++ b/src/Umbraco.Core/Persistence/Constants-Locks.cs @@ -8,6 +8,11 @@ namespace Umbraco.Core /// public static class Locks { + /// + /// The lock + /// + public const int MainDom = -1000; + /// /// All servers. /// diff --git a/src/Umbraco.Core/Persistence/SqlSyntax/SqlServerSyntaxProvider.cs b/src/Umbraco.Core/Persistence/SqlSyntax/SqlServerSyntaxProvider.cs index 3d0adf175e..6dda49cd5e 100644 --- a/src/Umbraco.Core/Persistence/SqlSyntax/SqlServerSyntaxProvider.cs +++ b/src/Umbraco.Core/Persistence/SqlSyntax/SqlServerSyntaxProvider.cs @@ -250,6 +250,11 @@ where tbl.[name]=@0 and col.[name]=@1;", tableName, columnName) } public override void WriteLock(IDatabase db, params int[] lockIds) + { + WriteLock(db, 1800, lockIds); + } + + public void WriteLock(IDatabase db, int millisecondsTimeout, params int[] lockIds) { // soon as we get Database, a transaction is started @@ -260,7 +265,7 @@ where tbl.[name]=@0 and col.[name]=@1;", tableName, columnName) // *not* using a unique 'WHERE IN' query here because the *order* of lockIds is important to avoid deadlocks foreach (var lockId in lockIds) { - db.Execute(@"SET LOCK_TIMEOUT 1800;"); + db.Execute($"SET LOCK_TIMEOUT {millisecondsTimeout};"); var i = db.Execute(@"UPDATE umbracoLock WITH (REPEATABLEREAD) SET value = (CASE WHEN (value=1) THEN -1 ELSE 1 END) WHERE id=@id", new { id = lockId }); if (i == 0) // ensure we are actually locking! throw new ArgumentException($"LockObject with id={lockId} does not exist."); diff --git a/src/Umbraco.Core/Runtime/CoreRuntime.cs b/src/Umbraco.Core/Runtime/CoreRuntime.cs index 50653edc7c..49d7658647 100644 --- a/src/Umbraco.Core/Runtime/CoreRuntime.cs +++ b/src/Umbraco.Core/Runtime/CoreRuntime.cs @@ -147,7 +147,7 @@ namespace Umbraco.Core.Runtime // TODO: remove this in netcore, this is purely backwards compat hacks with the empty ctor if (MainDom == null) { - MainDom = new MainDom(Logger); + MainDom = new MainDom(Logger, new MainDomSemaphoreLock()); } diff --git a/src/Umbraco.Core/IMainDom.cs b/src/Umbraco.Core/Runtime/IMainDom.cs similarity index 96% rename from src/Umbraco.Core/IMainDom.cs rename to src/Umbraco.Core/Runtime/IMainDom.cs index 31b2e2eee0..444fc1c7d0 100644 --- a/src/Umbraco.Core/IMainDom.cs +++ b/src/Umbraco.Core/Runtime/IMainDom.cs @@ -1,5 +1,6 @@ using System; +// TODO: Can't change namespace due to breaking changes, change in netcore namespace Umbraco.Core { /// diff --git a/src/Umbraco.Core/Runtime/IMainDomLock.cs b/src/Umbraco.Core/Runtime/IMainDomLock.cs new file mode 100644 index 0000000000..c32e990114 --- /dev/null +++ b/src/Umbraco.Core/Runtime/IMainDomLock.cs @@ -0,0 +1,30 @@ +using System; +using System.Threading.Tasks; + +namespace Umbraco.Core.Runtime +{ + /// + /// An application-wide distributed lock + /// + /// + /// Disposing releases the lock + /// + public interface IMainDomLock : IDisposable + { + /// + /// Acquires an application-wide distributed lock + /// + /// + /// + /// A disposable object which will be disposed in order to release the lock + /// + /// Throws a if the elapsed millsecondsTimeout value is exceeded + Task AcquireLockAsync(int millisecondsTimeout); + + /// + /// Wait on a background thread to receive a signal from another AppDomain + /// + /// + Task ListenAsync(); + } +} diff --git a/src/Umbraco.Core/MainDom.cs b/src/Umbraco.Core/Runtime/MainDom.cs similarity index 77% rename from src/Umbraco.Core/MainDom.cs rename to src/Umbraco.Core/Runtime/MainDom.cs index e2049c0190..406fb9b731 100644 --- a/src/Umbraco.Core/MainDom.cs +++ b/src/Umbraco.Core/Runtime/MainDom.cs @@ -4,10 +4,13 @@ using System.Linq; using System.Security.Cryptography; using System.Threading; using System.Web.Hosting; +using Umbraco.Core; using Umbraco.Core.Logging; +using Umbraco.Core.Persistence; -namespace Umbraco.Core +namespace Umbraco.Core.Runtime { + /// /// Provides the full implementation of . /// @@ -20,18 +23,11 @@ namespace Umbraco.Core #region Vars private readonly ILogger _logger; + private readonly IMainDomLock _mainDomLock; // our own lock for local consistency private object _locko = new object(); - // async lock representing the main domain lock - private readonly SystemLock _systemLock; - private IDisposable _systemLocker; - - // event wait handle used to notify current main domain that it should - // release the lock because a new domain wants to be the main domain - private readonly EventWaitHandle _signal; - private bool _isInitialized; // indicates whether... private bool _isMainDom; // we are the main domain @@ -47,32 +43,12 @@ namespace Umbraco.Core #region Ctor // initializes a new instance of MainDom - public MainDom(ILogger logger) + public MainDom(ILogger logger, IMainDomLock systemLock) { HostingEnvironment.RegisterObject(this); _logger = logger; - - // HostingEnvironment.ApplicationID is null in unit tests, making ReplaceNonAlphanumericChars fail - var appId = HostingEnvironment.ApplicationID?.ReplaceNonAlphanumericChars(string.Empty) ?? string.Empty; - - // combining with the physical path because if running on eg IIS Express, - // two sites could have the same appId even though they are different. - // - // now what could still collide is... two sites, running in two different processes - // and having the same appId, and running on the same app physical path - // - // we *cannot* use the process ID here because when an AppPool restarts it is - // a new process for the same application path - - var appPath = HostingEnvironment.ApplicationPhysicalPath?.ToLowerInvariant() ?? string.Empty; - var hash = (appId + ":::" + appPath).GenerateHash(); - - var lockName = "UMBRACO-" + hash + "-MAINDOM-LCK"; - _systemLock = new SystemLock(lockName); - - var eventName = "UMBRACO-" + hash + "-MAINDOM-EVT"; - _signal = new EventWaitHandle(false, EventResetMode.AutoReset, eventName); + _mainDomLock = systemLock; } #endregion @@ -130,7 +106,6 @@ namespace Umbraco.Core { _logger.Info("Stopping ({SignalSource})", source); foreach (var callback in _callbacks.OrderBy(x => x.Key).Select(x => x.Value)) - { try { callback(); // no timeout on callbacks @@ -140,14 +115,13 @@ namespace Umbraco.Core _logger.Error(e, "Error while running callback"); continue; } - } _logger.Debug("Stopped ({SignalSource})", source); } finally { // in any case... _isMainDom = false; - _systemLocker?.Dispose(); + _mainDomLock.Dispose(); _logger.Info("Released ({SignalSource})", source); } @@ -167,35 +141,11 @@ namespace Umbraco.Core _logger.Info("Acquiring."); - // signal other instances that we want the lock, then wait one the lock, - // which may timeout, and this is accepted - see comments below + // Get the lock + _mainDomLock.AcquireLockAsync(LockTimeoutMilliseconds).Wait(); - // signal, then wait for the lock, then make sure the event is - // reset (maybe there was noone listening..) - _signal.Set(); - - // if more than 1 instance reach that point, one will get the lock - // and the other one will timeout, which is accepted - - //This can throw a TimeoutException - in which case should this be in a try/finally to ensure the signal is always reset. - try - { - _systemLocker = _systemLock.Lock(LockTimeoutMilliseconds); - } - finally - { - // we need to reset the event, because otherwise we would end up - // signaling ourselves and committing suicide immediately. - // only 1 instance can reach that point, but other instances may - // have started and be trying to get the lock - they will timeout, - // which is accepted - - _signal.Reset(); - } - - //WaitOneAsync (ext method) will wait for a signal without blocking the main thread, the waiting is done on a background thread - - _signal.WaitOneAsync() + // Listen for the signal from another AppDomain coming online to release the lock + _mainDomLock.ListenAsync() .ContinueWith(_ => OnSignal("signal")); _logger.Info("Acquired."); @@ -230,8 +180,7 @@ namespace Umbraco.Core { if (disposing) { - _signal?.Close(); - _signal?.Dispose(); + _mainDomLock.Dispose(); } disposedValue = true; @@ -244,5 +193,25 @@ namespace Umbraco.Core } #endregion + + public static string GetMainDomId() + { + // HostingEnvironment.ApplicationID is null in unit tests, making ReplaceNonAlphanumericChars fail + var appId = HostingEnvironment.ApplicationID?.ReplaceNonAlphanumericChars(string.Empty) ?? string.Empty; + + // combining with the physical path because if running on eg IIS Express, + // two sites could have the same appId even though they are different. + // + // now what could still collide is... two sites, running in two different processes + // and having the same appId, and running on the same app physical path + // + // we *cannot* use the process ID here because when an AppPool restarts it is + // a new process for the same application path + + var appPath = HostingEnvironment.ApplicationPhysicalPath?.ToLowerInvariant() ?? string.Empty; + var hash = (appId + ":::" + appPath).GenerateHash(); + + return hash; + } } } diff --git a/src/Umbraco.Core/Runtime/MainDomSemaphoreLock.cs b/src/Umbraco.Core/Runtime/MainDomSemaphoreLock.cs new file mode 100644 index 0000000000..89eef8a658 --- /dev/null +++ b/src/Umbraco.Core/Runtime/MainDomSemaphoreLock.cs @@ -0,0 +1,92 @@ +using System; +using System.Threading; +using System.Threading.Tasks; + +namespace Umbraco.Core.Runtime +{ + /// + /// Uses a system-wide Semaphore and EventWaitHandle to synchronize the current AppDomain + /// + internal class MainDomSemaphoreLock : IMainDomLock + { + private readonly SystemLock _systemLock; + + // event wait handle used to notify current main domain that it should + // release the lock because a new domain wants to be the main domain + private readonly EventWaitHandle _signal; + + private IDisposable _lockRelease; + + public MainDomSemaphoreLock() + { + var lockName = "UMBRACO-" + MainDom.GetMainDomId() + "-MAINDOM-LCK"; + _systemLock = new SystemLock(lockName); + + var eventName = "UMBRACO-" + MainDom.GetMainDomId() + "-MAINDOM-EVT"; + _signal = new EventWaitHandle(false, EventResetMode.AutoReset, eventName); + } + + //WaitOneAsync (ext method) will wait for a signal without blocking the main thread, the waiting is done on a background thread + public Task ListenAsync() => _signal.WaitOneAsync(); + + public Task AcquireLockAsync(int millisecondsTimeout) + { + // signal other instances that we want the lock, then wait on the lock, + // which may timeout, and this is accepted - see comments below + + // signal, then wait for the lock, then make sure the event is + // reset (maybe there was noone listening..) + _signal.Set(); + + // if more than 1 instance reach that point, one will get the lock + // and the other one will timeout, which is accepted + + //This can throw a TimeoutException - in which case should this be in a try/finally to ensure the signal is always reset. + try + { + _lockRelease = _systemLock.Lock(millisecondsTimeout); + return Task.FromResult(true); + } + catch (TimeoutException) + { + return Task.FromResult(false); + } + finally + { + // we need to reset the event, because otherwise we would end up + // signaling ourselves and committing suicide immediately. + // only 1 instance can reach that point, but other instances may + // have started and be trying to get the lock - they will timeout, + // which is accepted + + _signal.Reset(); + } + } + + #region IDisposable Support + private bool disposedValue = false; // To detect redundant calls + + protected virtual void Dispose(bool disposing) + { + if (!disposedValue) + { + if (disposing) + { + _lockRelease?.Dispose(); + _signal.Close(); + _signal.Dispose(); + } + + disposedValue = true; + } + } + + // This code added to correctly implement the disposable pattern. + public void Dispose() + { + // Do not change this code. Put cleanup code in Dispose(bool disposing) above. + Dispose(true); + } + #endregion + } +} diff --git a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs new file mode 100644 index 0000000000..28449fe889 --- /dev/null +++ b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs @@ -0,0 +1,178 @@ +using System; +using System.Data; +using System.Data.SqlClient; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Umbraco.Core.Logging; +using Umbraco.Core.Persistence; +using Umbraco.Core.Persistence.Dtos; +using Umbraco.Core.Persistence.Mappers; +using Umbraco.Core.Persistence.SqlSyntax; + +namespace Umbraco.Core.Runtime +{ + internal class SqlMainDomLock : IMainDomLock + { + private string _appDomainId; + private const string MainDomKey = "Umbraco.Core.Runtime.SqlMainDom"; + private readonly ILogger _logger; + private IUmbracoDatabase _db; + private CancellationTokenSource _cancellationTokenSource = new CancellationTokenSource(); + private SqlServerSyntaxProvider _sqlServerSyntax = new SqlServerSyntaxProvider(); + + public SqlMainDomLock(ILogger logger) + { + _appDomainId = AppDomain.CurrentDomain.Id.ToString(); + _logger = logger; + } + + + + public Task AcquireLockAsync(int millisecondsTimeout) + { + var factory = new UmbracoDatabaseFactory( + Constants.System.UmbracoConnectionName, + _logger, + new Lazy(() => new Persistence.Mappers.MapperCollection(Enumerable.Empty()))); + + _db = factory.CreateDatabase(); + + try + { + _db.BeginTransaction(IsolationLevel.ReadCommitted); + + try + { + // wait to get a write lock + _sqlServerSyntax.WriteLock(_db, millisecondsTimeout, Constants.Locks.MainDom); + } + catch (Exception ex) + { + if (IsLockTimeoutException(ex)) + { + return Task.FromResult(false); + } + + // unexpected + throw; + } + + InsertLockRecord(); + + return Task.FromResult(true); + } + catch(Exception) + { + _db.AbortTransaction(); + + // unexpected + throw; + } + finally + { + _db.CompleteTransaction(); + } + } + + public Task ListenAsync() + { + // Create a long running task (dedicated thread) + // to poll to check if we are still the MainDom registered in the DB + return Task.Factory.StartNew(() => + { + while(true) + { + if (_cancellationTokenSource.IsCancellationRequested) + break; + + // poll every 1 second + Thread.Sleep(1000); + + try + { + _db.BeginTransaction(IsolationLevel.ReadCommitted); + + // get a read lock + _sqlServerSyntax.ReadLock(_db, Constants.Locks.MainDom); + + if (!IsStillMainDom()) + { + // we are no longer main dom, exit + return; + } + } + catch (Exception) + { + _db.AbortTransaction(); + throw; + } + finally + { + _db.CompleteTransaction(); + } + } + + + }, _cancellationTokenSource.Token, TaskCreationOptions.LongRunning, TaskScheduler.Default); + + } + + /// + /// Inserts or updates the key/value row to check if the current appdomain is registerd as the maindom + /// + private void InsertLockRecord() + { + _db.InsertOrUpdate(new KeyValueDto + { + Key = MainDomKey, + Value = _appDomainId, + Updated = DateTime.Now + }); + } + + /// + /// Checks if the DB row value is our current appdomain value + /// + /// + private bool IsStillMainDom() + { + return _db.ExecuteScalar("SELECT COUNT(*) FROM umbracoKeyValue WHERE [key] = @key AND [value] = @val", + new { key = MainDomKey, val = _appDomainId }) == 1; + } + + /// + /// Checks if the exception is an SQL timeout + /// + /// + /// + private bool IsLockTimeoutException(Exception exception) => exception is SqlException sqlException && sqlException.Number == 1222; + + #region IDisposable Support + private bool disposedValue = false; // To detect redundant calls + + protected virtual void Dispose(bool disposing) + { + if (!disposedValue) + { + if (disposing) + { + _db.Dispose(); + _cancellationTokenSource.Cancel(); + _cancellationTokenSource.Dispose(); + } + + disposedValue = true; + } + } + + // This code added to correctly implement the disposable pattern. + public void Dispose() + { + // Do not change this code. Put cleanup code in Dispose(bool disposing) above. + Dispose(true); + } + #endregion + + } +} diff --git a/src/Umbraco.Core/Scoping/IScopeProvider.cs b/src/Umbraco.Core/Scoping/IScopeProvider.cs index 6c9eb63ba0..96bb939f8e 100644 --- a/src/Umbraco.Core/Scoping/IScopeProvider.cs +++ b/src/Umbraco.Core/Scoping/IScopeProvider.cs @@ -13,7 +13,7 @@ namespace Umbraco.Core.Scoping /// Provides scopes. /// public interface IScopeProvider - { + { /// /// Creates an ambient scope. /// diff --git a/src/Umbraco.Core/Umbraco.Core.csproj b/src/Umbraco.Core/Umbraco.Core.csproj index 6f9ff1e783..8d278f5630 100755 --- a/src/Umbraco.Core/Umbraco.Core.csproj +++ b/src/Umbraco.Core/Umbraco.Core.csproj @@ -128,6 +128,10 @@ --> + + + + @@ -398,7 +402,7 @@ - + @@ -729,7 +733,7 @@ - + diff --git a/src/Umbraco.Tests/LegacyXmlPublishedCache/XmlPublishedSnapshotService.cs b/src/Umbraco.Tests/LegacyXmlPublishedCache/XmlPublishedSnapshotService.cs index ec6b854a46..97fe9057bb 100644 --- a/src/Umbraco.Tests/LegacyXmlPublishedCache/XmlPublishedSnapshotService.cs +++ b/src/Umbraco.Tests/LegacyXmlPublishedCache/XmlPublishedSnapshotService.cs @@ -9,6 +9,7 @@ using Umbraco.Core.Models; using Umbraco.Core.Models.Membership; using Umbraco.Core.Models.PublishedContent; using Umbraco.Core.Persistence.Repositories; +using Umbraco.Core.Runtime; using Umbraco.Core.Scoping; using Umbraco.Core.Services; using Umbraco.Web; diff --git a/src/Umbraco.Tests/LegacyXmlPublishedCache/XmlStore.cs b/src/Umbraco.Tests/LegacyXmlPublishedCache/XmlStore.cs index c452c4792a..803b86aec5 100644 --- a/src/Umbraco.Tests/LegacyXmlPublishedCache/XmlStore.cs +++ b/src/Umbraco.Tests/LegacyXmlPublishedCache/XmlStore.cs @@ -14,6 +14,7 @@ using Umbraco.Core.Models; using Umbraco.Core.Persistence; using Umbraco.Core.Persistence.Repositories; using Umbraco.Core.Persistence.Repositories.Implement; +using Umbraco.Core.Runtime; using Umbraco.Core.Scoping; using Umbraco.Core.Services; using Umbraco.Core.Services.Changes; From 654511a6564af56e3d50c527cd7b35921ce120cc Mon Sep 17 00:00:00 2001 From: Shannon Date: Tue, 10 Dec 2019 13:42:53 +0100 Subject: [PATCH 02/23] adds appSetting to switch maindom lock --- src/Umbraco.Core/Constants-AppSettings.cs | 2 ++ src/Umbraco.Web/UmbracoApplication.cs | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/Umbraco.Core/Constants-AppSettings.cs b/src/Umbraco.Core/Constants-AppSettings.cs index 509be46b61..85d6b24ae0 100644 --- a/src/Umbraco.Core/Constants-AppSettings.cs +++ b/src/Umbraco.Core/Constants-AppSettings.cs @@ -9,6 +9,8 @@ namespace Umbraco.Core /// public static class AppSettings { + public const string MainDomLock = "Umbraco.Core.MainDom.Lock"; + // TODO: Kill me - still used in Umbraco.Core.IO.SystemFiles:27 [Obsolete("We need to kill this appsetting as we do not use XML content cache umbraco.config anymore due to NuCache")] public const string ContentXML = "Umbraco.Core.ContentXML"; //umbracoContentXML diff --git a/src/Umbraco.Web/UmbracoApplication.cs b/src/Umbraco.Web/UmbracoApplication.cs index 94403bc1be..ad8dcb7e8b 100644 --- a/src/Umbraco.Web/UmbracoApplication.cs +++ b/src/Umbraco.Web/UmbracoApplication.cs @@ -1,7 +1,9 @@ -using System.Threading; +using System.Configuration; +using System.Threading; using System.Web; using Umbraco.Core; using Umbraco.Core.Logging.Serilog; +using Umbraco.Core.Runtime; using Umbraco.Web.Runtime; namespace Umbraco.Web @@ -14,7 +16,17 @@ namespace Umbraco.Web protected override IRuntime GetRuntime() { var logger = SerilogLogger.CreateWithDefaultConfiguration(); - return new WebRuntime(this, logger, new MainDom(logger)); + + // Determine if we should use the sql main dom or the default + var appSettingMainDomLock = ConfigurationManager.AppSettings[Constants.AppSettings.MainDomLock]; + + var mainDomLock = appSettingMainDomLock == "SqlMainDomLock" + ? (IMainDomLock)new SqlMainDomLock(logger) + : new MainDomSemaphoreLock(); + + var runtime = new WebRuntime(this, logger, new MainDom(logger, mainDomLock)); + + return runtime; } /// From e919af14d33827de2415f332907cdfc12766ed91 Mon Sep 17 00:00:00 2001 From: Shannon Date: Tue, 10 Dec 2019 13:50:10 +0100 Subject: [PATCH 03/23] adds comments --- src/Umbraco.Core/Runtime/MainDom.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Umbraco.Core/Runtime/MainDom.cs b/src/Umbraco.Core/Runtime/MainDom.cs index 406fb9b731..c6ee819cda 100644 --- a/src/Umbraco.Core/Runtime/MainDom.cs +++ b/src/Umbraco.Core/Runtime/MainDom.cs @@ -155,6 +155,10 @@ namespace Umbraco.Core.Runtime /// /// Gets a value indicating whether the current domain is the main domain. /// + /// + /// The lazy initializer call will only call the Acquire callback when it's not been initialized, else it will just return + /// the value from _isMainDom which means when we set _isMainDom to false again after being signaled, this will return false; + /// public bool IsMainDom => LazyInitializer.EnsureInitialized(ref _isMainDom, ref _isInitialized, ref _locko, () => Acquire()); // IRegisteredObject From aae8dbdc15ec0f10efaa98c87384d2c2a069b936 Mon Sep 17 00:00:00 2001 From: Shannon Date: Tue, 10 Dec 2019 15:44:47 +0100 Subject: [PATCH 04/23] Updates to allow the sql main dom lock to be able to wait until the previous maindom has fully unwound before it can be taken over. --- src/Umbraco.Core/Runtime/MainDom.cs | 13 +- src/Umbraco.Core/Runtime/SqlMainDomLock.cs | 204 +++++++++++++++++++-- 2 files changed, 195 insertions(+), 22 deletions(-) diff --git a/src/Umbraco.Core/Runtime/MainDom.cs b/src/Umbraco.Core/Runtime/MainDom.cs index c6ee819cda..85cf5ad6a9 100644 --- a/src/Umbraco.Core/Runtime/MainDom.cs +++ b/src/Umbraco.Core/Runtime/MainDom.cs @@ -144,9 +144,16 @@ namespace Umbraco.Core.Runtime // Get the lock _mainDomLock.AcquireLockAsync(LockTimeoutMilliseconds).Wait(); - // Listen for the signal from another AppDomain coming online to release the lock - _mainDomLock.ListenAsync() - .ContinueWith(_ => OnSignal("signal")); + try + { + // Listen for the signal from another AppDomain coming online to release the lock + _mainDomLock.ListenAsync() + .ContinueWith(_ => OnSignal("signal")); + } + catch (OperationCanceledException) + { + // the waiting task could be canceled if this appdomain is naturally shutting down, we'll just swallow this exception + } _logger.Info("Acquired."); return true; diff --git a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs index 28449fe889..c077572f85 100644 --- a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs +++ b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs @@ -1,6 +1,7 @@ using System; using System.Data; using System.Data.SqlClient; +using System.Diagnostics; using System.Linq; using System.Threading; using System.Threading.Tasks; @@ -14,22 +15,22 @@ namespace Umbraco.Core.Runtime { internal class SqlMainDomLock : IMainDomLock { - private string _appDomainId; + private string _lockId; private const string MainDomKey = "Umbraco.Core.Runtime.SqlMainDom"; private readonly ILogger _logger; private IUmbracoDatabase _db; private CancellationTokenSource _cancellationTokenSource = new CancellationTokenSource(); private SqlServerSyntaxProvider _sqlServerSyntax = new SqlServerSyntaxProvider(); + private bool _mainDomChanging = false; public SqlMainDomLock(ILogger logger) { - _appDomainId = AppDomain.CurrentDomain.Id.ToString(); + // unique id for our appdomain, this is more unique than the appdomain id which is just an INT counter to its safer + _lockId = Guid.NewGuid().ToString(); _logger = logger; } - - - public Task AcquireLockAsync(int millisecondsTimeout) + public async Task AcquireLockAsync(int millisecondsTimeout) { var factory = new UmbracoDatabaseFactory( Constants.System.UmbracoConnectionName, @@ -38,6 +39,8 @@ namespace Umbraco.Core.Runtime _db = factory.CreateDatabase(); + var tempId = Guid.NewGuid().ToString(); + try { _db.BeginTransaction(IsolationLevel.ReadCommitted); @@ -51,18 +54,29 @@ namespace Umbraco.Core.Runtime { if (IsLockTimeoutException(ex)) { - return Task.FromResult(false); + return false; } // unexpected throw; } - InsertLockRecord(); + var result = InsertLockRecord(tempId); //we change the row to a random Id to signal other MainDom to shutdown + if (result == RecordPersistenceType.Insert) + { + // if we've inserted, then there was no MainDom so we can instantly acquire - return Task.FromResult(true); + // TODO: see the other TODO, could we just delete the row and that would indicate that we + // are MainDom? then we don't leave any orphan rows behind. + + InsertLockRecord(_lockId); // so update with our appdomain id + return true; + } + + // if we've updated, this means there is an active MainDom, now we need to wait to + // for the current MainDom to shutdown which also requires releasing our write lock } - catch(Exception) + catch (Exception) { _db.AbortTransaction(); @@ -73,6 +87,8 @@ namespace Umbraco.Core.Runtime { _db.CompleteTransaction(); } + + return await WaitForExistingAsync(tempId, millisecondsTimeout); } public Task ListenAsync() @@ -96,9 +112,14 @@ namespace Umbraco.Core.Runtime // get a read lock _sqlServerSyntax.ReadLock(_db, Constants.Locks.MainDom); - if (!IsStillMainDom()) + // TODO: We could in theory just check if the main dom row doesn't exist, that could indicate that + // we are still the maindom. An empty value might be better because then we won't have any orphan rows + // if the app is terminated. Could that work? + + if (!IsMainDomValue(_lockId)) { - // we are no longer main dom, exit + // we are no longer main dom, another one has come online, exit + _mainDomChanging = true; return; } } @@ -119,26 +140,134 @@ namespace Umbraco.Core.Runtime } /// - /// Inserts or updates the key/value row to check if the current appdomain is registerd as the maindom + /// Wait for any existing MainDom to release so we can continue booting /// - private void InsertLockRecord() + /// + /// + /// + private Task WaitForExistingAsync(string tempId, int millisecondsTimeout) { - _db.InsertOrUpdate(new KeyValueDto + var updatedTempId = tempId + "_updated"; + + return Task.Run(() => + { + var watch = new Stopwatch(); + watch.Start(); + while(true) + { + // poll very often, we need to take over as fast as we can + Thread.Sleep(100); + + try + { + _db.BeginTransaction(IsolationLevel.ReadCommitted); + + // get a read lock + _sqlServerSyntax.ReadLock(_db, Constants.Locks.MainDom); + + var mainDomRows = _db.Fetch("SELECT * FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey }); + + if (mainDomRows.Count == 0 || mainDomRows[0].Value == updatedTempId) + { + // the other main dom has updated our record + // Or the other maindom shutdown super fast and just deleted the record + // which indicates that we + // can acquire it and it has shutdown. + + _sqlServerSyntax.WriteLock(_db, Constants.Locks.MainDom); + + // so now we update the row with our appdomain id + InsertLockRecord(_lockId); + + return true; + } + else if (mainDomRows.Count == 1 && mainDomRows[0].Value.EndsWith("_updated")) + { + // in this case, there is a suffixed _updated value but it's not for our ID which means + // another new AppDomain has come online and is wanting to take over. In that case, we will not + // acquire. + + return false; + } + } + catch (Exception ex) + { + _db.AbortTransaction(); + + if (IsLockTimeoutException(ex)) + { + return false; + } + + throw; + } + finally + { + _db.CompleteTransaction(); + } + + if (watch.ElapsedMilliseconds >= millisecondsTimeout) + { + // if the timeout has elapsed, it either means that the other main dom is taking too long to shutdown, + // or it could mean that the previous appdomain was terminated and didn't clear out the main dom SQL row + // and it's just been left as an orphan row. + // There's really know way of knowing unless we are constantly updating the row for the current maindom + // which isn't ideal. + // So... we're going to 'just' take over, if the writelock works then we'll assume we're ok + + + try + { + _db.BeginTransaction(IsolationLevel.ReadCommitted); + + _sqlServerSyntax.WriteLock(_db, Constants.Locks.MainDom); + + // so now we update the row with our appdomain id + InsertLockRecord(_lockId); + return true; + } + catch (Exception ex) + { + _db.AbortTransaction(); + + if (IsLockTimeoutException(ex)) + { + // something is wrong, we cannot acquire, not much we can do + return false; + } + + throw; + } + finally + { + _db.CompleteTransaction(); + } + } + } + }, _cancellationTokenSource.Token); + } + + /// + /// Inserts or updates the key/value row + /// + private RecordPersistenceType InsertLockRecord(string id) + { + return _db.InsertOrUpdate(new KeyValueDto { Key = MainDomKey, - Value = _appDomainId, + Value = id, Updated = DateTime.Now }); } /// - /// Checks if the DB row value is our current appdomain value + /// Checks if the DB row value is equals the value /// /// - private bool IsStillMainDom() + private bool IsMainDomValue(string val) { return _db.ExecuteScalar("SELECT COUNT(*) FROM umbracoKeyValue WHERE [key] = @key AND [value] = @val", - new { key = MainDomKey, val = _appDomainId }) == 1; + new { key = MainDomKey, val = val }) == 1; } /// @@ -157,9 +286,46 @@ namespace Umbraco.Core.Runtime { if (disposing) { - _db.Dispose(); + // capture locally just in case in some strange way a sub task somehow updates this + var mainDomChanging = _mainDomChanging; + + // immediately cancel all sub-tasks, we don't want them to keep querying _cancellationTokenSource.Cancel(); _cancellationTokenSource.Dispose(); + + try + { + _db.BeginTransaction(IsolationLevel.ReadCommitted); + + // get a write lock + _sqlServerSyntax.WriteLock(_db, Constants.Locks.MainDom); + + // When we are disposed, it means we have released the MainDom lock + // and called all MainDom release callbacks, in this case + // if another maindom is actually coming online we need + // to signal to the MainDom coming online that we have shutdown. + // To do that, we update the existing main dom DB record with a suffixed "_updated" string. + // Otherwise, if we are just shutting down, we want to just delete the row. + if (mainDomChanging) + { + _db.Execute("UPDATE umbracoKeyValue SET [value] = [value] + '_updated' WHERE [key] = @key", new { key = MainDomKey }); + } + else + { + _db.Execute("DELETE FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey }); + } + } + catch (Exception) + { + _db.AbortTransaction(); + throw; + } + finally + { + _db.CompleteTransaction(); + + _db.Dispose(); + } } disposedValue = true; From a63de7672bd891a306d5c1a8bc2cc682fd3cc784 Mon Sep 17 00:00:00 2001 From: Shannon Date: Tue, 10 Dec 2019 15:53:57 +0100 Subject: [PATCH 05/23] updates a bool check --- src/Umbraco.Core/Runtime/SqlMainDomLock.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs index c077572f85..edd5af7ea8 100644 --- a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs +++ b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs @@ -181,9 +181,9 @@ namespace Umbraco.Core.Runtime return true; } - else if (mainDomRows.Count == 1 && mainDomRows[0].Value.EndsWith("_updated")) + else if (mainDomRows.Count == 1 && !mainDomRows[0].Value.StartsWith(tempId)) { - // in this case, there is a suffixed _updated value but it's not for our ID which means + // in this case, the prefixed ID is different which means // another new AppDomain has come online and is wanting to take over. In that case, we will not // acquire. From 0ef08db7658f1eae04c1d3602200b0c8420e4a88 Mon Sep 17 00:00:00 2001 From: Shannon Date: Wed, 11 Dec 2019 08:55:07 +0100 Subject: [PATCH 06/23] adds error logging and ensure we don't throw exceptions on a background task which would destroy the app domain --- src/Umbraco.Core/Runtime/SqlMainDomLock.cs | 138 ++++++++++++++------- 1 file changed, 91 insertions(+), 47 deletions(-) diff --git a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs index edd5af7ea8..c567b76403 100644 --- a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs +++ b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs @@ -22,42 +22,45 @@ namespace Umbraco.Core.Runtime private CancellationTokenSource _cancellationTokenSource = new CancellationTokenSource(); private SqlServerSyntaxProvider _sqlServerSyntax = new SqlServerSyntaxProvider(); private bool _mainDomChanging = false; + private readonly UmbracoDatabaseFactory _dbFactory; + private bool _hasError; public SqlMainDomLock(ILogger logger) { // unique id for our appdomain, this is more unique than the appdomain id which is just an INT counter to its safer _lockId = Guid.NewGuid().ToString(); _logger = logger; + _dbFactory = new UmbracoDatabaseFactory( + Constants.System.UmbracoConnectionName, + _logger, + new Lazy(() => new Persistence.Mappers.MapperCollection(Enumerable.Empty()))); } public async Task AcquireLockAsync(int millisecondsTimeout) { - var factory = new UmbracoDatabaseFactory( - Constants.System.UmbracoConnectionName, - _logger, - new Lazy(() => new Persistence.Mappers.MapperCollection(Enumerable.Empty()))); - - _db = factory.CreateDatabase(); + var db = GetDatabase(); var tempId = Guid.NewGuid().ToString(); try { - _db.BeginTransaction(IsolationLevel.ReadCommitted); + db.BeginTransaction(IsolationLevel.ReadCommitted); try { // wait to get a write lock - _sqlServerSyntax.WriteLock(_db, millisecondsTimeout, Constants.Locks.MainDom); + _sqlServerSyntax.WriteLock(db, millisecondsTimeout, Constants.Locks.MainDom); } catch (Exception ex) { if (IsLockTimeoutException(ex)) { + _logger.Error(ex, "Sql timeout occurred, could not acquire MainDom."); + _hasError = true; return false; } - // unexpected + // unexpected (will be caught below) throw; } @@ -76,16 +79,17 @@ namespace Umbraco.Core.Runtime // if we've updated, this means there is an active MainDom, now we need to wait to // for the current MainDom to shutdown which also requires releasing our write lock } - catch (Exception) + catch (Exception ex) { - _db.AbortTransaction(); - + ResetDatabase(); // unexpected - throw; + _logger.Error(ex, "Unexpected error, cannot acquire MainDom"); + _hasError = true; + return false; } finally { - _db.CompleteTransaction(); + db?.CompleteTransaction(); } return await WaitForExistingAsync(tempId, millisecondsTimeout); @@ -93,6 +97,12 @@ namespace Umbraco.Core.Runtime public Task ListenAsync() { + if (_hasError) + { + _logger.Warn("Could not acquire MainDom, listening is canceled."); + return Task.CompletedTask; + } + // Create a long running task (dedicated thread) // to poll to check if we are still the MainDom registered in the DB return Task.Factory.StartNew(() => @@ -105,12 +115,14 @@ namespace Umbraco.Core.Runtime // poll every 1 second Thread.Sleep(1000); + var db = GetDatabase(); + try { - _db.BeginTransaction(IsolationLevel.ReadCommitted); + db.BeginTransaction(IsolationLevel.ReadCommitted); // get a read lock - _sqlServerSyntax.ReadLock(_db, Constants.Locks.MainDom); + _sqlServerSyntax.ReadLock(db, Constants.Locks.MainDom); // TODO: We could in theory just check if the main dom row doesn't exist, that could indicate that // we are still the maindom. An empty value might be better because then we won't have any orphan rows @@ -123,14 +135,17 @@ namespace Umbraco.Core.Runtime return; } } - catch (Exception) + catch (Exception ex) { - _db.AbortTransaction(); - throw; + ResetDatabase(); + // unexpected + _logger.Error(ex, "Unexpected error, listening is canceled."); + _hasError = true; + return; } finally { - _db.CompleteTransaction(); + db?.CompleteTransaction(); } } @@ -139,6 +154,23 @@ namespace Umbraco.Core.Runtime } + private void ResetDatabase() + { + if (_db.InTransaction) + _db.AbortTransaction(); + _db.Dispose(); + _db = null; + } + + private IUmbracoDatabase GetDatabase() + { + if (_db != null) + return _db; + + _db = _dbFactory.CreateDatabase(); + return _db; + } + /// /// Wait for any existing MainDom to release so we can continue booting /// @@ -151,6 +183,7 @@ namespace Umbraco.Core.Runtime return Task.Run(() => { + var db = GetDatabase(); var watch = new Stopwatch(); watch.Start(); while(true) @@ -160,12 +193,13 @@ namespace Umbraco.Core.Runtime try { - _db.BeginTransaction(IsolationLevel.ReadCommitted); + db.BeginTransaction(IsolationLevel.ReadCommitted); // get a read lock - _sqlServerSyntax.ReadLock(_db, Constants.Locks.MainDom); + _sqlServerSyntax.ReadLock(db, Constants.Locks.MainDom); - var mainDomRows = _db.Fetch("SELECT * FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey }); + // the row + var mainDomRows = db.Fetch("SELECT * FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey }); if (mainDomRows.Count == 0 || mainDomRows[0].Value == updatedTempId) { @@ -174,7 +208,7 @@ namespace Umbraco.Core.Runtime // which indicates that we // can acquire it and it has shutdown. - _sqlServerSyntax.WriteLock(_db, Constants.Locks.MainDom); + _sqlServerSyntax.WriteLock(db, Constants.Locks.MainDom); // so now we update the row with our appdomain id InsertLockRecord(_lockId); @@ -192,18 +226,22 @@ namespace Umbraco.Core.Runtime } catch (Exception ex) { - _db.AbortTransaction(); + ResetDatabase(); if (IsLockTimeoutException(ex)) { + _logger.Error(ex, "Sql timeout occurred, waiting for existing MainDom is canceled."); + _hasError = true; return false; } - - throw; + // unexpected + _logger.Error(ex, "Unexpected error, waiting for existing MainDom is canceled."); + _hasError = true; + return false; } finally { - _db.CompleteTransaction(); + db?.CompleteTransaction(); } if (watch.ElapsedMilliseconds >= millisecondsTimeout) @@ -218,9 +256,9 @@ namespace Umbraco.Core.Runtime try { - _db.BeginTransaction(IsolationLevel.ReadCommitted); + db.BeginTransaction(IsolationLevel.ReadCommitted); - _sqlServerSyntax.WriteLock(_db, Constants.Locks.MainDom); + _sqlServerSyntax.WriteLock(db, Constants.Locks.MainDom); // so now we update the row with our appdomain id InsertLockRecord(_lockId); @@ -228,19 +266,22 @@ namespace Umbraco.Core.Runtime } catch (Exception ex) { - _db.AbortTransaction(); + ResetDatabase(); if (IsLockTimeoutException(ex)) { - // something is wrong, we cannot acquire, not much we can do + // something is wrong, we cannot acquire, not much we can do + _logger.Error(ex, "Sql timeout occurred, could not forcibly acquire MainDom."); + _hasError = true; return false; } - - throw; + _logger.Error(ex, "Unexpected error, could not forcibly acquire MainDom."); + _hasError = true; + return false; } finally { - _db.CompleteTransaction(); + db?.CompleteTransaction(); } } } @@ -252,7 +293,8 @@ namespace Umbraco.Core.Runtime /// private RecordPersistenceType InsertLockRecord(string id) { - return _db.InsertOrUpdate(new KeyValueDto + var db = GetDatabase(); + return db.InsertOrUpdate(new KeyValueDto { Key = MainDomKey, Value = id, @@ -266,7 +308,8 @@ namespace Umbraco.Core.Runtime /// private bool IsMainDomValue(string val) { - return _db.ExecuteScalar("SELECT COUNT(*) FROM umbracoKeyValue WHERE [key] = @key AND [value] = @val", + var db = GetDatabase(); + return db.ExecuteScalar("SELECT COUNT(*) FROM umbracoKeyValue WHERE [key] = @key AND [value] = @val", new { key = MainDomKey, val = val }) == 1; } @@ -293,12 +336,13 @@ namespace Umbraco.Core.Runtime _cancellationTokenSource.Cancel(); _cancellationTokenSource.Dispose(); + var db = GetDatabase(); try { - _db.BeginTransaction(IsolationLevel.ReadCommitted); + db.BeginTransaction(IsolationLevel.ReadCommitted); // get a write lock - _sqlServerSyntax.WriteLock(_db, Constants.Locks.MainDom); + _sqlServerSyntax.WriteLock(db, Constants.Locks.MainDom); // When we are disposed, it means we have released the MainDom lock // and called all MainDom release callbacks, in this case @@ -308,23 +352,23 @@ namespace Umbraco.Core.Runtime // Otherwise, if we are just shutting down, we want to just delete the row. if (mainDomChanging) { - _db.Execute("UPDATE umbracoKeyValue SET [value] = [value] + '_updated' WHERE [key] = @key", new { key = MainDomKey }); + db.Execute("UPDATE umbracoKeyValue SET [value] = [value] + '_updated' WHERE [key] = @key", new { key = MainDomKey }); } else { - _db.Execute("DELETE FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey }); + db.Execute("DELETE FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey }); } } - catch (Exception) + catch (Exception ex) { - _db.AbortTransaction(); - throw; + ResetDatabase(); + _logger.Error(ex, "Unexpected error during dipsose."); + _hasError = true; } finally { - _db.CompleteTransaction(); - - _db.Dispose(); + db?.CompleteTransaction(); + ResetDatabase(); } } From 7501629c541633c8bb33077b473514bbe288f359 Mon Sep 17 00:00:00 2001 From: Shannon Date: Mon, 30 Dec 2019 17:15:57 +1100 Subject: [PATCH 07/23] Adds logging to SqlMainDomLock --- src/Umbraco.Core/Runtime/SqlMainDomLock.cs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs index c567b76403..a2560b71a3 100644 --- a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs +++ b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs @@ -38,6 +38,8 @@ namespace Umbraco.Core.Runtime public async Task AcquireLockAsync(int millisecondsTimeout) { + _logger.Info("Acquiring lock..."); + var db = GetDatabase(); var tempId = Guid.NewGuid().ToString(); @@ -73,6 +75,7 @@ namespace Umbraco.Core.Runtime // are MainDom? then we don't leave any orphan rows behind. InsertLockRecord(_lockId); // so update with our appdomain id + _logger.Info("Acquired with ID {LockId}", _lockId); return true; } @@ -212,7 +215,7 @@ namespace Umbraco.Core.Runtime // so now we update the row with our appdomain id InsertLockRecord(_lockId); - + _logger.Info("Acquired with ID {LockId}", _lockId); return true; } else if (mainDomRows.Count == 1 && !mainDomRows[0].Value.StartsWith(tempId)) @@ -221,6 +224,8 @@ namespace Umbraco.Core.Runtime // another new AppDomain has come online and is wanting to take over. In that case, we will not // acquire. + _logger.Info("Cannot acquire, another booting application detected."); + return false; } } @@ -253,6 +258,7 @@ namespace Umbraco.Core.Runtime // which isn't ideal. // So... we're going to 'just' take over, if the writelock works then we'll assume we're ok + _logger.Info("Timeout elapsed, assuming orphan row, acquiring MainDom."); try { @@ -262,6 +268,7 @@ namespace Umbraco.Core.Runtime // so now we update the row with our appdomain id InsertLockRecord(_lockId); + _logger.Info("Acquired with ID {LockId}", _lockId); return true; } catch (Exception ex) From 0d1101eaa748180d350662beee6040d203bda37f Mon Sep 17 00:00:00 2001 From: Shannon Date: Mon, 30 Dec 2019 18:14:18 +1100 Subject: [PATCH 08/23] adds notes/logging --- src/Umbraco.Core/Runtime/SqlMainDomLock.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs index a2560b71a3..d7b800c364 100644 --- a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs +++ b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs @@ -112,6 +112,10 @@ namespace Umbraco.Core.Runtime { while(true) { + // If cancellation has been requested we will just exit. Depending on timing of the shutdown, + // we will have already flagged _mainDomChanging = true, or we're shutting down faster than + // the other MainDom is taking to startup. In this case the db row will just be deleted and the + // new MainDom will just take over. if (_cancellationTokenSource.IsCancellationRequested) break; @@ -135,6 +139,7 @@ namespace Umbraco.Core.Runtime { // we are no longer main dom, another one has come online, exit _mainDomChanging = true; + _logger.Info("Detected new booting application, releasing MainDom."); return; } } @@ -359,10 +364,12 @@ namespace Umbraco.Core.Runtime // Otherwise, if we are just shutting down, we want to just delete the row. if (mainDomChanging) { + _logger.Info("Releasing MainDom, updating row, new application is booting."); db.Execute("UPDATE umbracoKeyValue SET [value] = [value] + '_updated' WHERE [key] = @key", new { key = MainDomKey }); } else { + _logger.Info("Releasing MainDom, deleting row, application is shutting down."); db.Execute("DELETE FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey }); } } From 5476388e787c778852802042267f1f20ceb3aae6 Mon Sep 17 00:00:00 2001 From: Shannon Date: Mon, 30 Dec 2019 18:49:33 +1100 Subject: [PATCH 09/23] adds lock else we end up detecting when not needed --- src/Umbraco.Core/Runtime/SqlMainDomLock.cs | 143 +++++++++++---------- 1 file changed, 74 insertions(+), 69 deletions(-) diff --git a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs index d7b800c364..bb44bbbfbf 100644 --- a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs +++ b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs @@ -24,6 +24,7 @@ namespace Umbraco.Core.Runtime private bool _mainDomChanging = false; private readonly UmbracoDatabaseFactory _dbFactory; private bool _hasError; + private object _locker = new object(); public SqlMainDomLock(ILogger logger) { @@ -112,49 +113,53 @@ namespace Umbraco.Core.Runtime { while(true) { - // If cancellation has been requested we will just exit. Depending on timing of the shutdown, - // we will have already flagged _mainDomChanging = true, or we're shutting down faster than - // the other MainDom is taking to startup. In this case the db row will just be deleted and the - // new MainDom will just take over. - if (_cancellationTokenSource.IsCancellationRequested) - break; - // poll every 1 second Thread.Sleep(1000); - var db = GetDatabase(); - - try + lock(_locker) { - db.BeginTransaction(IsolationLevel.ReadCommitted); + // If cancellation has been requested we will just exit. Depending on timing of the shutdown, + // we will have already flagged _mainDomChanging = true, or we're shutting down faster than + // the other MainDom is taking to startup. In this case the db row will just be deleted and the + // new MainDom will just take over. + if (_cancellationTokenSource.IsCancellationRequested) + break; - // get a read lock - _sqlServerSyntax.ReadLock(db, Constants.Locks.MainDom); + var db = GetDatabase(); - // TODO: We could in theory just check if the main dom row doesn't exist, that could indicate that - // we are still the maindom. An empty value might be better because then we won't have any orphan rows - // if the app is terminated. Could that work? - - if (!IsMainDomValue(_lockId)) + try { - // we are no longer main dom, another one has come online, exit - _mainDomChanging = true; - _logger.Info("Detected new booting application, releasing MainDom."); + db.BeginTransaction(IsolationLevel.ReadCommitted); + + // get a read lock + _sqlServerSyntax.ReadLock(db, Constants.Locks.MainDom); + + // TODO: We could in theory just check if the main dom row doesn't exist, that could indicate that + // we are still the maindom. An empty value might be better because then we won't have any orphan rows + // if the app is terminated. Could that work? + + if (!IsMainDomValue(_lockId)) + { + // we are no longer main dom, another one has come online, exit + _mainDomChanging = true; + _logger.Info("Detected new booting application, releasing MainDom."); + return; + } + } + catch (Exception ex) + { + ResetDatabase(); + // unexpected + _logger.Error(ex, "Unexpected error, listening is canceled."); + _hasError = true; return; } + finally + { + db?.CompleteTransaction(); + } } - catch (Exception ex) - { - ResetDatabase(); - // unexpected - _logger.Error(ex, "Unexpected error, listening is canceled."); - _hasError = true; - return; - } - finally - { - db?.CompleteTransaction(); - } + } @@ -341,48 +346,48 @@ namespace Umbraco.Core.Runtime { if (disposing) { - // capture locally just in case in some strange way a sub task somehow updates this - var mainDomChanging = _mainDomChanging; - - // immediately cancel all sub-tasks, we don't want them to keep querying - _cancellationTokenSource.Cancel(); - _cancellationTokenSource.Dispose(); - - var db = GetDatabase(); - try + lock (_locker) { - db.BeginTransaction(IsolationLevel.ReadCommitted); + // immediately cancel all sub-tasks, we don't want them to keep querying + _cancellationTokenSource.Cancel(); + _cancellationTokenSource.Dispose(); - // get a write lock - _sqlServerSyntax.WriteLock(db, Constants.Locks.MainDom); - - // When we are disposed, it means we have released the MainDom lock - // and called all MainDom release callbacks, in this case - // if another maindom is actually coming online we need - // to signal to the MainDom coming online that we have shutdown. - // To do that, we update the existing main dom DB record with a suffixed "_updated" string. - // Otherwise, if we are just shutting down, we want to just delete the row. - if (mainDomChanging) + var db = GetDatabase(); + try { - _logger.Info("Releasing MainDom, updating row, new application is booting."); - db.Execute("UPDATE umbracoKeyValue SET [value] = [value] + '_updated' WHERE [key] = @key", new { key = MainDomKey }); + db.BeginTransaction(IsolationLevel.ReadCommitted); + + // get a write lock + _sqlServerSyntax.WriteLock(db, Constants.Locks.MainDom); + + // When we are disposed, it means we have released the MainDom lock + // and called all MainDom release callbacks, in this case + // if another maindom is actually coming online we need + // to signal to the MainDom coming online that we have shutdown. + // To do that, we update the existing main dom DB record with a suffixed "_updated" string. + // Otherwise, if we are just shutting down, we want to just delete the row. + if (_mainDomChanging) + { + _logger.Info("Releasing MainDom, updating row, new application is booting."); + db.Execute("UPDATE umbracoKeyValue SET [value] = [value] + '_updated' WHERE [key] = @key", new { key = MainDomKey }); + } + else + { + _logger.Info("Releasing MainDom, deleting row, application is shutting down."); + db.Execute("DELETE FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey }); + } } - else + catch (Exception ex) { - _logger.Info("Releasing MainDom, deleting row, application is shutting down."); - db.Execute("DELETE FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey }); + ResetDatabase(); + _logger.Error(ex, "Unexpected error during dipsose."); + _hasError = true; + } + finally + { + db?.CompleteTransaction(); + ResetDatabase(); } - } - catch (Exception ex) - { - ResetDatabase(); - _logger.Error(ex, "Unexpected error during dipsose."); - _hasError = true; - } - finally - { - db?.CompleteTransaction(); - ResetDatabase(); } } From b24a87d9868d70bcbc164559f74caab744785b5c Mon Sep 17 00:00:00 2001 From: Shannon Date: Mon, 30 Dec 2019 18:51:38 +1100 Subject: [PATCH 10/23] Changes logging to debug --- src/Umbraco.Core/Runtime/SqlMainDomLock.cs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs index bb44bbbfbf..847a0c25df 100644 --- a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs +++ b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs @@ -39,7 +39,7 @@ namespace Umbraco.Core.Runtime public async Task AcquireLockAsync(int millisecondsTimeout) { - _logger.Info("Acquiring lock..."); + _logger.Debug("Acquiring lock..."); var db = GetDatabase(); @@ -76,7 +76,7 @@ namespace Umbraco.Core.Runtime // are MainDom? then we don't leave any orphan rows behind. InsertLockRecord(_lockId); // so update with our appdomain id - _logger.Info("Acquired with ID {LockId}", _lockId); + _logger.Debug("Acquired with ID {LockId}", _lockId); return true; } @@ -142,7 +142,7 @@ namespace Umbraco.Core.Runtime { // we are no longer main dom, another one has come online, exit _mainDomChanging = true; - _logger.Info("Detected new booting application, releasing MainDom."); + _logger.Debug("Detected new booting application, releasing MainDom."); return; } } @@ -225,7 +225,7 @@ namespace Umbraco.Core.Runtime // so now we update the row with our appdomain id InsertLockRecord(_lockId); - _logger.Info("Acquired with ID {LockId}", _lockId); + _logger.Debug("Acquired with ID {LockId}", _lockId); return true; } else if (mainDomRows.Count == 1 && !mainDomRows[0].Value.StartsWith(tempId)) @@ -234,7 +234,7 @@ namespace Umbraco.Core.Runtime // another new AppDomain has come online and is wanting to take over. In that case, we will not // acquire. - _logger.Info("Cannot acquire, another booting application detected."); + _logger.Debug("Cannot acquire, another booting application detected."); return false; } @@ -268,7 +268,7 @@ namespace Umbraco.Core.Runtime // which isn't ideal. // So... we're going to 'just' take over, if the writelock works then we'll assume we're ok - _logger.Info("Timeout elapsed, assuming orphan row, acquiring MainDom."); + _logger.Debug("Timeout elapsed, assuming orphan row, acquiring MainDom."); try { @@ -278,7 +278,7 @@ namespace Umbraco.Core.Runtime // so now we update the row with our appdomain id InsertLockRecord(_lockId); - _logger.Info("Acquired with ID {LockId}", _lockId); + _logger.Debug("Acquired with ID {LockId}", _lockId); return true; } catch (Exception ex) @@ -368,12 +368,12 @@ namespace Umbraco.Core.Runtime // Otherwise, if we are just shutting down, we want to just delete the row. if (_mainDomChanging) { - _logger.Info("Releasing MainDom, updating row, new application is booting."); + _logger.Debug("Releasing MainDom, updating row, new application is booting."); db.Execute("UPDATE umbracoKeyValue SET [value] = [value] + '_updated' WHERE [key] = @key", new { key = MainDomKey }); } else { - _logger.Info("Releasing MainDom, deleting row, application is shutting down."); + _logger.Debug("Releasing MainDom, deleting row, application is shutting down."); db.Execute("DELETE FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey }); } } From a46e9124d28799067377da5d969d2730425f57bb Mon Sep 17 00:00:00 2001 From: Shannon Date: Fri, 3 Jan 2020 10:38:48 +1100 Subject: [PATCH 11/23] First commit in fixing deadlock - committing my notes, etc... --- src/Umbraco.Core/Runtime/CoreRuntime.cs | 2 +- src/Umbraco.Core/Runtime/MainDom.cs | 27 ++++- .../Runtime/MainDomSemaphoreLock.cs | 9 +- src/Umbraco.Core/Runtime/SqlMainDomLock.cs | 88 +++++++-------- .../PublishedCache/NuCache/ContentStore.cs | 101 ++++++++++++++---- .../NuCache/PublishedSnapshot.cs | 8 ++ .../NuCache/PublishedSnapshotService.cs | 25 ++++- .../NuCache/{notes.txt => readme.md} | 44 ++++---- src/Umbraco.Web/Umbraco.Web.csproj | 4 +- src/Umbraco.Web/UmbracoApplication.cs | 2 +- 10 files changed, 207 insertions(+), 103 deletions(-) rename src/Umbraco.Web/PublishedCache/NuCache/{notes.txt => readme.md} (70%) diff --git a/src/Umbraco.Core/Runtime/CoreRuntime.cs b/src/Umbraco.Core/Runtime/CoreRuntime.cs index b46058fa82..b852aff2ff 100644 --- a/src/Umbraco.Core/Runtime/CoreRuntime.cs +++ b/src/Umbraco.Core/Runtime/CoreRuntime.cs @@ -147,7 +147,7 @@ namespace Umbraco.Core.Runtime // TODO: remove this in netcore, this is purely backwards compat hacks with the empty ctor if (MainDom == null) { - MainDom = new MainDom(Logger, new MainDomSemaphoreLock()); + MainDom = new MainDom(Logger, new MainDomSemaphoreLock(Logger)); } diff --git a/src/Umbraco.Core/Runtime/MainDom.cs b/src/Umbraco.Core/Runtime/MainDom.cs index 85cf5ad6a9..883b69b2a7 100644 --- a/src/Umbraco.Core/Runtime/MainDom.cs +++ b/src/Umbraco.Core/Runtime/MainDom.cs @@ -36,7 +36,7 @@ namespace Umbraco.Core.Runtime // actions to run before releasing the main domain private readonly List> _callbacks = new List>(); - private const int LockTimeoutMilliseconds = 90000; // (1.5 * 60 * 1000) == 1 min 30 seconds + private const int LockTimeoutMilliseconds = 10000; // 10 seconds #endregion @@ -106,6 +106,7 @@ namespace Umbraco.Core.Runtime { _logger.Info("Stopping ({SignalSource})", source); foreach (var callback in _callbacks.OrderBy(x => x.Key).Select(x => x.Value)) + { try { callback(); // no timeout on callbacks @@ -115,6 +116,8 @@ namespace Umbraco.Core.Runtime _logger.Error(e, "Error while running callback"); continue; } + } + _logger.Debug("Stopped ({SignalSource})", source); } finally @@ -142,17 +145,33 @@ namespace Umbraco.Core.Runtime _logger.Info("Acquiring."); // Get the lock - _mainDomLock.AcquireLockAsync(LockTimeoutMilliseconds).Wait(); + var acquired = _mainDomLock.AcquireLockAsync(LockTimeoutMilliseconds).Result; + + if (!acquired) + { + _logger.Info("Cannot acquire (timeout)."); + + // TODO: Previously we'd throw an exception and the appdomain would not start, what do we want to do? + + // return false; + + throw new TimeoutException("Cannot acquire MainDom"); + } try { // Listen for the signal from another AppDomain coming online to release the lock _mainDomLock.ListenAsync() - .ContinueWith(_ => OnSignal("signal")); + .ContinueWith(_ => + { + _logger.Debug("Signal heard from other appdomain."); + OnSignal("signal"); + }); } - catch (OperationCanceledException) + catch (OperationCanceledException ex) { // the waiting task could be canceled if this appdomain is naturally shutting down, we'll just swallow this exception + _logger.Warn(ex, ex.Message); } _logger.Info("Acquired."); diff --git a/src/Umbraco.Core/Runtime/MainDomSemaphoreLock.cs b/src/Umbraco.Core/Runtime/MainDomSemaphoreLock.cs index 89eef8a658..2dcc37e25f 100644 --- a/src/Umbraco.Core/Runtime/MainDomSemaphoreLock.cs +++ b/src/Umbraco.Core/Runtime/MainDomSemaphoreLock.cs @@ -1,6 +1,7 @@ using System; using System.Threading; using System.Threading.Tasks; +using Umbraco.Core.Logging; namespace Umbraco.Core.Runtime { @@ -14,16 +15,17 @@ namespace Umbraco.Core.Runtime // event wait handle used to notify current main domain that it should // release the lock because a new domain wants to be the main domain private readonly EventWaitHandle _signal; - + private readonly ILogger _logger; private IDisposable _lockRelease; - public MainDomSemaphoreLock() + public MainDomSemaphoreLock(ILogger logger) { var lockName = "UMBRACO-" + MainDom.GetMainDomId() + "-MAINDOM-LCK"; _systemLock = new SystemLock(lockName); var eventName = "UMBRACO-" + MainDom.GetMainDomId() + "-MAINDOM-EVT"; _signal = new EventWaitHandle(false, EventResetMode.AutoReset, eventName); + _logger = logger; } //WaitOneAsync (ext method) will wait for a signal without blocking the main thread, the waiting is done on a background thread @@ -47,8 +49,9 @@ namespace Umbraco.Core.Runtime _lockRelease = _systemLock.Lock(millisecondsTimeout); return Task.FromResult(true); } - catch (TimeoutException) + catch (TimeoutException ex) { + _logger.Error(ex); return Task.FromResult(false); } finally diff --git a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs index 847a0c25df..31f8b36ed0 100644 --- a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs +++ b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs @@ -109,62 +109,62 @@ namespace Umbraco.Core.Runtime // Create a long running task (dedicated thread) // to poll to check if we are still the MainDom registered in the DB - return Task.Factory.StartNew(() => + return Task.Factory.StartNew(ListeningLoop, _cancellationTokenSource.Token, TaskCreationOptions.LongRunning, TaskScheduler.Default); + + } + + private void ListeningLoop() + { + while (true) { - while(true) + // poll every 1 second + Thread.Sleep(1000); + + lock (_locker) { - // poll every 1 second - Thread.Sleep(1000); + // If cancellation has been requested we will just exit. Depending on timing of the shutdown, + // we will have already flagged _mainDomChanging = true, or we're shutting down faster than + // the other MainDom is taking to startup. In this case the db row will just be deleted and the + // new MainDom will just take over. + if (_cancellationTokenSource.IsCancellationRequested) + return; - lock(_locker) + var db = GetDatabase(); + + try { - // If cancellation has been requested we will just exit. Depending on timing of the shutdown, - // we will have already flagged _mainDomChanging = true, or we're shutting down faster than - // the other MainDom is taking to startup. In this case the db row will just be deleted and the - // new MainDom will just take over. - if (_cancellationTokenSource.IsCancellationRequested) - break; + db.BeginTransaction(IsolationLevel.ReadCommitted); - var db = GetDatabase(); + // get a read lock + _sqlServerSyntax.ReadLock(db, Constants.Locks.MainDom); - try + // TODO: We could in theory just check if the main dom row doesn't exist, that could indicate that + // we are still the maindom. An empty value might be better because then we won't have any orphan rows + // if the app is terminated. Could that work? + + if (!IsMainDomValue(_lockId)) { - db.BeginTransaction(IsolationLevel.ReadCommitted); - - // get a read lock - _sqlServerSyntax.ReadLock(db, Constants.Locks.MainDom); - - // TODO: We could in theory just check if the main dom row doesn't exist, that could indicate that - // we are still the maindom. An empty value might be better because then we won't have any orphan rows - // if the app is terminated. Could that work? - - if (!IsMainDomValue(_lockId)) - { - // we are no longer main dom, another one has come online, exit - _mainDomChanging = true; - _logger.Debug("Detected new booting application, releasing MainDom."); - return; - } - } - catch (Exception ex) - { - ResetDatabase(); - // unexpected - _logger.Error(ex, "Unexpected error, listening is canceled."); - _hasError = true; + // we are no longer main dom, another one has come online, exit + _mainDomChanging = true; + _logger.Debug("Detected new booting application, releasing MainDom lock."); return; } - finally - { - db?.CompleteTransaction(); - } } - + catch (Exception ex) + { + ResetDatabase(); + // unexpected + _logger.Error(ex, "Unexpected error, listening is canceled."); + _hasError = true; + return; + } + finally + { + db?.CompleteTransaction(); + } } - - - }, _cancellationTokenSource.Token, TaskCreationOptions.LongRunning, TaskScheduler.Default); + } } private void ResetDatabase() diff --git a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs index 550fd507d5..81229af9e1 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs @@ -115,11 +115,14 @@ namespace Umbraco.Web.PublishedCache.NuCache // TODO: GetScopedWriter? should the dict have a ref onto the scope provider? public IDisposable GetScopedWriteLock(IScopeProvider scopeProvider) { + _logger.Debug("GetScopedWriteLock"); return ScopeContextualBase.Get(scopeProvider, _instanceId, scoped => new ScopedWriteLock(this, scoped)); } private void Lock(WriteLockInfo lockInfo, bool forceGen = false) { + // TODO: We are in a deadlock here somehow?? + Monitor.Enter(_wlocko, ref lockInfo.Taken); var rtaken = false; @@ -193,8 +196,15 @@ namespace Umbraco.Web.PublishedCache.NuCache _localDb.Commit(); } - if (lockInfo.Count) _wlocked--; - if (lockInfo.Taken) Monitor.Exit(_wlocko); + // TODO: Shouldn't this be in a finally block? + // TODO: Shouldn't this be decremented after we exit?? + // TODO: Shouldn't the locked flag never exceed 1? + if (lockInfo.Count) + _wlocked--; + + // TODO: Shouldn't this be in a finally block? + if (lockInfo.Taken) + Monitor.Exit(_wlocko); } private void Release(ReadLockInfo lockInfo) @@ -232,21 +242,29 @@ namespace Umbraco.Web.PublishedCache.NuCache public void ReleaseLocalDb() { + _logger.Info("Releasing DB..."); var lockInfo = new WriteLockInfo(); try { try { // Trying to lock could throw exceptions so always make sure to clean up. + _logger.Info("Trying to lock before releasing DB (lock count = {LockCount}) ...", _wlocked); + Lock(lockInfo); } finally { try { + _logger.Info("Disposing local DB..."); _localDb?.Dispose(); } - catch { /* TBD: May already be throwing so don't throw again */} + catch (Exception ex) + { + /* TBD: May already be throwing so don't throw again */ + _logger.Error(ex, "Error trying to release DB"); + } finally { _localDb = null; @@ -254,8 +272,17 @@ namespace Umbraco.Web.PublishedCache.NuCache } } + catch (Exception ex) + { + _logger.Error(ex, "Error trying to lock"); + throw; + } finally { + _logger.Info("Releasing ContentStore..."); + + // TODO: I don't understand this, we would have already closed the BPlusTree store above?? + // I guess it's 'safe' and will just decrement the write lock counter? Release(lockInfo); } } @@ -275,6 +302,7 @@ namespace Umbraco.Web.PublishedCache.NuCache var lockInfo = new WriteLockInfo(); try { + _logger.Debug("NewContentTypes"); Lock(lockInfo); foreach (var type in types) @@ -297,6 +325,7 @@ namespace Umbraco.Web.PublishedCache.NuCache var lockInfo = new WriteLockInfo(); try { + _logger.Debug("UpdateContentTypes"); Lock(lockInfo); var index = types.ToDictionary(x => x.Id, x => x); @@ -324,10 +353,13 @@ namespace Umbraco.Web.PublishedCache.NuCache public void SetAllContentTypes(IEnumerable types) { - var lockInfo = new WriteLockInfo(); + // TODO: There should be NO lock here! All calls made to this are wrapped in GetScopedWriteLock + + //var lockInfo = new WriteLockInfo(); try { - Lock(lockInfo); + _logger.Debug("SetAllContentTypes"); + //Lock(lockInfo); // clear all existing content types ClearLocked(_contentTypesById); @@ -345,7 +377,7 @@ namespace Umbraco.Web.PublishedCache.NuCache } finally { - Release(lockInfo); + //Release(lockInfo); } } @@ -362,6 +394,7 @@ namespace Umbraco.Web.PublishedCache.NuCache var lockInfo = new WriteLockInfo(); try { + _logger.Debug("UpdateContentTypes"); Lock(lockInfo); var removedContentTypeNodes = new List(); @@ -435,6 +468,7 @@ namespace Umbraco.Web.PublishedCache.NuCache var lockInfo = new WriteLockInfo(); try { + _logger.Debug("UpdateDataTypes"); Lock(lockInfo); var contentTypes = _contentTypesById @@ -545,10 +579,11 @@ namespace Umbraco.Web.PublishedCache.NuCache _logger.Debug("Set content ID: {KitNodeId}", kit.Node.Id); - var lockInfo = new WriteLockInfo(); + // TODO: There should be NO locks here, all calls made to this are done within GetScopedWriteLock + //var lockInfo = new WriteLockInfo(); try { - Lock(lockInfo); + //Lock(lockInfo); // get existing _contentNodes.TryGetValue(kit.Node.Id, out var link); @@ -594,7 +629,7 @@ namespace Umbraco.Web.PublishedCache.NuCache } finally { - Release(lockInfo); + //Release(lockInfo); } return true; @@ -623,11 +658,16 @@ namespace Umbraco.Web.PublishedCache.NuCache /// internal bool SetAllFastSorted(IEnumerable kits, bool fromDb) { - var lockInfo = new WriteLockInfo(); + + //TODO: There should be NO locks here all calls made to this are done within GetScopedWriteLock + + //var lockInfo = new WriteLockInfo(); var ok = true; try { - Lock(lockInfo); + _logger.Debug("SetAllFastSorted"); + + //Lock(lockInfo); ClearLocked(_contentNodes); ClearRootLocked(); @@ -665,7 +705,7 @@ namespace Umbraco.Web.PublishedCache.NuCache previousNode = null; // there is no previous sibling } - _logger.Debug($"Set {thisNode.Id} with parent {thisNode.ParentContentId}"); + _logger.Verbose($"Set {thisNode.Id} with parent {thisNode.ParentContentId}"); SetValueLocked(_contentNodes, thisNode.Id, thisNode); // if we are initializing from the database source ensure the local db is updated @@ -689,7 +729,7 @@ namespace Umbraco.Web.PublishedCache.NuCache } finally { - Release(lockInfo); + //Release(lockInfo); } return ok; @@ -697,11 +737,15 @@ namespace Umbraco.Web.PublishedCache.NuCache public bool SetAll(IEnumerable kits) { - var lockInfo = new WriteLockInfo(); + //TODO: There should be NO locks here all calls made to this are done within GetScopedWriteLock + + //var lockInfo = new WriteLockInfo(); var ok = true; try { - Lock(lockInfo); + _logger.Debug("SetAll"); + + //Lock(lockInfo); ClearLocked(_contentNodes); ClearRootLocked(); @@ -717,7 +761,7 @@ namespace Umbraco.Web.PublishedCache.NuCache ok = false; continue; // skip that one } - _logger.Debug($"Set {kit.Node.Id} with parent {kit.Node.ParentContentId}"); + _logger.Verbose($"Set {kit.Node.Id} with parent {kit.Node.ParentContentId}"); SetValueLocked(_contentNodes, kit.Node.Id, kit.Node); if (_localDb != null) RegisterChange(kit.Node.Id, kit); @@ -728,7 +772,7 @@ namespace Umbraco.Web.PublishedCache.NuCache } finally { - Release(lockInfo); + //Release(lockInfo); } return ok; @@ -737,11 +781,15 @@ namespace Umbraco.Web.PublishedCache.NuCache // IMPORTANT kits must be sorted out by LEVEL and by SORT ORDER public bool SetBranch(int rootContentId, IEnumerable kits) { - var lockInfo = new WriteLockInfo(); + //TODO: There should be NO locks here all calls made to this are done within GetScopedWriteLock + + //var lockInfo = new WriteLockInfo(); var ok = true; try { - Lock(lockInfo); + _logger.Debug("SetBranch"); + + //Lock(lockInfo); // get existing _contentNodes.TryGetValue(rootContentId, out var link); @@ -773,7 +821,7 @@ namespace Umbraco.Web.PublishedCache.NuCache } finally { - Release(lockInfo); + //Release(lockInfo); } return ok; @@ -781,10 +829,13 @@ namespace Umbraco.Web.PublishedCache.NuCache public bool Clear(int id) { - var lockInfo = new WriteLockInfo(); + // TODO: There should be NO locks here! All calls to this are made within GetScopedWriteLock + //var lockInfo = new WriteLockInfo(); try { - Lock(lockInfo); + _logger.Debug("Clear"); + + //Lock(lockInfo); // try to find the content // if it is not there, nothing to do @@ -804,7 +855,7 @@ namespace Umbraco.Web.PublishedCache.NuCache } finally { - Release(lockInfo); + //Release(lockInfo); } } @@ -1200,6 +1251,8 @@ namespace Umbraco.Web.PublishedCache.NuCache var lockInfo = new ReadLockInfo(); try { + // TODO: This would be much simpler with just a lock(_rlocko) { } + // in this case I see no reason why we are using this syntax?! Lock(lockInfo); // if no next generation is required, and we already have one, @@ -1374,6 +1427,8 @@ namespace Umbraco.Web.PublishedCache.NuCache } } + // TODO: This is never used? Should it be? + public async Task WaitForPendingCollect() { Task task; diff --git a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshot.cs b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshot.cs index 3f5c1aa4d5..bbb84fc650 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshot.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshot.cs @@ -39,7 +39,15 @@ namespace Umbraco.Web.PublishedCache.NuCache public void Resync() { + // This is annoying since this can cause deadlocks. Since this will be cleared if something happens in the same + // thread after that calls Elements, it will re-lock the _storesLock but that might already be locked again + // and since we're most likely in a ContentStore write lock, the other thread is probably wanting that one too. + // no lock - published snapshots are single-thread + + // TODO: Instead of clearing, we could hold this value in another var in case the above call to GetElements() Or potentially + // a new TryGetElements() fails (since we might be shutting down). In that case we can use the old value. + _elements?.Dispose(); _elements = null; } diff --git a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs index a866297d72..2439633005 100755 --- a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs @@ -190,10 +190,15 @@ namespace Umbraco.Web.PublishedCache.NuCache /// private void MainDomRelease() { + _logger.Debug("Releasing from MainDom..."); + lock (_storesLock) { + _logger.Debug("Releasing content store..."); _contentStore?.ReleaseLocalDb(); //null check because we could shut down before being assigned _localContentDb = null; + + _logger.Debug("Releasing media store..."); _mediaStore?.ReleaseLocalDb(); //null check because we could shut down before being assigned _localMediaDb = null; @@ -663,10 +668,20 @@ namespace Umbraco.Web.PublishedCache.NuCache publishedChanged = publishedChanged2; } + // TODO: These resync's are a problem, they cause deadlocks because when this is called, it's generally called within a writelock + // and then we clear out the snapshot and then if there's some event handler that needs the content cache, it tries to re-get it + // which first tries locking on the _storesLock which may have already been acquired and in this case we deadlock because + // we're still holding the write lock. + // We resync at the end of a ScopedWriteLock + // BUT if we don't resync here then the current snapshot is out of date for any event handlers that wish to use the most up to date + // data... + // so we need to figure out how to deal with the _storesLock + if (draftChanged || publishedChanged) ((PublishedSnapshot)CurrentPublishedSnapshot)?.Resync(); } + // Calling this method means we have a lock on the contentStore (i.e. GetScopedWriteLock) private void NotifyLocked(IEnumerable payloads, out bool draftChanged, out bool publishedChanged) { publishedChanged = false; @@ -676,7 +691,7 @@ namespace Umbraco.Web.PublishedCache.NuCache // content (and content types) are read-locked while reading content // contentStore is wlocked (so readable, only no new views) // and it can be wlocked by 1 thread only at a time - // contentStore is write-locked during changes + // contentStore is write-locked during changes - see note above, calls to this method are wrapped in contentStore.GetScopedWriteLock foreach (var payload in payloads) { @@ -1131,6 +1146,12 @@ namespace Umbraco.Web.PublishedCache.NuCache ContentStore.Snapshot contentSnap, mediaSnap; SnapDictionary.Snapshot domainSnap; IAppCache elementsCache; + + // TODO: Idea... TryGetElements? Might check if we are shutting down and return false and callers to this could handle it? + // Else does a readerwriterlockslim work here? (i don't think so) + // Else we have 2x locks, one for startup/shutdown, the other for getting elements and then we can maybe do a Monitor.Try? + // That is sort of the same as a TryGetElements + lock (_storesLock) { var scopeContext = _scopeProvider.Context; @@ -1156,6 +1177,8 @@ namespace Umbraco.Web.PublishedCache.NuCache // elements // just need to make sure nothing gets elements in another enlisted action... so using // a MaxValue to make sure this one runs last, and it should be ok + // ... else there is potential to deadlock since this would recursively go back into trying + // lock on _storesLock but another thread may have already tried that scopeContext.Enlist("Umbraco.Web.PublishedCache.NuCache.PublishedSnapshotService.Resync", () => this, (completed, svc) => { ((PublishedSnapshot)svc.CurrentPublishedSnapshot)?.Resync(); diff --git a/src/Umbraco.Web/PublishedCache/NuCache/notes.txt b/src/Umbraco.Web/PublishedCache/NuCache/readme.md similarity index 70% rename from src/Umbraco.Web/PublishedCache/NuCache/notes.txt rename to src/Umbraco.Web/PublishedCache/NuCache/readme.md index ff2d8dd48b..c8e8a363e9 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/notes.txt +++ b/src/Umbraco.Web/PublishedCache/NuCache/readme.md @@ -1,10 +1,6 @@ NuCache Documentation ====================== -NOTE - RENAMING -Facade = PublishedSnapshot -and everything needs to be renamed accordingly - HOW IT WORKS ------------- @@ -22,12 +18,12 @@ When reading the cache, we read views up the chain until we find a value (which null) for the given id, and finally we read the store itself. -The FacadeService manages a ContentStore for content, and another for media. -When a Facade is created, the FacadeService gets ContentView objects from the stores. +The PublishedSnapshotService manages a ContentStore for content, and another for media. +When a PublishedSnapshot is created, the PublishedSnapshotService gets ContentView objects from the stores. Views provide an immutable snapshot of the content and media. -When the FacadeService is notified of changes, it notifies the stores. -Then it resync the current Facade, so that it requires new views, etc. +When the PublishedSnapshotService is notified of changes, it notifies the stores. +Then it resync the current PublishedSnapshot, so that it requires new views, etc. Whenever a content, media or member is modified or removed, the cmsContentNu table is updated with a json dictionary of alias => property value, so that a content, @@ -50,32 +46,32 @@ Each ContentStore has a _freezeLock object used to protect the 'Frozen' state of the store. It's a disposable object that releases the lock when disposed, so usage would be: using (store.Frozen) { ... }. -The FacadeService has a _storesLock object used to guarantee atomic access to the +The PublishedSnapshotService has a _storesLock object used to guarantee atomic access to the set of content, media stores. CACHE ------ -For each set of views, the FacadeService creates a SnapshotCache. So a SnapshotCache +For each set of views, the PublishedSnapshotService creates a SnapshotCache. So a SnapshotCache is valid until anything changes in the content or media trees. In other words, things that go in the SnapshotCache stay until a content or media is modified. -For each Facade, the FacadeService creates a FacadeCache. So a FacadeCache is valid -for the duration of the Facade (usually, the request). In other words, things that go -in the FacadeCache stay (and are visible to) for the duration of the request only. +For each PublishedSnapshot, the PublishedSnapshotService creates a PublishedSnapshotCache. So a PublishedSnapshotCache is valid +for the duration of the PublishedSnapshot (usually, the request). In other words, things that go +in the PublishedSnapshotCache stay (and are visible to) for the duration of the request only. -The FacadeService defines a static constant FullCacheWhenPreviewing, that defines +The PublishedSnapshotService defines a static constant FullCacheWhenPreviewing, that defines how caches operate when previewing: - when true, the caches in preview mode work normally. -- when false, everything that would go to the SnapshotCache goes to the FacadeCache. +- when false, everything that would go to the SnapshotCache goes to the PublishedSnapshotCache. At the moment it is true in the code, which means that eg converted values for previewed content will go in the SnapshotCache. Makes for faster preview, but uses more memory on the long term... would need some benchmarking to figure out what is best. -Members only live for the duration of the Facade. So, for members SnapshotCache is -never used, and everything goes to the FacadeCache. +Members only live for the duration of the PublishedSnapshot. So, for members SnapshotCache is +never used, and everything goes to the PublishedSnapshotCache. All cache keys are computed in the CacheKeys static class. @@ -85,15 +81,15 @@ TESTS For testing purposes the following mechanisms exist: -The Facade type has a static Current property that is used to obtain the 'current' -facade in many places, going through the PublishedCachesServiceResolver to get the -current service, and asking the current service for the current facade, which by +The PublishedSnapshot type has a static Current property that is used to obtain the 'current' +PublishedSnapshot in many places, going through the PublishedCachesServiceResolver to get the +current service, and asking the current service for the current PublishedSnapshot, which by default relies on UmbracoContext. For test purposes, it is possible to override the -entire mechanism by defining Facade.GetCurrentFacadeFunc which should return a facade. +entire mechanism by defining PublishedSnapshot.GetCurrentPublishedSnapshotFunc which should return a PublishedSnapshot. A PublishedContent keeps only id-references to its parent and children, and needs a way to retrieve the actual objects from the cache - which depends on the current -facade. It is possible to override the entire mechanism by defining PublishedContent. +PublishedSnapshot. It is possible to override the entire mechanism by defining PublishedContent. GetContentByIdFunc or .GetMediaByIdFunc. Setting these functions must be done before Resolution is frozen. @@ -110,7 +106,7 @@ possible to support detached contents & properties, even those that do not have int id, but again this should be refactored entirely anyway. Not doing any row-version checks (see XmlStore) when reloading from database, though it -is maintained in the database. Two FIXME in FacadeService. Should we do it? +is maintained in the database. Two FIXME in PublishedSnapshotService. Should we do it? There is no on-disk cache at all so everything is reloaded from the cmsContentNu table when the site restarts. This is pretty fast, but we should experiment with solutions to @@ -121,4 +117,4 @@ PublishedMember exposes properties that IPublishedContent does not, and that are to be lost soon as the member is wrapped (content set, model...) - so we probably need some sort of IPublishedMember. -/ \ No newline at end of file +/ diff --git a/src/Umbraco.Web/Umbraco.Web.csproj b/src/Umbraco.Web/Umbraco.Web.csproj index 5d29e53d4a..861b95691b 100755 --- a/src/Umbraco.Web/Umbraco.Web.csproj +++ b/src/Umbraco.Web/Umbraco.Web.csproj @@ -1216,7 +1216,7 @@ - + @@ -1279,4 +1279,4 @@ - + \ No newline at end of file diff --git a/src/Umbraco.Web/UmbracoApplication.cs b/src/Umbraco.Web/UmbracoApplication.cs index ad8dcb7e8b..f8ee238da7 100644 --- a/src/Umbraco.Web/UmbracoApplication.cs +++ b/src/Umbraco.Web/UmbracoApplication.cs @@ -22,7 +22,7 @@ namespace Umbraco.Web var mainDomLock = appSettingMainDomLock == "SqlMainDomLock" ? (IMainDomLock)new SqlMainDomLock(logger) - : new MainDomSemaphoreLock(); + : new MainDomSemaphoreLock(logger); var runtime = new WebRuntime(this, logger, new MainDom(logger, mainDomLock)); From 3d8e9a78e3da9bd8d64abd993f69c1374991c20f Mon Sep 17 00:00:00 2001 From: Shannon Date: Fri, 3 Jan 2020 12:39:17 +1100 Subject: [PATCH 12/23] Fixes deadlock --- .../NuCache/PublishedSnapshotService.cs | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs index 2439633005..97e546e603 100755 --- a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs @@ -4,6 +4,7 @@ using System.Diagnostics; using System.Globalization; using System.IO; using System.Linq; +using System.Threading; using CSharpTest.Net.Collections; using Newtonsoft.Json; using Umbraco.Core; @@ -54,6 +55,7 @@ namespace Umbraco.Web.PublishedCache.NuCache private readonly ContentStore _mediaStore; private readonly SnapDictionary _domainStore; private readonly object _storesLock = new object(); + private readonly object _elementsLock = new object(); private BPlusTree _localContentDb; private BPlusTree _localMediaDb; @@ -143,7 +145,7 @@ namespace Umbraco.Web.PublishedCache.NuCache _domainStore = new SnapDictionary(); - LoadCachesOnStartup(); + LoadCachesOnStartup(); } Guid GetUid(ContentStore store, int id) => store.LiveSnapshot.Get(id)?.Uid ?? default; @@ -171,7 +173,7 @@ namespace Umbraco.Web.PublishedCache.NuCache var path = GetLocalFilesPath(); var localContentDbPath = Path.Combine(path, "NuCache.Content.db"); var localMediaDbPath = Path.Combine(path, "NuCache.Media.db"); - + _localContentDbExists = File.Exists(localContentDbPath); _localMediaDbExists = File.Exists(localMediaDbPath); @@ -221,7 +223,7 @@ namespace Umbraco.Web.PublishedCache.NuCache { okContent = LockAndLoadContent(scope => LoadContentFromLocalDbLocked(true)); if (!okContent) - _logger.Warn("Loading content from local db raised warnings, will reload from database."); + _logger.Warn("Loading content from local db raised warnings, will reload from database."); } if (_localMediaDbExists) @@ -1147,12 +1149,12 @@ namespace Umbraco.Web.PublishedCache.NuCache SnapDictionary.Snapshot domainSnap; IAppCache elementsCache; - // TODO: Idea... TryGetElements? Might check if we are shutting down and return false and callers to this could handle it? - // Else does a readerwriterlockslim work here? (i don't think so) - // Else we have 2x locks, one for startup/shutdown, the other for getting elements and then we can maybe do a Monitor.Try? - // That is sort of the same as a TryGetElements + // Here we are reading/writing to shared objects so we need to lock (can't be _storesLock which manages the actual nucache files + // and would result in a deadlock). Even though we are locking around underlying readlocks (within CreateSnapshot) it's because + // we need to ensure that the result of contentSnap.Gen (etc) and the re-assignment of these values and _elements cache + // are done atomically. - lock (_storesLock) + lock (_elementsLock) { var scopeContext = _scopeProvider.Context; @@ -1177,14 +1179,14 @@ namespace Umbraco.Web.PublishedCache.NuCache // elements // just need to make sure nothing gets elements in another enlisted action... so using // a MaxValue to make sure this one runs last, and it should be ok - // ... else there is potential to deadlock since this would recursively go back into trying - // lock on _storesLock but another thread may have already tried that + scopeContext.Enlist("Umbraco.Web.PublishedCache.NuCache.PublishedSnapshotService.Resync", () => this, (completed, svc) => { ((PublishedSnapshot)svc.CurrentPublishedSnapshot)?.Resync(); }, int.MaxValue); } + // create a new snapshot cache if snapshots are different gens if (contentSnap.Gen != _contentGen || mediaSnap.Gen != _mediaGen || domainSnap.Gen != _domainGen || _elementsCache == null) { @@ -1351,7 +1353,7 @@ namespace Umbraco.Web.PublishedCache.NuCache { //culture changed on an existing language var cultureChanged = e.SavedEntities.Any(x => !x.WasPropertyDirty(nameof(ILanguage.Id)) && x.WasPropertyDirty(nameof(ILanguage.IsoCode))); - if(cultureChanged) + if (cultureChanged) { RebuildContentDbCache(); } From e6b333a3ec2f5234a081bb7f1ab821e42f332a3e Mon Sep 17 00:00:00 2001 From: Shannon Date: Fri, 3 Jan 2020 12:39:56 +1100 Subject: [PATCH 13/23] Changes readlocks to normal locks, no need to have extra objects allocated and complex code. --- .../PublishedCache/NuCache/ContentStore.cs | 72 +++++-------------- .../PublishedCache/NuCache/SnapDictionary.cs | 58 ++++----------- 2 files changed, 32 insertions(+), 98 deletions(-) diff --git a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs index 81229af9e1..450dc2f283 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs @@ -20,6 +20,7 @@ namespace Umbraco.Web.PublishedCache.NuCache // this class is an extended version of SnapDictionary // most of the snapshots management code, etc is an exact copy // SnapDictionary has unit tests to ensure it all works correctly + // For locking information, see SnapDictionary private readonly IPublishedSnapshotAccessor _publishedSnapshotAccessor; private readonly IVariationContextAccessor _variationContextAccessor; @@ -79,11 +80,6 @@ namespace Umbraco.Web.PublishedCache.NuCache private readonly string _instanceId = Guid.NewGuid().ToString("N"); - private class ReadLockInfo - { - public bool Taken; - } - private class WriteLockInfo { public bool Taken; @@ -121,15 +117,10 @@ namespace Umbraco.Web.PublishedCache.NuCache private void Lock(WriteLockInfo lockInfo, bool forceGen = false) { - // TODO: We are in a deadlock here somehow?? - Monitor.Enter(_wlocko, ref lockInfo.Taken); - var rtaken = false; - try + lock(_rlocko) { - Monitor.Enter(_rlocko, ref rtaken); - // see SnapDictionary try { } finally @@ -146,26 +137,16 @@ namespace Umbraco.Web.PublishedCache.NuCache _nextGen = true; } } - } - finally - { - if (rtaken) Monitor.Exit(_rlocko); - } - } - - private void Lock(ReadLockInfo lockInfo) - { - Monitor.Enter(_rlocko, ref lockInfo.Taken); + } } private void Release(WriteLockInfo lockInfo, bool commit = true) { if (commit == false) { - var rtaken = false; - try + lock(_rlocko) { - Monitor.Enter(_rlocko, ref rtaken); + // see SnapDictionary try { } finally { @@ -173,10 +154,6 @@ namespace Umbraco.Web.PublishedCache.NuCache _liveGen -= 1; } } - finally - { - if (rtaken) Monitor.Exit(_rlocko); - } Rollback(_contentNodes); RollbackRoot(); @@ -207,11 +184,6 @@ namespace Umbraco.Web.PublishedCache.NuCache Monitor.Exit(_wlocko); } - private void Release(ReadLockInfo lockInfo) - { - if (lockInfo.Taken) Monitor.Exit(_rlocko); - } - private void RollbackRoot() { if (_root.Gen <= _liveGen) return; @@ -1248,13 +1220,8 @@ namespace Umbraco.Web.PublishedCache.NuCache public Snapshot CreateSnapshot() { - var lockInfo = new ReadLockInfo(); - try + lock(_rlocko) { - // TODO: This would be much simpler with just a lock(_rlocko) { } - // in this case I see no reason why we are using this syntax?! - Lock(lockInfo); - // if no next generation is required, and we already have one, // use it and create a new snapshot if (_nextGen == false && _genObj != null) @@ -1306,10 +1273,6 @@ namespace Umbraco.Web.PublishedCache.NuCache return snapshot; } - finally - { - Release(lockInfo); - } } public Snapshot LiveSnapshot => new Snapshot(this, _liveGen @@ -1427,18 +1390,17 @@ namespace Umbraco.Web.PublishedCache.NuCache } } - // TODO: This is never used? Should it be? - - public async Task WaitForPendingCollect() - { - Task task; - lock (_rlocko) - { - task = _collectTask; - } - if (task != null) - await task; - } + // TODO: This is never used? Should it be? Maybe move to TestHelper below? + //public async Task WaitForPendingCollect() + //{ + // Task task; + // lock (_rlocko) + // { + // task = _collectTask; + // } + // if (task != null) + // await task; + //} public long GenCount => _genObjs.Count; diff --git a/src/Umbraco.Web/PublishedCache/NuCache/SnapDictionary.cs b/src/Umbraco.Web/PublishedCache/NuCache/SnapDictionary.cs index 9671949ff0..e0ad26eb81 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/SnapDictionary.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/SnapDictionary.cs @@ -22,6 +22,11 @@ namespace Umbraco.Web.PublishedCache.NuCache // This class is optimized for many readers, few writers // Readers are lock-free + // NOTE - we used to lock _rlocko the long hand way with Monitor.Enter(_rlocko, ref lockTaken) but this has + // been replaced with a normal c# lock because that's exactly how the normal c# lock works, + // see https://blogs.msdn.microsoft.com/ericlippert/2009/03/06/locks-and-exceptions-do-not-mix/ + // for the readlock, there's no reason here to use the long hand way. + private readonly ConcurrentDictionary> _items; private readonly ConcurrentQueue _genObjs; private GenObj _genObj; @@ -71,16 +76,11 @@ namespace Umbraco.Web.PublishedCache.NuCache // are all ignored - Release is private and meant to be invoked with 'commit' being false only // only on the outermost lock (by SnapDictionaryWriter) - // using (...) {} for locking is prone to nasty leaks in case of weird exceptions - // such as thread-abort or out-of-memory, but let's not worry about it now + // side note - using (...) {} for locking is prone to nasty leaks in case of weird exceptions + // such as thread-abort or out-of-memory, which is why we've moved away from the old using wrapper we had on locking. private readonly string _instanceId = Guid.NewGuid().ToString("N"); - private class ReadLockInfo - { - public bool Taken; - } - private class WriteLockInfo { public bool Taken; @@ -121,18 +121,16 @@ namespace Umbraco.Web.PublishedCache.NuCache { Monitor.Enter(_wlocko, ref lockInfo.Taken); - var rtaken = false; - try + lock(_rlocko) { - Monitor.Enter(_rlocko, ref rtaken); - // assume everything in finally runs atomically // http://stackoverflow.com/questions/18501678/can-this-unexpected-behavior-of-prepareconstrainedregions-and-thread-abort-be-ex // http://joeduffyblog.com/2005/03/18/atomicity-and-asynchronous-exception-failures/ // http://joeduffyblog.com/2007/02/07/introducing-the-new-readerwriterlockslim-in-orcas/ // http://chabster.blogspot.fr/2013/12/readerwriterlockslim-fails-on-dual.html //RuntimeHelpers.PrepareConstrainedRegions(); - try { } finally + try { } + finally { // increment the lock count, and register that this lock is counting _wlocked++; @@ -149,15 +147,6 @@ namespace Umbraco.Web.PublishedCache.NuCache } } } - finally - { - if (rtaken) Monitor.Exit(_rlocko); - } - } - - private void Lock(ReadLockInfo lockInfo) - { - Monitor.Enter(_rlocko, ref lockInfo.Taken); } private void Release(WriteLockInfo lockInfo, bool commit = true) @@ -168,22 +157,17 @@ namespace Umbraco.Web.PublishedCache.NuCache if (commit == false) { - var rtaken = false; - try + lock(_rlocko) { - Monitor.Enter(_rlocko, ref rtaken); - try { } finally + try { } + finally { // forget about the temp. liveGen _nextGen = false; _liveGen -= 1; } } - finally - { - if (rtaken) Monitor.Exit(_rlocko); - } - + foreach (var item in _items) { var link = item.Value; @@ -202,11 +186,6 @@ namespace Umbraco.Web.PublishedCache.NuCache Monitor.Exit(_wlocko); } - private void Release(ReadLockInfo lockInfo) - { - if (lockInfo.Taken) Monitor.Exit(_rlocko); - } - #endregion #region Set, Clear, Get, Has @@ -347,11 +326,8 @@ namespace Umbraco.Web.PublishedCache.NuCache public Snapshot CreateSnapshot() { - var lockInfo = new ReadLockInfo(); - try + lock(_rlocko) { - Lock(lockInfo); - // if no next generation is required, and we already have a gen object, // use it to create a new snapshot if (_nextGen == false && _genObj != null) @@ -398,10 +374,6 @@ namespace Umbraco.Web.PublishedCache.NuCache return snapshot; } - finally - { - Release(lockInfo); - } } public Task CollectAsync() From 8e3b3c83261e5272fefb219aaceaa645775ad164 Mon Sep 17 00:00:00 2001 From: Shannon Date: Fri, 3 Jan 2020 13:21:49 +1100 Subject: [PATCH 14/23] Changes methods that should already be locked to check that they are and changes their names/adds docs --- .../PublishedCache/NuCache/ContentStore.cs | 488 +++++++++--------- .../NuCache/PublishedSnapshotService.cs | 30 +- 2 files changed, 270 insertions(+), 248 deletions(-) diff --git a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs index 450dc2f283..82fae0d32a 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs @@ -115,6 +115,12 @@ namespace Umbraco.Web.PublishedCache.NuCache return ScopeContextualBase.Get(scopeProvider, _instanceId, scoped => new ScopedWriteLock(this, scoped)); } + private void EnsureLocked() + { + if (!Monitor.IsEntered(_wlocko)) + throw new InvalidOperationException("Write lock must be acquried."); + } + private void Lock(WriteLockInfo lockInfo, bool forceGen = false) { Monitor.Enter(_wlocko, ref lockInfo.Taken); @@ -323,34 +329,36 @@ namespace Umbraco.Web.PublishedCache.NuCache } } - public void SetAllContentTypes(IEnumerable types) + /// + /// Updates/sets data for all content types + /// + /// + /// + /// This methods MUST be called from within a write lock, normally wrapped within GetScopedWriteLock + /// otherwise an exception will occur. + /// + /// + /// Thrown if this method is not called within a write lock + /// + public void SetAllContentTypesLocked(IEnumerable types) { - // TODO: There should be NO lock here! All calls made to this are wrapped in GetScopedWriteLock + EnsureLocked(); - //var lockInfo = new WriteLockInfo(); - try + _logger.Debug("SetAllContentTypes"); + + // clear all existing content types + ClearLocked(_contentTypesById); + ClearLocked(_contentTypesByAlias); + + // set all new content types + foreach (var type in types) { - _logger.Debug("SetAllContentTypes"); - //Lock(lockInfo); - - // clear all existing content types - ClearLocked(_contentTypesById); - ClearLocked(_contentTypesByAlias); - - // set all new content types - foreach (var type in types) - { - SetValueLocked(_contentTypesById, type.Id, type); - SetValueLocked(_contentTypesByAlias, type.Alias, type); - } - - // beware! at that point the cache is inconsistent, - // assuming we are going to SetAll content items! - } - finally - { - //Release(lockInfo); + SetValueLocked(_contentTypesById, type.Id, type); + SetValueLocked(_contentTypesByAlias, type.Alias, type); } + + // beware! at that point the cache is inconsistent, + // assuming we are going to SetAll content items! } public void UpdateContentTypes(IReadOnlyCollection removedIds, IReadOnlyCollection refreshedTypes, IReadOnlyCollection kits) @@ -540,8 +548,22 @@ namespace Umbraco.Web.PublishedCache.NuCache return link; } - public bool Set(ContentNodeKit kit) + /// + /// Sets the data for a + /// + /// + /// + /// + /// This methods MUST be called from within a write lock, normally wrapped within GetScopedWriteLock + /// otherwise an exception will occur. + /// + /// + /// Thrown if this method is not called within a write lock + /// + public bool SetLocked(ContentNodeKit kit) { + EnsureLocked(); + // ReSharper disable LocalizableElement if (kit.IsEmpty) throw new ArgumentException("Kit is empty.", nameof(kit)); @@ -551,58 +573,47 @@ namespace Umbraco.Web.PublishedCache.NuCache _logger.Debug("Set content ID: {KitNodeId}", kit.Node.Id); - // TODO: There should be NO locks here, all calls made to this are done within GetScopedWriteLock - //var lockInfo = new WriteLockInfo(); - try + // get existing + _contentNodes.TryGetValue(kit.Node.Id, out var link); + var existing = link?.Value; + + if (!BuildKit(kit, out var parent)) + return false; + + // moving? + var moving = existing != null && existing.ParentContentId != kit.Node.ParentContentId; + + // manage children + if (existing != null) { - //Lock(lockInfo); - - // get existing - _contentNodes.TryGetValue(kit.Node.Id, out var link); - var existing = link?.Value; - - if (!BuildKit(kit, out var parent)) - return false; - - // moving? - var moving = existing != null && existing.ParentContentId != kit.Node.ParentContentId; - - // manage children - if (existing != null) - { - kit.Node.FirstChildContentId = existing.FirstChildContentId; - kit.Node.LastChildContentId = existing.LastChildContentId; - } - - // set - SetValueLocked(_contentNodes, kit.Node.Id, kit.Node); - if (_localDb != null) RegisterChange(kit.Node.Id, kit); - - // manage the tree - if (existing == null) - { - // new, add to parent - AddTreeNodeLocked(kit.Node, parent); - } - else if (moving || existing.SortOrder != kit.Node.SortOrder) - { - // moved, remove existing from its parent, add content to its parent - RemoveTreeNodeLocked(existing); - AddTreeNodeLocked(kit.Node); - } - else - { - // replacing existing, handle siblings - kit.Node.NextSiblingContentId = existing.NextSiblingContentId; - kit.Node.PreviousSiblingContentId = existing.PreviousSiblingContentId; - } - - _xmap[kit.Node.Uid] = kit.Node.Id; + kit.Node.FirstChildContentId = existing.FirstChildContentId; + kit.Node.LastChildContentId = existing.LastChildContentId; } - finally + + // set + SetValueLocked(_contentNodes, kit.Node.Id, kit.Node); + if (_localDb != null) RegisterChange(kit.Node.Id, kit); + + // manage the tree + if (existing == null) { - //Release(lockInfo); + // new, add to parent + AddTreeNodeLocked(kit.Node, parent); } + else if (moving || existing.SortOrder != kit.Node.SortOrder) + { + // moved, remove existing from its parent, add content to its parent + RemoveTreeNodeLocked(existing); + AddTreeNodeLocked(kit.Node); + } + else + { + // replacing existing, handle siblings + kit.Node.NextSiblingContentId = existing.NextSiblingContentId; + kit.Node.PreviousSiblingContentId = existing.PreviousSiblingContentId; + } + + _xmap[kit.Node.Uid] = kit.Node.Id; return true; } @@ -624,211 +635,222 @@ namespace Umbraco.Web.PublishedCache.NuCache /// True if the data is coming from the database (not the local cache db) /// /// + /// /// This requires that the collection is sorted by Level + ParentId + Sort Order. /// This should be used only on a site startup as the first generations. /// This CANNOT be used after startup since it bypasses all checks for Generations. + /// + /// + /// This methods MUST be called from within a write lock, normally wrapped within GetScopedWriteLock + /// otherwise an exception will occur. + /// /// - internal bool SetAllFastSorted(IEnumerable kits, bool fromDb) + /// + /// Thrown if this method is not called within a write lock + /// + public bool SetAllFastSortedLocked(IEnumerable kits, bool fromDb) { + EnsureLocked(); - //TODO: There should be NO locks here all calls made to this are done within GetScopedWriteLock + _logger.Debug("SetAllFastSorted"); - //var lockInfo = new WriteLockInfo(); var ok = true; - try + + ClearLocked(_contentNodes); + ClearRootLocked(); + + // The name of the game here is to populate each kit's + // FirstChildContentId + // LastChildContentId + // NextSiblingContentId + // PreviousSiblingContentId + + ContentNode previousNode = null; + ContentNode parent = null; + + foreach (var kit in kits) { - _logger.Debug("SetAllFastSorted"); - - //Lock(lockInfo); - - ClearLocked(_contentNodes); - ClearRootLocked(); - - // The name of the game here is to populate each kit's - // FirstChildContentId - // LastChildContentId - // NextSiblingContentId - // PreviousSiblingContentId - - ContentNode previousNode = null; - ContentNode parent = null; - - foreach (var kit in kits) + if (!BuildKit(kit, out var parentLink)) { - if (!BuildKit(kit, out var parentLink)) - { - ok = false; - continue; // skip that one - } - - var thisNode = kit.Node; - - if (parent == null) - { - // first parent - parent = parentLink.Value; - parent.FirstChildContentId = thisNode.Id; // this node is the first node - } - else if (parent.Id != parentLink.Value.Id) - { - // new parent - parent = parentLink.Value; - parent.FirstChildContentId = thisNode.Id; // this node is the first node - previousNode = null; // there is no previous sibling - } - - _logger.Verbose($"Set {thisNode.Id} with parent {thisNode.ParentContentId}"); - SetValueLocked(_contentNodes, thisNode.Id, thisNode); - - // if we are initializing from the database source ensure the local db is updated - if (fromDb && _localDb != null) RegisterChange(thisNode.Id, kit); - - // this node is always the last child - parent.LastChildContentId = thisNode.Id; - - // wire previous node as previous sibling - if (previousNode != null) - { - previousNode.NextSiblingContentId = thisNode.Id; - thisNode.PreviousSiblingContentId = previousNode.Id; - } - - // this node becomes the previous node - previousNode = thisNode; - - _xmap[kit.Node.Uid] = kit.Node.Id; + ok = false; + continue; // skip that one } - } - finally - { - //Release(lockInfo); + + var thisNode = kit.Node; + + if (parent == null) + { + // first parent + parent = parentLink.Value; + parent.FirstChildContentId = thisNode.Id; // this node is the first node + } + else if (parent.Id != parentLink.Value.Id) + { + // new parent + parent = parentLink.Value; + parent.FirstChildContentId = thisNode.Id; // this node is the first node + previousNode = null; // there is no previous sibling + } + + _logger.Verbose($"Set {thisNode.Id} with parent {thisNode.ParentContentId}"); + SetValueLocked(_contentNodes, thisNode.Id, thisNode); + + // if we are initializing from the database source ensure the local db is updated + if (fromDb && _localDb != null) RegisterChange(thisNode.Id, kit); + + // this node is always the last child + parent.LastChildContentId = thisNode.Id; + + // wire previous node as previous sibling + if (previousNode != null) + { + previousNode.NextSiblingContentId = thisNode.Id; + thisNode.PreviousSiblingContentId = previousNode.Id; + } + + // this node becomes the previous node + previousNode = thisNode; + + _xmap[kit.Node.Uid] = kit.Node.Id; } return ok; } - public bool SetAll(IEnumerable kits) + /// + /// Set all data for a collection of + /// + /// + /// + /// + /// This methods MUST be called from within a write lock, normally wrapped within GetScopedWriteLock + /// otherwise an exception will occur. + /// + /// + /// Thrown if this method is not called within a write lock + /// + public bool SetAllLocked(IEnumerable kits) { - //TODO: There should be NO locks here all calls made to this are done within GetScopedWriteLock + EnsureLocked(); - //var lockInfo = new WriteLockInfo(); var ok = true; - try + _logger.Debug("SetAll"); + + ClearLocked(_contentNodes); + ClearRootLocked(); + + // do NOT clear types else they are gone! + //ClearLocked(_contentTypesById); + //ClearLocked(_contentTypesByAlias); + + foreach (var kit in kits) { - _logger.Debug("SetAll"); - - //Lock(lockInfo); - - ClearLocked(_contentNodes); - ClearRootLocked(); - - // do NOT clear types else they are gone! - //ClearLocked(_contentTypesById); - //ClearLocked(_contentTypesByAlias); - - foreach (var kit in kits) + if (!BuildKit(kit, out var parent)) { - if (!BuildKit(kit, out var parent)) - { - ok = false; - continue; // skip that one - } - _logger.Verbose($"Set {kit.Node.Id} with parent {kit.Node.ParentContentId}"); - SetValueLocked(_contentNodes, kit.Node.Id, kit.Node); - - if (_localDb != null) RegisterChange(kit.Node.Id, kit); - AddTreeNodeLocked(kit.Node, parent); - - _xmap[kit.Node.Uid] = kit.Node.Id; + ok = false; + continue; // skip that one } - } - finally - { - //Release(lockInfo); + _logger.Verbose($"Set {kit.Node.Id} with parent {kit.Node.ParentContentId}"); + SetValueLocked(_contentNodes, kit.Node.Id, kit.Node); + + if (_localDb != null) RegisterChange(kit.Node.Id, kit); + AddTreeNodeLocked(kit.Node, parent); + + _xmap[kit.Node.Uid] = kit.Node.Id; } return ok; } - // IMPORTANT kits must be sorted out by LEVEL and by SORT ORDER - public bool SetBranch(int rootContentId, IEnumerable kits) + /// + /// Sets data for a branch of + /// + /// + /// + /// + /// + /// + /// IMPORTANT kits must be sorted out by LEVEL and by SORT ORDER + /// + /// + /// This methods MUST be called from within a write lock, normally wrapped within GetScopedWriteLock + /// otherwise an exception will occur. + /// + /// + /// + /// Thrown if this method is not called within a write lock + /// + public bool SetBranchLocked(int rootContentId, IEnumerable kits) { - //TODO: There should be NO locks here all calls made to this are done within GetScopedWriteLock + EnsureLocked(); - //var lockInfo = new WriteLockInfo(); var ok = true; - try + _logger.Debug("SetBranch"); + + // get existing + _contentNodes.TryGetValue(rootContentId, out var link); + var existing = link?.Value; + + // clear + if (existing != null) { - _logger.Debug("SetBranch"); - - //Lock(lockInfo); - - // get existing - _contentNodes.TryGetValue(rootContentId, out var link); - var existing = link?.Value; - - // clear - if (existing != null) - { - //this zero's out the branch (recursively), if we're in a new gen this will add a NULL placeholder for the gen - ClearBranchLocked(existing); - //TODO: This removes the current GEN from the tree - do we really want to do that? - RemoveTreeNodeLocked(existing); - } - - // now add them all back - foreach (var kit in kits) - { - if (!BuildKit(kit, out var parent)) - { - ok = false; - continue; // skip that one - } - SetValueLocked(_contentNodes, kit.Node.Id, kit.Node); - if (_localDb != null) RegisterChange(kit.Node.Id, kit); - AddTreeNodeLocked(kit.Node, parent); - - _xmap[kit.Node.Uid] = kit.Node.Id; - } + //this zero's out the branch (recursively), if we're in a new gen this will add a NULL placeholder for the gen + ClearBranchLocked(existing); + //TODO: This removes the current GEN from the tree - do we really want to do that? + RemoveTreeNodeLocked(existing); } - finally + + // now add them all back + foreach (var kit in kits) { - //Release(lockInfo); + if (!BuildKit(kit, out var parent)) + { + ok = false; + continue; // skip that one + } + SetValueLocked(_contentNodes, kit.Node.Id, kit.Node); + if (_localDb != null) RegisterChange(kit.Node.Id, kit); + AddTreeNodeLocked(kit.Node, parent); + + _xmap[kit.Node.Uid] = kit.Node.Id; } return ok; } - public bool Clear(int id) + /// + /// Clears data for a given node id + /// + /// + /// + /// + /// This methods MUST be called from within a write lock, normally wrapped within GetScopedWriteLock + /// otherwise an exception will occur. + /// + /// + /// Thrown if this method is not called within a write lock + /// + public bool ClearLocked(int id) { - // TODO: There should be NO locks here! All calls to this are made within GetScopedWriteLock - //var lockInfo = new WriteLockInfo(); - try - { - _logger.Debug("Clear"); + EnsureLocked(); - //Lock(lockInfo); + _logger.Debug("Clear"); - // try to find the content - // if it is not there, nothing to do - _contentNodes.TryGetValue(id, out var link); // else null - if (link?.Value == null) return false; + // try to find the content + // if it is not there, nothing to do + _contentNodes.TryGetValue(id, out var link); // else null + if (link?.Value == null) return false; - var content = link.Value; - _logger.Debug("Clear content ID: {ContentId}", content.Id); + var content = link.Value; + _logger.Debug("Clear content ID: {ContentId}", content.Id); - // clear the entire branch - ClearBranchLocked(content); + // clear the entire branch + ClearBranchLocked(content); - // manage the tree - RemoveTreeNodeLocked(content); + // manage the tree + RemoveTreeNodeLocked(content); - return true; - } - finally - { - //Release(lockInfo); - } + return true; } private void ClearBranchLocked(int id) diff --git a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs index 97e546e603..b761fe0fa4 100755 --- a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs @@ -384,7 +384,7 @@ namespace Umbraco.Web.PublishedCache.NuCache var contentTypes = _serviceContext.ContentTypeService.GetAll() .Select(x => _publishedContentTypeFactory.CreateContentType(x)); - _contentStore.SetAllContentTypes(contentTypes); + _contentStore.SetAllContentTypesLocked(contentTypes); using (_logger.TraceDuration("Loading content from database")) { @@ -395,7 +395,7 @@ namespace Umbraco.Web.PublishedCache.NuCache // IMPORTANT GetAllContentSources sorts kits by level + parentId + sortOrder var kits = _dataSource.GetAllContentSources(scope); - return onStartup ? _contentStore.SetAllFastSorted(kits, true) : _contentStore.SetAll(kits); + return onStartup ? _contentStore.SetAllFastSortedLocked(kits, true) : _contentStore.SetAllLocked(kits); } } @@ -403,7 +403,7 @@ namespace Umbraco.Web.PublishedCache.NuCache { var contentTypes = _serviceContext.ContentTypeService.GetAll() .Select(x => _publishedContentTypeFactory.CreateContentType(x)); - _contentStore.SetAllContentTypes(contentTypes); + _contentStore.SetAllContentTypesLocked(contentTypes); using (_logger.TraceDuration("Loading content from local cache file")) { @@ -455,7 +455,7 @@ namespace Umbraco.Web.PublishedCache.NuCache var mediaTypes = _serviceContext.MediaTypeService.GetAll() .Select(x => _publishedContentTypeFactory.CreateContentType(x)); - _mediaStore.SetAllContentTypes(mediaTypes); + _mediaStore.SetAllContentTypesLocked(mediaTypes); using (_logger.TraceDuration("Loading media from database")) { @@ -467,7 +467,7 @@ namespace Umbraco.Web.PublishedCache.NuCache _logger.Debug("Loading media from database..."); // IMPORTANT GetAllMediaSources sorts kits by level + parentId + sortOrder var kits = _dataSource.GetAllMediaSources(scope); - return onStartup ? _mediaStore.SetAllFastSorted(kits, true) : _mediaStore.SetAll(kits); + return onStartup ? _mediaStore.SetAllFastSortedLocked(kits, true) : _mediaStore.SetAllLocked(kits); } } @@ -475,7 +475,7 @@ namespace Umbraco.Web.PublishedCache.NuCache { var mediaTypes = _serviceContext.MediaTypeService.GetAll() .Select(x => _publishedContentTypeFactory.CreateContentType(x)); - _mediaStore.SetAllContentTypes(mediaTypes); + _mediaStore.SetAllContentTypesLocked(mediaTypes); using (_logger.TraceDuration("Loading media from local cache file")) { @@ -516,7 +516,7 @@ namespace Umbraco.Web.PublishedCache.NuCache return false; } - return onStartup ? store.SetAllFastSorted(kits, false) : store.SetAll(kits); + return onStartup ? store.SetAllFastSortedLocked(kits, false) : store.SetAllLocked(kits); } // keep these around - might be useful @@ -713,7 +713,7 @@ namespace Umbraco.Web.PublishedCache.NuCache if (payload.ChangeTypes.HasType(TreeChangeTypes.Remove)) { - if (_contentStore.Clear(payload.Id)) + if (_contentStore.ClearLocked(payload.Id)) draftChanged = publishedChanged = true; continue; } @@ -736,7 +736,7 @@ namespace Umbraco.Web.PublishedCache.NuCache // ?? should we do some RV check here? // IMPORTANT GetbranchContentSources sorts kits by level and by sort order var kits = _dataSource.GetBranchContentSources(scope, capture.Id); - _contentStore.SetBranch(capture.Id, kits); + _contentStore.SetBranchLocked(capture.Id, kits); } else { @@ -744,11 +744,11 @@ namespace Umbraco.Web.PublishedCache.NuCache var kit = _dataSource.GetContentSource(scope, capture.Id); if (kit.IsEmpty) { - _contentStore.Clear(capture.Id); + _contentStore.ClearLocked(capture.Id); } else { - _contentStore.Set(kit); + _contentStore.SetLocked(kit); } } @@ -806,7 +806,7 @@ namespace Umbraco.Web.PublishedCache.NuCache if (payload.ChangeTypes.HasType(TreeChangeTypes.Remove)) { - if (_mediaStore.Clear(payload.Id)) + if (_mediaStore.ClearLocked(payload.Id)) anythingChanged = true; continue; } @@ -829,7 +829,7 @@ namespace Umbraco.Web.PublishedCache.NuCache // ?? should we do some RV check here? // IMPORTANT GetbranchContentSources sorts kits by level and by sort order var kits = _dataSource.GetBranchMediaSources(scope, capture.Id); - _mediaStore.SetBranch(capture.Id, kits); + _mediaStore.SetBranchLocked(capture.Id, kits); } else { @@ -837,11 +837,11 @@ namespace Umbraco.Web.PublishedCache.NuCache var kit = _dataSource.GetMediaSource(scope, capture.Id); if (kit.IsEmpty) { - _mediaStore.Clear(capture.Id); + _mediaStore.ClearLocked(capture.Id); } else { - _mediaStore.Set(kit); + _mediaStore.SetLocked(kit); } } From 243e76b3cc3e5fccdf9c021354d4a2d88b6bb707 Mon Sep 17 00:00:00 2001 From: Shannon Date: Fri, 3 Jan 2020 15:04:39 +1100 Subject: [PATCH 15/23] Removes ability to have recursive locks in SnapDictionary, changes logic to require locking around the methods just like ContentStore, updates tests --- .../Cache/SnapDictionaryTests.cs | 126 ++++++++------ .../PublishedCache/NuCache/ContentStore.cs | 104 ++++++------ .../NuCache/PublishedSnapshotService.cs | 14 +- .../PublishedCache/NuCache/SnapDictionary.cs | 154 ++++++++---------- 4 files changed, 204 insertions(+), 194 deletions(-) diff --git a/src/Umbraco.Tests/Cache/SnapDictionaryTests.cs b/src/Umbraco.Tests/Cache/SnapDictionaryTests.cs index b435af9e77..86426d196c 100644 --- a/src/Umbraco.Tests/Cache/SnapDictionaryTests.cs +++ b/src/Umbraco.Tests/Cache/SnapDictionaryTests.cs @@ -9,6 +9,47 @@ using Umbraco.Web.PublishedCache.NuCache.Snap; namespace Umbraco.Tests.Cache { + /// + /// Used for tests + /// + public static class SnapDictionaryExtensions + { + internal static void Set(this SnapDictionary d, TKey key, TValue value) + where TValue : class + { + using (d.GetScopedWriteLock(GetScopeProvider())) + { + d.SetLocked(key, value); + } + } + + internal static void Clear(this SnapDictionary d) + where TValue : class + { + using (d.GetScopedWriteLock(GetScopeProvider())) + { + d.ClearLocked(); + } + } + + internal static void Clear(this SnapDictionary d, TKey key) + where TValue : class + { + using (d.GetScopedWriteLock(GetScopeProvider())) + { + d.ClearLocked(key); + } + } + + private static IScopeProvider GetScopeProvider() + { + var scopeProvider = Mock.Of(); + Mock.Get(scopeProvider) + .Setup(x => x.Context).Returns(() => null); + return scopeProvider; + } + } + [TestFixture] public class SnapDictionaryTests { @@ -223,7 +264,7 @@ namespace Umbraco.Tests.Cache { var d = new SnapDictionary(); d.Test.CollectAuto = false; - + // gen 1 d.Set(1, "one"); Assert.AreEqual(1, d.Test.GetValues(1).Length); @@ -321,7 +362,7 @@ namespace Umbraco.Tests.Cache { var d = new SnapDictionary(); d.Test.CollectAuto = false; - + Assert.AreEqual(0, d.Test.GetValues(1).Length); // gen 1 @@ -416,7 +457,7 @@ namespace Umbraco.Tests.Cache { var d = new SnapDictionary(); d.Test.CollectAuto = false; - + // gen 1 d.Set(1, "one"); Assert.AreEqual(1, d.Test.GetValues(1).Length); @@ -578,7 +619,7 @@ namespace Umbraco.Tests.Cache { var d = new SnapDictionary(); d.Test.CollectAuto = false; - + d.Set(1, "one"); d.Set(2, "two"); @@ -689,7 +730,7 @@ namespace Umbraco.Tests.Cache { // gen 3 Assert.AreEqual(2, d.Test.GetValues(1).Length); - d.Set(1, "ein"); + d.SetLocked(1, "ein"); Assert.AreEqual(3, d.Test.GetValues(1).Length); Assert.AreEqual(3, d.Test.LiveGen); @@ -727,31 +768,25 @@ namespace Umbraco.Tests.Cache using (var w1 = d.GetScopedWriteLock(scopeProvider)) { Assert.AreEqual(1, t.LiveGen); - Assert.AreEqual(1, t.WLocked); + Assert.IsTrue(t.IsLocked); Assert.IsTrue(t.NextGen); - using (var w2 = d.GetScopedWriteLock(scopeProvider)) + Assert.Throws(() => { - Assert.AreEqual(1, t.LiveGen); - Assert.AreEqual(2, t.WLocked); - Assert.IsTrue(t.NextGen); - - Assert.AreNotSame(w1, w2); // get a new writer each time - - d.Set(1, "one"); - - Assert.AreEqual(0, d.CreateSnapshot().Gen); - } + using (var w2 = d.GetScopedWriteLock(scopeProvider)) + { + } + }); Assert.AreEqual(1, t.LiveGen); - Assert.AreEqual(1, t.WLocked); + Assert.IsTrue(t.IsLocked); Assert.IsTrue(t.NextGen); Assert.AreEqual(0, d.CreateSnapshot().Gen); } Assert.AreEqual(1, t.LiveGen); - Assert.AreEqual(0, t.WLocked); + Assert.IsFalse(t.IsLocked); Assert.IsTrue(t.NextGen); Assert.AreEqual(1, d.CreateSnapshot().Gen); @@ -772,11 +807,14 @@ namespace Umbraco.Tests.Cache using (var w1 = d.GetScopedWriteLock(scopeProvider)) { + // This one is interesting, although we don't allow recursive locks, since this is + // using the same ScopeContext/key, the lock acquisition is only done once + using (var w2 = d.GetScopedWriteLock(scopeProvider)) { Assert.AreSame(w1, w2); - d.Set(1, "one"); + d.SetLocked(1, "one"); } } } @@ -797,19 +835,16 @@ namespace Umbraco.Tests.Cache using (var w1 = d.GetScopedWriteLock(scopeProvider1)) { Assert.AreEqual(1, t.LiveGen); - Assert.AreEqual(1, t.WLocked); + Assert.IsTrue(t.IsLocked); Assert.IsTrue(t.NextGen); - using (var w2 = d.GetScopedWriteLock(scopeProvider2)) + Assert.Throws(() => { - Assert.AreEqual(1, t.LiveGen); - Assert.AreEqual(2, t.WLocked); - Assert.IsTrue(t.NextGen); + using (var w2 = d.GetScopedWriteLock(scopeProvider2)) + { + } + }); - Assert.AreNotSame(w1, w2); - - d.Set(1, "one"); - } } } @@ -848,13 +883,13 @@ namespace Umbraco.Tests.Cache Assert.IsFalse(d.Test.NextGen); Assert.AreEqual("uno", s2.Get(1)); - var scopeProvider = GetScopeProvider(); + var scopeProvider = GetScopeProvider(); using (d.GetScopedWriteLock(scopeProvider)) { // gen 3 Assert.AreEqual(2, d.Test.GetValues(1).Length); - d.Set(1, "ein"); + d.SetLocked(1, "ein"); Assert.AreEqual(3, d.Test.GetValues(1).Length); Assert.AreEqual(3, d.Test.LiveGen); @@ -881,6 +916,7 @@ namespace Umbraco.Tests.Cache { var d = new SnapDictionary(); d.Test.CollectAuto = false; + // gen 1 d.Set(1, "one"); @@ -894,12 +930,11 @@ namespace Umbraco.Tests.Cache Assert.AreEqual("uno", s2.Get(1)); var scopeProvider = GetScopeProvider(); - using (d.GetScopedWriteLock(scopeProvider)) { // creating a snapshot in a write-lock does NOT return the "current" content // it uses the previous snapshot, so new snapshot created only on release - d.Set(1, "ein"); + d.SetLocked(1, "ein"); var s3 = d.CreateSnapshot(); Assert.AreEqual(2, s3.Gen); Assert.AreEqual("uno", s3.Get(1)); @@ -934,12 +969,11 @@ namespace Umbraco.Tests.Cache var scopeContext = new ScopeContext(); var scopeProvider = GetScopeProvider(scopeContext); - using (d.GetScopedWriteLock(scopeProvider)) { // creating a snapshot in a write-lock does NOT return the "current" content // it uses the previous snapshot, so new snapshot created only on release - d.Set(1, "ein"); + d.SetLocked(1, "ein"); var s3 = d.CreateSnapshot(); Assert.AreEqual(2, s3.Gen); Assert.AreEqual("uno", s3.Get(1)); @@ -967,7 +1001,7 @@ namespace Umbraco.Tests.Cache var d = new SnapDictionary(); var t = d.Test; t.CollectAuto = false; - + // gen 1 d.Set(1, "one"); var s1 = d.CreateSnapshot(); @@ -984,12 +1018,11 @@ namespace Umbraco.Tests.Cache var scopeContext = new ScopeContext(); var scopeProvider = GetScopeProvider(scopeContext); - using (d.GetScopedWriteLock(scopeProvider)) { // creating a snapshot in a write-lock does NOT return the "current" content // it uses the previous snapshot, so new snapshot created only on release - d.Set(1, "ein"); + d.SetLocked(1, "ein"); var s3 = d.CreateSnapshot(); Assert.AreEqual(2, s3.Gen); Assert.AreEqual("uno", s3.Get(1)); @@ -997,7 +1030,7 @@ namespace Umbraco.Tests.Cache // we made some changes, so a next gen is required Assert.AreEqual(3, t.LiveGen); Assert.IsTrue(t.NextGen); - Assert.AreEqual(1, t.WLocked); + Assert.IsTrue(t.IsLocked); // but live snapshot contains changes var ls = t.LiveSnapshot; @@ -1008,7 +1041,7 @@ namespace Umbraco.Tests.Cache // nothing is committed until scope exits Assert.AreEqual(3, t.LiveGen); Assert.IsTrue(t.NextGen); - Assert.AreEqual(1, t.WLocked); + Assert.IsTrue(t.IsLocked); // no changes until exit var s4 = d.CreateSnapshot(); @@ -1020,7 +1053,7 @@ namespace Umbraco.Tests.Cache // now things have changed Assert.AreEqual(2, t.LiveGen); Assert.IsFalse(t.NextGen); - Assert.AreEqual(0, t.WLocked); + Assert.IsFalse(t.IsLocked); // no changes since not completed var s5 = d.CreateSnapshot(); @@ -1097,9 +1130,10 @@ namespace Umbraco.Tests.Cache // writer is scope contextual and scoped // when disposed, nothing happens // when the context exists, the writer is released + using (d.GetScopedWriteLock(scopeProvider)) { - d.Set(1, "ein"); + d.SetLocked(1, "ein"); Assert.IsTrue(d.Test.NextGen); Assert.AreEqual(3, d.Test.LiveGen); Assert.IsNotNull(d.Test.GenObj); @@ -1107,7 +1141,7 @@ namespace Umbraco.Tests.Cache } // writer has not released - Assert.AreEqual(1, d.Test.WLocked); + Assert.IsTrue(d.Test.IsLocked); Assert.IsNotNull(d.Test.GenObj); Assert.AreEqual(2, d.Test.GenObj.Gen); @@ -1118,7 +1152,7 @@ namespace Umbraco.Tests.Cache // panic! var s2 = d.CreateSnapshot(); - Assert.AreEqual(1, d.Test.WLocked); + Assert.IsTrue(d.Test.IsLocked); Assert.IsNotNull(d.Test.GenObj); Assert.AreEqual(2, d.Test.GenObj.Gen); Assert.AreEqual(3, d.Test.LiveGen); @@ -1127,7 +1161,7 @@ namespace Umbraco.Tests.Cache // release writer scopeContext.ScopeExit(true); - Assert.AreEqual(0, d.Test.WLocked); + Assert.IsFalse(d.Test.IsLocked); Assert.IsNotNull(d.Test.GenObj); Assert.AreEqual(2, d.Test.GenObj.Gen); Assert.AreEqual(3, d.Test.LiveGen); @@ -1135,7 +1169,7 @@ namespace Umbraco.Tests.Cache var s3 = d.CreateSnapshot(); - Assert.AreEqual(0, d.Test.WLocked); + Assert.IsFalse(d.Test.IsLocked); Assert.IsNotNull(d.Test.GenObj); Assert.AreEqual(3, d.Test.GenObj.Gen); Assert.AreEqual(3, d.Test.LiveGen); diff --git a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs index 82fae0d32a..17eb2b6457 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs @@ -39,7 +39,6 @@ namespace Umbraco.Web.PublishedCache.NuCache private long _liveGen, _floorGen; private bool _nextGen, _collectAuto; private Task _collectTask; - private volatile int _wlocked; private List> _wchanges; // TODO: collection trigger (ok for now) @@ -83,7 +82,6 @@ namespace Umbraco.Web.PublishedCache.NuCache private class WriteLockInfo { public bool Taken; - public bool Count; } // a scope contextual that represents a locked writer to the dictionary @@ -111,7 +109,6 @@ namespace Umbraco.Web.PublishedCache.NuCache // TODO: GetScopedWriter? should the dict have a ref onto the scope provider? public IDisposable GetScopedWriteLock(IScopeProvider scopeProvider) { - _logger.Debug("GetScopedWriteLock"); return ScopeContextualBase.Get(scopeProvider, _instanceId, scoped => new ScopedWriteLock(this, scoped)); } @@ -123,6 +120,9 @@ namespace Umbraco.Web.PublishedCache.NuCache private void Lock(WriteLockInfo lockInfo, bool forceGen = false) { + if (Monitor.IsEntered(_wlocko)) + throw new InvalidOperationException("Recursive locks not allowed"); + Monitor.Enter(_wlocko, ref lockInfo.Taken); lock(_rlocko) @@ -131,9 +131,7 @@ namespace Umbraco.Web.PublishedCache.NuCache try { } finally { - _wlocked++; - lockInfo.Count = true; - if (_nextGen == false || (forceGen && _wlocked == 1)) + if (_nextGen == false || (forceGen)) { // because we are changing things, a new generation // is created, which will trigger a new snapshot @@ -179,12 +177,6 @@ namespace Umbraco.Web.PublishedCache.NuCache _localDb.Commit(); } - // TODO: Shouldn't this be in a finally block? - // TODO: Shouldn't this be decremented after we exit?? - // TODO: Shouldn't the locked flag never exceed 1? - if (lockInfo.Count) - _wlocked--; - // TODO: Shouldn't this be in a finally block? if (lockInfo.Taken) Monitor.Exit(_wlocko); @@ -220,22 +212,18 @@ namespace Umbraco.Web.PublishedCache.NuCache public void ReleaseLocalDb() { - _logger.Info("Releasing DB..."); var lockInfo = new WriteLockInfo(); try { try { - // Trying to lock could throw exceptions so always make sure to clean up. - _logger.Info("Trying to lock before releasing DB (lock count = {LockCount}) ...", _wlocked); - + // Trying to lock could throw exceptions so always make sure to clean up. Lock(lockInfo); } finally { try { - _logger.Info("Disposing local DB..."); _localDb?.Dispose(); } catch (Exception ex) @@ -275,57 +263,61 @@ namespace Umbraco.Web.PublishedCache.NuCache #region Content types - public void NewContentTypes(IEnumerable types) + /// + /// Sets data for new content types + /// + /// + /// + /// This methods MUST be called from within a write lock, normally wrapped within GetScopedWriteLock + /// otherwise an exception will occur. + /// + /// + /// Thrown if this method is not called within a write lock + /// + public void NewContentTypesLocked(IEnumerable types) { - var lockInfo = new WriteLockInfo(); - try - { - _logger.Debug("NewContentTypes"); - Lock(lockInfo); + EnsureLocked(); - foreach (var type in types) - { - SetValueLocked(_contentTypesById, type.Id, type); - SetValueLocked(_contentTypesByAlias, type.Alias, type); - } - } - finally + foreach (var type in types) { - Release(lockInfo); + SetValueLocked(_contentTypesById, type.Id, type); + SetValueLocked(_contentTypesByAlias, type.Alias, type); } } - public void UpdateContentTypes(IEnumerable types) + /// + /// Sets data for updated content types + /// + /// + /// + /// This methods MUST be called from within a write lock, normally wrapped within GetScopedWriteLock + /// otherwise an exception will occur. + /// + /// + /// Thrown if this method is not called within a write lock + /// + public void UpdateContentTypesLocked(IEnumerable types) { //nothing to do if this is empty, no need to lock/allocate/iterate/etc... if (!types.Any()) return; - var lockInfo = new WriteLockInfo(); - try + EnsureLocked(); + + var index = types.ToDictionary(x => x.Id, x => x); + + foreach (var type in index.Values) { - _logger.Debug("UpdateContentTypes"); - Lock(lockInfo); - - var index = types.ToDictionary(x => x.Id, x => x); - - foreach (var type in index.Values) - { - SetValueLocked(_contentTypesById, type.Id, type); - SetValueLocked(_contentTypesByAlias, type.Alias, type); - } - - foreach (var link in _contentNodes.Values) - { - var node = link.Value; - if (node == null) continue; - var contentTypeId = node.ContentType.Id; - if (index.TryGetValue(contentTypeId, out var contentType) == false) continue; - SetValueLocked(_contentNodes, node.Id, new ContentNode(node, contentType)); - } + SetValueLocked(_contentTypesById, type.Id, type); + SetValueLocked(_contentTypesByAlias, type.Alias, type); } - finally + + foreach (var link in _contentNodes.Values) { - Release(lockInfo); + var node = link.Value; + if (node == null) continue; + var contentTypeId = node.ContentType.Id; + if (index.TryGetValue(contentTypeId, out var contentType) == false) continue; + SetValueLocked(_contentNodes, node.Id, new ContentNode(node, contentType)); } } @@ -1256,7 +1248,7 @@ namespace Umbraco.Web.PublishedCache.NuCache // else we need to try to create a new gen ref // whether we are wlocked or not, noone can rlock while we do, // so _liveGen and _nextGen are safe - if (_wlocked > 0) // volatile, cannot ++ but could -- + if (Monitor.IsEntered(_wlocko)) { // write-locked, cannot use latest gen (at least 1) so use previous var snapGen = _nextGen ? _liveGen - 1 : _liveGen; diff --git a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs index b761fe0fa4..249eb882f9 100755 --- a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs @@ -622,7 +622,7 @@ namespace Umbraco.Web.PublishedCache.NuCache .Where(x => x.RootContentId.HasValue && x.LanguageIsoCode.IsNullOrWhiteSpace() == false) .Select(x => new Domain(x.Id, x.DomainName, x.RootContentId.Value, CultureInfo.GetCultureInfo(x.LanguageIsoCode), x.IsWildcard))) { - _domainStore.Set(domain.Id, domain); + _domainStore.SetLocked(domain.Id, domain); } } @@ -980,7 +980,7 @@ namespace Umbraco.Web.PublishedCache.NuCache } break; case DomainChangeTypes.Remove: - _domainStore.Clear(payload.Id); + _domainStore.ClearLocked(payload.Id); break; case DomainChangeTypes.Refresh: var domain = _serviceContext.DomainService.GetById(payload.Id); @@ -988,7 +988,7 @@ namespace Umbraco.Web.PublishedCache.NuCache if (domain.RootContentId.HasValue == false) continue; // anomaly if (domain.LanguageIsoCode.IsNullOrWhiteSpace()) continue; // anomaly var culture = CultureInfo.GetCultureInfo(domain.LanguageIsoCode); - _domainStore.Set(domain.Id, new Domain(domain.Id, domain.DomainName, domain.RootContentId.Value, culture, domain.IsWildcard)); + _domainStore.SetLocked(domain.Id, new Domain(domain.Id, domain.DomainName, domain.RootContentId.Value, culture, domain.IsWildcard)); break; } } @@ -1075,9 +1075,9 @@ namespace Umbraco.Web.PublishedCache.NuCache _contentStore.UpdateContentTypes(removedIds, typesA, kits); if (!otherIds.IsCollectionEmpty()) - _contentStore.UpdateContentTypes(CreateContentTypes(PublishedItemType.Content, otherIds.ToArray())); + _contentStore.UpdateContentTypesLocked(CreateContentTypes(PublishedItemType.Content, otherIds.ToArray())); if (!newIds.IsCollectionEmpty()) - _contentStore.NewContentTypes(CreateContentTypes(PublishedItemType.Content, newIds.ToArray())); + _contentStore.NewContentTypesLocked(CreateContentTypes(PublishedItemType.Content, newIds.ToArray())); scope.Complete(); } } @@ -1106,9 +1106,9 @@ namespace Umbraco.Web.PublishedCache.NuCache _mediaStore.UpdateContentTypes(removedIds, typesA, kits); if (!otherIds.IsCollectionEmpty()) - _mediaStore.UpdateContentTypes(CreateContentTypes(PublishedItemType.Media, otherIds.ToArray()).ToArray()); + _mediaStore.UpdateContentTypesLocked(CreateContentTypes(PublishedItemType.Media, otherIds.ToArray()).ToArray()); if (!newIds.IsCollectionEmpty()) - _mediaStore.NewContentTypes(CreateContentTypes(PublishedItemType.Media, newIds.ToArray()).ToArray()); + _mediaStore.NewContentTypesLocked(CreateContentTypes(PublishedItemType.Media, newIds.ToArray()).ToArray()); scope.Complete(); } } diff --git a/src/Umbraco.Web/PublishedCache/NuCache/SnapDictionary.cs b/src/Umbraco.Web/PublishedCache/NuCache/SnapDictionary.cs index e0ad26eb81..c38940da25 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/SnapDictionary.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/SnapDictionary.cs @@ -35,7 +35,6 @@ namespace Umbraco.Web.PublishedCache.NuCache private long _liveGen, _floorGen; private bool _nextGen, _collectAuto; private Task _collectTask; - private volatile int _wlocked; // minGenDelta to be adjusted // we may want to throttle collects even if delta is reached @@ -84,7 +83,6 @@ namespace Umbraco.Web.PublishedCache.NuCache private class WriteLockInfo { public bool Taken; - public bool Count; } // a scope contextual that represents a locked writer to the dictionary @@ -92,8 +90,7 @@ namespace Umbraco.Web.PublishedCache.NuCache { private readonly WriteLockInfo _lockinfo = new WriteLockInfo(); private readonly SnapDictionary _dictionary; - private int _released; - + public ScopedWriteLock(SnapDictionary dictionary, bool scoped) { _dictionary = dictionary; @@ -102,8 +99,6 @@ namespace Umbraco.Web.PublishedCache.NuCache public override void Release(bool completed) { - if (Interlocked.CompareExchange(ref _released, 1, 0) != 0) - return; _dictionary.Release(_lockinfo, completed); } } @@ -117,8 +112,17 @@ namespace Umbraco.Web.PublishedCache.NuCache return ScopeContextualBase.Get(scopeProvider, _instanceId, scoped => new ScopedWriteLock(this, scoped)); } + private void EnsureLocked() + { + if (!Monitor.IsEntered(_wlocko)) + throw new InvalidOperationException("Write lock must be acquried."); + } + private void Lock(WriteLockInfo lockInfo, bool forceGen = false) { + if (Monitor.IsEntered(_wlocko)) + throw new InvalidOperationException("Recursive locks not allowed"); + Monitor.Enter(_wlocko, ref lockInfo.Taken); lock(_rlocko) @@ -132,11 +136,7 @@ namespace Umbraco.Web.PublishedCache.NuCache try { } finally { - // increment the lock count, and register that this lock is counting - _wlocked++; - lockInfo.Count = true; - - if (_nextGen == false || (forceGen && _wlocked == 1)) + if (_nextGen == false || (forceGen)) { // because we are changing things, a new generation // is created, which will trigger a new snapshot @@ -181,8 +181,7 @@ namespace Umbraco.Web.PublishedCache.NuCache } } - // decrement the lock count, if counting, then exit the lock - if (lockInfo.Count) _wlocked--; + // TODO: Shouldn't this be in a finally block? Monitor.Exit(_wlocko); } @@ -198,75 +197,59 @@ namespace Umbraco.Web.PublishedCache.NuCache return link; } - public void Set(TKey key, TValue value) + public void SetLocked(TKey key, TValue value) { - var lockInfo = new WriteLockInfo(); - try - { - Lock(lockInfo); + EnsureLocked(); - // this is safe only because we're write-locked - var link = GetHead(key); - if (link != null) + // this is safe only because we're write-locked + var link = GetHead(key); + if (link != null) + { + // already in the dict + if (link.Gen != _liveGen) { - // already in the dict - if (link.Gen != _liveGen) - { - // for an older gen - if value is different then insert a new - // link for the new gen, with the new value - if (link.Value != value) - _items.TryUpdate(key, new LinkedNode(value, _liveGen, link), link); - } - else - { - // for the live gen - we can fix the live gen - and remove it - // if value is null and there's no next gen - if (value == null && link.Next == null) - _items.TryRemove(key, out link); - else - link.Value = value; - } + // for an older gen - if value is different then insert a new + // link for the new gen, with the new value + if (link.Value != value) + _items.TryUpdate(key, new LinkedNode(value, _liveGen, link), link); } else { - _items.TryAdd(key, new LinkedNode(value, _liveGen)); - } - } - finally - { - Release(lockInfo); - } - } - - public void Clear(TKey key) - { - Set(key, null); - } - - public void Clear() - { - var lockInfo = new WriteLockInfo(); - try - { - Lock(lockInfo); - - // this is safe only because we're write-locked - foreach (var kvp in _items.Where(x => x.Value != null)) - { - if (kvp.Value.Gen < _liveGen) - { - var link = new LinkedNode(null, _liveGen, kvp.Value); - _items.TryUpdate(kvp.Key, link, kvp.Value); - } + // for the live gen - we can fix the live gen - and remove it + // if value is null and there's no next gen + if (value == null && link.Next == null) + _items.TryRemove(key, out link); else - { - kvp.Value.Value = null; - } + link.Value = value; } } - finally + else { - Release(lockInfo); + _items.TryAdd(key, new LinkedNode(value, _liveGen)); + } + } + + public void ClearLocked(TKey key) + { + SetLocked(key, null); + } + + public void ClearLocked() + { + EnsureLocked(); + + // this is safe only because we're write-locked + foreach (var kvp in _items.Where(x => x.Value != null)) + { + if (kvp.Value.Gen < _liveGen) + { + var link = new LinkedNode(null, _liveGen, kvp.Value); + _items.TryUpdate(kvp.Key, link, kvp.Value); + } + else + { + kvp.Value.Value = null; + } } } @@ -336,7 +319,7 @@ namespace Umbraco.Web.PublishedCache.NuCache // else we need to try to create a new gen object // whether we are wlocked or not, noone can rlock while we do, // so _liveGen and _nextGen are safe - if (_wlocked > 0) // volatile, cannot ++ but could -- + if (Monitor.IsEntered(_wlocko)) { // write-locked, cannot use latest gen (at least 1) so use previous var snapGen = _nextGen ? _liveGen - 1 : _liveGen; @@ -468,17 +451,18 @@ namespace Umbraco.Web.PublishedCache.NuCache } } - public /*async*/ Task PendingCollect() - { - Task task; - lock (_rlocko) - { - task = _collectTask; - } - return task ?? Task.CompletedTask; - //if (task != null) - // await task; - } + // TODO: This is never used? Should it be? Maybe move to TestHelper below? + //public /*async*/ Task PendingCollect() + //{ + // Task task; + // lock (_rlocko) + // { + // task = _collectTask; + // } + // return task ?? Task.CompletedTask; + // //if (task != null) + // // await task; + //} public long GenCount => _genObjs.Count; @@ -503,7 +487,7 @@ namespace Umbraco.Web.PublishedCache.NuCache public long LiveGen => _dict._liveGen; public long FloorGen => _dict._floorGen; public bool NextGen => _dict._nextGen; - public int WLocked => _dict._wlocked; + public bool IsLocked => Monitor.IsEntered(_dict._wlocko); public bool CollectAuto { From 7a129f890dc5b87094901d2ac3bbe953486be04b Mon Sep 17 00:00:00 2001 From: Shannon Date: Fri, 3 Jan 2020 15:07:21 +1100 Subject: [PATCH 16/23] Changes MainDom testing timeout to be larger... but not 1.5 mins! --- src/Umbraco.Core/Runtime/MainDom.cs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/Umbraco.Core/Runtime/MainDom.cs b/src/Umbraco.Core/Runtime/MainDom.cs index 883b69b2a7..d7bd407015 100644 --- a/src/Umbraco.Core/Runtime/MainDom.cs +++ b/src/Umbraco.Core/Runtime/MainDom.cs @@ -36,7 +36,7 @@ namespace Umbraco.Core.Runtime // actions to run before releasing the main domain private readonly List> _callbacks = new List>(); - private const int LockTimeoutMilliseconds = 10000; // 10 seconds + private const int LockTimeoutMilliseconds = 40000; // 40 seconds #endregion @@ -151,22 +151,20 @@ namespace Umbraco.Core.Runtime { _logger.Info("Cannot acquire (timeout)."); - // TODO: Previously we'd throw an exception and the appdomain would not start, what do we want to do? - - // return false; + // TODO: In previous versions we'd let a TimeoutException be thrown + // and the appdomain would not start. We have the opportunity to allow it to + // start without having MainDom? This would mean that it couldn't write + // to nucache/examine and would only be ok if this was a super short lived appdomain. + // maybe safer to just keep throwing in this case. throw new TimeoutException("Cannot acquire MainDom"); + // return false; } try { // Listen for the signal from another AppDomain coming online to release the lock - _mainDomLock.ListenAsync() - .ContinueWith(_ => - { - _logger.Debug("Signal heard from other appdomain."); - OnSignal("signal"); - }); + _mainDomLock.ListenAsync().ContinueWith(_ => OnSignal("signal")); } catch (OperationCanceledException ex) { From c2ac5e85311b39ee0d9c8734659a39fb6be55ab9 Mon Sep 17 00:00:00 2001 From: Shannon Date: Mon, 6 Jan 2020 18:34:04 +1100 Subject: [PATCH 17/23] Fixes a couple more issues with recursive locks --- .../PublishedCache/NuCache/ContentStore.cs | 244 +++++++++--------- .../NuCache/PublishedSnapshotService.cs | 8 +- 2 files changed, 130 insertions(+), 122 deletions(-) diff --git a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs index 17eb2b6457..8ce299932c 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs @@ -14,7 +14,18 @@ using Umbraco.Web.PublishedCache.NuCache.Snap; namespace Umbraco.Web.PublishedCache.NuCache { - // stores content + /// + /// Stores content in memory and persists it back to disk + /// + /// + /// + /// Methods in this class suffixed with the term "Locked" means that those methods can only be called within a WriteLock. A WriteLock + /// is acquired by the GetScopedWriteLock method. Locks are not allowed to be recursive. + /// + /// + /// This class's logic is based on the class but has been slightly modified to suit these purposes. + /// + /// internal class ContentStore { // this class is an extended version of SnapDictionary @@ -336,8 +347,6 @@ namespace Umbraco.Web.PublishedCache.NuCache { EnsureLocked(); - _logger.Debug("SetAllContentTypes"); - // clear all existing content types ClearLocked(_contentTypesById); ClearLocked(_contentTypesByAlias); @@ -353,8 +362,23 @@ namespace Umbraco.Web.PublishedCache.NuCache // assuming we are going to SetAll content items! } - public void UpdateContentTypes(IReadOnlyCollection removedIds, IReadOnlyCollection refreshedTypes, IReadOnlyCollection kits) + /// + /// Updates/sets/removes data for content types + /// + /// + /// + /// + /// + /// This methods MUST be called from within a write lock, normally wrapped within GetScopedWriteLock + /// otherwise an exception will occur. + /// + /// + /// Thrown if this method is not called within a write lock + /// + public void UpdateContentTypesLocked(IReadOnlyCollection removedIds, IReadOnlyCollection refreshedTypes, IReadOnlyCollection kits) { + EnsureLocked(); + var removedIdsA = removedIds ?? Array.Empty(); var refreshedTypesA = refreshedTypes ?? Array.Empty(); var refreshedIdsA = refreshedTypesA.Select(x => x.Id).ToList(); @@ -363,87 +387,82 @@ namespace Umbraco.Web.PublishedCache.NuCache if (kits.Count == 0 && refreshedIdsA.Count == 0 && removedIdsA.Count == 0) return; //exit - there is nothing to do here - var lockInfo = new WriteLockInfo(); - try + var removedContentTypeNodes = new List(); + var refreshedContentTypeNodes = new List(); + + // find all the nodes that are either refreshed or removed, + // because of their content type being either refreshed or removed + foreach (var link in _contentNodes.Values) { - _logger.Debug("UpdateContentTypes"); - Lock(lockInfo); - - var removedContentTypeNodes = new List(); - var refreshedContentTypeNodes = new List(); - - // find all the nodes that are either refreshed or removed, - // because of their content type being either refreshed or removed - foreach (var link in _contentNodes.Values) - { - var node = link.Value; - if (node == null) continue; - var contentTypeId = node.ContentType.Id; - if (removedIdsA.Contains(contentTypeId)) removedContentTypeNodes.Add(node.Id); - if (refreshedIdsA.Contains(contentTypeId)) refreshedContentTypeNodes.Add(node.Id); - } - - // perform deletion of content with removed content type - // removing content types should have removed their content already - // but just to be 100% sure, clear again here - foreach (var node in removedContentTypeNodes) - ClearBranchLocked(node); - - // perform deletion of removed content types - foreach (var id in removedIdsA) - { - if (_contentTypesById.TryGetValue(id, out var link) == false || link.Value == null) - continue; - SetValueLocked(_contentTypesById, id, null); - SetValueLocked(_contentTypesByAlias, link.Value.Alias, null); - } - - // perform update of refreshed content types - foreach (var type in refreshedTypesA) - { - SetValueLocked(_contentTypesById, type.Id, type); - SetValueLocked(_contentTypesByAlias, type.Alias, type); - } - - // perform update of content with refreshed content type - from the kits - // skip missing type, skip missing parents & un-buildable kits - what else could we do? - // kits are ordered by level, so ParentExists is ok here - var visited = new List(); - foreach (var kit in kits.Where(x => - refreshedIdsA.Contains(x.ContentTypeId) && - BuildKit(x, out _))) - { - // replacing the node: must preserve the parents - var node = GetHead(_contentNodes, kit.Node.Id)?.Value; - if (node != null) - kit.Node.FirstChildContentId = node.FirstChildContentId; - - SetValueLocked(_contentNodes, kit.Node.Id, kit.Node); - - visited.Add(kit.Node.Id); - if (_localDb != null) RegisterChange(kit.Node.Id, kit); - } - - // all content should have been refreshed - but... - var orphans = refreshedContentTypeNodes.Except(visited); - foreach (var id in orphans) - ClearBranchLocked(id); + var node = link.Value; + if (node == null) continue; + var contentTypeId = node.ContentType.Id; + if (removedIdsA.Contains(contentTypeId)) removedContentTypeNodes.Add(node.Id); + if (refreshedIdsA.Contains(contentTypeId)) refreshedContentTypeNodes.Add(node.Id); } - finally + + // perform deletion of content with removed content type + // removing content types should have removed their content already + // but just to be 100% sure, clear again here + foreach (var node in removedContentTypeNodes) + ClearBranchLocked(node); + + // perform deletion of removed content types + foreach (var id in removedIdsA) { - Release(lockInfo); + if (_contentTypesById.TryGetValue(id, out var link) == false || link.Value == null) + continue; + SetValueLocked(_contentTypesById, id, null); + SetValueLocked(_contentTypesByAlias, link.Value.Alias, null); } + + // perform update of refreshed content types + foreach (var type in refreshedTypesA) + { + SetValueLocked(_contentTypesById, type.Id, type); + SetValueLocked(_contentTypesByAlias, type.Alias, type); + } + + // perform update of content with refreshed content type - from the kits + // skip missing type, skip missing parents & un-buildable kits - what else could we do? + // kits are ordered by level, so ParentExists is ok here + var visited = new List(); + foreach (var kit in kits.Where(x => + refreshedIdsA.Contains(x.ContentTypeId) && + BuildKit(x, out _))) + { + // replacing the node: must preserve the parents + var node = GetHead(_contentNodes, kit.Node.Id)?.Value; + if (node != null) + kit.Node.FirstChildContentId = node.FirstChildContentId; + + SetValueLocked(_contentNodes, kit.Node.Id, kit.Node); + + visited.Add(kit.Node.Id); + if (_localDb != null) RegisterChange(kit.Node.Id, kit); + } + + // all content should have been refreshed - but... + var orphans = refreshedContentTypeNodes.Except(visited); + foreach (var id in orphans) + ClearBranchLocked(id); } - public void UpdateDataTypes(IEnumerable dataTypeIds, Func getContentType) + /// + /// Updates data types + /// + /// + /// + /// + /// This methods MUST be called from within a write lock, normally wrapped within GetScopedWriteLock + /// otherwise an exception will occur. + /// + /// + /// Thrown if this method is not called within a write lock + /// + public void UpdateDataTypesLocked(IEnumerable dataTypeIds, Func getContentType) { - var lockInfo = new WriteLockInfo(); - try - { - _logger.Debug("UpdateDataTypes"); - Lock(lockInfo); - - var contentTypes = _contentTypesById + var contentTypes = _contentTypesById .Where(kvp => kvp.Value.Value != null && kvp.Value.Value.PropertyTypes.Any(p => dataTypeIds.Contains(p.DataType.Id))) @@ -452,37 +471,32 @@ namespace Umbraco.Web.PublishedCache.NuCache .Where(x => x != null) // poof, gone, very unlikely and probably an anomaly .ToArray(); - var contentTypeIdsA = contentTypes.Select(x => x.Id).ToArray(); - var contentTypeNodes = new Dictionary>(); - foreach (var id in contentTypeIdsA) - contentTypeNodes[id] = new List(); - foreach (var link in _contentNodes.Values) - { - var node = link.Value; - if (node != null && contentTypeIdsA.Contains(node.ContentType.Id)) - contentTypeNodes[node.ContentType.Id].Add(node.Id); - } - - foreach (var contentType in contentTypes) - { - // again, weird situation - if (contentTypeNodes.ContainsKey(contentType.Id) == false) - continue; - - foreach (var id in contentTypeNodes[contentType.Id]) - { - _contentNodes.TryGetValue(id, out var link); - if (link?.Value == null) - continue; - var node = new ContentNode(link.Value, contentType); - SetValueLocked(_contentNodes, id, node); - if (_localDb != null) RegisterChange(id, node.ToKit()); - } - } - } - finally + var contentTypeIdsA = contentTypes.Select(x => x.Id).ToArray(); + var contentTypeNodes = new Dictionary>(); + foreach (var id in contentTypeIdsA) + contentTypeNodes[id] = new List(); + foreach (var link in _contentNodes.Values) { - Release(lockInfo); + var node = link.Value; + if (node != null && contentTypeIdsA.Contains(node.ContentType.Id)) + contentTypeNodes[node.ContentType.Id].Add(node.Id); + } + + foreach (var contentType in contentTypes) + { + // again, weird situation + if (contentTypeNodes.ContainsKey(contentType.Id) == false) + continue; + + foreach (var id in contentTypeNodes[contentType.Id]) + { + _contentNodes.TryGetValue(id, out var link); + if (link?.Value == null) + continue; + var node = new ContentNode(link.Value, contentType); + SetValueLocked(_contentNodes, id, node); + if (_localDb != null) RegisterChange(id, node.ToKit()); + } } } @@ -644,8 +658,6 @@ namespace Umbraco.Web.PublishedCache.NuCache { EnsureLocked(); - _logger.Debug("SetAllFastSorted"); - var ok = true; ClearLocked(_contentNodes); @@ -684,7 +696,7 @@ namespace Umbraco.Web.PublishedCache.NuCache previousNode = null; // there is no previous sibling } - _logger.Verbose($"Set {thisNode.Id} with parent {thisNode.ParentContentId}"); + _logger.Debug($"Set {thisNode.Id} with parent {thisNode.ParentContentId}"); SetValueLocked(_contentNodes, thisNode.Id, thisNode); // if we are initializing from the database source ensure the local db is updated @@ -726,8 +738,7 @@ namespace Umbraco.Web.PublishedCache.NuCache EnsureLocked(); var ok = true; - _logger.Debug("SetAll"); - + ClearLocked(_contentNodes); ClearRootLocked(); @@ -742,7 +753,7 @@ namespace Umbraco.Web.PublishedCache.NuCache ok = false; continue; // skip that one } - _logger.Verbose($"Set {kit.Node.Id} with parent {kit.Node.ParentContentId}"); + _logger.Debug($"Set {kit.Node.Id} with parent {kit.Node.ParentContentId}"); SetValueLocked(_contentNodes, kit.Node.Id, kit.Node); if (_localDb != null) RegisterChange(kit.Node.Id, kit); @@ -777,8 +788,7 @@ namespace Umbraco.Web.PublishedCache.NuCache EnsureLocked(); var ok = true; - _logger.Debug("SetBranch"); - + // get existing _contentNodes.TryGetValue(rootContentId, out var link); var existing = link?.Value; @@ -826,8 +836,6 @@ namespace Umbraco.Web.PublishedCache.NuCache { EnsureLocked(); - _logger.Debug("Clear"); - // try to find the content // if it is not there, nothing to do _contentNodes.TryGetValue(id, out var link); // else null diff --git a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs index 249eb882f9..c37e5731e4 100755 --- a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs @@ -943,14 +943,14 @@ namespace Umbraco.Web.PublishedCache.NuCache using (var scope = _scopeProvider.CreateScope()) { scope.ReadLock(Constants.Locks.ContentTree); - _contentStore.UpdateDataTypes(idsA, id => CreateContentType(PublishedItemType.Content, id)); + _contentStore.UpdateDataTypesLocked(idsA, id => CreateContentType(PublishedItemType.Content, id)); scope.Complete(); } using (var scope = _scopeProvider.CreateScope()) { scope.ReadLock(Constants.Locks.MediaTree); - _mediaStore.UpdateDataTypes(idsA, id => CreateContentType(PublishedItemType.Media, id)); + _mediaStore.UpdateDataTypesLocked(idsA, id => CreateContentType(PublishedItemType.Media, id)); scope.Complete(); } } @@ -1073,7 +1073,7 @@ namespace Umbraco.Web.PublishedCache.NuCache ? Array.Empty() : _dataSource.GetTypeContentSources(scope, refreshedIds).ToArray(); - _contentStore.UpdateContentTypes(removedIds, typesA, kits); + _contentStore.UpdateContentTypesLocked(removedIds, typesA, kits); if (!otherIds.IsCollectionEmpty()) _contentStore.UpdateContentTypesLocked(CreateContentTypes(PublishedItemType.Content, otherIds.ToArray())); if (!newIds.IsCollectionEmpty()) @@ -1104,7 +1104,7 @@ namespace Umbraco.Web.PublishedCache.NuCache ? Array.Empty() : _dataSource.GetTypeMediaSources(scope, refreshedIds).ToArray(); - _mediaStore.UpdateContentTypes(removedIds, typesA, kits); + _mediaStore.UpdateContentTypesLocked(removedIds, typesA, kits); if (!otherIds.IsCollectionEmpty()) _mediaStore.UpdateContentTypesLocked(CreateContentTypes(PublishedItemType.Media, otherIds.ToArray()).ToArray()); if (!newIds.IsCollectionEmpty()) From 95337d5a7008469a1cfc84215b9c98e461798fe1 Mon Sep 17 00:00:00 2001 From: Shannon Date: Mon, 6 Jan 2020 21:39:26 +1100 Subject: [PATCH 18/23] Cleans up old notes --- src/Umbraco.Core/Runtime/MainDom.cs | 2 +- .../Cache/SnapDictionaryTests.cs | 82 +++++++++---------- .../PublishedCache/NuCache/ContentStore.cs | 60 +++++++------- .../NuCache/PublishedSnapshot.cs | 8 -- .../NuCache/PublishedSnapshotService.cs | 10 +-- 5 files changed, 73 insertions(+), 89 deletions(-) diff --git a/src/Umbraco.Core/Runtime/MainDom.cs b/src/Umbraco.Core/Runtime/MainDom.cs index d7bd407015..67c7be3d21 100644 --- a/src/Umbraco.Core/Runtime/MainDom.cs +++ b/src/Umbraco.Core/Runtime/MainDom.cs @@ -151,7 +151,7 @@ namespace Umbraco.Core.Runtime { _logger.Info("Cannot acquire (timeout)."); - // TODO: In previous versions we'd let a TimeoutException be thrown + // In previous versions we'd let a TimeoutException be thrown // and the appdomain would not start. We have the opportunity to allow it to // start without having MainDom? This would mean that it couldn't write // to nucache/examine and would only be ok if this was a super short lived appdomain. diff --git a/src/Umbraco.Tests/Cache/SnapDictionaryTests.cs b/src/Umbraco.Tests/Cache/SnapDictionaryTests.cs index 86426d196c..00ba721bdb 100644 --- a/src/Umbraco.Tests/Cache/SnapDictionaryTests.cs +++ b/src/Umbraco.Tests/Cache/SnapDictionaryTests.cs @@ -9,47 +9,6 @@ using Umbraco.Web.PublishedCache.NuCache.Snap; namespace Umbraco.Tests.Cache { - /// - /// Used for tests - /// - public static class SnapDictionaryExtensions - { - internal static void Set(this SnapDictionary d, TKey key, TValue value) - where TValue : class - { - using (d.GetScopedWriteLock(GetScopeProvider())) - { - d.SetLocked(key, value); - } - } - - internal static void Clear(this SnapDictionary d) - where TValue : class - { - using (d.GetScopedWriteLock(GetScopeProvider())) - { - d.ClearLocked(); - } - } - - internal static void Clear(this SnapDictionary d, TKey key) - where TValue : class - { - using (d.GetScopedWriteLock(GetScopeProvider())) - { - d.ClearLocked(key); - } - } - - private static IScopeProvider GetScopeProvider() - { - var scopeProvider = Mock.Of(); - Mock.Get(scopeProvider) - .Setup(x => x.Context).Returns(() => null); - return scopeProvider; - } - } - [TestFixture] public class SnapDictionaryTests { @@ -1184,4 +1143,45 @@ namespace Umbraco.Tests.Cache return scopeProvider; } } + + /// + /// Used for tests so that we don't have to wrap every Set/Clear call in locks + /// + public static class SnapDictionaryExtensions + { + internal static void Set(this SnapDictionary d, TKey key, TValue value) + where TValue : class + { + using (d.GetScopedWriteLock(GetScopeProvider())) + { + d.SetLocked(key, value); + } + } + + internal static void Clear(this SnapDictionary d) + where TValue : class + { + using (d.GetScopedWriteLock(GetScopeProvider())) + { + d.ClearLocked(); + } + } + + internal static void Clear(this SnapDictionary d, TKey key) + where TValue : class + { + using (d.GetScopedWriteLock(GetScopeProvider())) + { + d.ClearLocked(key); + } + } + + private static IScopeProvider GetScopeProvider() + { + var scopeProvider = Mock.Of(); + Mock.Get(scopeProvider) + .Setup(x => x.Context).Returns(() => null); + return scopeProvider; + } + } } diff --git a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs index 8ce299932c..c959d9f67a 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs @@ -157,40 +157,44 @@ namespace Umbraco.Web.PublishedCache.NuCache private void Release(WriteLockInfo lockInfo, bool commit = true) { - if (commit == false) + try { - lock(_rlocko) + if (commit == false) { - // see SnapDictionary - try { } - finally + lock (_rlocko) { - _nextGen = false; - _liveGen -= 1; + // see SnapDictionary + try { } + finally + { + _nextGen = false; + _liveGen -= 1; + } } - } - Rollback(_contentNodes); - RollbackRoot(); - Rollback(_contentTypesById); - Rollback(_contentTypesByAlias); - } - else if (_localDb != null && _wchanges != null) - { - foreach (var change in _wchanges) + Rollback(_contentNodes); + RollbackRoot(); + Rollback(_contentTypesById); + Rollback(_contentTypesByAlias); + } + else if (_localDb != null && _wchanges != null) { - if (change.Value.IsNull) - _localDb.TryRemove(change.Key, out ContentNodeKit unused); - else - _localDb[change.Key] = change.Value; + foreach (var change in _wchanges) + { + if (change.Value.IsNull) + _localDb.TryRemove(change.Key, out ContentNodeKit unused); + else + _localDb[change.Key] = change.Value; + } + _wchanges = null; + _localDb.Commit(); } - _wchanges = null; - _localDb.Commit(); } - - // TODO: Shouldn't this be in a finally block? - if (lockInfo.Taken) - Monitor.Exit(_wlocko); + finally + { + if (lockInfo.Taken) + Monitor.Exit(_wlocko); + } } private void RollbackRoot() @@ -256,10 +260,6 @@ namespace Umbraco.Web.PublishedCache.NuCache } finally { - _logger.Info("Releasing ContentStore..."); - - // TODO: I don't understand this, we would have already closed the BPlusTree store above?? - // I guess it's 'safe' and will just decrement the write lock counter? Release(lockInfo); } } diff --git a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshot.cs b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshot.cs index bbb84fc650..3f5c1aa4d5 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshot.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshot.cs @@ -39,15 +39,7 @@ namespace Umbraco.Web.PublishedCache.NuCache public void Resync() { - // This is annoying since this can cause deadlocks. Since this will be cleared if something happens in the same - // thread after that calls Elements, it will re-lock the _storesLock but that might already be locked again - // and since we're most likely in a ContentStore write lock, the other thread is probably wanting that one too. - // no lock - published snapshots are single-thread - - // TODO: Instead of clearing, we could hold this value in another var in case the above call to GetElements() Or potentially - // a new TryGetElements() fails (since we might be shutting down). In that case we can use the old value. - _elements?.Dispose(); _elements = null; } diff --git a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs index c37e5731e4..aee588d567 100755 --- a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs @@ -670,15 +670,7 @@ namespace Umbraco.Web.PublishedCache.NuCache publishedChanged = publishedChanged2; } - // TODO: These resync's are a problem, they cause deadlocks because when this is called, it's generally called within a writelock - // and then we clear out the snapshot and then if there's some event handler that needs the content cache, it tries to re-get it - // which first tries locking on the _storesLock which may have already been acquired and in this case we deadlock because - // we're still holding the write lock. - // We resync at the end of a ScopedWriteLock - // BUT if we don't resync here then the current snapshot is out of date for any event handlers that wish to use the most up to date - // data... - // so we need to figure out how to deal with the _storesLock - + if (draftChanged || publishedChanged) ((PublishedSnapshot)CurrentPublishedSnapshot)?.Resync(); } From b02360518d2da1efb64be81fc6c724c9047699db Mon Sep 17 00:00:00 2001 From: Shannon Date: Mon, 13 Jan 2020 22:25:46 +1100 Subject: [PATCH 19/23] Changed to GetAwaiter/GetResult and updates comment --- src/Umbraco.Core/Runtime/IMainDomLock.cs | 3 +-- src/Umbraco.Core/Runtime/MainDom.cs | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Umbraco.Core/Runtime/IMainDomLock.cs b/src/Umbraco.Core/Runtime/IMainDomLock.cs index c32e990114..6a62f48194 100644 --- a/src/Umbraco.Core/Runtime/IMainDomLock.cs +++ b/src/Umbraco.Core/Runtime/IMainDomLock.cs @@ -16,9 +16,8 @@ namespace Umbraco.Core.Runtime /// /// /// - /// A disposable object which will be disposed in order to release the lock + /// An awaitable boolean value which will be false if the elapsed millsecondsTimeout value is exceeded /// - /// Throws a if the elapsed millsecondsTimeout value is exceeded Task AcquireLockAsync(int millisecondsTimeout); /// diff --git a/src/Umbraco.Core/Runtime/MainDom.cs b/src/Umbraco.Core/Runtime/MainDom.cs index 67c7be3d21..e6780ec876 100644 --- a/src/Umbraco.Core/Runtime/MainDom.cs +++ b/src/Umbraco.Core/Runtime/MainDom.cs @@ -145,7 +145,7 @@ namespace Umbraco.Core.Runtime _logger.Info("Acquiring."); // Get the lock - var acquired = _mainDomLock.AcquireLockAsync(LockTimeoutMilliseconds).Result; + var acquired = _mainDomLock.AcquireLockAsync(LockTimeoutMilliseconds).GetAwaiter().GetResult(); if (!acquired) { From 3e71de4698f8a46e92ef747e39366b80dcf635aa Mon Sep 17 00:00:00 2001 From: Shannon Date: Mon, 13 Jan 2020 22:28:25 +1100 Subject: [PATCH 20/23] ensure locks the data types --- src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs index c959d9f67a..cecca3eb75 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs @@ -462,6 +462,8 @@ namespace Umbraco.Web.PublishedCache.NuCache /// public void UpdateDataTypesLocked(IEnumerable dataTypeIds, Func getContentType) { + EnsureLocked(); + var contentTypes = _contentTypesById .Where(kvp => kvp.Value.Value != null && From 201927580c299b223c5eb17f798e1aaa263944aa Mon Sep 17 00:00:00 2001 From: Shannon Date: Thu, 23 Jan 2020 16:23:27 +1100 Subject: [PATCH 21/23] Fixing accidental api signature breaking change --- .../Persistence/SqlSyntax/SqlServerSyntaxProvider.cs | 6 +++--- src/Umbraco.Core/Runtime/SqlMainDomLock.cs | 11 ++++++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/Umbraco.Core/Persistence/SqlSyntax/SqlServerSyntaxProvider.cs b/src/Umbraco.Core/Persistence/SqlSyntax/SqlServerSyntaxProvider.cs index 6dda49cd5e..bb50fa98a1 100644 --- a/src/Umbraco.Core/Persistence/SqlSyntax/SqlServerSyntaxProvider.cs +++ b/src/Umbraco.Core/Persistence/SqlSyntax/SqlServerSyntaxProvider.cs @@ -251,10 +251,10 @@ where tbl.[name]=@0 and col.[name]=@1;", tableName, columnName) public override void WriteLock(IDatabase db, params int[] lockIds) { - WriteLock(db, 1800, lockIds); + WriteLock(db, TimeSpan.FromMilliseconds(1800), lockIds); } - public void WriteLock(IDatabase db, int millisecondsTimeout, params int[] lockIds) + public void WriteLock(IDatabase db, TimeSpan timeout, params int[] lockIds) { // soon as we get Database, a transaction is started @@ -265,7 +265,7 @@ where tbl.[name]=@0 and col.[name]=@1;", tableName, columnName) // *not* using a unique 'WHERE IN' query here because the *order* of lockIds is important to avoid deadlocks foreach (var lockId in lockIds) { - db.Execute($"SET LOCK_TIMEOUT {millisecondsTimeout};"); + db.Execute($"SET LOCK_TIMEOUT {timeout.TotalMilliseconds};"); var i = db.Execute(@"UPDATE umbracoLock WITH (REPEATABLEREAD) SET value = (CASE WHEN (value=1) THEN -1 ELSE 1 END) WHERE id=@id", new { id = lockId }); if (i == 0) // ensure we are actually locking! throw new ArgumentException($"LockObject with id={lockId} does not exist."); diff --git a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs index 31f8b36ed0..418e4c13fc 100644 --- a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs +++ b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs @@ -39,6 +39,11 @@ namespace Umbraco.Core.Runtime public async Task AcquireLockAsync(int millisecondsTimeout) { + if (!(_dbFactory.SqlContext.SqlSyntax is SqlServerSyntaxProvider sqlServerSyntaxProvider)) + throw new NotSupportedException("SqlMainDomLock is only supported for Sql Server"); + + _sqlServerSyntax = sqlServerSyntaxProvider; + _logger.Debug("Acquiring lock..."); var db = GetDatabase(); @@ -52,7 +57,7 @@ namespace Umbraco.Core.Runtime try { // wait to get a write lock - _sqlServerSyntax.WriteLock(db, millisecondsTimeout, Constants.Locks.MainDom); + _sqlServerSyntax.WriteLock(db, TimeSpan.FromMilliseconds(millisecondsTimeout), Constants.Locks.MainDom); } catch (Exception ex) { @@ -136,7 +141,7 @@ namespace Umbraco.Core.Runtime db.BeginTransaction(IsolationLevel.ReadCommitted); // get a read lock - _sqlServerSyntax.ReadLock(db, Constants.Locks.MainDom); + _dbFactory.SqlContext.SqlSyntax.ReadLock(db, Constants.Locks.MainDom); // TODO: We could in theory just check if the main dom row doesn't exist, that could indicate that // we are still the maindom. An empty value might be better because then we won't have any orphan rows @@ -209,7 +214,7 @@ namespace Umbraco.Core.Runtime db.BeginTransaction(IsolationLevel.ReadCommitted); // get a read lock - _sqlServerSyntax.ReadLock(db, Constants.Locks.MainDom); + _dbFactory.SqlContext.SqlSyntax.ReadLock(db, Constants.Locks.MainDom); // the row var mainDomRows = db.Fetch("SELECT * FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey }); From 8d7ed7dd32b6ff0bb415d126b56f4c0a57b49491 Mon Sep 17 00:00:00 2001 From: Shannon Date: Thu, 23 Jan 2020 16:26:13 +1100 Subject: [PATCH 22/23] adds const fixing naming --- src/Umbraco.Core/Runtime/SqlMainDomLock.cs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs index 418e4c13fc..37db2899c6 100644 --- a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs +++ b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs @@ -17,6 +17,7 @@ namespace Umbraco.Core.Runtime { private string _lockId; private const string MainDomKey = "Umbraco.Core.Runtime.SqlMainDom"; + private const string UpdatedSuffix = "_updated"; private readonly ILogger _logger; private IUmbracoDatabase _db; private CancellationTokenSource _cancellationTokenSource = new CancellationTokenSource(); @@ -197,7 +198,7 @@ namespace Umbraco.Core.Runtime /// private Task WaitForExistingAsync(string tempId, int millisecondsTimeout) { - var updatedTempId = tempId + "_updated"; + var updatedTempId = tempId + UpdatedSuffix; return Task.Run(() => { @@ -343,11 +344,11 @@ namespace Umbraco.Core.Runtime private bool IsLockTimeoutException(Exception exception) => exception is SqlException sqlException && sqlException.Number == 1222; #region IDisposable Support - private bool disposedValue = false; // To detect redundant calls + private bool _disposedValue = false; // To detect redundant calls protected virtual void Dispose(bool disposing) { - if (!disposedValue) + if (!_disposedValue) { if (disposing) { @@ -374,7 +375,7 @@ namespace Umbraco.Core.Runtime if (_mainDomChanging) { _logger.Debug("Releasing MainDom, updating row, new application is booting."); - db.Execute("UPDATE umbracoKeyValue SET [value] = [value] + '_updated' WHERE [key] = @key", new { key = MainDomKey }); + db.Execute($"UPDATE umbracoKeyValue SET [value] = [value] + '{UpdatedSuffix}' WHERE [key] = @key", new { key = MainDomKey }); } else { @@ -396,7 +397,7 @@ namespace Umbraco.Core.Runtime } } - disposedValue = true; + _disposedValue = true; } } From 52b93bfc9c0c11a1225383681de5f6a716b67730 Mon Sep 17 00:00:00 2001 From: Shannon Date: Thu, 23 Jan 2020 18:33:55 +1100 Subject: [PATCH 23/23] ensures only _sqlServerSyntax is used --- src/Umbraco.Core/Runtime/SqlMainDomLock.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs index 37db2899c6..4433a8e307 100644 --- a/src/Umbraco.Core/Runtime/SqlMainDomLock.cs +++ b/src/Umbraco.Core/Runtime/SqlMainDomLock.cs @@ -142,7 +142,7 @@ namespace Umbraco.Core.Runtime db.BeginTransaction(IsolationLevel.ReadCommitted); // get a read lock - _dbFactory.SqlContext.SqlSyntax.ReadLock(db, Constants.Locks.MainDom); + _sqlServerSyntax.ReadLock(db, Constants.Locks.MainDom); // TODO: We could in theory just check if the main dom row doesn't exist, that could indicate that // we are still the maindom. An empty value might be better because then we won't have any orphan rows @@ -215,7 +215,7 @@ namespace Umbraco.Core.Runtime db.BeginTransaction(IsolationLevel.ReadCommitted); // get a read lock - _dbFactory.SqlContext.SqlSyntax.ReadLock(db, Constants.Locks.MainDom); + _sqlServerSyntax.ReadLock(db, Constants.Locks.MainDom); // the row var mainDomRows = db.Fetch("SELECT * FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey });