using System;
using System.Data;
using System.Data.SqlClient;
using System.Diagnostics;
using System.Linq;
using System.Security.Cryptography;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using NPoco;
using Umbraco.Cms.Core;
using Umbraco.Cms.Core.Configuration.Models;
using Umbraco.Cms.Core.Hosting;
using Umbraco.Cms.Core.Runtime;
using Umbraco.Cms.Infrastructure.Migrations.Install;
using Umbraco.Cms.Infrastructure.Persistence;
using Umbraco.Cms.Infrastructure.Persistence.Dtos;
using Umbraco.Cms.Infrastructure.Persistence.Mappers;
using Umbraco.Cms.Infrastructure.Persistence.SqlSyntax;
using Umbraco.Extensions;
using MapperCollection = Umbraco.Cms.Infrastructure.Persistence.Mappers.MapperCollection;

namespace Umbraco.Cms.Infrastructure.Runtime
{
    /// <summary>
    /// An <see cref="IMainDomLock"/> that coordinates MainDom ownership through a row in the
    /// <c>umbracoKeyValue</c> table, guarded by the <c>MainDom</c> database lock.
    /// </summary>
    /// <remarks>
    /// <para>The row value holds the id of the current owner. A booting instance overwrites the value with a
    /// temporary id to signal the current owner to shut down; the owner answers by appending
    /// <c>_updated</c> to that value when it is disposed, or deletes the row on a plain shutdown.</para>
    /// <para>Only SQL Server is supported: <see cref="AcquireLockAsync"/> throws when the configured syntax
    /// provider is not a <see cref="SqlServerSyntaxProvider"/>.</para>
    /// </remarks>
    public class SqlMainDomLock : IMainDomLock
    {
        private const string MainDomKeyPrefix = "Umbraco.Core.Runtime.SqlMainDom";
        private const string UpdatedSuffix = "_updated";

        // SQL Server error number checked by IsLockTimeoutException (lock request time out period exceeded).
        private const int SqlLockTimeoutErrorNumber = 1222;

        private readonly ILogger _logger;
        private readonly IHostingEnvironment _hostingEnvironment;
        private readonly UmbracoDatabaseFactory _dbFactory;
        private readonly CancellationTokenSource _cancellationTokenSource = new CancellationTokenSource();
        private readonly object _locker = new object();

        private string _lockId;
        private SqlServerSyntaxProvider _sqlServerSyntax = new SqlServerSyntaxProvider();
        private bool _mainDomChanging = false;
        private bool _errorDuringAcquiring;
        private bool _hasTable = false;

        /// <summary>
        /// Initializes a new instance of the <see cref="SqlMainDomLock"/> class.
        /// </summary>
        /// <remarks>
        /// Creates a dedicated <see cref="UmbracoDatabaseFactory"/> (with an empty mapper collection) and
        /// computes <see cref="MainDomKey"/> from the machine name and the MainDom id.
        /// </remarks>
        public SqlMainDomLock(
            ILogger logger,
            ILoggerFactory loggerFactory,
            GlobalSettings globalSettings,
            ConnectionStrings connectionStrings,
            IDbProviderFactoryCreator dbProviderFactoryCreator,
            IHostingEnvironment hostingEnvironment,
            DatabaseSchemaCreatorFactory databaseSchemaCreatorFactory)
        {
            // unique id for our appdomain, this is more unique than the appdomain id which is just an INT counter so it's safer
            _lockId = Guid.NewGuid().ToString();
            _logger = logger;
            _hostingEnvironment = hostingEnvironment;

            // NOTE(review): the generic type arguments on CreateLogger / Lazy / Enumerable.Empty were restored by
            // inference - confirm them against the UmbracoDatabaseFactory constructor signature.
            _dbFactory = new UmbracoDatabaseFactory(
                loggerFactory.CreateLogger<UmbracoDatabaseFactory>(),
                loggerFactory,
                globalSettings,
                connectionStrings,
                new Lazy<IMapperCollection>(() => new MapperCollection(Enumerable.Empty<BaseMapper>())),
                dbProviderFactoryCreator,
                databaseSchemaCreatorFactory);

            MainDomKey = MainDomKeyPrefix + "-" + (NetworkHelper.MachineName + MainDom.GetMainDomId(_hostingEnvironment)).GenerateHash();
        }

        /// <summary>
        /// Tries to become the MainDom by writing this instance's id into the MainDom row.
        /// </summary>
        /// <param name="millisecondsTimeout">
        /// Used as the timeout for the initial write lock and as the maximum time to wait for an existing
        /// MainDom to release before taking over.
        /// </param>
        /// <returns>
        /// <c>true</c> when the lock was acquired, or when the database is not configured or the key/value table
        /// does not exist yet (install state); <c>false</c> on a lock timeout, on an unexpected error, or when
        /// another booting application was detected while waiting.
        /// </returns>
        /// <exception cref="NotSupportedException">The configured SQL syntax is not SQL Server.</exception>
        public async Task<bool> AcquireLockAsync(int millisecondsTimeout)
        {
            if (!_dbFactory.Configured)
            {
                // if we aren't configured then we're in an install state, in which case we have no choice but to assume we can acquire
                return true;
            }

            if (!(_dbFactory.SqlContext.SqlSyntax is SqlServerSyntaxProvider sqlServerSyntaxProvider))
            {
                throw new NotSupportedException("SqlMainDomLock is only supported for Sql Server");
            }

            _sqlServerSyntax = sqlServerSyntaxProvider;

            _logger.LogDebug("Acquiring lock...");

            var tempId = Guid.NewGuid().ToString();

            IUmbracoDatabase db = null;

            try
            {
                db = _dbFactory.CreateDatabase();

                _hasTable = db.HasTable(Cms.Core.Constants.DatabaseSchema.Tables.KeyValue);
                if (!_hasTable)
                {
                    // the Db does not contain the required table, we must be in an install state we have no choice but to assume we can acquire
                    return true;
                }

                db.BeginTransaction(IsolationLevel.ReadCommitted);

                try
                {
                    // wait to get a write lock
                    _sqlServerSyntax.WriteLock(db, TimeSpan.FromMilliseconds(millisecondsTimeout), Cms.Core.Constants.Locks.MainDom);
                }
                catch (SqlException ex) when (IsLockTimeoutException(ex))
                {
                    // any other SqlException is not filtered here and reaches the outer catch below
                    _logger.LogError(ex, "Sql timeout occurred, could not acquire MainDom.");
                    _errorDuringAcquiring = true;
                    return false;
                }

                // we change the row to a random Id to signal other MainDom to shutdown
                var result = InsertLockRecord(tempId, db);
                if (result == RecordPersistenceType.Insert)
                {
                    // if we've inserted, then there was no MainDom so we can instantly acquire

                    InsertLockRecord(_lockId, db); // so update with our appdomain id
                    _logger.LogDebug("Acquired with ID {LockId}", _lockId);
                    return true;
                }

                // if we've updated, this means there is an active MainDom, now we need to wait
                // for the current MainDom to shutdown which also requires releasing our write lock
            }
            catch (Exception ex)
            {
                // unexpected
                _logger.LogError(ex, "Unexpected error, cannot acquire MainDom");
                _errorDuringAcquiring = true;
                return false;
            }
            finally
            {
                db?.CompleteTransaction();
                db?.Dispose();
            }

            return await WaitForExistingAsync(tempId, millisecondsTimeout);
        }

        /// <summary>
        /// Starts polling the database to detect when this instance is no longer the registered MainDom.
        /// </summary>
        /// <returns>
        /// A completed task when acquiring previously failed; otherwise the long running task executing the
        /// listening loop, which completes when the loop exits.
        /// </returns>
        public Task ListenAsync()
        {
            if (_errorDuringAcquiring)
            {
                _logger.LogWarning("Could not acquire MainDom, listening is canceled.");
                return Task.CompletedTask;
            }

            // Create a long running task (dedicated thread)
            // to poll to check if we are still the MainDom registered in the DB
            return Task.Factory.StartNew(
                ListeningLoop,
                _cancellationTokenSource.Token,
                TaskCreationOptions.LongRunning,
                // Must explicitly specify this, see https://blog.stephencleary.com/2013/10/continuewith-is-dangerous-too.html
                TaskScheduler.Default);
        }

        /// <summary>
        /// Returns the keyvalue table key for the current server/app
        /// </summary>
        /// <remarks>
        /// The key is the normal MainDomId which takes into account the AppDomainAppId and the physical file path of the app and this is
        /// combined with the current machine name. The machine name is required because the default semaphore lock is machine wide so it implicitly
        /// takes into account machine name whereas this needs to be explicitly per machine.
        /// </remarks>
        private string MainDomKey { get; }

        /// <summary>
        /// Polls the MainDom row every two seconds and returns once cancellation was requested or the row no
        /// longer holds this instance's id.
        /// </summary>
        private void ListeningLoop()
        {
            while (true)
            {
                // poll every couple of seconds
                // local testing shows the actual query to be executed from client/server is approx 300ms but would change depending on environment/IO
                Thread.Sleep(2000);

                if (!_dbFactory.Configured)
                {
                    // if we aren't configured, we just keep looping since we can't query the db
                    continue;
                }

                lock (_locker)
                {
                    // If cancellation has been requested we will just exit. Depending on timing of the shutdown,
                    // we will have already flagged _mainDomChanging = true, or we're shutting down faster than
                    // the other MainDom is taking to startup. In this case the db row will just be deleted and the
                    // new MainDom will just take over.
                    if (_cancellationTokenSource.IsCancellationRequested)
                    {
                        _logger.LogDebug("Task canceled, exiting loop");
                        return;
                    }

                    IUmbracoDatabase db = null;

                    try
                    {
                        db = _dbFactory.CreateDatabase();

                        if (!_hasTable)
                        {
                            // re-check if its still false, we don't want to re-query once we know its there since this
                            // loop needs to use minimal resources
                            _hasTable = db.HasTable(Cms.Core.Constants.DatabaseSchema.Tables.KeyValue);
                            if (!_hasTable)
                            {
                                // the Db does not contain the required table, we just keep looping since we can't query the db
                                continue;
                            }
                        }

                        db.BeginTransaction(IsolationLevel.ReadCommitted);

                        // get a read lock
                        _sqlServerSyntax.ReadLock(db, Cms.Core.Constants.Locks.MainDom);

                        if (!IsMainDomValue(_lockId, db))
                        {
                            // we are no longer main dom, another one has come online, exit
                            _mainDomChanging = true;
                            _logger.LogDebug("Detected new booting application, releasing MainDom lock.");
                            return;
                        }
                    }
                    catch (Exception ex)
                    {
                        _logger.LogError(ex, "Unexpected error during listening.");

                        // We need to keep on listening unless we've been notified by our own AppDomain to shutdown since
                        // we don't want to shutdown resources controlled by MainDom inadvertently. We'll just keep listening otherwise.
                        if (_cancellationTokenSource.IsCancellationRequested)
                        {
                            _logger.LogDebug("Task canceled, exiting loop");
                            return;
                        }
                    }
                    finally
                    {
                        db?.CompleteTransaction();
                        db?.Dispose();
                    }
                }
            }
        }

        /// <summary>
        /// Wait for any existing MainDom to release so we can continue booting
        /// </summary>
        /// <param name="tempId">The temporary id this instance wrote into the MainDom row.</param>
        /// <param name="millisecondsTimeout">Maximum time to poll before forcibly taking over.</param>
        /// <returns>
        /// A task yielding <c>true</c> when the lock was acquired, otherwise <c>false</c>.
        /// </returns>
        private Task<bool> WaitForExistingAsync(string tempId, int millisecondsTimeout)
        {
            var updatedTempId = tempId + UpdatedSuffix;

            return Task.Run(() =>
            {
                try
                {
                    using var db = _dbFactory.CreateDatabase();

                    var watch = new Stopwatch();
                    watch.Start();
                    while (true)
                    {
                        // poll very often, we need to take over as fast as we can
                        // local testing shows the actual query to be executed from client/server is approx 300ms but would change depending on environment/IO
                        Thread.Sleep(1000);

                        var acquired = TryAcquire(db, tempId, updatedTempId);
                        if (acquired.HasValue)
                        {
                            return acquired.Value;
                        }

                        if (watch.ElapsedMilliseconds >= millisecondsTimeout)
                        {
                            return AcquireWhenMaxWaitTimeElapsed(db);
                        }
                    }
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, "An error occurred trying to acquire and waiting for existing SqlMainDomLock to shutdown");
                    return false;
                }
            }, _cancellationTokenSource.Token);
        }

        /// <summary>
        /// Performs one polling attempt to take over the MainDom row.
        /// </summary>
        /// <param name="db">The database instance reused across polling attempts.</param>
        /// <param name="tempId">The temporary id this instance wrote into the MainDom row.</param>
        /// <param name="updatedTempId"><paramref name="tempId"/> with the "_updated" suffix appended.</param>
        /// <returns>
        /// <c>true</c> when acquired; <c>false</c> when another booting application owns the row or an error
        /// occurred; <c>null</c> when the caller should keep polling.
        /// </returns>
        private bool? TryAcquire(IUmbracoDatabase db, string tempId, string updatedTempId)
        {
            // Creates a separate transaction to the DB instance so we aren't allocating tons of new DB instances for each transaction
            // since this is executed in a tight loop

            ITransaction transaction = null;

            try
            {
                transaction = db.GetTransaction(IsolationLevel.ReadCommitted);

                // get a read lock
                _sqlServerSyntax.ReadLock(db, Cms.Core.Constants.Locks.MainDom);

                // the row
                var mainDomRows = db.Fetch<KeyValueDto>("SELECT * FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey });

                if (mainDomRows.Count == 0 || mainDomRows[0].Value == updatedTempId)
                {
                    // the other main dom has updated our record
                    // Or the other maindom shutdown super fast and just deleted the record
                    // which indicates that we
                    // can acquire it and it has shutdown.

                    _sqlServerSyntax.WriteLock(db, Cms.Core.Constants.Locks.MainDom);

                    // so now we update the row with our appdomain id
                    InsertLockRecord(_lockId, db);
                    _logger.LogDebug("Acquired with ID {LockId}", _lockId);
                    return true;
                }
                else if (mainDomRows.Count == 1 && !mainDomRows[0].Value.StartsWith(tempId, StringComparison.Ordinal))
                {
                    // in this case, the prefixed ID is different which means
                    // another new AppDomain has come online and is wanting to take over. In that case, we will not
                    // acquire.
                    // (ids are GUID strings, so the prefix check is ordinal rather than culture-sensitive)

                    _logger.LogDebug("Cannot acquire, another booting application detected.");
                    return false;
                }
            }
            catch (SqlException ex) when (IsLockTimeoutException(ex))
            {
                _logger.LogError(ex, "Sql timeout occurred, waiting for existing MainDom is canceled.");
                _errorDuringAcquiring = true;
                return false;
            }
            catch (Exception ex)
            {
                // unexpected
                _logger.LogError(ex, "Unexpected error, waiting for existing MainDom is canceled.");
                _errorDuringAcquiring = true;
                return false;
            }
            finally
            {
                transaction?.Complete();
                transaction?.Dispose();
            }

            return null; // continue
        }

        /// <summary>
        /// Forcibly takes over the MainDom row once the maximum wait time has elapsed.
        /// </summary>
        /// <param name="db">The database instance reused by the polling loop.</param>
        /// <returns><c>true</c> when the row was written with this instance's id; <c>false</c> on any error.</returns>
        private bool AcquireWhenMaxWaitTimeElapsed(IUmbracoDatabase db)
        {
            // Creates a separate transaction to the DB instance so we aren't allocating tons of new DB instances for each transaction
            // since this is executed in a tight loop

            // if the timeout has elapsed, it either means that the other main dom is taking too long to shutdown,
            // or it could mean that the previous appdomain was terminated and didn't clear out the main dom SQL row
            // and it's just been left as an orphan row.
            // There's really no way of knowing unless we are constantly updating the row for the current maindom
            // which isn't ideal.
            // So... we're going to 'just' take over, if the writelock works then we'll assume we're ok

            _logger.LogDebug("Timeout elapsed, assuming orphan row, acquiring MainDom.");

            ITransaction transaction = null;

            try
            {
                transaction = db.GetTransaction(IsolationLevel.ReadCommitted);

                _sqlServerSyntax.WriteLock(db, Cms.Core.Constants.Locks.MainDom);

                // so now we update the row with our appdomain id
                InsertLockRecord(_lockId, db);
                _logger.LogDebug("Acquired with ID {LockId}", _lockId);
                return true;
            }
            catch (SqlException ex) when (IsLockTimeoutException(ex))
            {
                // something is wrong, we cannot acquire, not much we can do
                _logger.LogError(ex, "Sql timeout occurred, could not forcibly acquire MainDom.");
                _errorDuringAcquiring = true;
                return false;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Unexpected error, could not forcibly acquire MainDom.");
                _errorDuringAcquiring = true;
                return false;
            }
            finally
            {
                transaction?.Complete();
                transaction?.Dispose();
            }
        }

        /// <summary>
        /// Inserts or updates the key/value row
        /// </summary>
        /// <param name="id">The value to store in the MainDom row.</param>
        /// <param name="db">The database to write to.</param>
        /// <returns>The persistence type reported by the insert-or-update call.</returns>
        private RecordPersistenceType InsertLockRecord(string id, IUmbracoDatabase db)
        {
            return db.InsertOrUpdate(new KeyValueDto
            {
                Key = MainDomKey,
                Value = id,
                UpdateDate = DateTime.Now
            });
        }

        /// <summary>
        /// Checks if the DB row value is equal to the given value
        /// </summary>
        /// <param name="val">The value expected in the MainDom row.</param>
        /// <param name="db">The database to query.</param>
        /// <returns><c>true</c> when exactly one row matches both the MainDom key and <paramref name="val"/>.</returns>
        private bool IsMainDomValue(string val, IUmbracoDatabase db)
        {
            return db.ExecuteScalar<int>(
                "SELECT COUNT(*) FROM umbracoKeyValue WHERE [key] = @key AND [value] = @val",
                new { key = MainDomKey, val = val }) == 1;
        }

        /// <summary>
        /// Checks if the exception is an SQL timeout
        /// </summary>
        /// <param name="sqlException">The exception to inspect; may be <c>null</c>.</param>
        /// <returns><c>true</c> when the exception's error number is 1222.</returns>
        private bool IsLockTimeoutException(SqlException sqlException) => sqlException?.Number == SqlLockTimeoutErrorNumber;

        #region IDisposable Support

        private bool _disposedValue = false; // To detect redundant calls

        /// <summary>
        /// Cancels background polling and releases the MainDom row: the row value is suffixed with
        /// "_updated" when another application is taking over, otherwise the row is deleted.
        /// </summary>
        /// <param name="disposing"><c>true</c> when called from <see cref="Dispose()"/>.</param>
        protected virtual void Dispose(bool disposing)
        {
            if (!_disposedValue)
            {
                if (disposing)
                {
                    lock (_locker)
                    {
                        _logger.LogDebug($"{nameof(SqlMainDomLock)} Disposing...");

                        // immediately cancel all sub-tasks, we don't want them to keep querying
                        _cancellationTokenSource.Cancel();
                        _cancellationTokenSource.Dispose();

                        if (_dbFactory.Configured && _hasTable)
                        {
                            IUmbracoDatabase db = null;
                            try
                            {
                                db = _dbFactory.CreateDatabase();
                                db.BeginTransaction(IsolationLevel.ReadCommitted);

                                // get a write lock
                                _sqlServerSyntax.WriteLock(db, Cms.Core.Constants.Locks.MainDom);

                                // When we are disposed, it means we have released the MainDom lock
                                // and called all MainDom release callbacks, in this case
                                // if another maindom is actually coming online we need
                                // to signal to the MainDom coming online that we have shutdown.
                                // To do that, we update the existing main dom DB record with a suffixed "_updated" string.
                                // Otherwise, if we are just shutting down, we want to just delete the row.
                                if (_mainDomChanging)
                                {
                                    _logger.LogDebug("Releasing MainDom, updating row, new application is booting.");
                                    db.Execute($"UPDATE umbracoKeyValue SET [value] = [value] + '{UpdatedSuffix}' WHERE [key] = @key", new { key = MainDomKey });
                                }
                                else
                                {
                                    _logger.LogDebug("Releasing MainDom, deleting row, application is shutting down.");
                                    db.Execute("DELETE FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey });
                                }
                            }
                            catch (Exception ex)
                            {
                                // never throw from Dispose; log and carry on
                                _logger.LogError(ex, "Unexpected error during dispose.");
                            }
                            finally
                            {
                                try
                                {
                                    db?.CompleteTransaction();
                                    db?.Dispose();
                                }
                                catch (Exception ex)
                                {
                                    _logger.LogError(ex, "Unexpected error during dispose when completing transaction.");
                                }
                            }
                        }
                    }
                }

                _disposedValue = true;
            }
        }

        // This code added to correctly implement the disposable pattern.
        /// <summary>
        /// Releases the MainDom row and stops background polling.
        /// </summary>
        public void Dispose()
        {
            // Do not change this code. Put cleanup code in Dispose(bool disposing) above.
            Dispose(true);
        }

        #endregion
    }
}