using System; using System.Data; using System.Data.SqlClient; using System.Diagnostics; using System.Linq; using System.Security.Cryptography; using System.Threading; using System.Threading.Tasks; using System.Web; using Umbraco.Core.Logging; using Umbraco.Core.Persistence; using Umbraco.Core.Persistence.Dtos; using Umbraco.Core.Persistence.Mappers; using Umbraco.Core.Persistence.SqlSyntax; namespace Umbraco.Core.Runtime { internal class SqlMainDomLock : IMainDomLock { private string _lockId; private const string MainDomKeyPrefix = "Umbraco.Core.Runtime.SqlMainDom"; private const string UpdatedSuffix = "_updated"; private readonly ILogger _logger; private IUmbracoDatabase _db; private CancellationTokenSource _cancellationTokenSource = new CancellationTokenSource(); private SqlServerSyntaxProvider _sqlServerSyntax = new SqlServerSyntaxProvider(); private bool _mainDomChanging = false; private readonly UmbracoDatabaseFactory _dbFactory; private bool _hasError; private object _locker = new object(); public SqlMainDomLock(ILogger logger) { // unique id for our appdomain, this is more unique than the appdomain id which is just an INT counter to its safer _lockId = Guid.NewGuid().ToString(); _logger = logger; _dbFactory = new UmbracoDatabaseFactory( Constants.System.UmbracoConnectionName, _logger, new Lazy(() => new Persistence.Mappers.MapperCollection(Enumerable.Empty()))); } public async Task AcquireLockAsync(int millisecondsTimeout) { if (!_dbFactory.Configured) { // if we aren't configured, then we're in an install state, in which case we have no choice but to assume we can acquire return true; } if (!(_dbFactory.SqlContext.SqlSyntax is SqlServerSyntaxProvider sqlServerSyntaxProvider)) throw new NotSupportedException("SqlMainDomLock is only supported for Sql Server"); _sqlServerSyntax = sqlServerSyntaxProvider; _logger.Debug("Acquiring lock..."); var db = GetDatabase(); var tempId = Guid.NewGuid().ToString(); try { db.BeginTransaction(IsolationLevel.ReadCommitted); try { // wait to get a write lock _sqlServerSyntax.WriteLock(db, TimeSpan.FromMilliseconds(millisecondsTimeout), Constants.Locks.MainDom); } catch (Exception ex) { if (IsLockTimeoutException(ex)) { _logger.Error(ex, "Sql timeout occurred, could not acquire MainDom."); _hasError = true; return false; } // unexpected (will be caught below) throw; } var result = InsertLockRecord(tempId); //we change the row to a random Id to signal other MainDom to shutdown if (result == RecordPersistenceType.Insert) { // if we've inserted, then there was no MainDom so we can instantly acquire // TODO: see the other TODO, could we just delete the row and that would indicate that we // are MainDom? then we don't leave any orphan rows behind. InsertLockRecord(_lockId); // so update with our appdomain id _logger.Debug("Acquired with ID {LockId}", _lockId); return true; } // if we've updated, this means there is an active MainDom, now we need to wait to // for the current MainDom to shutdown which also requires releasing our write lock } catch (Exception ex) { ResetDatabase(); // unexpected _logger.Error(ex, "Unexpected error, cannot acquire MainDom"); _hasError = true; return false; } finally { db?.CompleteTransaction(); } return await WaitForExistingAsync(tempId, millisecondsTimeout); } public Task ListenAsync() { if (_hasError) { _logger.Warn("Could not acquire MainDom, listening is canceled."); return Task.CompletedTask; } // Create a long running task (dedicated thread) // to poll to check if we are still the MainDom registered in the DB return Task.Factory.StartNew(ListeningLoop, _cancellationTokenSource.Token, TaskCreationOptions.LongRunning, TaskScheduler.Default); } /// /// Returns the keyvalue table key for the current server/app /// /// /// The key is the the normal MainDomId which takes into account the AppDomainAppId and the physical file path of the app and this is /// combined with the current machine name. The machine name is required because the default semaphore lock is machine wide so it implicitly /// takes into account machine name whereas this needs to be explicitly per machine. /// private string MainDomKey { get; } = MainDomKeyPrefix + "-" + (NetworkHelper.MachineName + MainDom.GetMainDomId()).GenerateHash(); private void ListeningLoop() { while (true) { // poll every 1 second Thread.Sleep(1000); if (!_dbFactory.Configured) { // if we aren't configured, we just keep looping since we can't query the db continue; } lock (_locker) { // If cancellation has been requested we will just exit. Depending on timing of the shutdown, // we will have already flagged _mainDomChanging = true, or we're shutting down faster than // the other MainDom is taking to startup. In this case the db row will just be deleted and the // new MainDom will just take over. if (_cancellationTokenSource.IsCancellationRequested) return; var db = GetDatabase(); try { db.BeginTransaction(IsolationLevel.ReadCommitted); // get a read lock _sqlServerSyntax.ReadLock(db, Constants.Locks.MainDom); // TODO: We could in theory just check if the main dom row doesn't exist, that could indicate that // we are still the maindom. An empty value might be better because then we won't have any orphan rows // if the app is terminated. Could that work? if (!IsMainDomValue(_lockId)) { // we are no longer main dom, another one has come online, exit _mainDomChanging = true; _logger.Debug("Detected new booting application, releasing MainDom lock."); return; } } catch (Exception ex) { ResetDatabase(); // unexpected _logger.Error(ex, "Unexpected error, listening is canceled."); _hasError = true; return; } finally { db?.CompleteTransaction(); } } } } private void ResetDatabase() { if (_db.InTransaction) _db.AbortTransaction(); _db.Dispose(); _db = null; } private IUmbracoDatabase GetDatabase() { if (_db != null) return _db; _db = _dbFactory.CreateDatabase(); return _db; } /// /// Wait for any existing MainDom to release so we can continue booting /// /// /// /// private Task WaitForExistingAsync(string tempId, int millisecondsTimeout) { var updatedTempId = tempId + UpdatedSuffix; return Task.Run(() => { var db = GetDatabase(); var watch = new Stopwatch(); watch.Start(); while(true) { // poll very often, we need to take over as fast as we can Thread.Sleep(100); try { db.BeginTransaction(IsolationLevel.ReadCommitted); // get a read lock _sqlServerSyntax.ReadLock(db, Constants.Locks.MainDom); // the row var mainDomRows = db.Fetch("SELECT * FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey }); if (mainDomRows.Count == 0 || mainDomRows[0].Value == updatedTempId) { // the other main dom has updated our record // Or the other maindom shutdown super fast and just deleted the record // which indicates that we // can acquire it and it has shutdown. _sqlServerSyntax.WriteLock(db, Constants.Locks.MainDom); // so now we update the row with our appdomain id InsertLockRecord(_lockId); _logger.Debug("Acquired with ID {LockId}", _lockId); return true; } else if (mainDomRows.Count == 1 && !mainDomRows[0].Value.StartsWith(tempId)) { // in this case, the prefixed ID is different which means // another new AppDomain has come online and is wanting to take over. In that case, we will not // acquire. _logger.Debug("Cannot acquire, another booting application detected."); return false; } } catch (Exception ex) { ResetDatabase(); if (IsLockTimeoutException(ex)) { _logger.Error(ex, "Sql timeout occurred, waiting for existing MainDom is canceled."); _hasError = true; return false; } // unexpected _logger.Error(ex, "Unexpected error, waiting for existing MainDom is canceled."); _hasError = true; return false; } finally { db?.CompleteTransaction(); } if (watch.ElapsedMilliseconds >= millisecondsTimeout) { // if the timeout has elapsed, it either means that the other main dom is taking too long to shutdown, // or it could mean that the previous appdomain was terminated and didn't clear out the main dom SQL row // and it's just been left as an orphan row. // There's really know way of knowing unless we are constantly updating the row for the current maindom // which isn't ideal. // So... we're going to 'just' take over, if the writelock works then we'll assume we're ok _logger.Debug("Timeout elapsed, assuming orphan row, acquiring MainDom."); try { db.BeginTransaction(IsolationLevel.ReadCommitted); _sqlServerSyntax.WriteLock(db, Constants.Locks.MainDom); // so now we update the row with our appdomain id InsertLockRecord(_lockId); _logger.Debug("Acquired with ID {LockId}", _lockId); return true; } catch (Exception ex) { ResetDatabase(); if (IsLockTimeoutException(ex)) { // something is wrong, we cannot acquire, not much we can do _logger.Error(ex, "Sql timeout occurred, could not forcibly acquire MainDom."); _hasError = true; return false; } _logger.Error(ex, "Unexpected error, could not forcibly acquire MainDom."); _hasError = true; return false; } finally { db?.CompleteTransaction(); } } } }, _cancellationTokenSource.Token); } /// /// Inserts or updates the key/value row /// private RecordPersistenceType InsertLockRecord(string id) { var db = GetDatabase(); return db.InsertOrUpdate(new KeyValueDto { Key = MainDomKey, Value = id, Updated = DateTime.Now }); } /// /// Checks if the DB row value is equals the value /// /// private bool IsMainDomValue(string val) { var db = GetDatabase(); return db.ExecuteScalar("SELECT COUNT(*) FROM umbracoKeyValue WHERE [key] = @key AND [value] = @val", new { key = MainDomKey, val = val }) == 1; } /// /// Checks if the exception is an SQL timeout /// /// /// private bool IsLockTimeoutException(Exception exception) => exception is SqlException sqlException && sqlException.Number == 1222; #region IDisposable Support private bool _disposedValue = false; // To detect redundant calls protected virtual void Dispose(bool disposing) { if (!_disposedValue) { if (disposing) { lock (_locker) { // immediately cancel all sub-tasks, we don't want them to keep querying _cancellationTokenSource.Cancel(); _cancellationTokenSource.Dispose(); if (_dbFactory.Configured) { var db = GetDatabase(); try { db.BeginTransaction(IsolationLevel.ReadCommitted); // get a write lock _sqlServerSyntax.WriteLock(db, Constants.Locks.MainDom); // When we are disposed, it means we have released the MainDom lock // and called all MainDom release callbacks, in this case // if another maindom is actually coming online we need // to signal to the MainDom coming online that we have shutdown. // To do that, we update the existing main dom DB record with a suffixed "_updated" string. // Otherwise, if we are just shutting down, we want to just delete the row. if (_mainDomChanging) { _logger.Debug("Releasing MainDom, updating row, new application is booting."); db.Execute($"UPDATE umbracoKeyValue SET [value] = [value] + '{UpdatedSuffix}' WHERE [key] = @key", new { key = MainDomKey }); } else { _logger.Debug("Releasing MainDom, deleting row, application is shutting down."); db.Execute("DELETE FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey }); } } catch (Exception ex) { ResetDatabase(); _logger.Error(ex, "Unexpected error during dipsose."); _hasError = true; } finally { db?.CompleteTransaction(); ResetDatabase(); } } } } _disposedValue = true; } } // This code added to correctly implement the disposable pattern. public void Dispose() { // Do not change this code. Put cleanup code in Dispose(bool disposing) above. Dispose(true); } #endregion } }