Files
Umbraco-CMS/src/Umbraco.Core/Runtime/SqlMainDomLock.cs
Sebastiaan Janssen a88a5a0cae Merge pull request #7984 from umbraco/v8/bugfix/7967-sqlmaindom
Fixes #7967 - cannot start site with SqlMainDomLock  when db isn't configured

(cherry picked from commit cbfe643a79)
2020-07-02 10:38:10 +02:00

442 lines
18 KiB
C#

using System;
using System.Data;
using System.Data.SqlClient;
using System.Diagnostics;
using System.Linq;
using System.Security.Cryptography;
using System.Threading;
using System.Threading.Tasks;
using System.Web;
using Umbraco.Core.Logging;
using Umbraco.Core.Persistence;
using Umbraco.Core.Persistence.Dtos;
using Umbraco.Core.Persistence.Mappers;
using Umbraco.Core.Persistence.SqlSyntax;
namespace Umbraco.Core.Runtime
{
internal class SqlMainDomLock : IMainDomLock
{
private string _lockId;
private const string MainDomKeyPrefix = "Umbraco.Core.Runtime.SqlMainDom";
private const string UpdatedSuffix = "_updated";
private readonly ILogger _logger;
private IUmbracoDatabase _db;
private CancellationTokenSource _cancellationTokenSource = new CancellationTokenSource();
private SqlServerSyntaxProvider _sqlServerSyntax = new SqlServerSyntaxProvider();
private bool _mainDomChanging = false;
private readonly UmbracoDatabaseFactory _dbFactory;
private bool _hasError;
private object _locker = new object();
public SqlMainDomLock(ILogger logger)
{
// unique id for our appdomain, this is more unique than the appdomain id which is just an INT counter to its safer
_lockId = Guid.NewGuid().ToString();
_logger = logger;
_dbFactory = new UmbracoDatabaseFactory(
Constants.System.UmbracoConnectionName,
_logger,
new Lazy<IMapperCollection>(() => new Persistence.Mappers.MapperCollection(Enumerable.Empty<BaseMapper>())));
}
public async Task<bool> AcquireLockAsync(int millisecondsTimeout)
{
if (!_dbFactory.Configured)
{
// if we aren't configured, then we're in an install state, in which case we have no choice but to assume we can acquire
return true;
}
if (!(_dbFactory.SqlContext.SqlSyntax is SqlServerSyntaxProvider sqlServerSyntaxProvider))
throw new NotSupportedException("SqlMainDomLock is only supported for Sql Server");
_sqlServerSyntax = sqlServerSyntaxProvider;
_logger.Debug<SqlMainDomLock>("Acquiring lock...");
var db = GetDatabase();
var tempId = Guid.NewGuid().ToString();
try
{
db.BeginTransaction(IsolationLevel.ReadCommitted);
try
{
// wait to get a write lock
_sqlServerSyntax.WriteLock(db, TimeSpan.FromMilliseconds(millisecondsTimeout), Constants.Locks.MainDom);
}
catch (Exception ex)
{
if (IsLockTimeoutException(ex))
{
_logger.Error<SqlMainDomLock>(ex, "Sql timeout occurred, could not acquire MainDom.");
_hasError = true;
return false;
}
// unexpected (will be caught below)
throw;
}
var result = InsertLockRecord(tempId); //we change the row to a random Id to signal other MainDom to shutdown
if (result == RecordPersistenceType.Insert)
{
// if we've inserted, then there was no MainDom so we can instantly acquire
// TODO: see the other TODO, could we just delete the row and that would indicate that we
// are MainDom? then we don't leave any orphan rows behind.
InsertLockRecord(_lockId); // so update with our appdomain id
_logger.Debug<SqlMainDomLock>("Acquired with ID {LockId}", _lockId);
return true;
}
// if we've updated, this means there is an active MainDom, now we need to wait to
// for the current MainDom to shutdown which also requires releasing our write lock
}
catch (Exception ex)
{
ResetDatabase();
// unexpected
_logger.Error<SqlMainDomLock>(ex, "Unexpected error, cannot acquire MainDom");
_hasError = true;
return false;
}
finally
{
db?.CompleteTransaction();
}
return await WaitForExistingAsync(tempId, millisecondsTimeout);
}
public Task ListenAsync()
{
if (_hasError)
{
_logger.Warn<SqlMainDomLock>("Could not acquire MainDom, listening is canceled.");
return Task.CompletedTask;
}
// Create a long running task (dedicated thread)
// to poll to check if we are still the MainDom registered in the DB
return Task.Factory.StartNew(ListeningLoop, _cancellationTokenSource.Token, TaskCreationOptions.LongRunning, TaskScheduler.Default);
}
/// <summary>
/// Returns the keyvalue table key for the current server/app
/// </summary>
/// <remarks>
/// The key is the the normal MainDomId which takes into account the AppDomainAppId and the physical file path of the app and this is
/// combined with the current machine name. The machine name is required because the default semaphore lock is machine wide so it implicitly
/// takes into account machine name whereas this needs to be explicitly per machine.
/// </remarks>
private string MainDomKey { get; } = MainDomKeyPrefix + "-" + (NetworkHelper.MachineName + MainDom.GetMainDomId()).GenerateHash<SHA1>();
private void ListeningLoop()
{
while (true)
{
// poll every 1 second
Thread.Sleep(1000);
if (!_dbFactory.Configured)
{
// if we aren't configured, we just keep looping since we can't query the db
continue;
}
lock (_locker)
{
// If cancellation has been requested we will just exit. Depending on timing of the shutdown,
// we will have already flagged _mainDomChanging = true, or we're shutting down faster than
// the other MainDom is taking to startup. In this case the db row will just be deleted and the
// new MainDom will just take over.
if (_cancellationTokenSource.IsCancellationRequested)
return;
var db = GetDatabase();
try
{
db.BeginTransaction(IsolationLevel.ReadCommitted);
// get a read lock
_sqlServerSyntax.ReadLock(db, Constants.Locks.MainDom);
// TODO: We could in theory just check if the main dom row doesn't exist, that could indicate that
// we are still the maindom. An empty value might be better because then we won't have any orphan rows
// if the app is terminated. Could that work?
if (!IsMainDomValue(_lockId))
{
// we are no longer main dom, another one has come online, exit
_mainDomChanging = true;
_logger.Debug<SqlMainDomLock>("Detected new booting application, releasing MainDom lock.");
return;
}
}
catch (Exception ex)
{
ResetDatabase();
// unexpected
_logger.Error<SqlMainDomLock>(ex, "Unexpected error, listening is canceled.");
_hasError = true;
return;
}
finally
{
db?.CompleteTransaction();
}
}
}
}
private void ResetDatabase()
{
if (_db.InTransaction)
_db.AbortTransaction();
_db.Dispose();
_db = null;
}
private IUmbracoDatabase GetDatabase()
{
if (_db != null)
return _db;
_db = _dbFactory.CreateDatabase();
return _db;
}
/// <summary>
/// Wait for any existing MainDom to release so we can continue booting
/// </summary>
/// <param name="tempId"></param>
/// <param name="millisecondsTimeout"></param>
/// <returns></returns>
private Task<bool> WaitForExistingAsync(string tempId, int millisecondsTimeout)
{
var updatedTempId = tempId + UpdatedSuffix;
return Task.Run(() =>
{
var db = GetDatabase();
var watch = new Stopwatch();
watch.Start();
while(true)
{
// poll very often, we need to take over as fast as we can
Thread.Sleep(100);
try
{
db.BeginTransaction(IsolationLevel.ReadCommitted);
// get a read lock
_sqlServerSyntax.ReadLock(db, Constants.Locks.MainDom);
// the row
var mainDomRows = db.Fetch<KeyValueDto>("SELECT * FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey });
if (mainDomRows.Count == 0 || mainDomRows[0].Value == updatedTempId)
{
// the other main dom has updated our record
// Or the other maindom shutdown super fast and just deleted the record
// which indicates that we
// can acquire it and it has shutdown.
_sqlServerSyntax.WriteLock(db, Constants.Locks.MainDom);
// so now we update the row with our appdomain id
InsertLockRecord(_lockId);
_logger.Debug<SqlMainDomLock>("Acquired with ID {LockId}", _lockId);
return true;
}
else if (mainDomRows.Count == 1 && !mainDomRows[0].Value.StartsWith(tempId))
{
// in this case, the prefixed ID is different which means
// another new AppDomain has come online and is wanting to take over. In that case, we will not
// acquire.
_logger.Debug<SqlMainDomLock>("Cannot acquire, another booting application detected.");
return false;
}
}
catch (Exception ex)
{
ResetDatabase();
if (IsLockTimeoutException(ex))
{
_logger.Error<SqlMainDomLock>(ex, "Sql timeout occurred, waiting for existing MainDom is canceled.");
_hasError = true;
return false;
}
// unexpected
_logger.Error<SqlMainDomLock>(ex, "Unexpected error, waiting for existing MainDom is canceled.");
_hasError = true;
return false;
}
finally
{
db?.CompleteTransaction();
}
if (watch.ElapsedMilliseconds >= millisecondsTimeout)
{
// if the timeout has elapsed, it either means that the other main dom is taking too long to shutdown,
// or it could mean that the previous appdomain was terminated and didn't clear out the main dom SQL row
// and it's just been left as an orphan row.
// There's really know way of knowing unless we are constantly updating the row for the current maindom
// which isn't ideal.
// So... we're going to 'just' take over, if the writelock works then we'll assume we're ok
_logger.Debug<SqlMainDomLock>("Timeout elapsed, assuming orphan row, acquiring MainDom.");
try
{
db.BeginTransaction(IsolationLevel.ReadCommitted);
_sqlServerSyntax.WriteLock(db, Constants.Locks.MainDom);
// so now we update the row with our appdomain id
InsertLockRecord(_lockId);
_logger.Debug<SqlMainDomLock>("Acquired with ID {LockId}", _lockId);
return true;
}
catch (Exception ex)
{
ResetDatabase();
if (IsLockTimeoutException(ex))
{
// something is wrong, we cannot acquire, not much we can do
_logger.Error<SqlMainDomLock>(ex, "Sql timeout occurred, could not forcibly acquire MainDom.");
_hasError = true;
return false;
}
_logger.Error<SqlMainDomLock>(ex, "Unexpected error, could not forcibly acquire MainDom.");
_hasError = true;
return false;
}
finally
{
db?.CompleteTransaction();
}
}
}
}, _cancellationTokenSource.Token);
}
/// <summary>
/// Inserts or updates the key/value row
/// </summary>
private RecordPersistenceType InsertLockRecord(string id)
{
var db = GetDatabase();
return db.InsertOrUpdate(new KeyValueDto
{
Key = MainDomKey,
Value = id,
Updated = DateTime.Now
});
}
/// <summary>
/// Checks if the DB row value is equals the value
/// </summary>
/// <returns></returns>
private bool IsMainDomValue(string val)
{
var db = GetDatabase();
return db.ExecuteScalar<int>("SELECT COUNT(*) FROM umbracoKeyValue WHERE [key] = @key AND [value] = @val",
new { key = MainDomKey, val = val }) == 1;
}
/// <summary>
/// Checks if the exception is an SQL timeout
/// </summary>
/// <param name="exception"></param>
/// <returns></returns>
private bool IsLockTimeoutException(Exception exception) => exception is SqlException sqlException && sqlException.Number == 1222;
#region IDisposable Support
private bool _disposedValue = false; // To detect redundant calls
protected virtual void Dispose(bool disposing)
{
if (!_disposedValue)
{
if (disposing)
{
lock (_locker)
{
// immediately cancel all sub-tasks, we don't want them to keep querying
_cancellationTokenSource.Cancel();
_cancellationTokenSource.Dispose();
if (_dbFactory.Configured)
{
var db = GetDatabase();
try
{
db.BeginTransaction(IsolationLevel.ReadCommitted);
// get a write lock
_sqlServerSyntax.WriteLock(db, Constants.Locks.MainDom);
// When we are disposed, it means we have released the MainDom lock
// and called all MainDom release callbacks, in this case
// if another maindom is actually coming online we need
// to signal to the MainDom coming online that we have shutdown.
// To do that, we update the existing main dom DB record with a suffixed "_updated" string.
// Otherwise, if we are just shutting down, we want to just delete the row.
if (_mainDomChanging)
{
_logger.Debug<SqlMainDomLock>("Releasing MainDom, updating row, new application is booting.");
db.Execute($"UPDATE umbracoKeyValue SET [value] = [value] + '{UpdatedSuffix}' WHERE [key] = @key", new { key = MainDomKey });
}
else
{
_logger.Debug<SqlMainDomLock>("Releasing MainDom, deleting row, application is shutting down.");
db.Execute("DELETE FROM umbracoKeyValue WHERE [key] = @key", new { key = MainDomKey });
}
}
catch (Exception ex)
{
ResetDatabase();
_logger.Error<SqlMainDomLock>(ex, "Unexpected error during dipsose.");
_hasError = true;
}
finally
{
db?.CompleteTransaction();
ResetDatabase();
}
}
}
}
_disposedValue = true;
}
}
// This code added to correctly implement the disposable pattern.
public void Dispose()
{
// Do not change this code. Put cleanup code in Dispose(bool disposing) above.
Dispose(true);
}
#endregion
}
}