Fixes our sql azure transient fault detection to be inline with current standards, adds a scope for the health check schedule tasks

This commit is contained in:
Shannon
2020-07-08 17:26:31 +10:00
parent 384531ea68
commit e1757178b9
5 changed files with 57 additions and 43 deletions

View File

@@ -4,6 +4,10 @@ using System.Data.SqlClient;
namespace Umbraco.Core.Persistence.FaultHandling.Strategies
{
// See https://docs.microsoft.com/en-us/azure/azure-sql/database/troubleshoot-common-connectivity-issues
// Also we could just use the nuget package instead https://www.nuget.org/packages/EnterpriseLibrary.TransientFaultHandling/ ?
// but i guess that's not netcore so we'll just leave it.
/// <summary>
/// Provides the transient error detection logic for transient faults that are specific to SQL Azure.
/// </summary>
@@ -71,7 +75,7 @@ namespace Umbraco.Core.Persistence.FaultHandling.Strategies
/// Determines whether the specified exception represents a transient failure that can be compensated by a retry.
/// </summary>
/// <param name="ex">The exception object to be verified.</param>
/// <returns>True if the specified exception is considered as transient, otherwise false.</returns>
/// <returns>true if the specified exception is considered as transient; otherwise, false.</returns>
public bool IsTransient(Exception ex)
{
if (ex != null)
@@ -97,40 +101,50 @@ namespace Umbraco.Core.Persistence.FaultHandling.Strategies
return true;
// SQL Error Code: 40197
// The service has encountered an error processing your request. Please try again.
case 40197:
// SQL Error Code: 10928
// Resource ID: %d. The %s limit for the database is %d and has been reached.
case 10928:
// SQL Error Code: 10929
// Resource ID: %d. The %s minimum guarantee is %d, maximum limit is %d and the current usage for the database is %d.
// However, the server is currently too busy to support requests greater than %d for this database.
case 10929:
// SQL Error Code: 10053
// A transport-level error has occurred when receiving results from the server.
// An established connection was aborted by the software in your host machine.
case 10053:
// SQL Error Code: 10054
// A transport-level error has occurred when sending the request to the server.
// A transport-level error has occurred when sending the request to the server.
// (provider: TCP Provider, error: 0 - An existing connection was forcibly closed by the remote host.)
case 10054:
// SQL Error Code: 10060
// A network-related or instance-specific error occurred while establishing a connection to SQL Server.
// The server was not found or was not accessible. Verify that the instance name is correct and that SQL Server
// is configured to allow remote connections. (provider: TCP Provider, error: 0 - A connection attempt failed
// because the connected party did not properly respond after a period of time, or established connection failed
// A network-related or instance-specific error occurred while establishing a connection to SQL Server.
// The server was not found or was not accessible. Verify that the instance name is correct and that SQL Server
// is configured to allow remote connections. (provider: TCP Provider, error: 0 - A connection attempt failed
// because the connected party did not properly respond after a period of time, or established connection failed
// because connected host has failed to respond.)"}
case 10060:
// SQL Error Code: 40197
// The service has encountered an error processing your request. Please try again.
case 40197:
// SQL Error Code: 40540
// The service has encountered an error processing your request. Please try again.
case 40540:
// SQL Error Code: 40613
// Database XXXX on server YYYY is not currently available. Please retry the connection later. If the problem persists, contact customer
// Database XXXX on server YYYY is not currently available. Please retry the connection later. If the problem persists, contact customer
// support, and provide them the session tracing ID of ZZZZZ.
case 40613:
// SQL Error Code: 40143
// The service has encountered an error processing your request. Please try again.
case 40143:
// SQL Error Code: 233
// The client was unable to establish a connection because of an error during connection initialization process before login.
// Possible causes include the following: the client tried to connect to an unsupported version of SQL Server; the server was too busy
// to accept new connections; or there was a resource limitation (insufficient memory or maximum allowed connections) on the server.
// The client was unable to establish a connection because of an error during connection initialization process before login.
// Possible causes include the following: the client tried to connect to an unsupported version of SQL Server; the server was too busy
// to accept new connections; or there was a resource limitation (insufficient memory or maximum allowed connections) on the server.
// (provider: TCP Provider, error: 0 - An existing connection was forcibly closed by the remote host.)
case 233:
// SQL Error Code: 64
// A connection was successfully established with the server, but then an error occurred during the login process.
// (provider: TCP Provider, error: 0 - The specified network name is no longer available.)
// A connection was successfully established with the server, but then an error occurred during the login process.
// (provider: TCP Provider, error: 0 - The specified network name is no longer available.)
case 64:
// DBNETLIB Error Code: 20
// The instance of SQL Server you attempted to connect to does not support encryption.

View File

@@ -5,6 +5,7 @@ using Umbraco.Core;
using Umbraco.Core.Composing;
using Umbraco.Core.Configuration;
using Umbraco.Core.Logging;
using Umbraco.Core.Scoping;
using Umbraco.Core.Sync;
using Umbraco.Web.HealthCheck;
@@ -15,16 +16,17 @@ namespace Umbraco.Web.Scheduling
private readonly IRuntimeState _runtimeState;
private readonly HealthCheckCollection _healthChecks;
private readonly HealthCheckNotificationMethodCollection _notifications;
private readonly IScopeProvider _scopeProvider;
private readonly IProfilingLogger _logger;
public HealthCheckNotifier(IBackgroundTaskRunner<RecurringTaskBase> runner, int delayMilliseconds, int periodMilliseconds,
HealthCheckCollection healthChecks, HealthCheckNotificationMethodCollection notifications,
IRuntimeState runtimeState,
IProfilingLogger logger)
IScopeProvider scopeProvider, IRuntimeState runtimeState, IProfilingLogger logger)
: base(runner, delayMilliseconds, periodMilliseconds)
{
_healthChecks = healthChecks;
_notifications = notifications;
_scopeProvider = scopeProvider;
_runtimeState = runtimeState;
_logger = logger;
}
@@ -51,6 +53,10 @@ namespace Umbraco.Web.Scheduling
return false; // do NOT repeat, going down
}
// Ensure we use an explicit scope since we are running on a background thread and plugin health
// checks can be making service/database calls so we want to ensure the CallContext/Ambient scope
// isn't used since that can be problematic.
using (var scope = _scopeProvider.CreateScope())
using (_logger.DebugDuration<HealthCheckNotifier>("Health checks executing", "Health checks complete"))
{
var healthCheckConfig = Current.Configs.HealthChecks();

View File

@@ -70,8 +70,7 @@ namespace Umbraco.Web.Scheduling
return false; // do NOT repeat, going down
}
// running on a background task, and Log.CleanLogs uses the old SqlHelper,
// better wrap in a scope and ensure it's all cleaned up and nothing leaks
// Ensure we use an explicit scope since we are running on a background thread.
using (var scope = _scopeProvider.CreateScope())
using (_logger.DebugDuration<LogScrubber>("Log scrubbing executing", "Log scrubbing complete"))
{

View File

@@ -2,6 +2,7 @@
using System.Linq;
using Umbraco.Core;
using Umbraco.Core.Logging;
using Umbraco.Core.Scoping;
using Umbraco.Core.Services;
using Umbraco.Core.Sync;
@@ -55,30 +56,24 @@ namespace Umbraco.Web.Scheduling
try
{
// ensure we run with an UmbracoContext, because this may run in a background task,
// yet developers may be using the 'current' UmbracoContext in the event handlers
//
// TODO: or maybe not, CacheRefresherComponent already ensures a context when handling events
// - UmbracoContext 'current' needs to be refactored and cleaned up
// - batched messenger should not depend on a current HttpContext
// but then what should be its "scope"? could we attach it to scopes?
// - and we should definitively *not* have to flush it here (should be auto)
//
using (var contextReference = _umbracoContextFactory.EnsureUmbracoContext())
// We don't need an explicit scope here because PerformScheduledPublish creates it's own scope
// so it's safe as it will create it's own ambient scope.
// Ensure we run with an UmbracoContext, because this will run in a background task,
// and developers may be using the UmbracoContext in the event handlers.
using var contextReference = _umbracoContextFactory.EnsureUmbracoContext();
try
{
try
{
// run
var result = _contentService.PerformScheduledPublish(DateTime.Now);
foreach (var grouped in result.GroupBy(x => x.Result))
_logger.Info<ScheduledPublishing>("Scheduled publishing result: '{StatusCount}' items with status {Status}", grouped.Count(), grouped.Key);
}
finally
{
// if running on a temp context, we have to flush the messenger
if (contextReference.IsRoot && Composing.Current.ServerMessenger is BatchedDatabaseServerMessenger m)
m.FlushBatch();
}
// run
var result = _contentService.PerformScheduledPublish(DateTime.Now);
foreach (var grouped in result.GroupBy(x => x.Result))
_logger.Info<ScheduledPublishing>("Scheduled publishing result: '{StatusCount}' items with status {Status}", grouped.Count(), grouped.Key);
}
finally
{
// if running on a temp context, we have to flush the messenger
if (contextReference.IsRoot && Composing.Current.ServerMessenger is BatchedDatabaseServerMessenger m)
m.FlushBatch();
}
}
catch (Exception ex)

View File

@@ -155,7 +155,7 @@ namespace Umbraco.Web.Scheduling
}
var periodInMilliseconds = healthCheckConfig.NotificationSettings.PeriodInHours * 60 * 60 * 1000;
var task = new HealthCheckNotifier(_healthCheckRunner, delayInMilliseconds, periodInMilliseconds, healthChecks, notifications, _runtime, logger);
var task = new HealthCheckNotifier(_healthCheckRunner, delayInMilliseconds, periodInMilliseconds, healthChecks, notifications, _scopeProvider, _runtime, logger);
_healthCheckRunner.TryAdd(task);
return task;
}