From 34df5009e040463335d76571938e649008800b79 Mon Sep 17 00:00:00 2001
From: Shannon <sdeminick@gmail.com>
Date: Fri, 13 Dec 2013 16:58:21 +1100
Subject: [PATCH 1/3] Upgraded installer to do a two part install - first
 install then if an upgrade is required do the upgrade in a second call - this
 way we can have the progress bar update since the media xml installer can
 take some time.

---
 src/Umbraco.Core/DatabaseContext.cs           | 180 ++++++++++++------
 .../install/InstallerRestService.aspx.cs      |  43 ++++-
 .../install/steps/database.ascx               |  92 ++++++---
 .../install/utills/LegacyClasses.cs           |   6 +-
 4 files changed, 233 insertions(+), 88 deletions(-)
diff --git a/src/Umbraco.Core/DatabaseContext.cs b/src/Umbraco.Core/DatabaseContext.cs
index 4d5170ec80..e074297eb8 100644
--- a/src/Umbraco.Core/DatabaseContext.cs
+++ b/src/Umbraco.Core/DatabaseContext.cs
@@ -433,24 +433,19 @@ namespace Umbraco.Core
             return _result;
         }
 
-        internal Result CreateDatabaseSchemaAndDataOrUpgrade()
+        internal Result CreateDatabaseSchemaAndData()
         {
-            if (_configured == false || (string.IsNullOrEmpty(_connectionString) || string.IsNullOrEmpty(ProviderName)))
+            var readyForInstall = CheckReadyForInstall();
+            if (readyForInstall.Success == false)
             {
-                return new Result
-                           {
-                               Message =
-                                   "Database configuration is invalid. Please check that the entered database exists and that the provided username and password has write access to the database.",
-                               Success = false,
-                               Percentage = "10"
-                           };
+                return readyForInstall.Result;
             }
-
+            
             try
             {
                 LogHelper.Info<DatabaseContext>("Database configuration status: Started");
 
-                var message = string.Empty;
+                string message;
 
                 var database = new UmbracoDatabase(_connectionString, ProviderName);
                 var supportsCaseInsensitiveQueries = SqlSyntaxContext.SqlSyntaxProvider.SupportsCaseInsensitiveQueries(database);
@@ -465,50 +460,73 @@ namespace Umbraco.Core
 
                     return new Result { Message = message, Success = false, Percentage = "15" };
                 }
-                else if (supportsCaseInsensitiveQueries == null)
-                {
-                    message = "<p>&nbsp;</p><p>Warning! Could not check if your database type supports case insensitive queries. <br />We currently do not support these databases that do not support case insensitive queries.</p>" +
-                              "<p>You can check this by looking for the following setting in your my.ini file in your MySQL installation directory:</p>" +
-                              "<pre>lower_case_table_names=1</pre><br />" +
-                              "<p>Note: Make sure to check with your hosting provider if they support case insensitive queries as well.</p>" +
-                              "<p>For more technical information on case sensitivity in MySQL, have a look at " +
-                              "<a href='http://dev.mysql.com/doc/refman/5.0/en/identifier-case-sensitivity.html'>the documentation on the subject</a></p>";
-                }
-                else
-                {
-                    if (SqlSyntaxContext.SqlSyntaxProvider.GetType() == typeof(MySqlSyntaxProvider))
-                    {
-                        message = "<p>&nbsp;</p><p>Congratulations, the database step ran successfully!</p>" +
-                                  "<p>Note: You're using MySQL and the database instance you're connecting to seems to support case insensitive queries.</p>" +
-                                  "<p>However, your hosting provider may not support this option. Umbraco does not currently support MySQL installs that do not support case insensitive queries</p>" +
-                                  "<p>Make sure to check with your hosting provider if they support case insensitive queries as well.</p>" +
-                                  "<p>They can check this by looking for the following setting in the my.ini file in their MySQL installation directory:</p>" +
-                                  "<pre>lower_case_table_names=1</pre><br />" +
-                                  "<p>For more technical information on case sensitivity in MySQL, have a look at " +
-                                  "<a href='http://dev.mysql.com/doc/refman/5.0/en/identifier-case-sensitivity.html'>the documentation on the subject</a></p>";
-                    }
-                }
+
+                message = GetResultMessageForMySql(supportsCaseInsensitiveQueries);
 
                 var schemaResult = ValidateDatabaseSchema();
                 var installedVersion = schemaResult.DetermineInstalledVersion();
                 
-
                 //If Configuration Status is empty and the determined version is "empty" its a new install - otherwise upgrade the existing
                 if (string.IsNullOrEmpty(GlobalSettings.ConfigurationStatus) && installedVersion.Equals(new Version(0, 0, 0)))
                 {
                     database.CreateDatabaseSchema();
                     message = message + "<p>Installation completed!</p>";
+
+                    //a brand new install needs no upgrade step: log the outcome and report completion
+                    LogHelper.Info<DatabaseContext>("Database configuration status: " + message);
+                    return new Result { Message = message, Success = true, Percentage = "100" };
                 }
-                else
-                {
-                    var configuredVersion = string.IsNullOrEmpty(GlobalSettings.ConfigurationStatus)
+
+                //we need to do an upgrade so return a new status message and it will need to be done during the next step
+                LogHelper.Info<DatabaseContext>("Database requires upgrade");
+                message = "<p>Upgrading database, this may take some time...</p>";
+                return new Result
+                    {
+                        RequiresUpgrade = true, 
+                        Message = message, 
+                        Success = true, 
+                        Percentage = "30"
+                    };
+            }
+            catch (Exception ex)
+            {
+                return HandleInstallException(ex);
+            }
+        }
+
+        /// <summary>
+        /// This assumes all of the previous checks are done!
+        /// </summary>
+        /// <returns></returns>
+        internal Result UpgradeSchemaAndData()
+        {
+            var readyForInstall = CheckReadyForInstall();
+            if (readyForInstall.Success == false)
+            {
+                return readyForInstall.Result;
+            }
+
+            try
+            {
+                LogHelper.Info<DatabaseContext>("Database upgrade started");
+
+                var database = new UmbracoDatabase(_connectionString, ProviderName);
+                var supportsCaseInsensitiveQueries = SqlSyntaxContext.SqlSyntaxProvider.SupportsCaseInsensitiveQueries(database);                
+
+                var message = GetResultMessageForMySql(supportsCaseInsensitiveQueries);
+
+                var schemaResult = ValidateDatabaseSchema();
+                var installedVersion = schemaResult.DetermineInstalledVersion();
+                
+                //DO the upgrade!
+
+                var configuredVersion = string.IsNullOrEmpty(GlobalSettings.ConfigurationStatus)
                                                 ? installedVersion
                                                 : new Version(GlobalSettings.ConfigurationStatus);
-                    var targetVersion = UmbracoVersion.Current;
-                    var runner = new MigrationRunner(configuredVersion, targetVersion, GlobalSettings.UmbracoMigrationName);
-                    var upgraded = runner.Execute(database, true);
-                    message = message + "<p>Upgrade completed!</p>";
-                }
+                var targetVersion = UmbracoVersion.Current;
+                var runner = new MigrationRunner(configuredVersion, targetVersion, GlobalSettings.UmbracoMigrationName);
+                var upgraded = runner.Execute(database, true);
+                message = message + "<p>Upgrade completed!</p>";
 
                 //now that everything is done, we need to determine the version of SQL server that is executing
 
@@ -518,26 +536,72 @@ namespace Umbraco.Core
             }
             catch (Exception ex)
             {
-                LogHelper.Info<DatabaseContext>("Database configuration failed with the following error and stack trace: " + ex.Message + "\n" + ex.StackTrace);
-
-                if (_result != null)
-                {
-                    LogHelper.Info<DatabaseContext>("The database schema validation produced the following summary: \n" + _result.GetSummary());
-                }
-
-                return new Result
-                           {
-                               Message =
-                                   "The database configuration failed with the following message: " + ex.Message +
-                                   "\n Please check log file for additional information (can be found in '/App_Data/Logs/UmbracoTraceLog.txt')",
-                               Success = false,
-                               Percentage = "90"
-                           };
+                return HandleInstallException(ex);
             }
         }
 
+        // Builds the HTML note about case-insensitive query support for the installer result message: a warning when
+        // support could not be determined, a notice when the MySQL syntax provider is active, otherwise an empty string.
+        private string GetResultMessageForMySql(bool? supportsCaseInsensitiveQueries)
+        {
+            if (supportsCaseInsensitiveQueries.HasValue == false)
+            {
+                return "<p>&nbsp;</p><p>Warning! Could not check if your database type supports case insensitive queries. <br />We currently do not support these databases that do not support case insensitive queries.</p>" +
+                       "<p>You can check this by looking for the following setting in your my.ini file in your MySQL installation directory:</p>" +
+                       "<pre>lower_case_table_names=1</pre><br />" +
+                       "<p>Note: Make sure to check with your hosting provider if they support case insensitive queries as well.</p>" +
+                       "<p>For more technical information on case sensitivity in MySQL, have a look at " +
+                       "<a href='http://dev.mysql.com/doc/refman/5.0/en/identifier-case-sensitivity.html'>the documentation on the subject</a></p>";
+            }
+            var usingMySql = SqlSyntaxContext.SqlSyntaxProvider.GetType() == typeof(MySqlSyntaxProvider);
+            if (usingMySql == false) return string.Empty;
+            return "<p>&nbsp;</p><p>Congratulations, the database step ran successfully!</p>" +
+                   "<p>Note: You're using MySQL and the database instance you're connecting to seems to support case insensitive queries.</p>" +
+                   "<p>However, your hosting provider may not support this option. Umbraco does not currently support MySQL installs that do not support case insensitive queries</p>" +
+                   "<p>Make sure to check with your hosting provider if they support case insensitive queries as well.</p>" +
+                   "<p>They can check this by looking for the following setting in the my.ini file in their MySQL installation directory:</p>" +
+                   "<pre>lower_case_table_names=1</pre><br />" +
+                   "<p>For more technical information on case sensitivity in MySQL, have a look at " +
+                   "<a href='http://dev.mysql.com/doc/refman/5.0/en/identifier-case-sensitivity.html'>the documentation on the subject</a></p>";
+        }
+
+        // Verifies the context is configured and has both a connection string and a provider name before an
+        // install/upgrade is attempted; on failure the attempt carries the Result to hand back to the installer.
+        private Attempt<Result> CheckReadyForInstall()
+        {
+            var ready = _configured && string.IsNullOrEmpty(_connectionString) == false && string.IsNullOrEmpty(ProviderName) == false;
+            if (ready) return Attempt<Result>.Succeed();
+            var invalidConfiguration = new Result
+            {
+                Message = "Database configuration is invalid. Please check that the entered database exists and that the provided username and password has write access to the database.",
+                Success = false,
+                Percentage = "10"
+            };
+            return Attempt.Fail(invalidConfiguration);
+        }
+
+        /// <summary>Logs a failed install or upgrade and builds the failure <see cref="Result"/> returned to the installer.</summary>
+        private Result HandleInstallException(Exception ex)
+        {
+            // log at Error level and hand over the exception itself, so the logger receives it whole rather than only message + stack trace
+            LogHelper.Error<DatabaseContext>("Database configuration failed", ex);
+            if (_result != null)
+            {
+                LogHelper.Info<DatabaseContext>("The database schema validation produced the following summary: \n" + _result.GetSummary());
+            }
+
+            return new Result
+            {
+                Message = "The database configuration failed with the following message: " + ex.Message +
+                          "\n Please check log file for additional information (can be found in '/App_Data/Logs/UmbracoTraceLog.txt')",
+                Success = false,
+                Percentage = "90"
+            };
+        }
+
         internal class Result
         {
+            public bool RequiresUpgrade { get; set; }
             public string Message { get; set; }
             public bool Success { get; set; }
             public string Percentage { get; set; }
diff --git a/src/Umbraco.Web.UI/install/InstallerRestService.aspx.cs b/src/Umbraco.Web.UI/install/InstallerRestService.aspx.cs
index a2857c5885..248e6c17d2 100644
--- a/src/Umbraco.Web.UI/install/InstallerRestService.aspx.cs
+++ b/src/Umbraco.Web.UI/install/InstallerRestService.aspx.cs
@@ -59,7 +59,7 @@ namespace Umbraco.Web.UI.Install
 
         [WebMethod]
         [ScriptMethod(ResponseFormat = ResponseFormat.Json)]
-        public static string InstallOrUpgrade()
+        public static string Install()
         {
             //if its not configured then we can continue
             if (ApplicationContext.Current == null || ApplicationContext.Current.IsConfigured)
@@ -67,10 +67,43 @@ namespace Umbraco.Web.UI.Install
                 throw new AuthenticationException("The application is already configured");
             }
 
-            LogHelper.Info<InstallerRestService>("Running 'InstallOrUpgrade' service");
+            LogHelper.Info<InstallerRestService>("Running 'Install' service");
 
-            var result = ApplicationContext.Current.DatabaseContext.CreateDatabaseSchemaAndDataOrUpgrade();
+            var result = ApplicationContext.Current.DatabaseContext.CreateDatabaseSchemaAndData();
 
+            if (result.RequiresUpgrade == false)
+            {
+                HandleConnectionStrings();
+            }            
+
+            var js = new JavaScriptSerializer();
+            var jsonResult = js.Serialize(result);
+            return jsonResult;
+        }
+
+        /// <summary>Second installer call: upgrades the database schema/data and returns the outcome serialized as JSON.</summary>
+        [WebMethod]
+        [ScriptMethod(ResponseFormat = ResponseFormat.Json)]
+        public static string Upgrade()
+        {
+            // the installer services may only run while the application is not configured yet
+            var appContext = ApplicationContext.Current;
+            if (appContext == null || appContext.IsConfigured)
+            {
+                throw new AuthenticationException("The application is already configured");
+            }
+
+            LogHelper.Info<InstallerRestService>("Running 'Upgrade' service");
+            var upgradeResult = appContext.DatabaseContext.UpgradeSchemaAndData();
+
+            // same connection string clean-up that Install() performs when no upgrade is required
+            HandleConnectionStrings();
+
+            return new JavaScriptSerializer().Serialize(upgradeResult);
+        }
+
+        private static void HandleConnectionStrings()
+        {
             // Remove legacy umbracoDbDsn configuration setting if it exists and connectionstring also exists
             if (ConfigurationManager.ConnectionStrings[Core.Configuration.GlobalSettings.UmbracoConnectionName] != null)
             {
@@ -82,10 +115,6 @@ namespace Umbraco.Web.UI.Install
                 LogHelper.Error<InstallerRestService>("", ex);
                 throw ex;
             }
-
-            var js = new JavaScriptSerializer();
-            var jsonResult = js.Serialize(result);
-            return jsonResult;
         }
     }
 }
\ No newline at end of file
diff --git a/src/Umbraco.Web.UI/install/steps/database.ascx b/src/Umbraco.Web.UI/install/steps/database.ascx
index 6b6f1c4496..c39eb00980 100644
--- a/src/Umbraco.Web.UI/install/steps/database.ascx
+++ b/src/Umbraco.Web.UI/install/steps/database.ascx
@@ -368,33 +368,81 @@
 
     <script type="text/javascript">
         jQuery(document).ready(function() {
+            
             updateProgressBar("5");
             updateStatusMessage("Connecting to database..");
 
-            $.ajax({
-                type: 'POST',
-                contentType: 'application/json; charset=utf-8',
-                data: '{}',
-                dataType: 'json',
-                url: 'InstallerRestService.aspx/InstallOrUpgrade',
-                success: function(data) {
-                    var json = JSON.parse(data.d);
+            var upgradeTimeout;
 
-                    updateProgressBar(json.Percentage);
-                    updateStatusMessage(json.Message);
-                
-                    if (json.Success) {    
-                        $(".btn-box").show();
-                        $('.ui-progressbar-value').css("background-image", "url(../umbraco_client/installer/images/pbar.gif)");
-                        $(".result-status-container").show();
-                        $(".progress-status-container").hide();
-                    } else {
-                        $(".btn-continue").hide();
-                        $(".btn-back").show();
-                        $(".btn-box").show();
-                    }
+            function upgradeProgress(currProgress) { // simulated progress: re-schedules itself, bumping the bar by one each tick
+                if (currProgress < 90) { // stops scheduling once the value reaches 90
+                    upgradeTimeout = setTimeout(function() { // handle kept so the pending tick can be cancelled with clearTimeout
+                        currProgress++;
+                        updateProgressBar(currProgress.toString());
+                        upgradeProgress(currProgress);
+                    }, 10000); // one tick every 10 seconds
                 }
-            });
+            }
+
+            // Final UI state: on failure hide "continue" and show "back"; on success swap the progress area for the result area.
+            function handleSuccess(json) {
+                if (!json.Success) {
+                    $(".btn-continue").hide();
+                    $(".btn-back").show();
+                    $(".btn-box").show();
+                    return;
+                }
+                $(".btn-box").show();
+                $('.ui-progressbar-value').css("background-image", "url(../umbraco_client/installer/images/pbar.gif)");
+                $(".result-status-container").show();
+                $(".progress-status-container").hide();
+            }
+
+            // Second installer call: runs the upgrade while upgradeProgress() advances the bar; a failed request now ends in the failure UI.
+            function runUpgrade() {
+                $.ajax({
+                    type: 'POST',
+                    contentType: 'application/json; charset=utf-8',
+                    data: '{}',
+                    dataType: 'json',
+                    url: 'InstallerRestService.aspx/Upgrade',
+                    complete: function() { clearTimeout(upgradeTimeout); }, // stop the simulated progress on success AND on failure
+                    success: function(data) {
+                        var json = JSON.parse(data.d);
+                        updateProgressBar(json.Percentage);
+                        updateStatusMessage(json.Message);
+                        handleSuccess(json);
+                    },
+                    error: function() { updateStatusMessage("<p>The upgrade request failed, please check the log file.</p>"); handleSuccess({ Success: false }); }
+                });
+                upgradeProgress(30);
+            }
+
+            // First installer call: installs a new database, or hands over to runUpgrade() when the server reports RequiresUpgrade.
+            function runInstall() {
+                $.ajax({
+                    type: 'POST',
+                    contentType: 'application/json; charset=utf-8',
+                    data: '{}',
+                    dataType: 'json',
+                    url: 'InstallerRestService.aspx/Install',
+                    success: function(data) {
+                        var json = JSON.parse(data.d);
+                        updateProgressBar(json.Percentage);
+                        updateStatusMessage(json.Message);
+                        if (json.RequiresUpgrade) {
+                            runUpgrade();
+                        }
+                        else {
+                            handleSuccess(json);
+                        }
+                    },
+                    error: function() { updateStatusMessage("<p>The install request failed, please check the log file.</p>"); handleSuccess({ Success: false }); } // without this a failed request leaves the page stuck
+                });
+            }
+
+            //kick it off
+            runInstall();
         });
     </script>
 
diff --git a/src/Umbraco.Web/umbraco.presentation/install/utills/LegacyClasses.cs b/src/Umbraco.Web/umbraco.presentation/install/utills/LegacyClasses.cs
index 52fd5d3b9a..c068ac2c01 100644
--- a/src/Umbraco.Web/umbraco.presentation/install/utills/LegacyClasses.cs
+++ b/src/Umbraco.Web/umbraco.presentation/install/utills/LegacyClasses.cs
@@ -66,7 +66,11 @@ namespace umbraco.presentation.install.utills
         {
             LogHelper.Info<p>("Running 'installOrUpgrade' service");
 
-            var result = ApplicationContext.Current.DatabaseContext.CreateDatabaseSchemaAndDataOrUpgrade();
+            var result = ApplicationContext.Current.DatabaseContext.CreateDatabaseSchemaAndData();
+            if (result.RequiresUpgrade)
+            {
+                result = ApplicationContext.Current.DatabaseContext.UpgradeSchemaAndData();
+            }
 
             // Remove legacy umbracoDbDsn configuration setting if it exists and connectionstring also exists
             if (ConfigurationManager.ConnectionStrings[Umbraco.Core.Configuration.GlobalSettings.UmbracoConnectionName] != null)

From 51da5343eae89c71ecda9504fa6255580ceb2151 Mon Sep 17 00:00:00 2001
From: Shannon <sdeminick@gmail.com>
Date: Fri, 13 Dec 2013 17:07:29 +1100
Subject: [PATCH 2/3] Fixes installation issue with rebuilding media cache

Conflicts:
	src/Umbraco.Core/Umbraco.Core.csproj
	src/Umbraco.Web.UI/config/trees.config
	src/Umbraco.Web/Umbraco.Web.csproj
---
 .../RemoveCachedRecycleMediaXml.cs            | 31 ---------------
 src/Umbraco.Core/Umbraco.Core.csproj          |  1 -
 .../RebuildMediaXmlCacheAfterUpgrade.cs       | 38 +++++++++++++++++++
 src/Umbraco.Web/Umbraco.Web.csproj            |  1 +
 4 files changed, 39 insertions(+), 32 deletions(-)
 delete mode 100644 src/Umbraco.Core/Persistence/Migrations/Upgrades/TargetVersionSixTwoZero/RemoveCachedRecycleMediaXml.cs
 create mode 100644 src/Umbraco.Web/Strategies/Migrations/RebuildMediaXmlCacheAfterUpgrade.cs

diff --git a/src/Umbraco.Core/Persistence/Migrations/Upgrades/TargetVersionSixTwoZero/RemoveCachedRecycleMediaXml.cs b/src/Umbraco.Core/Persistence/Migrations/Upgrades/TargetVersionSixTwoZero/RemoveCachedRecycleMediaXml.cs
deleted file mode 100644
index d0945db957..0000000000
--- a/src/Umbraco.Core/Persistence/Migrations/Upgrades/TargetVersionSixTwoZero/RemoveCachedRecycleMediaXml.cs
+++ /dev/null
@@ -1,31 +0,0 @@
-﻿using Umbraco.Core.Configuration;
-using Umbraco.Core.Services;
-
-namespace Umbraco.Core.Persistence.Migrations.Upgrades.TargetVersionSixTwoZero
-{
-    /// <summary>
-    /// Due to this bug: http://issues.umbraco.org/issue/U4-3820 we need to remove the cached media
-    /// xml found in the cmsContentXml table for any media that has been recycled.
-    /// </summary>
-    [Migration("6.2.0", 1, GlobalSettings.UmbracoMigrationName)]
-    public class RemoveCachedRecycleMediaXml : MigrationBase
-    {
-        public override void Up()
-        {
-            //now that the controlId column is renamed and now a string we need to convert
-            if (Context == null || Context.Database == null) return;
-
-            Execute.Code(database =>
-                {
-                    var mediasvc = (MediaService)ApplicationContext.Current.Services.MediaService;
-                    mediasvc.RebuildXmlStructures();
-
-                    return string.Empty;
-                });
-        }
-
-        public override void Down()
-        {
-        }
-    }
-}
\ No newline at end of file
diff --git a/src/Umbraco.Core/Umbraco.Core.csproj b/src/Umbraco.Core/Umbraco.Core.csproj
index d9ff8081c2..021ae04633 100644
--- a/src/Umbraco.Core/Umbraco.Core.csproj
+++ b/src/Umbraco.Core/Umbraco.Core.csproj
@@ -189,7 +189,6 @@
     <Compile Include="Models\PublishedContent\IPublishedContentModelFactory.cs" />
     <Compile Include="Models\PublishedContent\PublishedContentModel.cs" />
     <Compile Include="Models\PublishedContent\PublishedContentModelFactoryResolver.cs" />
-    <Compile Include="Persistence\Migrations\Upgrades\TargetVersionSixTwoZero\RemoveCachedRecycleMediaXml.cs" />
     <Compile Include="PropertyEditors\PropertyCacheValue.cs" />
     <Compile Include="PropertyEditors\PropertyValueCacheAttribute.cs" />
     <Compile Include="PropertyEditors\PropertyValueTypeAttribute.cs" />
diff --git a/src/Umbraco.Web/Strategies/Migrations/RebuildMediaXmlCacheAfterUpgrade.cs b/src/Umbraco.Web/Strategies/Migrations/RebuildMediaXmlCacheAfterUpgrade.cs
new file mode 100644
index 0000000000..72cf6c24ea
--- /dev/null
+++ b/src/Umbraco.Web/Strategies/Migrations/RebuildMediaXmlCacheAfterUpgrade.cs
@@ -0,0 +1,38 @@
+﻿using System;
+using Umbraco.Core;
+using Umbraco.Core.Persistence.Migrations;
+using Umbraco.Core.Services;
+using umbraco.interfaces;
+
+namespace Umbraco.Web.Strategies.Migrations
+{
+    /// <summary>
+    /// This will execute after upgrading to rebuild the xml cache
+    /// </summary>
+    /// <remarks>
+    /// This cannot execute as part of a db migration since we need access to the services/repos.
+    /// 
+    /// This will execute for specific versions - 
+    /// 
+    /// * If the configured version reported by the migration event (e.ConfiguredVersion) is less than or equal to 7.0.0
+    /// </remarks>
+    public class RebuildMediaXmlCacheAfterUpgrade : IApplicationStartupHandler
+    {
+        // Migrations whose configured version is this one or lower trigger a rebuild of the media xml.
+        private static readonly Version LastVersionRequiringRebuild = new Version(7, 0, 0);
+
+        public RebuildMediaXmlCacheAfterUpgrade()
+        {
+            // run our handler each time MigrationRunner raises its Migrated event
+            MigrationRunner.Migrated += MigrationRunner_Migrated;
+        }
+
+        void MigrationRunner_Migrated(MigrationRunner sender, Core.Events.MigrationEventArgs e)
+        {
+            var needsRebuild = e.ConfiguredVersion <= LastVersionRequiringRebuild;
+            if (needsRebuild == false) return;
+            var mediaService = (MediaService)ApplicationContext.Current.Services.MediaService;
+            mediaService.RebuildXmlStructures();
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Umbraco.Web/Umbraco.Web.csproj b/src/Umbraco.Web/Umbraco.Web.csproj
index c9a8e14740..42379fb990 100644
--- a/src/Umbraco.Web/Umbraco.Web.csproj
+++ b/src/Umbraco.Web/Umbraco.Web.csproj
@@ -389,6 +389,7 @@
     <Compile Include="Search\LuceneIndexerExtensions.cs" />
     <Compile Include="Security\ValidateRequestAttempt.cs" />
     <Compile Include="Security\WebSecurity.cs" />
+    <Compile Include="Strategies\Migrations\RebuildMediaXmlCacheAfterUpgrade.cs" />
     <Compile Include="UI\CdfLogger.cs" />
     <Compile Include="umbraco.presentation\CompatibilityHelper.cs" />
     <Compile Include="umbraco.presentation\LegacyClasses.cs" />

From 5aec75385947b7a1e85f3d705dfc3781b52781be Mon Sep 17 00:00:00 2001
From: Stephan <sgay@pilotine.com>
Date: Fri, 13 Dec 2013 12:06:56 +0100
Subject: [PATCH 3/3] U4-3710, -3732 - Fix default ShortStringHelper

---
 src/Umbraco.Core/CoreBootManager.cs           |  13 +-
 src/Umbraco.Core/StringExtensions.cs          |   8 +-
 src/Umbraco.Core/Strings/CleanStringType.cs   |  27 +-
 .../Strings/DefaultShortStringHelper.cs       | 700 ++++++++----------
 .../Strings/Utf8ToAsciiConverter.cs           |   7 +-
 .../DefaultShortStringHelperTests.cs          | 583 ++++++++++-----
 6 files changed, 734 insertions(+), 604 deletions(-)

diff --git a/src/Umbraco.Core/CoreBootManager.cs b/src/Umbraco.Core/CoreBootManager.cs
index 91ee5db40f..7b8b0b5b9d 100644
--- a/src/Umbraco.Core/CoreBootManager.cs
+++ b/src/Umbraco.Core/CoreBootManager.cs
@@ -266,17 +266,10 @@ namespace Umbraco.Core
             PropertyValueConvertersResolver.Current = new PropertyValueConvertersResolver(
                 PluginManager.Current.ResolveTypes<IPropertyValueConverter>());
 
-            // use the new DefaultShortStringHelper but sort-of remain compatible
-            // - use UmbracoSettings UrlReplaceCharacters
-            // - allow underscores in terms, allow leading digits
+            // use the new DefaultShortStringHelper
             ShortStringHelperResolver.Current = new ShortStringHelperResolver(
-                new DefaultShortStringHelper()
-                    .WithConfig(CleanStringType.Url, DefaultShortStringHelper.ApplyUrlReplaceCharacters, 
-                        allowUnderscoreInTerm: true, allowLeadingDigits: true));
-
-            // that was the old one
-            //ShortStringHelperResolver.Current = new ShortStringHelperResolver(
-            //    new LegacyShortStringHelper());
+                //new LegacyShortStringHelper());
+                new DefaultShortStringHelper().WithDefaultConfig());
 
 		    UrlSegmentProviderResolver.Current = new UrlSegmentProviderResolver(
 		        typeof (DefaultUrlSegmentProvider));
diff --git a/src/Umbraco.Core/StringExtensions.cs b/src/Umbraco.Core/StringExtensions.cs
index 4c3cfbeba5..8f4c7a57b1 100644
--- a/src/Umbraco.Core/StringExtensions.cs
+++ b/src/Umbraco.Core/StringExtensions.cs
@@ -801,9 +801,11 @@ namespace Umbraco.Core
                 if (_helper != null)
                     return _helper;
 
-                // there *has* to be a short string helper, even if the resolver has not
-                // been initialized - used the default one with default configuration.
-                _helper = new DefaultShortStringHelper().WithConfig(allowLeadingDigits: true);
+                // we don't want Umbraco to die because the resolver hasn't been initialized
+                // as the ShortStringHelper is too important, so as long as it's not there
+                // already, we use a default one. That should never happen, but...
+                Logging.LogHelper.Warn<IShortStringHelper>("ShortStringHelperResolver.HasCurrent == false, fallback to default.");
+                _helper = new DefaultShortStringHelper().WithDefaultConfig();
                 _helper.Freeze();
                 return _helper;
             }
diff --git a/src/Umbraco.Core/Strings/CleanStringType.cs b/src/Umbraco.Core/Strings/CleanStringType.cs
index 28a801aa54..f681c42d4a 100644
--- a/src/Umbraco.Core/Strings/CleanStringType.cs
+++ b/src/Umbraco.Core/Strings/CleanStringType.cs
@@ -14,6 +14,9 @@ namespace Umbraco.Core.Strings
         // note: you have 32 bits at your disposal
         // 0xffffffff
 
+
+        // masks
+
         /// <summary>
         /// Flag mask for casing.
         /// </summary>
@@ -27,13 +30,19 @@ namespace Umbraco.Core.Strings
         /// <summary>
         /// Flag mask for role.
         /// </summary>
-        RoleMask = 0x030000, // 0xff0000 - 8 possible values
+        RoleMask = 0x070000, // 0xff0000 - 8 possible values
+
+
+        // no value
 
         /// <summary>
         /// No value.
         /// </summary>
         None = 0x00,
 
+
+        // casing values
+
         /// <summary>
         /// Pascal casing eg "PascalCase".
         /// </summary>
@@ -66,9 +75,13 @@ namespace Umbraco.Core.Strings
         /// and is pascal otherwise.</remarks>
         UmbracoCase = 0x20,
 
+
+        // encoding values
+
         /// <summary>
         /// Unicode encoding.
         /// </summary>
+        [Obsolete("Use .Utf8 instead.")]
         Unicode = 0x0100,
 
         /// <summary>
@@ -81,14 +94,22 @@ namespace Umbraco.Core.Strings
         /// </summary>
         Ascii = 0x0400,
 
+
+        // role values
+
         /// <summary>
         ///  Url role.
         /// </summary>
-        Url = 0x010000,
+        UrlSegment = 0x010000,
 
         /// <summary>
         /// Alias role.
         /// </summary>
-        Alias = 0x020000
+        Alias = 0x020000,
+
+        /// <summary>
+        /// FileName role.
+        /// </summary>
+        FileName = 0x040000
     }
 }
diff --git a/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs b/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs
index c3845e7318..bb85984d0d 100644
--- a/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs
+++ b/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs
@@ -1,5 +1,6 @@
 ﻿using System;
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.IO;
 using System.Linq;
 using System.Globalization;
@@ -13,14 +14,13 @@ namespace Umbraco.Core.Strings
     /// <remarks>
     /// <para>Not optimized to work on large bodies of text.</para>
     /// <para>Meant to replace <c>LegacyShortStringHelper</c> where/when backward compatibility is not an issue.</para>
-    /// <para>Full-unicode support is probably not so good.</para>
     /// <para>NOTE: pre-filters run _before_ the string is re-encoded.</para>
     /// </remarks>
     public class DefaultShortStringHelper : IShortStringHelper
     {
         #region Ctor and vars
 
-        static DefaultShortStringHelper()
+        public DefaultShortStringHelper()
         {
             InitializeLegacyUrlReplaceCharacters();
         }
@@ -41,7 +41,7 @@ namespace Umbraco.Core.Strings
 
         private CultureInfo _defaultCulture = CultureInfo.InvariantCulture;
         private bool _frozen;
-        private readonly Dictionary<CultureInfo, Dictionary<CleanStringType, HelperConfig>> _configs = new Dictionary<CultureInfo, Dictionary<CleanStringType, HelperConfig>>();
+        private readonly Dictionary<CultureInfo, Dictionary<CleanStringType, Config>> _configs = new Dictionary<CultureInfo, Dictionary<CleanStringType, Config>>();
 
         // see notes for CleanAsciiString
         //static DefaultShortStringHelper()
@@ -51,11 +51,11 @@ namespace Umbraco.Core.Strings
 
         #endregion
 
-        #region Legacy UrlReplaceCharacters
+        #region Filters
 
-        static readonly Dictionary<string, string> UrlReplaceCharacters = new Dictionary<string, string>();
+        private readonly Dictionary<string, string> _urlReplaceCharacters = new Dictionary<string, string>();
 
-        static void InitializeLegacyUrlReplaceCharacters()
+        private void InitializeLegacyUrlReplaceCharacters()
         {
             var replaceChars = UmbracoSettings.UrlReplaceCharacters;
             if (replaceChars == null) return;
@@ -67,7 +67,7 @@ namespace Umbraco.Core.Strings
                 if (attributes == null) continue;
                 var org = attributes.GetNamedItem("org");
                 if (org != null && org.Value != "")
-                    UrlReplaceCharacters[org.Value] = XmlHelper.GetNodeValue(node);
+                    _urlReplaceCharacters[org.Value] = XmlHelper.GetNodeValue(node);
             }
         }
 
@@ -76,9 +76,21 @@ namespace Umbraco.Core.Strings
         /// </summary>
         /// <param name="s">The string to filter.</param>
         /// <returns>The filtered string.</returns>
-        public static string ApplyUrlReplaceCharacters(string s)
+        public string ApplyUrlReplaceCharacters(string s)
         {
-            return s.ReplaceMany(UrlReplaceCharacters);
+            return s.ReplaceMany(_urlReplaceCharacters);
+        }
+
+        // ok to be static here because it's not configurable in any way
+        private static readonly char[] InvalidFileNameChars =
+            Path.GetInvalidFileNameChars()
+            .Union("!*'();:@&=+$,/?%#[]-~{}\"<>\\^`| ".ToCharArray())
+            .Distinct()
+            .ToArray();
+
+        public static bool IsValidFileNameChar(char c)
+        {
+            return InvalidFileNameChars.Contains(c) == false;
         }
 
         #endregion
@@ -91,6 +103,11 @@ namespace Umbraco.Core.Strings
                 throw new InvalidOperationException("Cannot configure the helper once it is frozen.");            
         }
 
+        /// <summary>
+        /// Sets a default culture.
+        /// </summary>
+        /// <param name="culture">The default culture.</param>
+        /// <returns>The short string helper.</returns>
         public DefaultShortStringHelper WithDefaultCulture(CultureInfo culture)
         {
             EnsureNotFrozen();
@@ -98,75 +115,131 @@ namespace Umbraco.Core.Strings
             return this;
         }
 
-        public DefaultShortStringHelper WithConfig(
-            Func<string, string> preFilter = null, 
-            bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false)
+        public DefaultShortStringHelper WithConfig(Config config)
         {
-            return WithConfig(_defaultCulture, CleanStringType.RoleMask,
-                preFilter, breakTermsOnUpper, allowLeadingDigits, allowUnderscoreInTerm);
+            return WithConfig(_defaultCulture, CleanStringType.RoleMask, config);
         }
 
-        public DefaultShortStringHelper WithConfig(CleanStringType stringRole,
-            Func<string, string> preFilter = null,
-            bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false)
+        public DefaultShortStringHelper WithConfig(CleanStringType stringRole, Config config)
         {
-            return WithConfig(_defaultCulture, stringRole,
-                preFilter, breakTermsOnUpper, allowLeadingDigits, allowUnderscoreInTerm);
+            return WithConfig(_defaultCulture, stringRole, config);
         }
 
-        public DefaultShortStringHelper WithConfig(CultureInfo culture, CleanStringType stringRole,
-            Func<string, string> preFilter = null,
-            bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false)
+        public DefaultShortStringHelper WithConfig(CultureInfo culture, CleanStringType stringRole, Config config)
         {
+            if (config == null)
+                throw new ArgumentNullException("config");
+
             EnsureNotFrozen();
             if (_configs.ContainsKey(culture) == false)
-                _configs[culture] = new Dictionary<CleanStringType, HelperConfig>();
-            _configs[culture][stringRole] = new HelperConfig(preFilter, breakTermsOnUpper, allowLeadingDigits, allowUnderscoreInTerm);
+                _configs[culture] = new Dictionary<CleanStringType, Config>();
+            _configs[culture][stringRole] = config.Clone(); // clone so it can't be changed
             return this;
         }
 
-        internal sealed class HelperConfig
+        /// <summary>
+        /// Sets the default configuration.
+        /// </summary>
+        /// <returns>The short string helper.</returns>
+        public DefaultShortStringHelper WithDefaultConfig()
         {
-            private HelperConfig()
+            return WithConfig(CleanStringType.UrlSegment, new Config
             {
+                PreFilter = ApplyUrlReplaceCharacters,
+                IsTerm = (c, leading) => char.IsLetterOrDigit(c) || c == '_', // letter, digit or underscore
+                StringType = CleanStringType.Utf8 | CleanStringType.LowerCase,
+                BreakTermsOnUpper = false,
+                Separator = '-'
+            }).WithConfig(CleanStringType.FileName, new Config
+            {
+                PreFilter = ApplyUrlReplaceCharacters,
+                IsTerm = (c, leading) => char.IsLetterOrDigit(c) || c == '_', // letter, digit or underscore
+                StringType = CleanStringType.Utf8 | CleanStringType.LowerCase,
+                BreakTermsOnUpper = false,
+                Separator = '-'
+            }).WithConfig(CleanStringType.Alias, new Config
+            {
+                PreFilter = ApplyUrlReplaceCharacters,
+                IsTerm = (c, leading) => leading 
+                    ? char.IsLetter(c) // only letters
+                    : (char.IsLetterOrDigit(c) || c == '_'), // letter, digit or underscore
+                StringType = CleanStringType.Ascii | CleanStringType.UmbracoCase,
+                BreakTermsOnUpper = false
+            });
+        }
+
+        public sealed class Config
+        {
+            public Config()
+            {
+                StringType = CleanStringType.Utf8 | CleanStringType.Unchanged;
                 PreFilter = null;
-                BreakTermsOnUpper = true;
-                AllowLeadingDigits = false;
+                IsTerm = (c, leading) => leading ? char.IsLetter(c) : char.IsLetterOrDigit(c);
+                BreakTermsOnUpper = false;
+                CutAcronymOnNonUpper = false;
+                GreedyAcronyms = false;
+                Separator = Char.MinValue;
             }
 
-            public HelperConfig(Func<string, string> preFilter, bool breakTermsOnUpper, bool allowLeadingDigits, bool allowUnderscoreInTerm)
-                : this()
+            public Config Clone()
             {
-                PreFilter = preFilter;
-                BreakTermsOnUpper = breakTermsOnUpper;
-                AllowLeadingDigits = allowLeadingDigits;
-                AllowUnderscoreInTerm = allowUnderscoreInTerm;
+                return new Config
+                {
+                    PreFilter = PreFilter,
+                    IsTerm = IsTerm,
+                    StringType = StringType,
+                    BreakTermsOnUpper = BreakTermsOnUpper,
+                    CutAcronymOnNonUpper =  CutAcronymOnNonUpper,
+                    GreedyAcronyms =  GreedyAcronyms,
+                    Separator = Separator
+                };
             }
 
-            public Func<string, string> PreFilter { get; private set; }
+            public Func<string, string> PreFilter { get; set; }
+            public Func<char, bool, bool> IsTerm { get; set; }
+
+            public CleanStringType StringType { get; set; }
 
             // indicate whether an uppercase within a term eg "fooBar" is to break
             // into a new term, or to be considered as part of the current term
-            public bool BreakTermsOnUpper { get; private set; }
+            public bool BreakTermsOnUpper { get; set; }
 
-            // indicates whether it is legal to have leading digits, or whether they
-            // should be stripped as any other illegal character
-            public bool AllowLeadingDigits { get; private set; }
-
-            // indicates whether underscore is a valid character in a term or is
-            // to be considered as a separator
-            public bool AllowUnderscoreInTerm { get; private set; }
+            // indicate whether a non-uppercase within an acronym eg "FOOBar" is to cut
+            // the acronym (at "B" or "a" depending on GreedyAcronyms) or to give
+            // up the acronym and treat the term as a word
+            public bool CutAcronymOnNonUpper { get; set; }
 
             // indicates whether acronyms parsing is greedy ie whether "FOObar" is
             // "FOO" + "bar" (greedy) or "FO" + "Obar" (non-greedy)
-            public bool GreedyAcronyms { get { return false; } }
+            public bool GreedyAcronyms { get; set; }
 
-            public static readonly HelperConfig Empty = new HelperConfig();
+            // the separator char
+            // but then how can we tell we don't want any? (char.MinValue, the default, means none)
+            public char Separator { get; set; }
+
+            // extends the config
+            public CleanStringType StringTypeExtend(CleanStringType stringType)
+            {
+                var st = StringType;
+                foreach (var mask in new[] { CleanStringType.CaseMask, CleanStringType.CodeMask })
+                {
+                    var a = stringType & mask;
+                    if (a == 0) continue;
+
+                    st = st & ~mask; // clear what we have
+                    st = st | a; // set the new value
+                }
+                return st;
+            }
+
+            internal static readonly Config NotConfigured = new Config();
         }
 
-        private HelperConfig GetConfig(CleanStringType stringType, CultureInfo culture)
+        private Config GetConfig(CleanStringType stringType, CultureInfo culture)
         {
-            Dictionary<CleanStringType, HelperConfig> config;
+            stringType = stringType & CleanStringType.RoleMask;
+
+            Dictionary<CleanStringType, Config> config;
             if (_configs.ContainsKey(culture))
             {
                 config = _configs[culture];
@@ -184,7 +257,7 @@ namespace Umbraco.Core.Strings
                     return config[CleanStringType.RoleMask];
             }
 
-            return HelperConfig.Empty;
+            return Config.NotConfigured;
         }
 
         #endregion
@@ -252,7 +325,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// </remarks>
         public virtual string CleanStringForSafeAlias(string text)
         {
-            return CleanString(text, CleanStringType.Ascii | CleanStringType.UmbracoCase | CleanStringType.Alias);
+            return CleanStringForSafeAlias(text, _defaultCulture);
         }
 
         /// <summary>
@@ -266,7 +339,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// </remarks>
         public virtual string CleanStringForSafeAlias(string text, CultureInfo culture)
         {
-            return CleanString(text, CleanStringType.Ascii | CleanStringType.UmbracoCase | CleanStringType.Alias, culture);
+            return CleanString(text, CleanStringType.Alias, culture);
         }
 
         /// <summary>
@@ -280,7 +353,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// </remarks>
         public virtual string CleanStringForUrlSegment(string text)
         {
-            return CleanString(text, CleanStringType.Ascii | CleanStringType.LowerCase | CleanStringType.Url, '-');
+            return CleanStringForUrlSegment(text, _defaultCulture);
         }
 
         /// <summary>
@@ -294,11 +367,11 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// </remarks>
         public virtual string CleanStringForUrlSegment(string text, CultureInfo culture)
         {
-            return CleanString(text, CleanStringType.Ascii | CleanStringType.LowerCase | CleanStringType.Url, '-', culture);
+            return CleanString(text, CleanStringType.UrlSegment, culture);
         }
 
         /// <summary>
-        /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a filename,
+        /// Cleans a string, in the context of the default culture, to produce a string that can safely be used as a filename,
         /// both internally (on disk) and externally (as a url).
         /// </summary>
         /// <param name="text">The text to filter.</param>
@@ -306,23 +379,11 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// <remarks>Legacy says this was used to "overcome an issue when Umbraco is used in IE in an intranet environment" but that issue is not documented.</remarks>
         public virtual string CleanStringForSafeFileName(string text)
         {
-            if (string.IsNullOrWhiteSpace(text))
-                return string.Empty;
-
-            text = text.ReplaceMany(Path.GetInvalidFileNameChars(), '-');
-
-            var pos = text.LastIndexOf('.');
-            var name = pos < 0 ? text : text.Substring(0, pos);
-            var ext = pos < 0 ? string.Empty : text.Substring(pos + 1);
-
-            name = CleanString(name, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-');
-            ext = CleanString(ext, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-');
-
-            return pos < 0 ? name : (name + "." + ext);
+            return CleanStringForSafeFileName(text, _defaultCulture);
         }
 
         /// <summary>
-        /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a filename,
+        /// Cleans a string to produce a string that can safely be used as a filename,
         /// both internally (on disk) and externally (as a url).
         /// </summary>
         /// <param name="text">The text to filter.</param>
@@ -335,14 +396,17 @@ function validateSafeAlias(id, value, immediate, callback) {{
 
             text = text.ReplaceMany(Path.GetInvalidFileNameChars(), '-');
 
-            var pos = text.LastIndexOf('.');
-            var name = pos < 0 ? text : text.Substring(0, pos);
-            var ext = pos < 0 ? string.Empty : text.Substring(pos + 1);
+            var name = Path.GetFileNameWithoutExtension(text);
+            var ext = Path.GetExtension(text); // includes the dot, empty if no extension
 
-            name = CleanString(name, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-', culture);
-            ext = CleanString(ext, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-', culture);
+            Debug.Assert(name != null, "name != null");
+            if (name.Length > 0)
+                name = CleanString(name, CleanStringType.FileName, culture);
+            Debug.Assert(ext != null, "ext != null");
+            if (ext.Length > 0)
+                ext = CleanString(ext.Substring(1), CleanStringType.FileName, culture);
 
-            return pos < 0 ? name : (name + "." + ext);
+            return ext.Length > 0 ? (name + "." + ext) : name;
         }
 
         #endregion
@@ -351,7 +415,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
 
         // MS rules & guidelines:
         // - Do capitalize both characters of two-character acronyms, except the first word of a camel-cased identifier.
-        //     eg "DBRate" (pascal) or "ioHelper" (camel) - "specialDBRate" (pascal) or "specialIOHelper" (camel)
+        //     eg "DBRate" (pascal) or "ioHelper" (camel) - "SpecialDBRate" (pascal) or "specialIOHelper" (camel)
         // - Do capitalize only the first character of acronyms with three or more characters, except the first word of a camel-cased identifier.
         //     eg "XmlWriter (pascal) or "htmlReader" (camel) - "SpecialXmlWriter" (pascal) or "specialHtmlReader" (camel)
         // - Do not capitalize any of the characters of any acronyms, whatever their length, at the beginning of a camel-cased identifier.
@@ -376,7 +440,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// <remarks>The string is cleaned in the context of the default culture.</remarks>
         public string CleanString(string text, CleanStringType stringType)
         {
-            return CleanString(text, stringType, char.MinValue, _defaultCulture);
+            return CleanString(text, stringType, _defaultCulture, null);
         }
 
         /// <summary>
@@ -390,7 +454,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// <remarks>The string is cleaned in the context of the default culture.</remarks>
         public string CleanString(string text, CleanStringType stringType, char separator)
         {
-            return CleanString(text, stringType, separator, _defaultCulture);
+            return CleanString(text, stringType, _defaultCulture, separator);
         }
 
         /// <summary>
@@ -403,7 +467,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// <returns>The clean string.</returns>
         public string CleanString(string text, CleanStringType stringType, CultureInfo culture)
         {
-            return CleanString(text, stringType, char.MinValue, culture);
+            return CleanString(text, stringType, culture, null);
         }
 
         /// <summary>
@@ -415,23 +479,12 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// <param name="separator">The separator.</param>
         /// <param name="culture">The culture.</param>
         /// <returns>The clean string.</returns>
-        public virtual string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture)
+        public string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture)
         {
-            var config = GetConfig(stringType & CleanStringType.RoleMask, culture);
-            return CleanString(text, stringType, separator, culture, config);
+            return CleanString(text, stringType, culture, separator);
         }
 
-        /// <summary>
-        /// Cleans a string in the context of a specified culture, using a specified separator and configuration.
-        /// </summary>
-        /// <param name="text">The text to clean.</param>
-        /// <param name="stringType">A flag indicating the target casing and encoding of the string. By default, 
-        /// strings are cleaned up to camelCase and Ascii.</param>
-        /// <param name="separator">The separator.</param>
-        /// <param name="culture">The culture.</param>
-        /// <param name="config">The configuration.</param>
-        /// <returns>The clean string.</returns>
-        private string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture, HelperConfig config)
+        protected virtual string CleanString(string text, CleanStringType stringType, CultureInfo culture, char? separator)
         {
             // be safe
             if (text == null)
@@ -439,13 +492,18 @@ function validateSafeAlias(id, value, immediate, callback) {{
             if (culture == null)
                 throw new ArgumentNullException("culture");
 
+            // get config
+            var config = GetConfig(stringType, culture);
+            stringType = config.StringTypeExtend(stringType);
+
             // apply defaults
             if ((stringType & CleanStringType.CaseMask) == CleanStringType.None)
                 stringType |= CleanStringType.CamelCase;
             if ((stringType & CleanStringType.CodeMask) == CleanStringType.None)
                 stringType |= CleanStringType.Ascii;
 
-            var codeType = stringType & CleanStringType.CodeMask;
+            // use configured unless specified
+            separator = separator ?? config.Separator;
 
             // apply pre-filter
             if (config.PreFilter != null)
@@ -456,231 +514,46 @@ function validateSafeAlias(id, value, immediate, callback) {{
             //    text = ReplaceMany(text, config.Replacements);
 
             // recode
-            text = Recode(text, stringType);
+            var codeType = stringType & CleanStringType.CodeMask;
+            text = codeType == CleanStringType.Ascii 
+                ? Utf8ToAsciiConverter.ToAsciiString(text) 
+                : RemoveSurrogatePairs(text);
 
             // clean
-            switch (codeType)
-            {
-                case CleanStringType.Ascii:
-                    // see note below - don't use CleanAsciiString
-                    //text = CleanAsciiString(text, stringType, separator);
-                    //break;
-                case CleanStringType.Utf8:
-                    text = CleanUtf8String(text, stringType, separator, culture, config);
-                    break;
-                case CleanStringType.Unicode:
-                    throw new NotImplementedException("DefaultShortStringHelper does not handle unicode yet.");
-                default:
-                    throw new ArgumentOutOfRangeException("stringType");
-            }
+            text = CleanCodeString(text, stringType, separator.Value, culture, config);
 
             return text;
         }
 
-        // however proud I can be of that subtle, ascii-optimized code,
-        // benchmarking shows it is an order of magnitude slower that the utf8 version
-        // don't use it - keep it here should anyone be tempted to micro-optimize again...
-        //
-        // beware, it has bugs that are fixed in CleanUtf8String but I'm not going to
-        // bugfix commented code....
-
-        /*
-        internal string CleanAsciiString(string text)
+        private static string RemoveSurrogatePairs(string text)
         {
-            return CleanAsciiString(text, CleanStringType.CamelCase, char.MinValue);
-        }
+            var input = text.ToCharArray();
+            var output = new char[input.Length];
+            var opos = 0;
 
-        internal string CleanAsciiString(string text, CleanStringType caseType, char separator)
-        {
-            int opos = 0, ipos = 0;
-            var state = StateBreak;
-
-            caseType &= CleanStringType.CaseMask;
-
-            //switch (caseType)
-            //{
-            //    case CleanStringType.LowerCase:
-            //        input = text.ToLowerInvariant().ToCharArray();
-            //        break;
-            //    case CleanStringType.UpperCase:
-            //        input = text.ToUpperInvariant().ToCharArray();
-            //        break;
-            //    default:
-            //        input =  text.ToCharArray();
-            //        break;
-            //}
-            // if we apply global ToUpper or ToLower to text here
-            // then we cannot break words on uppercase chars
-            var input = text;
-
-            // because we shouldn't be adding any extra char
-            // it's faster to use an array than a StringBuilder
-            var ilen = input.Length;
-            var output = new char[ilen];
-
-            Func<string, string> termFilter = null;
-
-            for (var i = 0; i < ilen; i++)
+            for (var ipos = 0; ipos < input.Length; ipos++)
             {
-                var idx = ValidStringCharacters.IndexOf(input[i]);
-
-                switch (state)
+                var c = input[ipos];
+                if (char.IsSurrogate(c)) // ignore high surrogate
                 {
-                    case StateBreak:
-                        if (idx >= 0 && (opos > 0 || idx < 26 || idx >= 36))
-                        {
-                            ipos = i;
-                            if (opos > 0 && separator != char.MinValue)
-                                output[opos++] = separator;
-                            state = idx < 36 ? StateWord : StateUp;
-                        }
-                        break;
-
-                    case StateWord:
-                        if (idx < 0 || (_breakTermsOnUpper && idx >= 36))
-                        {
-                            CopyAsciiTerm(input, ipos, output, ref opos, i - ipos, caseType, termFilter, false);
-                            ipos = i;
-                            state = idx < 0 ? StateBreak : StateUp;
-                            if (state != StateBreak && separator != char.MinValue)
-                                output[opos++] = separator;
-                        }
-                        break;
-
-                    case StateAcronym:
-                        if (idx < 36)
-                        {
-                            CopyAsciiTerm(input, ipos, output, ref opos, i - ipos, caseType, termFilter, true);
-                            ipos = i;
-                            state = idx < 0 ? StateBreak : StateWord;
-                            if (state != StateBreak && separator != char.MinValue)
-                                output[opos++] = separator;
-                        }
-                        break;
-
-                    case StateUp:
-                        if (idx >= 0)
-                        {
-                            state = idx < 36 ? StateWord : StateAcronym;
-                        }
-                        else
-                        {
-                            CopyAsciiTerm(input, ipos, output, ref opos, 1, caseType, termFilter, false);
-                            state = StateBreak;
-                        }
-                        break;
-
-                    default:
-                        throw new Exception("Invalid state.");
+                    ipos++; // and skip low surrogate
+                    output[opos++] = '?';
+                }
+                else
+                {
+                    output[opos++] = c;
                 }
-            }
-
-            //Console.WriteLine("xx: ({0}) {1}, {2}, {3}", state, input.Length, ipos, opos);
-            switch (state)
-            {
-                case StateBreak:
-                    break;
-
-                case StateWord:
-                    CopyAsciiTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, termFilter, false);
-                    break;
-
-                case StateAcronym:
-                case StateUp:
-                    CopyAsciiTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, termFilter, true);
-                    break;
-
-                default:
-                    throw new Exception("Invalid state.");
             }
 
             return new string(output, 0, opos);
         }
 
-        internal void CopyAsciiTerm(string input, int ipos, char[] output, ref int opos, int len,
-            CleanStringType caseType, Func<string, string> termFilter, bool isAcronym)
-        {
-            var term = input.Substring(ipos, len);
-            ipos = 0;
+        // here was a subtle, ascii-optimized version of the cleaning code, and I was
+        // very proud of it until benchmarking showed it was an order of magnitude slower
+        // than the utf8 version. Micro-optimizing sometimes isn't such a good idea.
 
-            if (termFilter != null)
-            {
-                term = termFilter(term);
-                len = term.Length;
-            }
-
-            if (isAcronym)
-            {
-                if (caseType == CleanStringType.CamelCase && len <= 2 && opos > 0)
-                    caseType = CleanStringType.Unchanged;
-                else if (caseType == CleanStringType.PascalCase && len <= 2)
-                    caseType = CleanStringType.Unchanged;
-            }
-
-            int idx;
-            switch (caseType)
-            {
-                //case CleanStringType.LowerCase:
-                //case CleanStringType.UpperCase:
-                case CleanStringType.Unchanged:
-                    term.CopyTo(ipos, output, opos, len);
-                    opos += len;
-                    break;
-
-                case CleanStringType.LowerCase:
-                    for (var i = ipos; i < ipos + len; i++)
-                    {
-                        idx = ValidStringCharacters.IndexOf(term[i]);
-                        output[opos++] = ValidStringCharacters[idx >= 36 ? idx - 36 : idx];
-                    }
-                    break;
-
-                case CleanStringType.UpperCase:
-                    for (var i = ipos; i < ipos + len; i++)
-                    {
-                        idx = ValidStringCharacters.IndexOf(term[i]);
-                        output[opos++] = ValidStringCharacters[idx < 26 ? idx + 36 : idx];
-                    }
-                    break;
-
-                case CleanStringType.CamelCase:
-                    idx = ValidStringCharacters.IndexOf(term[ipos]);
-                    if (opos == 0)
-                        output[opos++] = ValidStringCharacters[idx >= 36 ? idx - 36 : idx];
-                    else
-                        output[opos++] = ValidStringCharacters[idx < 26 ? idx + 36 : idx];
-                    for (var i = ipos + 1; i < ipos + len; i++)
-                    {
-                        idx = ValidStringCharacters.IndexOf(term[i]);
-                        output[opos++] = ValidStringCharacters[idx >= 36 ? idx - 36 : idx];
-                    }
-                    break;
-
-                case CleanStringType.PascalCase:
-                    idx = ValidStringCharacters.IndexOf(term[ipos]);
-                    output[opos++] = ValidStringCharacters[idx < 26 ? idx + 36 : idx];
-                    for (var i = ipos + 1; i < ipos + len; i++)
-                    {
-                        idx = ValidStringCharacters.IndexOf(term[i]);
-                        output[opos++] = ValidStringCharacters[idx >= 36 ? idx - 36 : idx];
-                    }
-                    break;
-
-                default:
-                    throw new ArgumentOutOfRangeException("caseType");
-            }
-        }
-        */
-
-        // that's the default code that will work for utf8 strings
-        // will not handle unicode, though
-
-        internal string CleanUtf8String(string text)
-        {
-            return CleanUtf8String(text, CleanStringType.CamelCase, char.MinValue, _defaultCulture, HelperConfig.Empty);
-        }
-
-        internal string CleanUtf8String(string text, CleanStringType caseType, char separator, CultureInfo culture, HelperConfig config)
+        // note: does NOT support surrogate pairs in text
+        internal string CleanCodeString(string text, CleanStringType caseType, char separator, CultureInfo culture, Config config)
         {
             int opos = 0, ipos = 0;
             var state = StateBreak;
@@ -695,21 +568,28 @@ function validateSafeAlias(id, value, immediate, callback) {{
             var ilen = input.Length;
             var output = new char[ilen * 2]; // twice the length should be OK in all cases
 
-            //var termFilter = config.TermFilter;
-
             for (var i = 0; i < ilen; i++)
             {
                 var c = input[i];
-                var isDigit = char.IsDigit(c);
+                var isTerm = config.IsTerm(c, opos == 0);
+
+                //var isDigit = char.IsDigit(c);
                 var isUpper = char.IsUpper(c); // false for digits, symbols...
-                var isLower = char.IsLower(c); // false for digits, symbols...
-                var isUnder = config.AllowUnderscoreInTerm && c == '_';
-                var isTerm = char.IsLetterOrDigit(c) || isUnder;
+                //var isLower = char.IsLower(c); // false for digits, symbols...
+
+                // what should I do with surrogates?
+                // no idea, really, so they are not supported at the moment
+                var isPair = char.IsSurrogate(c);
+                if (isPair)
+                    throw new NotSupportedException("Surrogate pairs are not supported.");
 
                 switch (state)
                 {
+                    // within a break
                     case StateBreak:
-                        if (isTerm && (opos > 0 || (isUnder == false && (config.AllowLeadingDigits || isDigit == false))))
+                        // begin a new term if char is a term char,
+                        // and ( pos > 0 or it's also a valid leading char )
+                        if (isTerm)
                         {
                             ipos = i;
                             if (opos > 0 && separator != char.MinValue)
@@ -718,10 +598,13 @@ function validateSafeAlias(id, value, immediate, callback) {{
                         }
                         break;
 
+                    // within a term / word
                     case StateWord:
+                        // end a term if char is not a term char,
+                        // or ( it's uppercase and we break terms on uppercase)
                         if (isTerm == false || (config.BreakTermsOnUpper && isUpper))
                         {
-                            CopyUtf8Term(input, ipos, output, ref opos, i - ipos, caseType, culture, /*termFilter,*/ false);
+                            CopyTerm(input, ipos, output, ref opos, i - ipos, caseType, culture, false);
                             ipos = i;
                             state = isTerm ? StateUp : StateBreak;
                             if (state != StateBreak && separator != char.MinValue)
@@ -729,27 +612,48 @@ function validateSafeAlias(id, value, immediate, callback) {{
                         }
                         break;
 
+                    // within a term / acronym
                     case StateAcronym:
-                        if (isTerm == false || isLower || isDigit)
+                        // end an acronym if char is not a term char,
+                        // or if it's not uppercase / config
+                        //Console.WriteLine("acro {0} {1}", c, (config.CutAcronymOnNonUpper && isUpper == false));
+                        if (isTerm == false || (config.CutAcronymOnNonUpper && isUpper == false))
                         {
-                            if (isLower && config.GreedyAcronyms == false)
-                                i -= 1;
-                            CopyUtf8Term(input, ipos, output, ref opos, i - ipos, caseType, culture, /*termFilter,*/ true);
-                            ipos = i;
-                            state = isTerm ? StateWord : StateBreak;
-                            if (state != StateBreak && separator != char.MinValue)
-                                output[opos++] = separator;
+                            // whether it's part of the acronym depends on whether we're greedy
+                            if (isTerm && config.GreedyAcronyms == false)
+                                i -= 1; // handle that char again, in another state - not part of the acronym
+                            if (i - ipos > 1) // single-char can't be an acronym
+                            {
+                                CopyTerm(input, ipos, output, ref opos, i - ipos, caseType, culture, true);
+                                ipos = i;
+                                state = isTerm ? StateWord : StateBreak;
+                                if (state != StateBreak && separator != char.MinValue)
+                                    output[opos++] = separator;
+                            }
+                            else if (isTerm)
+                            {
+                                state = StateWord;
+                            }
+                        }
+                        else if (isUpper == false) // isTerm == true
+                        {
+                            // it's a term char and we don't cut...
+                            // keep moving forward as a word
+                            state = StateWord;
                         }
                         break;
 
+                    // within a term / uppercase = could be a word or an acronym
                     case StateUp:
                         if (isTerm)
                         {
+                            // add that char to the term and pick word or acronym
                             state = isUpper ? StateAcronym : StateWord;
                         }
                         else
                         {
-                            CopyUtf8Term(input, ipos, output, ref opos, 1, caseType, culture, /*termFilter,*/ false);
+                            // single char, copy then break
+                            CopyTerm(input, ipos, output, ref opos, 1, caseType, culture, false);
                             state = StateBreak;
                         }
                         break;
@@ -765,12 +669,12 @@ function validateSafeAlias(id, value, immediate, callback) {{
                     break;
 
                 case StateWord:
-                    CopyUtf8Term(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, /*termFilter,*/ false);
+                    CopyTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, false);
                     break;
 
                 case StateAcronym:
                 case StateUp:
-                    CopyUtf8Term(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, /*termFilter,*/ true);
+                    CopyTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, true);
                     break;
 
                 default:
@@ -780,17 +684,15 @@ function validateSafeAlias(id, value, immediate, callback) {{
             return new string(output, 0, opos);
         }
 
-        internal void CopyUtf8Term(string input, int ipos, char[] output, ref int opos, int len,
-            CleanStringType caseType, CultureInfo culture, /*Func<string, string> termFilter,*/ bool isAcronym)
+        // note: supports surrogate pairs in input string
+        internal void CopyTerm(string input, int ipos, char[] output, ref int opos, int len,
+            CleanStringType caseType, CultureInfo culture, bool isAcronym)
         {
             var term = input.Substring(ipos, len);
-            ipos = 0;
-
-            //if (termFilter != null)
-            //{
-            //    term = termFilter(term);
-            //    len = term.Length;
-            //}
+            //Console.WriteLine("TERM \"{0}\" {1} {2}", 
+            //    term, 
+            //    isAcronym ? "acronym" : "word",
+            //    caseType);
 
             if (isAcronym)
             {
@@ -800,48 +702,100 @@ function validateSafeAlias(id, value, immediate, callback) {{
                     caseType = CleanStringType.Unchanged;
             }
 
+            // note: MSDN seems to imply that ToUpper or ToLower preserve the length
+            // of the string, but that this behavior is not guaranteed and could change.
+
             char c;
+            int i;
+            string s;
             switch (caseType)
             {
                 //case CleanStringType.LowerCase:
                 //case CleanStringType.UpperCase:
                 case CleanStringType.Unchanged:
-                    term.CopyTo(ipos, output, opos, len);
+                    term.CopyTo(0, output, opos, len);
                     opos += len;
                     break;
 
                 case CleanStringType.LowerCase:
-                    term.ToLower(culture).CopyTo(ipos, output, opos, len);
-                    opos += len;
+                    term = term.ToLower(culture);
+                    term.CopyTo(0, output, opos, term.Length);
+                    opos += term.Length;
                     break;
 
                 case CleanStringType.UpperCase:
-                    term.ToUpper(culture).CopyTo(ipos, output, opos, len);
-                    opos += len;
+                    term = term.ToUpper(culture);
+                    term.CopyTo(0, output, opos, term.Length);
+                    opos += term.Length;
                     break;
 
                 case CleanStringType.CamelCase:
-                    c = term[ipos++];
-                    output[opos] = opos++ == 0 ? char.ToLower(c, culture) : char.ToUpper(c, culture);
-                    if (len > 1)
-                        term.ToLower(culture).CopyTo(ipos, output, opos, len - 1);
-                    opos += len - 1;
+                    c = term[0];
+                    i = 1; // number of chars consumed by the leading text element
+                    if (char.IsSurrogate(c))
+                    {
+                        s = term.Substring(0, 2); // term is already cut at ipos, so index from 0 (not ipos)
+                        s = opos == 0 ? s.ToLower(culture) : s.ToUpper(culture);
+                        s.CopyTo(0, output, opos, s.Length);
+                        opos += s.Length;
+                        i++; // surrogate pair len is 2
+                    }
+                    else
+                    {
+                        output[opos] = opos++ == 0 ? char.ToLower(c, culture) : char.ToUpper(c, culture);
+                    }
+                    if (len > i)
+                    {
+                        term = term.Substring(i).ToLower(culture); // remainder of the term, lower-cased
+                        term.CopyTo(0, output, opos, term.Length);
+                        opos += term.Length;
+                    }
                     break;
 
                 case CleanStringType.PascalCase:
-                    c = term[ipos++];
-                    output[opos++] = char.ToUpper(c, culture);
-                    if (len > 1)
-                        term.ToLower(culture).CopyTo(ipos, output, opos, len - 1);
-                    opos += len - 1;
+                    c = term[0];
+                    i = 1; // number of chars consumed by the leading text element
+                    if (char.IsSurrogate(c))
+                    {
+                        s = term.Substring(0, 2); // term is already cut at ipos, so index from 0 (not ipos)
+                        s = s.ToUpper(culture);
+                        s.CopyTo(0, output, opos, s.Length);
+                        opos += s.Length;
+                        i++; // surrogate pair len is 2
+                    }
+                    else
+                    {
+                        output[opos++] = char.ToUpper(c, culture);
+                    }
+                    if (len > i)
+                    {
+                        term = term.Substring(i).ToLower(culture); // remainder of the term, lower-cased
+                        term.CopyTo(0, output, opos, term.Length);
+                        opos += term.Length;
+                    }
                     break;
 
                 case CleanStringType.UmbracoCase:
-                    c = term[ipos++];
-                    output[opos] = opos++ == 0 ? c : char.ToUpper(c, culture);
-                    if (len > 1)
-                        term.CopyTo(ipos, output, opos, len - 1);
-                    opos += len - 1;
+                    c = term[0];
+                    i = 1; // number of chars consumed by the leading text element
+                    if (char.IsSurrogate(c))
+                    {
+                        s = term.Substring(0, 2); // term is already cut at ipos, so index from 0 (not ipos)
+                        s = opos == 0 ? s : s.ToUpper(culture);
+                        s.CopyTo(0, output, opos, s.Length);
+                        opos += s.Length;
+                        i++; // surrogate pair len is 2
+                    }
+                    else
+                    {
+                        output[opos] = opos++ == 0 ? c : char.ToUpper(c, culture);
+                    }
+                    if (len > i)
+                    {
+                        term = term.Substring(i); // remainder of the term, casing left unchanged
+                        term.CopyTo(0, output, opos, term.Length);
+                        opos += term.Length;
+                    }
                     break;
 
                 default:
@@ -860,6 +814,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// <param name="separator">The separator, which defaults to a whitespace.</param>
         /// <returns>The splitted text.</returns>
         /// <remarks>Supports Utf8 and Ascii strings, not Unicode strings.</remarks>
+        // NOTE: does not support surrogate pairs at the moment
         public virtual string SplitPascalCasing(string text, char separator)
         {
             // be safe
@@ -904,55 +859,6 @@ function validateSafeAlias(id, value, immediate, callback) {{
 
         #endregion
 
-        #region Recode
-
-        /// <summary>
-        /// Returns a new string containing only characters within the specified code type.
-        /// </summary>
-        /// <param name="text">The string to filter.</param>
-        /// <param name="stringType">The string type.</param>
-        /// <returns>The filtered string.</returns>
-        /// <remarks>If <paramref name="stringType"/> is not <c>Unicode</c> then non-utf8 characters are
-        /// removed. If it is <c>Ascii</c> we try to do some intelligent replacement of accents, etc.</remarks>
-        public virtual string Recode(string text, CleanStringType stringType)
-        {
-            // be safe
-            if (text == null)
-                throw new ArgumentNullException("text");
-
-            var codeType = stringType & CleanStringType.CodeMask;
-
-            // unicode to utf8 or ascii: just remove the unicode chars
-            // utf8 to ascii: try to be clever and replace some chars
-
-            // what's the point?
-            if (codeType == CleanStringType.Unicode)
-                return text;
-
-            return codeType == CleanStringType.Utf8 
-                ? RemoveNonUtf8(text) 
-                : Utf8ToAsciiConverter.ToAsciiString(text);
-        }
-
-        private string RemoveNonUtf8(string text)
-        {
-            var len = text.Length;
-            var output = new char[len]; // we won't be adding chars
-            int opos = 0;
-
-            for (var ipos = 0; ipos < len; ipos++)
-            {
-                var c = text[ipos];
-                if (char.IsSurrogate(c))
-                    ipos++;
-                else
-                    output[opos++] = c;
-            }
-            return new string(output, 0, opos);
-        }
-
-        #endregion
-
         #region ReplaceMany
 
         /// <summary>
diff --git a/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs b/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs
index f93c461fa3..23ac4e3931 100644
--- a/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs
+++ b/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs
@@ -72,8 +72,11 @@ namespace Umbraco.Core.Strings
             var opos = 0;
 
             for (var ipos = 0; ipos < input.Length; ipos++)
-                if (char.IsSurrogate(input[ipos]))
-                    ipos++;
+                if (char.IsSurrogate(input[ipos])) // ignore high surrogate
+                {
+                    ipos++; // and skip low surrogate
+                    output[opos++] = '?';
+                }
                 else
                     ToAscii(input, ipos, output, ref opos);
 
diff --git a/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs b/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs
index b9188730d7..09df7d0abf 100644
--- a/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs
+++ b/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs
@@ -1,6 +1,10 @@
-﻿using System.Collections.Generic;
+﻿using System;
+using System.Collections.Generic;
+using System.Diagnostics;
 using System.Globalization;
+using System.IO;
 using System.Linq;
+using System.Text;
 using System.Text.RegularExpressions;
 using NUnit.Framework;
 using Umbraco.Core;
@@ -26,10 +30,39 @@ namespace Umbraco.Tests.CoreStrings
             // so there still may be utf8 chars even though you want ascii
 
             _helper = new DefaultShortStringHelper()
-                .WithConfig(CleanStringType.Url, StripQuotes, allowLeadingDigits: true)
-                .WithConfig(new CultureInfo("fr-FR"), CleanStringType.Url, FilterFrenchElisions, allowLeadingDigits: true)
-                .WithConfig(CleanStringType.Alias, StripQuotes)
-                .WithConfig(new CultureInfo("fr-FR"), CleanStringType.Alias, WhiteQuotes);
+                .WithConfig(CleanStringType.FileName, new DefaultShortStringHelper.Config
+                {
+                    //PreFilter = ClearFileChars, // done in IsTerm
+                    IsTerm = (c, leading) => (char.IsLetterOrDigit(c) || c == '_') && DefaultShortStringHelper.IsValidFileNameChar(c),
+                    StringType = CleanStringType.LowerCase | CleanStringType.Ascii,
+                    Separator = '-'
+                })
+                .WithConfig(CleanStringType.UrlSegment, new DefaultShortStringHelper.Config
+                {
+                    PreFilter = StripQuotes,
+                    IsTerm = (c, leading) => char.IsLetterOrDigit(c) || c == '_',
+                    StringType = CleanStringType.LowerCase | CleanStringType.Ascii,
+                    Separator = '-'
+                })
+                .WithConfig(new CultureInfo("fr-FR"), CleanStringType.UrlSegment, new DefaultShortStringHelper.Config
+                {
+                    PreFilter = FilterFrenchElisions,
+                    IsTerm = (c, leading) => leading ? char.IsLetter(c) : (char.IsLetterOrDigit(c) || c == '_'),
+                    StringType = CleanStringType.LowerCase | CleanStringType.Ascii,
+                    Separator = '-'
+                })
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    PreFilter = StripQuotes,
+                    IsTerm = (c, leading) => leading ? char.IsLetter(c) : char.IsLetterOrDigit(c),
+                    StringType = CleanStringType.UmbracoCase | CleanStringType.Ascii
+                })
+                .WithConfig(new CultureInfo("fr-FR"), CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    PreFilter = WhiteQuotes,
+                    IsTerm = (c, leading) => leading ? char.IsLetter(c) : char.IsLetterOrDigit(c),
+                    StringType = CleanStringType.UmbracoCase | CleanStringType.Ascii
+                });
 
             ShortStringHelperResolver.Reset();
             ShortStringHelperResolver.Current = new ShortStringHelperResolver(_helper);
@@ -61,6 +94,333 @@ namespace Umbraco.Tests.CoreStrings
             return s;
         }
 
+        [Test]
+        public void CleanStringUnderscoreInTerm()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    // underscore is accepted within terms
+                    IsTerm = (c, leading) => char.IsLetterOrDigit(c) || c == '_',
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo_bar*nil", helper.CleanString("foo_bar nil", CleanStringType.Alias));
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    // underscore is not accepted within terms
+                    IsTerm = (c, leading) => char.IsLetterOrDigit(c),
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*bar*nil", helper.CleanString("foo_bar nil", CleanStringType.Alias));
+        }
+
+        [Test]
+        public void CleanStringLeadingChars()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    // letters and digits are valid leading chars
+                    IsTerm = (c, leading) => char.IsLetterOrDigit(c),
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("0123foo*bar*nil", helper.CleanString("0123foo_bar nil", CleanStringType.Alias));
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    // only letters are valid leading chars
+                    IsTerm = (c, leading) => leading ? char.IsLetter(c) : char.IsLetterOrDigit(c),
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*bar*nil", helper.CleanString("0123foo_bar nil", CleanStringType.Alias));
+            Assert.AreEqual("foo*bar*nil", helper.CleanString("0123 foo_bar nil", CleanStringType.Alias));
+        }
+
+        [Test]
+        public void CleanStringTermOnUpper()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    // uppercase letter means new term
+                    BreakTermsOnUpper = true,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*Bar", helper.CleanString("fooBar", CleanStringType.Alias));
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    // uppercase letter is part of term
+                    BreakTermsOnUpper = false,
+                    Separator = '*'
+                });
+            Assert.AreEqual("fooBar", helper.CleanString("fooBar", CleanStringType.Alias));
+        }
+
+        [Test]
+        public void CleanStringAcronymOnNonUpper()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    // non-uppercase letter means cut acronym
+                    CutAcronymOnNonUpper = true,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*BAR*Rnil", helper.CleanString("foo BARRnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BA*Rnil", helper.CleanString("foo BARnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BAnil", helper.CleanString("foo BAnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*Bnil", helper.CleanString("foo Bnil", CleanStringType.Alias));
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    // non-uppercase letter means word
+                    CutAcronymOnNonUpper = false,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*BARRnil", helper.CleanString("foo BARRnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BARnil", helper.CleanString("foo BARnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BAnil", helper.CleanString("foo BAnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*Bnil", helper.CleanString("foo Bnil", CleanStringType.Alias));
+        }
+
+        [Test]
+        public void CleanStringGreedyAcronyms()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    CutAcronymOnNonUpper = true,
+                    GreedyAcronyms = true,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*BARR*nil", helper.CleanString("foo BARRnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BAR*nil", helper.CleanString("foo BARnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BA*nil", helper.CleanString("foo BAnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*Bnil", helper.CleanString("foo Bnil", CleanStringType.Alias));
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    CutAcronymOnNonUpper = true,
+                    GreedyAcronyms = false,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*BAR*Rnil", helper.CleanString("foo BARRnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BA*Rnil", helper.CleanString("foo BARnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BAnil", helper.CleanString("foo BAnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*Bnil", helper.CleanString("foo Bnil", CleanStringType.Alias));
+        }
+
+        [Test]
+        public void CleanStringWhiteSpace()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo", helper.CleanString("   foo   ", CleanStringType.Alias));
+            Assert.AreEqual("foo*bar", helper.CleanString("   foo   bar   ", CleanStringType.Alias));
+        }
+
+        [Test]
+        public void CleanStringSeparator()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*bar", helper.CleanString("foo bar", CleanStringType.Alias));
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = ' '
+                });
+            Assert.AreEqual("foo bar", helper.CleanString("foo bar", CleanStringType.Alias));
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged
+                });
+            Assert.AreEqual("foobar", helper.CleanString("foo bar", CleanStringType.Alias));
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '文'
+                });
+            Assert.AreEqual("foo文bar", helper.CleanString("foo bar", CleanStringType.Alias));
+        }
+
+        [Test]
+        public void CleanStringSymbols()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("house*2", helper.CleanString("house (2)", CleanStringType.Alias));
+            
+            // FIXME but for a filename we want to keep them!
+            // FIXME and what about a url?
+        }
+
+        [Test]
+        public void Utf8Surrogates()
+        {
+            // Unicode code points between 0x10000 and 0x10FFFF are represented by two 16-bit "surrogate" characters
+            const string str = "a\U00010F00z\uA74Ft";
+            Assert.AreEqual(6, str.Length);
+            Assert.IsTrue(char.IsSurrogate(str[1]));
+            Assert.IsTrue(char.IsHighSurrogate(str[1]));
+            Assert.IsTrue(char.IsSurrogate(str[2]));
+            Assert.IsTrue(char.IsLowSurrogate(str[2]));
+            Assert.AreEqual('z', str[3]);
+            Assert.IsFalse(char.IsSurrogate(str[4]));
+            Assert.AreEqual('\uA74F', str[4]);
+            Assert.AreEqual('t', str[5]);
+
+            Assert.AreEqual("z", str.Substring(3, 1));
+            Assert.AreEqual("\U00010F00", str.Substring(1, 2));
+
+            var bytes = Encoding.UTF8.GetBytes(str);
+            Assert.AreEqual(10, bytes.Length);
+            Assert.AreEqual('a', bytes[0]);
+            // then next string element is two chars (surrogate pair) or 4 bytes, 21 bits of code point
+            Assert.AreEqual('z', bytes[5]);
+            // then next string element is one char and 3 bytes, 16 bits of code point
+            Assert.AreEqual('t', bytes[9]);
+            //foreach (var b in bytes)
+            //    Console.WriteLine("{0:X}", b);
+
+            Console.WriteLine("\U00010B70");
+        }
+
+        [Test]
+        public void Utf8ToAsciiConverter()
+        {
+            const string str = "a\U00010F00z\uA74Ftéô";
+            var output = Core.Strings.Utf8ToAsciiConverter.ToAsciiString(str);
+            Assert.AreEqual("a?zooteo", output);
+        }
+
+        [Test]
+        public void CleanStringEncoding()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("中文测试", helper.CleanString("中文测试", CleanStringType.Alias)); // utf8 config: non-ascii text comes back as-is
+            Assert.AreEqual("léger*中文测试*ZÔRG", helper.CleanString("léger 中文测试 ZÔRG", CleanStringType.Alias)); // spaces become the '*' separator, accents and casing kept
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Ascii | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("", helper.CleanString("中文测试", CleanStringType.Alias)); // ascii config: nothing is left of the CJK-only input
+            Assert.AreEqual("leger*ZORG", helper.CleanString("léger 中文测试 ZÔRG", CleanStringType.Alias)); // accents folded to ascii, CJK term dropped
+        }
+
+        [Test]
+        public void CleanStringDefaultConfig()
+        {
+            var helper = new DefaultShortStringHelper().WithDefaultConfig();
+
+            const string input = "0123 中文测试 中文测试 léger ZÔRG (2) a?? *x"; // one input, cleaned three different ways below
+
+            var alias = helper.CleanStringForSafeAlias(input);
+            var filename = helper.CleanStringForSafeFileName(input);
+            var segment = helper.CleanStringForUrlSegment(input);
+
+            // umbraco-cased ascii alias, must begin with a proper letter (expected value drops the leading digits and the CJK terms)
+            Assert.AreEqual("legerZORG2AX", alias, "alias");
+
+            // lower-cased, utf8 filename, removing illegal filename chars, using dash-separator
+            Assert.AreEqual("0123-中文测试-中文测试-léger-zôrg-2-a-x", filename, "filename");
+
+            // lower-cased, utf8 url segment, only letters and digits, using dash-separator
+            Assert.AreEqual("0123-中文测试-中文测试-léger-zôrg-2-a-x", segment, "segment");
+        }
+
+        [Test]
+        public void CleanStringCasing()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = ' '
+                });
+
+            // BBB is an acronym
+            // E is a word (too short to be an acronym)
+            // FF is an acronym
+
+            // FIXME "C" can't be an acronym
+            // FIXME "DBXreview" = acronym?!
+
+            Assert.AreEqual("aaa BBB CCc Ddd E FF", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias)); // unchanged
+            Assert.AreEqual("aaa Bbb Ccc Ddd E FF", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias | CleanStringType.CamelCase));
+            Assert.AreEqual("Aaa Bbb Ccc Ddd E FF", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias | CleanStringType.PascalCase));
+            Assert.AreEqual("aaa bbb ccc ddd e ff", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias | CleanStringType.LowerCase));
+            Assert.AreEqual("AAA BBB CCC DDD E FF", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias | CleanStringType.UpperCase));
+            Assert.AreEqual("aaa BBB CCc Ddd E FF", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias | CleanStringType.UmbracoCase));
+
+            // MS rules & guidelines:
+            // - Do capitalize both characters of two-character acronyms, except the first word of a camel-cased identifier.
+            //     e.g. "DBRate" (pascal) or "ioHelper" (camel) - "SpecialDBRate" (pascal) or "specialIOHelper" (camel)
+            // - Do capitalize only the first character of acronyms with three or more characters, except the first word of a camel-cased identifier.
+            //     e.g. "XmlWriter" (pascal) or "htmlReader" (camel) - "SpecialXmlWriter" (pascal) or "specialHtmlReader" (camel)
+            // - Do not capitalize any of the characters of any acronyms, whatever their length, at the beginning of a camel-cased identifier.
+            //     e.g. "xmlWriter" or "dbWriter" (camel)
+
+            Assert.AreEqual("aaa BB Ccc", helper.CleanString("aaa BB ccc", CleanStringType.Alias | CleanStringType.CamelCase));
+            Assert.AreEqual("aa Bb Ccc", helper.CleanString("AA bb ccc", CleanStringType.Alias | CleanStringType.CamelCase));
+            Assert.AreEqual("aaa Bb Ccc", helper.CleanString("AAA bb ccc", CleanStringType.Alias | CleanStringType.CamelCase));
+            Assert.AreEqual("db Rate", helper.CleanString("DB rate", CleanStringType.Alias | CleanStringType.CamelCase));
+            Assert.AreEqual("special DB Rate", helper.CleanString("special DB rate", CleanStringType.Alias | CleanStringType.CamelCase));
+            Assert.AreEqual("xml Writer", helper.CleanString("XML writer", CleanStringType.Alias | CleanStringType.CamelCase));
+            Assert.AreEqual("special Xml Writer", helper.CleanString("special XML writer", CleanStringType.Alias | CleanStringType.CamelCase));
+
+            Assert.AreEqual("Aaa BB Ccc", helper.CleanString("aaa BB ccc", CleanStringType.Alias | CleanStringType.PascalCase));
+            Assert.AreEqual("AA Bb Ccc", helper.CleanString("AA bb ccc", CleanStringType.Alias | CleanStringType.PascalCase));
+            Assert.AreEqual("Aaa Bb Ccc", helper.CleanString("AAA bb ccc", CleanStringType.Alias | CleanStringType.PascalCase));
+            Assert.AreEqual("DB Rate", helper.CleanString("DB rate", CleanStringType.Alias | CleanStringType.PascalCase));
+            Assert.AreEqual("Special DB Rate", helper.CleanString("special DB rate", CleanStringType.Alias | CleanStringType.PascalCase));
+            Assert.AreEqual("Xml Writer", helper.CleanString("XML writer", CleanStringType.Alias | CleanStringType.PascalCase));
+            Assert.AreEqual("Special Xml Writer", helper.CleanString("special XML writer", CleanStringType.Alias | CleanStringType.PascalCase));
+        }
+
         #region Cases
         [TestCase("foo", "foo")]
         [TestCase("    foo    ", "foo")]
@@ -100,29 +460,29 @@ namespace Umbraco.Tests.CoreStrings
             Assert.AreEqual(expected, output);
         }
 
-        #region Cases
-        [TestCase("This is my_little_house so cute.", "thisIsMyLittleHouseSoCute", false)]
-        [TestCase("This is my_little_house so cute.", "thisIsMy_little_houseSoCute", true)]
-        [TestCase("This is my_Little_House so cute.", "thisIsMyLittleHouseSoCute", false)]
-        [TestCase("This is my_Little_House so cute.", "thisIsMy_Little_HouseSoCute", true)]
-        [TestCase("An UPPER_CASE_TEST to check", "anUpperCaseTestToCheck", false)]
-        [TestCase("An UPPER_CASE_TEST to check", "anUpper_case_testToCheck", true)]
-        [TestCase("Trailing_", "trailing", false)]
-        [TestCase("Trailing_", "trailing_", true)]
-        [TestCase("_Leading", "leading", false)]
-        [TestCase("_Leading", "leading", true)]
-        [TestCase("Repeat___Repeat", "repeatRepeat", false)]
-        [TestCase("Repeat___Repeat", "repeat___Repeat", true)]
-        [TestCase("Repeat___repeat", "repeatRepeat", false)]
-        [TestCase("Repeat___repeat", "repeat___repeat", true)]
-        #endregion
-        public void CleanStringWithUnderscore(string input, string expected, bool allowUnderscoreInTerm)
-        {
-            var helper = new DefaultShortStringHelper()
-                .WithConfig(allowUnderscoreInTerm: allowUnderscoreInTerm);
-            var output = helper.CleanString(input, CleanStringType.Alias | CleanStringType.Ascii | CleanStringType.CamelCase);
-            Assert.AreEqual(expected, output);
-        }
+        //#region Cases
+        //[TestCase("This is my_little_house so cute.", "thisIsMyLittleHouseSoCute", false)]
+        //[TestCase("This is my_little_house so cute.", "thisIsMy_little_houseSoCute", true)]
+        //[TestCase("This is my_Little_House so cute.", "thisIsMyLittleHouseSoCute", false)]
+        //[TestCase("This is my_Little_House so cute.", "thisIsMy_Little_HouseSoCute", true)]
+        //[TestCase("An UPPER_CASE_TEST to check", "anUpperCaseTestToCheck", false)]
+        //[TestCase("An UPPER_CASE_TEST to check", "anUpper_case_testToCheck", true)]
+        //[TestCase("Trailing_", "trailing", false)]
+        //[TestCase("Trailing_", "trailing_", true)]
+        //[TestCase("_Leading", "leading", false)]
+        //[TestCase("_Leading", "leading", true)]
+        //[TestCase("Repeat___Repeat", "repeatRepeat", false)]
+        //[TestCase("Repeat___Repeat", "repeat___Repeat", true)]
+        //[TestCase("Repeat___repeat", "repeatRepeat", false)]
+        //[TestCase("Repeat___repeat", "repeat___repeat", true)]
+        //#endregion
+        //public void CleanStringWithUnderscore(string input, string expected, bool allowUnderscoreInTerm)
+        //{
+        //    var helper = new DefaultShortStringHelper()
+        //        .WithConfig(allowUnderscoreInTerm: allowUnderscoreInTerm);
+        //    var output = helper.CleanString(input, CleanStringType.Alias | CleanStringType.Ascii | CleanStringType.CamelCase);
+        //    Assert.AreEqual(expected, output);
+        //}
 
         #region Cases
         [TestCase("Home Page", "home-page")]
@@ -133,7 +493,6 @@ namespace Umbraco.Tests.CoreStrings
         [TestCase("汉#字*/漢?字", "")]
         [TestCase("Réalösk fix bran#lo'sk", "realosk-fix-bran-losk")]
         [TestCase("200 ways to be happy", "200-ways-to-be-happy")]
-        [TestCase("aBCdEfGhIJK", "a-b-cd-ef-gh-ijk")]
         #endregion
         public void CleanStringForUrlSegment(string input, string expected)
         {
@@ -162,173 +521,19 @@ namespace Umbraco.Tests.CoreStrings
         }
 
         #region Cases
-        [TestCase("foo", "foo")]
-        [TestCase("    foo    ", "foo")]
-        [TestCase("Foo", "foo")]
-        [TestCase("FoO", "foO")]
-        [TestCase("FoO bar", "foOBar")]
-        [TestCase("FoO bar NIL", "foOBarNil")]
-        [TestCase("FoO 33bar 22NIL", "foO33bar22Nil")]
-        [TestCase("FoO 33bar 22NI", "foO33bar22NI")]
-        [TestCase("0foo", "foo")]
-        [TestCase("2foo bar", "fooBar")]
-        [TestCase("9FOO", "foo")]
-        [TestCase("foo-BAR", "fooBar")]
-        [TestCase("foo-BA-dang", "fooBADang")]
-        [TestCase("foo_BAR", "fooBar")]
-        [TestCase("foo'BAR", "fooBar")]
-        [TestCase("sauté dans l'espace", "sautéDansLEspace")]
-        [TestCase("foo\"\"bar", "fooBar")]
-        [TestCase("-foo-", "foo")]
-        [TestCase("_foo_", "foo")]
-        [TestCase("spécial", "spécial")]
-        [TestCase("brô dëk ", "brôDëk")]
-        [TestCase("1235brô dëk ", "brôDëk")]
-        [TestCase("汉#字*/漢?字", "汉字漢字")]
-        [TestCase("aa DB cd EFG X KLMN OP qrst", "aaDBCdEfgXKlmnOPQrst")]
-        [TestCase("AA db cd EFG X KLMN OP qrst", "aaDbCdEfgXKlmnOPQrst")]
-        [TestCase("AAA db cd EFG X KLMN OP qrst", "aaaDbCdEfgXKlmnOPQrst")]
-        [TestCase("quelle élévation à partir", "quelleÉlévationÀPartir")]
-        #endregion
-        public void CleanUtf8String(string input, string expected)
-        {
-            input = _helper.Recode(input, CleanStringType.Utf8);
-            var output = _helper.CleanUtf8String(input);
-            Assert.AreEqual(expected, output);
-        }
-
-        #region Cases
-        [TestCase("sauté dans l'espace", "saute-dans-espace", "fr-FR", CleanStringType.Url | CleanStringType.Ascii | CleanStringType.LowerCase)]
-        [TestCase("sauté dans l'espace", "sauté-dans-espace", "fr-FR", CleanStringType.Url | CleanStringType.Utf8 | CleanStringType.LowerCase)]
+        [TestCase("sauté dans l'espace", "saute-dans-espace", "fr-FR", CleanStringType.UrlSegment | CleanStringType.Ascii | CleanStringType.LowerCase)]
+        [TestCase("sauté dans l'espace", "sauté-dans-espace", "fr-FR", CleanStringType.UrlSegment | CleanStringType.Utf8 | CleanStringType.LowerCase)]
         [TestCase("sauté dans l'espace", "SauteDansLEspace", "fr-FR", CleanStringType.Alias | CleanStringType.Ascii | CleanStringType.PascalCase)]
-        [TestCase("he doesn't want", "he-doesnt-want", null, CleanStringType.Url | CleanStringType.Ascii | CleanStringType.LowerCase)]
+        [TestCase("he doesn't want", "he-doesnt-want", null, CleanStringType.UrlSegment | CleanStringType.Ascii | CleanStringType.LowerCase)]
         [TestCase("he doesn't want", "heDoesntWant", null, CleanStringType.Alias | CleanStringType.Ascii | CleanStringType.CamelCase)]
         #endregion
         public void CleanStringWithTypeAndCulture(string input, string expected, string culture, CleanStringType stringType)
         {
             var cinfo = culture == null ? CultureInfo.InvariantCulture : new CultureInfo(culture);
-            var separator = (stringType & CleanStringType.Url) == CleanStringType.Url ? '-' : char.MinValue;
-            var output = _helper.CleanString(input, stringType, separator, cinfo);
-            Assert.AreEqual(expected, output);
-        }
 
-        #region Cases
-        [TestCase("foo", "foo")]
-        [TestCase("    foo    ", "foo")]
-        [TestCase("Foo", "foo")]
-        [TestCase("FoO", "foO")]
-        [TestCase("FoO bar", "foOBar")]
-        [TestCase("FoO bar NIL", "foOBarNil")]
-        [TestCase("FoO 33bar 22NIL", "foO33bar22Nil")]
-        [TestCase("FoO 33bar 22NI", "foO33bar22NI")]
-        [TestCase("0foo", "foo")]
-        [TestCase("2foo bar", "fooBar")]
-        [TestCase("9FOO", "foo")]
-        [TestCase("foo-BAR", "fooBar")]
-        [TestCase("foo-BA-dang", "fooBADang")]
-        [TestCase("foo_BAR", "fooBar")]
-        [TestCase("foo'BAR", "fooBar")]
-        [TestCase("sauté dans l'espace", "sauteDansLEspace")]
-        [TestCase("foo\"\"bar", "fooBar")]
-        [TestCase("-foo-", "foo")]
-        [TestCase("_foo_", "foo")]
-        [TestCase("spécial", "special")]
-        [TestCase("brô dëk ", "broDek")]
-        [TestCase("1235brô dëk ", "broDek")]
-        [TestCase("汉#字*/漢?字", "")]
-        [TestCase("aa DB cd EFG X KLMN OP qrst", "aaDBCdEfgXKlmnOPQrst")]
-        [TestCase("AA db cd EFG X KLMN OP qrst", "aaDbCdEfgXKlmnOPQrst")]
-        [TestCase("AAA db cd EFG X KLMN OP qrst", "aaaDbCdEfgXKlmnOPQrst")]
-        #endregion
-        public void CleanStringToAscii(string input, string expected)
-        {
-            var output = _helper.CleanString(input, CleanStringType.Ascii | CleanStringType.CamelCase);
-            Assert.AreEqual(expected, output);
-        }
-
-        #region Cases
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "brodeKtzARlanban123pOo", CleanStringType.Unchanged)]
-        [TestCase("    1235brô dëK tzARlan ban123!pOo    ", "brodeKtzARlanban123pOo", CleanStringType.Unchanged)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "BroDeKTzARlanBan123POo", CleanStringType.PascalCase)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "broDeKTzARlanBan123POo", CleanStringType.CamelCase)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "BRODEKTZARLANBAN123POO", CleanStringType.UpperCase)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "brodektzarlanban123poo", CleanStringType.LowerCase)]
-        [TestCase("aa DB cd EFG X KLMN OP qrst", "aaDBCdEfgXKlmnOPQrst", CleanStringType.CamelCase)]
-        [TestCase("aaa DB cd EFG X KLMN OP qrst", "aaaDBCdEfgXKlmnOPQrst", CleanStringType.CamelCase)]
-        [TestCase("aa DB cd EFG X KLMN OP qrst", "AaDBCdEfgXKlmnOPQrst", CleanStringType.PascalCase)]
-        [TestCase("aaa DB cd EFG X KLMN OP qrst", "AaaDBCdEfgXKlmnOPQrst", CleanStringType.PascalCase)]
-        [TestCase("AA db cd EFG X KLMN OP qrst", "aaDbCdEfgXKlmnOPQrst", CleanStringType.CamelCase)]
-        [TestCase("AAA db cd EFG X KLMN OP qrst", "aaaDbCdEfgXKlmnOPQrst", CleanStringType.CamelCase)]
-        [TestCase("AA db cd EFG X KLMN OP qrst", "AADbCdEfgXKlmnOPQrst", CleanStringType.PascalCase)]
-        [TestCase("AAA db cd EFG X KLMN OP qrst", "AaaDbCdEfgXKlmnOPQrst", CleanStringType.PascalCase)]
-        [TestCase("We store some HTML in the DB for performance", "WeStoreSomeHtmlInTheDBForPerformance", CleanStringType.PascalCase)]
-        [TestCase("We store some HTML in the DB for performance", "weStoreSomeHtmlInTheDBForPerformance", CleanStringType.CamelCase)]
-        [TestCase("X is true", "XIsTrue", CleanStringType.PascalCase)]
-        [TestCase("X is true", "xIsTrue", CleanStringType.CamelCase)]
-        [TestCase("IO are slow", "IOAreSlow", CleanStringType.PascalCase)]
-        [TestCase("IO are slow", "ioAreSlow", CleanStringType.CamelCase)]
-        [TestCase("RAM is fast", "RamIsFast", CleanStringType.PascalCase)]
-        [TestCase("RAM is fast", "ramIsFast", CleanStringType.CamelCase)]
-        [TestCase("Tab 1", "tab1", CleanStringType.CamelCase)]
-        [TestCase("Home - Page", "homePage", CleanStringType.CamelCase)]
-        [TestCase("Shannon's Document Type", "shannonSDocumentType", CleanStringType.CamelCase)]
-        [TestCase("Shannon's Document Type", "shannonsDocumentType", CleanStringType.CamelCase | CleanStringType.Alias)]
-        [TestCase("!BADDLY nam-ed Document Type", "baddlyNamEdDocumentType", CleanStringType.CamelCase)]
-        [TestCase("  !BADDLY nam-ed Document Type", "BADDLYnamedDocumentType", CleanStringType.Unchanged)]
-        [TestCase("!BADDLY nam-ed   Document Type", "BaddlyNamEdDocumentType", CleanStringType.PascalCase)]
-        [TestCase("i %Want!thisTo end up In Proper@case", "IWantThisToEndUpInProperCase", CleanStringType.PascalCase)]
-        [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "raksmorgasKeKe", CleanStringType.CamelCase)]
-        [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "RaksmorgasKeKe", CleanStringType.PascalCase)]
-        [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "RaksmorgaskeKe", CleanStringType.Unchanged)]
-        [TestCase("TRii", "TRii", CleanStringType.Unchanged)]
-        [TestCase("**TRii", "TRii", CleanStringType.Unchanged)]
-        [TestCase("TRii", "tRii", CleanStringType.CamelCase)]
-        [TestCase("TRXii", "trXii", CleanStringType.CamelCase)]
-        [TestCase("**TRii", "tRii", CleanStringType.CamelCase)]
-        [TestCase("TRii", "TRii", CleanStringType.PascalCase)]
-        [TestCase("TRXii", "TRXii", CleanStringType.PascalCase)]
-        [TestCase("**TRii", "TRii", CleanStringType.PascalCase)]
-        [TestCase("trII", "trII", CleanStringType.Unchanged)]
-        [TestCase("**trII", "trII", CleanStringType.Unchanged)]
-        [TestCase("trII", "trII", CleanStringType.CamelCase)]
-        [TestCase("**trII", "trII", CleanStringType.CamelCase)]
-        [TestCase("trII", "TrII", CleanStringType.PascalCase)]
-        [TestCase("**trII", "TrII", CleanStringType.PascalCase)]
-        [TestCase("trIIX", "trIix", CleanStringType.CamelCase)]
-        [TestCase("**trIIX", "trIix", CleanStringType.CamelCase)]
-        [TestCase("trIIX", "TrIix", CleanStringType.PascalCase)]
-        [TestCase("**trIIX", "TrIix", CleanStringType.PascalCase)]
-        #endregion
-        public void CleanStringToAsciiWithType(string input, string expected, CleanStringType caseType)
-        {
-            var output = _helper.CleanString(input, caseType | CleanStringType.Ascii);
-            Assert.AreEqual(expected, output);
-        }
-
-        #region Cases
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "bro de K tz A Rlan ban123 p Oo", ' ', CleanStringType.Unchanged)]
-        [TestCase("    1235brô dëK tzARlan ban123!pOo    ", "bro de K tz A Rlan ban123 p Oo", ' ', CleanStringType.Unchanged)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "Bro De K Tz A Rlan Ban123 P Oo", ' ', CleanStringType.PascalCase)]
-        [TestCase("1235brô dëK     tzARlan ban123!pOo", "Bro De K Tz A Rlan Ban123 P Oo", ' ', CleanStringType.PascalCase)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "bro De K Tz A Rlan Ban123 P Oo", ' ', CleanStringType.CamelCase)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "bro-De-K-Tz-A-Rlan-Ban123-P-Oo", '-', CleanStringType.CamelCase)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "BRO-DE-K-TZ-A-RLAN-BAN123-P-OO", '-', CleanStringType.UpperCase)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "bro-de-k-tz-a-rlan-ban123-p-oo", '-', CleanStringType.LowerCase)]
-        [TestCase("Tab 1", "tab 1", ' ', CleanStringType.CamelCase)]
-        [TestCase("Home - Page", "home Page", ' ', CleanStringType.CamelCase)]
-        [TestCase("Shannon's Document Type", "shannon S Document Type", ' ', CleanStringType.CamelCase)]
-        [TestCase("Shannon's Document Type", "shannons Document Type", ' ', CleanStringType.CamelCase | CleanStringType.Alias)]
-        [TestCase("!BADDLY nam-ed Document Type", "baddly Nam Ed Document Type", ' ', CleanStringType.CamelCase)]
-        [TestCase("  !BADDLY nam-ed Document Type", "BADDLY nam ed Document Type", ' ', CleanStringType.Unchanged)]
-        [TestCase("!BADDLY nam-ed   Document Type", "Baddly Nam Ed Document Type", ' ', CleanStringType.PascalCase)]
-        [TestCase("i %Want!thisTo end up In Proper@case", "I Want This To End Up In Proper Case", ' ', CleanStringType.PascalCase)]
-        [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "raksmorgas Ke Ke", ' ', CleanStringType.CamelCase)]
-        [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "Raksmorgas Ke Ke", ' ', CleanStringType.PascalCase)]
-        [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "Raksmorgas ke Ke", ' ', CleanStringType.Unchanged)]
-        #endregion
-        public void CleanStringToAsciiWithTypeAndSeparator(string input, string expected, char separator, CleanStringType caseType)
-        {
-            var output = _helper.CleanString(input, caseType | CleanStringType.Ascii, separator);
+            // picks the proper config per culture
+            // and overrides some stringType params (ascii...)
+            var output = _helper.CleanString(input, stringType, cinfo);
             Assert.AreEqual(expected, output);
         }