diff --git a/src/Umbraco.Core/CoreBootManager.cs b/src/Umbraco.Core/CoreBootManager.cs
index 65b1265aa0..d472150a85 100644
--- a/src/Umbraco.Core/CoreBootManager.cs
+++ b/src/Umbraco.Core/CoreBootManager.cs
@@ -324,17 +324,10 @@ namespace Umbraco.Core
 				// fixme - why not use the following syntax?
                 //PluginManager.Current.ResolveTypes<IPropertyValueConverter>());
 
-            // use the new DefaultShortStringHelper but sort-of remain compatible
-            // - use UmbracoSettings UrlReplaceCharacters
-            // - allow underscores in terms, allow leading digits
+            // use the new DefaultShortStringHelper
             ShortStringHelperResolver.Current = new ShortStringHelperResolver(
-                new DefaultShortStringHelper()
-                    .WithConfig(CleanStringType.Url, DefaultShortStringHelper.ApplyUrlReplaceCharacters, 
-                        allowUnderscoreInTerm: true, allowLeadingDigits: true));
-
-            // that was the old one
-            //ShortStringHelperResolver.Current = new ShortStringHelperResolver(
-            //    new LegacyShortStringHelper());
+                //new LegacyShortStringHelper());
+                new DefaultShortStringHelper().WithDefaultConfig());
 
 		    UrlSegmentProviderResolver.Current = new UrlSegmentProviderResolver(
 		        typeof (DefaultUrlSegmentProvider));
diff --git a/src/Umbraco.Core/StringExtensions.cs b/src/Umbraco.Core/StringExtensions.cs
index 9c230f56ff..de36cca531 100644
--- a/src/Umbraco.Core/StringExtensions.cs
+++ b/src/Umbraco.Core/StringExtensions.cs
@@ -866,9 +866,11 @@ namespace Umbraco.Core
                 if (_helper != null)
                     return _helper;
 
-                // there *has* to be a short string helper, even if the resolver has not
-                // been initialized - used the default one with default configuration.
-                _helper = new DefaultShortStringHelper().WithConfig(allowLeadingDigits: true);
+                // we don't want Umbraco to die because the resolver hasn't been initialized
+                // as the ShortStringHelper is too important, so as long as it's not there
+                // already, we use a default one. That should never happen, but...
+                Logging.LogHelper.Warn<IShortStringHelper>("ShortStringHelperResolver.HasCurrent == false, fallback to default.");
+                _helper = new DefaultShortStringHelper().WithDefaultConfig();
                 _helper.Freeze();
                 return _helper;
             }
diff --git a/src/Umbraco.Core/Strings/CleanStringType.cs b/src/Umbraco.Core/Strings/CleanStringType.cs
index 28a801aa54..f681c42d4a 100644
--- a/src/Umbraco.Core/Strings/CleanStringType.cs
+++ b/src/Umbraco.Core/Strings/CleanStringType.cs
@@ -14,6 +14,9 @@ namespace Umbraco.Core.Strings
         // note: you have 32 bits at your disposal
         // 0xffffffff
 
+
+        // masks
+
         /// <summary>
         /// Flag mask for casing.
         /// </summary>
@@ -27,13 +30,19 @@ namespace Umbraco.Core.Strings
         /// <summary>
         /// Flag mask for role.
         /// </summary>
-        RoleMask = 0x030000, // 0xff0000 - 8 possible values
+        RoleMask = 0x070000, // 0xff0000 - 8 possible values
+
+
+        // no value
 
         /// <summary>
         /// No value.
         /// </summary>
         None = 0x00,
 
+
+        // casing values
+
         /// <summary>
         /// Pascal casing eg "PascalCase".
         /// </summary>
@@ -66,9 +75,13 @@ namespace Umbraco.Core.Strings
         /// and is pascal otherwise.</remarks>
         UmbracoCase = 0x20,
 
+
+        // encoding values
+
         /// <summary>
         /// Unicode encoding.
         /// </summary>
+        [Obsolete("Use .Utf8 instead.")]
         Unicode = 0x0100,
 
         /// <summary>
@@ -81,14 +94,22 @@ namespace Umbraco.Core.Strings
         /// </summary>
         Ascii = 0x0400,
 
+
+        // role values
+
         /// <summary>
         ///  Url role.
         /// </summary>
-        Url = 0x010000,
+        UrlSegment = 0x010000,
 
         /// <summary>
         /// Alias role.
         /// </summary>
-        Alias = 0x020000
+        Alias = 0x020000,
+
+        /// <summary>
+        /// FileName role.
+        /// </summary>
+        FileName = 0x040000
     }
 }
diff --git a/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs b/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs
index 5e0ccae655..b02a8f1521 100644
--- a/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs
+++ b/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs
@@ -1,5 +1,6 @@
 ﻿using System;
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.IO;
 using System.Linq;
 using System.Globalization;
@@ -15,14 +16,13 @@ namespace Umbraco.Core.Strings
     /// <remarks>
     /// <para>Not optimized to work on large bodies of text.</para>
     /// <para>Meant to replace <c>LegacyShortStringHelper</c> where/when backward compatibility is not an issue.</para>
-    /// <para>Full-unicode support is probably not so good.</para>
     /// <para>NOTE: pre-filters run _before_ the string is re-encoded.</para>
     /// </remarks>
     public class DefaultShortStringHelper : IShortStringHelper
     {
         #region Ctor and vars
 
-        static DefaultShortStringHelper()
+        public DefaultShortStringHelper()
         {
             InitializeLegacyUrlReplaceCharacters();
         }
@@ -43,7 +43,7 @@ namespace Umbraco.Core.Strings
 
         private CultureInfo _defaultCulture = CultureInfo.InvariantCulture;
         private bool _frozen;
-        private readonly Dictionary<CultureInfo, Dictionary<CleanStringType, HelperConfig>> _configs = new Dictionary<CultureInfo, Dictionary<CleanStringType, HelperConfig>>();
+        private readonly Dictionary<CultureInfo, Dictionary<CleanStringType, Config>> _configs = new Dictionary<CultureInfo, Dictionary<CleanStringType, Config>>();
 
         // see notes for CleanAsciiString
         //static DefaultShortStringHelper()
@@ -53,11 +53,11 @@ namespace Umbraco.Core.Strings
 
         #endregion
 
-        #region Legacy UrlReplaceCharacters
+        #region Filters
 
-        static readonly Dictionary<string, string> UrlReplaceCharacters = new Dictionary<string, string>();
+        private readonly Dictionary<string, string> _urlReplaceCharacters = new Dictionary<string, string>();
 
-        static void InitializeLegacyUrlReplaceCharacters()
+        private void InitializeLegacyUrlReplaceCharacters()
         {
             foreach (var node in UmbracoConfig.For.UmbracoSettings().RequestHandler.CharCollection)
             {
@@ -71,9 +71,21 @@ namespace Umbraco.Core.Strings
         /// </summary>
         /// <param name="s">The string to filter.</param>
         /// <returns>The filtered string.</returns>
-        public static string ApplyUrlReplaceCharacters(string s)
+        public string ApplyUrlReplaceCharacters(string s)
         {
-            return s.ReplaceMany(UrlReplaceCharacters);
+            return s.ReplaceMany(_urlReplaceCharacters);
+        }
+
+        // ok to be static here because it's not configurable in any way
+        private static readonly char[] InvalidFileNameChars =
+            Path.GetInvalidFileNameChars()
+            .Union("!*'();:@&=+$,/?%#[]-~{}\"<>\\^`| ".ToCharArray())
+            .Distinct()
+            .ToArray();
+
+        public static bool IsValidFileNameChar(char c)
+        {
+            return InvalidFileNameChars.Contains(c) == false;
         }
 
         #endregion
@@ -86,6 +98,11 @@ namespace Umbraco.Core.Strings
                 throw new InvalidOperationException("Cannot configure the helper once it is frozen.");            
         }
 
+        /// <summary>
+        /// Sets a default culture.
+        /// </summary>
+        /// <param name="culture">The default culture.</param>
+        /// <returns>The short string helper.</returns>
         public DefaultShortStringHelper WithDefaultCulture(CultureInfo culture)
         {
             EnsureNotFrozen();
@@ -93,75 +110,131 @@ namespace Umbraco.Core.Strings
             return this;
         }
 
-        public DefaultShortStringHelper WithConfig(
-            Func<string, string> preFilter = null, 
-            bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false)
+        public DefaultShortStringHelper WithConfig(Config config)
         {
-            return WithConfig(_defaultCulture, CleanStringType.RoleMask,
-                preFilter, breakTermsOnUpper, allowLeadingDigits, allowUnderscoreInTerm);
+            return WithConfig(_defaultCulture, CleanStringType.RoleMask, config);
         }
 
-        public DefaultShortStringHelper WithConfig(CleanStringType stringRole,
-            Func<string, string> preFilter = null,
-            bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false)
+        public DefaultShortStringHelper WithConfig(CleanStringType stringRole, Config config)
         {
-            return WithConfig(_defaultCulture, stringRole,
-                preFilter, breakTermsOnUpper, allowLeadingDigits, allowUnderscoreInTerm);
+            return WithConfig(_defaultCulture, stringRole, config);
         }
 
-        public DefaultShortStringHelper WithConfig(CultureInfo culture, CleanStringType stringRole,
-            Func<string, string> preFilter = null,
-            bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false)
+        public DefaultShortStringHelper WithConfig(CultureInfo culture, CleanStringType stringRole, Config config)
         {
+            if (config == null)
+                throw new ArgumentNullException("config");
+
             EnsureNotFrozen();
             if (_configs.ContainsKey(culture) == false)
-                _configs[culture] = new Dictionary<CleanStringType, HelperConfig>();
-            _configs[culture][stringRole] = new HelperConfig(preFilter, breakTermsOnUpper, allowLeadingDigits, allowUnderscoreInTerm);
+                _configs[culture] = new Dictionary<CleanStringType, Config>();
+            _configs[culture][stringRole] = config.Clone(); // clone so it can't be changed
             return this;
         }
 
-        internal sealed class HelperConfig
+        /// <summary>
+        /// Sets the default configuration.
+        /// </summary>
+        /// <returns>The short string helper.</returns>
+        public DefaultShortStringHelper WithDefaultConfig()
         {
-            private HelperConfig()
+            return WithConfig(CleanStringType.UrlSegment, new Config
             {
+                PreFilter = ApplyUrlReplaceCharacters,
+                IsTerm = (c, leading) => char.IsLetterOrDigit(c) || c == '_', // letter, digit or underscore
+                StringType = CleanStringType.Utf8 | CleanStringType.LowerCase,
+                BreakTermsOnUpper = false,
+                Separator = '-'
+            }).WithConfig(CleanStringType.FileName, new Config
+            {
+                PreFilter = ApplyUrlReplaceCharacters,
+                IsTerm = (c, leading) => char.IsLetterOrDigit(c) || c == '_', // letter, digit or underscore
+                StringType = CleanStringType.Utf8 | CleanStringType.LowerCase,
+                BreakTermsOnUpper = false,
+                Separator = '-'
+            }).WithConfig(CleanStringType.Alias, new Config
+            {
+                PreFilter = ApplyUrlReplaceCharacters,
+                IsTerm = (c, leading) => leading 
+                    ? char.IsLetter(c) // only letters
+                    : (char.IsLetterOrDigit(c) || c == '_'), // letter, digit or underscore
+                StringType = CleanStringType.Ascii | CleanStringType.UmbracoCase,
+                BreakTermsOnUpper = false
+            });
+        }
+
+        public sealed class Config
+        {
+            public Config()
+            {
+                StringType = CleanStringType.Utf8 | CleanStringType.Unchanged;
                 PreFilter = null;
-                BreakTermsOnUpper = true;
-                AllowLeadingDigits = false;
+                IsTerm = (c, leading) => leading ? char.IsLetter(c) : char.IsLetterOrDigit(c);
+                BreakTermsOnUpper = false;
+                CutAcronymOnNonUpper = false;
+                GreedyAcronyms = false;
+                Separator = Char.MinValue;
             }
 
-            public HelperConfig(Func<string, string> preFilter, bool breakTermsOnUpper, bool allowLeadingDigits, bool allowUnderscoreInTerm)
-                : this()
+            public Config Clone()
             {
-                PreFilter = preFilter;
-                BreakTermsOnUpper = breakTermsOnUpper;
-                AllowLeadingDigits = allowLeadingDigits;
-                AllowUnderscoreInTerm = allowUnderscoreInTerm;
+                return new Config
+                {
+                    PreFilter = PreFilter,
+                    IsTerm = IsTerm,
+                    StringType = StringType,
+                    BreakTermsOnUpper = BreakTermsOnUpper,
+                    CutAcronymOnNonUpper =  CutAcronymOnNonUpper,
+                    GreedyAcronyms =  GreedyAcronyms,
+                    Separator = Separator
+                };
             }
 
-            public Func<string, string> PreFilter { get; private set; }
+            public Func<string, string> PreFilter { get; set; }
+            public Func<char, bool, bool> IsTerm { get; set; }
+
+            public CleanStringType StringType { get; set; }
 
             // indicate whether an uppercase within a term eg "fooBar" is to break
             // into a new term, or to be considered as part of the current term
-            public bool BreakTermsOnUpper { get; private set; }
+            public bool BreakTermsOnUpper { get; set; }
 
-            // indicates whether it is legal to have leading digits, or whether they
-            // should be stripped as any other illegal character
-            public bool AllowLeadingDigits { get; private set; }
-
-            // indicates whether underscore is a valid character in a term or is
-            // to be considered as a separator
-            public bool AllowUnderscoreInTerm { get; private set; }
+            // indicate whether a non-uppercase within an acronym eg "FOOBar" is to cut
+            // the acronym (at "B" or "a" depending on GreedyAcronyms) or to give
+            // up the acronym and treat the term as a word
+            public bool CutAcronymOnNonUpper { get; set; }
 
             // indicates whether acronyms parsing is greedy ie whether "FOObar" is
             // "FOO" + "bar" (greedy) or "FO" + "Obar" (non-greedy)
-            public bool GreedyAcronyms { get { return false; } }
+            public bool GreedyAcronyms { get; set; }
 
-            public static readonly HelperConfig Empty = new HelperConfig();
+            // the separator char
+            // but then how can we tell we don't want any?
+            public char Separator { get; set; }
+
+            // returns the configured string type, with its casing and encoding flags overridden by those set in stringType
+            public CleanStringType StringTypeExtend(CleanStringType stringType)
+            {
+                var st = StringType;
+                foreach (var mask in new[] { CleanStringType.CaseMask, CleanStringType.CodeMask })
+                {
+                    var a = stringType & mask;
+                    if (a == 0) continue;
+
+                    st = st & ~mask; // clear what we have
+                    st = st | a; // set the new value
+                }
+                return st;
+            }
+
+            internal static readonly Config NotConfigured = new Config();
         }
 
-        private HelperConfig GetConfig(CleanStringType stringType, CultureInfo culture)
+        private Config GetConfig(CleanStringType stringType, CultureInfo culture)
         {
-            Dictionary<CleanStringType, HelperConfig> config;
+            stringType = stringType & CleanStringType.RoleMask;
+
+            Dictionary<CleanStringType, Config> config;
             if (_configs.ContainsKey(culture))
             {
                 config = _configs[culture];
@@ -179,7 +252,7 @@ namespace Umbraco.Core.Strings
                     return config[CleanStringType.RoleMask];
             }
 
-            return HelperConfig.Empty;
+            return Config.NotConfigured;
         }
 
         #endregion
@@ -247,7 +320,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// </remarks>
         public virtual string CleanStringForSafeAlias(string text)
         {
-            return CleanString(text, CleanStringType.Ascii | CleanStringType.UmbracoCase | CleanStringType.Alias);
+            return CleanStringForSafeAlias(text, _defaultCulture);
         }
 
         /// <summary>
@@ -261,7 +334,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// </remarks>
         public virtual string CleanStringForSafeAlias(string text, CultureInfo culture)
         {
-            return CleanString(text, CleanStringType.Ascii | CleanStringType.UmbracoCase | CleanStringType.Alias, culture);
+            return CleanString(text, CleanStringType.Alias, culture);
         }
 
         /// <summary>
@@ -275,7 +348,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// </remarks>
         public virtual string CleanStringForUrlSegment(string text)
         {
-            return CleanString(text, CleanStringType.Ascii | CleanStringType.LowerCase | CleanStringType.Url, '-');
+            return CleanStringForUrlSegment(text, _defaultCulture);
         }
 
         /// <summary>
@@ -289,11 +362,11 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// </remarks>
         public virtual string CleanStringForUrlSegment(string text, CultureInfo culture)
         {
-            return CleanString(text, CleanStringType.Ascii | CleanStringType.LowerCase | CleanStringType.Url, '-', culture);
+            return CleanString(text, CleanStringType.UrlSegment, culture);
         }
 
         /// <summary>
-        /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a filename,
+        /// Cleans a string, in the context of the default culture, to produce a string that can safely be used as a filename,
         /// both internally (on disk) and externally (as a url).
         /// </summary>
         /// <param name="text">The text to filter.</param>
@@ -301,94 +374,11 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// <remarks>Legacy says this was used to "overcome an issue when Umbraco is used in IE in an intranet environment" but that issue is not documented.</remarks>
         public virtual string CleanStringForSafeFileName(string text)
         {
-            // - Original version
-
-            if (String.IsNullOrEmpty(text))
-                return String.Empty;
-
-            text = string.IsNullOrWhiteSpace(text) == false 
-                ? text.ReplaceMany(Path.GetInvalidFileNameChars(), '-') 
-                : string.Empty;
-
-            //Break up the file in name and extension before applying the UrlReplaceCharacters
-            var fileNamePart = text.Substring(0, text.LastIndexOf('.'));
-            var ext = text.Substring(text.LastIndexOf('.'));
-
-            fileNamePart = ApplyUrlReplaceCharacters(fileNamePart);
-
-            text = string.Concat(fileNamePart, ext);
-
-            // Adapted from: http://stackoverflow.com/a/4827510/5018
-            // Combined both Reserved Characters and Character Data 
-            // from http://en.wikipedia.org/wiki/Percent-encoding
-            var stringBuilder = new StringBuilder();
-
-            const string reservedCharacters = "!*'();:@&=+$,/?%#[]-~{}\"<>\\^`| ";
-
-            foreach (var character in text)
-            {
-                if (reservedCharacters.IndexOf(character) == -1)
-                    stringBuilder.Append(character);
-                else
-                    stringBuilder.Append("-");
-            }
-
-            // Remove repeating dashes
-            // From: http://stackoverflow.com/questions/5111967/regex-to-remove-a-specific-repeated-character
-            var reducedString = Regex.Replace(stringBuilder.ToString(), "-+", "-");
-
-            return reducedString;
-
-
-            // - Version 2 (Legacy Short string)
-
-            //const string UmbracoValidAliasCharacters = "_-abcdefghijklmnopqrstuvwxyz1234567890";
-            //const string UmbracoInvalidFirstCharacters = "0123456789";
-            //const string validAliasCharacters = UmbracoValidAliasCharacters;
-            //const string invalidFirstCharacters = UmbracoInvalidFirstCharacters;
-            //var safeString = new StringBuilder();
-            //int aliasLength = text.Length;
-            //for (var i = 0; i < aliasLength; i++)
-            //{
-            //    var currentChar = text.Substring(i, 1);
-            //    if (validAliasCharacters.Contains(currentChar.ToLowerInvariant()))
-            //    {
-            //        // check for camel (if previous character is a space, we'll upper case the current one
-            //        if (safeString.Length == 0 && invalidFirstCharacters.Contains(currentChar.ToLowerInvariant()))
-            //        {
-            //            //currentChar = "";
-            //        }
-            //        else
-            //        {
-            //            if (i < aliasLength - 1 && i > 0 && text.Substring(i - 1, 1) == " ")
-            //                currentChar = currentChar.ToUpperInvariant();
-
-            //            safeString.Append(currentChar);
-            //        }
-            //    }
-            //}
-            //return safeString.ToString();
-
-
-            // - Version 3 (Default short string)
-
-            //if (string.IsNullOrWhiteSpace(text))
-            //    return string.Empty;
-
-            //text = text.ReplaceMany(Path.GetInvalidFileNameChars(), '-');
-
-            //var pos = text.LastIndexOf('.');
-            //var name = pos < 0 ? text : text.Substring(0, pos);
-            //var ext = pos < 0 ? string.Empty : text.Substring(pos + 1);
-
-            //name = CleanString(name, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-');
-            //ext = CleanString(ext, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-');
-
-            //return pos < 0 ? name : (name + "." + ext);
+            return CleanStringForSafeFileName(text, _defaultCulture);
         }
 
         /// <summary>
-        /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a filename,
+        /// Cleans a string to produce a string that can safely be used as a filename,
         /// both internally (on disk) and externally (as a url).
         /// </summary>
         /// <param name="text">The text to filter.</param>
@@ -401,14 +391,17 @@ function validateSafeAlias(id, value, immediate, callback) {{
 
             text = text.ReplaceMany(Path.GetInvalidFileNameChars(), '-');
 
-            var pos = text.LastIndexOf('.');
-            var name = pos < 0 ? text : text.Substring(0, pos);
-            var ext = pos < 0 ? string.Empty : text.Substring(pos + 1);
+            var name = Path.GetFileNameWithoutExtension(text);
+            var ext = Path.GetExtension(text); // includes the dot, empty if no extension
 
-            name = CleanString(name, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-', culture);
-            ext = CleanString(ext, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-', culture);
+            Debug.Assert(name != null, "name != null");
+            if (name.Length > 0)
+                name = CleanString(name, CleanStringType.FileName, culture);
+            Debug.Assert(ext != null, "ext != null");
+            if (ext.Length > 0)
+                ext = CleanString(ext.Substring(1), CleanStringType.FileName, culture);
 
-            return pos < 0 ? name : (name + "." + ext);
+            return ext.Length > 0 ? (name + "." + ext) : name;
         }
 
         #endregion
@@ -417,7 +410,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
 
         // MS rules & guidelines:
         // - Do capitalize both characters of two-character acronyms, except the first word of a camel-cased identifier.
-        //     eg "DBRate" (pascal) or "ioHelper" (camel) - "specialDBRate" (pascal) or "specialIOHelper" (camel)
+        //     eg "DBRate" (pascal) or "ioHelper" (camel) - "SpecialDBRate" (pascal) or "specialIOHelper" (camel)
         // - Do capitalize only the first character of acronyms with three or more characters, except the first word of a camel-cased identifier.
         //     eg "XmlWriter (pascal) or "htmlReader" (camel) - "SpecialXmlWriter" (pascal) or "specialHtmlReader" (camel)
         // - Do not capitalize any of the characters of any acronyms, whatever their length, at the beginning of a camel-cased identifier.
@@ -442,7 +435,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// <remarks>The string is cleaned in the context of the default culture.</remarks>
         public string CleanString(string text, CleanStringType stringType)
         {
-            return CleanString(text, stringType, char.MinValue, _defaultCulture);
+            return CleanString(text, stringType, _defaultCulture, null);
         }
 
         /// <summary>
@@ -456,7 +449,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// <remarks>The string is cleaned in the context of the default culture.</remarks>
         public string CleanString(string text, CleanStringType stringType, char separator)
         {
-            return CleanString(text, stringType, separator, _defaultCulture);
+            return CleanString(text, stringType, _defaultCulture, separator);
         }
 
         /// <summary>
@@ -469,7 +462,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// <returns>The clean string.</returns>
         public string CleanString(string text, CleanStringType stringType, CultureInfo culture)
         {
-            return CleanString(text, stringType, char.MinValue, culture);
+            return CleanString(text, stringType, culture, null);
         }
 
         /// <summary>
@@ -481,23 +474,12 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// <param name="separator">The separator.</param>
         /// <param name="culture">The culture.</param>
         /// <returns>The clean string.</returns>
-        public virtual string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture)
+        public string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture)
         {
-            var config = GetConfig(stringType & CleanStringType.RoleMask, culture);
-            return CleanString(text, stringType, separator, culture, config);
+            return CleanString(text, stringType, culture, separator);
         }
 
-        /// <summary>
-        /// Cleans a string in the context of a specified culture, using a specified separator and configuration.
-        /// </summary>
-        /// <param name="text">The text to clean.</param>
-        /// <param name="stringType">A flag indicating the target casing and encoding of the string. By default, 
-        /// strings are cleaned up to camelCase and Ascii.</param>
-        /// <param name="separator">The separator.</param>
-        /// <param name="culture">The culture.</param>
-        /// <param name="config">The configuration.</param>
-        /// <returns>The clean string.</returns>
-        private string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture, HelperConfig config)
+        protected virtual string CleanString(string text, CleanStringType stringType, CultureInfo culture, char? separator)
         {
             // be safe
             if (text == null)
@@ -505,13 +487,18 @@ function validateSafeAlias(id, value, immediate, callback) {{
             if (culture == null)
                 throw new ArgumentNullException("culture");
 
+            // get config
+            var config = GetConfig(stringType, culture);
+            stringType = config.StringTypeExtend(stringType);
+
             // apply defaults
             if ((stringType & CleanStringType.CaseMask) == CleanStringType.None)
                 stringType |= CleanStringType.CamelCase;
             if ((stringType & CleanStringType.CodeMask) == CleanStringType.None)
                 stringType |= CleanStringType.Ascii;
 
-            var codeType = stringType & CleanStringType.CodeMask;
+            // use the configured separator unless the caller specified one
+            separator = separator ?? config.Separator;
 
             // apply pre-filter
             if (config.PreFilter != null)
@@ -522,231 +509,46 @@ function validateSafeAlias(id, value, immediate, callback) {{
             //    text = ReplaceMany(text, config.Replacements);
 
             // recode
-            text = Recode(text, stringType);
+            var codeType = stringType & CleanStringType.CodeMask;
+            text = codeType == CleanStringType.Ascii 
+                ? Utf8ToAsciiConverter.ToAsciiString(text) 
+                : RemoveSurrogatePairs(text);
 
             // clean
-            switch (codeType)
-            {
-                case CleanStringType.Ascii:
-                    // see note below - don't use CleanAsciiString
-                    //text = CleanAsciiString(text, stringType, separator);
-                    //break;
-                case CleanStringType.Utf8:
-                    text = CleanUtf8String(text, stringType, separator, culture, config);
-                    break;
-                case CleanStringType.Unicode:
-                    throw new NotImplementedException("DefaultShortStringHelper does not handle unicode yet.");
-                default:
-                    throw new ArgumentOutOfRangeException("stringType");
-            }
+            text = CleanCodeString(text, stringType, separator.Value, culture, config);
 
             return text;
         }
 
-        // however proud I can be of that subtle, ascii-optimized code,
-        // benchmarking shows it is an order of magnitude slower that the utf8 version
-        // don't use it - keep it here should anyone be tempted to micro-optimize again...
-        //
-        // beware, it has bugs that are fixed in CleanUtf8String but I'm not going to
-        // bugfix commented code....
-
-        /*
-        internal string CleanAsciiString(string text)
+        private static string RemoveSurrogatePairs(string text)
         {
-            return CleanAsciiString(text, CleanStringType.CamelCase, char.MinValue);
-        }
+            var input = text.ToCharArray();
+            var output = new char[input.Length];
+            var opos = 0;
 
-        internal string CleanAsciiString(string text, CleanStringType caseType, char separator)
-        {
-            int opos = 0, ipos = 0;
-            var state = StateBreak;
-
-            caseType &= CleanStringType.CaseMask;
-
-            //switch (caseType)
-            //{
-            //    case CleanStringType.LowerCase:
-            //        input = text.ToLowerInvariant().ToCharArray();
-            //        break;
-            //    case CleanStringType.UpperCase:
-            //        input = text.ToUpperInvariant().ToCharArray();
-            //        break;
-            //    default:
-            //        input =  text.ToCharArray();
-            //        break;
-            //}
-            // if we apply global ToUpper or ToLower to text here
-            // then we cannot break words on uppercase chars
-            var input = text;
-
-            // because we shouldn't be adding any extra char
-            // it's faster to use an array than a StringBuilder
-            var ilen = input.Length;
-            var output = new char[ilen];
-
-            Func<string, string> termFilter = null;
-
-            for (var i = 0; i < ilen; i++)
+            for (var ipos = 0; ipos < input.Length; ipos++)
             {
-                var idx = ValidStringCharacters.IndexOf(input[i]);
-
-                switch (state)
+                var c = input[ipos];
+                if (char.IsSurrogate(c)) // ignore high surrogate
                 {
-                    case StateBreak:
-                        if (idx >= 0 && (opos > 0 || idx < 26 || idx >= 36))
-                        {
-                            ipos = i;
-                            if (opos > 0 && separator != char.MinValue)
-                                output[opos++] = separator;
-                            state = idx < 36 ? StateWord : StateUp;
-                        }
-                        break;
-
-                    case StateWord:
-                        if (idx < 0 || (_breakTermsOnUpper && idx >= 36))
-                        {
-                            CopyAsciiTerm(input, ipos, output, ref opos, i - ipos, caseType, termFilter, false);
-                            ipos = i;
-                            state = idx < 0 ? StateBreak : StateUp;
-                            if (state != StateBreak && separator != char.MinValue)
-                                output[opos++] = separator;
-                        }
-                        break;
-
-                    case StateAcronym:
-                        if (idx < 36)
-                        {
-                            CopyAsciiTerm(input, ipos, output, ref opos, i - ipos, caseType, termFilter, true);
-                            ipos = i;
-                            state = idx < 0 ? StateBreak : StateWord;
-                            if (state != StateBreak && separator != char.MinValue)
-                                output[opos++] = separator;
-                        }
-                        break;
-
-                    case StateUp:
-                        if (idx >= 0)
-                        {
-                            state = idx < 36 ? StateWord : StateAcronym;
-                        }
-                        else
-                        {
-                            CopyAsciiTerm(input, ipos, output, ref opos, 1, caseType, termFilter, false);
-                            state = StateBreak;
-                        }
-                        break;
-
-                    default:
-                        throw new Exception("Invalid state.");
+                    ipos++; // and skip low surrogate
+                    output[opos++] = '?';
+                }
+                else
+                {
+                    output[opos++] = c;
                 }
-            }
-
-            //Console.WriteLine("xx: ({0}) {1}, {2}, {3}", state, input.Length, ipos, opos);
-            switch (state)
-            {
-                case StateBreak:
-                    break;
-
-                case StateWord:
-                    CopyAsciiTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, termFilter, false);
-                    break;
-
-                case StateAcronym:
-                case StateUp:
-                    CopyAsciiTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, termFilter, true);
-                    break;
-
-                default:
-                    throw new Exception("Invalid state.");
             }
 
             return new string(output, 0, opos);
         }
 
-        internal void CopyAsciiTerm(string input, int ipos, char[] output, ref int opos, int len,
-            CleanStringType caseType, Func<string, string> termFilter, bool isAcronym)
-        {
-            var term = input.Substring(ipos, len);
-            ipos = 0;
+        // here was a subtle, ascii-optimized version of the cleaning code, and I was
+        // very proud of it until benchmarking showed it was an order of magnitude slower
+        // than the utf8 version. Micro-optimizing sometimes isn't such a good idea.
 
-            if (termFilter != null)
-            {
-                term = termFilter(term);
-                len = term.Length;
-            }
-
-            if (isAcronym)
-            {
-                if (caseType == CleanStringType.CamelCase && len <= 2 && opos > 0)
-                    caseType = CleanStringType.Unchanged;
-                else if (caseType == CleanStringType.PascalCase && len <= 2)
-                    caseType = CleanStringType.Unchanged;
-            }
-
-            int idx;
-            switch (caseType)
-            {
-                //case CleanStringType.LowerCase:
-                //case CleanStringType.UpperCase:
-                case CleanStringType.Unchanged:
-                    term.CopyTo(ipos, output, opos, len);
-                    opos += len;
-                    break;
-
-                case CleanStringType.LowerCase:
-                    for (var i = ipos; i < ipos + len; i++)
-                    {
-                        idx = ValidStringCharacters.IndexOf(term[i]);
-                        output[opos++] = ValidStringCharacters[idx >= 36 ? idx - 36 : idx];
-                    }
-                    break;
-
-                case CleanStringType.UpperCase:
-                    for (var i = ipos; i < ipos + len; i++)
-                    {
-                        idx = ValidStringCharacters.IndexOf(term[i]);
-                        output[opos++] = ValidStringCharacters[idx < 26 ? idx + 36 : idx];
-                    }
-                    break;
-
-                case CleanStringType.CamelCase:
-                    idx = ValidStringCharacters.IndexOf(term[ipos]);
-                    if (opos == 0)
-                        output[opos++] = ValidStringCharacters[idx >= 36 ? idx - 36 : idx];
-                    else
-                        output[opos++] = ValidStringCharacters[idx < 26 ? idx + 36 : idx];
-                    for (var i = ipos + 1; i < ipos + len; i++)
-                    {
-                        idx = ValidStringCharacters.IndexOf(term[i]);
-                        output[opos++] = ValidStringCharacters[idx >= 36 ? idx - 36 : idx];
-                    }
-                    break;
-
-                case CleanStringType.PascalCase:
-                    idx = ValidStringCharacters.IndexOf(term[ipos]);
-                    output[opos++] = ValidStringCharacters[idx < 26 ? idx + 36 : idx];
-                    for (var i = ipos + 1; i < ipos + len; i++)
-                    {
-                        idx = ValidStringCharacters.IndexOf(term[i]);
-                        output[opos++] = ValidStringCharacters[idx >= 36 ? idx - 36 : idx];
-                    }
-                    break;
-
-                default:
-                    throw new ArgumentOutOfRangeException("caseType");
-            }
-        }
-        */
-
-        // that's the default code that will work for utf8 strings
-        // will not handle unicode, though
-
-        internal string CleanUtf8String(string text)
-        {
-            return CleanUtf8String(text, CleanStringType.CamelCase, char.MinValue, _defaultCulture, HelperConfig.Empty);
-        }
-
-        internal string CleanUtf8String(string text, CleanStringType caseType, char separator, CultureInfo culture, HelperConfig config)
+        // note: does NOT support surrogate pairs in text
+        internal string CleanCodeString(string text, CleanStringType caseType, char separator, CultureInfo culture, Config config)
         {
             int opos = 0, ipos = 0;
             var state = StateBreak;
@@ -761,21 +563,28 @@ function validateSafeAlias(id, value, immediate, callback) {{
             var ilen = input.Length;
             var output = new char[ilen * 2]; // twice the length should be OK in all cases
 
-            //var termFilter = config.TermFilter;
-
             for (var i = 0; i < ilen; i++)
             {
                 var c = input[i];
-                var isDigit = char.IsDigit(c);
+                var isTerm = config.IsTerm(c, opos == 0);
+
+                //var isDigit = char.IsDigit(c);
                 var isUpper = char.IsUpper(c); // false for digits, symbols...
-                var isLower = char.IsLower(c); // false for digits, symbols...
-                var isUnder = config.AllowUnderscoreInTerm && c == '_';
-                var isTerm = char.IsLetterOrDigit(c) || isUnder;
+                //var isLower = char.IsLower(c); // false for digits, symbols...
+
+                // what should I do with surrogates?
+                // no idea, really, so they are not supported at the moment
+                var isPair = char.IsSurrogate(c);
+                if (isPair)
+                    throw new NotSupportedException("Surrogate pairs are not supported.");
 
                 switch (state)
                 {
+                    // within a break
                     case StateBreak:
-                        if (isTerm && (opos > 0 || (isUnder == false && (config.AllowLeadingDigits || isDigit == false))))
+                        // begin a new term if char is a term char - config.IsTerm was
+                        // called with (opos == 0), so leading-char rules are already applied
+                        if (isTerm)
                         {
                             ipos = i;
                             if (opos > 0 && separator != char.MinValue)
@@ -784,10 +593,13 @@ function validateSafeAlias(id, value, immediate, callback) {{
                         }
                         break;
 
+                    // within a term / word
                     case StateWord:
+                        // end a term if char is not a term char,
+                        // or ( it's uppercase and we break terms on uppercase)
                         if (isTerm == false || (config.BreakTermsOnUpper && isUpper))
                         {
-                            CopyUtf8Term(input, ipos, output, ref opos, i - ipos, caseType, culture, /*termFilter,*/ false);
+                            CopyTerm(input, ipos, output, ref opos, i - ipos, caseType, culture, false);
                             ipos = i;
                             state = isTerm ? StateUp : StateBreak;
                             if (state != StateBreak && separator != char.MinValue)
@@ -795,27 +607,48 @@ function validateSafeAlias(id, value, immediate, callback) {{
                         }
                         break;
 
+                    // within a term / acronym
                     case StateAcronym:
-                        if (isTerm == false || isLower || isDigit)
+                        // end an acronym if char is not a term char,
+                        // or if it's not uppercase and config.CutAcronymOnNonUpper is set
+                        //Console.WriteLine("acro {0} {1}", c, (config.CutAcronymOnNonUpper && isUpper == false));
+                        if (isTerm == false || (config.CutAcronymOnNonUpper && isUpper == false))
                         {
-                            if (isLower && config.GreedyAcronyms == false)
-                                i -= 1;
-                            CopyUtf8Term(input, ipos, output, ref opos, i - ipos, caseType, culture, /*termFilter,*/ true);
-                            ipos = i;
-                            state = isTerm ? StateWord : StateBreak;
-                            if (state != StateBreak && separator != char.MinValue)
-                                output[opos++] = separator;
+                            // whether it's part of the acronym depends on whether we're greedy
+                            if (isTerm && config.GreedyAcronyms == false)
+                                i -= 1; // handle that char again, in another state - not part of the acronym
+                            if (i - ipos > 1) // single-char can't be an acronym
+                            {
+                                CopyTerm(input, ipos, output, ref opos, i - ipos, caseType, culture, true);
+                                ipos = i;
+                                state = isTerm ? StateWord : StateBreak;
+                                if (state != StateBreak && separator != char.MinValue)
+                                    output[opos++] = separator;
+                            }
+                            else if (isTerm)
+                            {
+                                state = StateWord;
+                            }
+                        }
+                        else if (isUpper == false) // isTerm == true
+                        {
+                            // it's a term char and we don't cut...
+                            // keep moving forward as a word
+                            state = StateWord;
                         }
                         break;
 
+                    // within a term / uppercase = could be a word or an acronym
                     case StateUp:
                         if (isTerm)
                         {
+                            // add that char to the term and pick word or acronym
                             state = isUpper ? StateAcronym : StateWord;
                         }
                         else
                         {
-                            CopyUtf8Term(input, ipos, output, ref opos, 1, caseType, culture, /*termFilter,*/ false);
+                            // single char, copy then break
+                            CopyTerm(input, ipos, output, ref opos, 1, caseType, culture, false);
                             state = StateBreak;
                         }
                         break;
@@ -831,12 +664,12 @@ function validateSafeAlias(id, value, immediate, callback) {{
                     break;
 
                 case StateWord:
-                    CopyUtf8Term(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, /*termFilter,*/ false);
+                    CopyTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, false);
                     break;
 
                 case StateAcronym:
                 case StateUp:
-                    CopyUtf8Term(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, /*termFilter,*/ true);
+                    CopyTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, true);
                     break;
 
                 default:
@@ -846,17 +679,15 @@ function validateSafeAlias(id, value, immediate, callback) {{
             return new string(output, 0, opos);
         }
 
-        internal void CopyUtf8Term(string input, int ipos, char[] output, ref int opos, int len,
-            CleanStringType caseType, CultureInfo culture, /*Func<string, string> termFilter,*/ bool isAcronym)
+        // note: supports surrogate pairs in input string
+        internal void CopyTerm(string input, int ipos, char[] output, ref int opos, int len,
+            CleanStringType caseType, CultureInfo culture, bool isAcronym)
         {
             var term = input.Substring(ipos, len);
-            ipos = 0;
-
-            //if (termFilter != null)
-            //{
-            //    term = termFilter(term);
-            //    len = term.Length;
-            //}
+            //Console.WriteLine("TERM \"{0}\" {1} {2}", 
+            //    term, 
+            //    isAcronym ? "acronym" : "word",
+            //    caseType);
 
             if (isAcronym)
             {
@@ -866,48 +697,100 @@ function validateSafeAlias(id, value, immediate, callback) {{
                     caseType = CleanStringType.Unchanged;
             }
 
+            // note: MSDN seems to imply that ToUpper and ToLower preserve the length of the
+            // string, but this is not guaranteed and could change - so use the result's own length.
+
             char c;
+            int i;
+            string s;
             switch (caseType)
             {
                 //case CleanStringType.LowerCase:
                 //case CleanStringType.UpperCase:
                 case CleanStringType.Unchanged:
-                    term.CopyTo(ipos, output, opos, len);
+                    term.CopyTo(0, output, opos, len);
                     opos += len;
                     break;
 
                 case CleanStringType.LowerCase:
-                    term.ToLower(culture).CopyTo(ipos, output, opos, len);
-                    opos += len;
+                    term = term.ToLower(culture);
+                    term.CopyTo(0, output, opos, term.Length);
+                    opos += term.Length;
                     break;
 
                 case CleanStringType.UpperCase:
-                    term.ToUpper(culture).CopyTo(ipos, output, opos, len);
-                    opos += len;
+                    term = term.ToUpper(culture);
+                    term.CopyTo(0, output, opos, term.Length);
+                    opos += term.Length;
                     break;
 
                 case CleanStringType.CamelCase:
-                    c = term[ipos++];
-                    output[opos] = opos++ == 0 ? char.ToLower(c, culture) : char.ToUpper(c, culture);
-                    if (len > 1)
-                        term.ToLower(culture).CopyTo(ipos, output, opos, len - 1);
-                    opos += len - 1;
+                    c = term[0];
+                    i = 1;
+                    if (char.IsSurrogate(c))
+                    {
+                        s = term.Substring(ipos, 2);
+                        s = opos == 0 ? s.ToLower(culture) : s.ToUpper(culture);
+                        s.CopyTo(0, output, opos, s.Length);
+                        opos += s.Length;
+                        i++; // surrogate pair len is 2
+                    }
+                    else
+                    {
+                        output[opos] = opos++ == 0 ? char.ToLower(c, culture) : char.ToUpper(c, culture);
+                    }
+                    if (len > i)
+                    {
+                        term = term.Substring(i).ToLower(culture);
+                        term.CopyTo(0, output, opos, term.Length);
+                        opos += term.Length;
+                    }
                     break;
 
                 case CleanStringType.PascalCase:
-                    c = term[ipos++];
-                    output[opos++] = char.ToUpper(c, culture);
-                    if (len > 1)
-                        term.ToLower(culture).CopyTo(ipos, output, opos, len - 1);
-                    opos += len - 1;
+                    c = term[0];
+                    i = 1;
+                    if (char.IsSurrogate(c))
+                    {
+                        s = term.Substring(ipos, 2);
+                        s = s.ToUpper(culture);
+                        s.CopyTo(0, output, opos, s.Length);
+                        opos += s.Length;
+                        i++; // surrogate pair len is 2
+                    }
+                    else
+                    {
+                        output[opos++] = char.ToUpper(c, culture);
+                    }
+                    if (len > i)
+                    {
+                        term = term.Substring(i).ToLower(culture);
+                        term.CopyTo(0, output, opos, term.Length);
+                        opos += term.Length;
+                    }
                     break;
 
                 case CleanStringType.UmbracoCase:
-                    c = term[ipos++];
-                    output[opos] = opos++ == 0 ? c : char.ToUpper(c, culture);
-                    if (len > 1)
-                        term.CopyTo(ipos, output, opos, len - 1);
-                    opos += len - 1;
+                    c = term[0];
+                    i = 1;
+                    if (char.IsSurrogate(c))
+                    {
+                        s = term.Substring(ipos, 2);
+                        s = opos == 0 ? s : s.ToUpper(culture);
+                        s.CopyTo(0, output, opos, s.Length);
+                        opos += s.Length;
+                        i++; // surrogate pair len is 2
+                    }
+                    else
+                    {
+                        output[opos] = opos++ == 0 ? c : char.ToUpper(c, culture);
+                    }
+                    if (len > i)
+                    {
+                        term = term.Substring(i);
+                        term.CopyTo(0, output, opos, term.Length);
+                        opos += term.Length;                        
+                    }
                     break;
 
                 default:
@@ -926,6 +809,7 @@ function validateSafeAlias(id, value, immediate, callback) {{
         /// <param name="separator">The separator, which defaults to a whitespace.</param>
         /// <returns>The splitted text.</returns>
         /// <remarks>Supports Utf8 and Ascii strings, not Unicode strings.</remarks>
+        // NOTE does not support surrogate pairs at the moment
         public virtual string SplitPascalCasing(string text, char separator)
         {
             // be safe
@@ -970,55 +854,6 @@ function validateSafeAlias(id, value, immediate, callback) {{
 
         #endregion
 
-        #region Recode
-
-        /// <summary>
-        /// Returns a new string containing only characters within the specified code type.
-        /// </summary>
-        /// <param name="text">The string to filter.</param>
-        /// <param name="stringType">The string type.</param>
-        /// <returns>The filtered string.</returns>
-        /// <remarks>If <paramref name="stringType"/> is not <c>Unicode</c> then non-utf8 characters are
-        /// removed. If it is <c>Ascii</c> we try to do some intelligent replacement of accents, etc.</remarks>
-        public virtual string Recode(string text, CleanStringType stringType)
-        {
-            // be safe
-            if (text == null)
-                throw new ArgumentNullException("text");
-
-            var codeType = stringType & CleanStringType.CodeMask;
-
-            // unicode to utf8 or ascii: just remove the unicode chars
-            // utf8 to ascii: try to be clever and replace some chars
-
-            // what's the point?
-            if (codeType == CleanStringType.Unicode)
-                return text;
-
-            return codeType == CleanStringType.Utf8 
-                ? RemoveNonUtf8(text) 
-                : Utf8ToAsciiConverter.ToAsciiString(text);
-        }
-
-        private string RemoveNonUtf8(string text)
-        {
-            var len = text.Length;
-            var output = new char[len]; // we won't be adding chars
-            int opos = 0;
-
-            for (var ipos = 0; ipos < len; ipos++)
-            {
-                var c = text[ipos];
-                if (char.IsSurrogate(c))
-                    ipos++;
-                else
-                    output[opos++] = c;
-            }
-            return new string(output, 0, opos);
-        }
-
-        #endregion
-
         #region ReplaceMany
 
         /// <summary>
diff --git a/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs b/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs
index f93c461fa3..23ac4e3931 100644
--- a/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs
+++ b/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs
@@ -72,8 +72,11 @@ namespace Umbraco.Core.Strings
             var opos = 0;
 
             for (var ipos = 0; ipos < input.Length; ipos++)
-                if (char.IsSurrogate(input[ipos]))
-                    ipos++;
+                if (char.IsSurrogate(input[ipos])) // ignore high surrogate
+                {
+                    ipos++; // and skip low surrogate
+                    output[opos++] = '?';
+                }
                 else
                     ToAscii(input, ipos, output, ref opos);
 
diff --git a/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs b/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs
index 17e583b635..b5b26b26bf 100644
--- a/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs
+++ b/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs
@@ -1,6 +1,10 @@
-﻿using System.Collections.Generic;
+﻿using System;
+using System.Collections.Generic;
+using System.Diagnostics;
 using System.Globalization;
+using System.IO;
 using System.Linq;
+using System.Text;
 using System.Text.RegularExpressions;
 using NUnit.Framework;
 using Umbraco.Core;
@@ -29,10 +33,39 @@ namespace Umbraco.Tests.CoreStrings
             // so there still may be utf8 chars even though you want ascii
 
             _helper = new DefaultShortStringHelper()
-                .WithConfig(CleanStringType.Url, StripQuotes, allowLeadingDigits: true)
-                .WithConfig(new CultureInfo("fr-FR"), CleanStringType.Url, FilterFrenchElisions, allowLeadingDigits: true)
-                .WithConfig(CleanStringType.Alias, StripQuotes)
-                .WithConfig(new CultureInfo("fr-FR"), CleanStringType.Alias, WhiteQuotes);
+                .WithConfig(CleanStringType.FileName, new DefaultShortStringHelper.Config
+                {
+                    //PreFilter = ClearFileChars, // done in IsTerm
+                    IsTerm = (c, leading) => (char.IsLetterOrDigit(c) || c == '_') && DefaultShortStringHelper.IsValidFileNameChar(c),
+                    StringType = CleanStringType.LowerCase | CleanStringType.Ascii,
+                    Separator = '-'
+                })
+                .WithConfig(CleanStringType.UrlSegment, new DefaultShortStringHelper.Config
+                {
+                    PreFilter = StripQuotes,
+                    IsTerm = (c, leading) => char.IsLetterOrDigit(c) || c == '_',
+                    StringType = CleanStringType.LowerCase | CleanStringType.Ascii,
+                    Separator = '-'
+                })
+                .WithConfig(new CultureInfo("fr-FR"), CleanStringType.UrlSegment, new DefaultShortStringHelper.Config
+                {
+                    PreFilter = FilterFrenchElisions,
+                    IsTerm = (c, leading) => leading ? char.IsLetter(c) : (char.IsLetterOrDigit(c) || c == '_'),
+                    StringType = CleanStringType.LowerCase | CleanStringType.Ascii,
+                    Separator = '-'
+                })
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    PreFilter = StripQuotes,
+                    IsTerm = (c, leading) => leading ? char.IsLetter(c) : char.IsLetterOrDigit(c),
+                    StringType = CleanStringType.UmbracoCase | CleanStringType.Ascii
+                })
+                .WithConfig(new CultureInfo("fr-FR"), CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    PreFilter = WhiteQuotes,
+                    IsTerm = (c, leading) => leading ? char.IsLetter(c) : char.IsLetterOrDigit(c),
+                    StringType = CleanStringType.UmbracoCase | CleanStringType.Ascii
+                });
 
             ShortStringHelperResolver.Reset();
             ShortStringHelperResolver.Current = new ShortStringHelperResolver(_helper);
@@ -65,6 +98,333 @@ namespace Umbraco.Tests.CoreStrings
             return s;
         }
 
+        [Test] // the IsTerm predicate decides whether an underscore stays inside a term or acts as a term boundary
+        public void CleanStringUnderscoreInTerm()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    // underscore is accepted within terms: "foo_bar" stays a single term
+                    IsTerm = (c, leading) => char.IsLetterOrDigit(c) || c == '_',
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo_bar*nil", helper.CleanString("foo_bar nil", CleanStringType.Alias));
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    // underscore is not accepted within terms: "foo_bar" is split into two terms
+                    IsTerm = (c, leading) => char.IsLetterOrDigit(c),
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*bar*nil", helper.CleanString("foo_bar nil", CleanStringType.Alias));
+        }
+
+        [Test] // the "leading" argument of IsTerm controls which chars may start the output
+        public void CleanStringLeadingChars()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    // letters and digits are valid leading chars, so the leading "0123" is kept
+                    IsTerm = (c, leading) => char.IsLetterOrDigit(c),
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("0123foo*bar*nil", helper.CleanString("0123foo_bar nil", CleanStringType.Alias));
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    // only letters are valid leading chars, so leading digits are dropped (glued to a word or standing alone)
+                    IsTerm = (c, leading) => leading ? char.IsLetter(c) : char.IsLetterOrDigit(c),
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*bar*nil", helper.CleanString("0123foo_bar nil", CleanStringType.Alias));
+            Assert.AreEqual("foo*bar*nil", helper.CleanString("0123 foo_bar nil", CleanStringType.Alias));
+        }
+
+        [Test] // BreakTermsOnUpper toggles whether an uppercase letter inside a word starts a new term
+        public void CleanStringTermOnUpper()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    // uppercase letter means new term: "fooBar" becomes the two terms "foo" and "Bar"
+                    BreakTermsOnUpper = true,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*Bar", helper.CleanString("fooBar", CleanStringType.Alias));
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    // uppercase letter is part of term: "fooBar" stays a single term
+                    BreakTermsOnUpper = false,
+                    Separator = '*'
+                });
+            Assert.AreEqual("fooBar", helper.CleanString("fooBar", CleanStringType.Alias));
+        }
+
+        [Test] // CutAcronymOnNonUpper toggles whether an uppercase run followed by a non-uppercase letter is cut into acronym + word
+        public void CleanStringAcronymOnNonUpper()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    // non-uppercase letter means cut acronym: the last uppercase letter of the run starts the next term
+                    CutAcronymOnNonUpper = true,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*BAR*Rnil", helper.CleanString("foo BARRnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BA*Rnil", helper.CleanString("foo BARnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BAnil", helper.CleanString("foo BAnil", CleanStringType.Alias)); // run of two uppercase letters: not cut
+            Assert.AreEqual("foo*Bnil", helper.CleanString("foo Bnil", CleanStringType.Alias)); // single uppercase letter: not cut
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    // non-uppercase letter means word: the uppercase run and the letters after it stay one term
+                    CutAcronymOnNonUpper = false,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*BARRnil", helper.CleanString("foo BARRnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BARnil", helper.CleanString("foo BARnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BAnil", helper.CleanString("foo BAnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*Bnil", helper.CleanString("foo Bnil", CleanStringType.Alias));
+        }
+
+        [Test] // with CutAcronymOnNonUpper on, GreedyAcronyms decides where the cut between acronym and following word falls
+        public void CleanStringGreedyAcronyms()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    CutAcronymOnNonUpper = true,
+                    GreedyAcronyms = true, // greedy: the acronym keeps every uppercase letter of the run
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*BARR*nil", helper.CleanString("foo BARRnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BAR*nil", helper.CleanString("foo BARnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BA*nil", helper.CleanString("foo BAnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*Bnil", helper.CleanString("foo Bnil", CleanStringType.Alias)); // single uppercase letter: never cut
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    CutAcronymOnNonUpper = true,
+                    GreedyAcronyms = false, // non-greedy: the last uppercase letter of the run starts the next term
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*BAR*Rnil", helper.CleanString("foo BARRnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BA*Rnil", helper.CleanString("foo BARnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*BAnil", helper.CleanString("foo BAnil", CleanStringType.Alias));
+            Assert.AreEqual("foo*Bnil", helper.CleanString("foo Bnil", CleanStringType.Alias));
+        }
+
+        [Test] // leading/trailing whitespace is removed and a run of inner whitespace yields a single separator
+        public void CleanStringWhiteSpace()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo", helper.CleanString("   foo   ", CleanStringType.Alias)); // no separator emitted for outer whitespace
+            Assert.AreEqual("foo*bar", helper.CleanString("   foo   bar   ", CleanStringType.Alias)); // three inner spaces, one separator
+        }
+
+        [Test] // the configured Separator char is written between terms; without one the terms are concatenated
+        public void CleanStringSeparator()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("foo*bar", helper.CleanString("foo bar", CleanStringType.Alias));
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = ' ' // a space is a valid separator too
+                });
+            Assert.AreEqual("foo bar", helper.CleanString("foo bar", CleanStringType.Alias));
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged // no Separator configured: terms are simply concatenated
+                });
+            Assert.AreEqual("foobar", helper.CleanString("foo bar", CleanStringType.Alias));
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '文' // a non-ascii separator is written as-is (Utf8 output)
+                });
+            Assert.AreEqual("foo文bar", helper.CleanString("foo bar", CleanStringType.Alias));
+        }
+
+        [Test] // with this config symbols such as parentheses are dropped and only separate terms
+        public void CleanStringSymbols()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = '*'
+                });
+            Assert.AreEqual("house*2", helper.CleanString("house (2)", CleanStringType.Alias));
+            
+            // FIXME symbols such as the parentheses above are dropped, but for a filename we want to keep them!
+            // FIXME and what about a url?
+        }
+
+        [Test] // sanity-checks how a supplementary code point is laid out: a surrogate pair in the string, 4 bytes in UTF-8
+        public void Utf8Surrogates()
+        {
+            // Unicode code points between 0x10000 and 0x10FFFF are represented by two 16-bit "surrogate" chars
+            const string str = "a\U00010F00z\uA74Ft";
+            Assert.AreEqual(6, str.Length);
+            Assert.IsTrue(char.IsSurrogate(str[1]));
+            Assert.IsTrue(char.IsHighSurrogate(str[1]));
+            Assert.IsTrue(char.IsSurrogate(str[2]));
+            Assert.IsTrue(char.IsLowSurrogate(str[2]));
+            Assert.AreEqual('z', str[3]);
+            Assert.IsFalse(char.IsSurrogate(str[4]));
+            Assert.AreEqual('\uA74F', str[4]);
+            Assert.AreEqual('t', str[5]);
+
+            Assert.AreEqual("z", str.Substring(3, 1));
+            Assert.AreEqual("\U00010F00", str.Substring(1, 2));
+
+            var bytes = Encoding.UTF8.GetBytes(str);
+            Assert.AreEqual(10, bytes.Length);
+            Assert.AreEqual('a', bytes[0]);
+            // then next string element is two chars (surrogate pair) or 4 bytes, 21 bits of code point
+            Assert.AreEqual('z', bytes[5]);
+            // then next string element is one char and 3 bytes, 16 bits of code point
+            Assert.AreEqual('t', bytes[9]);
+            //foreach (var b in bytes)
+            //    Console.WriteLine("{0:X}", b);
+
+            Console.WriteLine("\U00010B70"); // NOTE(review): prints a different supplementary char; looks like leftover debug output - confirm
+        }
+
+        [Test] // pins the ascii transliteration of a string mixing a surrogate pair, U+A74F and accented letters
+        public void Utf8ToAsciiConverter()
+        {
+            const string str = "a\U00010F00z\uA74Ftéô";
+            var output = Core.Strings.Utf8ToAsciiConverter.ToAsciiString(str);
+            Assert.AreEqual("a?zooteo", output); // surrogate pair -> "?", U+A74F -> "oo", accented letters -> base letters
+        }
+
+        [Test] // the Utf8 / Ascii flag of the config decides what happens to non-ascii characters
+        public void CleanStringEncoding()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, // Utf8: non-ascii letters are kept as-is
+                    Separator = '*'
+                });
+            Assert.AreEqual("中文测试", helper.CleanString("中文测试", CleanStringType.Alias));
+            Assert.AreEqual("léger*中文测试*ZÔRG", helper.CleanString("léger 中文测试 ZÔRG", CleanStringType.Alias));
+
+            helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Ascii | CleanStringType.Unchanged, // Ascii: accents are folded, the Chinese term disappears
+                    Separator = '*'
+                });
+            Assert.AreEqual("", helper.CleanString("中文测试", CleanStringType.Alias));
+            Assert.AreEqual("leger*ZORG", helper.CleanString("léger 中文测试 ZÔRG", CleanStringType.Alias));
+        }
+
+        [Test] // pins what WithDefaultConfig() produces for alias, file name and url segment on one mixed input
+        public void CleanStringDefaultConfig()
+        {
+            var helper = new DefaultShortStringHelper().WithDefaultConfig();
+
+            const string input = "0123 中文测试 中文测试 léger ZÔRG (2) a?? *x";
+
+            var alias = helper.CleanStringForSafeAlias(input);
+            var filename = helper.CleanStringForSafeFileName(input);
+            var segment = helper.CleanStringForUrlSegment(input);
+
+            // umbraco-cased ascii alias, must begin with a proper letter (the leading digits and the Chinese terms are gone)
+            Assert.AreEqual("legerZORG2AX", alias, "alias");
+
+            // lower-cased, utf8 filename, removing illegal filename chars, using dash-separator
+            Assert.AreEqual("0123-中文测试-中文测试-léger-zôrg-2-a-x", filename, "filename");
+
+            // lower-cased, utf8 url segment, only letters and digits, using dash-separator
+            Assert.AreEqual("0123-中文测试-中文测试-léger-zôrg-2-a-x", segment, "segment");
+        }
+
+        [Test] // casing flags OR-ed into the stringType argument are applied per term, with acronyms treated specially
+        public void CleanStringCasing()
+        {
+            var helper = new DefaultShortStringHelper()
+                .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config
+                {
+                    StringType = CleanStringType.Utf8 | CleanStringType.Unchanged,
+                    Separator = ' '
+                });
+
+            // BBB is an acronym
+            // E is a word (too short to be an acronym)
+            // FF is an acronym
+
+            // FIXME "C" can't be an acronym
+            // FIXME "DBXreview" = acronym?!
+
+            Assert.AreEqual("aaa BBB CCc Ddd E FF", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias)); // unchanged
+            Assert.AreEqual("aaa Bbb Ccc Ddd E FF", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias | CleanStringType.CamelCase));
+            Assert.AreEqual("Aaa Bbb Ccc Ddd E FF", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias | CleanStringType.PascalCase));
+            Assert.AreEqual("aaa bbb ccc ddd e ff", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias | CleanStringType.LowerCase));
+            Assert.AreEqual("AAA BBB CCC DDD E FF", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias | CleanStringType.UpperCase));
+            Assert.AreEqual("aaa BBB CCc Ddd E FF", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias | CleanStringType.UmbracoCase));
+
+            // MS rules & guidelines:
+            // - Do capitalize both characters of two-character acronyms, except the first word of a camel-cased identifier.
+            //     eg "DBRate" (pascal) or "ioHelper" (camel) - "SpecialDBRate" (pascal) or "specialIOHelper" (camel)
+            // - Do capitalize only the first character of acronyms with three or more characters, except the first word of a camel-cased identifier.
+            //     eg "XmlWriter" (pascal) or "htmlReader" (camel) - "SpecialXmlWriter" (pascal) or "specialHtmlReader" (camel)
+            // - Do not capitalize any of the characters of any acronyms, whatever their length, at the beginning of a camel-cased identifier.
+            //     eg "xmlWriter" or "dbWriter" (camel)
+
+            Assert.AreEqual("aaa BB Ccc", helper.CleanString("aaa BB ccc", CleanStringType.Alias | CleanStringType.CamelCase));
+            Assert.AreEqual("aa Bb Ccc", helper.CleanString("AA bb ccc", CleanStringType.Alias | CleanStringType.CamelCase));
+            Assert.AreEqual("aaa Bb Ccc", helper.CleanString("AAA bb ccc", CleanStringType.Alias | CleanStringType.CamelCase));
+            Assert.AreEqual("db Rate", helper.CleanString("DB rate", CleanStringType.Alias | CleanStringType.CamelCase));
+            Assert.AreEqual("special DB Rate", helper.CleanString("special DB rate", CleanStringType.Alias | CleanStringType.CamelCase));
+            Assert.AreEqual("xml Writer", helper.CleanString("XML writer", CleanStringType.Alias | CleanStringType.CamelCase));
+            Assert.AreEqual("special Xml Writer", helper.CleanString("special XML writer", CleanStringType.Alias | CleanStringType.CamelCase));
+
+            Assert.AreEqual("Aaa BB Ccc", helper.CleanString("aaa BB ccc", CleanStringType.Alias | CleanStringType.PascalCase));
+            Assert.AreEqual("AA Bb Ccc", helper.CleanString("AA bb ccc", CleanStringType.Alias | CleanStringType.PascalCase));
+            Assert.AreEqual("Aaa Bb Ccc", helper.CleanString("AAA bb ccc", CleanStringType.Alias | CleanStringType.PascalCase));
+            Assert.AreEqual("DB Rate", helper.CleanString("DB rate", CleanStringType.Alias | CleanStringType.PascalCase));
+            Assert.AreEqual("Special DB Rate", helper.CleanString("special DB rate", CleanStringType.Alias | CleanStringType.PascalCase));
+            Assert.AreEqual("Xml Writer", helper.CleanString("XML writer", CleanStringType.Alias | CleanStringType.PascalCase));
+            Assert.AreEqual("Special Xml Writer", helper.CleanString("special XML writer", CleanStringType.Alias | CleanStringType.PascalCase));
+        }
+
         #region Cases
         [TestCase("foo", "foo")]
         [TestCase("    foo    ", "foo")]
@@ -104,29 +464,29 @@ namespace Umbraco.Tests.CoreStrings
             Assert.AreEqual(expected, output);
         }
 
-        #region Cases
-        [TestCase("This is my_little_house so cute.", "thisIsMyLittleHouseSoCute", false)]
-        [TestCase("This is my_little_house so cute.", "thisIsMy_little_houseSoCute", true)]
-        [TestCase("This is my_Little_House so cute.", "thisIsMyLittleHouseSoCute", false)]
-        [TestCase("This is my_Little_House so cute.", "thisIsMy_Little_HouseSoCute", true)]
-        [TestCase("An UPPER_CASE_TEST to check", "anUpperCaseTestToCheck", false)]
-        [TestCase("An UPPER_CASE_TEST to check", "anUpper_case_testToCheck", true)]
-        [TestCase("Trailing_", "trailing", false)]
-        [TestCase("Trailing_", "trailing_", true)]
-        [TestCase("_Leading", "leading", false)]
-        [TestCase("_Leading", "leading", true)]
-        [TestCase("Repeat___Repeat", "repeatRepeat", false)]
-        [TestCase("Repeat___Repeat", "repeat___Repeat", true)]
-        [TestCase("Repeat___repeat", "repeatRepeat", false)]
-        [TestCase("Repeat___repeat", "repeat___repeat", true)]
-        #endregion
-        public void CleanStringWithUnderscore(string input, string expected, bool allowUnderscoreInTerm)
-        {
-            var helper = new DefaultShortStringHelper()
-                .WithConfig(allowUnderscoreInTerm: allowUnderscoreInTerm);
-            var output = helper.CleanString(input, CleanStringType.Alias | CleanStringType.Ascii | CleanStringType.CamelCase);
-            Assert.AreEqual(expected, output);
-        }
+        //#region Cases
+        //[TestCase("This is my_little_house so cute.", "thisIsMyLittleHouseSoCute", false)]
+        //[TestCase("This is my_little_house so cute.", "thisIsMy_little_houseSoCute", true)]
+        //[TestCase("This is my_Little_House so cute.", "thisIsMyLittleHouseSoCute", false)]
+        //[TestCase("This is my_Little_House so cute.", "thisIsMy_Little_HouseSoCute", true)]
+        //[TestCase("An UPPER_CASE_TEST to check", "anUpperCaseTestToCheck", false)]
+        //[TestCase("An UPPER_CASE_TEST to check", "anUpper_case_testToCheck", true)]
+        //[TestCase("Trailing_", "trailing", false)]
+        //[TestCase("Trailing_", "trailing_", true)]
+        //[TestCase("_Leading", "leading", false)]
+        //[TestCase("_Leading", "leading", true)]
+        //[TestCase("Repeat___Repeat", "repeatRepeat", false)]
+        //[TestCase("Repeat___Repeat", "repeat___Repeat", true)]
+        //[TestCase("Repeat___repeat", "repeatRepeat", false)]
+        //[TestCase("Repeat___repeat", "repeat___repeat", true)]
+        //#endregion
+        //public void CleanStringWithUnderscore(string input, string expected, bool allowUnderscoreInTerm)
+        //{
+        //    var helper = new DefaultShortStringHelper()
+        //        .WithConfig(allowUnderscoreInTerm: allowUnderscoreInTerm);
+        //    var output = helper.CleanString(input, CleanStringType.Alias | CleanStringType.Ascii | CleanStringType.CamelCase);
+        //    Assert.AreEqual(expected, output);
+        //}
 
         #region Cases
         [TestCase("Home Page", "home-page")]
@@ -137,7 +497,6 @@ namespace Umbraco.Tests.CoreStrings
         [TestCase("汉#字*/漢?字", "")]
         [TestCase("Réalösk fix bran#lo'sk", "realosk-fix-bran-losk")]
         [TestCase("200 ways to be happy", "200-ways-to-be-happy")]
-        [TestCase("aBCdEfGhIJK", "a-b-cd-ef-gh-ijk")]
         #endregion
         public void CleanStringForUrlSegment(string input, string expected)
         {
@@ -166,173 +525,19 @@ namespace Umbraco.Tests.CoreStrings
         }
 
         #region Cases
-        [TestCase("foo", "foo")]
-        [TestCase("    foo    ", "foo")]
-        [TestCase("Foo", "foo")]
-        [TestCase("FoO", "foO")]
-        [TestCase("FoO bar", "foOBar")]
-        [TestCase("FoO bar NIL", "foOBarNil")]
-        [TestCase("FoO 33bar 22NIL", "foO33bar22Nil")]
-        [TestCase("FoO 33bar 22NI", "foO33bar22NI")]
-        [TestCase("0foo", "foo")]
-        [TestCase("2foo bar", "fooBar")]
-        [TestCase("9FOO", "foo")]
-        [TestCase("foo-BAR", "fooBar")]
-        [TestCase("foo-BA-dang", "fooBADang")]
-        [TestCase("foo_BAR", "fooBar")]
-        [TestCase("foo'BAR", "fooBar")]
-        [TestCase("sauté dans l'espace", "sautéDansLEspace")]
-        [TestCase("foo\"\"bar", "fooBar")]
-        [TestCase("-foo-", "foo")]
-        [TestCase("_foo_", "foo")]
-        [TestCase("spécial", "spécial")]
-        [TestCase("brô dëk ", "brôDëk")]
-        [TestCase("1235brô dëk ", "brôDëk")]
-        [TestCase("汉#字*/漢?字", "汉字漢字")]
-        [TestCase("aa DB cd EFG X KLMN OP qrst", "aaDBCdEfgXKlmnOPQrst")]
-        [TestCase("AA db cd EFG X KLMN OP qrst", "aaDbCdEfgXKlmnOPQrst")]
-        [TestCase("AAA db cd EFG X KLMN OP qrst", "aaaDbCdEfgXKlmnOPQrst")]
-        [TestCase("quelle élévation à partir", "quelleÉlévationÀPartir")]
-        #endregion
-        public void CleanUtf8String(string input, string expected)
-        {
-            input = _helper.Recode(input, CleanStringType.Utf8);
-            var output = _helper.CleanUtf8String(input);
-            Assert.AreEqual(expected, output);
-        }
-
-        #region Cases
-        [TestCase("sauté dans l'espace", "saute-dans-espace", "fr-FR", CleanStringType.Url | CleanStringType.Ascii | CleanStringType.LowerCase)]
-        [TestCase("sauté dans l'espace", "sauté-dans-espace", "fr-FR", CleanStringType.Url | CleanStringType.Utf8 | CleanStringType.LowerCase)]
+        [TestCase("sauté dans l'espace", "saute-dans-espace", "fr-FR", CleanStringType.UrlSegment | CleanStringType.Ascii | CleanStringType.LowerCase)]
+        [TestCase("sauté dans l'espace", "sauté-dans-espace", "fr-FR", CleanStringType.UrlSegment | CleanStringType.Utf8 | CleanStringType.LowerCase)]
         [TestCase("sauté dans l'espace", "SauteDansLEspace", "fr-FR", CleanStringType.Alias | CleanStringType.Ascii | CleanStringType.PascalCase)]
-        [TestCase("he doesn't want", "he-doesnt-want", null, CleanStringType.Url | CleanStringType.Ascii | CleanStringType.LowerCase)]
+        [TestCase("he doesn't want", "he-doesnt-want", null, CleanStringType.UrlSegment | CleanStringType.Ascii | CleanStringType.LowerCase)]
         [TestCase("he doesn't want", "heDoesntWant", null, CleanStringType.Alias | CleanStringType.Ascii | CleanStringType.CamelCase)]
         #endregion
         public void CleanStringWithTypeAndCulture(string input, string expected, string culture, CleanStringType stringType)
         {
             var cinfo = culture == null ? CultureInfo.InvariantCulture : new CultureInfo(culture);
-            var separator = (stringType & CleanStringType.Url) == CleanStringType.Url ? '-' : char.MinValue;
-            var output = _helper.CleanString(input, stringType, separator, cinfo);
-            Assert.AreEqual(expected, output);
-        }
 
-        #region Cases
-        [TestCase("foo", "foo")]
-        [TestCase("    foo    ", "foo")]
-        [TestCase("Foo", "foo")]
-        [TestCase("FoO", "foO")]
-        [TestCase("FoO bar", "foOBar")]
-        [TestCase("FoO bar NIL", "foOBarNil")]
-        [TestCase("FoO 33bar 22NIL", "foO33bar22Nil")]
-        [TestCase("FoO 33bar 22NI", "foO33bar22NI")]
-        [TestCase("0foo", "foo")]
-        [TestCase("2foo bar", "fooBar")]
-        [TestCase("9FOO", "foo")]
-        [TestCase("foo-BAR", "fooBar")]
-        [TestCase("foo-BA-dang", "fooBADang")]
-        [TestCase("foo_BAR", "fooBar")]
-        [TestCase("foo'BAR", "fooBar")]
-        [TestCase("sauté dans l'espace", "sauteDansLEspace")]
-        [TestCase("foo\"\"bar", "fooBar")]
-        [TestCase("-foo-", "foo")]
-        [TestCase("_foo_", "foo")]
-        [TestCase("spécial", "special")]
-        [TestCase("brô dëk ", "broDek")]
-        [TestCase("1235brô dëk ", "broDek")]
-        [TestCase("汉#字*/漢?字", "")]
-        [TestCase("aa DB cd EFG X KLMN OP qrst", "aaDBCdEfgXKlmnOPQrst")]
-        [TestCase("AA db cd EFG X KLMN OP qrst", "aaDbCdEfgXKlmnOPQrst")]
-        [TestCase("AAA db cd EFG X KLMN OP qrst", "aaaDbCdEfgXKlmnOPQrst")]
-        #endregion
-        public void CleanStringToAscii(string input, string expected)
-        {
-            var output = _helper.CleanString(input, CleanStringType.Ascii | CleanStringType.CamelCase);
-            Assert.AreEqual(expected, output);
-        }
-
-        #region Cases
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "brodeKtzARlanban123pOo", CleanStringType.Unchanged)]
-        [TestCase("    1235brô dëK tzARlan ban123!pOo    ", "brodeKtzARlanban123pOo", CleanStringType.Unchanged)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "BroDeKTzARlanBan123POo", CleanStringType.PascalCase)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "broDeKTzARlanBan123POo", CleanStringType.CamelCase)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "BRODEKTZARLANBAN123POO", CleanStringType.UpperCase)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "brodektzarlanban123poo", CleanStringType.LowerCase)]
-        [TestCase("aa DB cd EFG X KLMN OP qrst", "aaDBCdEfgXKlmnOPQrst", CleanStringType.CamelCase)]
-        [TestCase("aaa DB cd EFG X KLMN OP qrst", "aaaDBCdEfgXKlmnOPQrst", CleanStringType.CamelCase)]
-        [TestCase("aa DB cd EFG X KLMN OP qrst", "AaDBCdEfgXKlmnOPQrst", CleanStringType.PascalCase)]
-        [TestCase("aaa DB cd EFG X KLMN OP qrst", "AaaDBCdEfgXKlmnOPQrst", CleanStringType.PascalCase)]
-        [TestCase("AA db cd EFG X KLMN OP qrst", "aaDbCdEfgXKlmnOPQrst", CleanStringType.CamelCase)]
-        [TestCase("AAA db cd EFG X KLMN OP qrst", "aaaDbCdEfgXKlmnOPQrst", CleanStringType.CamelCase)]
-        [TestCase("AA db cd EFG X KLMN OP qrst", "AADbCdEfgXKlmnOPQrst", CleanStringType.PascalCase)]
-        [TestCase("AAA db cd EFG X KLMN OP qrst", "AaaDbCdEfgXKlmnOPQrst", CleanStringType.PascalCase)]
-        [TestCase("We store some HTML in the DB for performance", "WeStoreSomeHtmlInTheDBForPerformance", CleanStringType.PascalCase)]
-        [TestCase("We store some HTML in the DB for performance", "weStoreSomeHtmlInTheDBForPerformance", CleanStringType.CamelCase)]
-        [TestCase("X is true", "XIsTrue", CleanStringType.PascalCase)]
-        [TestCase("X is true", "xIsTrue", CleanStringType.CamelCase)]
-        [TestCase("IO are slow", "IOAreSlow", CleanStringType.PascalCase)]
-        [TestCase("IO are slow", "ioAreSlow", CleanStringType.CamelCase)]
-        [TestCase("RAM is fast", "RamIsFast", CleanStringType.PascalCase)]
-        [TestCase("RAM is fast", "ramIsFast", CleanStringType.CamelCase)]
-        [TestCase("Tab 1", "tab1", CleanStringType.CamelCase)]
-        [TestCase("Home - Page", "homePage", CleanStringType.CamelCase)]
-        [TestCase("Shannon's Document Type", "shannonSDocumentType", CleanStringType.CamelCase)]
-        [TestCase("Shannon's Document Type", "shannonsDocumentType", CleanStringType.CamelCase | CleanStringType.Alias)]
-        [TestCase("!BADDLY nam-ed Document Type", "baddlyNamEdDocumentType", CleanStringType.CamelCase)]
-        [TestCase("  !BADDLY nam-ed Document Type", "BADDLYnamedDocumentType", CleanStringType.Unchanged)]
-        [TestCase("!BADDLY nam-ed   Document Type", "BaddlyNamEdDocumentType", CleanStringType.PascalCase)]
-        [TestCase("i %Want!thisTo end up In Proper@case", "IWantThisToEndUpInProperCase", CleanStringType.PascalCase)]
-        [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "raksmorgasKeKe", CleanStringType.CamelCase)]
-        [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "RaksmorgasKeKe", CleanStringType.PascalCase)]
-        [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "RaksmorgaskeKe", CleanStringType.Unchanged)]
-        [TestCase("TRii", "TRii", CleanStringType.Unchanged)]
-        [TestCase("**TRii", "TRii", CleanStringType.Unchanged)]
-        [TestCase("TRii", "tRii", CleanStringType.CamelCase)]
-        [TestCase("TRXii", "trXii", CleanStringType.CamelCase)]
-        [TestCase("**TRii", "tRii", CleanStringType.CamelCase)]
-        [TestCase("TRii", "TRii", CleanStringType.PascalCase)]
-        [TestCase("TRXii", "TRXii", CleanStringType.PascalCase)]
-        [TestCase("**TRii", "TRii", CleanStringType.PascalCase)]
-        [TestCase("trII", "trII", CleanStringType.Unchanged)]
-        [TestCase("**trII", "trII", CleanStringType.Unchanged)]
-        [TestCase("trII", "trII", CleanStringType.CamelCase)]
-        [TestCase("**trII", "trII", CleanStringType.CamelCase)]
-        [TestCase("trII", "TrII", CleanStringType.PascalCase)]
-        [TestCase("**trII", "TrII", CleanStringType.PascalCase)]
-        [TestCase("trIIX", "trIix", CleanStringType.CamelCase)]
-        [TestCase("**trIIX", "trIix", CleanStringType.CamelCase)]
-        [TestCase("trIIX", "TrIix", CleanStringType.PascalCase)]
-        [TestCase("**trIIX", "TrIix", CleanStringType.PascalCase)]
-        #endregion
-        public void CleanStringToAsciiWithType(string input, string expected, CleanStringType caseType)
-        {
-            var output = _helper.CleanString(input, caseType | CleanStringType.Ascii);
-            Assert.AreEqual(expected, output);
-        }
-
-        #region Cases
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "bro de K tz A Rlan ban123 p Oo", ' ', CleanStringType.Unchanged)]
-        [TestCase("    1235brô dëK tzARlan ban123!pOo    ", "bro de K tz A Rlan ban123 p Oo", ' ', CleanStringType.Unchanged)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "Bro De K Tz A Rlan Ban123 P Oo", ' ', CleanStringType.PascalCase)]
-        [TestCase("1235brô dëK     tzARlan ban123!pOo", "Bro De K Tz A Rlan Ban123 P Oo", ' ', CleanStringType.PascalCase)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "bro De K Tz A Rlan Ban123 P Oo", ' ', CleanStringType.CamelCase)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "bro-De-K-Tz-A-Rlan-Ban123-P-Oo", '-', CleanStringType.CamelCase)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "BRO-DE-K-TZ-A-RLAN-BAN123-P-OO", '-', CleanStringType.UpperCase)]
-        [TestCase("1235brô dëK tzARlan ban123!pOo", "bro-de-k-tz-a-rlan-ban123-p-oo", '-', CleanStringType.LowerCase)]
-        [TestCase("Tab 1", "tab 1", ' ', CleanStringType.CamelCase)]
-        [TestCase("Home - Page", "home Page", ' ', CleanStringType.CamelCase)]
-        [TestCase("Shannon's Document Type", "shannon S Document Type", ' ', CleanStringType.CamelCase)]
-        [TestCase("Shannon's Document Type", "shannons Document Type", ' ', CleanStringType.CamelCase | CleanStringType.Alias)]
-        [TestCase("!BADDLY nam-ed Document Type", "baddly Nam Ed Document Type", ' ', CleanStringType.CamelCase)]
-        [TestCase("  !BADDLY nam-ed Document Type", "BADDLY nam ed Document Type", ' ', CleanStringType.Unchanged)]
-        [TestCase("!BADDLY nam-ed   Document Type", "Baddly Nam Ed Document Type", ' ', CleanStringType.PascalCase)]
-        [TestCase("i %Want!thisTo end up In Proper@case", "I Want This To End Up In Proper Case", ' ', CleanStringType.PascalCase)]
-        [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "raksmorgas Ke Ke", ' ', CleanStringType.CamelCase)]
-        [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "Raksmorgas Ke Ke", ' ', CleanStringType.PascalCase)]
-        [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "Raksmorgas ke Ke", ' ', CleanStringType.Unchanged)]
-        #endregion
-        public void CleanStringToAsciiWithTypeAndSeparator(string input, string expected, char separator, CleanStringType caseType)
-        {
-            var output = _helper.CleanString(input, caseType | CleanStringType.Ascii, separator);
+            // picks the proper config per culture
+            // and overrides some stringType params (ascii...)
+            var output = _helper.CleanString(input, stringType, cinfo);
             Assert.AreEqual(expected, output);
         }
 
diff --git a/src/Umbraco.Web/Strategies/Migrations/RebuildMediaXmlCacheAfterUpgrade.cs b/src/Umbraco.Web/Strategies/Migrations/RebuildMediaXmlCacheAfterUpgrade.cs
new file mode 100644
index 0000000000..72cf6c24ea
--- /dev/null
+++ b/src/Umbraco.Web/Strategies/Migrations/RebuildMediaXmlCacheAfterUpgrade.cs
@@ -0,0 +1,38 @@
+﻿using System;
+using Umbraco.Core;
+using Umbraco.Core.Persistence.Migrations;
+using Umbraco.Core.Services;
+using umbraco.interfaces;
+
+namespace Umbraco.Web.Strategies.Migrations
+{
+    /// <summary>
+    /// Startup handler that rebuilds the media xml structures once a migration has run.
+    /// </summary>
+    /// <remarks>
+    /// This cannot execute as part of a db migration since we need access to the services/repos.
+    /// The constructor subscribes to the static <c>MigrationRunner.Migrated</c> event, and the
+    /// handler only rebuilds for specific versions:
+    ///
+    /// * If the configured version (<c>e.ConfiguredVersion</c>) is less than or equal to 7.0.0
+    /// </remarks>
+    public class RebuildMediaXmlCacheAfterUpgrade : IApplicationStartupHandler
+    {
+        public RebuildMediaXmlCacheAfterUpgrade()
+        {
+            MigrationRunner.Migrated += MigrationRunner_Migrated; // the handler is never unsubscribed in this class
+        }
+
+        void MigrationRunner_Migrated(MigrationRunner sender, Core.Events.MigrationEventArgs e)
+        {
+            var target70 = new Version(7, 0, 0);
+
+            if (e.ConfiguredVersion <= target70) // presumably the version the site was on before the upgrade - TODO confirm
+            {
+                var mediasvc = (MediaService)ApplicationContext.Current.Services.MediaService; // NOTE(review): hard cast assumes the registered service is the concrete MediaService - confirm
+                mediasvc.RebuildXmlStructures();
+            }
+
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Umbraco.Web/Umbraco.Web.csproj b/src/Umbraco.Web/Umbraco.Web.csproj
index ab627630c1..6619767148 100644
--- a/src/Umbraco.Web/Umbraco.Web.csproj
+++ b/src/Umbraco.Web/Umbraco.Web.csproj
@@ -618,6 +618,7 @@
       <DependentUpon>Resources.resx</DependentUpon>
     </Compile>
     <Compile Include="UI\JavaScript\ServerVariablesParser.cs" />
+    <Compile Include="Strategies\Migrations\RebuildMediaXmlCacheAfterUpgrade.cs" />
     <Compile Include="UI\CdfLogger.cs" />
     <Compile Include="umbraco.presentation\CompatibilityHelper.cs" />
     <Compile Include="umbraco.presentation\umbraco\controls\PasswordChanger.ascx.cs">