diff --git a/src/Umbraco.Core/CoreBootManager.cs b/src/Umbraco.Core/CoreBootManager.cs index 65b1265aa0..d472150a85 100644 --- a/src/Umbraco.Core/CoreBootManager.cs +++ b/src/Umbraco.Core/CoreBootManager.cs @@ -324,17 +324,10 @@ namespace Umbraco.Core // fixme - why not use the following syntax? //PluginManager.Current.ResolveTypes()); - // use the new DefaultShortStringHelper but sort-of remain compatible - // - use UmbracoSettings UrlReplaceCharacters - // - allow underscores in terms, allow leading digits + // use the new DefaultShortStringHelper ShortStringHelperResolver.Current = new ShortStringHelperResolver( - new DefaultShortStringHelper() - .WithConfig(CleanStringType.Url, DefaultShortStringHelper.ApplyUrlReplaceCharacters, - allowUnderscoreInTerm: true, allowLeadingDigits: true)); - - // that was the old one - //ShortStringHelperResolver.Current = new ShortStringHelperResolver( - // new LegacyShortStringHelper()); + //new LegacyShortStringHelper()); + new DefaultShortStringHelper().WithDefaultConfig()); UrlSegmentProviderResolver.Current = new UrlSegmentProviderResolver( typeof (DefaultUrlSegmentProvider)); diff --git a/src/Umbraco.Core/StringExtensions.cs b/src/Umbraco.Core/StringExtensions.cs index 9c230f56ff..de36cca531 100644 --- a/src/Umbraco.Core/StringExtensions.cs +++ b/src/Umbraco.Core/StringExtensions.cs @@ -866,9 +866,11 @@ namespace Umbraco.Core if (_helper != null) return _helper; - // there *has* to be a short string helper, even if the resolver has not - // been initialized - used the default one with default configuration. - _helper = new DefaultShortStringHelper().WithConfig(allowLeadingDigits: true); + // we don't want Umbraco to die because the resolver hasn't been initialized + // as the ShortStringHelper is too important, so as long as it's not there + // already, we use a default one. That should never happen, but... 
+ Logging.LogHelper.Warn("ShortStringHelperResolver.HasCurrent == false, fallback to default."); + _helper = new DefaultShortStringHelper().WithDefaultConfig(); _helper.Freeze(); return _helper; } diff --git a/src/Umbraco.Core/Strings/CleanStringType.cs b/src/Umbraco.Core/Strings/CleanStringType.cs index 28a801aa54..f681c42d4a 100644 --- a/src/Umbraco.Core/Strings/CleanStringType.cs +++ b/src/Umbraco.Core/Strings/CleanStringType.cs @@ -14,6 +14,9 @@ namespace Umbraco.Core.Strings // note: you have 32 bits at your disposal // 0xffffffff + + // masks + /// /// Flag mask for casing. /// @@ -27,13 +30,19 @@ namespace Umbraco.Core.Strings /// /// Flag mask for role. /// - RoleMask = 0x030000, // 0xff0000 - 8 possible values + RoleMask = 0x070000, // 0xff0000 - 8 possible values + + + // no value /// /// No value. /// None = 0x00, + + // casing values + /// /// Pascal casing eg "PascalCase". /// @@ -66,9 +75,13 @@ namespace Umbraco.Core.Strings /// and is pascal otherwise. UmbracoCase = 0x20, + + // encoding values + /// /// Unicode encoding. /// + [Obsolete("Use .Utf8 instead.")] Unicode = 0x0100, /// @@ -81,14 +94,22 @@ namespace Umbraco.Core.Strings /// Ascii = 0x0400, + + // role values + /// /// Url role. /// - Url = 0x010000, + UrlSegment = 0x010000, /// /// Alias role. /// - Alias = 0x020000 + Alias = 0x020000, + + /// + /// FileName role. + /// + FileName = 0x040000 } } diff --git a/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs b/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs index 5e0ccae655..b02a8f1521 100644 --- a/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs +++ b/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Diagnostics; using System.IO; using System.Linq; using System.Globalization; @@ -15,14 +16,13 @@ namespace Umbraco.Core.Strings /// /// Not optimized to work on large bodies of text. 
/// Meant to replace LegacyShortStringHelper where/when backward compatibility is not an issue. - /// Full-unicode support is probably not so good. /// NOTE: pre-filters run _before_ the string is re-encoded. /// public class DefaultShortStringHelper : IShortStringHelper { #region Ctor and vars - static DefaultShortStringHelper() + public DefaultShortStringHelper() { InitializeLegacyUrlReplaceCharacters(); } @@ -43,7 +43,7 @@ namespace Umbraco.Core.Strings private CultureInfo _defaultCulture = CultureInfo.InvariantCulture; private bool _frozen; - private readonly Dictionary> _configs = new Dictionary>(); + private readonly Dictionary> _configs = new Dictionary>(); // see notes for CleanAsciiString //static DefaultShortStringHelper() @@ -53,11 +53,11 @@ namespace Umbraco.Core.Strings #endregion - #region Legacy UrlReplaceCharacters + #region Filters - static readonly Dictionary UrlReplaceCharacters = new Dictionary(); + private readonly Dictionary _urlReplaceCharacters = new Dictionary(); - static void InitializeLegacyUrlReplaceCharacters() + private void InitializeLegacyUrlReplaceCharacters() { foreach (var node in UmbracoConfig.For.UmbracoSettings().RequestHandler.CharCollection) { @@ -71,9 +71,21 @@ namespace Umbraco.Core.Strings /// /// The string to filter. /// The filtered string. 
- public static string ApplyUrlReplaceCharacters(string s) + public string ApplyUrlReplaceCharacters(string s) { - return s.ReplaceMany(UrlReplaceCharacters); + return s.ReplaceMany(_urlReplaceCharacters); + } + + // ok to be static here because it's not configureable in any way + private static readonly char[] InvalidFileNameChars = + Path.GetInvalidFileNameChars() + .Union("!*'();:@&=+$,/?%#[]-~{}\"<>\\^`| ".ToCharArray()) + .Distinct() + .ToArray(); + + public static bool IsValidFileNameChar(char c) + { + return InvalidFileNameChars.Contains(c) == false; } #endregion @@ -86,6 +98,11 @@ namespace Umbraco.Core.Strings throw new InvalidOperationException("Cannot configure the helper once it is frozen."); } + /// + /// Sets a default culture. + /// + /// The default culture. + /// The short string helper. public DefaultShortStringHelper WithDefaultCulture(CultureInfo culture) { EnsureNotFrozen(); @@ -93,75 +110,131 @@ namespace Umbraco.Core.Strings return this; } - public DefaultShortStringHelper WithConfig( - Func preFilter = null, - bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false) + public DefaultShortStringHelper WithConfig(Config config) { - return WithConfig(_defaultCulture, CleanStringType.RoleMask, - preFilter, breakTermsOnUpper, allowLeadingDigits, allowUnderscoreInTerm); + return WithConfig(_defaultCulture, CleanStringType.RoleMask, config); } - public DefaultShortStringHelper WithConfig(CleanStringType stringRole, - Func preFilter = null, - bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false) + public DefaultShortStringHelper WithConfig(CleanStringType stringRole, Config config) { - return WithConfig(_defaultCulture, stringRole, - preFilter, breakTermsOnUpper, allowLeadingDigits, allowUnderscoreInTerm); + return WithConfig(_defaultCulture, stringRole, config); } - public DefaultShortStringHelper WithConfig(CultureInfo culture, CleanStringType stringRole, - 
Func preFilter = null, - bool breakTermsOnUpper = true, bool allowLeadingDigits = false, bool allowUnderscoreInTerm = false) + public DefaultShortStringHelper WithConfig(CultureInfo culture, CleanStringType stringRole, Config config) { + if (config == null) + throw new ArgumentNullException("config"); + EnsureNotFrozen(); if (_configs.ContainsKey(culture) == false) - _configs[culture] = new Dictionary(); - _configs[culture][stringRole] = new HelperConfig(preFilter, breakTermsOnUpper, allowLeadingDigits, allowUnderscoreInTerm); + _configs[culture] = new Dictionary(); + _configs[culture][stringRole] = config.Clone(); // clone so it can't be changed return this; } - internal sealed class HelperConfig + /// + /// Sets the default configuration. + /// + /// The short string helper. + public DefaultShortStringHelper WithDefaultConfig() { - private HelperConfig() + return WithConfig(CleanStringType.UrlSegment, new Config { + PreFilter = ApplyUrlReplaceCharacters, + IsTerm = (c, leading) => char.IsLetterOrDigit(c) || c == '_', // letter, digit or underscore + StringType = CleanStringType.Utf8 | CleanStringType.LowerCase, + BreakTermsOnUpper = false, + Separator = '-' + }).WithConfig(CleanStringType.FileName, new Config + { + PreFilter = ApplyUrlReplaceCharacters, + IsTerm = (c, leading) => char.IsLetterOrDigit(c) || c == '_', // letter, digit or underscore + StringType = CleanStringType.Utf8 | CleanStringType.LowerCase, + BreakTermsOnUpper = false, + Separator = '-' + }).WithConfig(CleanStringType.Alias, new Config + { + PreFilter = ApplyUrlReplaceCharacters, + IsTerm = (c, leading) => leading + ? 
char.IsLetter(c) // only letters + : (char.IsLetterOrDigit(c) || c == '_'), // letter, digit or underscore + StringType = CleanStringType.Ascii | CleanStringType.UmbracoCase, + BreakTermsOnUpper = false + }); + } + + public sealed class Config + { + public Config() + { + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged; PreFilter = null; - BreakTermsOnUpper = true; - AllowLeadingDigits = false; + IsTerm = (c, leading) => leading ? char.IsLetter(c) : char.IsLetterOrDigit(c); + BreakTermsOnUpper = false; + CutAcronymOnNonUpper = false; + GreedyAcronyms = false; + Separator = Char.MinValue; } - public HelperConfig(Func preFilter, bool breakTermsOnUpper, bool allowLeadingDigits, bool allowUnderscoreInTerm) - : this() + public Config Clone() { - PreFilter = preFilter; - BreakTermsOnUpper = breakTermsOnUpper; - AllowLeadingDigits = allowLeadingDigits; - AllowUnderscoreInTerm = allowUnderscoreInTerm; + return new Config + { + PreFilter = PreFilter, + IsTerm = IsTerm, + StringType = StringType, + BreakTermsOnUpper = BreakTermsOnUpper, + CutAcronymOnNonUpper = CutAcronymOnNonUpper, + GreedyAcronyms = GreedyAcronyms, + Separator = Separator + }; } - public Func PreFilter { get; private set; } + public Func PreFilter { get; set; } + public Func IsTerm { get; set; } + + public CleanStringType StringType { get; set; } // indicate whether an uppercase within a term eg "fooBar" is to break // into a new term, or to be considered as part of the current term - public bool BreakTermsOnUpper { get; private set; } + public bool BreakTermsOnUpper { get; set; } - // indicates whether it is legal to have leading digits, or whether they - // should be stripped as any other illegal character - public bool AllowLeadingDigits { get; private set; } - - // indicates whether underscore is a valid character in a term or is - // to be considered as a separator - public bool AllowUnderscoreInTerm { get; private set; } + // indicate whether a non-uppercase within an acronym eg "FOOBar" 
is to cut + // the acronym (at "B" or "a" depending on GreedyAcronyms) or to give + // up the acronym and treat the term as a word + public bool CutAcronymOnNonUpper { get; set; } // indicates whether acronyms parsing is greedy ie whether "FOObar" is // "FOO" + "bar" (greedy) or "FO" + "Obar" (non-greedy) - public bool GreedyAcronyms { get { return false; } } + public bool GreedyAcronyms { get; set; } - public static readonly HelperConfig Empty = new HelperConfig(); + // the separator char + // but then how can we tell we dont want any? + public char Separator { get; set; } + + // extends the config + public CleanStringType StringTypeExtend(CleanStringType stringType) + { + var st = StringType; + foreach (var mask in new[] { CleanStringType.CaseMask, CleanStringType.CodeMask }) + { + var a = stringType & mask; + if (a == 0) continue; + + st = st & ~mask; // clear what we have + st = st | a; // set the new value + } + return st; + } + + internal static readonly Config NotConfigured = new Config(); } - private HelperConfig GetConfig(CleanStringType stringType, CultureInfo culture) + private Config GetConfig(CleanStringType stringType, CultureInfo culture) { - Dictionary config; + stringType = stringType & CleanStringType.RoleMask; + + Dictionary config; if (_configs.ContainsKey(culture)) { config = _configs[culture]; @@ -179,7 +252,7 @@ namespace Umbraco.Core.Strings return config[CleanStringType.RoleMask]; } - return HelperConfig.Empty; + return Config.NotConfigured; } #endregion @@ -247,7 +320,7 @@ function validateSafeAlias(id, value, immediate, callback) {{ /// public virtual string CleanStringForSafeAlias(string text) { - return CleanString(text, CleanStringType.Ascii | CleanStringType.UmbracoCase | CleanStringType.Alias); + return CleanStringForSafeAlias(text, _defaultCulture); } /// @@ -261,7 +334,7 @@ function validateSafeAlias(id, value, immediate, callback) {{ /// public virtual string CleanStringForSafeAlias(string text, CultureInfo culture) { - return 
CleanString(text, CleanStringType.Ascii | CleanStringType.UmbracoCase | CleanStringType.Alias, culture); + return CleanString(text, CleanStringType.Alias, culture); } /// @@ -275,7 +348,7 @@ function validateSafeAlias(id, value, immediate, callback) {{ /// public virtual string CleanStringForUrlSegment(string text) { - return CleanString(text, CleanStringType.Ascii | CleanStringType.LowerCase | CleanStringType.Url, '-'); + return CleanStringForUrlSegment(text, _defaultCulture); } /// @@ -289,11 +362,11 @@ function validateSafeAlias(id, value, immediate, callback) {{ /// public virtual string CleanStringForUrlSegment(string text, CultureInfo culture) { - return CleanString(text, CleanStringType.Ascii | CleanStringType.LowerCase | CleanStringType.Url, '-', culture); + return CleanString(text, CleanStringType.UrlSegment, culture); } /// - /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a filename, + /// Cleans a string, in the context of the default culture, to produce a string that can safely be used as a filename, /// both internally (on disk) and externally (as a url). /// /// The text to filter. @@ -301,94 +374,11 @@ function validateSafeAlias(id, value, immediate, callback) {{ /// Legacy says this was used to "overcome an issue when Umbraco is used in IE in an intranet environment" but that issue is not documented. public virtual string CleanStringForSafeFileName(string text) { - // - Original version - - if (String.IsNullOrEmpty(text)) - return String.Empty; - - text = string.IsNullOrWhiteSpace(text) == false - ? 
text.ReplaceMany(Path.GetInvalidFileNameChars(), '-') - : string.Empty; - - //Break up the file in name and extension before applying the UrlReplaceCharacters - var fileNamePart = text.Substring(0, text.LastIndexOf('.')); - var ext = text.Substring(text.LastIndexOf('.')); - - fileNamePart = ApplyUrlReplaceCharacters(fileNamePart); - - text = string.Concat(fileNamePart, ext); - - // Adapted from: http://stackoverflow.com/a/4827510/5018 - // Combined both Reserved Characters and Character Data - // from http://en.wikipedia.org/wiki/Percent-encoding - var stringBuilder = new StringBuilder(); - - const string reservedCharacters = "!*'();:@&=+$,/?%#[]-~{}\"<>\\^`| "; - - foreach (var character in text) - { - if (reservedCharacters.IndexOf(character) == -1) - stringBuilder.Append(character); - else - stringBuilder.Append("-"); - } - - // Remove repeating dashes - // From: http://stackoverflow.com/questions/5111967/regex-to-remove-a-specific-repeated-character - var reducedString = Regex.Replace(stringBuilder.ToString(), "-+", "-"); - - return reducedString; - - - // - Version 2 (Legacy Short string) - - //const string UmbracoValidAliasCharacters = "_-abcdefghijklmnopqrstuvwxyz1234567890"; - //const string UmbracoInvalidFirstCharacters = "0123456789"; - //const string validAliasCharacters = UmbracoValidAliasCharacters; - //const string invalidFirstCharacters = UmbracoInvalidFirstCharacters; - //var safeString = new StringBuilder(); - //int aliasLength = text.Length; - //for (var i = 0; i < aliasLength; i++) - //{ - // var currentChar = text.Substring(i, 1); - // if (validAliasCharacters.Contains(currentChar.ToLowerInvariant())) - // { - // // check for camel (if previous character is a space, we'll upper case the current one - // if (safeString.Length == 0 && invalidFirstCharacters.Contains(currentChar.ToLowerInvariant())) - // { - // //currentChar = ""; - // } - // else - // { - // if (i < aliasLength - 1 && i > 0 && text.Substring(i - 1, 1) == " ") - // currentChar = 
currentChar.ToUpperInvariant(); - - // safeString.Append(currentChar); - // } - // } - //} - //return safeString.ToString(); - - - // - Version 3 (Default short string) - - //if (string.IsNullOrWhiteSpace(text)) - // return string.Empty; - - //text = text.ReplaceMany(Path.GetInvalidFileNameChars(), '-'); - - //var pos = text.LastIndexOf('.'); - //var name = pos < 0 ? text : text.Substring(0, pos); - //var ext = pos < 0 ? string.Empty : text.Substring(pos + 1); - - //name = CleanString(name, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-'); - //ext = CleanString(ext, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-'); - - //return pos < 0 ? name : (name + "." + ext); + return CleanStringForSafeFileName(text, _defaultCulture); } /// - /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a filename, + /// Cleans a string to produce a string that can safely be used as a filename, /// both internally (on disk) and externally (as a url). /// /// The text to filter. @@ -401,14 +391,17 @@ function validateSafeAlias(id, value, immediate, callback) {{ text = text.ReplaceMany(Path.GetInvalidFileNameChars(), '-'); - var pos = text.LastIndexOf('.'); - var name = pos < 0 ? text : text.Substring(0, pos); - var ext = pos < 0 ? 
string.Empty : text.Substring(pos + 1); + var name = Path.GetFileNameWithoutExtension(text); + var ext = Path.GetExtension(text); // includes the dot, empty if no extension - name = CleanString(name, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-', culture); - ext = CleanString(ext, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-', culture); + Debug.Assert(name != null, "name != null"); + if (name.Length > 0) + name = CleanString(name, CleanStringType.FileName, culture); + Debug.Assert(ext != null, "ext != null"); + if (ext.Length > 0) + ext = CleanString(ext.Substring(1), CleanStringType.FileName, culture); - return pos < 0 ? name : (name + "." + ext); + return ext.Length > 0 ? (name + "." + ext) : name; } #endregion @@ -417,7 +410,7 @@ function validateSafeAlias(id, value, immediate, callback) {{ // MS rules & guidelines: // - Do capitalize both characters of two-character acronyms, except the first word of a camel-cased identifier. - // eg "DBRate" (pascal) or "ioHelper" (camel) - "specialDBRate" (pascal) or "specialIOHelper" (camel) + // eg "DBRate" (pascal) or "ioHelper" (camel) - "SpecialDBRate" (pascal) or "specialIOHelper" (camel) // - Do capitalize only the first character of acronyms with three or more characters, except the first word of a camel-cased identifier. // eg "XmlWriter (pascal) or "htmlReader" (camel) - "SpecialXmlWriter" (pascal) or "specialHtmlReader" (camel) // - Do not capitalize any of the characters of any acronyms, whatever their length, at the beginning of a camel-cased identifier. @@ -442,7 +435,7 @@ function validateSafeAlias(id, value, immediate, callback) {{ /// The string is cleaned in the context of the default culture. 
public string CleanString(string text, CleanStringType stringType) { - return CleanString(text, stringType, char.MinValue, _defaultCulture); + return CleanString(text, stringType, _defaultCulture, null); } /// @@ -456,7 +449,7 @@ function validateSafeAlias(id, value, immediate, callback) {{ /// The string is cleaned in the context of the default culture. public string CleanString(string text, CleanStringType stringType, char separator) { - return CleanString(text, stringType, separator, _defaultCulture); + return CleanString(text, stringType, _defaultCulture, separator); } /// @@ -469,7 +462,7 @@ function validateSafeAlias(id, value, immediate, callback) {{ /// The clean string. public string CleanString(string text, CleanStringType stringType, CultureInfo culture) { - return CleanString(text, stringType, char.MinValue, culture); + return CleanString(text, stringType, culture, null); } /// @@ -481,23 +474,12 @@ function validateSafeAlias(id, value, immediate, callback) {{ /// The separator. /// The culture. /// The clean string. - public virtual string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture) + public string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture) { - var config = GetConfig(stringType & CleanStringType.RoleMask, culture); - return CleanString(text, stringType, separator, culture, config); + return CleanString(text, stringType, culture, separator); } - /// - /// Cleans a string in the context of a specified culture, using a specified separator and configuration. - /// - /// The text to clean. - /// A flag indicating the target casing and encoding of the string. By default, - /// strings are cleaned up to camelCase and Ascii. - /// The separator. - /// The culture. - /// The configuration. - /// The clean string. 
- private string CleanString(string text, CleanStringType stringType, char separator, CultureInfo culture, HelperConfig config) + protected virtual string CleanString(string text, CleanStringType stringType, CultureInfo culture, char? separator) { // be safe if (text == null) @@ -505,13 +487,18 @@ function validateSafeAlias(id, value, immediate, callback) {{ if (culture == null) throw new ArgumentNullException("culture"); + // get config + var config = GetConfig(stringType, culture); + stringType = config.StringTypeExtend(stringType); + // apply defaults if ((stringType & CleanStringType.CaseMask) == CleanStringType.None) stringType |= CleanStringType.CamelCase; if ((stringType & CleanStringType.CodeMask) == CleanStringType.None) stringType |= CleanStringType.Ascii; - var codeType = stringType & CleanStringType.CodeMask; + // use configured unless specified + separator = separator ?? config.Separator; // apply pre-filter if (config.PreFilter != null) @@ -522,231 +509,46 @@ function validateSafeAlias(id, value, immediate, callback) {{ // text = ReplaceMany(text, config.Replacements); // recode - text = Recode(text, stringType); + var codeType = stringType & CleanStringType.CodeMask; + text = codeType == CleanStringType.Ascii + ? 
Utf8ToAsciiConverter.ToAsciiString(text) + : RemoveSurrogatePairs(text); // clean - switch (codeType) - { - case CleanStringType.Ascii: - // see note below - don't use CleanAsciiString - //text = CleanAsciiString(text, stringType, separator); - //break; - case CleanStringType.Utf8: - text = CleanUtf8String(text, stringType, separator, culture, config); - break; - case CleanStringType.Unicode: - throw new NotImplementedException("DefaultShortStringHelper does not handle unicode yet."); - default: - throw new ArgumentOutOfRangeException("stringType"); - } + text = CleanCodeString(text, stringType, separator.Value, culture, config); return text; } - // however proud I can be of that subtle, ascii-optimized code, - // benchmarking shows it is an order of magnitude slower that the utf8 version - // don't use it - keep it here should anyone be tempted to micro-optimize again... - // - // beware, it has bugs that are fixed in CleanUtf8String but I'm not going to - // bugfix commented code.... - - /* - internal string CleanAsciiString(string text) + private static string RemoveSurrogatePairs(string text) { - return CleanAsciiString(text, CleanStringType.CamelCase, char.MinValue); - } + var input = text.ToCharArray(); + var output = new char[input.Length]; + var opos = 0; - internal string CleanAsciiString(string text, CleanStringType caseType, char separator) - { - int opos = 0, ipos = 0; - var state = StateBreak; - - caseType &= CleanStringType.CaseMask; - - //switch (caseType) - //{ - // case CleanStringType.LowerCase: - // input = text.ToLowerInvariant().ToCharArray(); - // break; - // case CleanStringType.UpperCase: - // input = text.ToUpperInvariant().ToCharArray(); - // break; - // default: - // input = text.ToCharArray(); - // break; - //} - // if we apply global ToUpper or ToLower to text here - // then we cannot break words on uppercase chars - var input = text; - - // because we shouldn't be adding any extra char - // it's faster to use an array than a 
StringBuilder - var ilen = input.Length; - var output = new char[ilen]; - - Func termFilter = null; - - for (var i = 0; i < ilen; i++) + for (var ipos = 0; ipos < input.Length; ipos++) { - var idx = ValidStringCharacters.IndexOf(input[i]); - - switch (state) + var c = input[ipos]; + if (char.IsSurrogate(c)) // ignore high surrogate { - case StateBreak: - if (idx >= 0 && (opos > 0 || idx < 26 || idx >= 36)) - { - ipos = i; - if (opos > 0 && separator != char.MinValue) - output[opos++] = separator; - state = idx < 36 ? StateWord : StateUp; - } - break; - - case StateWord: - if (idx < 0 || (_breakTermsOnUpper && idx >= 36)) - { - CopyAsciiTerm(input, ipos, output, ref opos, i - ipos, caseType, termFilter, false); - ipos = i; - state = idx < 0 ? StateBreak : StateUp; - if (state != StateBreak && separator != char.MinValue) - output[opos++] = separator; - } - break; - - case StateAcronym: - if (idx < 36) - { - CopyAsciiTerm(input, ipos, output, ref opos, i - ipos, caseType, termFilter, true); - ipos = i; - state = idx < 0 ? StateBreak : StateWord; - if (state != StateBreak && separator != char.MinValue) - output[opos++] = separator; - } - break; - - case StateUp: - if (idx >= 0) - { - state = idx < 36 ? 
StateWord : StateAcronym; - } - else - { - CopyAsciiTerm(input, ipos, output, ref opos, 1, caseType, termFilter, false); - state = StateBreak; - } - break; - - default: - throw new Exception("Invalid state."); + ipos++; // and skip low surrogate + output[opos++] = '?'; + } + else + { + output[opos++] = c; } - } - - //Console.WriteLine("xx: ({0}) {1}, {2}, {3}", state, input.Length, ipos, opos); - switch (state) - { - case StateBreak: - break; - - case StateWord: - CopyAsciiTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, termFilter, false); - break; - - case StateAcronym: - case StateUp: - CopyAsciiTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, termFilter, true); - break; - - default: - throw new Exception("Invalid state."); } return new string(output, 0, opos); } - internal void CopyAsciiTerm(string input, int ipos, char[] output, ref int opos, int len, - CleanStringType caseType, Func termFilter, bool isAcronym) - { - var term = input.Substring(ipos, len); - ipos = 0; + // here was a subtle, ascii-optimized version of the cleaning code, and I was + // very proud of it until benchmarking showed it was an order of magnitude slower + // that the utf8 version. Micro-optimizing sometimes isn't such a good idea. - if (termFilter != null) - { - term = termFilter(term); - len = term.Length; - } - - if (isAcronym) - { - if (caseType == CleanStringType.CamelCase && len <= 2 && opos > 0) - caseType = CleanStringType.Unchanged; - else if (caseType == CleanStringType.PascalCase && len <= 2) - caseType = CleanStringType.Unchanged; - } - - int idx; - switch (caseType) - { - //case CleanStringType.LowerCase: - //case CleanStringType.UpperCase: - case CleanStringType.Unchanged: - term.CopyTo(ipos, output, opos, len); - opos += len; - break; - - case CleanStringType.LowerCase: - for (var i = ipos; i < ipos + len; i++) - { - idx = ValidStringCharacters.IndexOf(term[i]); - output[opos++] = ValidStringCharacters[idx >= 36 ? 
idx - 36 : idx]; - } - break; - - case CleanStringType.UpperCase: - for (var i = ipos; i < ipos + len; i++) - { - idx = ValidStringCharacters.IndexOf(term[i]); - output[opos++] = ValidStringCharacters[idx < 26 ? idx + 36 : idx]; - } - break; - - case CleanStringType.CamelCase: - idx = ValidStringCharacters.IndexOf(term[ipos]); - if (opos == 0) - output[opos++] = ValidStringCharacters[idx >= 36 ? idx - 36 : idx]; - else - output[opos++] = ValidStringCharacters[idx < 26 ? idx + 36 : idx]; - for (var i = ipos + 1; i < ipos + len; i++) - { - idx = ValidStringCharacters.IndexOf(term[i]); - output[opos++] = ValidStringCharacters[idx >= 36 ? idx - 36 : idx]; - } - break; - - case CleanStringType.PascalCase: - idx = ValidStringCharacters.IndexOf(term[ipos]); - output[opos++] = ValidStringCharacters[idx < 26 ? idx + 36 : idx]; - for (var i = ipos + 1; i < ipos + len; i++) - { - idx = ValidStringCharacters.IndexOf(term[i]); - output[opos++] = ValidStringCharacters[idx >= 36 ? idx - 36 : idx]; - } - break; - - default: - throw new ArgumentOutOfRangeException("caseType"); - } - } - */ - - // that's the default code that will work for utf8 strings - // will not handle unicode, though - - internal string CleanUtf8String(string text) - { - return CleanUtf8String(text, CleanStringType.CamelCase, char.MinValue, _defaultCulture, HelperConfig.Empty); - } - - internal string CleanUtf8String(string text, CleanStringType caseType, char separator, CultureInfo culture, HelperConfig config) + // note: does NOT support surrogate pairs in text + internal string CleanCodeString(string text, CleanStringType caseType, char separator, CultureInfo culture, Config config) { int opos = 0, ipos = 0; var state = StateBreak; @@ -761,21 +563,28 @@ function validateSafeAlias(id, value, immediate, callback) {{ var ilen = input.Length; var output = new char[ilen * 2]; // twice the length should be OK in all cases - //var termFilter = config.TermFilter; - for (var i = 0; i < ilen; i++) { var c = input[i]; 
- var isDigit = char.IsDigit(c); + var isTerm = config.IsTerm(c, opos == 0); + + //var isDigit = char.IsDigit(c); var isUpper = char.IsUpper(c); // false for digits, symbols... - var isLower = char.IsLower(c); // false for digits, symbols... - var isUnder = config.AllowUnderscoreInTerm && c == '_'; - var isTerm = char.IsLetterOrDigit(c) || isUnder; + //var isLower = char.IsLower(c); // false for digits, symbols... + + // what should I do with surrogates? + // no idea, really, so they are not supported at the moment + var isPair = char.IsSurrogate(c); + if (isPair) + throw new NotSupportedException("Surrogate pairs are not supported."); switch (state) { + // within a break case StateBreak: - if (isTerm && (opos > 0 || (isUnder == false && (config.AllowLeadingDigits || isDigit == false)))) + // begin a new term if char is a term char, + // and ( pos > 0 or it's also a valid leading char ) + if (isTerm) { ipos = i; if (opos > 0 && separator != char.MinValue) @@ -784,10 +593,13 @@ function validateSafeAlias(id, value, immediate, callback) {{ } break; + // within a term / word case StateWord: + // end a term if char is not a term char, + // or ( it's uppercase and we break terms on uppercase) if (isTerm == false || (config.BreakTermsOnUpper && isUpper)) { - CopyUtf8Term(input, ipos, output, ref opos, i - ipos, caseType, culture, /*termFilter,*/ false); + CopyTerm(input, ipos, output, ref opos, i - ipos, caseType, culture, false); ipos = i; state = isTerm ? 
StateUp : StateBreak; if (state != StateBreak && separator != char.MinValue) @@ -795,27 +607,48 @@ function validateSafeAlias(id, value, immediate, callback) {{ } break; + // within a term / acronym case StateAcronym: - if (isTerm == false || isLower || isDigit) + // end an acronym if char is not a term char, + // or if it's not uppercase / config + //Console.WriteLine("acro {0} {1}", c, (config.CutAcronymOnNonUpper && isUpper == false)); + if (isTerm == false || (config.CutAcronymOnNonUpper && isUpper == false)) { - if (isLower && config.GreedyAcronyms == false) - i -= 1; - CopyUtf8Term(input, ipos, output, ref opos, i - ipos, caseType, culture, /*termFilter,*/ true); - ipos = i; - state = isTerm ? StateWord : StateBreak; - if (state != StateBreak && separator != char.MinValue) - output[opos++] = separator; + // whether it's part of the acronym depends on whether we're greedy + if (isTerm && config.GreedyAcronyms == false) + i -= 1; // handle that char again, in another state - not part of the acronym + if (i - ipos > 1) // single-char can't be an acronym + { + CopyTerm(input, ipos, output, ref opos, i - ipos, caseType, culture, true); + ipos = i; + state = isTerm ? StateWord : StateBreak; + if (state != StateBreak && separator != char.MinValue) + output[opos++] = separator; + } + else if (isTerm) + { + state = StateWord; + } + } + else if (isUpper == false) // isTerm == true + { + // it's a term char and we don't cut... + // keep moving forward as a word + state = StateWord; } break; + // within a term / uppercase = could be a word or an acronym case StateUp: if (isTerm) { + // add that char to the term and pick word or acronym state = isUpper ? 
StateAcronym : StateWord; } else { - CopyUtf8Term(input, ipos, output, ref opos, 1, caseType, culture, /*termFilter,*/ false); + // single char, copy then break + CopyTerm(input, ipos, output, ref opos, 1, caseType, culture, false); state = StateBreak; } break; @@ -831,12 +664,12 @@ function validateSafeAlias(id, value, immediate, callback) {{ break; case StateWord: - CopyUtf8Term(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, /*termFilter,*/ false); + CopyTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, false); break; case StateAcronym: case StateUp: - CopyUtf8Term(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, /*termFilter,*/ true); + CopyTerm(input, ipos, output, ref opos, input.Length - ipos, caseType, culture, true); break; default: @@ -846,17 +679,15 @@ function validateSafeAlias(id, value, immediate, callback) {{ return new string(output, 0, opos); } - internal void CopyUtf8Term(string input, int ipos, char[] output, ref int opos, int len, - CleanStringType caseType, CultureInfo culture, /*Func termFilter,*/ bool isAcronym) + // note: supports surrogate pairs in input string + internal void CopyTerm(string input, int ipos, char[] output, ref int opos, int len, + CleanStringType caseType, CultureInfo culture, bool isAcronym) { var term = input.Substring(ipos, len); - ipos = 0; - - //if (termFilter != null) - //{ - // term = termFilter(term); - // len = term.Length; - //} + //Console.WriteLine("TERM \"{0}\" {1} {2}", + // term, + // isAcronym ? "acronym" : "word", + // caseType); if (isAcronym) { @@ -866,48 +697,100 @@ function validateSafeAlias(id, value, immediate, callback) {{ caseType = CleanStringType.Unchanged; } + // note: MSDN seems to imply that ToUpper or ToLower preserve the length + // of the string, but that this behavior is not guaranteed and could change. 
+ char c; + int i; + string s; switch (caseType) { //case CleanStringType.LowerCase: //case CleanStringType.UpperCase: case CleanStringType.Unchanged: - term.CopyTo(ipos, output, opos, len); + term.CopyTo(0, output, opos, len); opos += len; break; case CleanStringType.LowerCase: - term.ToLower(culture).CopyTo(ipos, output, opos, len); - opos += len; + term = term.ToLower(culture); + term.CopyTo(0, output, opos, term.Length); + opos += term.Length; break; case CleanStringType.UpperCase: - term.ToUpper(culture).CopyTo(ipos, output, opos, len); - opos += len; + term = term.ToUpper(culture); + term.CopyTo(0, output, opos, term.Length); + opos += term.Length; break; case CleanStringType.CamelCase: - c = term[ipos++]; - output[opos] = opos++ == 0 ? char.ToLower(c, culture) : char.ToUpper(c, culture); - if (len > 1) - term.ToLower(culture).CopyTo(ipos, output, opos, len - 1); - opos += len - 1; + c = term[0]; + i = 1; + if (char.IsSurrogate(c)) + { + s = term.Substring(0, 2); // term already starts at ipos: index from 0 + s = opos == 0 ? s.ToLower(culture) : s.ToUpper(culture); + s.CopyTo(0, output, opos, s.Length); + opos += s.Length; + i++; // surrogate pair len is 2 + } + else + { + output[opos] = opos++ == 0 ? 
char.ToLower(c, culture) : char.ToUpper(c, culture); + } + if (len > i) + { + term = term.Substring(i).ToLower(culture); + term.CopyTo(0, output, opos, term.Length); + opos += term.Length; + } break; case CleanStringType.PascalCase: - c = term[ipos++]; - output[opos++] = char.ToUpper(c, culture); - if (len > 1) - term.ToLower(culture).CopyTo(ipos, output, opos, len - 1); - opos += len - 1; + c = term[0]; + i = 1; + if (char.IsSurrogate(c)) + { + s = term.Substring(0, 2); // term already starts at ipos: index from 0 + s = s.ToUpper(culture); + s.CopyTo(0, output, opos, s.Length); + opos += s.Length; + i++; // surrogate pair len is 2 + } + else + { + output[opos++] = char.ToUpper(c, culture); + } + if (len > i) + { + term = term.Substring(i).ToLower(culture); + term.CopyTo(0, output, opos, term.Length); + opos += term.Length; + } break; case CleanStringType.UmbracoCase: - c = term[ipos++]; - output[opos] = opos++ == 0 ? c : char.ToUpper(c, culture); - if (len > 1) - term.CopyTo(ipos, output, opos, len - 1); - opos += len - 1; + c = term[0]; + i = 1; + if (char.IsSurrogate(c)) + { + s = term.Substring(0, 2); // term already starts at ipos: index from 0 + s = opos == 0 ? s : s.ToUpper(culture); + s.CopyTo(0, output, opos, s.Length); + opos += s.Length; + i++; // surrogate pair len is 2 + } + else + { + output[opos] = opos++ == 0 ? c : char.ToUpper(c, culture); + } + if (len > i) + { + term = term.Substring(i); + term.CopyTo(0, output, opos, term.Length); + opos += term.Length; + } break; default: @@ -926,6 +809,7 @@ function validateSafeAlias(id, value, immediate, callback) {{ /// The separator, which defaults to a whitespace. /// The splitted text. /// Supports Utf8 and Ascii strings, not Unicode strings. 
+ // NOTE does not support surrogate pairs at the moment public virtual string SplitPascalCasing(string text, char separator) { // be safe @@ -970,55 +854,6 @@ function validateSafeAlias(id, value, immediate, callback) {{ #endregion - #region Recode - - /// - /// Returns a new string containing only characters within the specified code type. 
- /// - /// The string to filter. - /// The string type. - /// The filtered string. - /// If is not Unicode then non-utf8 characters are - /// removed. If it is Ascii we try to do some intelligent replacement of accents, etc. - public virtual string Recode(string text, CleanStringType stringType) - { - // be safe - if (text == null) - throw new ArgumentNullException("text"); - - var codeType = stringType & CleanStringType.CodeMask; - - // unicode to utf8 or ascii: just remove the unicode chars - // utf8 to ascii: try to be clever and replace some chars - - // what's the point? - if (codeType == CleanStringType.Unicode) - return text; - - return codeType == CleanStringType.Utf8 - ? RemoveNonUtf8(text) - : Utf8ToAsciiConverter.ToAsciiString(text); - } - - private string RemoveNonUtf8(string text) - { - var len = text.Length; - var output = new char[len]; // we won't be adding chars - int opos = 0; - - for (var ipos = 0; ipos < len; ipos++) - { - var c = text[ipos]; - if (char.IsSurrogate(c)) - ipos++; - else - output[opos++] = c; - } - return new string(output, 0, opos); - } - - #endregion - #region ReplaceMany /// diff --git a/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs b/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs index f93c461fa3..23ac4e3931 100644 --- a/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs +++ b/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs @@ -72,8 +72,11 @@ namespace Umbraco.Core.Strings var opos = 0; for (var ipos = 0; ipos < input.Length; ipos++) - if (char.IsSurrogate(input[ipos])) - ipos++; + if (char.IsSurrogate(input[ipos])) // ignore high surrogate + { + ipos++; // and skip low surrogate + output[opos++] = '?'; + } else ToAscii(input, ipos, output, ref opos); diff --git a/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs b/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs index 17e583b635..b5b26b26bf 100644 --- a/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs +++ 
b/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs @@ -1,6 +1,10 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; +using System.Diagnostics; using System.Globalization; +using System.IO; using System.Linq; +using System.Text; using System.Text.RegularExpressions; using NUnit.Framework; using Umbraco.Core; @@ -29,10 +33,39 @@ namespace Umbraco.Tests.CoreStrings // so there still may be utf8 chars even though you want ascii _helper = new DefaultShortStringHelper() - .WithConfig(CleanStringType.Url, StripQuotes, allowLeadingDigits: true) - .WithConfig(new CultureInfo("fr-FR"), CleanStringType.Url, FilterFrenchElisions, allowLeadingDigits: true) - .WithConfig(CleanStringType.Alias, StripQuotes) - .WithConfig(new CultureInfo("fr-FR"), CleanStringType.Alias, WhiteQuotes); + .WithConfig(CleanStringType.FileName, new DefaultShortStringHelper.Config + { + //PreFilter = ClearFileChars, // done in IsTerm + IsTerm = (c, leading) => (char.IsLetterOrDigit(c) || c == '_') && DefaultShortStringHelper.IsValidFileNameChar(c), + StringType = CleanStringType.LowerCase | CleanStringType.Ascii, + Separator = '-' + }) + .WithConfig(CleanStringType.UrlSegment, new DefaultShortStringHelper.Config + { + PreFilter = StripQuotes, + IsTerm = (c, leading) => char.IsLetterOrDigit(c) || c == '_', + StringType = CleanStringType.LowerCase | CleanStringType.Ascii, + Separator = '-' + }) + .WithConfig(new CultureInfo("fr-FR"), CleanStringType.UrlSegment, new DefaultShortStringHelper.Config + { + PreFilter = FilterFrenchElisions, + IsTerm = (c, leading) => leading ? char.IsLetter(c) : (char.IsLetterOrDigit(c) || c == '_'), + StringType = CleanStringType.LowerCase | CleanStringType.Ascii, + Separator = '-' + }) + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + PreFilter = StripQuotes, + IsTerm = (c, leading) => leading ? 
char.IsLetter(c) : char.IsLetterOrDigit(c), + StringType = CleanStringType.UmbracoCase | CleanStringType.Ascii + }) + .WithConfig(new CultureInfo("fr-FR"), CleanStringType.Alias, new DefaultShortStringHelper.Config + { + PreFilter = WhiteQuotes, + IsTerm = (c, leading) => leading ? char.IsLetter(c) : char.IsLetterOrDigit(c), + StringType = CleanStringType.UmbracoCase | CleanStringType.Ascii + }); ShortStringHelperResolver.Reset(); ShortStringHelperResolver.Current = new ShortStringHelperResolver(_helper); @@ -65,6 +98,333 @@ namespace Umbraco.Tests.CoreStrings return s; } + [Test] + public void CleanStringUnderscoreInTerm() + { + var helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + // underscore is accepted within terms + IsTerm = (c, leading) => char.IsLetterOrDigit(c) || c == '_', + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + Separator = '*' + }); + Assert.AreEqual("foo_bar*nil", helper.CleanString("foo_bar nil", CleanStringType.Alias)); + + helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + // underscore is not accepted within terms + IsTerm = (c, leading) => char.IsLetterOrDigit(c), + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + Separator = '*' + }); + Assert.AreEqual("foo*bar*nil", helper.CleanString("foo_bar nil", CleanStringType.Alias)); + } + + [Test] + public void CleanStringLeadingChars() + { + var helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + // letters and digits are valid leading chars + IsTerm = (c, leading) => char.IsLetterOrDigit(c), + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + Separator = '*' + }); + Assert.AreEqual("0123foo*bar*nil", helper.CleanString("0123foo_bar nil", CleanStringType.Alias)); + + helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new 
DefaultShortStringHelper.Config + { + // only letters are valid leading chars + IsTerm = (c, leading) => leading ? char.IsLetter(c) : char.IsLetterOrDigit(c), + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + Separator = '*' + }); + Assert.AreEqual("foo*bar*nil", helper.CleanString("0123foo_bar nil", CleanStringType.Alias)); + Assert.AreEqual("foo*bar*nil", helper.CleanString("0123 foo_bar nil", CleanStringType.Alias)); + } + + [Test] + public void CleanStringTermOnUpper() + { + var helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + // uppercase letter means new term + BreakTermsOnUpper = true, + Separator = '*' + }); + Assert.AreEqual("foo*Bar", helper.CleanString("fooBar", CleanStringType.Alias)); + + helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + // uppercase letter is part of term + BreakTermsOnUpper = false, + Separator = '*' + }); + Assert.AreEqual("fooBar", helper.CleanString("fooBar", CleanStringType.Alias)); + } + + [Test] + public void CleanStringAcronymOnNonUpper() + { + var helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + // non-uppercase letter means cut acronym + CutAcronymOnNonUpper = true, + Separator = '*' + }); + Assert.AreEqual("foo*BAR*Rnil", helper.CleanString("foo BARRnil", CleanStringType.Alias)); + Assert.AreEqual("foo*BA*Rnil", helper.CleanString("foo BARnil", CleanStringType.Alias)); + Assert.AreEqual("foo*BAnil", helper.CleanString("foo BAnil", CleanStringType.Alias)); + Assert.AreEqual("foo*Bnil", helper.CleanString("foo Bnil", CleanStringType.Alias)); + + helper = new DefaultShortStringHelper() + 
.WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + // non-uppercase letter means word + CutAcronymOnNonUpper = false, + Separator = '*' + }); + Assert.AreEqual("foo*BARRnil", helper.CleanString("foo BARRnil", CleanStringType.Alias)); + Assert.AreEqual("foo*BARnil", helper.CleanString("foo BARnil", CleanStringType.Alias)); + Assert.AreEqual("foo*BAnil", helper.CleanString("foo BAnil", CleanStringType.Alias)); + Assert.AreEqual("foo*Bnil", helper.CleanString("foo Bnil", CleanStringType.Alias)); + } + + [Test] + public void CleanStringGreedyAcronyms() + { + var helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + CutAcronymOnNonUpper = true, + GreedyAcronyms = true, + Separator = '*' + }); + Assert.AreEqual("foo*BARR*nil", helper.CleanString("foo BARRnil", CleanStringType.Alias)); + Assert.AreEqual("foo*BAR*nil", helper.CleanString("foo BARnil", CleanStringType.Alias)); + Assert.AreEqual("foo*BA*nil", helper.CleanString("foo BAnil", CleanStringType.Alias)); + Assert.AreEqual("foo*Bnil", helper.CleanString("foo Bnil", CleanStringType.Alias)); + + helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + CutAcronymOnNonUpper = true, + GreedyAcronyms = false, + Separator = '*' + }); + Assert.AreEqual("foo*BAR*Rnil", helper.CleanString("foo BARRnil", CleanStringType.Alias)); + Assert.AreEqual("foo*BA*Rnil", helper.CleanString("foo BARnil", CleanStringType.Alias)); + Assert.AreEqual("foo*BAnil", helper.CleanString("foo BAnil", CleanStringType.Alias)); + Assert.AreEqual("foo*Bnil", helper.CleanString("foo Bnil", CleanStringType.Alias)); + } + + [Test] + public void CleanStringWhiteSpace() + { + var helper = new 
DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + Separator = '*' + }); + Assert.AreEqual("foo", helper.CleanString(" foo ", CleanStringType.Alias)); + Assert.AreEqual("foo*bar", helper.CleanString(" foo bar ", CleanStringType.Alias)); + } + + [Test] + public void CleanStringSeparator() + { + var helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + Separator = '*' + }); + Assert.AreEqual("foo*bar", helper.CleanString("foo bar", CleanStringType.Alias)); + + helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + Separator = ' ' + }); + Assert.AreEqual("foo bar", helper.CleanString("foo bar", CleanStringType.Alias)); + + helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged + }); + Assert.AreEqual("foobar", helper.CleanString("foo bar", CleanStringType.Alias)); + + helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + Separator = '文' + }); + Assert.AreEqual("foo文bar", helper.CleanString("foo bar", CleanStringType.Alias)); + } + + [Test] + public void CleanStringSymbols() + { + var helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + Separator = '*' + }); + Assert.AreEqual("house*2", helper.CleanString("house (2)", CleanStringType.Alias)); + + // FIXME but for a filename we want to keep them! 
+ // FIXME and what about a url? + } + + [Test] + public void Utf8Surrogates() + { + // Unicode values between 0x10000 and 0x10FFFF are represented by two 16-bit "surrogate" characters + const string str = "a\U00010F00z\uA74Ft"; + Assert.AreEqual(6, str.Length); + Assert.IsTrue(char.IsSurrogate(str[1])); + Assert.IsTrue(char.IsHighSurrogate(str[1])); + Assert.IsTrue(char.IsSurrogate(str[2])); + Assert.IsTrue(char.IsLowSurrogate(str[2])); + Assert.AreEqual('z', str[3]); + Assert.IsFalse(char.IsSurrogate(str[4])); + Assert.AreEqual('\uA74F', str[4]); + Assert.AreEqual('t', str[5]); + + Assert.AreEqual("z", str.Substring(3, 1)); + Assert.AreEqual("\U00010F00", str.Substring(1, 2)); + + var bytes = Encoding.UTF8.GetBytes(str); + Assert.AreEqual(10, bytes.Length); + Assert.AreEqual('a', bytes[0]); + // then next string element is two chars (surrogate pair) or 4 bytes, 21 bits of code point + Assert.AreEqual('z', bytes[5]); + // then next string element is one char and 3 bytes, 16 bits of code point + Assert.AreEqual('t', bytes[9]); + //foreach (var b in bytes) + // Console.WriteLine("{0:X}", b); + + Console.WriteLine("\U00010B70"); + } + + [Test] + public void Utf8ToAsciiConverter() + { + const string str = "a\U00010F00z\uA74Ftéô"; + var output = Core.Strings.Utf8ToAsciiConverter.ToAsciiString(str); + Assert.AreEqual("a?zooteo", output); + } + + [Test] + public void CleanStringEncoding() + { + var helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + Separator = '*' + }); + Assert.AreEqual("中文测试", helper.CleanString("中文测试", CleanStringType.Alias)); + Assert.AreEqual("léger*中文测试*ZÔRG", helper.CleanString("léger 中文测试 ZÔRG", CleanStringType.Alias)); + + helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + StringType = CleanStringType.Ascii | CleanStringType.Unchanged, + Separator = 
'*' + }); + Assert.AreEqual("", helper.CleanString("中文测试", CleanStringType.Alias)); + Assert.AreEqual("leger*ZORG", helper.CleanString("léger 中文测试 ZÔRG", CleanStringType.Alias)); + } + + [Test] + public void CleanStringDefaultConfig() + { + var helper = new DefaultShortStringHelper().WithDefaultConfig(); + + const string input = "0123 中文测试 中文测试 léger ZÔRG (2) a?? *x"; + + var alias = helper.CleanStringForSafeAlias(input); + var filename = helper.CleanStringForSafeFileName(input); + var segment = helper.CleanStringForUrlSegment(input); + + // umbraco-cased ascii alias, must begin with a proper letter + Assert.AreEqual("legerZORG2AX", alias, "alias"); + + // lower-cased, utf8 filename, removing illegal filename chars, using dash-separator + Assert.AreEqual("0123-中文测试-中文测试-léger-zôrg-2-a-x", filename, "filename"); + + // lower-cased, utf8 url segment, only letters and digits, using dash-separator + Assert.AreEqual("0123-中文测试-中文测试-léger-zôrg-2-a-x", segment, "segment"); + } + + [Test] + public void CleanStringCasing() + { + var helper = new DefaultShortStringHelper() + .WithConfig(CleanStringType.Alias, new DefaultShortStringHelper.Config + { + StringType = CleanStringType.Utf8 | CleanStringType.Unchanged, + Separator = ' ' + }); + + // BBB is an acronym + // E is a word (too short to be an acronym) + // FF is an acronym + + // FIXME "C" can't be an acronym + // FIXME "DBXreview" = acronym?! 
+ + Assert.AreEqual("aaa BBB CCc Ddd E FF", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias)); // unchanged + Assert.AreEqual("aaa Bbb Ccc Ddd E FF", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias | CleanStringType.CamelCase)); + Assert.AreEqual("Aaa Bbb Ccc Ddd E FF", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias | CleanStringType.PascalCase)); + Assert.AreEqual("aaa bbb ccc ddd e ff", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias | CleanStringType.LowerCase)); + Assert.AreEqual("AAA BBB CCC DDD E FF", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias | CleanStringType.UpperCase)); + Assert.AreEqual("aaa BBB CCc Ddd E FF", helper.CleanString("aaa BBB CCc Ddd E FF", CleanStringType.Alias | CleanStringType.UmbracoCase)); + + // MS rules & guidelines: + // - Do capitalize both characters of two-character acronyms, except the first word of a camel-cased identifier. + // eg "DBRate" (pascal) or "ioHelper" (camel) - "SpecialDBRate" (pascal) or "specialIOHelper" (camel) + // - Do capitalize only the first character of acronyms with three or more characters, except the first word of a camel-cased identifier. + // eg "XmlWriter (pascal) or "htmlReader" (camel) - "SpecialXmlWriter" (pascal) or "specialHtmlReader" (camel) + // - Do not capitalize any of the characters of any acronyms, whatever their length, at the beginning of a camel-cased identifier. 
+ // eg "xmlWriter" or "dbWriter" (camel) + + Assert.AreEqual("aaa BB Ccc", helper.CleanString("aaa BB ccc", CleanStringType.Alias | CleanStringType.CamelCase)); + Assert.AreEqual("aa Bb Ccc", helper.CleanString("AA bb ccc", CleanStringType.Alias | CleanStringType.CamelCase)); + Assert.AreEqual("aaa Bb Ccc", helper.CleanString("AAA bb ccc", CleanStringType.Alias | CleanStringType.CamelCase)); + Assert.AreEqual("db Rate", helper.CleanString("DB rate", CleanStringType.Alias | CleanStringType.CamelCase)); + Assert.AreEqual("special DB Rate", helper.CleanString("special DB rate", CleanStringType.Alias | CleanStringType.CamelCase)); + Assert.AreEqual("xml Writer", helper.CleanString("XML writer", CleanStringType.Alias | CleanStringType.CamelCase)); + Assert.AreEqual("special Xml Writer", helper.CleanString("special XML writer", CleanStringType.Alias | CleanStringType.CamelCase)); + + Assert.AreEqual("Aaa BB Ccc", helper.CleanString("aaa BB ccc", CleanStringType.Alias | CleanStringType.PascalCase)); + Assert.AreEqual("AA Bb Ccc", helper.CleanString("AA bb ccc", CleanStringType.Alias | CleanStringType.PascalCase)); + Assert.AreEqual("Aaa Bb Ccc", helper.CleanString("AAA bb ccc", CleanStringType.Alias | CleanStringType.PascalCase)); + Assert.AreEqual("DB Rate", helper.CleanString("DB rate", CleanStringType.Alias | CleanStringType.PascalCase)); + Assert.AreEqual("Special DB Rate", helper.CleanString("special DB rate", CleanStringType.Alias | CleanStringType.PascalCase)); + Assert.AreEqual("Xml Writer", helper.CleanString("XML writer", CleanStringType.Alias | CleanStringType.PascalCase)); + Assert.AreEqual("Special Xml Writer", helper.CleanString("special XML writer", CleanStringType.Alias | CleanStringType.PascalCase)); + } + #region Cases [TestCase("foo", "foo")] [TestCase(" foo ", "foo")] @@ -104,29 +464,29 @@ namespace Umbraco.Tests.CoreStrings Assert.AreEqual(expected, output); } - #region Cases - [TestCase("This is my_little_house so cute.", 
"thisIsMyLittleHouseSoCute", false)] - [TestCase("This is my_little_house so cute.", "thisIsMy_little_houseSoCute", true)] - [TestCase("This is my_Little_House so cute.", "thisIsMyLittleHouseSoCute", false)] - [TestCase("This is my_Little_House so cute.", "thisIsMy_Little_HouseSoCute", true)] - [TestCase("An UPPER_CASE_TEST to check", "anUpperCaseTestToCheck", false)] - [TestCase("An UPPER_CASE_TEST to check", "anUpper_case_testToCheck", true)] - [TestCase("Trailing_", "trailing", false)] - [TestCase("Trailing_", "trailing_", true)] - [TestCase("_Leading", "leading", false)] - [TestCase("_Leading", "leading", true)] - [TestCase("Repeat___Repeat", "repeatRepeat", false)] - [TestCase("Repeat___Repeat", "repeat___Repeat", true)] - [TestCase("Repeat___repeat", "repeatRepeat", false)] - [TestCase("Repeat___repeat", "repeat___repeat", true)] - #endregion - public void CleanStringWithUnderscore(string input, string expected, bool allowUnderscoreInTerm) - { - var helper = new DefaultShortStringHelper() - .WithConfig(allowUnderscoreInTerm: allowUnderscoreInTerm); - var output = helper.CleanString(input, CleanStringType.Alias | CleanStringType.Ascii | CleanStringType.CamelCase); - Assert.AreEqual(expected, output); - } + //#region Cases + //[TestCase("This is my_little_house so cute.", "thisIsMyLittleHouseSoCute", false)] + //[TestCase("This is my_little_house so cute.", "thisIsMy_little_houseSoCute", true)] + //[TestCase("This is my_Little_House so cute.", "thisIsMyLittleHouseSoCute", false)] + //[TestCase("This is my_Little_House so cute.", "thisIsMy_Little_HouseSoCute", true)] + //[TestCase("An UPPER_CASE_TEST to check", "anUpperCaseTestToCheck", false)] + //[TestCase("An UPPER_CASE_TEST to check", "anUpper_case_testToCheck", true)] + //[TestCase("Trailing_", "trailing", false)] + //[TestCase("Trailing_", "trailing_", true)] + //[TestCase("_Leading", "leading", false)] + //[TestCase("_Leading", "leading", true)] + //[TestCase("Repeat___Repeat", "repeatRepeat", false)] + 
//[TestCase("Repeat___Repeat", "repeat___Repeat", true)] + //[TestCase("Repeat___repeat", "repeatRepeat", false)] + //[TestCase("Repeat___repeat", "repeat___repeat", true)] + //#endregion + //public void CleanStringWithUnderscore(string input, string expected, bool allowUnderscoreInTerm) + //{ + // var helper = new DefaultShortStringHelper() + // .WithConfig(allowUnderscoreInTerm: allowUnderscoreInTerm); + // var output = helper.CleanString(input, CleanStringType.Alias | CleanStringType.Ascii | CleanStringType.CamelCase); + // Assert.AreEqual(expected, output); + //} #region Cases [TestCase("Home Page", "home-page")] @@ -137,7 +497,6 @@ namespace Umbraco.Tests.CoreStrings [TestCase("汉#字*/漢?字", "")] [TestCase("Réalösk fix bran#lo'sk", "realosk-fix-bran-losk")] [TestCase("200 ways to be happy", "200-ways-to-be-happy")] - [TestCase("aBCdEfGhIJK", "a-b-cd-ef-gh-ijk")] #endregion public void CleanStringForUrlSegment(string input, string expected) { @@ -166,173 +525,19 @@ namespace Umbraco.Tests.CoreStrings } #region Cases - [TestCase("foo", "foo")] - [TestCase(" foo ", "foo")] - [TestCase("Foo", "foo")] - [TestCase("FoO", "foO")] - [TestCase("FoO bar", "foOBar")] - [TestCase("FoO bar NIL", "foOBarNil")] - [TestCase("FoO 33bar 22NIL", "foO33bar22Nil")] - [TestCase("FoO 33bar 22NI", "foO33bar22NI")] - [TestCase("0foo", "foo")] - [TestCase("2foo bar", "fooBar")] - [TestCase("9FOO", "foo")] - [TestCase("foo-BAR", "fooBar")] - [TestCase("foo-BA-dang", "fooBADang")] - [TestCase("foo_BAR", "fooBar")] - [TestCase("foo'BAR", "fooBar")] - [TestCase("sauté dans l'espace", "sautéDansLEspace")] - [TestCase("foo\"\"bar", "fooBar")] - [TestCase("-foo-", "foo")] - [TestCase("_foo_", "foo")] - [TestCase("spécial", "spécial")] - [TestCase("brô dëk ", "brôDëk")] - [TestCase("1235brô dëk ", "brôDëk")] - [TestCase("汉#字*/漢?字", "汉字漢字")] - [TestCase("aa DB cd EFG X KLMN OP qrst", "aaDBCdEfgXKlmnOPQrst")] - [TestCase("AA db cd EFG X KLMN OP qrst", "aaDbCdEfgXKlmnOPQrst")] - [TestCase("AAA db cd 
EFG X KLMN OP qrst", "aaaDbCdEfgXKlmnOPQrst")] - [TestCase("quelle élévation à partir", "quelleÉlévationÀPartir")] - #endregion - public void CleanUtf8String(string input, string expected) - { - input = _helper.Recode(input, CleanStringType.Utf8); - var output = _helper.CleanUtf8String(input); - Assert.AreEqual(expected, output); - } - - #region Cases - [TestCase("sauté dans l'espace", "saute-dans-espace", "fr-FR", CleanStringType.Url | CleanStringType.Ascii | CleanStringType.LowerCase)] - [TestCase("sauté dans l'espace", "sauté-dans-espace", "fr-FR", CleanStringType.Url | CleanStringType.Utf8 | CleanStringType.LowerCase)] + [TestCase("sauté dans l'espace", "saute-dans-espace", "fr-FR", CleanStringType.UrlSegment | CleanStringType.Ascii | CleanStringType.LowerCase)] + [TestCase("sauté dans l'espace", "sauté-dans-espace", "fr-FR", CleanStringType.UrlSegment | CleanStringType.Utf8 | CleanStringType.LowerCase)] [TestCase("sauté dans l'espace", "SauteDansLEspace", "fr-FR", CleanStringType.Alias | CleanStringType.Ascii | CleanStringType.PascalCase)] - [TestCase("he doesn't want", "he-doesnt-want", null, CleanStringType.Url | CleanStringType.Ascii | CleanStringType.LowerCase)] + [TestCase("he doesn't want", "he-doesnt-want", null, CleanStringType.UrlSegment | CleanStringType.Ascii | CleanStringType.LowerCase)] [TestCase("he doesn't want", "heDoesntWant", null, CleanStringType.Alias | CleanStringType.Ascii | CleanStringType.CamelCase)] #endregion public void CleanStringWithTypeAndCulture(string input, string expected, string culture, CleanStringType stringType) { var cinfo = culture == null ? CultureInfo.InvariantCulture : new CultureInfo(culture); - var separator = (stringType & CleanStringType.Url) == CleanStringType.Url ? 
'-' : char.MinValue; - var output = _helper.CleanString(input, stringType, separator, cinfo); - Assert.AreEqual(expected, output); - } - #region Cases - [TestCase("foo", "foo")] - [TestCase(" foo ", "foo")] - [TestCase("Foo", "foo")] - [TestCase("FoO", "foO")] - [TestCase("FoO bar", "foOBar")] - [TestCase("FoO bar NIL", "foOBarNil")] - [TestCase("FoO 33bar 22NIL", "foO33bar22Nil")] - [TestCase("FoO 33bar 22NI", "foO33bar22NI")] - [TestCase("0foo", "foo")] - [TestCase("2foo bar", "fooBar")] - [TestCase("9FOO", "foo")] - [TestCase("foo-BAR", "fooBar")] - [TestCase("foo-BA-dang", "fooBADang")] - [TestCase("foo_BAR", "fooBar")] - [TestCase("foo'BAR", "fooBar")] - [TestCase("sauté dans l'espace", "sauteDansLEspace")] - [TestCase("foo\"\"bar", "fooBar")] - [TestCase("-foo-", "foo")] - [TestCase("_foo_", "foo")] - [TestCase("spécial", "special")] - [TestCase("brô dëk ", "broDek")] - [TestCase("1235brô dëk ", "broDek")] - [TestCase("汉#字*/漢?字", "")] - [TestCase("aa DB cd EFG X KLMN OP qrst", "aaDBCdEfgXKlmnOPQrst")] - [TestCase("AA db cd EFG X KLMN OP qrst", "aaDbCdEfgXKlmnOPQrst")] - [TestCase("AAA db cd EFG X KLMN OP qrst", "aaaDbCdEfgXKlmnOPQrst")] - #endregion - public void CleanStringToAscii(string input, string expected) - { - var output = _helper.CleanString(input, CleanStringType.Ascii | CleanStringType.CamelCase); - Assert.AreEqual(expected, output); - } - - #region Cases - [TestCase("1235brô dëK tzARlan ban123!pOo", "brodeKtzARlanban123pOo", CleanStringType.Unchanged)] - [TestCase(" 1235brô dëK tzARlan ban123!pOo ", "brodeKtzARlanban123pOo", CleanStringType.Unchanged)] - [TestCase("1235brô dëK tzARlan ban123!pOo", "BroDeKTzARlanBan123POo", CleanStringType.PascalCase)] - [TestCase("1235brô dëK tzARlan ban123!pOo", "broDeKTzARlanBan123POo", CleanStringType.CamelCase)] - [TestCase("1235brô dëK tzARlan ban123!pOo", "BRODEKTZARLANBAN123POO", CleanStringType.UpperCase)] - [TestCase("1235brô dëK tzARlan ban123!pOo", "brodektzarlanban123poo", CleanStringType.LowerCase)] - 
[TestCase("aa DB cd EFG X KLMN OP qrst", "aaDBCdEfgXKlmnOPQrst", CleanStringType.CamelCase)] - [TestCase("aaa DB cd EFG X KLMN OP qrst", "aaaDBCdEfgXKlmnOPQrst", CleanStringType.CamelCase)] - [TestCase("aa DB cd EFG X KLMN OP qrst", "AaDBCdEfgXKlmnOPQrst", CleanStringType.PascalCase)] - [TestCase("aaa DB cd EFG X KLMN OP qrst", "AaaDBCdEfgXKlmnOPQrst", CleanStringType.PascalCase)] - [TestCase("AA db cd EFG X KLMN OP qrst", "aaDbCdEfgXKlmnOPQrst", CleanStringType.CamelCase)] - [TestCase("AAA db cd EFG X KLMN OP qrst", "aaaDbCdEfgXKlmnOPQrst", CleanStringType.CamelCase)] - [TestCase("AA db cd EFG X KLMN OP qrst", "AADbCdEfgXKlmnOPQrst", CleanStringType.PascalCase)] - [TestCase("AAA db cd EFG X KLMN OP qrst", "AaaDbCdEfgXKlmnOPQrst", CleanStringType.PascalCase)] - [TestCase("We store some HTML in the DB for performance", "WeStoreSomeHtmlInTheDBForPerformance", CleanStringType.PascalCase)] - [TestCase("We store some HTML in the DB for performance", "weStoreSomeHtmlInTheDBForPerformance", CleanStringType.CamelCase)] - [TestCase("X is true", "XIsTrue", CleanStringType.PascalCase)] - [TestCase("X is true", "xIsTrue", CleanStringType.CamelCase)] - [TestCase("IO are slow", "IOAreSlow", CleanStringType.PascalCase)] - [TestCase("IO are slow", "ioAreSlow", CleanStringType.CamelCase)] - [TestCase("RAM is fast", "RamIsFast", CleanStringType.PascalCase)] - [TestCase("RAM is fast", "ramIsFast", CleanStringType.CamelCase)] - [TestCase("Tab 1", "tab1", CleanStringType.CamelCase)] - [TestCase("Home - Page", "homePage", CleanStringType.CamelCase)] - [TestCase("Shannon's Document Type", "shannonSDocumentType", CleanStringType.CamelCase)] - [TestCase("Shannon's Document Type", "shannonsDocumentType", CleanStringType.CamelCase | CleanStringType.Alias)] - [TestCase("!BADDLY nam-ed Document Type", "baddlyNamEdDocumentType", CleanStringType.CamelCase)] - [TestCase(" !BADDLY nam-ed Document Type", "BADDLYnamedDocumentType", CleanStringType.Unchanged)] - [TestCase("!BADDLY nam-ed Document 
Type", "BaddlyNamEdDocumentType", CleanStringType.PascalCase)] - [TestCase("i %Want!thisTo end up In Proper@case", "IWantThisToEndUpInProperCase", CleanStringType.PascalCase)] - [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "raksmorgasKeKe", CleanStringType.CamelCase)] - [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "RaksmorgasKeKe", CleanStringType.PascalCase)] - [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "RaksmorgaskeKe", CleanStringType.Unchanged)] - [TestCase("TRii", "TRii", CleanStringType.Unchanged)] - [TestCase("**TRii", "TRii", CleanStringType.Unchanged)] - [TestCase("TRii", "tRii", CleanStringType.CamelCase)] - [TestCase("TRXii", "trXii", CleanStringType.CamelCase)] - [TestCase("**TRii", "tRii", CleanStringType.CamelCase)] - [TestCase("TRii", "TRii", CleanStringType.PascalCase)] - [TestCase("TRXii", "TRXii", CleanStringType.PascalCase)] - [TestCase("**TRii", "TRii", CleanStringType.PascalCase)] - [TestCase("trII", "trII", CleanStringType.Unchanged)] - [TestCase("**trII", "trII", CleanStringType.Unchanged)] - [TestCase("trII", "trII", CleanStringType.CamelCase)] - [TestCase("**trII", "trII", CleanStringType.CamelCase)] - [TestCase("trII", "TrII", CleanStringType.PascalCase)] - [TestCase("**trII", "TrII", CleanStringType.PascalCase)] - [TestCase("trIIX", "trIix", CleanStringType.CamelCase)] - [TestCase("**trIIX", "trIix", CleanStringType.CamelCase)] - [TestCase("trIIX", "TrIix", CleanStringType.PascalCase)] - [TestCase("**trIIX", "TrIix", CleanStringType.PascalCase)] - #endregion - public void CleanStringToAsciiWithType(string input, string expected, CleanStringType caseType) - { - var output = _helper.CleanString(input, caseType | CleanStringType.Ascii); - Assert.AreEqual(expected, output); - } - - #region Cases - [TestCase("1235brô dëK tzARlan ban123!pOo", "bro de K tz A Rlan ban123 p Oo", ' ', CleanStringType.Unchanged)] - [TestCase(" 1235brô dëK tzARlan ban123!pOo ", "bro de K tz A Rlan ban123 p Oo", ' ', CleanStringType.Unchanged)] - [TestCase("1235brô dëK tzARlan 
ban123!pOo", "Bro De K Tz A Rlan Ban123 P Oo", ' ', CleanStringType.PascalCase)] - [TestCase("1235brô dëK tzARlan ban123!pOo", "Bro De K Tz A Rlan Ban123 P Oo", ' ', CleanStringType.PascalCase)] - [TestCase("1235brô dëK tzARlan ban123!pOo", "bro De K Tz A Rlan Ban123 P Oo", ' ', CleanStringType.CamelCase)] - [TestCase("1235brô dëK tzARlan ban123!pOo", "bro-De-K-Tz-A-Rlan-Ban123-P-Oo", '-', CleanStringType.CamelCase)] - [TestCase("1235brô dëK tzARlan ban123!pOo", "BRO-DE-K-TZ-A-RLAN-BAN123-P-OO", '-', CleanStringType.UpperCase)] - [TestCase("1235brô dëK tzARlan ban123!pOo", "bro-de-k-tz-a-rlan-ban123-p-oo", '-', CleanStringType.LowerCase)] - [TestCase("Tab 1", "tab 1", ' ', CleanStringType.CamelCase)] - [TestCase("Home - Page", "home Page", ' ', CleanStringType.CamelCase)] - [TestCase("Shannon's Document Type", "shannon S Document Type", ' ', CleanStringType.CamelCase)] - [TestCase("Shannon's Document Type", "shannons Document Type", ' ', CleanStringType.CamelCase | CleanStringType.Alias)] - [TestCase("!BADDLY nam-ed Document Type", "baddly Nam Ed Document Type", ' ', CleanStringType.CamelCase)] - [TestCase(" !BADDLY nam-ed Document Type", "BADDLY nam ed Document Type", ' ', CleanStringType.Unchanged)] - [TestCase("!BADDLY nam-ed Document Type", "Baddly Nam Ed Document Type", ' ', CleanStringType.PascalCase)] - [TestCase("i %Want!thisTo end up In Proper@case", "I Want This To End Up In Proper Case", ' ', CleanStringType.PascalCase)] - [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "raksmorgas Ke Ke", ' ', CleanStringType.CamelCase)] - [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "Raksmorgas Ke Ke", ' ', CleanStringType.PascalCase)] - [TestCase("Räksmörgås %%$£¤¤¤§ kéKé", "Raksmorgas ke Ke", ' ', CleanStringType.Unchanged)] - #endregion - public void CleanStringToAsciiWithTypeAndSeparator(string input, string expected, char separator, CleanStringType caseType) - { - var output = _helper.CleanString(input, caseType | CleanStringType.Ascii, separator); + // picks the proper config per 
culture + // and overrides some stringType params (ascii...) + var output = _helper.CleanString(input, stringType, cinfo); Assert.AreEqual(expected, output); } diff --git a/src/Umbraco.Web/Strategies/Migrations/RebuildMediaXmlCacheAfterUpgrade.cs b/src/Umbraco.Web/Strategies/Migrations/RebuildMediaXmlCacheAfterUpgrade.cs new file mode 100644 index 0000000000..72cf6c24ea --- /dev/null +++ b/src/Umbraco.Web/Strategies/Migrations/RebuildMediaXmlCacheAfterUpgrade.cs @@ -0,0 +1,38 @@ +using System; +using Umbraco.Core; +using Umbraco.Core.Persistence.Migrations; +using Umbraco.Core.Services; +using umbraco.interfaces; + +namespace Umbraco.Web.Strategies.Migrations +{ + /// <summary> + /// This will execute after upgrading to rebuild the media xml cache + /// </summary> + /// <remarks> + /// This cannot execute as part of a db migration since we need access to the services/repos. + /// + /// This will execute for specific versions - + /// + /// * If the configured version is less than or equal to 7.0.0 + /// </remarks> + public class RebuildMediaXmlCacheAfterUpgrade : IApplicationStartupHandler + { + public RebuildMediaXmlCacheAfterUpgrade() + { + MigrationRunner.Migrated += MigrationRunner_Migrated; + } + + void MigrationRunner_Migrated(MigrationRunner sender, Core.Events.MigrationEventArgs e) + { + var target70 = new Version(7, 0, 0); + + if (e.ConfiguredVersion <= target70) + { + var mediasvc = (MediaService)ApplicationContext.Current.Services.MediaService; + mediasvc.RebuildXmlStructures(); + } + + } + } +} \ No newline at end of file diff --git a/src/Umbraco.Web/Umbraco.Web.csproj b/src/Umbraco.Web/Umbraco.Web.csproj index ab627630c1..6619767148 100644 --- a/src/Umbraco.Web/Umbraco.Web.csproj +++ b/src/Umbraco.Web/Umbraco.Web.csproj @@ -618,6 +618,7 @@ Resources.resx +