2013-02-07 13:30:50 -01:00
using System ;
using System.Collections.Generic ;
using System.Globalization ;
2013-03-11 14:58:07 -01:00
using System.IO ;
2013-02-07 13:30:50 -01:00
using System.Linq ;
using System.Text ;
using System.Text.RegularExpressions ;
using System.Xml ;
using Umbraco.Core.Configuration ;
namespace Umbraco.Core.Strings
{
/// <summary>
/// Legacy implementation of string functions for short strings such as aliases or url segments.
/// </summary>
/// <remarks>
/// <para>Not necessarily optimized to work on large bodies of text.</para>
/// <para>Can expose surprising or bogus behavior.</para>
/// <para>Uses invariant culture everywhere.</para>
/// </remarks>
internal class LegacyShortStringHelper : IShortStringHelper
{
#region Ctor and vars
/// <summary>
/// Freezes the helper so that its configuration can no longer be modified.
/// </summary>
/// <remarks>
/// <para>Will be called by <c>ShortStringHelperResolver</c> when resolution freezes.</para>
/// <para>This implementation does nothing: the method body is empty.</para>
/// </remarks>
public void Freeze ( )
{
// we have nothing to protect.
}
// Characters that CleanStringForSafeAlias keeps; each input character is lower-cased
// before being looked up in this list, so upper-case letters are accepted too.
const string UmbracoValidAliasCharacters = "_-abcdefghijklmnopqrstuvwxyz1234567890" ;
2013-04-02 09:19:49 -02:00
// Characters that CleanStringForSafeAlias drops for as long as the alias being built is still empty.
const string UmbracoInvalidFirstCharacters = "0123456789" ;
2013-02-07 13:30:50 -01:00
#endregion
2013-02-19 06:30:19 -01:00
#region Short string services JavaScript
2013-02-19 06:26:58 -01:00
2013-02-19 06:30:19 -01:00
// Characters accepted by the generated safeAlias() script (looked up after lower-casing).
const string SssjsValidCharacters = "_-abcdefghijklmnopqrstuvwxyz1234567890";

// Characters the generated safeAlias() script refuses while the alias is still empty.
const string SssjsInvalidFirstCharacters = "0123456789";

// Composite format string (consumed by string.Format) that produces the client-side script:
//   {0} = JavaScript boolean literal telling whether aliases must be made safe
//   {1} = the valid alias characters
//   {2} = the characters that are invalid as first character
// Literal JavaScript braces are doubled ({{ and }}) so that string.Format emits single braces.
//
// The invalid-first-character test uses "> -1": indexOf() returns 0 for the first entry of the
// list ('0'), so the former "> 0" comparison let an alias start with a zero, which disagrees
// with the server-side CleanStringForSafeAlias().
private const string SssjsFormat = @"
var UMBRACO_FORCE_SAFE_ALIAS = {0};
var UMBRACO_FORCE_SAFE_ALIAS_VALIDCHARS = '{1}';
var UMBRACO_FORCE_SAFE_ALIAS_INVALID_FIRST_CHARS = '{2}';

function safeAlias(alias) {{
    if (UMBRACO_FORCE_SAFE_ALIAS) {{
        var safeAlias = '';
        var aliasLength = alias.length;
        for (var i = 0; i < aliasLength; i++) {{
            currentChar = alias.substring(i, i + 1);
            if (UMBRACO_FORCE_SAFE_ALIAS_VALIDCHARS.indexOf(currentChar.toLowerCase()) > -1) {{
                // check for camel (if previous character is a space, we'll upper case the current one
                if (safeAlias == '' && UMBRACO_FORCE_SAFE_ALIAS_INVALID_FIRST_CHARS.indexOf(currentChar.toLowerCase()) > -1) {{
                    currentChar = '';
                }} else {{
                    // first char should always be lowercase (camel style)
                    if (safeAlias.length == 0)
                        currentChar = currentChar.toLowerCase();

                    if (i < aliasLength - 1 && safeAlias != '' && alias.substring(i - 1, i) == ' ')
                        currentChar = currentChar.toUpperCase();

                    safeAlias += currentChar;
                }}
            }}
        }}

        alias = safeAlias;
    }}
    return alias;
}}

function getSafeAlias(id, value, immediate, callback) {{
    callback(safeAlias(value));
}}

function validateSafeAlias(id, value, immediate, callback) {{
    callback(value == safeAlias(value));
}}

// legacy backward compatibility requires that one
function isValidAlias(alias) {{
    return alias == safeAlias(alias);
}}
";
/// <summary>
/// Builds the JavaScript code that defines the client-side short string services.
/// </summary>
/// <param name="controllerPath">Not used by this legacy implementation.</param>
/// <returns>
/// The <c>SssjsFormat</c> script, formatted with the force-safe-aliases setting, the valid
/// alias characters and the characters that are invalid as first character.
/// </returns>
public string GetShortStringServicesJavaScript(string controllerPath)
{
    // the setting is emitted as a JavaScript boolean literal
    string forceSafeAliasLiteral;
    if (UmbracoSettings.ForceSafeAliases)
    {
        forceSafeAliasLiteral = "true";
    }
    else
    {
        forceSafeAliasLiteral = "false";
    }

    return string.Format(SssjsFormat, forceSafeAliasLiteral, SssjsValidCharacters, SssjsInvalidFirstCharacters);
}
#endregion
2013-02-07 13:30:50 -01:00
#region IShortStringHelper CleanFor . . .
/// <summary>
/// Filters a string so that the result can safely be used as an alias.
/// </summary>
/// <param name="text">The text to filter.</param>
/// <returns>The safe alias.</returns>
/// <remarks>
/// <para>Casing operations use the invariant culture.</para>
/// <para>A character is kept only when its lower-case form is listed in
/// <c>UmbracoValidAliasCharacters</c>; characters listed in <c>UmbracoInvalidFirstCharacters</c>
/// are dropped for as long as the alias is still empty.</para>
/// <para>A kept character that directly follows a space is upper-cased, except when it is
/// the very last character of <paramref name="text"/>.</para>
/// </remarks>
public string CleanStringForSafeAlias(string text)
{
    // ported from StringExtensions.ToSafeAlias()
    var alias = new StringBuilder();
    var lastIndex = text.Length - 1;

    for (var position = 0; position <= lastIndex; position++)
    {
        var candidate = text[position].ToString();
        var lowered = candidate.ToLowerInvariant();

        // anything outside of the valid characters is simply dropped
        if (!UmbracoValidAliasCharacters.Contains(lowered))
        {
            continue;
        }

        // the alias cannot begin with one of the invalid first characters
        if (alias.Length == 0 && UmbracoInvalidFirstCharacters.Contains(lowered))
        {
            continue;
        }

        // camel-casing: upper-case what follows a space (never the last input character)
        var followsSpace = position > 0 && text[position - 1] == ' ';
        if (followsSpace && position < lastIndex)
        {
            candidate = candidate.ToUpperInvariant();
        }

        alias.Append(candidate);
    }

    return alias.ToString();
}
/// <summary>
/// Filters a string so that the result can safely be used as an alias.
/// </summary>
/// <param name="text">The text to filter.</param>
/// <param name="culture">The culture; ignored by this implementation.</param>
/// <returns>The safe alias.</returns>
/// <remarks>Legacy does not support culture contexts: the culture-less overload does the work.</remarks>
public string CleanStringForSafeAlias(string text, CultureInfo culture)
{
    // the culture argument is deliberately not forwarded
    var alias = CleanStringForSafeAlias(text);
    return alias;
}
/// <summary>
/// Filters a string so that the result can safely be used as an url segment.
/// </summary>
/// <param name="text">The text to filter.</param>
/// <returns>The safe url segment, as produced by <c>LegacyFormatUrl</c>.</returns>
public string CleanStringForUrlSegment(string text)
{
    // the legacy url formatting routine does all the work
    var urlSegment = LegacyFormatUrl(text);
    return urlSegment;
}
/// <summary>
/// Filters a string so that the result can safely be used as an url segment.
/// </summary>
/// <param name="text">The text to filter.</param>
/// <param name="culture">The culture; ignored by this implementation.</param>
/// <returns>The safe url segment.</returns>
/// <remarks>Legacy does not support culture contexts: the culture-less overload does the work.</remarks>
public string CleanStringForUrlSegment(string text, CultureInfo culture)
{
    // the culture argument is deliberately not forwarded
    var urlSegment = CleanStringForUrlSegment(text);
    return urlSegment;
}
2013-03-11 14:58:07 -01:00
/// <summary>
/// Cleans a string to produce a string that can safely be used as a filename,
/// both internally (on disk) and externally (as a url).
/// </summary>
/// <param name="text">The text to filter.</param>
/// <returns>
/// The safe filename, or an empty string when <paramref name="text"/> is null, empty or
/// consists only of white-space characters.
/// </returns>
/// <remarks>
/// <para>Legacy says this was used to "overcome an issue when Umbraco is used in IE in an intranet environment" but that issue is not documented.</para>
/// <para>The replacements configured in <c>UmbracoSettings.UrlReplaceCharacters</c> are applied to
/// the name part only, i.e. to what precedes the last dot; the extension is left alone.</para>
/// <para>A text without any dot is treated as a name without extension.</para>
/// </remarks>
public string CleanStringForSafeFileName(string text)
{
    // ported from Core.IO.IOHelper.SafeFileName()

    // nothing usable in there: the safe filename is empty
    if (String.IsNullOrWhiteSpace(text))
    {
        return String.Empty;
    }

    var filePath = text;
    foreach (var character in Path.GetInvalidFileNameChars())
    {
        filePath = filePath.Replace(character, '-');
    }

    // Break up the file in name and extension before applying the UrlReplaceCharacters.
    // LastIndexOf returns -1 when there is no dot, which must not reach Substring as a length.
    var extensionStart = filePath.LastIndexOf('.');
    var fileNamePart = extensionStart < 0 ? filePath : filePath.Substring(0, extensionStart);
    var ext = extensionStart < 0 ? String.Empty : filePath.Substring(extensionStart);

    // Because the file usually is downloadable as well we check characters against 'UmbracoSettings.UrlReplaceCharacters'
    XmlNode replaceChars = UmbracoSettings.UrlReplaceCharacters;
    foreach (XmlNode n in replaceChars.SelectNodes("char"))
    {
        var org = n.Attributes.GetNamedItem("org");
        if (org != null && org.Value != "")
        {
            fileNamePart = fileNamePart.Replace(org.Value, XmlHelper.GetNodeValue(n));
        }
    }

    filePath = string.Concat(fileNamePart, ext);

    // Adapted from: http://stackoverflow.com/a/4827510/5018
    // Combined both Reserved Characters and Character Data
    // from http://en.wikipedia.org/wiki/Percent-encoding
    var stringBuilder = new StringBuilder();
    const string reservedCharacters = "!*'();:@&=+$,/?%#[]-~{}\"<>\\^`| ";
    foreach (var character in filePath)
    {
        stringBuilder.Append(reservedCharacters.IndexOf(character) == -1 ? character : '-');
    }

    // Remove repeating dashes
    // From: http://stackoverflow.com/questions/5111967/regex-to-remove-a-specific-repeated-character
    return Regex.Replace(stringBuilder.ToString(), "-+", "-");
}
/// <summary>
/// Cleans a string to produce a string that can safely be used as a filename,
/// both internally (on disk) and externally (as a url).
/// </summary>
/// <param name="text">The text to filter.</param>
/// <param name="culture">The culture; ignored by this implementation.</param>
/// <returns>The safe filename.</returns>
/// <remarks>Legacy does not support culture contexts: the culture-less overload does the work.</remarks>
public string CleanStringForSafeFileName(string text, CultureInfo culture)
{
    // the culture argument is deliberately not forwarded
    var fileName = CleanStringForSafeFileName(text);
    return fileName;
}
2013-02-07 13:30:50 -01:00
#endregion
#region CleanString
// legacy does not implement these
// every overload below hands the text back untouched and ignores its other arguments

/// <summary>
/// Returns the text unchanged.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">Ignored by this implementation.</param>
/// <returns>The <paramref name="text"/> argument, as is.</returns>
public string CleanString ( string text , CleanStringType stringType )
{
return text ;
}
/// <summary>
/// Returns the text unchanged.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">Ignored by this implementation.</param>
/// <param name="separator">Ignored by this implementation.</param>
/// <returns>The <paramref name="text"/> argument, as is.</returns>
public string CleanString ( string text , CleanStringType stringType , char separator )
{
return text ;
}
/// <summary>
/// Returns the text unchanged.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">Ignored by this implementation.</param>
/// <param name="culture">Ignored by this implementation.</param>
/// <returns>The <paramref name="text"/> argument, as is.</returns>
public string CleanString ( string text , CleanStringType stringType , CultureInfo culture )
{
return text ;
}
/// <summary>
/// Returns the text unchanged.
/// </summary>
/// <param name="text">The text to clean.</param>
/// <param name="stringType">Ignored by this implementation.</param>
/// <param name="separator">Ignored by this implementation.</param>
/// <param name="culture">Ignored by this implementation.</param>
/// <returns>The <paramref name="text"/> argument, as is.</returns>
public string CleanString ( string text , CleanStringType stringType , char separator , CultureInfo culture )
{
return text ;
}
#endregion
#region SplitPascalCasing
/// <summary>
/// Splits a pascal-cased string by inserting a separator in between each term.
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="separator">The separator, inserted literally whatever the character is.</param>
/// <returns>The split string.</returns>
/// <remarks>
/// <para>Only ASCII letters are recognized: the pattern relies on <c>[a-z]</c> and <c>[A-Z]</c>.</para>
/// <para>The separator goes after a lower-case letter that precedes an upper-case letter, and after
/// an upper-case letter that precedes an upper-case letter followed by a lower-case letter.</para>
/// </remarks>
public string SplitPascalCasing(string text, char separator)
{
    // ported from StringExtensions.SplitPascalCasing()

    // A match evaluator is used rather than a "$1" + separator replacement pattern: with a digit
    // separator the pattern would become e.g. "$12", a reference to a group that does not exist,
    // and the text would end up containing a literal "$12" instead of the separator.
    return Regex.Replace(
        text,
        "([a-z](?=[A-Z])|[A-Z](?=[A-Z][a-z]))",
        match => match.Value + separator);
}
#endregion
#region Legacy
/// <summary>
/// Filters a string so that the result can safely be used as an alias.
/// </summary>
/// <param name="text">The text to filter.</param>
/// <returns>The safe alias, or an empty string when <paramref name="text"/> is null or empty.</returns>
/// <remarks>
/// <para>The text is camel-cased by <c>LegacyConvertStringCase</c>, then every run of characters
/// that are not ASCII letters, digits, white-space, dots or dashes is removed.</para>
/// <para>Casing operations use the invariant culture.</para>
/// </remarks>
public string LegacyCleanStringForUmbracoAlias(string text)
{
    // ported from StringExtensions.ToUmbracoAlias()
    // kept here for reference, not used anymore

    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    // the original code took the case type as a parameter, but it always was camel case
    var camelCased = LegacyConvertStringCase(text, CleanStringType.CamelCase);

    // strip whatever is not alpha-numeric, white-space, dot or dash; the original code
    // could optionally remove spaces at this point, but the case conversion has already
    // removed them anyway
    return Regex.Replace(camelCased, @"[^a-zA-Z0-9\s\.-]+", "", RegexOptions.Compiled);
}
// Pattern used when camel-casing: group 1 captures the leading upper-case ASCII letters that
// get lower-cased, group 2 captures the remainder, which starts with an upper-case letter.
// Created once: building a RegexOptions.Compiled regex on each call is needlessly expensive.
private static readonly Regex LegacyCamelCaseLeadingUpperRegex =
    new Regex("^([A-Z]*)([A-Z].*)$", RegexOptions.Singleline | RegexOptions.Compiled);

/// <summary>
/// Filters a string to convert case, and more.
/// </summary>
/// <param name="phrase">The text to filter.</param>
/// <param name="cases">The string case type; it is masked with <c>CleanStringType.CaseMask</c> before use.</param>
/// <returns>The filtered text.</returns>
/// <remarks>
/// <para>This is the legacy method, so we can't really change it, although it has issues (see unit tests).</para>
/// <para>It does more than "converting the case": the text is split on every character that is
/// not an ASCII letter, a digit or an apostrophe, and the resulting terms are concatenated,
/// which removes spaces, punctuation, etc.</para>
/// <para>For <c>CleanStringType.Unchanged</c> the terms are concatenated as they are; otherwise the
/// first character of each term is upper-cased, and for <c>CleanStringType.CamelCase</c> the
/// beginning of the result is then lower-cased.</para>
/// </remarks>
public string LegacyConvertStringCase(string phrase, CleanStringType cases)
{
    // ported from StringExtensions.ConvertCase

    cases &= CleanStringType.CaseMask;

    var terms = Regex.Split(phrase, @"[^a-zA-Z0-9\']", RegexOptions.Compiled);
    if (cases == CleanStringType.Unchanged)
    {
        return string.Join("", terms);
    }

    // upper-case the first character of each term, and glue the terms together
    // (terms only contain ASCII letters, digits and apostrophes because of the split above)
    var sb = new StringBuilder();
    foreach (var term in terms)
    {
        if (term.Length == 0)
        {
            continue;
        }

        sb.Append(char.ToUpperInvariant(term[0]));
        sb.Append(term, 1, term.Length - 1);
    }

    var result = sb.ToString();
    if (cases != CleanStringType.CamelCase)
    {
        return result;
    }

    // camel case: nothing or a single character is entirely lower-cased
    if (result.Length <= 1)
    {
        return result.ToLowerInvariant();
    }

    var match = LegacyCamelCaseLeadingUpperRegex.Match(result);
    if (!match.Success)
    {
        return result;
    }

    // lower-case the leading upper-case letters but the last one, then the first character
    result = match.Groups[1].Value.ToLowerInvariant() + match.Groups[2].Value;
    return result.Substring(0, 1).ToLowerInvariant() + result.Substring(1);
}
/// <summary>
/// Converts a string to a URL alias.
/// </summary>
/// <param name="value">The value.</param>
/// <param name="charReplacements">The replacements: each key found in the value is replaced by its associated value.</param>
/// <param name="replaceDoubleDashes">if set to <c>true</c>, runs of dashes or underscores are reduced to their first character.</param>
/// <param name="stripNonAscii">if set to <c>true</c>, strips whatever is not ASCII, then whatever is not a digit, a lowercase letter or a replacement value.</param>
/// <param name="urlEncode">if set to <c>true</c>, url-encodes the result.</param>
/// <returns>The URL alias.</returns>
/// <remarks>
/// <para>Steps, in order: lower-case (invariant), apply the replacements one by one, optionally
/// strip non-ASCII and invalid characters, trim dashes and underscores at both ends, optionally
/// reduce repeated dashes or underscores, optionally url-encode.</para>
/// <para>The replacements operate on strings, not just on single characters, and run before
/// the ASCII reduction: they are a way to turn some text into a replacement character before
/// invalid characters get stripped out.</para>
/// </remarks>
public string LegacyToUrlAlias(string value, IDictionary<string, string> charReplacements, bool replaceDoubleDashes, bool stripNonAscii, bool urlEncode)
{
    // first to lower case, then the replacements, one after the other
    var alias = charReplacements.Aggregate(
        value.ToLowerInvariant(),
        (current, replacement) => current.Replace(replacement.Key, replacement.Value));

    if (stripNonAscii)
    {
        // convert to ASCII: characters that cannot be encoded are replaced by nothing
        var lossyAscii = Encoding.GetEncoding(
            Encoding.ASCII.EncodingName,
            new EncoderReplacementFallback(String.Empty),
            new DecoderExceptionFallback());
        var asciiBytes = Encoding.Convert(Encoding.UTF8, lossyAscii, Encoding.UTF8.GetBytes(alias));
        alias = Encoding.ASCII.GetString(asciiBytes);

        // keep digits, lowercase letters and anything that is a replacement value
        var kept = new StringBuilder();
        foreach (var c in alias)
        {
            var isReplacementValue = charReplacements.Values.Contains(c.ToString(CultureInfo.InvariantCulture));
            var isDigit = c >= 48 && c <= 57;
            var isLowercaseLetter = c >= 97 && c <= 122;
            if (isReplacementValue || isDigit || isLowercaseLetter)
            {
                kept.Append(c);
            }
        }

        alias = kept.ToString();
    }

    // trim dashes and underscores at both ends
    alias = alias.Trim('-', '_');

    // reduce repeated occurrences of - or _
    if (replaceDoubleDashes)
    {
        alias = Regex.Replace(alias, @"([-_]){2,}", "$1", RegexOptions.Compiled);
    }

    // url encode result
    if (urlEncode)
    {
        return System.Web.HttpUtility.UrlEncode(alias);
    }

    return alias;
}
/// <summary>
/// Filters a string so that the result can safely be used as an url segment.
/// </summary>
/// <param name="url">The text to filter.</param>
/// <returns>The safe url segment.</returns>
/// <remarks>
/// <para>The text is lower-cased (invariant), then each <c>char</c> node of
/// <c>UmbracoSettings.UrlReplaceCharacters</c> that has a non-empty <c>org</c> attribute causes
/// that attribute's value to be replaced by the node value.</para>
/// <para>When <c>UmbracoSettings.RemoveDoubleDashesFromUrlReplacing</c> is set, runs of dashes
/// are then reduced to one single dash.</para>
/// </remarks>
public string LegacyFormatUrl(string url)
{
    var segment = url.ToLowerInvariant();

    var replaceChars = UmbracoSettings.UrlReplaceCharacters;
    foreach (XmlNode charNode in replaceChars.SelectNodes("char"))
    {
        // skip the nodes that do not say what to replace
        var org = charNode.Attributes.GetNamedItem("org");
        if (org == null || org.Value == "")
        {
            continue;
        }

        segment = segment.Replace(org.Value, XmlHelper.GetNodeValue(charNode));
    }

    // check for double dashes
    if (!UmbracoSettings.RemoveDoubleDashesFromUrlReplacing)
    {
        return segment;
    }

    return Regex.Replace(segment, @"[-]{2,}", "-");
}
#endregion
#region ReplaceMany
/// <summary>
/// Returns a new string in which all occurrences of specified strings are replaced by other specified strings.
/// </summary>
/// <param name="text">The string to filter.</param>
/// <param name="replacements">The replacements definition: each key is replaced by its associated value.</param>
/// <returns>The filtered string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> or <paramref name="replacements"/> is null.</exception>
/// <remarks>
/// Replacements are chained: the outcome of one replacement can in turn be modified by another
/// replacement, so the order in which the replacements are enumerated is important.
/// </remarks>
public string ReplaceMany(string text, IDictionary<string, string> replacements)
{
    // be safe, same as the overload that replaces characters
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    if (replacements == null)
    {
        throw new ArgumentNullException("replacements");
    }

    // Have done various tests, implementing my own "super fast" state machine to handle
    // replacement of many items, or via regexes, but on short strings and not too
    // many replacements (which prob. is going to be our case) nothing can beat this...
    // (at least with safe and checked code -- we don't want unsafe/unchecked here)
    return replacements.Aggregate(text, (current, kvp) => current.Replace(kvp.Key, kvp.Value));
}
2013-03-11 14:58:07 -01:00
/// <summary>
/// Returns a new string in which all occurrences of specified characters are replaced by a specified character.
/// </summary>
/// <param name="text">The string to filter.</param>
/// <param name="chars">The characters to replace.</param>
/// <param name="replacement">The replacement character.</param>
/// <returns>The filtered string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> or <paramref name="chars"/> is null.</exception>
public string ReplaceMany(string text, char[] chars, char replacement)
{
    // be safe
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    if (chars == null)
    {
        throw new ArgumentNullException("chars");
    }

    // plain successive replacements: on short strings nothing fancier is worth it
    var filtered = text;
    foreach (var unwanted in chars)
    {
        filtered = filtered.Replace(unwanted, replacement);
    }

    return filtered;
}
2013-02-07 13:30:50 -01:00
#endregion
}
}