merge branch 'origin/6.2.0' into 7.0.2 + fix

This commit is contained in:
Stephan
2014-01-16 19:40:29 +01:00
6 changed files with 271 additions and 1 deletions

View File

@@ -10,6 +10,8 @@ namespace Umbraco.Core.Configuration.UmbracoSettings
bool RemoveDoubleDashes { get; }
bool ConvertUrlsToAscii { get; }
IEnumerable<IChar> CharCollection { get; }
}
}

View File

@@ -121,6 +121,11 @@ namespace Umbraco.Core.Configuration.UmbracoSettings
get { return UrlReplacing.RemoveDoubleDashes; }
}
bool IRequestHandlerSection.ConvertUrlsToAscii
{
get { return UrlReplacing.ConvertUrlsToAscii; }
}
IEnumerable<IChar> IRequestHandlerSection.CharCollection
{
get { return UrlReplacing.CharCollection; }

View File

@@ -11,6 +11,12 @@ namespace Umbraco.Core.Configuration.UmbracoSettings
get { return (bool) base["removeDoubleDashes"]; }
}
[ConfigurationProperty("toAscii", DefaultValue = false)]
internal bool ConvertUrlsToAscii
{
get { return (bool)base["toAscii"]; }
}
[ConfigurationCollection(typeof(CharCollection), AddItemName = "char")]
[ConfigurationProperty("", IsDefaultCollection = true)]
internal CharCollection CharCollection

View File

@@ -142,7 +142,7 @@ namespace Umbraco.Core.Strings
{
PreFilter = ApplyUrlReplaceCharacters,
IsTerm = (c, leading) => char.IsLetterOrDigit(c) || c == '_', // letter, digit or underscore
StringType = CleanStringType.Utf8 | CleanStringType.LowerCase,
StringType = (UmbracoConfig.For.UmbracoSettings().RequestHandler.ConvertUrlsToAscii ? CleanStringType.Ascii : CleanStringType.Utf8) | CleanStringType.LowerCase,
BreakTermsOnUpper = false,
Separator = '-'
}).WithConfig(CleanStringType.FileName, new Config

View File

@@ -3316,6 +3316,243 @@ namespace Umbraco.Core.Strings
output[opos++] = '~';
break;
// BEGIN CUSTOM TRANSLITERATION OF CYRILLIC CHARS
#region Cyrilic chars
// russian uppercase "А Б В Г Д Е Ё Ж З И Й К Л М Н О П Р С Т У Ф Х Ц Ч Ш Щ Ъ Ы Ь Э Ю Я"
// russian lowercase "а б в г д е ё ж з и й к л м н о п р с т у ф х ц ч ш щ ъ ы ь э ю я"
// notes
// read http://www.vesic.org/english/blog/c-sharp/transliteration-easy-way-microsoft-transliteration-utility/
// should we look into MS Transliteration Utility (http://msdn.microsoft.com/en-US/goglobal/bb688104.aspx)
// also UnidecodeSharpFork https://bitbucket.org/DimaStefantsov/unidecodesharpfork
// also Transliterator http://transliterator.codeplex.com/
//
// in any case it would be good to generate all those "case" statements instead of writing them by hand
// time for a T4 template?
// also we should support extensibility so people can register more cases in external code
// fixme
// transliterates Анастасия as Anastasiya, and not Anastasia
// Ольга --> Ol'ga, Татьяна --> Tat'yana -- that's bad (?)
// Note: should ä (german umlaut) become a or ae ?
case '\u0410': // А
output[opos++] = 'A';
break;
case '\u0430': // а
output[opos++] = 'a';
break;
case '\u0411': // Б
output[opos++] = 'B';
break;
case '\u0431': // б
output[opos++] = 'b';
break;
case '\u0412': // В
output[opos++] = 'V';
break;
case '\u0432': // в
output[opos++] = 'v';
break;
case '\u0413': // Г
output[opos++] = 'G';
break;
case '\u0433': // г
output[opos++] = 'g';
break;
case '\u0414': // Д
output[opos++] = 'D';
break;
case '\u0434': // д
output[opos++] = 'd';
break;
case '\u0415': // Е
output[opos++] = 'E';
break;
case '\u0435': // е
output[opos++] = 'e';
break;
case '\u0401': // Ё
output[opos++] = 'E'; // alt. Yo
break;
case '\u0451': // ё
output[opos++] = 'e'; // alt. yo
break;
case '\u0416': // Ж
output[opos++] = 'Z';
output[opos++] = 'h';
break;
case '\u0436': // ж
output[opos++] = 'z';
output[opos++] = 'h';
break;
case '\u0417': // З
output[opos++] = 'Z';
break;
case '\u0437': // з
output[opos++] = 'z';
break;
case '\u0418': // И
output[opos++] = 'I';
break;
case '\u0438': // и
output[opos++] = 'i';
break;
case '\u0419': // Й
output[opos++] = 'I'; // alt. Y, J
break;
case '\u0439': // й
output[opos++] = 'i'; // alt. y, j
break;
case '\u041A': // К
output[opos++] = 'K';
break;
case '\u043A': // к
output[opos++] = 'k';
break;
case '\u041B': // Л
output[opos++] = 'L';
break;
case '\u043B': // л
output[opos++] = 'l';
break;
case '\u041C': // М
output[opos++] = 'M';
break;
case '\u043C': // м
output[opos++] = 'm';
break;
case '\u041D': // Н
output[opos++] = 'N';
break;
case '\u043D': // н
output[opos++] = 'n';
break;
case '\u041E': // О
output[opos++] = 'O';
break;
case '\u043E': // о
output[opos++] = 'o';
break;
case '\u041F': // П
output[opos++] = 'P';
break;
case '\u043F': // п
output[opos++] = 'p';
break;
case '\u0420': // Р
output[opos++] = 'R';
break;
case '\u0440': // р
output[opos++] = 'r';
break;
case '\u0421': // С
output[opos++] = 'S';
break;
case '\u0441': // с
output[opos++] = 's';
break;
case '\u0422': // Т
output[opos++] = 'T';
break;
case '\u0442': // т
output[opos++] = 't';
break;
case '\u0423': // У
output[opos++] = 'U';
break;
case '\u0443': // у
output[opos++] = 'u';
break;
case '\u0424': // Ф
output[opos++] = 'F';
break;
case '\u0444': // ф
output[opos++] = 'f';
break;
case '\u0425': // Х
output[opos++] = 'K'; // alt. X
output[opos++] = 'h';
break;
case '\u0445': // х
output[opos++] = 'k'; // alt. x
output[opos++] = 'h';
break;
// Ц / ц is the "ts" sound. It used to be emitted as 'F' / 'f', which is
// phonetically wrong and made it indistinguishable from Ф / ф once converted.
// Two output chars per input char, same as the Ж, Х, Ч, Ю cases.
case '\u0426': // Ц
output[opos++] = 'T'; // alt. C
output[opos++] = 's';
break;
case '\u0446': // ц
output[opos++] = 't'; // alt. c
output[opos++] = 's';
break;
case '\u0427': // Ч
output[opos++] = 'C'; // alt. Ts, C
output[opos++] = 'h';
break;
case '\u0447': // ч
output[opos++] = 'c'; // alt. ts, c
output[opos++] = 'h';
break;
case '\u0428': // Ш
output[opos++] = 'S'; // alt. Ch, S
output[opos++] = 'h';
break;
case '\u0448': // ш
output[opos++] = 's'; // alt. ch, s
output[opos++] = 'h';
break;
case '\u0429': // Щ
output[opos++] = 'S'; // alt. Shch, Sc
output[opos++] = 'h';
break;
case '\u0449': // щ
output[opos++] = 's'; // alt. shch, sc
output[opos++] = 'h';
break;
case '\u042A': // Ъ
output[opos++] = '"'; // "
break;
case '\u044A': // ъ
output[opos++] = '"'; // "
break;
case '\u042B': // Ы
output[opos++] = 'Y';
break;
case '\u044B': // ы
output[opos++] = 'y';
break;
case '\u042C': // Ь
output[opos++] = '\''; // '
break;
case '\u044C': // ь
output[opos++] = '\''; // '
break;
case '\u042D': // Э
output[opos++] = 'E';
break;
case '\u044D': // э
output[opos++] = 'e';
break;
case '\u042E': // Ю
output[opos++] = 'Y'; // alt. Ju
output[opos++] = 'u';
break;
case '\u044E': // ю
output[opos++] = 'y'; // alt. ju
output[opos++] = 'u';
break;
case '\u042F': // Я
output[opos++] = 'Y'; // alt. Ja
output[opos++] = 'a';
break;
case '\u044F': // я
output[opos++] = 'y'; // alt. ja
output[opos++] = 'a';
break;
#endregion
// BEGIN EXTRA
/*
case '£':

View File

@@ -98,6 +98,26 @@ namespace Umbraco.Tests.CoreStrings
return s;
}
[Test]
public void U4_4056()
{
    // Regression test for issue U4-4056: URL segments built from mixed-script text.
    // The sample mixes Latin letters with diacritics, Chinese, Hebrew, German and Cyrillic.
    const string source = "ÆØÅ and æøå and 中文测试 and אודות האתר and größer БбДдЖж page";

    // Default configuration: the expected segment keeps the non-ASCII letters
    // (lower-cased) and joins the terms with dashes.
    var unicodeHelper = new DefaultShortStringHelper().WithDefaultConfig();
    var unicodeSegment = unicodeHelper.CleanStringForUrlSegment(source);
    Assert.AreEqual("æøå-and-æøå-and-中文测试-and-אודות-האתר-and-größer-ббдджж-page", unicodeSegment);

    // Explicit ASCII configuration: the expected segment has the diacritics and
    // Cyrillic letters transliterated, and the Chinese and Hebrew terms dropped.
    var asciiConfig = new DefaultShortStringHelper.Config
    {
        IsTerm = (ch, isLeading) => char.IsLetterOrDigit(ch) || ch == '_',
        StringType = CleanStringType.Ascii | CleanStringType.LowerCase,
        Separator = '-'
    };
    var asciiHelper = new DefaultShortStringHelper().WithConfig(CleanStringType.UrlSegment, asciiConfig);
    var asciiSegment = asciiHelper.CleanStringForUrlSegment(source);
    Assert.AreEqual("aeoa-and-aeoa-and-and-and-grosser-bbddzhzh-page", asciiSegment);
}
[Test]
public void CleanStringUnderscoreInTerm()
{