merge branch 'origin/6.2.0' into 7.0.2 + fix
This commit is contained in:
@@ -10,6 +10,8 @@ namespace Umbraco.Core.Configuration.UmbracoSettings
|
||||
|
||||
/// <summary>
/// Gets the "remove double dashes" flag of the URL-replacing settings.
/// NOTE(review): presumably controls whether consecutive dashes are collapsed in generated URLs — confirm against the consumer.
/// </summary>
bool RemoveDoubleDashes { get; }
|
||||
|
||||
/// <summary>
/// Gets a value indicating whether URL segments should be cleaned to ASCII
/// instead of being kept as UTF-8.
/// </summary>
bool ConvertUrlsToAscii { get; }
|
||||
|
||||
/// <summary>
/// Gets the character entries declared in the URL-replacing settings.
/// NOTE(review): presumably each entry maps a character to its replacement — confirm against IChar.
/// </summary>
IEnumerable<IChar> CharCollection { get; }
|
||||
}
|
||||
}
|
||||
@@ -121,6 +121,11 @@ namespace Umbraco.Core.Configuration.UmbracoSettings
|
||||
get { return UrlReplacing.RemoveDoubleDashes; }
|
||||
}
|
||||
|
||||
/// <summary>
/// Explicit <see cref="IRequestHandlerSection"/> implementation that exposes the
/// ConvertUrlsToAscii value held by the UrlReplacing element.
/// </summary>
bool IRequestHandlerSection.ConvertUrlsToAscii
{
    get
    {
        // Pure pass-through: the value lives on the nested UrlReplacing element.
        return UrlReplacing.ConvertUrlsToAscii;
    }
}
|
||||
|
||||
IEnumerable<IChar> IRequestHandlerSection.CharCollection
|
||||
{
|
||||
get { return UrlReplacing.CharCollection; }
|
||||
|
||||
@@ -11,6 +11,12 @@ namespace Umbraco.Core.Configuration.UmbracoSettings
|
||||
get { return (bool) base["removeDoubleDashes"]; }
|
||||
}
|
||||
|
||||
/// <summary>
/// Gets the value of the "toAscii" configuration attribute.
/// The attribute is declared with a default of <c>false</c>.
/// </summary>
[ConfigurationProperty("toAscii", DefaultValue = false)]
internal bool ConvertUrlsToAscii
{
    get
    {
        // The configuration indexer returns object; unbox it to bool.
        var rawValue = base["toAscii"];
        return (bool) rawValue;
    }
}
|
||||
|
||||
[ConfigurationCollection(typeof(CharCollection), AddItemName = "char")]
|
||||
[ConfigurationProperty("", IsDefaultCollection = true)]
|
||||
internal CharCollection CharCollection
|
||||
|
||||
@@ -142,7 +142,7 @@ namespace Umbraco.Core.Strings
|
||||
{
|
||||
PreFilter = ApplyUrlReplaceCharacters,
|
||||
IsTerm = (c, leading) => char.IsLetterOrDigit(c) || c == '_', // letter, digit or underscore
|
||||
StringType = CleanStringType.Utf8 | CleanStringType.LowerCase,
|
||||
StringType = (UmbracoConfig.For.UmbracoSettings().RequestHandler.ConvertUrlsToAscii ? CleanStringType.Ascii : CleanStringType.Utf8) | CleanStringType.LowerCase,
|
||||
BreakTermsOnUpper = false,
|
||||
Separator = '-'
|
||||
}).WithConfig(CleanStringType.FileName, new Config
|
||||
|
||||
@@ -3316,6 +3316,243 @@ namespace Umbraco.Core.Strings
|
||||
output[opos++] = '~';
|
||||
break;
|
||||
|
||||
// BEGIN CUSTOM TRANSLITERATION OF CYRILLIC CHARS
|
||||
|
||||
#region Cyrillic chars
|
||||
|
||||
// russian uppercase "А Б В Г Д Е Ё Ж З И Й К Л М Н О П Р С Т У Ф Х Ц Ч Ш Щ Ъ Ы Ь Э Ю Я"
|
||||
// russian lowercase "а б в г д е ё ж з и й к л м н о п р с т у ф х ц ч ш щ ъ ы ь э ю я"
|
||||
|
||||
// notes
|
||||
// read http://www.vesic.org/english/blog/c-sharp/transliteration-easy-way-microsoft-transliteration-utility/
|
||||
// should we look into MS Transliteration Utility (http://msdn.microsoft.com/en-US/goglobal/bb688104.aspx)
|
||||
// also UnidecodeSharpFork https://bitbucket.org/DimaStefantsov/unidecodesharpfork
|
||||
// also Transliterator http://transliterator.codeplex.com/
|
||||
//
|
||||
// in any case it would be good to generate all those "case" statements instead of writing them by hand
|
||||
// time for a T4 template?
|
||||
// also we should support extensibility so people can register more cases in external code
|
||||
|
||||
// fixme
|
||||
// transliterates Анастасия as Anastasiya, and not Anastasia
|
||||
// Ольга --> Ol'ga, Татьяна --> Tat'yana -- that's bad (?)
|
||||
// Note: should ä (german umlaut) become a or ae ?
|
||||
|
||||
case '\u0410': // А
|
||||
output[opos++] = 'A';
|
||||
break;
|
||||
case '\u0430': // а
|
||||
output[opos++] = 'a';
|
||||
break;
|
||||
case '\u0411': // Б
|
||||
output[opos++] = 'B';
|
||||
break;
|
||||
case '\u0431': // б
|
||||
output[opos++] = 'b';
|
||||
break;
|
||||
case '\u0412': // В
|
||||
output[opos++] = 'V';
|
||||
break;
|
||||
case '\u0432': // в
|
||||
output[opos++] = 'v';
|
||||
break;
|
||||
case '\u0413': // Г
|
||||
output[opos++] = 'G';
|
||||
break;
|
||||
case '\u0433': // г
|
||||
output[opos++] = 'g';
|
||||
break;
|
||||
case '\u0414': // Д
|
||||
output[opos++] = 'D';
|
||||
break;
|
||||
case '\u0434': // д
|
||||
output[opos++] = 'd';
|
||||
break;
|
||||
case '\u0415': // Е
|
||||
output[opos++] = 'E';
|
||||
break;
|
||||
case '\u0435': // е
|
||||
output[opos++] = 'e';
|
||||
break;
|
||||
case '\u0401': // Ё
|
||||
output[opos++] = 'E'; // alt. Yo
|
||||
break;
|
||||
case '\u0451': // ё
|
||||
output[opos++] = 'e'; // alt. yo
|
||||
break;
|
||||
case '\u0416': // Ж
|
||||
output[opos++] = 'Z';
|
||||
output[opos++] = 'h';
|
||||
break;
|
||||
case '\u0436': // ж
|
||||
output[opos++] = 'z';
|
||||
output[opos++] = 'h';
|
||||
break;
|
||||
case '\u0417': // З
|
||||
output[opos++] = 'Z';
|
||||
break;
|
||||
case '\u0437': // з
|
||||
output[opos++] = 'z';
|
||||
break;
|
||||
case '\u0418': // И
|
||||
output[opos++] = 'I';
|
||||
break;
|
||||
case '\u0438': // и
|
||||
output[opos++] = 'i';
|
||||
break;
|
||||
case '\u0419': // Й
|
||||
output[opos++] = 'I'; // alt. Y, J
|
||||
break;
|
||||
case '\u0439': // й
|
||||
output[opos++] = 'i'; // alt. y, j
|
||||
break;
|
||||
case '\u041A': // К
|
||||
output[opos++] = 'K';
|
||||
break;
|
||||
case '\u043A': // к
|
||||
output[opos++] = 'k';
|
||||
break;
|
||||
case '\u041B': // Л
|
||||
output[opos++] = 'L';
|
||||
break;
|
||||
case '\u043B': // л
|
||||
output[opos++] = 'l';
|
||||
break;
|
||||
case '\u041C': // М
|
||||
output[opos++] = 'M';
|
||||
break;
|
||||
case '\u043C': // м
|
||||
output[opos++] = 'm';
|
||||
break;
|
||||
case '\u041D': // Н
|
||||
output[opos++] = 'N';
|
||||
break;
|
||||
case '\u043D': // н
|
||||
output[opos++] = 'n';
|
||||
break;
|
||||
case '\u041E': // О
|
||||
output[opos++] = 'O';
|
||||
break;
|
||||
case '\u043E': // о
|
||||
output[opos++] = 'o';
|
||||
break;
|
||||
case '\u041F': // П
|
||||
output[opos++] = 'P';
|
||||
break;
|
||||
case '\u043F': // п
|
||||
output[opos++] = 'p';
|
||||
break;
|
||||
case '\u0420': // Р
|
||||
output[opos++] = 'R';
|
||||
break;
|
||||
case '\u0440': // р
|
||||
output[opos++] = 'r';
|
||||
break;
|
||||
case '\u0421': // С
|
||||
output[opos++] = 'S';
|
||||
break;
|
||||
case '\u0441': // с
|
||||
output[opos++] = 's';
|
||||
break;
|
||||
case '\u0422': // Т
|
||||
output[opos++] = 'T';
|
||||
break;
|
||||
case '\u0442': // т
|
||||
output[opos++] = 't';
|
||||
break;
|
||||
case '\u0423': // У
|
||||
output[opos++] = 'U';
|
||||
break;
|
||||
case '\u0443': // у
|
||||
output[opos++] = 'u';
|
||||
break;
|
||||
case '\u0424': // Ф
|
||||
output[opos++] = 'F';
|
||||
break;
|
||||
case '\u0444': // ф
|
||||
output[opos++] = 'f';
|
||||
break;
|
||||
case '\u0425': // Х
|
||||
output[opos++] = 'K'; // alt. X
|
||||
output[opos++] = 'h';
|
||||
break;
|
||||
case '\u0445': // х
|
||||
output[opos++] = 'k'; // alt. x
|
||||
output[opos++] = 'h';
|
||||
break;
|
||||
case '\u0426': // Ц
    // Transliterate as the digraph "Ts". The previous single 'F' collided with
    // Ф (U+0424), so distinct letters produced the same ASCII output.
    // Two output chars per input char is already done for Ж -> "Zh".
    output[opos++] = 'T'; // alt. C
    output[opos++] = 's';
    break;
case '\u0446': // ц
    // Lowercase counterpart: "ts" instead of the former 'f' (which collided with ф, U+0444).
    output[opos++] = 't'; // alt. c
    output[opos++] = 's';
    break;
|
||||
case '\u0427': // Ч
|
||||
output[opos++] = 'C'; // alt. Ts, C
|
||||
output[opos++] = 'h';
|
||||
break;
|
||||
case '\u0447': // ч
|
||||
output[opos++] = 'c'; // alt. ts, c
|
||||
output[opos++] = 'h';
|
||||
break;
|
||||
case '\u0428': // Ш
|
||||
output[opos++] = 'S'; // alt. Ch, S
|
||||
output[opos++] = 'h';
|
||||
break;
|
||||
case '\u0448': // ш
|
||||
output[opos++] = 's'; // alt. ch, s
|
||||
output[opos++] = 'h';
|
||||
break;
|
||||
case '\u0429': // Щ
|
||||
output[opos++] = 'S'; // alt. Shch, Sc
|
||||
output[opos++] = 'h';
|
||||
break;
|
||||
case '\u0449': // щ
|
||||
output[opos++] = 's'; // alt. shch, sc
|
||||
output[opos++] = 'h';
|
||||
break;
|
||||
case '\u042A': // Ъ
|
||||
output[opos++] = '"'; // "
|
||||
break;
|
||||
case '\u044A': // ъ
|
||||
output[opos++] = '"'; // "
|
||||
break;
|
||||
case '\u042B': // Ы
|
||||
output[opos++] = 'Y';
|
||||
break;
|
||||
case '\u044B': // ы
|
||||
output[opos++] = 'y';
|
||||
break;
|
||||
case '\u042C': // Ь
|
||||
output[opos++] = '\''; // '
|
||||
break;
|
||||
case '\u044C': // ь
|
||||
output[opos++] = '\''; // '
|
||||
break;
|
||||
case '\u042D': // Э
|
||||
output[opos++] = 'E';
|
||||
break;
|
||||
case '\u044D': // э
|
||||
output[opos++] = 'e';
|
||||
break;
|
||||
case '\u042E': // Ю
|
||||
output[opos++] = 'Y'; // alt. Ju
|
||||
output[opos++] = 'u';
|
||||
break;
|
||||
case '\u044E': // ю
|
||||
output[opos++] = 'y'; // alt. ju
|
||||
output[opos++] = 'u';
|
||||
break;
|
||||
case '\u042F': // Я
|
||||
output[opos++] = 'Y'; // alt. Ja
|
||||
output[opos++] = 'a';
|
||||
break;
|
||||
case '\u044F': // я
|
||||
output[opos++] = 'y'; // alt. ja
|
||||
output[opos++] = 'a';
|
||||
break;
|
||||
|
||||
#endregion
|
||||
|
||||
// BEGIN EXTRA
|
||||
/*
|
||||
case '£':
|
||||
|
||||
@@ -98,6 +98,26 @@ namespace Umbraco.Tests.CoreStrings
|
||||
return s;
|
||||
}
|
||||
|
||||
/// <summary>
/// Regression test for U4-4056: cleans one mixed-script string (Latin with
/// diacritics, Chinese, Hebrew, German, Cyrillic) for a URL segment, first with
/// the default configuration and then with an explicit ASCII configuration.
/// </summary>
[Test]
public void U4_4056()
{
    const string input = "ÆØÅ and æøå and 中文测试 and אודות האתר and größer БбДдЖж page";

    // Default configuration: the expected segment keeps the non-ASCII letters,
    // lower-cased and dash-separated.
    var unicodeHelper = new DefaultShortStringHelper().WithDefaultConfig();
    var unicodeSegment = unicodeHelper.CleanStringForUrlSegment(input);
    Assert.AreEqual("æøå-and-æøå-and-中文测试-and-אודות-האתר-and-größer-ббдджж-page", unicodeSegment);

    // Explicit ASCII configuration for URL segments: the expected segment has the
    // Chinese and Hebrew terms dropped and the remaining letters transliterated.
    var asciiConfig = new DefaultShortStringHelper.Config
    {
        IsTerm = (c, leading) => char.IsLetterOrDigit(c) || c == '_',
        StringType = CleanStringType.LowerCase | CleanStringType.Ascii,
        Separator = '-'
    };
    var asciiHelper = new DefaultShortStringHelper()
        .WithConfig(CleanStringType.UrlSegment, asciiConfig);
    var asciiSegment = asciiHelper.CleanStringForUrlSegment(input);
    Assert.AreEqual("aeoa-and-aeoa-and-and-and-grosser-bbddzhzh-page", asciiSegment);
}
|
||||
|
||||
[Test]
|
||||
public void CleanStringUnderscoreInTerm()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user