diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index ff195a37ee..c0eaf680a3 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -208,11 +208,13 @@ There will be times that we really like your proposed changes and we’ll finish The Core Contributors team consists of one member of Umbraco HQ, [Sebastiaan][Sebastiaan], who gets assistance from the following community members who have committed to volunteering their free time: -- [Nathan Woulfe][Nathan Woulfe] -- [Joe Glombek][Joe Glombek] -- [Laura Weatherhead][Laura Weatherhead] +- [Busra Sengul][Busra Sengul] +- [Emma Garland][Emma Garland] +- [George Bidder][George Bidder] +- [Jason Elkin][Jason Elkin] +- [Laura Neto][Laura Neto] - [Michael Latouche][Michael Latouche] -- [Owain Williams][Owain Williams] +- [Sebastiaan][Sebastiaan] These wonderful people aim to provide you with a reply to your PR, review and test out your changes and on occasions, they might ask more questions. If they are happy with your work, they'll let Umbraco HQ know by approving the PR. HQ will have final sign-off and will check the work again before it is merged. 
@@ -227,12 +229,14 @@ These wonderful people aim to provide you with a reply to your PR, review and te -[Nathan Woulfe]: https://github.com/nathanwoulfe "Nathan's GitHub profile" -[Joe Glombek]: https://github.com/glombek "Joe's GitHub profile" -[Laura Weatherhead]: https://github.com/lssweatherhead "Laura's GitHub profile" +[Busra Sengul]: https://github.com/busrasengul "Busra's GitHub profile" +[Emma Garland]: https://github.com/emmagarland "Emma's GitHub profile" +[George Bidder]: https://github.com/georgebid "George's GitHub profile" +[Jason Elkin]: https://github.com/jasonelkin "Jason's GitHub profile" +[Kyle Eck]: https://github.com/teckspeed "Kyle's GitHub profile" +[Laura Neto]: https://github.com/lauraneto "Laura's GitHub profile" [Michael Latouche]: https://github.com/mikecp "Michael's GitHub profile" -[Owain Williams]: https://github.com/OwainWilliams "Owain's GitHub profile" -[Sebastiaan]: https://github.com/nul800sebastiaan "Senastiaan's GitHub profile" +[Sebastiaan]: https://github.com/nul800sebastiaan "Sebastiaan's GitHub profile" [ Umbrabot ]: https://github.com/umbrabot [git flow]: https://jeffkreeftmeijer.com/git-flow/ "An explanation of git flow" [sync fork ext]: http://robots.thoughtbot.com/post/5133345960/keeping-a-git-fork-updated "Details on keeping a git fork updated" diff --git a/src/Umbraco.Core/EmbeddedResources/Lang/da.xml b/src/Umbraco.Core/EmbeddedResources/Lang/da.xml index cf0518613d..564f500b6b 100644 --- a/src/Umbraco.Core/EmbeddedResources/Lang/da.xml +++ b/src/Umbraco.Core/EmbeddedResources/Lang/da.xml @@ -389,7 +389,7 @@ Element-type Definerer skabelonen for et sæt at egenskaber, der kan anvendes som skema i - avancerede felter som f.eks. 'Block List' eller 'Nested Content'. + avancerede felter som f.eks. 'Block List' eller 'Block Grid'. 
Komposition Definerer et sæt genbrugbare egenskaber, der kan inkluderes i definitionen af @@ -1647,7 +1647,7 @@ Mange hilsner fra Umbraco robotten Tillad segmentering Element-type Er en Element-type - En Element-type er tiltænkt brug i f.eks. Nested Content, ikke i indholdstræet. + En Element-type er tiltænkt brug i andre Dokumenttyper, ikke i indholdstræet. En Dokumenttype kan ikke ændres til en Element-type efter den er blevet brugt til at oprette en eller flere indholds elementer. diff --git a/src/Umbraco.Core/EmbeddedResources/Lang/en.xml b/src/Umbraco.Core/EmbeddedResources/Lang/en.xml index efc125d6d5..56a0838896 100644 --- a/src/Umbraco.Core/EmbeddedResources/Lang/en.xml +++ b/src/Umbraco.Core/EmbeddedResources/Lang/en.xml @@ -394,7 +394,7 @@ Element Type Defines the schema for a repeating set of properties, for example, in a 'Block - List' or 'Nested Content' property editor. + List' or 'Block Grid' property editor. Composition Defines a re-usable set of properties that can be included in the definition of @@ -1861,9 +1861,8 @@ To manage your website, simply open the Umbraco backoffice and start adding cont Allow segmentation Element Type Is an Element Type - An Element Type is meant to be used for instance in Nested Content, and not in the - tree. - + An Element Type is meant to be used within other Document Types, and not in the Content + tree. A document Type cannot be changed to an Element Type once it has been used to create one or more content items. diff --git a/src/Umbraco.Core/EmbeddedResources/Lang/en_us.xml b/src/Umbraco.Core/EmbeddedResources/Lang/en_us.xml index 78fcd0f13d..d85e6d7eb4 100644 --- a/src/Umbraco.Core/EmbeddedResources/Lang/en_us.xml +++ b/src/Umbraco.Core/EmbeddedResources/Lang/en_us.xml @@ -410,7 +410,7 @@ Element Type Defines the schema for a repeating set of properties, for example, in a 'Block - List' or 'Nested Content' property editor. + List' or 'Block Grid' property editor. 
Composition Defines a re-usable set of properties that can be included in the definition of @@ -1927,7 +1927,7 @@ To manage your website, simply open the Umbraco backoffice and start adding cont Allow segmentation Element Type Is an Element Type - An Element Type is meant to be used for instance in Nested Content, and not in the + An Element Type is meant to be used within other Document Types, and not in the Content tree. A Document Type cannot be changed to an Element Type once it has been used to diff --git a/src/Umbraco.Core/EmbeddedResources/Lang/it.xml b/src/Umbraco.Core/EmbeddedResources/Lang/it.xml index 2430542e66..5e45248d48 100644 --- a/src/Umbraco.Core/EmbeddedResources/Lang/it.xml +++ b/src/Umbraco.Core/EmbeddedResources/Lang/it.xml @@ -2660,6 +2660,7 @@ Per gestire il tuo sito web, è sufficiente aprire il backoffice di Umbraco e in Usato nei tipi di membro Non ci sono riferimenti a tipi di membro. Usato da + Correlato ai seguenti elementi Usato nei documenti Usato nei membri Usato nei media diff --git a/src/Umbraco.Core/Extensions/PublishedElementExtensions.cs b/src/Umbraco.Core/Extensions/PublishedElementExtensions.cs index c85178c85c..440962cd76 100644 --- a/src/Umbraco.Core/Extensions/PublishedElementExtensions.cs +++ b/src/Umbraco.Core/Extensions/PublishedElementExtensions.cs @@ -134,6 +134,27 @@ public static class PublishedElementExtensions #endregion + #region CheckVariation + /// <summary> + /// Method to check if VariationContext culture differs from culture parameter, if so it will update the VariationContext for the PublishedValueFallback. + /// </summary> + /// <param name="publishedValueFallback">The requested PublishedValueFallback.</param> + /// <param name="culture">The requested culture.</param> + /// <param name="segment">The requested segment.</param> + /// <remarks>Does nothing when the accessor read from the fallback is null or when no culture is requested.</remarks> + private static void EventuallyUpdateVariationContext(IPublishedValueFallback publishedValueFallback, string? culture, string? segment) + { + IVariationContextAccessor?
variationContextAccessor = publishedValueFallback.VariationContextAccessor; + + //If there is a difference in requested culture and the culture that is set in the VariationContext, it will pick wrong localized content. + //This happens for example using links to localized content in a RichText Editor. + if (variationContextAccessor != null && !string.IsNullOrEmpty(culture) && variationContextAccessor.VariationContext?.Culture != culture) + { + variationContextAccessor.VariationContext = new VariationContext(culture, segment); + } + } + #endregion + #region Value /// @@ -174,6 +195,8 @@ public static class PublishedElementExtensions { IPublishedProperty? property = content.GetProperty(alias); + EventuallyUpdateVariationContext(publishedValueFallback, culture, segment); + // if we have a property, and it has a value, return that value if (property != null && property.HasValue(culture, segment)) { diff --git a/src/Umbraco.Core/Extensions/StringExtensions.cs b/src/Umbraco.Core/Extensions/StringExtensions.cs index 6a76650523..c1abeb8650 100644 --- a/src/Umbraco.Core/Extensions/StringExtensions.cs +++ b/src/Umbraco.Core/Extensions/StringExtensions.cs @@ -1040,14 +1040,15 @@ public static class StringExtensions throw new ArgumentNullException(nameof(text)); } - var pos = text.IndexOf(search, StringComparison.InvariantCulture); + ReadOnlySpan spanText = text.AsSpan(); + var pos = spanText.IndexOf(search, StringComparison.InvariantCulture); if (pos < 0) { return text; } - return text.Substring(0, pos) + replace + text.Substring(pos + search.Length); + return string.Concat(spanText[..pos], replace.AsSpan(), spanText[(pos + search.Length)..]); } /// diff --git a/src/Umbraco.Core/Models/PropertyGroup.cs b/src/Umbraco.Core/Models/PropertyGroup.cs index 034770cdfc..9d23c85a9b 100644 --- a/src/Umbraco.Core/Models/PropertyGroup.cs +++ b/src/Umbraco.Core/Models/PropertyGroup.cs @@ -2,6 +2,7 @@ using System.Collections.Specialized; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using
System.Runtime.Serialization; + using Umbraco.Cms.Core.Models.Entities; namespace Umbraco.Cms.Core.Models; @@ -132,7 +133,7 @@ public class PropertyGroup : EntityBase, IEquatable } public bool Equals(PropertyGroup? other) => - base.Equals(other) || (other != null && Type == other.Type && Alias == other.Alias); + base.Equals(other) || (other != null && Type == other.Type && Alias == other.Alias && Id == other.Id); public override int GetHashCode() => (base.GetHashCode(), Type, Alias).GetHashCode(); diff --git a/src/Umbraco.Core/Models/PropertyTypeCollection.cs b/src/Umbraco.Core/Models/PropertyTypeCollection.cs index 49c83b4c9d..77a5d84421 100644 --- a/src/Umbraco.Core/Models/PropertyTypeCollection.cs +++ b/src/Umbraco.Core/Models/PropertyTypeCollection.cs @@ -30,7 +30,7 @@ public class PropertyTypeCollection : KeyedCollection, IN // This baseclass calling is needed, else compiler will complain about nullability /// - public bool IsReadOnly => ((ICollection)this).IsReadOnly; + public bool IsReadOnly => false; // 'new' keyword is required! we can explicitly implement ICollection.Add BUT since normally a concrete PropertyType type // is passed in, the explicit implementation doesn't get called, this ensures it does get called. diff --git a/src/Umbraco.Core/Models/PublishedContent/IPublishedValueFallback.cs b/src/Umbraco.Core/Models/PublishedContent/IPublishedValueFallback.cs index 729f7dd6bc..111d747ec1 100644 --- a/src/Umbraco.Core/Models/PublishedContent/IPublishedValueFallback.cs +++ b/src/Umbraco.Core/Models/PublishedContent/IPublishedValueFallback.cs @@ -5,6 +5,11 @@ namespace Umbraco.Cms.Core.Models.PublishedContent; /// public interface IPublishedValueFallback { + /// + /// VariationContextAccessor that is not required to be implemented, therefore throws NotImplementedException as default. 
+ /// + IVariationContextAccessor VariationContextAccessor { get { throw new NotImplementedException(); } set { throw new NotImplementedException(); } } + /// /// Tries to get a fallback value for a property. /// diff --git a/src/Umbraco.Core/Models/PublishedContent/PublishedValueFallback.cs b/src/Umbraco.Core/Models/PublishedContent/PublishedValueFallback.cs index 8a50323f12..fe914e95d4 100644 --- a/src/Umbraco.Core/Models/PublishedContent/PublishedValueFallback.cs +++ b/src/Umbraco.Core/Models/PublishedContent/PublishedValueFallback.cs @@ -20,6 +20,8 @@ public class PublishedValueFallback : IPublishedValueFallback _variationContextAccessor = variationContextAccessor; } + public IVariationContextAccessor VariationContextAccessor { get { return _variationContextAccessor; } } + /// public bool TryGetValue(IPublishedProperty property, string? culture, string? segment, Fallback fallback, object? defaultValue, out object? value) => TryGetValue(property, culture, segment, fallback, defaultValue, out value); diff --git a/src/Umbraco.Core/Services/ContentService.cs b/src/Umbraco.Core/Services/ContentService.cs index 607e8dc7a8..fb9889d3e4 100644 --- a/src/Umbraco.Core/Services/ContentService.cs +++ b/src/Umbraco.Core/Services/ContentService.cs @@ -2936,6 +2936,7 @@ public class ContentService : RepositoryService, IContentService // save saved.Add(content); _documentRepository.Save(content); + Audit(AuditType.Sort, userId, content.Id, "Sorting content performed by user"); } // first saved, then sorted @@ -2951,8 +2952,7 @@ public class ContentService : RepositoryService, IContentService { scope.Notifications.Publish(new ContentPublishedNotification(published, eventMessages)); } - - Audit(AuditType.Sort, userId, 0, "Sorting content performed by user"); + return OperationResult.Succeed(eventMessages); } diff --git a/src/Umbraco.Core/Services/MediaService.cs b/src/Umbraco.Core/Services/MediaService.cs index a9f81245d1..26731b9393 100644 --- 
a/src/Umbraco.Core/Services/MediaService.cs +++ b/src/Umbraco.Core/Services/MediaService.cs @@ -728,6 +728,8 @@ namespace Umbraco.Cms.Core.Services media.CreatorId = userId; } + media.WriterId = userId; + _mediaRepository.Save(media); scope.Notifications.Publish(new MediaSavedNotification(media, eventMessages).WithStateFrom(savingNotification)); // TODO: See note about suppressing events in content service diff --git a/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs b/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs index ea93a099f8..70a330b600 100644 --- a/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs +++ b/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs @@ -1,4 +1,4 @@ -using System.Diagnostics; +using System.Diagnostics; using System.Globalization; using Microsoft.Extensions.Options; using Umbraco.Cms.Core.Configuration.Models; @@ -305,10 +305,10 @@ namespace Umbraco.Cms.Core.Strings return text; } - private static string RemoveSurrogatePairs(string text) + private string RemoveSurrogatePairs(string text) { - var input = text.ToCharArray(); - var output = new char[input.Length]; + var input = text.AsSpan(); + Span<char> output = input.Length <= 1024 ? stackalloc char[input.Length] : new char[text.Length]; var opos = 0; for (var ipos = 0; ipos < input.Length; ipos++) @@ -325,7 +325,7 @@ namespace Umbraco.Cms.Core.Strings } } - return new string(output, 0, opos); + return new string(output.Slice(0, opos)); // keep only the opos chars produced, as the replaced array-based version did } // here was a subtle, ascii-optimized version of the cleaning code, and I was @@ -347,7 +347,8 @@ namespace Umbraco.Cms.Core.Strings // it's faster to use an array than a StringBuilder var ilen = input.Length; - var output = new char[ilen * 2]; // twice the length should be OK in all cases + var totalSize = ilen * 2; + Span output = totalSize <= 1024 ?
stackalloc char[totalSize] : new char[totalSize]; // twice the length should be OK in all cases for (var i = 0; i < ilen; i++) { @@ -479,11 +480,11 @@ namespace Umbraco.Cms.Core.Strings throw new Exception("Invalid state."); } - return new string(output, 0, opos); + return new string(output.Slice(0, opos)); } // note: supports surrogate pairs in input string - internal void CopyTerm(string input, int ipos, char[] output, ref int opos, int len, CleanStringType caseType, string culture, bool isAcronym) + internal void CopyTerm(string input, int ipos, Span output, ref int opos, int len, CleanStringType caseType, string culture, bool isAcronym) { var term = input.Substring(ipos, len); CultureInfo cultureInfo = string.IsNullOrEmpty(culture) ? CultureInfo.InvariantCulture : CultureInfo.GetCultureInfo(culture); @@ -509,19 +510,19 @@ namespace Umbraco.Cms.Core.Strings //case CleanStringType.LowerCase: //case CleanStringType.UpperCase: case CleanStringType.Unchanged: - term.CopyTo(0, output, opos, len); + term.CopyTo(output.Slice(opos, len)); opos += len; break; case CleanStringType.LowerCase: term = term.ToLower(cultureInfo); - term.CopyTo(0, output, opos, term.Length); + term.CopyTo(output.Slice(opos, term.Length)); opos += term.Length; break; case CleanStringType.UpperCase: term = term.ToUpper(cultureInfo); - term.CopyTo(0, output, opos, term.Length); + term.CopyTo(output.Slice(opos, term.Length)); opos += term.Length; break; @@ -532,7 +533,7 @@ namespace Umbraco.Cms.Core.Strings { s = term.Substring(ipos, 2); s = opos == 0 ? 
s.ToLower(cultureInfo) : s.ToUpper(cultureInfo); - s.CopyTo(0, output, opos, s.Length); + s.CopyTo(output.Slice(opos, s.Length)); opos += s.Length; i++; // surrogate pair len is 2 } @@ -543,7 +544,7 @@ namespace Umbraco.Cms.Core.Strings if (len > i) { term = term.Substring(i).ToLower(cultureInfo); - term.CopyTo(0, output, opos, term.Length); + term.CopyTo(output.Slice(opos, term.Length)); opos += term.Length; } break; @@ -555,7 +556,7 @@ namespace Umbraco.Cms.Core.Strings { s = term.Substring(ipos, 2); s = s.ToUpper(cultureInfo); - s.CopyTo(0, output, opos, s.Length); + s.CopyTo(output.Slice(opos, s.Length)); opos += s.Length; i++; // surrogate pair len is 2 } @@ -566,7 +567,7 @@ namespace Umbraco.Cms.Core.Strings if (len > i) { term = term.Substring(i).ToLower(cultureInfo); - term.CopyTo(0, output, opos, term.Length); + term.CopyTo(output.Slice(opos, term.Length)); opos += term.Length; } break; @@ -578,7 +579,7 @@ namespace Umbraco.Cms.Core.Strings { s = term.Substring(ipos, 2); s = opos == 0 ? s : s.ToUpper(cultureInfo); - s.CopyTo(0, output, opos, s.Length); + s.CopyTo(output.Slice(opos, s.Length)); opos += s.Length; i++; // surrogate pair len is 2 } @@ -589,7 +590,7 @@ namespace Umbraco.Cms.Core.Strings if (len > i) { term = term.Substring(i); - term.CopyTo(0, output, opos, term.Length); + term.CopyTo(output.Slice(opos, term.Length)); opos += term.Length; } break; diff --git a/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs b/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs index 4221273150..74bc2fa9e8 100644 --- a/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs +++ b/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs @@ -11,21 +11,27 @@ namespace Umbraco.Cms.Core.Strings; /// public static class Utf8ToAsciiConverter { + [Obsolete("Use ToAsciiString(ReadOnlySpan..) instead")] + public static string ToAsciiString(string text, char fail = '?') + { + return ToAsciiString(text.AsSpan(), fail); + } + /// /// Converts an Utf8 string into an Ascii string. 
/// /// The text to convert. /// The character to use to replace characters that cannot properly be converted. /// The converted text. - public static string ToAsciiString(string text, char fail = '?') + public static string ToAsciiString(ReadOnlySpan text, char fail = '?') { - var input = text.ToCharArray(); - // this is faster although it uses more memory // but... we should be filtering short strings only... - var output = new char[input.Length * 3]; // *3 because of things such as OE - var len = ToAscii(input, output, fail); - return new string(output, 0, len); + + var totalSize = text.Length * 3; + Span output = totalSize <= 1024 ? stackalloc char[totalSize] : new char[totalSize]; // *3 because of things such as OE + var len = ToAscii(text, output, fail); + return new string(output[..len]); // var output = new StringBuilder(input.Length + 16); // default is 16, start with at least input length + little extra // ToAscii(input, output); @@ -66,7 +72,7 @@ public static class Utf8ToAsciiConverter /// The number of characters in the output array. /// The caller must ensure that the output array is big enough. /// The output array is not big enough. - private static int ToAscii(char[] input, char[] output, char fail = '?') + private static int ToAscii(ReadOnlySpan input, Span output, char fail = '?') { var opos = 0; @@ -121,7 +127,7 @@ public static class Utf8ToAsciiConverter /// Input should contain Utf8 characters exclusively and NOT Unicode. /// Removes controls, normalizes whitespaces, replaces symbols by '?'. 
/// - private static void ToAscii(char[] input, int ipos, char[] output, ref int opos, char fail = '?') + private static void ToAscii(ReadOnlySpan input, int ipos, Span output, ref int opos, char fail = '?') { var c = input[ipos]; diff --git a/src/Umbraco.Web.UI.Client/src/common/services/contenteditinghelper.service.js b/src/Umbraco.Web.UI.Client/src/common/services/contenteditinghelper.service.js index e7ecb5c93c..30afe39884 100644 --- a/src/Umbraco.Web.UI.Client/src/common/services/contenteditinghelper.service.js +++ b/src/Umbraco.Web.UI.Client/src/common/services/contenteditinghelper.service.js @@ -142,6 +142,9 @@ function contentEditingHelper(fileManager, $q, $location, $routeParams, editorSt //update editor state to what is current editorState.set(args.content); + //needs to be manually set for infinite editing mode + args.scope.isNew = args.content.id === 0 && args.scope.isNew; + return $q.reject(err); }); } diff --git a/src/Umbraco.Web.UI.Client/src/less/components/umb-group-builder.less b/src/Umbraco.Web.UI.Client/src/less/components/umb-group-builder.less index 9865b38120..1d4a6e201f 100644 --- a/src/Umbraco.Web.UI.Client/src/less/components/umb-group-builder.less +++ b/src/Umbraco.Web.UI.Client/src/less/components/umb-group-builder.less @@ -796,6 +796,7 @@ input.umb-group-builder__group-sort-value { transform: translate(0, -50%); } + input.editor-label, textarea.editor-label { border-color: transparent; box-shadow: none; diff --git a/src/Umbraco.Web.UI.Client/src/less/components/umb-node-preview.less b/src/Umbraco.Web.UI.Client/src/less/components/umb-node-preview.less index bac1ebc4f3..7e42d0e46e 100644 --- a/src/Umbraco.Web.UI.Client/src/less/components/umb-node-preview.less +++ b/src/Umbraco.Web.UI.Client/src/less/components/umb-node-preview.less @@ -7,7 +7,8 @@ } .umb-editor-wrapper .umb-node-preview { - .umb-property-editor--limit-width(); + word-break: break-word; + .umb-property-editor--limit-width(); } .umb-node-preview:last-of-type { @@ -38,7 
+39,6 @@ .umb-node-preview__content { flex: 1 1 auto; - margin-right: 25px; overflow: hidden; } diff --git a/src/Umbraco.Web.UI.Client/src/less/components/umb-readonlyvalue.less b/src/Umbraco.Web.UI.Client/src/less/components/umb-readonlyvalue.less index 0790bdd07a..f0a910b278 100644 --- a/src/Umbraco.Web.UI.Client/src/less/components/umb-readonlyvalue.less +++ b/src/Umbraco.Web.UI.Client/src/less/components/umb-readonlyvalue.less @@ -1,3 +1,5 @@ -.umb-readonlyvalue { - position:relative; +.umb-readonlyvalue { + position: relative; + word-break: break-word; + .umb-property-editor--limit-width(); } diff --git a/src/Umbraco.Web.UI.Client/src/views/common/infiniteeditors/propertysettings/propertysettings.controller.js b/src/Umbraco.Web.UI.Client/src/views/common/infiniteeditors/propertysettings/propertysettings.controller.js index 246cec9c50..76fbf8a3ba 100644 --- a/src/Umbraco.Web.UI.Client/src/views/common/infiniteeditors/propertysettings/propertysettings.controller.js +++ b/src/Umbraco.Web.UI.Client/src/views/common/infiniteeditors/propertysettings/propertysettings.controller.js @@ -15,7 +15,7 @@ var vm = this; const dataTypesCanBeChangedConfig = window.Umbraco.Sys.ServerVariables.umbracoSettings.dataTypesCanBeChanged; - + vm.allowChangeDataType = false; vm.changeDataTypeHelpTextIsVisible = false; vm.propertyTypeHasValues = false; diff --git a/src/Umbraco.Web.UI.Client/src/views/common/infiniteeditors/propertysettings/propertysettings.html b/src/Umbraco.Web.UI.Client/src/views/common/infiniteeditors/propertysettings/propertysettings.html index 22f7cec252..6edb11f819 100644 --- a/src/Umbraco.Web.UI.Client/src/views/common/infiniteeditors/propertysettings/propertysettings.html +++ b/src/Umbraco.Web.UI.Client/src/views/common/infiniteeditors/propertysettings/propertysettings.html @@ -20,7 +20,8 @@
- + ng-keypress="vm.submitOnEnter($event)" />
Required label
@@ -47,11 +46,10 @@ ng-model="model.property.description" localize="placeholder" placeholder="@placeholders_enterDescription" - ng-keypress="vm.submitOnEnter($event)" umb-auto-resize>
- +
- +
diff --git a/src/Umbraco.Web.UI.Client/src/views/components/tabs/umb-tabs-nav.html b/src/Umbraco.Web.UI.Client/src/views/components/tabs/umb-tabs-nav.html index 9480d14fba..4872916791 100644 --- a/src/Umbraco.Web.UI.Client/src/views/components/tabs/umb-tabs-nav.html +++ b/src/Umbraco.Web.UI.Client/src/views/components/tabs/umb-tabs-nav.html @@ -1,6 +1,6 @@ -
    -
  • - diff --git a/src/Umbraco.Web.UI.Client/src/views/documentTypes/views/permissions/permissions.html b/src/Umbraco.Web.UI.Client/src/views/documentTypes/views/permissions/permissions.html index 3460180ce6..088a32d45e 100644 --- a/src/Umbraco.Web.UI.Client/src/views/documentTypes/views/permissions/permissions.html +++ b/src/Umbraco.Web.UI.Client/src/views/documentTypes/views/permissions/permissions.html @@ -78,7 +78,7 @@
    Is an Element Type
    - An Element Type is meant to be used for instance in Nested Content, and not in the tree. + An Element Type is meant to be used within other Document Types, and not in the Content tree.
    A Document Type cannot be changed to an Element Type once it has been used to create one or more content items.
    diff --git a/src/Umbraco.Web.UI.Client/src/views/propertyeditors/blockgrid/prevalue/blockgrid.blockconfiguration.area.overlay.html b/src/Umbraco.Web.UI.Client/src/views/propertyeditors/blockgrid/prevalue/blockgrid.blockconfiguration.area.overlay.html index 650ca29629..c117e0fbc3 100644 --- a/src/Umbraco.Web.UI.Client/src/views/propertyeditors/blockgrid/prevalue/blockgrid.blockconfiguration.area.overlay.html +++ b/src/Umbraco.Web.UI.Client/src/views/propertyeditors/blockgrid/prevalue/blockgrid.blockconfiguration.area.overlay.html @@ -32,7 +32,7 @@ When using GetBlockGridHTML() to render the Block Grid, the alias will be rendered in the markup as a 'data-area-alias' attribute. Use the alias attribute to target the element for the area. Example. .umb-block-grid__area[data-area-alias="MyAreaAlias"] { ... }
    - +
    @@ -51,7 +51,7 @@ Override the label text for adding a new Block to this Area, Example: 'Add Widget'
    - +
    diff --git a/src/Umbraco.Web.UI.Client/src/views/propertyeditors/listview/listview.controller.js b/src/Umbraco.Web.UI.Client/src/views/propertyeditors/listview/listview.controller.js index 300aa771e3..98de0c4bfc 100644 --- a/src/Umbraco.Web.UI.Client/src/views/propertyeditors/listview/listview.controller.js +++ b/src/Umbraco.Web.UI.Client/src/views/propertyeditors/listview/listview.controller.js @@ -153,7 +153,7 @@ function listViewController($scope, $interpolate, $routeParams, $injector, $time $scope.options = { useInfiniteEditor: $scope.model.config.useInfiniteEditor === true, pageSize: $scope.model.config.pageSize ? $scope.model.config.pageSize : 10, - pageNumber: (listParamsForCurrent && $routeParams.page && !isNaN($routeParams.page) && Number($routeParams.page) > 0) ? $routeParams.page : 1, + pageNumber: (listParamsForCurrent && $routeParams.page && !isNaN($routeParams.page) && Number($routeParams.page) > 0) ? $routeParams.page : 1, filter: (listParamsForCurrent && $routeParams.filter ? $routeParams.filter : '').trim(), orderBy: (listParamsForCurrent && $routeParams.orderBy ? $routeParams.orderBy : $scope.model.config.orderBy ? $scope.model.config.orderBy : 'VersionDate').trim(), orderDirection: (listParamsForCurrent && $routeParams.orderDirection ? $routeParams.orderDirection : $scope.model.config.orderDirection ? $scope.model.config.orderDirection : "desc").trim(), @@ -177,7 +177,7 @@ function listViewController($scope, $interpolate, $routeParams, $injector, $time }; _.each($scope.options.includeProperties, function (property) { - property.nameExp = property.nameTemplate + property.nameExp = property.nameTemplate ? $interpolate(property.nameTemplate) : undefined; }); @@ -314,7 +314,7 @@ function listViewController($scope, $interpolate, $routeParams, $injector, $time //reload! 
$scope.reloadView(id, reloadActiveNode); } - // in the media section, the list view items are by default also shown in the tree, so we need + // in the media section, the list view items are by default also shown in the tree, so we need // to refresh the current tree node when changing the folder contents (adding and removing) else if (reloadActiveNode && section === "media") { var activeNode = appState.getTreeState("selectedNode"); @@ -474,7 +474,7 @@ function listViewController($scope, $interpolate, $routeParams, $injector, $time } }; - // if any of the selected nodes has variants we want to + // if any of the selected nodes has variants we want to // show a dialog where the languages can be chosen if (selectionHasVariants()) { languageResource.getAll() @@ -530,7 +530,7 @@ function listViewController($scope, $interpolate, $routeParams, $injector, $time } }; - // if any of the selected nodes has variants we want to + // if any of the selected nodes has variants we want to // show a dialog where the languages can be chosen if (selectionHasVariants()) { languageResource.getAll() diff --git a/src/Umbraco.Web.UI.Client/src/views/propertyeditors/nestedcontent/nestedcontent.controller.js b/src/Umbraco.Web.UI.Client/src/views/propertyeditors/nestedcontent/nestedcontent.controller.js index 1320bd6804..64363914f9 100644 --- a/src/Umbraco.Web.UI.Client/src/views/propertyeditors/nestedcontent/nestedcontent.controller.js +++ b/src/Umbraco.Web.UI.Client/src/views/propertyeditors/nestedcontent/nestedcontent.controller.js @@ -104,9 +104,6 @@ vm.minItems = model.config.minItems || 0; vm.maxItems = model.config.maxItems || 0; - if (vm.maxItems === 0) - vm.maxItems = 1000; - vm.singleMode = vm.minItems === 1 && vm.maxItems === 1 && model.config.contentTypes.length === 1; vm.expandsOnLoad = Object.toBoolean(model.config.expandsOnLoad) vm.showIcons = Object.toBoolean(model.config.showIcons); @@ -204,9 +201,17 @@ validate(); }; + vm.maxItemsExceeded = function () { + return 
vm.maxItems !== 0 && vm.nodes.length > vm.maxItems; + } + + vm.maxItemsReached = function () { + return vm.maxItems !== 0 && vm.nodes.length >= vm.maxItems; + } + vm.openNodeTypePicker = function ($event) { - if (vm.nodes.length >= vm.maxItems) { + if (vm.maxItemsReached()) { return; } @@ -767,7 +772,7 @@ $scope.nestedContentForm.minCount.$setValidity("minCount", true); } - if (vm.nodes.length > vm.maxItems) { + if (vm.maxItemsExceeded()) { $scope.nestedContentForm.maxCount.$setValidity("maxCount", false); } else { diff --git a/src/Umbraco.Web.UI.Client/src/views/propertyeditors/nestedcontent/nestedcontent.propertyeditor.html b/src/Umbraco.Web.UI.Client/src/views/propertyeditors/nestedcontent/nestedcontent.propertyeditor.html index f2654e9851..e5f4564e08 100644 --- a/src/Umbraco.Web.UI.Client/src/views/propertyeditors/nestedcontent/nestedcontent.propertyeditor.html +++ b/src/Umbraco.Web.UI.Client/src/views/propertyeditors/nestedcontent/nestedcontent.propertyeditor.html @@ -1,4 +1,4 @@ -
    +
    @@ -60,9 +60,9 @@ @@ -78,7 +78,7 @@ Minimum %0% entries, needs %1% more.
    -
    +
    Maximum %0% entries, %1% too many.
    diff --git a/tests/Directory.Build.props b/tests/Directory.Build.props index 9768d3ee8e..a773c6a1c9 100644 --- a/tests/Directory.Build.props +++ b/tests/Directory.Build.props @@ -5,8 +5,9 @@ annotations + false - + $(MSBuildThisFileDirectory)codeanalysis.ruleset diff --git a/tests/Umbraco.Tests.Benchmarks/OldUtf8ToAsciiConverter.cs b/tests/Umbraco.Tests.Benchmarks/OldUtf8ToAsciiConverter.cs new file mode 100644 index 0000000000..08872d8178 --- /dev/null +++ b/tests/Umbraco.Tests.Benchmarks/OldUtf8ToAsciiConverter.cs @@ -0,0 +1,3626 @@ +using System; + +namespace Umbraco.Cms.Core.Strings; + +/// +/// Provides methods to convert Utf8 text to Ascii. +/// +/// +/// Tries to match characters such as accented eg "é" to Ascii equivalent eg "e". +/// Converts all "whitespace" characters to a single whitespace. +/// Removes all non-Utf8 (unicode) characters, so in fact it can sort-of "convert" Unicode to Ascii. +/// Replaces symbols with '?'. +/// +public static class OldUtf8ToAsciiConverter +{ + /// + /// Converts an Utf8 string into an Ascii string. + /// + /// The text to convert. + /// The character to use to replace characters that cannot properly be converted. + /// The converted text. + public static string ToAsciiString(string text, char fail = '?') + { + var input = text.ToCharArray(); + + // this is faster although it uses more memory + // but... we should be filtering short strings only... + var output = new char[input.Length * 3]; // *3 because of things such as OE + var len = ToAscii(input, output, fail); + return new string(output, 0, len); + + // var output = new StringBuilder(input.Length + 16); // default is 16, start with at least input length + little extra + // ToAscii(input, output); + // return output.ToString(); + } + + /// + /// Converts an Utf8 string into an array of Ascii characters. + /// + /// The text to convert. + /// The character to use to replace characters that cannot properly be converted. + /// The converted text. 
public static char[] ToAsciiCharArray(string text, char fail = '?')
{
    char[] source = text.ToCharArray();

    // Scratch buffer of three output slots per input character: a single input character
    // may expand to several Ascii characters (a ligature such as "ffi", or "(a)").
    // This trades memory for speed; the converter is meant for short strings.
    char[] buffer = new char[source.Length * 3];
    int written = ToAscii(source, buffer, fail);

    // Hand back an array sized to exactly the characters that were written.
    char[] result = new char[written];
    Array.Copy(buffer, 0, result, 0, written);
    return result;
}

/// <summary>
///     Converts an array of Utf8 characters into an array of Ascii characters.
/// </summary>
/// <param name="input">The input array.</param>
/// <param name="output">The output array.</param>
/// <param name="fail">The character to use to replace characters that cannot properly be converted.</param>
/// <returns>The number of characters in the output array.</returns>
/// <remarks>The caller must ensure that the output array is big enough.</remarks>
/// <exception cref="IndexOutOfRangeException">The output array is not big enough.</exception>
private static int ToAscii(char[] input, char[] output, char fail = '?')
{
    // opos is the next free slot in output; it doubles as the count of characters written.
    var opos = 0;

    for (var ipos = 0; ipos < input.Length; ipos++)
    {
        var c = input[ipos];

        if (!char.IsSurrogate(c))
        {
            // Regular (BMP) character: the per-character overload writes zero or more
            // Ascii characters to output and advances opos accordingly.
            ToAscii(input, ipos, output, ref opos, fail);
            continue;
        }

        // A well-formed surrogate pair (high surrogate followed by low surrogate) encodes a
        // single character: consume both halves and emit one replacement character.
        // An unpaired surrogate is replaced on its own, so that the character following it
        // is still converted instead of being silently skipped.
        if (char.IsHighSurrogate(c) && ipos + 1 < input.Length && char.IsLowSurrogate(input[ipos + 1]))
        {
            ipos++;
        }

        output[opos++] = fail;
    }

    return opos;
}

// private static void ToAscii(char[] input, StringBuilder output)
// {
//    var chars = new char[5];

//    for (var ipos = 0; ipos < input.Length; ipos++)
//    {
//        var opos = 0;
//        if (char.IsSurrogate(input[ipos]))
//            ipos++;
//        else
//        {
//            ToAscii(input, ipos, chars, ref opos);
//            output.Append(chars, 0, opos);
//        }
//    }
// }

/// <summary>
///     Converts the character at position <paramref name="ipos" /> in input array of Utf8 characters
///     <paramref name="input" />
///     and writes the converted value to output array of Ascii characters <paramref name="output" /> at position
///     <paramref name="opos" />,
///     and increments that position accordingly.
/// </summary>
/// <param name="input">The input array.</param>
/// <param name="ipos">The input position.</param>
/// <param name="output">The output array.</param>
/// <param name="opos">The output position.</param>
/// <param name="fail">The character to use to replace characters that cannot properly be converted.</param>
/// <remarks>
///     <para>Adapted from various sources on the 'net including Lucene.Net.Analysis.ASCIIFoldingFilter.</para>
///     <para>Input should contain Utf8 characters exclusively and NOT Unicode.</para>
///     <para>Removes controls, normalizes whitespaces, replaces symbols by '?'.</para>
/// </remarks>
private static void ToAscii(char[] input, int ipos, char[] output, ref int opos, char fail = '?')
{
    var c = input[ipos];

    if (char.IsControl(c))
    {
        // Control characters are non-printing and formatting characters, such as ACK, BEL, CR, FF, LF, and VT.
        // The Unicode standard assigns the following code points to control characters: from \U0000 to \U001F,
        // \U007F, and from \U0080 to \U009F. According to the Unicode standard, these values are to be
        // interpreted as control characters unless their use is otherwise defined by an application.
Valid + // control characters are members of the UnicodeCategory.Control category. + + // we don't want them + } + + // else if (char.IsSeparator(c)) + // { + // // The Unicode standard recognizes three subcategories of separators: + // // - Space separators (the UnicodeCategory.SpaceSeparator category), which includes characters such as \u0020. + // // - Line separators (the UnicodeCategory.LineSeparator category), which includes \u2028. + // // - Paragraph separators (the UnicodeCategory.ParagraphSeparator category), which includes \u2029. + // // + // // Note: The Unicode standard classifies the characters \u000A (LF), \u000C (FF), and \u000D (CR) as control + // // characters (members of the UnicodeCategory.Control category), not as separator characters. + + // // better do it via WhiteSpace + // } + else if (char.IsWhiteSpace(c)) + { + // White space characters are the following Unicode characters: + // - Members of the SpaceSeparator category, which includes the characters SPACE (U+0020), + // OGHAM SPACE MARK (U+1680), MONGOLIAN VOWEL SEPARATOR (U+180E), EN QUAD (U+2000), EM QUAD (U+2001), + // EN SPACE (U+2002), EM SPACE (U+2003), THREE-PER-EM SPACE (U+2004), FOUR-PER-EM SPACE (U+2005), + // SIX-PER-EM SPACE (U+2006), FIGURE SPACE (U+2007), PUNCTUATION SPACE (U+2008), THIN SPACE (U+2009), + // HAIR SPACE (U+200A), NARROW NO-BREAK SPACE (U+202F), MEDIUM MATHEMATICAL SPACE (U+205F), + // and IDEOGRAPHIC SPACE (U+3000). + // - Members of the LineSeparator category, which consists solely of the LINE SEPARATOR character (U+2028). + // - Members of the ParagraphSeparator category, which consists solely of the PARAGRAPH SEPARATOR character (U+2029). + // - The characters CHARACTER TABULATION (U+0009), LINE FEED (U+000A), LINE TABULATION (U+000B), + // FORM FEED (U+000C), CARRIAGE RETURN (U+000D), NEXT LINE (U+0085), and NO-BREAK SPACE (U+00A0). 
+ + // make it a whitespace + output[opos++] = ' '; + } + else if (c < '\u0080') + { + // safe + output[opos++] = c; + } + else + { + switch (c) + { + case '\u00C0': + // À [LATIN CAPITAL LETTER A WITH GRAVE] + case '\u00C1': + // � [LATIN CAPITAL LETTER A WITH ACUTE] + case '\u00C2': + //  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] + case '\u00C3': + // à [LATIN CAPITAL LETTER A WITH TILDE] + case '\u00C4': + // Ä [LATIN CAPITAL LETTER A WITH DIAERESIS] + case '\u00C5': + // Ã… [LATIN CAPITAL LETTER A WITH RING ABOVE] + case '\u0100': + // Ä€ [LATIN CAPITAL LETTER A WITH MACRON] + case '\u0102': + // Ä‚ [LATIN CAPITAL LETTER A WITH BREVE] + case '\u0104': + // Ä„ [LATIN CAPITAL LETTER A WITH OGONEK] + case '\u018F': + // � http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA] + case '\u01CD': + // � [LATIN CAPITAL LETTER A WITH CARON] + case '\u01DE': + // Çž [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] + case '\u01E0': + // Ç  [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] + case '\u01FA': + // Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] + case '\u0200': + // È€ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] + case '\u0202': + // È‚ [LATIN CAPITAL LETTER A WITH INVERTED BREVE] + case '\u0226': + // Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE] + case '\u023A': + // Ⱥ [LATIN CAPITAL LETTER A WITH STROKE] + case '\u1D00': + // á´€ [LATIN LETTER SMALL CAPITAL A] + case '\u1E00': + // Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW] + case '\u1EA0': + // Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW] + case '\u1EA2': + // Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE] + case '\u1EA4': + // Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] + case '\u1EA6': + // Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] + case '\u1EA8': + // Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EAA': + // Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] + case '\u1EAC': + // Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] + case '\u1EAE': + 
// Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] + case '\u1EB0': + // Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] + case '\u1EB2': + // Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE] + case '\u1EB4': + // Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] + case '\u1EB6': + // Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] + case '\u24B6': + // â’¶ [CIRCLED LATIN CAPITAL LETTER A] + case '\uFF21': // A [FULLWIDTH LATIN CAPITAL LETTER A] + output[opos++] = 'A'; + break; + + case '\u00E0': + // à [LATIN SMALL LETTER A WITH GRAVE] + case '\u00E1': + // á [LATIN SMALL LETTER A WITH ACUTE] + case '\u00E2': + // â [LATIN SMALL LETTER A WITH CIRCUMFLEX] + case '\u00E3': + // ã [LATIN SMALL LETTER A WITH TILDE] + case '\u00E4': + // ä [LATIN SMALL LETTER A WITH DIAERESIS] + case '\u00E5': + // Ã¥ [LATIN SMALL LETTER A WITH RING ABOVE] + case '\u0101': + // � [LATIN SMALL LETTER A WITH MACRON] + case '\u0103': + // ă [LATIN SMALL LETTER A WITH BREVE] + case '\u0105': + // Ä… [LATIN SMALL LETTER A WITH OGONEK] + case '\u01CE': + // ÇŽ [LATIN SMALL LETTER A WITH CARON] + case '\u01DF': + // ÇŸ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] + case '\u01E1': + // Ç¡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] + case '\u01FB': + // Ç» [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] + case '\u0201': + // � [LATIN SMALL LETTER A WITH DOUBLE GRAVE] + case '\u0203': + // ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE] + case '\u0227': + // ȧ [LATIN SMALL LETTER A WITH DOT ABOVE] + case '\u0250': + // � [LATIN SMALL LETTER TURNED A] + case '\u0259': + // É™ [LATIN SMALL LETTER SCHWA] + case '\u025A': + // Éš [LATIN SMALL LETTER SCHWA WITH HOOK] + case '\u1D8F': + // � [LATIN SMALL LETTER A WITH RETROFLEX HOOK] + case '\u1D95': + // á¶• [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] + case '\u1E01': + // ạ [LATIN SMALL LETTER A WITH RING BELOW] + case '\u1E9A': + // ả [LATIN SMALL LETTER A WITH RIGHT HALF RING] + case '\u1EA1': + // ạ [LATIN SMALL LETTER A WITH DOT 
BELOW] + case '\u1EA3': + // ả [LATIN SMALL LETTER A WITH HOOK ABOVE] + case '\u1EA5': + // ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] + case '\u1EA7': + // ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] + case '\u1EA9': + // ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EAB': + // ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] + case '\u1EAD': + // ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] + case '\u1EAF': + // ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE] + case '\u1EB1': + // ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE] + case '\u1EB3': + // ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] + case '\u1EB5': + // ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE] + case '\u1EB7': + // ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] + case '\u2090': + // � [LATIN SUBSCRIPT SMALL LETTER A] + case '\u2094': + // �? [LATIN SUBSCRIPT SMALL LETTER SCHWA] + case '\u24D0': + // � [CIRCLED LATIN SMALL LETTER A] + case '\u2C65': + // â±¥ [LATIN SMALL LETTER A WITH STROKE] + case '\u2C6F': + // Ɐ [LATIN CAPITAL LETTER TURNED A] + case '\uFF41': // � [FULLWIDTH LATIN SMALL LETTER A] + output[opos++] = 'a'; + break; + + case '\uA732': // Ꜳ [LATIN CAPITAL LETTER AA] + output[opos++] = 'A'; + output[opos++] = 'A'; + break; + + case '\u00C6': + // Æ [LATIN CAPITAL LETTER AE] + case '\u01E2': + // Ç¢ [LATIN CAPITAL LETTER AE WITH MACRON] + case '\u01FC': + // Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE] + case '\u1D01': // á´� [LATIN LETTER SMALL CAPITAL AE] + output[opos++] = 'A'; + output[opos++] = 'E'; + break; + + case '\uA734': // Ꜵ [LATIN CAPITAL LETTER AO] + output[opos++] = 'A'; + output[opos++] = 'O'; + break; + + case '\uA736': // Ꜷ [LATIN CAPITAL LETTER AU] + output[opos++] = 'A'; + output[opos++] = 'U'; + break; + + case '\uA738': + // Ꜹ [LATIN CAPITAL LETTER AV] + case '\uA73A': // Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] + output[opos++] = 'A'; + output[opos++] = 'V'; + break; + + case '\uA73C': // Ꜽ [LATIN CAPITAL 
LETTER AY] + output[opos++] = 'A'; + output[opos++] = 'Y'; + break; + + case '\u249C': // â’œ [PARENTHESIZED LATIN SMALL LETTER A] + output[opos++] = '('; + output[opos++] = 'a'; + output[opos++] = ')'; + break; + + case '\uA733': // ꜳ [LATIN SMALL LETTER AA] + output[opos++] = 'a'; + output[opos++] = 'a'; + break; + + case '\u00E6': + // æ [LATIN SMALL LETTER AE] + case '\u01E3': + // Ç£ [LATIN SMALL LETTER AE WITH MACRON] + case '\u01FD': + // ǽ [LATIN SMALL LETTER AE WITH ACUTE] + case '\u1D02': // á´‚ [LATIN SMALL LETTER TURNED AE] + output[opos++] = 'a'; + output[opos++] = 'e'; + break; + + case '\uA735': // ꜵ [LATIN SMALL LETTER AO] + output[opos++] = 'a'; + output[opos++] = 'o'; + break; + + case '\uA737': // ꜷ [LATIN SMALL LETTER AU] + output[opos++] = 'a'; + output[opos++] = 'u'; + break; + + case '\uA739': + // ꜹ [LATIN SMALL LETTER AV] + case '\uA73B': // ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR] + output[opos++] = 'a'; + output[opos++] = 'v'; + break; + + case '\uA73D': // ꜽ [LATIN SMALL LETTER AY] + output[opos++] = 'a'; + output[opos++] = 'y'; + break; + + case '\u0181': + // � [LATIN CAPITAL LETTER B WITH HOOK] + case '\u0182': + // Æ‚ [LATIN CAPITAL LETTER B WITH TOPBAR] + case '\u0243': + // Ƀ [LATIN CAPITAL LETTER B WITH STROKE] + case '\u0299': + // Ê™ [LATIN LETTER SMALL CAPITAL B] + case '\u1D03': + // á´ƒ [LATIN LETTER SMALL CAPITAL BARRED B] + case '\u1E02': + // Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE] + case '\u1E04': + // Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW] + case '\u1E06': + // Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW] + case '\u24B7': + // â’· [CIRCLED LATIN CAPITAL LETTER B] + case '\uFF22': // ï¼¢ [FULLWIDTH LATIN CAPITAL LETTER B] + output[opos++] = 'B'; + break; + + case '\u0180': + // Æ€ [LATIN SMALL LETTER B WITH STROKE] + case '\u0183': + // ƃ [LATIN SMALL LETTER B WITH TOPBAR] + case '\u0253': + // É“ [LATIN SMALL LETTER B WITH HOOK] + case '\u1D6C': + // ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE] + case '\u1D80': + 
// á¶€ [LATIN SMALL LETTER B WITH PALATAL HOOK] + case '\u1E03': + // ḃ [LATIN SMALL LETTER B WITH DOT ABOVE] + case '\u1E05': + // ḅ [LATIN SMALL LETTER B WITH DOT BELOW] + case '\u1E07': + // ḇ [LATIN SMALL LETTER B WITH LINE BELOW] + case '\u24D1': + // â“‘ [CIRCLED LATIN SMALL LETTER B] + case '\uFF42': // b [FULLWIDTH LATIN SMALL LETTER B] + output[opos++] = 'b'; + break; + + case '\u249D': // â’� [PARENTHESIZED LATIN SMALL LETTER B] + output[opos++] = '('; + output[opos++] = 'b'; + output[opos++] = ')'; + break; + + case '\u00C7': + // Ç [LATIN CAPITAL LETTER C WITH CEDILLA] + case '\u0106': + // Ć [LATIN CAPITAL LETTER C WITH ACUTE] + case '\u0108': + // Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] + case '\u010A': + // ÄŠ [LATIN CAPITAL LETTER C WITH DOT ABOVE] + case '\u010C': + // ÄŒ [LATIN CAPITAL LETTER C WITH CARON] + case '\u0187': + // Ƈ [LATIN CAPITAL LETTER C WITH HOOK] + case '\u023B': + // È» [LATIN CAPITAL LETTER C WITH STROKE] + case '\u0297': + // Ê— [LATIN LETTER STRETCHED C] + case '\u1D04': + // á´„ [LATIN LETTER SMALL CAPITAL C] + case '\u1E08': + // Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] + case '\u24B8': + // â’¸ [CIRCLED LATIN CAPITAL LETTER C] + case '\uFF23': // ï¼£ [FULLWIDTH LATIN CAPITAL LETTER C] + output[opos++] = 'C'; + break; + + case '\u00E7': + // ç [LATIN SMALL LETTER C WITH CEDILLA] + case '\u0107': + // ć [LATIN SMALL LETTER C WITH ACUTE] + case '\u0109': + // ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX] + case '\u010B': + // Ä‹ [LATIN SMALL LETTER C WITH DOT ABOVE] + case '\u010D': + // � [LATIN SMALL LETTER C WITH CARON] + case '\u0188': + // ƈ [LATIN SMALL LETTER C WITH HOOK] + case '\u023C': + // ȼ [LATIN SMALL LETTER C WITH STROKE] + case '\u0255': + // É• [LATIN SMALL LETTER C WITH CURL] + case '\u1E09': + // ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] + case '\u2184': + // ↄ [LATIN SMALL LETTER REVERSED C] + case '\u24D2': + // â“’ [CIRCLED LATIN SMALL LETTER C] + case '\uA73E': + // Ꜿ [LATIN CAPITAL LETTER 
REVERSED C WITH DOT] + case '\uA73F': + // ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT] + case '\uFF43': // c [FULLWIDTH LATIN SMALL LETTER C] + output[opos++] = 'c'; + break; + + case '\u249E': // â’ž [PARENTHESIZED LATIN SMALL LETTER C] + output[opos++] = '('; + output[opos++] = 'c'; + output[opos++] = ')'; + break; + + case '\u00D0': + // � [LATIN CAPITAL LETTER ETH] + case '\u010E': + // ÄŽ [LATIN CAPITAL LETTER D WITH CARON] + case '\u0110': + // � [LATIN CAPITAL LETTER D WITH STROKE] + case '\u0189': + // Ɖ [LATIN CAPITAL LETTER AFRICAN D] + case '\u018A': + // ÆŠ [LATIN CAPITAL LETTER D WITH HOOK] + case '\u018B': + // Æ‹ [LATIN CAPITAL LETTER D WITH TOPBAR] + case '\u1D05': + // á´… [LATIN LETTER SMALL CAPITAL D] + case '\u1D06': + // á´† [LATIN LETTER SMALL CAPITAL ETH] + case '\u1E0A': + // Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE] + case '\u1E0C': + // Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW] + case '\u1E0E': + // Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW] + case '\u1E10': + // � [LATIN CAPITAL LETTER D WITH CEDILLA] + case '\u1E12': + // Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] + case '\u24B9': + // â’¹ [CIRCLED LATIN CAPITAL LETTER D] + case '\uA779': + // � [LATIN CAPITAL LETTER INSULAR D] + case '\uFF24': // D [FULLWIDTH LATIN CAPITAL LETTER D] + output[opos++] = 'D'; + break; + + case '\u00F0': + // ð [LATIN SMALL LETTER ETH] + case '\u010F': + // � [LATIN SMALL LETTER D WITH CARON] + case '\u0111': + // Ä‘ [LATIN SMALL LETTER D WITH STROKE] + case '\u018C': + // ÆŒ [LATIN SMALL LETTER D WITH TOPBAR] + case '\u0221': + // È¡ [LATIN SMALL LETTER D WITH CURL] + case '\u0256': + // É– [LATIN SMALL LETTER D WITH TAIL] + case '\u0257': + // É— [LATIN SMALL LETTER D WITH HOOK] + case '\u1D6D': + // áµ­ [LATIN SMALL LETTER D WITH MIDDLE TILDE] + case '\u1D81': + // � [LATIN SMALL LETTER D WITH PALATAL HOOK] + case '\u1D91': + // á¶‘ [LATIN SMALL LETTER D WITH HOOK AND TAIL] + case '\u1E0B': + // ḋ [LATIN SMALL LETTER D WITH DOT ABOVE] + case 
'\u1E0D': + // � [LATIN SMALL LETTER D WITH DOT BELOW] + case '\u1E0F': + // � [LATIN SMALL LETTER D WITH LINE BELOW] + case '\u1E11': + // ḑ [LATIN SMALL LETTER D WITH CEDILLA] + case '\u1E13': + // ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] + case '\u24D3': + // â““ [CIRCLED LATIN SMALL LETTER D] + case '\uA77A': + // � [LATIN SMALL LETTER INSULAR D] + case '\uFF44': // d [FULLWIDTH LATIN SMALL LETTER D] + output[opos++] = 'd'; + break; + + case '\u01C4': + // Ç„ [LATIN CAPITAL LETTER DZ WITH CARON] + case '\u01F1': // DZ [LATIN CAPITAL LETTER DZ] + output[opos++] = 'D'; + output[opos++] = 'Z'; + break; + + case '\u01C5': + // Ç… [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] + case '\u01F2': // Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] + output[opos++] = 'D'; + output[opos++] = 'z'; + break; + + case '\u249F': // â’Ÿ [PARENTHESIZED LATIN SMALL LETTER D] + output[opos++] = '('; + output[opos++] = 'd'; + output[opos++] = ')'; + break; + + case '\u0238': // ȸ [LATIN SMALL LETTER DB DIGRAPH] + output[opos++] = 'd'; + output[opos++] = 'b'; + break; + + case '\u01C6': + // dž [LATIN SMALL LETTER DZ WITH CARON] + case '\u01F3': + // dz [LATIN SMALL LETTER DZ] + case '\u02A3': + // Ê£ [LATIN SMALL LETTER DZ DIGRAPH] + case '\u02A5': // Ê¥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] + output[opos++] = 'd'; + output[opos++] = 'z'; + break; + + case '\u00C8': + // È [LATIN CAPITAL LETTER E WITH GRAVE] + case '\u00C9': + // É [LATIN CAPITAL LETTER E WITH ACUTE] + case '\u00CA': + // Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] + case '\u00CB': + // Ë [LATIN CAPITAL LETTER E WITH DIAERESIS] + case '\u0112': + // Ä’ [LATIN CAPITAL LETTER E WITH MACRON] + case '\u0114': + // �? 
[LATIN CAPITAL LETTER E WITH BREVE] + case '\u0116': + // Ä– [LATIN CAPITAL LETTER E WITH DOT ABOVE] + case '\u0118': + // Ę [LATIN CAPITAL LETTER E WITH OGONEK] + case '\u011A': + // Äš [LATIN CAPITAL LETTER E WITH CARON] + case '\u018E': + // ÆŽ [LATIN CAPITAL LETTER REVERSED E] + case '\u0190': + // � [LATIN CAPITAL LETTER OPEN E] + case '\u0204': + // È„ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] + case '\u0206': + // Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE] + case '\u0228': + // Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA] + case '\u0246': + // Ɇ [LATIN CAPITAL LETTER E WITH STROKE] + case '\u1D07': + // á´‡ [LATIN LETTER SMALL CAPITAL E] + case '\u1E14': + // �? [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] + case '\u1E16': + // Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] + case '\u1E18': + // Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] + case '\u1E1A': + // Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW] + case '\u1E1C': + // Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] + case '\u1EB8': + // Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW] + case '\u1EBA': + // Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE] + case '\u1EBC': + // Ẽ [LATIN CAPITAL LETTER E WITH TILDE] + case '\u1EBE': + // Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] + case '\u1EC0': + // Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] + case '\u1EC2': + // Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EC4': + // Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] + case '\u1EC6': + // Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] + case '\u24BA': + // â’º [CIRCLED LATIN CAPITAL LETTER E] + case '\u2C7B': + // â±» [LATIN LETTER SMALL CAPITAL TURNED E] + case '\uFF25': // ï¼¥ [FULLWIDTH LATIN CAPITAL LETTER E] + output[opos++] = 'E'; + break; + + case '\u00E8': + // è [LATIN SMALL LETTER E WITH GRAVE] + case '\u00E9': + // é [LATIN SMALL LETTER E WITH ACUTE] + case '\u00EA': + // ê [LATIN SMALL LETTER E WITH CIRCUMFLEX] + case '\u00EB': + // ë 
[LATIN SMALL LETTER E WITH DIAERESIS] + case '\u0113': + // Ä“ [LATIN SMALL LETTER E WITH MACRON] + case '\u0115': + // Ä• [LATIN SMALL LETTER E WITH BREVE] + case '\u0117': + // Ä— [LATIN SMALL LETTER E WITH DOT ABOVE] + case '\u0119': + // Ä™ [LATIN SMALL LETTER E WITH OGONEK] + case '\u011B': + // Ä› [LATIN SMALL LETTER E WITH CARON] + case '\u01DD': + // � [LATIN SMALL LETTER TURNED E] + case '\u0205': + // È… [LATIN SMALL LETTER E WITH DOUBLE GRAVE] + case '\u0207': + // ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE] + case '\u0229': + // È© [LATIN SMALL LETTER E WITH CEDILLA] + case '\u0247': + // ɇ [LATIN SMALL LETTER E WITH STROKE] + case '\u0258': + // ɘ [LATIN SMALL LETTER REVERSED E] + case '\u025B': + // É› [LATIN SMALL LETTER OPEN E] + case '\u025C': + // Éœ [LATIN SMALL LETTER REVERSED OPEN E] + case '\u025D': + // � [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] + case '\u025E': + // Éž [LATIN SMALL LETTER CLOSED REVERSED OPEN E] + case '\u029A': + // Êš [LATIN SMALL LETTER CLOSED OPEN E] + case '\u1D08': + // á´ˆ [LATIN SMALL LETTER TURNED OPEN E] + case '\u1D92': + // á¶’ [LATIN SMALL LETTER E WITH RETROFLEX HOOK] + case '\u1D93': + // á¶“ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] + case '\u1D94': + // �? 
[LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] + case '\u1E15': + // ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE] + case '\u1E17': + // ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE] + case '\u1E19': + // ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] + case '\u1E1B': + // ḛ [LATIN SMALL LETTER E WITH TILDE BELOW] + case '\u1E1D': + // � [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] + case '\u1EB9': + // ẹ [LATIN SMALL LETTER E WITH DOT BELOW] + case '\u1EBB': + // ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE] + case '\u1EBD': + // ẽ [LATIN SMALL LETTER E WITH TILDE] + case '\u1EBF': + // ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] + case '\u1EC1': + // � [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] + case '\u1EC3': + // ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EC5': + // á»… [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] + case '\u1EC7': + // ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] + case '\u2091': + // â‚‘ [LATIN SUBSCRIPT SMALL LETTER E] + case '\u24D4': + // �? 
[CIRCLED LATIN SMALL LETTER E] + case '\u2C78': + // ⱸ [LATIN SMALL LETTER E WITH NOTCH] + case '\uFF45': // ï½… [FULLWIDTH LATIN SMALL LETTER E] + output[opos++] = 'e'; + break; + + case '\u24A0': // â’  [PARENTHESIZED LATIN SMALL LETTER E] + output[opos++] = '('; + output[opos++] = 'e'; + output[opos++] = ')'; + break; + + case '\u0191': + // Æ‘ [LATIN CAPITAL LETTER F WITH HOOK] + case '\u1E1E': + // Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE] + case '\u24BB': + // â’» [CIRCLED LATIN CAPITAL LETTER F] + case '\uA730': + // ꜰ [LATIN LETTER SMALL CAPITAL F] + case '\uA77B': + // � [LATIN CAPITAL LETTER INSULAR F] + case '\uA7FB': + // ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F] + case '\uFF26': // F [FULLWIDTH LATIN CAPITAL LETTER F] + output[opos++] = 'F'; + break; + + case '\u0192': + // Æ’ [LATIN SMALL LETTER F WITH HOOK] + case '\u1D6E': + // áµ® [LATIN SMALL LETTER F WITH MIDDLE TILDE] + case '\u1D82': + // á¶‚ [LATIN SMALL LETTER F WITH PALATAL HOOK] + case '\u1E1F': + // ḟ [LATIN SMALL LETTER F WITH DOT ABOVE] + case '\u1E9B': + // ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE] + case '\u24D5': + // â“• [CIRCLED LATIN SMALL LETTER F] + case '\uA77C': + // � [LATIN SMALL LETTER INSULAR F] + case '\uFF46': // f [FULLWIDTH LATIN SMALL LETTER F] + output[opos++] = 'f'; + break; + + case '\u24A1': // â’¡ [PARENTHESIZED LATIN SMALL LETTER F] + output[opos++] = '('; + output[opos++] = 'f'; + output[opos++] = ')'; + break; + + case '\uFB00': // ff [LATIN SMALL LIGATURE FF] + output[opos++] = 'f'; + output[opos++] = 'f'; + break; + + case '\uFB03': // ffi [LATIN SMALL LIGATURE FFI] + output[opos++] = 'f'; + output[opos++] = 'f'; + output[opos++] = 'i'; + break; + + case '\uFB04': // ffl [LATIN SMALL LIGATURE FFL] + output[opos++] = 'f'; + output[opos++] = 'f'; + output[opos++] = 'l'; + break; + + case '\uFB01': // � [LATIN SMALL LIGATURE FI] + output[opos++] = 'f'; + output[opos++] = 'i'; + break; + + case '\uFB02': // fl [LATIN SMALL LIGATURE FL] + output[opos++] = 'f'; + 
output[opos++] = 'l'; + break; + + case '\u011C': + // Äœ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] + case '\u011E': + // Äž [LATIN CAPITAL LETTER G WITH BREVE] + case '\u0120': + // Ä  [LATIN CAPITAL LETTER G WITH DOT ABOVE] + case '\u0122': + // Ä¢ [LATIN CAPITAL LETTER G WITH CEDILLA] + case '\u0193': + // Æ“ [LATIN CAPITAL LETTER G WITH HOOK] + case '\u01E4': + // Ǥ [LATIN CAPITAL LETTER G WITH STROKE] + case '\u01E5': + // Ç¥ [LATIN SMALL LETTER G WITH STROKE] + case '\u01E6': + // Ǧ [LATIN CAPITAL LETTER G WITH CARON] + case '\u01E7': + // ǧ [LATIN SMALL LETTER G WITH CARON] + case '\u01F4': + // Ç´ [LATIN CAPITAL LETTER G WITH ACUTE] + case '\u0262': + // É¢ [LATIN LETTER SMALL CAPITAL G] + case '\u029B': + // Ê› [LATIN LETTER SMALL CAPITAL G WITH HOOK] + case '\u1E20': + // Ḡ [LATIN CAPITAL LETTER G WITH MACRON] + case '\u24BC': + // â’¼ [CIRCLED LATIN CAPITAL LETTER G] + case '\uA77D': + // � [LATIN CAPITAL LETTER INSULAR G] + case '\uA77E': + // � [LATIN CAPITAL LETTER TURNED INSULAR G] + case '\uFF27': // ï¼§ [FULLWIDTH LATIN CAPITAL LETTER G] + output[opos++] = 'G'; + break; + + case '\u011D': + // � [LATIN SMALL LETTER G WITH CIRCUMFLEX] + case '\u011F': + // ÄŸ [LATIN SMALL LETTER G WITH BREVE] + case '\u0121': + // Ä¡ [LATIN SMALL LETTER G WITH DOT ABOVE] + case '\u0123': + // Ä£ [LATIN SMALL LETTER G WITH CEDILLA] + case '\u01F5': + // ǵ [LATIN SMALL LETTER G WITH ACUTE] + case '\u0260': + // É  [LATIN SMALL LETTER G WITH HOOK] + case '\u0261': + // É¡ [LATIN SMALL LETTER SCRIPT G] + case '\u1D77': + // áµ· [LATIN SMALL LETTER TURNED G] + case '\u1D79': + // áµ¹ [LATIN SMALL LETTER INSULAR G] + case '\u1D83': + // ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK] + case '\u1E21': + // ḡ [LATIN SMALL LETTER G WITH MACRON] + case '\u24D6': + // â“– [CIRCLED LATIN SMALL LETTER G] + case '\uA77F': + // � [LATIN SMALL LETTER TURNED INSULAR G] + case '\uFF47': // g [FULLWIDTH LATIN SMALL LETTER G] + output[opos++] = 'g'; + break; + + case '\u24A2': // â’¢ 
[PARENTHESIZED LATIN SMALL LETTER G] + output[opos++] = '('; + output[opos++] = 'g'; + output[opos++] = ')'; + break; + + case '\u0124': + // Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] + case '\u0126': + // Ħ [LATIN CAPITAL LETTER H WITH STROKE] + case '\u021E': + // Èž [LATIN CAPITAL LETTER H WITH CARON] + case '\u029C': + // Êœ [LATIN LETTER SMALL CAPITAL H] + case '\u1E22': + // Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE] + case '\u1E24': + // Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW] + case '\u1E26': + // Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS] + case '\u1E28': + // Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA] + case '\u1E2A': + // Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW] + case '\u24BD': + // â’½ [CIRCLED LATIN CAPITAL LETTER H] + case '\u2C67': + // â±§ [LATIN CAPITAL LETTER H WITH DESCENDER] + case '\u2C75': + // â±µ [LATIN CAPITAL LETTER HALF H] + case '\uFF28': // H [FULLWIDTH LATIN CAPITAL LETTER H] + output[opos++] = 'H'; + break; + + case '\u0125': + // Ä¥ [LATIN SMALL LETTER H WITH CIRCUMFLEX] + case '\u0127': + // ħ [LATIN SMALL LETTER H WITH STROKE] + case '\u021F': + // ÈŸ [LATIN SMALL LETTER H WITH CARON] + case '\u0265': + // É¥ [LATIN SMALL LETTER TURNED H] + case '\u0266': + // ɦ [LATIN SMALL LETTER H WITH HOOK] + case '\u02AE': + // Ê® [LATIN SMALL LETTER TURNED H WITH FISHHOOK] + case '\u02AF': + // ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] + case '\u1E23': + // ḣ [LATIN SMALL LETTER H WITH DOT ABOVE] + case '\u1E25': + // ḥ [LATIN SMALL LETTER H WITH DOT BELOW] + case '\u1E27': + // ḧ [LATIN SMALL LETTER H WITH DIAERESIS] + case '\u1E29': + // ḩ [LATIN SMALL LETTER H WITH CEDILLA] + case '\u1E2B': + // ḫ [LATIN SMALL LETTER H WITH BREVE BELOW] + case '\u1E96': + // ẖ [LATIN SMALL LETTER H WITH LINE BELOW] + case '\u24D7': + // â“— [CIRCLED LATIN SMALL LETTER H] + case '\u2C68': + // ⱨ [LATIN SMALL LETTER H WITH DESCENDER] + case '\u2C76': + // â±¶ [LATIN SMALL LETTER HALF H] + case '\uFF48': // h [FULLWIDTH LATIN SMALL LETTER H] + 
output[opos++] = 'h'; + break; + + case '\u01F6': // Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR] + output[opos++] = 'H'; + output[opos++] = 'V'; + break; + + case '\u24A3': // â’£ [PARENTHESIZED LATIN SMALL LETTER H] + output[opos++] = '('; + output[opos++] = 'h'; + output[opos++] = ')'; + break; + + case '\u0195': // Æ• [LATIN SMALL LETTER HV] + output[opos++] = 'h'; + output[opos++] = 'v'; + break; + + case '\u00CC': + // ÃŒ [LATIN CAPITAL LETTER I WITH GRAVE] + case '\u00CD': + // � [LATIN CAPITAL LETTER I WITH ACUTE] + case '\u00CE': + // ÃŽ [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] + case '\u00CF': + // � [LATIN CAPITAL LETTER I WITH DIAERESIS] + case '\u0128': + // Ĩ [LATIN CAPITAL LETTER I WITH TILDE] + case '\u012A': + // Ī [LATIN CAPITAL LETTER I WITH MACRON] + case '\u012C': + // Ĭ [LATIN CAPITAL LETTER I WITH BREVE] + case '\u012E': + // Ä® [LATIN CAPITAL LETTER I WITH OGONEK] + case '\u0130': + // İ [LATIN CAPITAL LETTER I WITH DOT ABOVE] + case '\u0196': + // Æ– [LATIN CAPITAL LETTER IOTA] + case '\u0197': + // Æ— [LATIN CAPITAL LETTER I WITH STROKE] + case '\u01CF': + // � [LATIN CAPITAL LETTER I WITH CARON] + case '\u0208': + // Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] + case '\u020A': + // ÈŠ [LATIN CAPITAL LETTER I WITH INVERTED BREVE] + case '\u026A': + // ɪ [LATIN LETTER SMALL CAPITAL I] + case '\u1D7B': + // áµ» [LATIN SMALL CAPITAL LETTER I WITH STROKE] + case '\u1E2C': + // Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW] + case '\u1E2E': + // Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] + case '\u1EC8': + // Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE] + case '\u1ECA': + // Ị [LATIN CAPITAL LETTER I WITH DOT BELOW] + case '\u24BE': + // â’¾ [CIRCLED LATIN CAPITAL LETTER I] + case '\uA7FE': + // ꟾ [LATIN EPIGRAPHIC LETTER I LONGA] + case '\uFF29': // I [FULLWIDTH LATIN CAPITAL LETTER I] + output[opos++] = 'I'; + break; + + case '\u00EC': + // ì [LATIN SMALL LETTER I WITH GRAVE] + case '\u00ED': + // í [LATIN SMALL 
LETTER I WITH ACUTE] + case '\u00EE': + // î [LATIN SMALL LETTER I WITH CIRCUMFLEX] + case '\u00EF': + // ï [LATIN SMALL LETTER I WITH DIAERESIS] + case '\u0129': + // Ä© [LATIN SMALL LETTER I WITH TILDE] + case '\u012B': + // Ä« [LATIN SMALL LETTER I WITH MACRON] + case '\u012D': + // Ä­ [LATIN SMALL LETTER I WITH BREVE] + case '\u012F': + // į [LATIN SMALL LETTER I WITH OGONEK] + case '\u0131': + // ı [LATIN SMALL LETTER DOTLESS I] + case '\u01D0': + // � [LATIN SMALL LETTER I WITH CARON] + case '\u0209': + // ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE] + case '\u020B': + // È‹ [LATIN SMALL LETTER I WITH INVERTED BREVE] + case '\u0268': + // ɨ [LATIN SMALL LETTER I WITH STROKE] + case '\u1D09': + // á´‰ [LATIN SMALL LETTER TURNED I] + case '\u1D62': + // áµ¢ [LATIN SUBSCRIPT SMALL LETTER I] + case '\u1D7C': + // áµ¼ [LATIN SMALL LETTER IOTA WITH STROKE] + case '\u1D96': + // á¶– [LATIN SMALL LETTER I WITH RETROFLEX HOOK] + case '\u1E2D': + // ḭ [LATIN SMALL LETTER I WITH TILDE BELOW] + case '\u1E2F': + // ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] + case '\u1EC9': + // ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE] + case '\u1ECB': + // ị [LATIN SMALL LETTER I WITH DOT BELOW] + case '\u2071': + // � [SUPERSCRIPT LATIN SMALL LETTER I] + case '\u24D8': + // ⓘ [CIRCLED LATIN SMALL LETTER I] + case '\uFF49': // i [FULLWIDTH LATIN SMALL LETTER I] + output[opos++] = 'i'; + break; + + case '\u0132': // IJ [LATIN CAPITAL LIGATURE IJ] + output[opos++] = 'I'; + output[opos++] = 'J'; + break; + + case '\u24A4': // â’¤ [PARENTHESIZED LATIN SMALL LETTER I] + output[opos++] = '('; + output[opos++] = 'i'; + output[opos++] = ')'; + break; + + case '\u0133': // ij [LATIN SMALL LIGATURE IJ] + output[opos++] = 'i'; + output[opos++] = 'j'; + break; + + case '\u0134': + // Ä´ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] + case '\u0248': + // Ɉ [LATIN CAPITAL LETTER J WITH STROKE] + case '\u1D0A': + // á´Š [LATIN LETTER SMALL CAPITAL J] + case '\u24BF': + // â’¿ [CIRCLED LATIN CAPITAL 
LETTER J] + case '\uFF2A': // J [FULLWIDTH LATIN CAPITAL LETTER J] + output[opos++] = 'J'; + break; + + case '\u0135': + // ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX] + case '\u01F0': + // ǰ [LATIN SMALL LETTER J WITH CARON] + case '\u0237': + // È· [LATIN SMALL LETTER DOTLESS J] + case '\u0249': + // ɉ [LATIN SMALL LETTER J WITH STROKE] + case '\u025F': + // ÉŸ [LATIN SMALL LETTER DOTLESS J WITH STROKE] + case '\u0284': + // Ê„ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] + case '\u029D': + // � [LATIN SMALL LETTER J WITH CROSSED-TAIL] + case '\u24D9': + // â“™ [CIRCLED LATIN SMALL LETTER J] + case '\u2C7C': + // â±¼ [LATIN SUBSCRIPT SMALL LETTER J] + case '\uFF4A': // j [FULLWIDTH LATIN SMALL LETTER J] + output[opos++] = 'j'; + break; + + case '\u24A5': // â’¥ [PARENTHESIZED LATIN SMALL LETTER J] + output[opos++] = '('; + output[opos++] = 'j'; + output[opos++] = ')'; + break; + + case '\u0136': + // Ķ [LATIN CAPITAL LETTER K WITH CEDILLA] + case '\u0198': + // Ƙ [LATIN CAPITAL LETTER K WITH HOOK] + case '\u01E8': + // Ǩ [LATIN CAPITAL LETTER K WITH CARON] + case '\u1D0B': + // á´‹ [LATIN LETTER SMALL CAPITAL K] + case '\u1E30': + // Ḱ [LATIN CAPITAL LETTER K WITH ACUTE] + case '\u1E32': + // Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW] + case '\u1E34': + // Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW] + case '\u24C0': + // â“€ [CIRCLED LATIN CAPITAL LETTER K] + case '\u2C69': + // Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER] + case '\uA740': + // � [LATIN CAPITAL LETTER K WITH STROKE] + case '\uA742': + // � [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] + case '\uA744': + // � [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] + case '\uFF2B': // K [FULLWIDTH LATIN CAPITAL LETTER K] + output[opos++] = 'K'; + break; + + case '\u0137': + // Ä· [LATIN SMALL LETTER K WITH CEDILLA] + case '\u0199': + // Æ™ [LATIN SMALL LETTER K WITH HOOK] + case '\u01E9': + // Ç© [LATIN SMALL LETTER K WITH CARON] + case '\u029E': + // Êž [LATIN SMALL LETTER TURNED K] + case 
'\u1D84': + // á¶„ [LATIN SMALL LETTER K WITH PALATAL HOOK] + case '\u1E31': + // ḱ [LATIN SMALL LETTER K WITH ACUTE] + case '\u1E33': + // ḳ [LATIN SMALL LETTER K WITH DOT BELOW] + case '\u1E35': + // ḵ [LATIN SMALL LETTER K WITH LINE BELOW] + case '\u24DA': + // ⓚ [CIRCLED LATIN SMALL LETTER K] + case '\u2C6A': + // ⱪ [LATIN SMALL LETTER K WITH DESCENDER] + case '\uA741': + // � [LATIN SMALL LETTER K WITH STROKE] + case '\uA743': + // � [LATIN SMALL LETTER K WITH DIAGONAL STROKE] + case '\uA745': + // � [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] + case '\uFF4B': // k [FULLWIDTH LATIN SMALL LETTER K] + output[opos++] = 'k'; + break; + + case '\u24A6': // â’¦ [PARENTHESIZED LATIN SMALL LETTER K] + output[opos++] = '('; + output[opos++] = 'k'; + output[opos++] = ')'; + break; + + case '\u0139': + // Ĺ [LATIN CAPITAL LETTER L WITH ACUTE] + case '\u013B': + // Ä» [LATIN CAPITAL LETTER L WITH CEDILLA] + case '\u013D': + // Ľ [LATIN CAPITAL LETTER L WITH CARON] + case '\u013F': + // Ä¿ [LATIN CAPITAL LETTER L WITH MIDDLE DOT] + case '\u0141': + // � [LATIN CAPITAL LETTER L WITH STROKE] + case '\u023D': + // Ƚ [LATIN CAPITAL LETTER L WITH BAR] + case '\u029F': + // ÊŸ [LATIN LETTER SMALL CAPITAL L] + case '\u1D0C': + // á´Œ [LATIN LETTER SMALL CAPITAL L WITH STROKE] + case '\u1E36': + // Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW] + case '\u1E38': + // Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] + case '\u1E3A': + // Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW] + case '\u1E3C': + // Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] + case '\u24C1': + // � [CIRCLED LATIN CAPITAL LETTER L] + case '\u2C60': + // â±  [LATIN CAPITAL LETTER L WITH DOUBLE BAR] + case '\u2C62': + // â±¢ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] + case '\uA746': + // � [LATIN CAPITAL LETTER BROKEN L] + case '\uA748': + // � [LATIN CAPITAL LETTER L WITH HIGH STROKE] + case '\uA780': + // Ꞁ [LATIN CAPITAL LETTER TURNED L] + case '\uFF2C': // L [FULLWIDTH LATIN CAPITAL LETTER L] + 
output[opos++] = 'L'; + break; + + case '\u013A': + // ĺ [LATIN SMALL LETTER L WITH ACUTE] + case '\u013C': + // ļ [LATIN SMALL LETTER L WITH CEDILLA] + case '\u013E': + // ľ [LATIN SMALL LETTER L WITH CARON] + case '\u0140': + // Å€ [LATIN SMALL LETTER L WITH MIDDLE DOT] + case '\u0142': + // Å‚ [LATIN SMALL LETTER L WITH STROKE] + case '\u019A': + // Æš [LATIN SMALL LETTER L WITH BAR] + case '\u0234': + // È´ [LATIN SMALL LETTER L WITH CURL] + case '\u026B': + // É« [LATIN SMALL LETTER L WITH MIDDLE TILDE] + case '\u026C': + // ɬ [LATIN SMALL LETTER L WITH BELT] + case '\u026D': + // É­ [LATIN SMALL LETTER L WITH RETROFLEX HOOK] + case '\u1D85': + // á¶… [LATIN SMALL LETTER L WITH PALATAL HOOK] + case '\u1E37': + // ḷ [LATIN SMALL LETTER L WITH DOT BELOW] + case '\u1E39': + // ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] + case '\u1E3B': + // ḻ [LATIN SMALL LETTER L WITH LINE BELOW] + case '\u1E3D': + // ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] + case '\u24DB': + // â“› [CIRCLED LATIN SMALL LETTER L] + case '\u2C61': + // ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR] + case '\uA747': + // � [LATIN SMALL LETTER BROKEN L] + case '\uA749': + // � [LATIN SMALL LETTER L WITH HIGH STROKE] + case '\uA781': + // � [LATIN SMALL LETTER TURNED L] + case '\uFF4C': // l [FULLWIDTH LATIN SMALL LETTER L] + output[opos++] = 'l'; + break; + + case '\u01C7': // LJ [LATIN CAPITAL LETTER LJ] + output[opos++] = 'L'; + output[opos++] = 'J'; + break; + + case '\u1EFA': // Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL] + output[opos++] = 'L'; + output[opos++] = 'L'; + break; + + case '\u01C8': // Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J] + output[opos++] = 'L'; + output[opos++] = 'j'; + break; + + case '\u24A7': // â’§ [PARENTHESIZED LATIN SMALL LETTER L] + output[opos++] = '('; + output[opos++] = 'l'; + output[opos++] = ')'; + break; + + case '\u01C9': // lj [LATIN SMALL LETTER LJ] + output[opos++] = 'l'; + output[opos++] = 'j'; + break; + + case '\u1EFB': // á»» [LATIN SMALL 
LETTER MIDDLE-WELSH LL] + output[opos++] = 'l'; + output[opos++] = 'l'; + break; + + case '\u02AA': // ʪ [LATIN SMALL LETTER LS DIGRAPH] + output[opos++] = 'l'; + output[opos++] = 's'; + break; + + case '\u02AB': // Ê« [LATIN SMALL LETTER LZ DIGRAPH] + output[opos++] = 'l'; + output[opos++] = 'z'; + break; + + case '\u019C': + // Æœ [LATIN CAPITAL LETTER TURNED M] + case '\u1D0D': + // á´� [LATIN LETTER SMALL CAPITAL M] + case '\u1E3E': + // Ḿ [LATIN CAPITAL LETTER M WITH ACUTE] + case '\u1E40': + // á¹€ [LATIN CAPITAL LETTER M WITH DOT ABOVE] + case '\u1E42': + // Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW] + case '\u24C2': + // â“‚ [CIRCLED LATIN CAPITAL LETTER M] + case '\u2C6E': + // â±® [LATIN CAPITAL LETTER M WITH HOOK] + case '\uA7FD': + // ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M] + case '\uA7FF': + // ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M] + case '\uFF2D': // ï¼­ [FULLWIDTH LATIN CAPITAL LETTER M] + output[opos++] = 'M'; + break; + + case '\u026F': + // ɯ [LATIN SMALL LETTER TURNED M] + case '\u0270': + // ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG] + case '\u0271': + // ɱ [LATIN SMALL LETTER M WITH HOOK] + case '\u1D6F': + // ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE] + case '\u1D86': + // ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK] + case '\u1E3F': + // ḿ [LATIN SMALL LETTER M WITH ACUTE] + case '\u1E41': + // � [LATIN SMALL LETTER M WITH DOT ABOVE] + case '\u1E43': + // ṃ [LATIN SMALL LETTER M WITH DOT BELOW] + case '\u24DC': + // ⓜ [CIRCLED LATIN SMALL LETTER M] + case '\uFF4D': // � [FULLWIDTH LATIN SMALL LETTER M] + output[opos++] = 'm'; + break; + + case '\u24A8': // â’¨ [PARENTHESIZED LATIN SMALL LETTER M] + output[opos++] = '('; + output[opos++] = 'm'; + output[opos++] = ')'; + break; + + case '\u00D1': + // Ñ [LATIN CAPITAL LETTER N WITH TILDE] + case '\u0143': + // Ã…Æ’ [LATIN CAPITAL LETTER N WITH ACUTE] + case '\u0145': + // Å… [LATIN CAPITAL LETTER N WITH CEDILLA] + case '\u0147': + // Ň [LATIN CAPITAL LETTER N WITH CARON] + case '\u014A': + // 
Ã…Å  http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG] + case '\u019D': + // � [LATIN CAPITAL LETTER N WITH LEFT HOOK] + case '\u01F8': + // Ǹ [LATIN CAPITAL LETTER N WITH GRAVE] + case '\u0220': + // È  [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG] + case '\u0274': + // É´ [LATIN LETTER SMALL CAPITAL N] + case '\u1D0E': + // á´Ž [LATIN LETTER SMALL CAPITAL REVERSED N] + case '\u1E44': + // Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE] + case '\u1E46': + // Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW] + case '\u1E48': + // Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW] + case '\u1E4A': + // Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] + case '\u24C3': + // Ⓝ [CIRCLED LATIN CAPITAL LETTER N] + case '\uFF2E': // ï¼® [FULLWIDTH LATIN CAPITAL LETTER N] + output[opos++] = 'N'; + break; + + case '\u00F1': + // ñ [LATIN SMALL LETTER N WITH TILDE] + case '\u0144': + // Å„ [LATIN SMALL LETTER N WITH ACUTE] + case '\u0146': + // ņ [LATIN SMALL LETTER N WITH CEDILLA] + case '\u0148': + // ň [LATIN SMALL LETTER N WITH CARON] + case '\u0149': + // ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] + case '\u014B': + // Å‹ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG] + case '\u019E': + // Æž [LATIN SMALL LETTER N WITH LONG RIGHT LEG] + case '\u01F9': + // ǹ [LATIN SMALL LETTER N WITH GRAVE] + case '\u0235': + // ȵ [LATIN SMALL LETTER N WITH CURL] + case '\u0272': + // ɲ [LATIN SMALL LETTER N WITH LEFT HOOK] + case '\u0273': + // ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK] + case '\u1D70': + // áµ° [LATIN SMALL LETTER N WITH MIDDLE TILDE] + case '\u1D87': + // ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK] + case '\u1E45': + // á¹… [LATIN SMALL LETTER N WITH DOT ABOVE] + case '\u1E47': + // ṇ [LATIN SMALL LETTER N WITH DOT BELOW] + case '\u1E49': + // ṉ [LATIN SMALL LETTER N WITH LINE BELOW] + case '\u1E4B': + // ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW] + case '\u207F': + // � [SUPERSCRIPT LATIN SMALL LETTER N] + case '\u24DD': + // � [CIRCLED 
LATIN SMALL LETTER N] + case '\uFF4E': // n [FULLWIDTH LATIN SMALL LETTER N] + output[opos++] = 'n'; + break; + + case '\u01CA': // ÇŠ [LATIN CAPITAL LETTER NJ] + output[opos++] = 'N'; + output[opos++] = 'J'; + break; + + case '\u01CB': // Ç‹ [LATIN CAPITAL LETTER N WITH SMALL LETTER J] + output[opos++] = 'N'; + output[opos++] = 'j'; + break; + + case '\u24A9': // â’© [PARENTHESIZED LATIN SMALL LETTER N] + output[opos++] = '('; + output[opos++] = 'n'; + output[opos++] = ')'; + break; + + case '\u01CC': // ÇŒ [LATIN SMALL LETTER NJ] + output[opos++] = 'n'; + output[opos++] = 'j'; + break; + + case '\u00D2': + // Ã’ [LATIN CAPITAL LETTER O WITH GRAVE] + case '\u00D3': + // Ó [LATIN CAPITAL LETTER O WITH ACUTE] + case '\u00D4': + // �? [LATIN CAPITAL LETTER O WITH CIRCUMFLEX] + case '\u00D5': + // Õ [LATIN CAPITAL LETTER O WITH TILDE] + case '\u00D6': + // Ö [LATIN CAPITAL LETTER O WITH DIAERESIS] + case '\u00D8': + // Ø [LATIN CAPITAL LETTER O WITH STROKE] + case '\u014C': + // Ã…Å’ [LATIN CAPITAL LETTER O WITH MACRON] + case '\u014E': + // ÅŽ [LATIN CAPITAL LETTER O WITH BREVE] + case '\u0150': + // � [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] + case '\u0186': + // Ɔ [LATIN CAPITAL LETTER OPEN O] + case '\u019F': + // ÆŸ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] + case '\u01A0': + // Æ  [LATIN CAPITAL LETTER O WITH HORN] + case '\u01D1': + // Ç‘ [LATIN CAPITAL LETTER O WITH CARON] + case '\u01EA': + // Ǫ [LATIN CAPITAL LETTER O WITH OGONEK] + case '\u01EC': + // Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] + case '\u01FE': + // Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] + case '\u020C': + // ÈŒ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] + case '\u020E': + // ÈŽ [LATIN CAPITAL LETTER O WITH INVERTED BREVE] + case '\u022A': + // Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] + case '\u022C': + // Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] + case '\u022E': + // È® [LATIN CAPITAL LETTER O WITH DOT ABOVE] + case '\u0230': + // Ȱ [LATIN CAPITAL 
LETTER O WITH DOT ABOVE AND MACRON] + case '\u1D0F': + // á´� [LATIN LETTER SMALL CAPITAL O] + case '\u1D10': + // á´� [LATIN LETTER SMALL CAPITAL OPEN O] + case '\u1E4C': + // Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] + case '\u1E4E': + // Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] + case '\u1E50': + // � [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] + case '\u1E52': + // á¹’ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE] + case '\u1ECC': + // Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW] + case '\u1ECE': + // Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE] + case '\u1ED0': + // � [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE] + case '\u1ED2': + // á»’ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] + case '\u1ED4': + // �? [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1ED6': + // á»– [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] + case '\u1ED8': + // Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] + case '\u1EDA': + // Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE] + case '\u1EDC': + // Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] + case '\u1EDE': + // Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] + case '\u1EE0': + // á»  [LATIN CAPITAL LETTER O WITH HORN AND TILDE] + case '\u1EE2': + // Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] + case '\u24C4': + // â“„ [CIRCLED LATIN CAPITAL LETTER O] + case '\uA74A': + // � [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] + case '\uA74C': + // � [LATIN CAPITAL LETTER O WITH LOOP] + case '\uFF2F': // O [FULLWIDTH LATIN CAPITAL LETTER O] + output[opos++] = 'O'; + break; + + case '\u00F2': + // ò [LATIN SMALL LETTER O WITH GRAVE] + case '\u00F3': + // ó [LATIN SMALL LETTER O WITH ACUTE] + case '\u00F4': + // ô [LATIN SMALL LETTER O WITH CIRCUMFLEX] + case '\u00F5': + // õ [LATIN SMALL LETTER O WITH TILDE] + case '\u00F6': + // ö [LATIN SMALL LETTER O WITH DIAERESIS] + case '\u00F8': + // ø [LATIN SMALL LETTER O WITH STROKE] + case '\u014D': + // � [LATIN SMALL 
LETTER O WITH MACRON] + case '\u014F': + // � [LATIN SMALL LETTER O WITH BREVE] + case '\u0151': + // Å‘ [LATIN SMALL LETTER O WITH DOUBLE ACUTE] + case '\u01A1': + // Æ¡ [LATIN SMALL LETTER O WITH HORN] + case '\u01D2': + // Ç’ [LATIN SMALL LETTER O WITH CARON] + case '\u01EB': + // Ç« [LATIN SMALL LETTER O WITH OGONEK] + case '\u01ED': + // Ç­ [LATIN SMALL LETTER O WITH OGONEK AND MACRON] + case '\u01FF': + // Ç¿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE] + case '\u020D': + // � [LATIN SMALL LETTER O WITH DOUBLE GRAVE] + case '\u020F': + // � [LATIN SMALL LETTER O WITH INVERTED BREVE] + case '\u022B': + // È« [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] + case '\u022D': + // È­ [LATIN SMALL LETTER O WITH TILDE AND MACRON] + case '\u022F': + // ȯ [LATIN SMALL LETTER O WITH DOT ABOVE] + case '\u0231': + // ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON] + case '\u0254': + // �? [LATIN SMALL LETTER OPEN O] + case '\u0275': + // ɵ [LATIN SMALL LETTER BARRED O] + case '\u1D16': + // á´– [LATIN SMALL LETTER TOP HALF O] + case '\u1D17': + // á´— [LATIN SMALL LETTER BOTTOM HALF O] + case '\u1D97': + // á¶— [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] + case '\u1E4D': + // � [LATIN SMALL LETTER O WITH TILDE AND ACUTE] + case '\u1E4F': + // � [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] + case '\u1E51': + // ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE] + case '\u1E53': + // ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE] + case '\u1ECD': + // � [LATIN SMALL LETTER O WITH DOT BELOW] + case '\u1ECF': + // � [LATIN SMALL LETTER O WITH HOOK ABOVE] + case '\u1ED1': + // ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] + case '\u1ED3': + // ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] + case '\u1ED5': + // ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1ED7': + // á»— [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] + case '\u1ED9': + // á»™ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW] + case '\u1EDB': + // á»› [LATIN SMALL 
LETTER O WITH HORN AND ACUTE] + case '\u1EDD': + // � [LATIN SMALL LETTER O WITH HORN AND GRAVE] + case '\u1EDF': + // ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] + case '\u1EE1': + // ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE] + case '\u1EE3': + // ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] + case '\u2092': + // â‚’ [LATIN SUBSCRIPT SMALL LETTER O] + case '\u24DE': + // ⓞ [CIRCLED LATIN SMALL LETTER O] + case '\u2C7A': + // ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE] + case '\uA74B': + // � [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] + case '\uA74D': + // � [LATIN SMALL LETTER O WITH LOOP] + case '\uFF4F': // � [FULLWIDTH LATIN SMALL LETTER O] + output[opos++] = 'o'; + break; + + case '\u0152': + // Å’ [LATIN CAPITAL LIGATURE OE] + case '\u0276': // ɶ [LATIN LETTER SMALL CAPITAL OE] + output[opos++] = 'O'; + output[opos++] = 'E'; + break; + + case '\uA74E': // � [LATIN CAPITAL LETTER OO] + output[opos++] = 'O'; + output[opos++] = 'O'; + break; + + case '\u0222': + // È¢ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU] + case '\u1D15': // á´• [LATIN LETTER SMALL CAPITAL OU] + output[opos++] = 'O'; + output[opos++] = 'U'; + break; + + case '\u24AA': // â’ª [PARENTHESIZED LATIN SMALL LETTER O] + output[opos++] = '('; + output[opos++] = 'o'; + output[opos++] = ')'; + break; + + case '\u0153': + // Å“ [LATIN SMALL LIGATURE OE] + case '\u1D14': // á´�? [LATIN SMALL LETTER TURNED OE] + output[opos++] = 'o'; + output[opos++] = 'e'; + break; + + case '\uA74F': // � [LATIN SMALL LETTER OO] + output[opos++] = 'o'; + output[opos++] = 'o'; + break; + + case '\u0223': // È£ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU] + output[opos++] = 'o'; + output[opos++] = 'u'; + break; + + case '\u01A4': + // Ƥ [LATIN CAPITAL LETTER P WITH HOOK] + case '\u1D18': + // á´˜ [LATIN LETTER SMALL CAPITAL P] + case '\u1E54': + // �? 
[LATIN CAPITAL LETTER P WITH ACUTE] + case '\u1E56': + // á¹– [LATIN CAPITAL LETTER P WITH DOT ABOVE] + case '\u24C5': + // â“… [CIRCLED LATIN CAPITAL LETTER P] + case '\u2C63': + // â±£ [LATIN CAPITAL LETTER P WITH STROKE] + case '\uA750': + // � [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] + case '\uA752': + // � [LATIN CAPITAL LETTER P WITH FLOURISH] + case '\uA754': + // �? [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] + case '\uFF30': // ï¼° [FULLWIDTH LATIN CAPITAL LETTER P] + output[opos++] = 'P'; + break; + + case '\u01A5': + // Æ¥ [LATIN SMALL LETTER P WITH HOOK] + case '\u1D71': + // áµ± [LATIN SMALL LETTER P WITH MIDDLE TILDE] + case '\u1D7D': + // áµ½ [LATIN SMALL LETTER P WITH STROKE] + case '\u1D88': + // ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK] + case '\u1E55': + // ṕ [LATIN SMALL LETTER P WITH ACUTE] + case '\u1E57': + // á¹— [LATIN SMALL LETTER P WITH DOT ABOVE] + case '\u24DF': + // ⓟ [CIRCLED LATIN SMALL LETTER P] + case '\uA751': + // � [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] + case '\uA753': + // � [LATIN SMALL LETTER P WITH FLOURISH] + case '\uA755': + // � [LATIN SMALL LETTER P WITH SQUIRREL TAIL] + case '\uA7FC': + // ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P] + case '\uFF50': // � [FULLWIDTH LATIN SMALL LETTER P] + output[opos++] = 'p'; + break; + + case '\u24AB': // â’« [PARENTHESIZED LATIN SMALL LETTER P] + output[opos++] = '('; + output[opos++] = 'p'; + output[opos++] = ')'; + break; + + case '\u024A': + // ÉŠ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] + case '\u24C6': + // Ⓠ [CIRCLED LATIN CAPITAL LETTER Q] + case '\uA756': + // � [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] + case '\uA758': + // � [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] + case '\uFF31': // ï¼± [FULLWIDTH LATIN CAPITAL LETTER Q] + output[opos++] = 'Q'; + break; + + case '\u0138': + // ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA] + case '\u024B': + // É‹ [LATIN SMALL LETTER Q WITH HOOK TAIL] + case '\u02A0': + 
// Ê  [LATIN SMALL LETTER Q WITH HOOK] + case '\u24E0': + // â“  [CIRCLED LATIN SMALL LETTER Q] + case '\uA757': + // � [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] + case '\uA759': + // � [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] + case '\uFF51': // q [FULLWIDTH LATIN SMALL LETTER Q] + output[opos++] = 'q'; + break; + + case '\u24AC': // â’¬ [PARENTHESIZED LATIN SMALL LETTER Q] + output[opos++] = '('; + output[opos++] = 'q'; + output[opos++] = ')'; + break; + + case '\u0239': // ȹ [LATIN SMALL LETTER QP DIGRAPH] + output[opos++] = 'q'; + output[opos++] = 'p'; + break; + + case '\u0154': + // �? [LATIN CAPITAL LETTER R WITH ACUTE] + case '\u0156': + // Å– [LATIN CAPITAL LETTER R WITH CEDILLA] + case '\u0158': + // Ã…Ëœ [LATIN CAPITAL LETTER R WITH CARON] + case '\u0210': + // È’ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] + case '\u0212': + // È’ [LATIN CAPITAL LETTER R WITH INVERTED BREVE] + case '\u024C': + // ÉŒ [LATIN CAPITAL LETTER R WITH STROKE] + case '\u0280': + // Ê€ [LATIN LETTER SMALL CAPITAL R] + case '\u0281': + // � [LATIN LETTER SMALL CAPITAL INVERTED R] + case '\u1D19': + // á´™ [LATIN LETTER SMALL CAPITAL REVERSED R] + case '\u1D1A': + // á´š [LATIN LETTER SMALL CAPITAL TURNED R] + case '\u1E58': + // Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE] + case '\u1E5A': + // Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW] + case '\u1E5C': + // Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] + case '\u1E5E': + // Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW] + case '\u24C7': + // Ⓡ [CIRCLED LATIN CAPITAL LETTER R] + case '\u2C64': + // Ɽ [LATIN CAPITAL LETTER R WITH TAIL] + case '\uA75A': + // � [LATIN CAPITAL LETTER R ROTUNDA] + case '\uA782': + // êž‚ [LATIN CAPITAL LETTER INSULAR R] + case '\uFF32': // ï¼² [FULLWIDTH LATIN CAPITAL LETTER R] + output[opos++] = 'R'; + break; + + case '\u0155': + // Å• [LATIN SMALL LETTER R WITH ACUTE] + case '\u0157': + // Å— [LATIN SMALL LETTER R WITH CEDILLA] + case '\u0159': + // Ã…â„¢ [LATIN SMALL LETTER R WITH 
CARON] + case '\u0211': + // È‘ [LATIN SMALL LETTER R WITH DOUBLE GRAVE] + case '\u0213': + // È“ [LATIN SMALL LETTER R WITH INVERTED BREVE] + case '\u024D': + // � [LATIN SMALL LETTER R WITH STROKE] + case '\u027C': + // ɼ [LATIN SMALL LETTER R WITH LONG LEG] + case '\u027D': + // ɽ [LATIN SMALL LETTER R WITH TAIL] + case '\u027E': + // ɾ [LATIN SMALL LETTER R WITH FISHHOOK] + case '\u027F': + // É¿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] + case '\u1D63': + // áµ£ [LATIN SUBSCRIPT SMALL LETTER R] + case '\u1D72': + // áµ² [LATIN SMALL LETTER R WITH MIDDLE TILDE] + case '\u1D73': + // áµ³ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] + case '\u1D89': + // ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK] + case '\u1E59': + // á¹™ [LATIN SMALL LETTER R WITH DOT ABOVE] + case '\u1E5B': + // á¹› [LATIN SMALL LETTER R WITH DOT BELOW] + case '\u1E5D': + // � [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON] + case '\u1E5F': + // ṟ [LATIN SMALL LETTER R WITH LINE BELOW] + case '\u24E1': + // â“¡ [CIRCLED LATIN SMALL LETTER R] + case '\uA75B': + // � [LATIN SMALL LETTER R ROTUNDA] + case '\uA783': + // ꞃ [LATIN SMALL LETTER INSULAR R] + case '\uFF52': // ï½’ [FULLWIDTH LATIN SMALL LETTER R] + output[opos++] = 'r'; + break; + + case '\u24AD': // â’­ [PARENTHESIZED LATIN SMALL LETTER R] + output[opos++] = '('; + output[opos++] = 'r'; + output[opos++] = ')'; + break; + + case '\u015A': + // Ã…Å¡ [LATIN CAPITAL LETTER S WITH ACUTE] + case '\u015C': + // Ã…Å“ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] + case '\u015E': + // Åž [LATIN CAPITAL LETTER S WITH CEDILLA] + case '\u0160': + // Å  [LATIN CAPITAL LETTER S WITH CARON] + case '\u0218': + // Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW] + case '\u1E60': + // á¹  [LATIN CAPITAL LETTER S WITH DOT ABOVE] + case '\u1E62': + // á¹¢ [LATIN CAPITAL LETTER S WITH DOT BELOW] + case '\u1E64': + // Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] + case '\u1E66': + // Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] + case 
'\u1E68': + // Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] + case '\u24C8': + // Ⓢ [CIRCLED LATIN CAPITAL LETTER S] + case '\uA731': + // ꜱ [LATIN LETTER SMALL CAPITAL S] + case '\uA785': + // êž… [LATIN SMALL LETTER INSULAR S] + case '\uFF33': // ï¼³ [FULLWIDTH LATIN CAPITAL LETTER S] + output[opos++] = 'S'; + break; + + case '\u015B': + // Å› [LATIN SMALL LETTER S WITH ACUTE] + case '\u015D': + // � [LATIN SMALL LETTER S WITH CIRCUMFLEX] + case '\u015F': + // ÅŸ [LATIN SMALL LETTER S WITH CEDILLA] + case '\u0161': + // Å¡ [LATIN SMALL LETTER S WITH CARON] + case '\u017F': + // Å¿ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S] + case '\u0219': + // È™ [LATIN SMALL LETTER S WITH COMMA BELOW] + case '\u023F': + // È¿ [LATIN SMALL LETTER S WITH SWASH TAIL] + case '\u0282': + // Ê‚ [LATIN SMALL LETTER S WITH HOOK] + case '\u1D74': + // áµ´ [LATIN SMALL LETTER S WITH MIDDLE TILDE] + case '\u1D8A': + // á¶Š [LATIN SMALL LETTER S WITH PALATAL HOOK] + case '\u1E61': + // ṡ [LATIN SMALL LETTER S WITH DOT ABOVE] + case '\u1E63': + // á¹£ [LATIN SMALL LETTER S WITH DOT BELOW] + case '\u1E65': + // á¹¥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] + case '\u1E67': + // á¹§ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE] + case '\u1E69': + // ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE] + case '\u1E9C': + // ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE] + case '\u1E9D': + // � [LATIN SMALL LETTER LONG S WITH HIGH STROKE] + case '\u24E2': + // â“¢ [CIRCLED LATIN SMALL LETTER S] + case '\uA784': + // êž„ [LATIN CAPITAL LETTER INSULAR S] + case '\uFF53': // s [FULLWIDTH LATIN SMALL LETTER S] + output[opos++] = 's'; + break; + + case '\u1E9E': // ẞ [LATIN CAPITAL LETTER SHARP S] + output[opos++] = 'S'; + output[opos++] = 'S'; + break; + + case '\u24AE': // â’® [PARENTHESIZED LATIN SMALL LETTER S] + output[opos++] = '('; + output[opos++] = 's'; + output[opos++] = ')'; + break; + + case '\u00DF': // ß [LATIN SMALL LETTER SHARP S] + 
output[opos++] = 's'; + output[opos++] = 's'; + break; + + case '\uFB06': // st [LATIN SMALL LIGATURE ST] + output[opos++] = 's'; + output[opos++] = 't'; + break; + + case '\u0162': + // Å¢ [LATIN CAPITAL LETTER T WITH CEDILLA] + case '\u0164': + // Ť [LATIN CAPITAL LETTER T WITH CARON] + case '\u0166': + // Ŧ [LATIN CAPITAL LETTER T WITH STROKE] + case '\u01AC': + // Ƭ [LATIN CAPITAL LETTER T WITH HOOK] + case '\u01AE': + // Æ® [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] + case '\u021A': + // Èš [LATIN CAPITAL LETTER T WITH COMMA BELOW] + case '\u023E': + // Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] + case '\u1D1B': + // á´› [LATIN LETTER SMALL CAPITAL T] + case '\u1E6A': + // Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE] + case '\u1E6C': + // Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW] + case '\u1E6E': + // á¹® [LATIN CAPITAL LETTER T WITH LINE BELOW] + case '\u1E70': + // á¹° [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] + case '\u24C9': + // Ⓣ [CIRCLED LATIN CAPITAL LETTER T] + case '\uA786': + // Ꞇ [LATIN CAPITAL LETTER INSULAR T] + case '\uFF34': // ï¼´ [FULLWIDTH LATIN CAPITAL LETTER T] + output[opos++] = 'T'; + break; + + case '\u0163': + // Å£ [LATIN SMALL LETTER T WITH CEDILLA] + case '\u0165': + // Ã…Â¥ [LATIN SMALL LETTER T WITH CARON] + case '\u0167': + // ŧ [LATIN SMALL LETTER T WITH STROKE] + case '\u01AB': + // Æ« [LATIN SMALL LETTER T WITH PALATAL HOOK] + case '\u01AD': + // Æ­ [LATIN SMALL LETTER T WITH HOOK] + case '\u021B': + // È› [LATIN SMALL LETTER T WITH COMMA BELOW] + case '\u0236': + // ȶ [LATIN SMALL LETTER T WITH CURL] + case '\u0287': + // ʇ [LATIN SMALL LETTER TURNED T] + case '\u0288': + // ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK] + case '\u1D75': + // áµµ [LATIN SMALL LETTER T WITH MIDDLE TILDE] + case '\u1E6B': + // ṫ [LATIN SMALL LETTER T WITH DOT ABOVE] + case '\u1E6D': + // á¹­ [LATIN SMALL LETTER T WITH DOT BELOW] + case '\u1E6F': + // ṯ [LATIN SMALL LETTER T WITH LINE BELOW] + case '\u1E71': + // á¹± [LATIN SMALL LETTER T 
WITH CIRCUMFLEX BELOW] + case '\u1E97': + // ẗ [LATIN SMALL LETTER T WITH DIAERESIS] + case '\u24E3': + // â“£ [CIRCLED LATIN SMALL LETTER T] + case '\u2C66': + // ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE] + case '\uFF54': // �? [FULLWIDTH LATIN SMALL LETTER T] + output[opos++] = 't'; + break; + + case '\u00DE': + // Þ [LATIN CAPITAL LETTER THORN] + case '\uA766': // � [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] + output[opos++] = 'T'; + output[opos++] = 'H'; + break; + + case '\uA728': // Ꜩ [LATIN CAPITAL LETTER TZ] + output[opos++] = 'T'; + output[opos++] = 'Z'; + break; + + case '\u24AF': // â’¯ [PARENTHESIZED LATIN SMALL LETTER T] + output[opos++] = '('; + output[opos++] = 't'; + output[opos++] = ')'; + break; + + case '\u02A8': // ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL] + output[opos++] = 't'; + output[opos++] = 'c'; + break; + + case '\u00FE': + // þ [LATIN SMALL LETTER THORN] + case '\u1D7A': + // ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH] + case '\uA767': // � [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] + output[opos++] = 't'; + output[opos++] = 'h'; + break; + + case '\u02A6': // ʦ [LATIN SMALL LETTER TS DIGRAPH] + output[opos++] = 't'; + output[opos++] = 's'; + break; + + case '\uA729': // ꜩ [LATIN SMALL LETTER TZ] + output[opos++] = 't'; + output[opos++] = 'z'; + break; + + case '\u00D9': + // Ù [LATIN CAPITAL LETTER U WITH GRAVE] + case '\u00DA': + // Ú [LATIN CAPITAL LETTER U WITH ACUTE] + case '\u00DB': + // Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] + case '\u00DC': + // Ü [LATIN CAPITAL LETTER U WITH DIAERESIS] + case '\u0168': + // Ũ [LATIN CAPITAL LETTER U WITH TILDE] + case '\u016A': + // Ū [LATIN CAPITAL LETTER U WITH MACRON] + case '\u016C': + // Ŭ [LATIN CAPITAL LETTER U WITH BREVE] + case '\u016E': + // Å® [LATIN CAPITAL LETTER U WITH RING ABOVE] + case '\u0170': + // Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] + case '\u0172': + // Ų [LATIN CAPITAL LETTER U WITH OGONEK] + case '\u01AF': + // Ư [LATIN 
CAPITAL LETTER U WITH HORN] + case '\u01D3': + // Ç“ [LATIN CAPITAL LETTER U WITH CARON] + case '\u01D5': + // Ç• [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON] + case '\u01D7': + // Ç— [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] + case '\u01D9': + // Ç™ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] + case '\u01DB': + // Ç› [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] + case '\u0214': + // �? [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE] + case '\u0216': + // È– [LATIN CAPITAL LETTER U WITH INVERTED BREVE] + case '\u0244': + // É„ [LATIN CAPITAL LETTER U BAR] + case '\u1D1C': + // á´œ [LATIN LETTER SMALL CAPITAL U] + case '\u1D7E': + // áµ¾ [LATIN SMALL CAPITAL LETTER U WITH STROKE] + case '\u1E72': + // á¹² [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW] + case '\u1E74': + // á¹´ [LATIN CAPITAL LETTER U WITH TILDE BELOW] + case '\u1E76': + // á¹¶ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW] + case '\u1E78': + // Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] + case '\u1E7A': + // Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] + case '\u1EE4': + // Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW] + case '\u1EE6': + // Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE] + case '\u1EE8': + // Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] + case '\u1EEA': + // Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] + case '\u1EEC': + // Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] + case '\u1EEE': + // á»® [LATIN CAPITAL LETTER U WITH HORN AND TILDE] + case '\u1EF0': + // á»° [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] + case '\u24CA': + // Ⓤ [CIRCLED LATIN CAPITAL LETTER U] + case '\uFF35': // ï¼µ [FULLWIDTH LATIN CAPITAL LETTER U] + output[opos++] = 'U'; + break; + + case '\u00F9': + // ù [LATIN SMALL LETTER U WITH GRAVE] + case '\u00FA': + // ú [LATIN SMALL LETTER U WITH ACUTE] + case '\u00FB': + // û [LATIN SMALL LETTER U WITH CIRCUMFLEX] + case '\u00FC': + // ü [LATIN SMALL LETTER U WITH DIAERESIS] + case '\u0169': + // Å© [LATIN SMALL LETTER U WITH 
TILDE] + case '\u016B': + // Å« [LATIN SMALL LETTER U WITH MACRON] + case '\u016D': + // Å­ [LATIN SMALL LETTER U WITH BREVE] + case '\u016F': + // ů [LATIN SMALL LETTER U WITH RING ABOVE] + case '\u0171': + // ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE] + case '\u0173': + // ų [LATIN SMALL LETTER U WITH OGONEK] + case '\u01B0': + // ư [LATIN SMALL LETTER U WITH HORN] + case '\u01D4': + // �? [LATIN SMALL LETTER U WITH CARON] + case '\u01D6': + // Ç– [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON] + case '\u01D8': + // ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] + case '\u01DA': + // Çš [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] + case '\u01DC': + // Çœ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] + case '\u0215': + // È• [LATIN SMALL LETTER U WITH DOUBLE GRAVE] + case '\u0217': + // È— [LATIN SMALL LETTER U WITH INVERTED BREVE] + case '\u0289': + // ʉ [LATIN SMALL LETTER U BAR] + case '\u1D64': + // ᵤ [LATIN SUBSCRIPT SMALL LETTER U] + case '\u1D99': + // á¶™ [LATIN SMALL LETTER U WITH RETROFLEX HOOK] + case '\u1E73': + // á¹³ [LATIN SMALL LETTER U WITH DIAERESIS BELOW] + case '\u1E75': + // á¹µ [LATIN SMALL LETTER U WITH TILDE BELOW] + case '\u1E77': + // á¹· [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] + case '\u1E79': + // á¹¹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE] + case '\u1E7B': + // á¹» [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] + case '\u1EE5': + // ụ [LATIN SMALL LETTER U WITH DOT BELOW] + case '\u1EE7': + // á»§ [LATIN SMALL LETTER U WITH HOOK ABOVE] + case '\u1EE9': + // ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE] + case '\u1EEB': + // ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE] + case '\u1EED': + // á»­ [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] + case '\u1EEF': + // ữ [LATIN SMALL LETTER U WITH HORN AND TILDE] + case '\u1EF1': + // á»± [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] + case '\u24E4': + // ⓤ [CIRCLED LATIN SMALL LETTER U] + case '\uFF55': // u [FULLWIDTH LATIN SMALL LETTER U] + output[opos++] = 'u'; + 
break; + + case '\u24B0': // â’° [PARENTHESIZED LATIN SMALL LETTER U] + output[opos++] = '('; + output[opos++] = 'u'; + output[opos++] = ')'; + break; + + case '\u1D6B': // ᵫ [LATIN SMALL LETTER UE] + output[opos++] = 'u'; + output[opos++] = 'e'; + break; + + case '\u01B2': + // Ʋ [LATIN CAPITAL LETTER V WITH HOOK] + case '\u0245': + // É… [LATIN CAPITAL LETTER TURNED V] + case '\u1D20': + // á´  [LATIN LETTER SMALL CAPITAL V] + case '\u1E7C': + // á¹¼ [LATIN CAPITAL LETTER V WITH TILDE] + case '\u1E7E': + // á¹¾ [LATIN CAPITAL LETTER V WITH DOT BELOW] + case '\u1EFC': + // Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V] + case '\u24CB': + // â“‹ [CIRCLED LATIN CAPITAL LETTER V] + case '\uA75E': + // � [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] + case '\uA768': + // � [LATIN CAPITAL LETTER VEND] + case '\uFF36': // ï¼¶ [FULLWIDTH LATIN CAPITAL LETTER V] + output[opos++] = 'V'; + break; + + case '\u028B': + // Ê‹ [LATIN SMALL LETTER V WITH HOOK] + case '\u028C': + // ÊŒ [LATIN SMALL LETTER TURNED V] + case '\u1D65': + // áµ¥ [LATIN SUBSCRIPT SMALL LETTER V] + case '\u1D8C': + // á¶Œ [LATIN SMALL LETTER V WITH PALATAL HOOK] + case '\u1E7D': + // á¹½ [LATIN SMALL LETTER V WITH TILDE] + case '\u1E7F': + // ṿ [LATIN SMALL LETTER V WITH DOT BELOW] + case '\u24E5': + // â“¥ [CIRCLED LATIN SMALL LETTER V] + case '\u2C71': + // â±± [LATIN SMALL LETTER V WITH RIGHT HOOK] + case '\u2C74': + // â±´ [LATIN SMALL LETTER V WITH CURL] + case '\uA75F': + // � [LATIN SMALL LETTER V WITH DIAGONAL STROKE] + case '\uFF56': // ï½– [FULLWIDTH LATIN SMALL LETTER V] + output[opos++] = 'v'; + break; + + case '\uA760': // � [LATIN CAPITAL LETTER VY] + output[opos++] = 'V'; + output[opos++] = 'Y'; + break; + + case '\u24B1': // â’± [PARENTHESIZED LATIN SMALL LETTER V] + output[opos++] = '('; + output[opos++] = 'v'; + output[opos++] = ')'; + break; + + case '\uA761': // � [LATIN SMALL LETTER VY] + output[opos++] = 'v'; + output[opos++] = 'y'; + break; + + case '\u0174': + // Å´ [LATIN CAPITAL 
LETTER W WITH CIRCUMFLEX] + case '\u01F7': + // Ç· http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN] + case '\u1D21': + // á´¡ [LATIN LETTER SMALL CAPITAL W] + case '\u1E80': + // Ẁ [LATIN CAPITAL LETTER W WITH GRAVE] + case '\u1E82': + // Ẃ [LATIN CAPITAL LETTER W WITH ACUTE] + case '\u1E84': + // Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS] + case '\u1E86': + // Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE] + case '\u1E88': + // Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW] + case '\u24CC': + // Ⓦ [CIRCLED LATIN CAPITAL LETTER W] + case '\u2C72': + // â±² [LATIN CAPITAL LETTER W WITH HOOK] + case '\uFF37': // ï¼· [FULLWIDTH LATIN CAPITAL LETTER W] + output[opos++] = 'W'; + break; + + case '\u0175': + // ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX] + case '\u01BF': + // Æ¿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN] + case '\u028D': + // � [LATIN SMALL LETTER TURNED W] + case '\u1E81': + // � [LATIN SMALL LETTER W WITH GRAVE] + case '\u1E83': + // ẃ [LATIN SMALL LETTER W WITH ACUTE] + case '\u1E85': + // ẅ [LATIN SMALL LETTER W WITH DIAERESIS] + case '\u1E87': + // ẇ [LATIN SMALL LETTER W WITH DOT ABOVE] + case '\u1E89': + // ẉ [LATIN SMALL LETTER W WITH DOT BELOW] + case '\u1E98': + // ẘ [LATIN SMALL LETTER W WITH RING ABOVE] + case '\u24E6': + // ⓦ [CIRCLED LATIN SMALL LETTER W] + case '\u2C73': + // â±³ [LATIN SMALL LETTER W WITH HOOK] + case '\uFF57': // ï½— [FULLWIDTH LATIN SMALL LETTER W] + output[opos++] = 'w'; + break; + + case '\u24B2': // â’² [PARENTHESIZED LATIN SMALL LETTER W] + output[opos++] = '('; + output[opos++] = 'w'; + output[opos++] = ')'; + break; + + case '\u1E8A': + // Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE] + case '\u1E8C': + // Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS] + case '\u24CD': + // � [CIRCLED LATIN CAPITAL LETTER X] + case '\uFF38': // X [FULLWIDTH LATIN CAPITAL LETTER X] + output[opos++] = 'X'; + break; + + case '\u1D8D': + // � [LATIN SMALL LETTER X WITH PALATAL HOOK] + case '\u1E8B': + // ẋ [LATIN SMALL LETTER X WITH 
DOT ABOVE] + case '\u1E8D': + // � [LATIN SMALL LETTER X WITH DIAERESIS] + case '\u2093': + // â‚“ [LATIN SUBSCRIPT SMALL LETTER X] + case '\u24E7': + // â“§ [CIRCLED LATIN SMALL LETTER X] + case '\uFF58': // x [FULLWIDTH LATIN SMALL LETTER X] + output[opos++] = 'x'; + break; + + case '\u24B3': // â’³ [PARENTHESIZED LATIN SMALL LETTER X] + output[opos++] = '('; + output[opos++] = 'x'; + output[opos++] = ')'; + break; + + case '\u00DD': + // � [LATIN CAPITAL LETTER Y WITH ACUTE] + case '\u0176': + // Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] + case '\u0178': + // Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS] + case '\u01B3': + // Ƴ [LATIN CAPITAL LETTER Y WITH HOOK] + case '\u0232': + // Ȳ [LATIN CAPITAL LETTER Y WITH MACRON] + case '\u024E': + // ÉŽ [LATIN CAPITAL LETTER Y WITH STROKE] + case '\u028F': + // � [LATIN LETTER SMALL CAPITAL Y] + case '\u1E8E': + // Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE] + case '\u1EF2': + // Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE] + case '\u1EF4': + // á»´ [LATIN CAPITAL LETTER Y WITH DOT BELOW] + case '\u1EF6': + // á»¶ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE] + case '\u1EF8': + // Ỹ [LATIN CAPITAL LETTER Y WITH TILDE] + case '\u1EFE': + // Ỿ [LATIN CAPITAL LETTER Y WITH LOOP] + case '\u24CE': + // Ⓨ [CIRCLED LATIN CAPITAL LETTER Y] + case '\uFF39': // ï¼¹ [FULLWIDTH LATIN CAPITAL LETTER Y] + output[opos++] = 'Y'; + break; + + case '\u00FD': + // ý [LATIN SMALL LETTER Y WITH ACUTE] + case '\u00FF': + // ÿ [LATIN SMALL LETTER Y WITH DIAERESIS] + case '\u0177': + // Å· [LATIN SMALL LETTER Y WITH CIRCUMFLEX] + case '\u01B4': + // Æ´ [LATIN SMALL LETTER Y WITH HOOK] + case '\u0233': + // ȳ [LATIN SMALL LETTER Y WITH MACRON] + case '\u024F': + // � [LATIN SMALL LETTER Y WITH STROKE] + case '\u028E': + // ÊŽ [LATIN SMALL LETTER TURNED Y] + case '\u1E8F': + // � [LATIN SMALL LETTER Y WITH DOT ABOVE] + case '\u1E99': + // ẙ [LATIN SMALL LETTER Y WITH RING ABOVE] + case '\u1EF3': + // ỳ [LATIN SMALL LETTER Y WITH GRAVE] + case '\u1EF5': + // ỵ 
[LATIN SMALL LETTER Y WITH DOT BELOW] + case '\u1EF7': + // á»· [LATIN SMALL LETTER Y WITH HOOK ABOVE] + case '\u1EF9': + // ỹ [LATIN SMALL LETTER Y WITH TILDE] + case '\u1EFF': + // ỿ [LATIN SMALL LETTER Y WITH LOOP] + case '\u24E8': + // ⓨ [CIRCLED LATIN SMALL LETTER Y] + case '\uFF59': // ï½™ [FULLWIDTH LATIN SMALL LETTER Y] + output[opos++] = 'y'; + break; + + case '\u24B4': // â’´ [PARENTHESIZED LATIN SMALL LETTER Y] + output[opos++] = '('; + output[opos++] = 'y'; + output[opos++] = ')'; + break; + + case '\u0179': + // Ź [LATIN CAPITAL LETTER Z WITH ACUTE] + case '\u017B': + // Å» [LATIN CAPITAL LETTER Z WITH DOT ABOVE] + case '\u017D': + // Ž [LATIN CAPITAL LETTER Z WITH CARON] + case '\u01B5': + // Ƶ [LATIN CAPITAL LETTER Z WITH STROKE] + case '\u021C': + // Èœ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH] + case '\u0224': + // Ȥ [LATIN CAPITAL LETTER Z WITH HOOK] + case '\u1D22': + // á´¢ [LATIN LETTER SMALL CAPITAL Z] + case '\u1E90': + // � [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] + case '\u1E92': + // Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW] + case '\u1E94': + // �? 
[LATIN CAPITAL LETTER Z WITH LINE BELOW] + case '\u24CF': + // � [CIRCLED LATIN CAPITAL LETTER Z] + case '\u2C6B': + // Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER] + case '\uA762': + // � [LATIN CAPITAL LETTER VISIGOTHIC Z] + case '\uFF3A': // Z [FULLWIDTH LATIN CAPITAL LETTER Z] + output[opos++] = 'Z'; + break; + + case '\u017A': + // ź [LATIN SMALL LETTER Z WITH ACUTE] + case '\u017C': + // ż [LATIN SMALL LETTER Z WITH DOT ABOVE] + case '\u017E': + // ž [LATIN SMALL LETTER Z WITH CARON] + case '\u01B6': + // ƶ [LATIN SMALL LETTER Z WITH STROKE] + case '\u021D': + // � http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH] + case '\u0225': + // È¥ [LATIN SMALL LETTER Z WITH HOOK] + case '\u0240': + // É€ [LATIN SMALL LETTER Z WITH SWASH TAIL] + case '\u0290': + // � [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] + case '\u0291': + // Ê‘ [LATIN SMALL LETTER Z WITH CURL] + case '\u1D76': + // áµ¶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE] + case '\u1D8E': + // á¶Ž [LATIN SMALL LETTER Z WITH PALATAL HOOK] + case '\u1E91': + // ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX] + case '\u1E93': + // ẓ [LATIN SMALL LETTER Z WITH DOT BELOW] + case '\u1E95': + // ẕ [LATIN SMALL LETTER Z WITH LINE BELOW] + case '\u24E9': + // â“© [CIRCLED LATIN SMALL LETTER Z] + case '\u2C6C': + // ⱬ [LATIN SMALL LETTER Z WITH DESCENDER] + case '\uA763': + // � [LATIN SMALL LETTER VISIGOTHIC Z] + case '\uFF5A': // z [FULLWIDTH LATIN SMALL LETTER Z] + output[opos++] = 'z'; + break; + + case '\u24B5': // â’µ [PARENTHESIZED LATIN SMALL LETTER Z] + output[opos++] = '('; + output[opos++] = 'z'; + output[opos++] = ')'; + break; + + case '\u2070': + // � [SUPERSCRIPT ZERO] + case '\u2080': + // â‚€ [SUBSCRIPT ZERO] + case '\u24EA': + // ⓪ [CIRCLED DIGIT ZERO] + case '\u24FF': + // â“¿ [NEGATIVE CIRCLED DIGIT ZERO] + case '\uFF10': // � [FULLWIDTH DIGIT ZERO] + output[opos++] = '0'; + break; + + case '\u00B9': + // ¹ [SUPERSCRIPT ONE] + case '\u2081': + // � [SUBSCRIPT ONE] + case '\u2460': + // â‘  [CIRCLED 
DIGIT ONE] + case '\u24F5': + // ⓵ [DOUBLE CIRCLED DIGIT ONE] + case '\u2776': + // � [DINGBAT NEGATIVE CIRCLED DIGIT ONE] + case '\u2780': + // ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] + case '\u278A': + // ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] + case '\uFF11': // 1 [FULLWIDTH DIGIT ONE] + output[opos++] = '1'; + break; + + case '\u2488': // â’ˆ [DIGIT ONE FULL STOP] + output[opos++] = '1'; + output[opos++] = '.'; + break; + + case '\u2474': // â‘´ [PARENTHESIZED DIGIT ONE] + output[opos++] = '('; + output[opos++] = '1'; + output[opos++] = ')'; + break; + + case '\u00B2': + // ² [SUPERSCRIPT TWO] + case '\u2082': + // â‚‚ [SUBSCRIPT TWO] + case '\u2461': + // â‘¡ [CIRCLED DIGIT TWO] + case '\u24F6': + // â“¶ [DOUBLE CIRCLED DIGIT TWO] + case '\u2777': + // � [DINGBAT NEGATIVE CIRCLED DIGIT TWO] + case '\u2781': + // � [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] + case '\u278B': + // âž‹ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] + case '\uFF12': // ï¼’ [FULLWIDTH DIGIT TWO] + output[opos++] = '2'; + break; + + case '\u2489': // â’‰ [DIGIT TWO FULL STOP] + output[opos++] = '2'; + output[opos++] = '.'; + break; + + case '\u2475': // ⑵ [PARENTHESIZED DIGIT TWO] + output[opos++] = '('; + output[opos++] = '2'; + output[opos++] = ')'; + break; + + case '\u00B3': + // ³ [SUPERSCRIPT THREE] + case '\u2083': + // ₃ [SUBSCRIPT THREE] + case '\u2462': + // â‘¢ [CIRCLED DIGIT THREE] + case '\u24F7': + // â“· [DOUBLE CIRCLED DIGIT THREE] + case '\u2778': + // � [DINGBAT NEGATIVE CIRCLED DIGIT THREE] + case '\u2782': + // âž‚ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] + case '\u278C': + // ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] + case '\uFF13': // 3 [FULLWIDTH DIGIT THREE] + output[opos++] = '3'; + break; + + case '\u248A': // â’Š [DIGIT THREE FULL STOP] + output[opos++] = '3'; + output[opos++] = '.'; + break; + + case '\u2476': // â‘¶ [PARENTHESIZED DIGIT THREE] + output[opos++] = '('; + output[opos++] = '3'; + output[opos++] = ')'; + break; + + case 
'\u2074': + // � [SUPERSCRIPT FOUR] + case '\u2084': + // â‚„ [SUBSCRIPT FOUR] + case '\u2463': + // â‘£ [CIRCLED DIGIT FOUR] + case '\u24F8': + // ⓸ [DOUBLE CIRCLED DIGIT FOUR] + case '\u2779': + // � [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] + case '\u2783': + // ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] + case '\u278D': + // � [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] + case '\uFF14': // �? [FULLWIDTH DIGIT FOUR] + output[opos++] = '4'; + break; + + case '\u248B': // â’‹ [DIGIT FOUR FULL STOP] + output[opos++] = '4'; + output[opos++] = '.'; + break; + + case '\u2477': // â‘· [PARENTHESIZED DIGIT FOUR] + output[opos++] = '('; + output[opos++] = '4'; + output[opos++] = ')'; + break; + + case '\u2075': + // � [SUPERSCRIPT FIVE] + case '\u2085': + // â‚… [SUBSCRIPT FIVE] + case '\u2464': + // ⑤ [CIRCLED DIGIT FIVE] + case '\u24F9': + // ⓹ [DOUBLE CIRCLED DIGIT FIVE] + case '\u277A': + // � [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] + case '\u2784': + // âž„ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] + case '\u278E': + // ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] + case '\uFF15': // 5 [FULLWIDTH DIGIT FIVE] + output[opos++] = '5'; + break; + + case '\u248C': // â’Œ [DIGIT FIVE FULL STOP] + output[opos++] = '5'; + output[opos++] = '.'; + break; + + case '\u2478': // ⑸ [PARENTHESIZED DIGIT FIVE] + output[opos++] = '('; + output[opos++] = '5'; + output[opos++] = ')'; + break; + + case '\u2076': + // � [SUPERSCRIPT SIX] + case '\u2086': + // ₆ [SUBSCRIPT SIX] + case '\u2465': + // â‘¥ [CIRCLED DIGIT SIX] + case '\u24FA': + // ⓺ [DOUBLE CIRCLED DIGIT SIX] + case '\u277B': + // � [DINGBAT NEGATIVE CIRCLED DIGIT SIX] + case '\u2785': + // âž… [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] + case '\u278F': + // � [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] + case '\uFF16': // ï¼– [FULLWIDTH DIGIT SIX] + output[opos++] = '6'; + break; + + case '\u248D': // â’� [DIGIT SIX FULL STOP] + output[opos++] = '6'; + output[opos++] = '.'; + break; + + case '\u2479': // ⑹ [PARENTHESIZED 
DIGIT SIX] + output[opos++] = '('; + output[opos++] = '6'; + output[opos++] = ')'; + break; + + case '\u2077': + // � [SUPERSCRIPT SEVEN] + case '\u2087': + // ₇ [SUBSCRIPT SEVEN] + case '\u2466': + // ⑦ [CIRCLED DIGIT SEVEN] + case '\u24FB': + // â“» [DOUBLE CIRCLED DIGIT SEVEN] + case '\u277C': + // � [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] + case '\u2786': + // ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] + case '\u2790': + // � [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN] + case '\uFF17': // ï¼— [FULLWIDTH DIGIT SEVEN] + output[opos++] = '7'; + break; + + case '\u248E': // â’Ž [DIGIT SEVEN FULL STOP] + output[opos++] = '7'; + output[opos++] = '.'; + break; + + case '\u247A': // ⑺ [PARENTHESIZED DIGIT SEVEN] + output[opos++] = '('; + output[opos++] = '7'; + output[opos++] = ')'; + break; + + case '\u2078': + // � [SUPERSCRIPT EIGHT] + case '\u2088': + // ₈ [SUBSCRIPT EIGHT] + case '\u2467': + // â‘§ [CIRCLED DIGIT EIGHT] + case '\u24FC': + // ⓼ [DOUBLE CIRCLED DIGIT EIGHT] + case '\u277D': + // � [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] + case '\u2787': + // ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] + case '\u2791': + // âž‘ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] + case '\uFF18': // 8 [FULLWIDTH DIGIT EIGHT] + output[opos++] = '8'; + break; + + case '\u248F': // â’� [DIGIT EIGHT FULL STOP] + output[opos++] = '8'; + output[opos++] = '.'; + break; + + case '\u247B': // â‘» [PARENTHESIZED DIGIT EIGHT] + output[opos++] = '('; + output[opos++] = '8'; + output[opos++] = ')'; + break; + + case '\u2079': + // � [SUPERSCRIPT NINE] + case '\u2089': + // ₉ [SUBSCRIPT NINE] + case '\u2468': + // ⑨ [CIRCLED DIGIT NINE] + case '\u24FD': + // ⓽ [DOUBLE CIRCLED DIGIT NINE] + case '\u277E': + // � [DINGBAT NEGATIVE CIRCLED DIGIT NINE] + case '\u2788': + // ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] + case '\u2792': + // âž’ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] + case '\uFF19': // ï¼™ [FULLWIDTH DIGIT NINE] + output[opos++] = '9'; + break; + + case 
'\u2490': // â’� [DIGIT NINE FULL STOP] + output[opos++] = '9'; + output[opos++] = '.'; + break; + + case '\u247C': // ⑼ [PARENTHESIZED DIGIT NINE] + output[opos++] = '('; + output[opos++] = '9'; + output[opos++] = ')'; + break; + + case '\u2469': + // â‘© [CIRCLED NUMBER TEN] + case '\u24FE': + // ⓾ [DOUBLE CIRCLED NUMBER TEN] + case '\u277F': + // � [DINGBAT NEGATIVE CIRCLED NUMBER TEN] + case '\u2789': + // ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] + case '\u2793': // âž“ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] + output[opos++] = '1'; + output[opos++] = '0'; + break; + + case '\u2491': // â’‘ [NUMBER TEN FULL STOP] + output[opos++] = '1'; + output[opos++] = '0'; + output[opos++] = '.'; + break; + + case '\u247D': // ⑽ [PARENTHESIZED NUMBER TEN] + output[opos++] = '('; + output[opos++] = '1'; + output[opos++] = '0'; + output[opos++] = ')'; + break; + + case '\u246A': + // ⑪ [CIRCLED NUMBER ELEVEN] + case '\u24EB': // â“« [NEGATIVE CIRCLED NUMBER ELEVEN] + output[opos++] = '1'; + output[opos++] = '1'; + break; + + case '\u2492': // â’’ [NUMBER ELEVEN FULL STOP] + output[opos++] = '1'; + output[opos++] = '1'; + output[opos++] = '.'; + break; + + case '\u247E': // ⑾ [PARENTHESIZED NUMBER ELEVEN] + output[opos++] = '('; + output[opos++] = '1'; + output[opos++] = '1'; + output[opos++] = ')'; + break; + + case '\u246B': + // â‘« [CIRCLED NUMBER TWELVE] + case '\u24EC': // ⓬ [NEGATIVE CIRCLED NUMBER TWELVE] + output[opos++] = '1'; + output[opos++] = '2'; + break; + + case '\u2493': // â’“ [NUMBER TWELVE FULL STOP] + output[opos++] = '1'; + output[opos++] = '2'; + output[opos++] = '.'; + break; + + case '\u247F': // â‘¿ [PARENTHESIZED NUMBER TWELVE] + output[opos++] = '('; + output[opos++] = '1'; + output[opos++] = '2'; + output[opos++] = ')'; + break; + + case '\u246C': + // ⑬ [CIRCLED NUMBER THIRTEEN] + case '\u24ED': // â“­ [NEGATIVE CIRCLED NUMBER THIRTEEN] + output[opos++] = '1'; + output[opos++] = '3'; + break; + + case '\u2494': // â’�? 
[NUMBER THIRTEEN FULL STOP] + output[opos++] = '1'; + output[opos++] = '3'; + output[opos++] = '.'; + break; + + case '\u2480': // â’€ [PARENTHESIZED NUMBER THIRTEEN] + output[opos++] = '('; + output[opos++] = '1'; + output[opos++] = '3'; + output[opos++] = ')'; + break; + + case '\u246D': + // â‘­ [CIRCLED NUMBER FOURTEEN] + case '\u24EE': // â“® [NEGATIVE CIRCLED NUMBER FOURTEEN] + output[opos++] = '1'; + output[opos++] = '4'; + break; + + case '\u2495': // â’• [NUMBER FOURTEEN FULL STOP] + output[opos++] = '1'; + output[opos++] = '4'; + output[opos++] = '.'; + break; + + case '\u2481': // â’� [PARENTHESIZED NUMBER FOURTEEN] + output[opos++] = '('; + output[opos++] = '1'; + output[opos++] = '4'; + output[opos++] = ')'; + break; + + case '\u246E': + // â‘® [CIRCLED NUMBER FIFTEEN] + case '\u24EF': // ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN] + output[opos++] = '1'; + output[opos++] = '5'; + break; + + case '\u2496': // â’– [NUMBER FIFTEEN FULL STOP] + output[opos++] = '1'; + output[opos++] = '5'; + output[opos++] = '.'; + break; + + case '\u2482': // â’‚ [PARENTHESIZED NUMBER FIFTEEN] + output[opos++] = '('; + output[opos++] = '1'; + output[opos++] = '5'; + output[opos++] = ')'; + break; + + case '\u246F': + // ⑯ [CIRCLED NUMBER SIXTEEN] + case '\u24F0': // â“° [NEGATIVE CIRCLED NUMBER SIXTEEN] + output[opos++] = '1'; + output[opos++] = '6'; + break; + + case '\u2497': // â’— [NUMBER SIXTEEN FULL STOP] + output[opos++] = '1'; + output[opos++] = '6'; + output[opos++] = '.'; + break; + + case '\u2483': // â’ƒ [PARENTHESIZED NUMBER SIXTEEN] + output[opos++] = '('; + output[opos++] = '1'; + output[opos++] = '6'; + output[opos++] = ')'; + break; + + case '\u2470': + // â‘° [CIRCLED NUMBER SEVENTEEN] + case '\u24F1': // ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN] + output[opos++] = '1'; + output[opos++] = '7'; + break; + + case '\u2498': // â’˜ [NUMBER SEVENTEEN FULL STOP] + output[opos++] = '1'; + output[opos++] = '7'; + output[opos++] = '.'; + break; + + case '\u2484': // â’„ 
[PARENTHESIZED NUMBER SEVENTEEN] + output[opos++] = '('; + output[opos++] = '1'; + output[opos++] = '7'; + output[opos++] = ')'; + break; + + case '\u2471': + // ⑱ [CIRCLED NUMBER EIGHTEEN] + case '\u24F2': // ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN] + output[opos++] = '1'; + output[opos++] = '8'; + break; + + case '\u2499': // â’™ [NUMBER EIGHTEEN FULL STOP] + output[opos++] = '1'; + output[opos++] = '8'; + output[opos++] = '.'; + break; + + case '\u2485': // â’… [PARENTHESIZED NUMBER EIGHTEEN] + output[opos++] = '('; + output[opos++] = '1'; + output[opos++] = '8'; + output[opos++] = ')'; + break; + + case '\u2472': + // ⑲ [CIRCLED NUMBER NINETEEN] + case '\u24F3': // ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN] + output[opos++] = '1'; + output[opos++] = '9'; + break; + + case '\u249A': // â’š [NUMBER NINETEEN FULL STOP] + output[opos++] = '1'; + output[opos++] = '9'; + output[opos++] = '.'; + break; + + case '\u2486': // â’† [PARENTHESIZED NUMBER NINETEEN] + output[opos++] = '('; + output[opos++] = '1'; + output[opos++] = '9'; + output[opos++] = ')'; + break; + + case '\u2473': + // ⑳ [CIRCLED NUMBER TWENTY] + case '\u24F4': // â“´ [NEGATIVE CIRCLED NUMBER TWENTY] + output[opos++] = '2'; + output[opos++] = '0'; + break; + + case '\u249B': // â’› [NUMBER TWENTY FULL STOP] + output[opos++] = '2'; + output[opos++] = '0'; + output[opos++] = '.'; + break; + + case '\u2487': // â’‡ [PARENTHESIZED NUMBER TWENTY] + output[opos++] = '('; + output[opos++] = '2'; + output[opos++] = '0'; + output[opos++] = ')'; + break; + + case '\u00AB': + // « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] + case '\u00BB': + // » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] + case '\u201C': + // “ [LEFT DOUBLE QUOTATION MARK] + case '\u201D': + // � [RIGHT DOUBLE QUOTATION MARK] + case '\u201E': + // „ [DOUBLE LOW-9 QUOTATION MARK] + case '\u2033': + // ″ [DOUBLE PRIME] + case '\u2036': + // ‶ [REVERSED DOUBLE PRIME] + case '\u275D': + // � [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] + case 
'\u275E': + // � [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] + case '\u276E': + // � [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] + case '\u276F': + // � [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] + case '\uFF02': // " [FULLWIDTH QUOTATION MARK] + output[opos++] = '"'; + break; + + case '\u2018': + // ‘ [LEFT SINGLE QUOTATION MARK] + case '\u2019': + // ’ [RIGHT SINGLE QUOTATION MARK] + case '\u201A': + // ‚ [SINGLE LOW-9 QUOTATION MARK] + case '\u201B': + // ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK] + case '\u2032': + // ′ [PRIME] + case '\u2035': + // ‵ [REVERSED PRIME] + case '\u2039': + // ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK] + case '\u203A': + // › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] + case '\u275B': + // � [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT] + case '\u275C': + // � [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] + case '\uFF07': // ' [FULLWIDTH APOSTROPHE] + output[opos++] = '\''; + break; + + case '\u2010': + // � [HYPHEN] + case '\u2011': + // ‑ [NON-BREAKING HYPHEN] + case '\u2012': + // ‒ [FIGURE DASH] + case '\u2013': + // – [EN DASH] + case '\u2014': + // �? 
[EM DASH] + case '\u207B': + // � [SUPERSCRIPT MINUS] + case '\u208B': + // â‚‹ [SUBSCRIPT MINUS] + case '\uFF0D': // � [FULLWIDTH HYPHEN-MINUS] + output[opos++] = '-'; + break; + + case '\u2045': + // � [LEFT SQUARE BRACKET WITH QUILL] + case '\u2772': + // � [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] + case '\uFF3B': // ï¼» [FULLWIDTH LEFT SQUARE BRACKET] + output[opos++] = '['; + break; + + case '\u2046': + // � [RIGHT SQUARE BRACKET WITH QUILL] + case '\u2773': + // � [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] + case '\uFF3D': // ï¼½ [FULLWIDTH RIGHT SQUARE BRACKET] + output[opos++] = ']'; + break; + + case '\u207D': + // � [SUPERSCRIPT LEFT PARENTHESIS] + case '\u208D': + // � [SUBSCRIPT LEFT PARENTHESIS] + case '\u2768': + // � [MEDIUM LEFT PARENTHESIS ORNAMENT] + case '\u276A': + // � [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT] + case '\uFF08': // ( [FULLWIDTH LEFT PARENTHESIS] + output[opos++] = '('; + break; + + case '\u2E28': // ⸨ [LEFT DOUBLE PARENTHESIS] + output[opos++] = '('; + output[opos++] = '('; + break; + + case '\u207E': + // � [SUPERSCRIPT RIGHT PARENTHESIS] + case '\u208E': + // ₎ [SUBSCRIPT RIGHT PARENTHESIS] + case '\u2769': + // � [MEDIUM RIGHT PARENTHESIS ORNAMENT] + case '\u276B': + // � [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] + case '\uFF09': // ) [FULLWIDTH RIGHT PARENTHESIS] + output[opos++] = ')'; + break; + + case '\u2E29': // ⸩ [RIGHT DOUBLE PARENTHESIS] + output[opos++] = ')'; + output[opos++] = ')'; + break; + + case '\u276C': + // � [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] + case '\u2770': + // � [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] + case '\uFF1C': // < [FULLWIDTH LESS-THAN SIGN] + output[opos++] = '<'; + break; + + case '\u276D': + // � [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT] + case '\u2771': + // � [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] + case '\uFF1E': // > [FULLWIDTH GREATER-THAN SIGN] + output[opos++] = '>'; + break; + + case '\u2774': + // � [MEDIUM LEFT CURLY BRACKET ORNAMENT] + case 
'\uFF5B': // ï½› [FULLWIDTH LEFT CURLY BRACKET] + output[opos++] = '{'; + break; + + case '\u2775': + // � [MEDIUM RIGHT CURLY BRACKET ORNAMENT] + case '\uFF5D': // � [FULLWIDTH RIGHT CURLY BRACKET] + output[opos++] = '}'; + break; + + case '\u207A': + // � [SUPERSCRIPT PLUS SIGN] + case '\u208A': + // ₊ [SUBSCRIPT PLUS SIGN] + case '\uFF0B': // + [FULLWIDTH PLUS SIGN] + output[opos++] = '+'; + break; + + case '\u207C': + // � [SUPERSCRIPT EQUALS SIGN] + case '\u208C': + // ₌ [SUBSCRIPT EQUALS SIGN] + case '\uFF1D': // � [FULLWIDTH EQUALS SIGN] + output[opos++] = '='; + break; + + case '\uFF01': // � [FULLWIDTH EXCLAMATION MARK] + output[opos++] = '!'; + break; + + case '\u203C': // ‼ [DOUBLE EXCLAMATION MARK] + output[opos++] = '!'; + output[opos++] = '!'; + break; + + case '\u2049': // � [EXCLAMATION QUESTION MARK] + output[opos++] = '!'; + output[opos++] = '?'; + break; + + case '\uFF03': // # [FULLWIDTH NUMBER SIGN] + output[opos++] = '#'; + break; + + case '\uFF04': // $ [FULLWIDTH DOLLAR SIGN] + output[opos++] = '$'; + break; + + case '\u2052': + // � [COMMERCIAL MINUS SIGN] + case '\uFF05': // ï¼… [FULLWIDTH PERCENT SIGN] + output[opos++] = '%'; + break; + + case '\uFF06': // & [FULLWIDTH AMPERSAND] + output[opos++] = '&'; + break; + + case '\u204E': + // � [LOW ASTERISK] + case '\uFF0A': // * [FULLWIDTH ASTERISK] + output[opos++] = '*'; + break; + + case '\uFF0C': // , [FULLWIDTH COMMA] + output[opos++] = ','; + break; + + case '\uFF0E': // . [FULLWIDTH FULL STOP] + output[opos++] = '.'; + break; + + case '\u2044': + // � [FRACTION SLASH] + case '\uFF0F': // � [FULLWIDTH SOLIDUS] + output[opos++] = '/'; + break; + + case '\uFF1A': // : [FULLWIDTH COLON] + output[opos++] = ':'; + break; + + case '\u204F': + // � [REVERSED SEMICOLON] + case '\uFF1B': // ï¼› [FULLWIDTH SEMICOLON] + output[opos++] = ';'; + break; + + case '\uFF1F': // ? 
[FULLWIDTH QUESTION MARK] + output[opos++] = '?'; + break; + + case '\u2047': // � [DOUBLE QUESTION MARK] + output[opos++] = '?'; + output[opos++] = '?'; + break; + + case '\u2048': // � [QUESTION EXCLAMATION MARK] + output[opos++] = '?'; + output[opos++] = '!'; + break; + + case '\uFF20': // ï¼  [FULLWIDTH COMMERCIAL AT] + output[opos++] = '@'; + break; + + case '\uFF3C': // ï¼¼ [FULLWIDTH REVERSE SOLIDUS] + output[opos++] = '\\'; + break; + + case '\u2038': + // ‸ [CARET] + case '\uFF3E': // ï¼¾ [FULLWIDTH CIRCUMFLEX ACCENT] + output[opos++] = '^'; + break; + + case '\uFF3F': // _ [FULLWIDTH LOW LINE] + output[opos++] = '_'; + break; + + case '\u2053': + // � [SWUNG DASH] + case '\uFF5E': // ~ [FULLWIDTH TILDE] + output[opos++] = '~'; + break; + + // BEGIN CUSTOM TRANSLITERATION OF CYRILIC CHARS + + // russian uppercase "А Б В Г Д Е Ё Ж З И Й К Л М Н О П Р С Т У Ф Х Ц Ч Ш Щ Ъ Ы Ь Э Ю Я" + // russian lowercase "а б в г д е ё ж з и й к л м н о п р с т у ф х ц ч ш щ ъ ы ь э ю я" + + // notes + // read http://www.vesic.org/english/blog/c-sharp/transliteration-easy-way-microsoft-transliteration-utility/ + // should we look into MS Transliteration Utility (http://msdn.microsoft.com/en-US/goglobal/bb688104.aspx) + // also UnicodeSharpFork https://bitbucket.org/DimaStefantsov/unidecodesharpfork + // also Transliterator http://transliterator.codeplex.com/ + // + // in any case it would be good to generate all those "case" statements instead of writing them by hand + // time for a T4 template? + // also we should support extensibility so ppl can register more cases in external code + + // TODO: transliterates Анастасия as Anastasiya, and not Anastasia + // Ольга --> Ol'ga, Татьяна --> Tat'yana -- that's bad (?) + // Note: should ä (German umlaut) become a or ae ? 
+ case '\u0410': // А + output[opos++] = 'A'; + break; + case '\u0430': // а + output[opos++] = 'a'; + break; + case '\u0411': // Б + output[opos++] = 'B'; + break; + case '\u0431': // б + output[opos++] = 'b'; + break; + case '\u0412': // В + output[opos++] = 'V'; + break; + case '\u0432': // в + output[opos++] = 'v'; + break; + case '\u0413': // Г + output[opos++] = 'G'; + break; + case '\u0433': // г + output[opos++] = 'g'; + break; + case '\u0414': // Д + output[opos++] = 'D'; + break; + case '\u0434': // д + output[opos++] = 'd'; + break; + case '\u0415': // Е + output[opos++] = 'E'; + break; + case '\u0435': // е + output[opos++] = 'e'; + break; + case '\u0401': // Ё + output[opos++] = 'E'; // alt. Yo + break; + case '\u0451': // ё + output[opos++] = 'e'; // alt. yo + break; + case '\u0416': // Ж + output[opos++] = 'Z'; + output[opos++] = 'h'; + break; + case '\u0436': // ж + output[opos++] = 'z'; + output[opos++] = 'h'; + break; + case '\u0417': // З + output[opos++] = 'Z'; + break; + case '\u0437': // з + output[opos++] = 'z'; + break; + case '\u0418': // И + output[opos++] = 'I'; + break; + case '\u0438': // и + output[opos++] = 'i'; + break; + case '\u0419': // Й + output[opos++] = 'I'; // alt. Y, J + break; + case '\u0439': // й + output[opos++] = 'i'; // alt. 
y, j + break; + case '\u041A': // К + output[opos++] = 'K'; + break; + case '\u043A': // к + output[opos++] = 'k'; + break; + case '\u041B': // Л + output[opos++] = 'L'; + break; + case '\u043B': // л + output[opos++] = 'l'; + break; + case '\u041C': // М + output[opos++] = 'M'; + break; + case '\u043C': // м + output[opos++] = 'm'; + break; + case '\u041D': // Н + output[opos++] = 'N'; + break; + case '\u043D': // н + output[opos++] = 'n'; + break; + case '\u041E': // О + output[opos++] = 'O'; + break; + case '\u043E': // о + output[opos++] = 'o'; + break; + case '\u041F': // П + output[opos++] = 'P'; + break; + case '\u043F': // п + output[opos++] = 'p'; + break; + case '\u0420': // Р + output[opos++] = 'R'; + break; + case '\u0440': // р + output[opos++] = 'r'; + break; + case '\u0421': // С + output[opos++] = 'S'; + break; + case '\u0441': // с + output[opos++] = 's'; + break; + case '\u0422': // Т + output[opos++] = 'T'; + break; + case '\u0442': // т + output[opos++] = 't'; + break; + case '\u0423': // У + output[opos++] = 'U'; + break; + case '\u0443': // у + output[opos++] = 'u'; + break; + case '\u0424': // Ф + output[opos++] = 'F'; + break; + case '\u0444': // ф + output[opos++] = 'f'; + break; + case '\u0425': // Х + output[opos++] = 'K'; // alt. X + output[opos++] = 'h'; + break; + case '\u0445': // х + output[opos++] = 'k'; // alt. x + output[opos++] = 'h'; + break; + case '\u0426': // Ц + output[opos++] = 'F'; + break; + case '\u0446': // ц + output[opos++] = 'f'; + break; + case '\u0427': // Ч + output[opos++] = 'C'; // alt. Ts, C + output[opos++] = 'h'; + break; + case '\u0447': // ч + output[opos++] = 'c'; // alt. ts, c + output[opos++] = 'h'; + break; + case '\u0428': // Ш + output[opos++] = 'S'; // alt. Ch, S + output[opos++] = 'h'; + break; + case '\u0448': // ш + output[opos++] = 's'; // alt. ch, s + output[opos++] = 'h'; + break; + case '\u0429': // Щ + output[opos++] = 'S'; // alt. 
Shch, Sc + output[opos++] = 'h'; + break; + case '\u0449': // щ + output[opos++] = 's'; // alt. shch, sc + output[opos++] = 'h'; + break; + case '\u042A': // Ъ + output[opos++] = '"'; // " + break; + case '\u044A': // ъ + output[opos++] = '"'; // " + break; + case '\u042B': // Ы + output[opos++] = 'Y'; + break; + case '\u044B': // ы + output[opos++] = 'y'; + break; + case '\u042C': // Ь + output[opos++] = '\''; // ' + break; + case '\u044C': // ь + output[opos++] = '\''; // ' + break; + case '\u042D': // Э + output[opos++] = 'E'; + break; + case '\u044D': // э + output[opos++] = 'e'; + break; + case '\u042E': // Ю + output[opos++] = 'Y'; // alt. Ju + output[opos++] = 'u'; + break; + case '\u044E': // ю + output[opos++] = 'y'; // alt. ju + output[opos++] = 'u'; + break; + case '\u042F': // Я + output[opos++] = 'Y'; // alt. Ja + output[opos++] = 'a'; + break; + case '\u044F': // я + output[opos++] = 'y'; // alt. ja + output[opos++] = 'a'; + break; + + // BEGIN EXTRA + /* + case '£': + output[opos++] = 'G'; + output[opos++] = 'B'; + output[opos++] = 'P'; + break; + + case '€': + output[opos++] = 'E'; + output[opos++] = 'U'; + output[opos++] = 'R'; + break; + + case '©': + output[opos++] = '('; + output[opos++] = 'C'; + output[opos++] = ')'; + break; + */ + default: + // if (ToMoreAscii(input, ipos, output, ref opos)) + // break; + + // if (!char.IsLetterOrDigit(c)) // that would not catch eg 汉 unfortunately + // output[opos++] = '?'; + // else + // output[opos++] = c; + + // strict ASCII + output[opos++] = fail; + + break; + } + } + } + + // private static bool ToMoreAscii(char[] input, int ipos, char[] output, ref int opos) + // { + // var c = input[ipos]; + + // switch (c) + // { + // case '£': + // output[opos++] = 'G'; + // output[opos++] = 'B'; + // output[opos++] = 'P'; + // break; + + // case '€': + // output[opos++] = 'E'; + // output[opos++] = 'U'; + // output[opos++] = 'R'; + // break; + + // case '©': + // output[opos++] = '('; + // output[opos++] = 'C'; + 
// output[opos++] = ')'; + // break; + + // default: + // return false; + // } + + // return true; + // } +} diff --git a/tests/Umbraco.Tests.Benchmarks/ShortStringHelperBenchmarks.cs b/tests/Umbraco.Tests.Benchmarks/ShortStringHelperBenchmarks.cs new file mode 100644 index 0000000000..5710edb0e0 --- /dev/null +++ b/tests/Umbraco.Tests.Benchmarks/ShortStringHelperBenchmarks.cs @@ -0,0 +1,108 @@ +using System; +using BenchmarkDotNet.Attributes; +using Umbraco.Cms.Core.Strings; +using Umbraco.Tests.Benchmarks.Config; + +namespace Umbraco.Tests.Benchmarks; + +[QuickRunWithMemoryDiagnoserConfig] +public class ShortStringHelperBenchmarks +{ + private DefaultShortStringHelper _shortStringHelper; + + private string _input; + + [GlobalSetup] + public void Setup() + { + _shortStringHelper = new DefaultShortStringHelper(new DefaultShortStringHelperConfig()); + _input = "This is a 🎈 balloon"; + } + + [Benchmark(Baseline = true)] + public void ToUrlSegment() + { + _shortStringHelper.CleanStringForUrlSegment(_input); + } + + /*[Benchmark(Baseline = true)] + public string OldAsciString() + { + return OldUtf8ToAsciiConverter.ToAsciiString(_input); + } + + + [Benchmark] + public string NewAsciString() + { + return Utf8ToAsciiConverter.ToAsciiString(_input); + }*/ + + #region SurrogatePairs + + /*[Benchmark(Baseline = true)] + public string RemoveSurrogatePairs() + { + var input = _input.ToCharArray(); + var output = new char[input.Length]; + var opos = 0; + + for (var ipos = 0; ipos < input.Length; ipos++) + { + var c = input[ipos]; + if (char.IsSurrogate(c)) // ignore high surrogate + { + ipos++; // and skip low surrogate + output[opos++] = '?'; + } + else + { + output[opos++] = c; + } + } + + return new string(output, 0, opos); + } + + [Benchmark] + public string RemoveNewSurrogatePairs() + { + var input = _input.AsSpan(); + Span output = input.Length <= 1024 ? 
stackalloc char[input.Length] : new char[input.Length]; + var opos = 0; + + for (var ipos = 0; ipos < input.Length; ipos++) + { + var c = input[ipos]; + if (char.IsSurrogate(c)) // ignore high surrogate + { + ipos++; // and skip low surrogate + output[opos++] = '?'; + } + else + { + output[opos++] = c; + } + } + + return new string(output); + }*/ + + #endregion + + //| Method | Mean | Error | StdDev | Ratio | Gen 0 | Allocated | + //|-----------------------------------:|---------:|---------:|--------:|------:|-------:|----------:| + //| ToUrlSegment | 464.2 ns | 34.88 ns | 1.91 ns | 1.00 | 0.1627 | 512 B | + //| ToUrlSegment (With below changes) | 455.7 ns | 26.83 ns | 1.47 ns | 1.00 | 0.1182 | 384 B | + //| ToUrlSegment(CleanCodeString change| 420.6 ns | 64.06 ns | 3.51 ns | 1.00 | 0.0856 | 280 B | + + //| Method | Mean | Error | StdDev | Ratio | Gen 0 | Allocated | + //|------------------------ |---------:|----------:|---------:|------:|-------:|----------:| + //| RemoveSurrogatePairs | 70.75 ns | 15.307 ns | 0.839 ns | 1.00 | 0.0610 | 192 B | + //| RemoveNewSurrogatePairs | 58.44 ns | 7.297 ns | 0.400 ns | 0.83 | 0.0198 | 64 B | + + //| Method | Mean | Error | StdDev | Ratio | Gen 0 | Allocated | + //|-------------- |---------:|---------:|--------:|------:|-------:|----------:| + //| OldAsciString | 181.4 ns | 11.50 ns | 0.63 ns | 1.00 | 0.0851 | 272 B | + //| NewAsciString | 180.7 ns | 5.35 ns | 0.29 ns | 1.00 | 0.0450 | 64 B | +} diff --git a/tests/Umbraco.Tests.Benchmarks/StringReplaceFirstBenchmarks.cs b/tests/Umbraco.Tests.Benchmarks/StringReplaceFirstBenchmarks.cs new file mode 100644 index 0000000000..e5c6c6b2b5 --- /dev/null +++ b/tests/Umbraco.Tests.Benchmarks/StringReplaceFirstBenchmarks.cs @@ -0,0 +1,66 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Security.Cryptography; +using System.Text; +using System.Threading.Tasks; +using BenchmarkDotNet.Attributes; +using Umbraco.Tests.Benchmarks.Config; + +namespace 
Umbraco.Tests.Benchmarks; + +[QuickRunWithMemoryDiagnoserConfig] +public class StringReplaceFirstBenchmarks +{ + [Params("Test string", + "This is a test string that contains multiple test entries", + "This is a string where the searched value is very far back. The system needs to go through all of this code before it reaches the test")] + public string Text { get; set; } + public string Search { get; set; } + public string Replace { get; set; } + + [GlobalSetup] + public void Setup() + { + Search = "test"; + Replace = "release"; + } + + [Benchmark(Baseline = true, Description = "Replace first w/ substring")] + public string SubstringReplaceFirst() + { + var pos = Text.IndexOf(Search, StringComparison.InvariantCulture); + + if (pos < 0) + { + return Text; + } + + return Text.Substring(0, pos) + Replace + Text.Substring(pos + Search.Length); + } + + [Benchmark(Description = "Replace first w/ span")] + public string SpanReplaceFirst() + { + var spanText = Text.AsSpan(); + var pos = spanText.IndexOf(Search, StringComparison.InvariantCulture); + + if (pos < 0) + { + return Text; + } + + return string.Concat(spanText[..pos], Replace.AsSpan(), spanText[(pos + Search.Length)..]); + } + + //| Method | Text | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Allocated | + //|----------------------------- |--------------------- |----------:|---------:|---------:|------:|--------:|-------:|----------:| + //| 'Replace first w/ substring' | Test string | 46.08 ns | 25.83 ns | 1.416 ns | 1.00 | 0.00 | - | - | + //| 'Replace first w/ span' | Test string | 38.59 ns | 19.46 ns | 1.067 ns | 0.84 | 0.05 | - | - | + //| | | | | | | | | | + //| 'Replace first w/ substring' | This(...)test[134] | 407.89 ns | 52.08 ns | 2.855 ns | 1.00 | 0.00 | 0.1833 | 584 B | + //| 'Replace first w/ span' | This(...)test[134] | 372.99 ns | 58.38 ns | 3.200 ns | 0.91 | 0.01 | 0.0941 | 296 B | + //| | | | | | | | | | + //| 'Replace first w/ substring' | This(...)tries[57] | 113.16 ns | 27.95 ns | 1.532 
ns | 1.00 | 0.00 | 0.0961 | 304 B | + //| 'Replace first w/ span' | This(...)tries[57] | 76.57 ns | 17.86 ns | 0.979 ns | 0.68 | 0.01 | 0.0455 | 144 B | +} diff --git a/tests/Umbraco.Tests.Benchmarks/StringReplaceManyBenchmarks.cs b/tests/Umbraco.Tests.Benchmarks/StringReplaceManyBenchmarks.cs index 096d591463..8c4914d0df 100644 --- a/tests/Umbraco.Tests.Benchmarks/StringReplaceManyBenchmarks.cs +++ b/tests/Umbraco.Tests.Benchmarks/StringReplaceManyBenchmarks.cs @@ -74,6 +74,7 @@ public class StringReplaceManyBenchmarks return result; } + /* short text, short replacement: diff --git a/tests/Umbraco.Tests.Common/Umbraco.Tests.Common.csproj b/tests/Umbraco.Tests.Common/Umbraco.Tests.Common.csproj index 915c255e84..214840604e 100644 --- a/tests/Umbraco.Tests.Common/Umbraco.Tests.Common.csproj +++ b/tests/Umbraco.Tests.Common/Umbraco.Tests.Common.csproj @@ -4,6 +4,7 @@ Umbraco CMS - Test tools Contains commonly used tools to write tests for Umbraco CMS, such as various builders for content etc. 
Umbraco.Cms.Tests.Common + true diff --git a/tests/Umbraco.Tests.Integration/Umbraco.Core/Packaging/CreatedPackagesRepositoryTests.cs b/tests/Umbraco.Tests.Integration/Umbraco.Core/Packaging/CreatedPackagesRepositoryTests.cs index b594495dd0..70610ba17e 100644 --- a/tests/Umbraco.Tests.Integration/Umbraco.Core/Packaging/CreatedPackagesRepositoryTests.cs +++ b/tests/Umbraco.Tests.Integration/Umbraco.Core/Packaging/CreatedPackagesRepositoryTests.cs @@ -252,7 +252,7 @@ public class CreatedPackagesRepositoryTests : UmbracoIntegrationTest Assert.AreEqual(test, mediaEntry.Name); Assert.IsNotNull(zipArchive.GetEntry("package.xml")); Assert.AreEqual( - $"", + $"", packageXml.Element("umbPackage").Element("MediaItems").ToString(SaveOptions.DisableFormatting)); Assert.AreEqual(2, zipArchive.Entries.Count()); Assert.AreEqual(ZipArchiveMode.Read, zipArchive.Mode); diff --git a/tests/Umbraco.Tests.Integration/Umbraco.Tests.Integration.csproj b/tests/Umbraco.Tests.Integration/Umbraco.Tests.Integration.csproj index 2e500a7a85..6f9ba8ad10 100644 --- a/tests/Umbraco.Tests.Integration/Umbraco.Tests.Integration.csproj +++ b/tests/Umbraco.Tests.Integration/Umbraco.Tests.Integration.csproj @@ -6,6 +6,7 @@ true true Umbraco.Cms.Tests.Integration + true diff --git a/tests/Umbraco.Tests.UnitTests/Umbraco.Core/ShortStringHelper/StringExtensionsTests.cs b/tests/Umbraco.Tests.UnitTests/Umbraco.Core/ShortStringHelper/StringExtensionsTests.cs index 01fc57c1d8..bd02bead1c 100644 --- a/tests/Umbraco.Tests.UnitTests/Umbraco.Core/ShortStringHelper/StringExtensionsTests.cs +++ b/tests/Umbraco.Tests.UnitTests/Umbraco.Core/ShortStringHelper/StringExtensionsTests.cs @@ -326,6 +326,14 @@ public class StringExtensionsTests Assert.AreEqual(expected, output); } + [TestCase("test to test", "test", "release", "release to test")] + [TestCase("nothing to do", "test", "release", "nothing to do")] + public void ReplaceFirst(string input, string search, string replacement, string expected) + { + var output = 
input.ReplaceFirst(search, replacement); + Assert.AreEqual(expected, output); + } + [Test] public void IsFullPath() { diff --git a/tools/Umbraco.JsonSchema/Umbraco.JsonSchema.csproj b/tools/Umbraco.JsonSchema/Umbraco.JsonSchema.csproj index 732eccf01f..32da8f798a 100644 --- a/tools/Umbraco.JsonSchema/Umbraco.JsonSchema.csproj +++ b/tools/Umbraco.JsonSchema/Umbraco.JsonSchema.csproj @@ -12,7 +12,5 @@ - -