diff --git a/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs b/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs
index 581cd168a3..6d26a1a888 100644
--- a/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs
+++ b/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs
@@ -278,11 +278,11 @@ namespace Umbraco.Cms.Core.Strings
switch (codeType)
{
case CleanStringType.Ascii:
- text = Utf8ToAsciiConverter.ToAsciiString(text);
+ text = Utf8ToAsciiConverterStatic.ToAsciiString(text);
break;
case CleanStringType.TryAscii:
const char ESC = (char) 27;
- var ctext = Utf8ToAsciiConverter.ToAsciiString(text, ESC);
+ var ctext = Utf8ToAsciiConverterStatic.ToAsciiString(text, ESC);
if (ctext.Contains(ESC) == false)
{
text = ctext;
diff --git a/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs b/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs
index f997328aff..f8463f7e01 100644
--- a/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs
+++ b/src/Umbraco.Core/Strings/Utf8ToAsciiConverter.cs
@@ -1,3631 +1,209 @@
+using System.Buffers;
+using System.Collections.Frozen;
+using System.Globalization;
+using System.Text;
+
namespace Umbraco.Cms.Core.Strings;
///
-/// Provides methods to convert Utf8 text to Ascii.
+/// SIMD-optimized UTF-8 to ASCII converter with extensible character mappings.
///
///
-/// Tries to match characters such as accented eg "é" to Ascii equivalent eg "e".
-/// Converts all "whitespace" characters to a single whitespace.
-/// Removes all non-Utf8 (unicode) characters, so in fact it can sort-of "convert" Unicode to Ascii.
-/// Replaces symbols with '?'.
+/// <para>
+/// Each character outside the printable ASCII range is converted by trying these steps in order:
+/// 1. Dictionary lookup for special cases (ligatures, Cyrillic, special Latin)
+/// 2. Unicode normalization (FormD) for accented Latin characters
+/// 3. Control character stripping
+/// 4. Whitespace normalization
+/// 5. Fallback character for unmapped characters
+/// </para>
+/// <para>
+/// Most accented Latin characters (À, é, ñ, etc.) are handled automatically via
+/// Unicode normalization. Dictionary mappings are only needed for characters that
+/// normalization cannot reduce to ASCII (ligatures like Æ→AE, Cyrillic, special Latin like Ø→O).
+/// </para>
///
-public static class Utf8ToAsciiConverter
+public sealed class Utf8ToAsciiConverter : IUtf8ToAsciiConverter
{
///
- /// Converts an Utf8 string into an Ascii string.
+ /// Maximum expansion ratio for output buffer sizing.
+ /// Worst case: single char becomes 4 chars (e.g., Щ→Shch in standard transliteration).
///
- /// The text to convert.
- /// The character to use to replace characters that cannot properly be converted.
- /// The converted text.
- public static string ToAsciiString(string text, char fail = '?') => ToAsciiString(text.AsSpan(), fail);
+ private const int MaxExpansionRatio = 4;
- ///
- /// Converts an Utf8 string into an Ascii string.
- ///
- /// The text to convert.
- /// The character to use to replace characters that cannot properly be converted.
- /// The converted text.
- public static string ToAsciiString(ReadOnlySpan text, char fail = '?')
+ // Printable ASCII set (U+0020 through U+007E). SearchValues gives IndexOfAnyExcept an
+ // optimized search for the first character that falls outside this set.
+ private static readonly SearchValues<char> AsciiPrintable =
+ SearchValues.Create(" !\"#$%&'()*+,-./0123456789:;<=>?@" +
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`" +
+ "abcdefghijklmnopqrstuvwxyz{|}~");
+
+ // Per-character replacement table, assigned once in the constructor.
+ private readonly FrozenDictionary<char, string> _mappings;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="Utf8ToAsciiConverter"/> class.
+ /// </summary>
+ /// <param name="mappingLoader">Loader that supplies the character mappings used by this converter.</param>
+ /// <exception cref="ArgumentNullException"><paramref name="mappingLoader"/> is <c>null</c>.</exception>
+ public Utf8ToAsciiConverter(ICharacterMappingLoader mappingLoader)
{
- // this is faster although it uses more memory
- // but... we should be filtering short strings only...
-
- var totalSize = text.Length * 3;
- Span output = totalSize <= 1024 ? stackalloc char[totalSize] : new char[totalSize]; // *3 because of things such as OE
- var len = ToAscii(text, output, fail);
- return new string(output[..len]);
-
- // var output = new StringBuilder(input.Length + 16); // default is 16, start with at least input length + little extra
- // ToAscii(input, output);
- // return output.ToString();
+ // Fail with a descriptive exception instead of a NullReferenceException on the call below.
+ ArgumentNullException.ThrowIfNull(mappingLoader);
+
+ // Mappings are loaded once here; the field is readonly, so they are fixed for this instance.
+ _mappings = mappingLoader.LoadMappings();
}
- ///
- /// Converts an Utf8 string into an array of Ascii characters.
- ///
- /// The text to convert.
- /// The character to use to replace characters that cannot properly be converted.
- /// The converted text.
- public static char[] ToAsciiCharArray(string text, char fail = '?')
+ /// <inheritdoc />
+ public string Convert(string? text, char fallback = '?')
{
- var input = text.ToCharArray();
-
- // this is faster although it uses more memory
- // but... we should be filtering short strings only...
- int outputLength = input.Length * 3; // *3 because of things such as OE
- Span output = outputLength <= 1024 ? stackalloc char[outputLength] : new char[outputLength];
- var len = ToAscii(input, output, fail);
- return output[..len].ToArray();
-
- // var temp = new StringBuilder(input.Length + 16); // default is 16, start with at least input length + little extra
- // ToAscii(input, temp);
- // var output = new char[temp.Length];
- // temp.CopyTo(0, output, 0, temp.Length);
- // return output;
- }
-
- ///
- /// Converts an array of Utf8 characters into an array of Ascii characters.
- ///
- /// The input array.
- /// The output array.
- /// The character to use to replace characters that cannot properly be converted.
- /// The number of characters in the output array.
- /// The caller must ensure that the output array is big enough.
- /// The output array is not big enough.
- private static int ToAscii(ReadOnlySpan input, Span output, char fail = '?')
- {
- var opos = 0;
-
- for (var ipos = 0; ipos < input.Length; ipos++)
+ if (string.IsNullOrEmpty(text))
{
- // ignore high surrogate
- if (char.IsSurrogate(input[ipos]))
+ return string.Empty;
+ }
+
+ var input = text.AsSpan();
+
+ // Fast path: every character is printable ASCII, so the input string is returned as-is
+ if (input.IndexOfAnyExcept(AsciiPrintable) == -1)
+ {
+ return text;
+ }
+
+ // Rent a pooled buffer sized for the assumed worst-case expansion; it is returned in the finally block.
+ // NOTE(review): assumes no loaded mapping is longer than MaxExpansionRatio characters - confirm against the mapping loader
+ var maxLen = text.Length * MaxExpansionRatio;
+ char[] arrayBuffer = ArrayPool<char>.Shared.Rent(maxLen);
+ try
+ {
+ var written = Convert(input, arrayBuffer.AsSpan(), fallback);
+ return new string(arrayBuffer, 0, written);
+ }
+ finally
+ {
+ ArrayPool<char>.Shared.Return(arrayBuffer);
+ }
+ }
+
+ /// <inheritdoc />
+ public int Convert(ReadOnlySpan<char> input, Span<char> output, char fallback = '?')
+ {
+ if (input.IsEmpty)
+ {
+ return 0;
+ }
+
+ var opos = 0;
+ var ipos = 0;
+
+ while (ipos < input.Length)
+ {
+ // Locate the first character outside the printable ASCII set
+ var remaining = input[ipos..];
+ var asciiLen = remaining.IndexOfAnyExcept(AsciiPrintable);
+
+ if (asciiLen == -1)
{
- ipos++; // and skip low surrogate
- output[opos++] = fail;
+ // Rest is all printable ASCII - bulk copy and finish
+ remaining.CopyTo(output[opos..]);
+ return opos + remaining.Length;
}
- else
+
+ if (asciiLen > 0)
{
- ToAscii(input, ipos, output, ref opos, fail);
+ // Copy the printable ASCII prefix unchanged
+ remaining[..asciiLen].CopyTo(output[opos..]);
+ opos += asciiLen;
+ ipos += asciiLen;
}
+
+ // First character outside the printable ASCII set (non-ASCII, or an ASCII control character)
+ var c = input[ipos];
+
+ // Any surrogate code unit becomes one fallback character; a low surrogate right after it is consumed too
+ if (char.IsSurrogate(c))
+ {
+ output[opos++] = fallback;
+ ipos++;
+ if (ipos < input.Length && char.IsLowSurrogate(input[ipos]))
+ {
+ ipos++; // Skip low surrogate
+ }
+ continue;
+ }
+
+ opos += ProcessNonAscii(c, output[opos..], fallback);
+ ipos++;
}
return opos;
}
- // private static void ToAscii(char[] input, StringBuilder output)
- // {
- // var chars = new char[5];
-
- // for (var ipos = 0; ipos < input.Length; ipos++)
- // {
- // var opos = 0;
- // if (char.IsSurrogate(input[ipos]))
- // ipos++;
- // else
- // {
- // ToAscii(input, ipos, chars, ref opos);
- // output.Append(chars, 0, opos);
- // }
- // }
- // }
-
- ///
- /// Converts the character at position in input array of Utf8 characters
- ///
- /// and writes the converted value to output array of Ascii characters at position
- /// ,
- /// and increments that position accordingly.
- ///
- /// The input array.
- /// The input position.
- /// The output array.
- /// The output position.
- /// The character to use to replace characters that cannot properly be converted.
- ///
- /// Adapted from various sources on the 'net including Lucene.Net.Analysis.ASCIIFoldingFilter.
- /// Input should contain Utf8 characters exclusively and NOT Unicode.
- /// Removes controls, normalizes whitespaces, replaces symbols by '?'.
- ///
- private static void ToAscii(ReadOnlySpan input, int ipos, Span output, ref int opos, char fail = '?')
+ private int ProcessNonAscii(char c, Span output, char fallback)
{
- var c = input[ipos];
+ // 1. Check special cases dictionary (ligatures, Cyrillic, etc.)
+ if (_mappings.TryGetValue(c, out var mapped))
+ {
+ if (mapped.Length == 0)
+ {
+ return 0; // Empty mapping = strip character
+ }
+ mapped.AsSpan().CopyTo(output);
+ return mapped.Length;
+ }
+ // 2. Try Unicode normalization (handles most accented chars)
+ var normLen = TryNormalize(c, output);
+ if (normLen > 0)
+ {
+ return normLen;
+ }
+
+ // 3. Control character handling
if (char.IsControl(c))
{
- // Control characters are non-printing and formatting characters, such as ACK, BEL, CR, FF, LF, and VT.
- // The Unicode standard assigns the following code points to control characters: from \U0000 to \U001F,
- // \U007F, and from \U0080 to \U009F. According to the Unicode standard, these values are to be
- // interpreted as control characters unless their use is otherwise defined by an application. Valid
- // control characters are members of the UnicodeCategory.Control category.
-
- // we don't want them
+ return 0; // Strip control characters
}
- // else if (char.IsSeparator(c))
- // {
- // // The Unicode standard recognizes three subcategories of separators:
- // // - Space separators (the UnicodeCategory.SpaceSeparator category), which includes characters such as \u0020.
- // // - Line separators (the UnicodeCategory.LineSeparator category), which includes \u2028.
- // // - Paragraph separators (the UnicodeCategory.ParagraphSeparator category), which includes \u2029.
- // //
- // // Note: The Unicode standard classifies the characters \u000A (LF), \u000C (FF), and \u000A (CR) as control
- // // characters (members of the UnicodeCategory.Control category), not as separator characters.
-
- // // better do it via WhiteSpace
- // }
- else if (char.IsWhiteSpace(c))
+ // 4. Whitespace normalization
+ if (char.IsWhiteSpace(c))
{
- // White space characters are the following Unicode characters:
- // - Members of the SpaceSeparator category, which includes the characters SPACE (U+0020),
- // OGHAM SPACE MARK (U+1680), MONGOLIAN VOWEL SEPARATOR (U+180E), EN QUAD (U+2000), EM QUAD (U+2001),
- // EN SPACE (U+2002), EM SPACE (U+2003), THREE-PER-EM SPACE (U+2004), FOUR-PER-EM SPACE (U+2005),
- // SIX-PER-EM SPACE (U+2006), FIGURE SPACE (U+2007), PUNCTUATION SPACE (U+2008), THIN SPACE (U+2009),
- // HAIR SPACE (U+200A), NARROW NO-BREAK SPACE (U+202F), MEDIUM MATHEMATICAL SPACE (U+205F),
- // and IDEOGRAPHIC SPACE (U+3000).
- // - Members of the LineSeparator category, which consists solely of the LINE SEPARATOR character (U+2028).
- // - Members of the ParagraphSeparator category, which consists solely of the PARAGRAPH SEPARATOR character (U+2029).
- // - The characters CHARACTER TABULATION (U+0009), LINE FEED (U+000A), LINE TABULATION (U+000B),
- // FORM FEED (U+000C), CARRIAGE RETURN (U+000D), NEXT LINE (U+0085), and NO-BREAK SPACE (U+00A0).
-
- // make it a whitespace
- output[opos++] = ' ';
+ output[0] = ' ';
+ return 1;
}
- else if (c < '\u0080')
- {
- // safe
- output[opos++] = c;
- }
- else
- {
- switch (c)
- {
- case '\u00C0':
- // À [LATIN CAPITAL LETTER A WITH GRAVE]
- case '\u00C1':
- // � [LATIN CAPITAL LETTER A WITH ACUTE]
- case '\u00C2':
- // Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
- case '\u00C3':
- // Ã [LATIN CAPITAL LETTER A WITH TILDE]
- case '\u00C4':
- // Ä [LATIN CAPITAL LETTER A WITH DIAERESIS]
- case '\u00C5':
- // Ã… [LATIN CAPITAL LETTER A WITH RING ABOVE]
- case '\u0100':
- // Ā [LATIN CAPITAL LETTER A WITH MACRON]
- case '\u0102':
- // Ä‚ [LATIN CAPITAL LETTER A WITH BREVE]
- case '\u0104':
- // Ä„ [LATIN CAPITAL LETTER A WITH OGONEK]
- case '\u018F':
- // � http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA]
- case '\u01CD':
- // � [LATIN CAPITAL LETTER A WITH CARON]
- case '\u01DE':
- // Çž [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
- case '\u01E0':
- // Ç [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
- case '\u01FA':
- // Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
- case '\u0200':
- // Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
- case '\u0202':
- // È‚ [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
- case '\u0226':
- // Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE]
- case '\u023A':
- // Ⱥ [LATIN CAPITAL LETTER A WITH STROKE]
- case '\u1D00':
- // á´€ [LATIN LETTER SMALL CAPITAL A]
- case '\u1E00':
- // Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW]
- case '\u1EA0':
- // Ạ[LATIN CAPITAL LETTER A WITH DOT BELOW]
- case '\u1EA2':
- // Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
- case '\u1EA4':
- // Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
- case '\u1EA6':
- // Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
- case '\u1EA8':
- // Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
- case '\u1EAA':
- // Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
- case '\u1EAC':
- // Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
- case '\u1EAE':
- // Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
- case '\u1EB0':
- // Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
- case '\u1EB2':
- // Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
- case '\u1EB4':
- // Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
- case '\u1EB6':
- // Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
- case '\u24B6':
- // â’¶ [CIRCLED LATIN CAPITAL LETTER A]
- case '\uFF21': // A [FULLWIDTH LATIN CAPITAL LETTER A]
- output[opos++] = 'A';
- break;
- case '\u00E0':
- // à[LATIN SMALL LETTER A WITH GRAVE]
- case '\u00E1':
- // á [LATIN SMALL LETTER A WITH ACUTE]
- case '\u00E2':
- // â [LATIN SMALL LETTER A WITH CIRCUMFLEX]
- case '\u00E3':
- // ã [LATIN SMALL LETTER A WITH TILDE]
- case '\u00E4':
- // ä [LATIN SMALL LETTER A WITH DIAERESIS]
- case '\u00E5':
- // å [LATIN SMALL LETTER A WITH RING ABOVE]
- case '\u0101':
- // � [LATIN SMALL LETTER A WITH MACRON]
- case '\u0103':
- // ă [LATIN SMALL LETTER A WITH BREVE]
- case '\u0105':
- // Ä… [LATIN SMALL LETTER A WITH OGONEK]
- case '\u01CE':
- // ÇŽ [LATIN SMALL LETTER A WITH CARON]
- case '\u01DF':
- // ÇŸ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
- case '\u01E1':
- // Ç¡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
- case '\u01FB':
- // Ç» [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
- case '\u0201':
- // � [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
- case '\u0203':
- // ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE]
- case '\u0227':
- // ȧ [LATIN SMALL LETTER A WITH DOT ABOVE]
- case '\u0250':
- // � [LATIN SMALL LETTER TURNED A]
- case '\u0259':
- // É™ [LATIN SMALL LETTER SCHWA]
- case '\u025A':
- // Éš [LATIN SMALL LETTER SCHWA WITH HOOK]
- case '\u1D8F':
- // � [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
- case '\u1D95':
- // á¶• [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
- case '\u1E01':
- // ạ [LATIN SMALL LETTER A WITH RING BELOW]
- case '\u1E9A':
- // ả [LATIN SMALL LETTER A WITH RIGHT HALF RING]
- case '\u1EA1':
- // ạ [LATIN SMALL LETTER A WITH DOT BELOW]
- case '\u1EA3':
- // ả [LATIN SMALL LETTER A WITH HOOK ABOVE]
- case '\u1EA5':
- // ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
- case '\u1EA7':
- // ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
- case '\u1EA9':
- // ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
- case '\u1EAB':
- // ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
- case '\u1EAD':
- // Ạ[LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
- case '\u1EAF':
- // ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
- case '\u1EB1':
- // ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
- case '\u1EB3':
- // ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
- case '\u1EB5':
- // ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE]
- case '\u1EB7':
- // ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
- case '\u2090':
- // � [LATIN SUBSCRIPT SMALL LETTER A]
- case '\u2094':
- // �? [LATIN SUBSCRIPT SMALL LETTER SCHWA]
- case '\u24D0':
- // � [CIRCLED LATIN SMALL LETTER A]
- case '\u2C65':
- // â±¥ [LATIN SMALL LETTER A WITH STROKE]
- case '\u2C6F':
- // Ɐ [LATIN CAPITAL LETTER TURNED A]
- case '\uFF41': // � [FULLWIDTH LATIN SMALL LETTER A]
- output[opos++] = 'a';
- break;
-
- case '\uA732': // Ꜳ [LATIN CAPITAL LETTER AA]
- output[opos++] = 'A';
- output[opos++] = 'A';
- break;
-
- case '\u00C6':
- // Æ[LATIN CAPITAL LETTER AE]
- case '\u01E2':
- // Ǣ [LATIN CAPITAL LETTER AE WITH MACRON]
- case '\u01FC':
- // Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE]
- case '\u1D01': // á´� [LATIN LETTER SMALL CAPITAL AE]
- output[opos++] = 'A';
- output[opos++] = 'E';
- break;
-
- case '\uA734': // Ꜵ [LATIN CAPITAL LETTER AO]
- output[opos++] = 'A';
- output[opos++] = 'O';
- break;
-
- case '\uA736': // Ꜷ [LATIN CAPITAL LETTER AU]
- output[opos++] = 'A';
- output[opos++] = 'U';
- break;
-
- case '\uA738':
- // Ꜹ [LATIN CAPITAL LETTER AV]
- case '\uA73A': // Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
- output[opos++] = 'A';
- output[opos++] = 'V';
- break;
-
- case '\uA73C': // Ꜽ [LATIN CAPITAL LETTER AY]
- output[opos++] = 'A';
- output[opos++] = 'Y';
- break;
-
- case '\u249C': // ⒜ [PARENTHESIZED LATIN SMALL LETTER A]
- output[opos++] = '(';
- output[opos++] = 'a';
- output[opos++] = ')';
- break;
-
- case '\uA733': // ꜳ [LATIN SMALL LETTER AA]
- output[opos++] = 'a';
- output[opos++] = 'a';
- break;
-
- case '\u00E6':
- // æ [LATIN SMALL LETTER AE]
- case '\u01E3':
- // ǣ [LATIN SMALL LETTER AE WITH MACRON]
- case '\u01FD':
- // ǽ [LATIN SMALL LETTER AE WITH ACUTE]
- case '\u1D02': // á´‚ [LATIN SMALL LETTER TURNED AE]
- output[opos++] = 'a';
- output[opos++] = 'e';
- break;
-
- case '\uA735': // ꜵ [LATIN SMALL LETTER AO]
- output[opos++] = 'a';
- output[opos++] = 'o';
- break;
-
- case '\uA737': // ꜷ [LATIN SMALL LETTER AU]
- output[opos++] = 'a';
- output[opos++] = 'u';
- break;
-
- case '\uA739':
- // ꜹ [LATIN SMALL LETTER AV]
- case '\uA73B': // ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
- output[opos++] = 'a';
- output[opos++] = 'v';
- break;
-
- case '\uA73D': // ꜽ [LATIN SMALL LETTER AY]
- output[opos++] = 'a';
- output[opos++] = 'y';
- break;
-
- case '\u0181':
- // � [LATIN CAPITAL LETTER B WITH HOOK]
- case '\u0182':
- // Æ‚ [LATIN CAPITAL LETTER B WITH TOPBAR]
- case '\u0243':
- // Ƀ [LATIN CAPITAL LETTER B WITH STROKE]
- case '\u0299':
- // Ê™ [LATIN LETTER SMALL CAPITAL B]
- case '\u1D03':
- // á´ƒ [LATIN LETTER SMALL CAPITAL BARRED B]
- case '\u1E02':
- // Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE]
- case '\u1E04':
- // Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW]
- case '\u1E06':
- // Ḇ[LATIN CAPITAL LETTER B WITH LINE BELOW]
- case '\u24B7':
- // â’· [CIRCLED LATIN CAPITAL LETTER B]
- case '\uFF22': // ï¼¢ [FULLWIDTH LATIN CAPITAL LETTER B]
- output[opos++] = 'B';
- break;
-
- case '\u0180':
- // ƀ [LATIN SMALL LETTER B WITH STROKE]
- case '\u0183':
- // ƃ [LATIN SMALL LETTER B WITH TOPBAR]
- case '\u0253':
- // É“ [LATIN SMALL LETTER B WITH HOOK]
- case '\u1D6C':
- // ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE]
- case '\u1D80':
- // á¶€ [LATIN SMALL LETTER B WITH PALATAL HOOK]
- case '\u1E03':
- // ḃ [LATIN SMALL LETTER B WITH DOT ABOVE]
- case '\u1E05':
- // ḅ [LATIN SMALL LETTER B WITH DOT BELOW]
- case '\u1E07':
- // ḇ [LATIN SMALL LETTER B WITH LINE BELOW]
- case '\u24D1':
- // â“‘ [CIRCLED LATIN SMALL LETTER B]
- case '\uFF42': // b [FULLWIDTH LATIN SMALL LETTER B]
- output[opos++] = 'b';
- break;
-
- case '\u249D': // â’� [PARENTHESIZED LATIN SMALL LETTER B]
- output[opos++] = '(';
- output[opos++] = 'b';
- output[opos++] = ')';
- break;
-
- case '\u00C7':
- // Ç [LATIN CAPITAL LETTER C WITH CEDILLA]
- case '\u0106':
- // Ć[LATIN CAPITAL LETTER C WITH ACUTE]
- case '\u0108':
- // Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
- case '\u010A':
- // ÄŠ[LATIN CAPITAL LETTER C WITH DOT ABOVE]
- case '\u010C':
- // Č [LATIN CAPITAL LETTER C WITH CARON]
- case '\u0187':
- // Ƈ [LATIN CAPITAL LETTER C WITH HOOK]
- case '\u023B':
- // È» [LATIN CAPITAL LETTER C WITH STROKE]
- case '\u0297':
- // Ê— [LATIN LETTER STRETCHED C]
- case '\u1D04':
- // á´„ [LATIN LETTER SMALL CAPITAL C]
- case '\u1E08':
- // Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
- case '\u24B8':
- // â’¸ [CIRCLED LATIN CAPITAL LETTER C]
- case '\uFF23': // ï¼£ [FULLWIDTH LATIN CAPITAL LETTER C]
- output[opos++] = 'C';
- break;
-
- case '\u00E7':
- // ç [LATIN SMALL LETTER C WITH CEDILLA]
- case '\u0107':
- // ć [LATIN SMALL LETTER C WITH ACUTE]
- case '\u0109':
- // ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX]
- case '\u010B':
- // Ä‹ [LATIN SMALL LETTER C WITH DOT ABOVE]
- case '\u010D':
- // � [LATIN SMALL LETTER C WITH CARON]
- case '\u0188':
- // ƈ [LATIN SMALL LETTER C WITH HOOK]
- case '\u023C':
- // ȼ [LATIN SMALL LETTER C WITH STROKE]
- case '\u0255':
- // É• [LATIN SMALL LETTER C WITH CURL]
- case '\u1E09':
- // ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
- case '\u2184':
- // ↄ [LATIN SMALL LETTER REVERSED C]
- case '\u24D2':
- // â“’ [CIRCLED LATIN SMALL LETTER C]
- case '\uA73E':
- // Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT]
- case '\uA73F':
- // ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT]
- case '\uFF43': // c [FULLWIDTH LATIN SMALL LETTER C]
- output[opos++] = 'c';
- break;
-
- case '\u249E': // â’ž [PARENTHESIZED LATIN SMALL LETTER C]
- output[opos++] = '(';
- output[opos++] = 'c';
- output[opos++] = ')';
- break;
-
- case '\u00D0':
- // � [LATIN CAPITAL LETTER ETH]
- case '\u010E':
- // ÄŽ [LATIN CAPITAL LETTER D WITH CARON]
- case '\u0110':
- // � [LATIN CAPITAL LETTER D WITH STROKE]
- case '\u0189':
- // Ɖ [LATIN CAPITAL LETTER AFRICAN D]
- case '\u018A':
- // ÆŠ[LATIN CAPITAL LETTER D WITH HOOK]
- case '\u018B':
- // Æ‹ [LATIN CAPITAL LETTER D WITH TOPBAR]
- case '\u1D05':
- // á´… [LATIN LETTER SMALL CAPITAL D]
- case '\u1D06':
- // á´†[LATIN LETTER SMALL CAPITAL ETH]
- case '\u1E0A':
- // Ḋ[LATIN CAPITAL LETTER D WITH DOT ABOVE]
- case '\u1E0C':
- // Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW]
- case '\u1E0E':
- // Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW]
- case '\u1E10':
- // � [LATIN CAPITAL LETTER D WITH CEDILLA]
- case '\u1E12':
- // Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
- case '\u24B9':
- // â’¹ [CIRCLED LATIN CAPITAL LETTER D]
- case '\uA779':
- // � [LATIN CAPITAL LETTER INSULAR D]
- case '\uFF24': // D [FULLWIDTH LATIN CAPITAL LETTER D]
- output[opos++] = 'D';
- break;
-
- case '\u00F0':
- // ð [LATIN SMALL LETTER ETH]
- case '\u010F':
- // � [LATIN SMALL LETTER D WITH CARON]
- case '\u0111':
- // Ä‘ [LATIN SMALL LETTER D WITH STROKE]
- case '\u018C':
- // ƌ [LATIN SMALL LETTER D WITH TOPBAR]
- case '\u0221':
- // È¡ [LATIN SMALL LETTER D WITH CURL]
- case '\u0256':
- // É– [LATIN SMALL LETTER D WITH TAIL]
- case '\u0257':
- // É— [LATIN SMALL LETTER D WITH HOOK]
- case '\u1D6D':
- // áµ [LATIN SMALL LETTER D WITH MIDDLE TILDE]
- case '\u1D81':
- // � [LATIN SMALL LETTER D WITH PALATAL HOOK]
- case '\u1D91':
- // á¶‘ [LATIN SMALL LETTER D WITH HOOK AND TAIL]
- case '\u1E0B':
- // ḋ [LATIN SMALL LETTER D WITH DOT ABOVE]
- case '\u1E0D':
- // � [LATIN SMALL LETTER D WITH DOT BELOW]
- case '\u1E0F':
- // � [LATIN SMALL LETTER D WITH LINE BELOW]
- case '\u1E11':
- // ḑ [LATIN SMALL LETTER D WITH CEDILLA]
- case '\u1E13':
- // ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
- case '\u24D3':
- // â““ [CIRCLED LATIN SMALL LETTER D]
- case '\uA77A':
- // � [LATIN SMALL LETTER INSULAR D]
- case '\uFF44': // d [FULLWIDTH LATIN SMALL LETTER D]
- output[opos++] = 'd';
- break;
-
- case '\u01C4':
- // Ç„ [LATIN CAPITAL LETTER DZ WITH CARON]
- case '\u01F1': // DZ [LATIN CAPITAL LETTER DZ]
- output[opos++] = 'D';
- output[opos++] = 'Z';
- break;
-
- case '\u01C5':
- // Ç… [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
- case '\u01F2': // Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
- output[opos++] = 'D';
- output[opos++] = 'z';
- break;
-
- case '\u249F': // â’Ÿ [PARENTHESIZED LATIN SMALL LETTER D]
- output[opos++] = '(';
- output[opos++] = 'd';
- output[opos++] = ')';
- break;
-
- case '\u0238': // ȸ [LATIN SMALL LETTER DB DIGRAPH]
- output[opos++] = 'd';
- output[opos++] = 'b';
- break;
-
- case '\u01C6':
- // dž[LATIN SMALL LETTER DZ WITH CARON]
- case '\u01F3':
- // dz [LATIN SMALL LETTER DZ]
- case '\u02A3':
- // ʣ [LATIN SMALL LETTER DZ DIGRAPH]
- case '\u02A5': // ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
- output[opos++] = 'd';
- output[opos++] = 'z';
- break;
-
- case '\u00C8':
- // È [LATIN CAPITAL LETTER E WITH GRAVE]
- case '\u00C9':
- // É [LATIN CAPITAL LETTER E WITH ACUTE]
- case '\u00CA':
- // Ê[LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
- case '\u00CB':
- // Ë [LATIN CAPITAL LETTER E WITH DIAERESIS]
- case '\u0112':
- // Ä’ [LATIN CAPITAL LETTER E WITH MACRON]
- case '\u0114':
- // �? [LATIN CAPITAL LETTER E WITH BREVE]
- case '\u0116':
- // Ä– [LATIN CAPITAL LETTER E WITH DOT ABOVE]
- case '\u0118':
- // Ę [LATIN CAPITAL LETTER E WITH OGONEK]
- case '\u011A':
- // Äš [LATIN CAPITAL LETTER E WITH CARON]
- case '\u018E':
- // ÆŽ [LATIN CAPITAL LETTER REVERSED E]
- case '\u0190':
- // � [LATIN CAPITAL LETTER OPEN E]
- case '\u0204':
- // È„ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
- case '\u0206':
- // Ȇ[LATIN CAPITAL LETTER E WITH INVERTED BREVE]
- case '\u0228':
- // Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA]
- case '\u0246':
- // Ɇ[LATIN CAPITAL LETTER E WITH STROKE]
- case '\u1D07':
- // á´‡ [LATIN LETTER SMALL CAPITAL E]
- case '\u1E14':
- // �? [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
- case '\u1E16':
- // Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
- case '\u1E18':
- // Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
- case '\u1E1A':
- // Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW]
- case '\u1E1C':
- // Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
- case '\u1EB8':
- // Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW]
- case '\u1EBA':
- // Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
- case '\u1EBC':
- // Ẽ [LATIN CAPITAL LETTER E WITH TILDE]
- case '\u1EBE':
- // Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
- case '\u1EC0':
- // Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
- case '\u1EC2':
- // Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
- case '\u1EC4':
- // Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
- case '\u1EC6':
- // Ệ[LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
- case '\u24BA':
- // â’º [CIRCLED LATIN CAPITAL LETTER E]
- case '\u2C7B':
- // â±» [LATIN LETTER SMALL CAPITAL TURNED E]
- case '\uFF25': // ï¼¥ [FULLWIDTH LATIN CAPITAL LETTER E]
- output[opos++] = 'E';
- break;
-
- case '\u00E8':
- // è [LATIN SMALL LETTER E WITH GRAVE]
- case '\u00E9':
- // é [LATIN SMALL LETTER E WITH ACUTE]
- case '\u00EA':
- // ê [LATIN SMALL LETTER E WITH CIRCUMFLEX]
- case '\u00EB':
- // ë [LATIN SMALL LETTER E WITH DIAERESIS]
- case '\u0113':
- // Ä“ [LATIN SMALL LETTER E WITH MACRON]
- case '\u0115':
- // Ä• [LATIN SMALL LETTER E WITH BREVE]
- case '\u0117':
- // Ä— [LATIN SMALL LETTER E WITH DOT ABOVE]
- case '\u0119':
- // Ä™ [LATIN SMALL LETTER E WITH OGONEK]
- case '\u011B':
- // Ä› [LATIN SMALL LETTER E WITH CARON]
- case '\u01DD':
- // � [LATIN SMALL LETTER TURNED E]
- case '\u0205':
- // È… [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
- case '\u0207':
- // ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE]
- case '\u0229':
- // È© [LATIN SMALL LETTER E WITH CEDILLA]
- case '\u0247':
- // ɇ [LATIN SMALL LETTER E WITH STROKE]
- case '\u0258':
- // ɘ [LATIN SMALL LETTER REVERSED E]
- case '\u025B':
- // É› [LATIN SMALL LETTER OPEN E]
- case '\u025C':
- // ɜ [LATIN SMALL LETTER REVERSED OPEN E]
- case '\u025D':
- // � [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
- case '\u025E':
- // Éž [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
- case '\u029A':
- // Êš [LATIN SMALL LETTER CLOSED OPEN E]
- case '\u1D08':
- // á´ˆ [LATIN SMALL LETTER TURNED OPEN E]
- case '\u1D92':
- // á¶’ [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
- case '\u1D93':
- // á¶“ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
- case '\u1D94':
- // �? [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
- case '\u1E15':
- // ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
- case '\u1E17':
- // ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
- case '\u1E19':
- // ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
- case '\u1E1B':
- // ḛ [LATIN SMALL LETTER E WITH TILDE BELOW]
- case '\u1E1D':
- // � [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
- case '\u1EB9':
- // ẹ [LATIN SMALL LETTER E WITH DOT BELOW]
- case '\u1EBB':
- // ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE]
- case '\u1EBD':
- // ẽ [LATIN SMALL LETTER E WITH TILDE]
- case '\u1EBF':
- // ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
- case '\u1EC1':
- // � [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
- case '\u1EC3':
- // ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
- case '\u1EC5':
- // á»… [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
- case '\u1EC7':
- // ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
- case '\u2091':
- // â‚‘ [LATIN SUBSCRIPT SMALL LETTER E]
- case '\u24D4':
- // �? [CIRCLED LATIN SMALL LETTER E]
- case '\u2C78':
- // ⱸ [LATIN SMALL LETTER E WITH NOTCH]
- case '\uFF45': // ï½… [FULLWIDTH LATIN SMALL LETTER E]
- output[opos++] = 'e';
- break;
-
- case '\u24A0': // â’ [PARENTHESIZED LATIN SMALL LETTER E]
- output[opos++] = '(';
- output[opos++] = 'e';
- output[opos++] = ')';
- break;
-
- case '\u0191':
- // Æ‘ [LATIN CAPITAL LETTER F WITH HOOK]
- case '\u1E1E':
- // Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE]
- case '\u24BB':
- // â’» [CIRCLED LATIN CAPITAL LETTER F]
- case '\uA730':
- // ꜰ [LATIN LETTER SMALL CAPITAL F]
- case '\uA77B':
- // � [LATIN CAPITAL LETTER INSULAR F]
- case '\uA7FB':
- // ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F]
- case '\uFF26': // F [FULLWIDTH LATIN CAPITAL LETTER F]
- output[opos++] = 'F';
- break;
-
- case '\u0192':
- // Æ’ [LATIN SMALL LETTER F WITH HOOK]
- case '\u1D6E':
- // áµ® [LATIN SMALL LETTER F WITH MIDDLE TILDE]
- case '\u1D82':
- // á¶‚ [LATIN SMALL LETTER F WITH PALATAL HOOK]
- case '\u1E1F':
- // ḟ [LATIN SMALL LETTER F WITH DOT ABOVE]
- case '\u1E9B':
- // ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
- case '\u24D5':
- // â“• [CIRCLED LATIN SMALL LETTER F]
- case '\uA77C':
- // � [LATIN SMALL LETTER INSULAR F]
- case '\uFF46': // f[FULLWIDTH LATIN SMALL LETTER F]
- output[opos++] = 'f';
- break;
-
- case '\u24A1': // â’¡ [PARENTHESIZED LATIN SMALL LETTER F]
- output[opos++] = '(';
- output[opos++] = 'f';
- output[opos++] = ')';
- break;
-
- case '\uFB00': // ff [LATIN SMALL LIGATURE FF]
- output[opos++] = 'f';
- output[opos++] = 'f';
- break;
-
- case '\uFB03': // ffi [LATIN SMALL LIGATURE FFI]
- output[opos++] = 'f';
- output[opos++] = 'f';
- output[opos++] = 'i';
- break;
-
- case '\uFB04': // ffl [LATIN SMALL LIGATURE FFL]
- output[opos++] = 'f';
- output[opos++] = 'f';
- output[opos++] = 'l';
- break;
-
- case '\uFB01': // � [LATIN SMALL LIGATURE FI]
- output[opos++] = 'f';
- output[opos++] = 'i';
- break;
-
- case '\uFB02': // fl [LATIN SMALL LIGATURE FL]
- output[opos++] = 'f';
- output[opos++] = 'l';
- break;
-
- case '\u011C':
- // Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
- case '\u011E':
- // Äž [LATIN CAPITAL LETTER G WITH BREVE]
- case '\u0120':
- // Ä [LATIN CAPITAL LETTER G WITH DOT ABOVE]
- case '\u0122':
- // Ģ [LATIN CAPITAL LETTER G WITH CEDILLA]
- case '\u0193':
- // Æ“ [LATIN CAPITAL LETTER G WITH HOOK]
- case '\u01E4':
- // Ǥ [LATIN CAPITAL LETTER G WITH STROKE]
- case '\u01E5':
- // ǥ [LATIN SMALL LETTER G WITH STROKE]
- case '\u01E6':
- // Ǧ [LATIN CAPITAL LETTER G WITH CARON]
- case '\u01E7':
- // ǧ [LATIN SMALL LETTER G WITH CARON]
- case '\u01F4':
- // Ç´ [LATIN CAPITAL LETTER G WITH ACUTE]
- case '\u0262':
- // ɢ [LATIN LETTER SMALL CAPITAL G]
- case '\u029B':
- // Ê› [LATIN LETTER SMALL CAPITAL G WITH HOOK]
- case '\u1E20':
- // Ḡ[LATIN CAPITAL LETTER G WITH MACRON]
- case '\u24BC':
- // â’¼ [CIRCLED LATIN CAPITAL LETTER G]
- case '\uA77D':
- // � [LATIN CAPITAL LETTER INSULAR G]
- case '\uA77E':
- // � [LATIN CAPITAL LETTER TURNED INSULAR G]
- case '\uFF27': // ï¼§ [FULLWIDTH LATIN CAPITAL LETTER G]
- output[opos++] = 'G';
- break;
-
- case '\u011D':
- // � [LATIN SMALL LETTER G WITH CIRCUMFLEX]
- case '\u011F':
- // ÄŸ [LATIN SMALL LETTER G WITH BREVE]
- case '\u0121':
- // Ä¡ [LATIN SMALL LETTER G WITH DOT ABOVE]
- case '\u0123':
- // ģ [LATIN SMALL LETTER G WITH CEDILLA]
- case '\u01F5':
- // ǵ [LATIN SMALL LETTER G WITH ACUTE]
- case '\u0260':
- // É [LATIN SMALL LETTER G WITH HOOK]
- case '\u0261':
- // É¡ [LATIN SMALL LETTER SCRIPT G]
- case '\u1D77':
- // áµ· [LATIN SMALL LETTER TURNED G]
- case '\u1D79':
- // áµ¹ [LATIN SMALL LETTER INSULAR G]
- case '\u1D83':
- // ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK]
- case '\u1E21':
- // ḡ [LATIN SMALL LETTER G WITH MACRON]
- case '\u24D6':
- // â“– [CIRCLED LATIN SMALL LETTER G]
- case '\uA77F':
- // � [LATIN SMALL LETTER TURNED INSULAR G]
- case '\uFF47': // g [FULLWIDTH LATIN SMALL LETTER G]
- output[opos++] = 'g';
- break;
-
- case '\u24A2': // â’¢ [PARENTHESIZED LATIN SMALL LETTER G]
- output[opos++] = '(';
- output[opos++] = 'g';
- output[opos++] = ')';
- break;
-
- case '\u0124':
- // Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
- case '\u0126':
- // Ħ [LATIN CAPITAL LETTER H WITH STROKE]
- case '\u021E':
- // Èž [LATIN CAPITAL LETTER H WITH CARON]
- case '\u029C':
- // ʜ [LATIN LETTER SMALL CAPITAL H]
- case '\u1E22':
- // Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE]
- case '\u1E24':
- // Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW]
- case '\u1E26':
- // Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS]
- case '\u1E28':
- // Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA]
- case '\u1E2A':
- // Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW]
- case '\u24BD':
- // â’½ [CIRCLED LATIN CAPITAL LETTER H]
- case '\u2C67':
- // â±§ [LATIN CAPITAL LETTER H WITH DESCENDER]
- case '\u2C75':
- // â±µ [LATIN CAPITAL LETTER HALF H]
- case '\uFF28': // H [FULLWIDTH LATIN CAPITAL LETTER H]
- output[opos++] = 'H';
- break;
-
- case '\u0125':
- // ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX]
- case '\u0127':
- // ħ [LATIN SMALL LETTER H WITH STROKE]
- case '\u021F':
- // ÈŸ [LATIN SMALL LETTER H WITH CARON]
- case '\u0265':
- // ɥ [LATIN SMALL LETTER TURNED H]
- case '\u0266':
- // ɦ [LATIN SMALL LETTER H WITH HOOK]
- case '\u02AE':
- // Ê® [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
- case '\u02AF':
- // ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
- case '\u1E23':
- // ḣ [LATIN SMALL LETTER H WITH DOT ABOVE]
- case '\u1E25':
- // ḥ [LATIN SMALL LETTER H WITH DOT BELOW]
- case '\u1E27':
- // ḧ [LATIN SMALL LETTER H WITH DIAERESIS]
- case '\u1E29':
- // ḩ [LATIN SMALL LETTER H WITH CEDILLA]
- case '\u1E2B':
- // ḫ [LATIN SMALL LETTER H WITH BREVE BELOW]
- case '\u1E96':
- // ẖ [LATIN SMALL LETTER H WITH LINE BELOW]
- case '\u24D7':
- // â“— [CIRCLED LATIN SMALL LETTER H]
- case '\u2C68':
- // ⱨ [LATIN SMALL LETTER H WITH DESCENDER]
- case '\u2C76':
- // â±¶ [LATIN SMALL LETTER HALF H]
- case '\uFF48': // h [FULLWIDTH LATIN SMALL LETTER H]
- output[opos++] = 'h';
- break;
-
- case '\u01F6': // Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR]
- output[opos++] = 'H';
- output[opos++] = 'V';
- break;
-
- case '\u24A3': // â’£ [PARENTHESIZED LATIN SMALL LETTER H]
- output[opos++] = '(';
- output[opos++] = 'h';
- output[opos++] = ')';
- break;
-
- case '\u0195': // Æ• [LATIN SMALL LETTER HV]
- output[opos++] = 'h';
- output[opos++] = 'v';
- break;
-
- case '\u00CC':
- // Ì [LATIN CAPITAL LETTER I WITH GRAVE]
- case '\u00CD':
- // � [LATIN CAPITAL LETTER I WITH ACUTE]
- case '\u00CE':
- // ÃŽ [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
- case '\u00CF':
- // � [LATIN CAPITAL LETTER I WITH DIAERESIS]
- case '\u0128':
- // Ĩ [LATIN CAPITAL LETTER I WITH TILDE]
- case '\u012A':
- // Ī [LATIN CAPITAL LETTER I WITH MACRON]
- case '\u012C':
- // Ĭ [LATIN CAPITAL LETTER I WITH BREVE]
- case '\u012E':
- // Ä® [LATIN CAPITAL LETTER I WITH OGONEK]
- case '\u0130':
- // İ [LATIN CAPITAL LETTER I WITH DOT ABOVE]
- case '\u0196':
- // Æ– [LATIN CAPITAL LETTER IOTA]
- case '\u0197':
- // Æ— [LATIN CAPITAL LETTER I WITH STROKE]
- case '\u01CF':
- // � [LATIN CAPITAL LETTER I WITH CARON]
- case '\u0208':
- // Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
- case '\u020A':
- // ÈŠ[LATIN CAPITAL LETTER I WITH INVERTED BREVE]
- case '\u026A':
- // ɪ [LATIN LETTER SMALL CAPITAL I]
- case '\u1D7B':
- // áµ» [LATIN SMALL CAPITAL LETTER I WITH STROKE]
- case '\u1E2C':
- // Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW]
- case '\u1E2E':
- // Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
- case '\u1EC8':
- // Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
- case '\u1ECA':
- // Ị[LATIN CAPITAL LETTER I WITH DOT BELOW]
- case '\u24BE':
- // â’¾ [CIRCLED LATIN CAPITAL LETTER I]
- case '\uA7FE':
- // ꟾ [LATIN EPIGRAPHIC LETTER I LONGA]
- case '\uFF29': // I [FULLWIDTH LATIN CAPITAL LETTER I]
- output[opos++] = 'I';
- break;
-
- case '\u00EC':
- // ì [LATIN SMALL LETTER I WITH GRAVE]
- case '\u00ED':
- // à[LATIN SMALL LETTER I WITH ACUTE]
- case '\u00EE':
- // î [LATIN SMALL LETTER I WITH CIRCUMFLEX]
- case '\u00EF':
- // ï [LATIN SMALL LETTER I WITH DIAERESIS]
- case '\u0129':
- // Ä© [LATIN SMALL LETTER I WITH TILDE]
- case '\u012B':
- // Ä« [LATIN SMALL LETTER I WITH MACRON]
- case '\u012D':
- // Ä [LATIN SMALL LETTER I WITH BREVE]
- case '\u012F':
- // į [LATIN SMALL LETTER I WITH OGONEK]
- case '\u0131':
- // ı [LATIN SMALL LETTER DOTLESS I]
- case '\u01D0':
- // � [LATIN SMALL LETTER I WITH CARON]
- case '\u0209':
- // ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
- case '\u020B':
- // È‹ [LATIN SMALL LETTER I WITH INVERTED BREVE]
- case '\u0268':
- // ɨ [LATIN SMALL LETTER I WITH STROKE]
- case '\u1D09':
- // á´‰ [LATIN SMALL LETTER TURNED I]
- case '\u1D62':
- // áµ¢ [LATIN SUBSCRIPT SMALL LETTER I]
- case '\u1D7C':
- // áµ¼ [LATIN SMALL LETTER IOTA WITH STROKE]
- case '\u1D96':
- // á¶– [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
- case '\u1E2D':
- // Ḡ[LATIN SMALL LETTER I WITH TILDE BELOW]
- case '\u1E2F':
- // ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
- case '\u1EC9':
- // ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE]
- case '\u1ECB':
- // ị [LATIN SMALL LETTER I WITH DOT BELOW]
- case '\u2071':
- // � [SUPERSCRIPT LATIN SMALL LETTER I]
- case '\u24D8':
- // ⓘ [CIRCLED LATIN SMALL LETTER I]
- case '\uFF49': // i [FULLWIDTH LATIN SMALL LETTER I]
- output[opos++] = 'i';
- break;
-
- case '\u0132': // IJ [LATIN CAPITAL LIGATURE IJ]
- output[opos++] = 'I';
- output[opos++] = 'J';
- break;
-
- case '\u24A4': // â’¤ [PARENTHESIZED LATIN SMALL LETTER I]
- output[opos++] = '(';
- output[opos++] = 'i';
- output[opos++] = ')';
- break;
-
- case '\u0133': // ij [LATIN SMALL LIGATURE IJ]
- output[opos++] = 'i';
- output[opos++] = 'j';
- break;
-
- case '\u0134':
- // Ä´ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
- case '\u0248':
- // Ɉ [LATIN CAPITAL LETTER J WITH STROKE]
- case '\u1D0A':
- // á´Š[LATIN LETTER SMALL CAPITAL J]
- case '\u24BF':
- // â’¿ [CIRCLED LATIN CAPITAL LETTER J]
- case '\uFF2A': // J [FULLWIDTH LATIN CAPITAL LETTER J]
- output[opos++] = 'J';
- break;
-
- case '\u0135':
- // ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX]
- case '\u01F0':
- // ǰ [LATIN SMALL LETTER J WITH CARON]
- case '\u0237':
- // È· [LATIN SMALL LETTER DOTLESS J]
- case '\u0249':
- // ɉ [LATIN SMALL LETTER J WITH STROKE]
- case '\u025F':
- // ÉŸ [LATIN SMALL LETTER DOTLESS J WITH STROKE]
- case '\u0284':
- // Ê„ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
- case '\u029D':
- // � [LATIN SMALL LETTER J WITH CROSSED-TAIL]
- case '\u24D9':
- // â“™ [CIRCLED LATIN SMALL LETTER J]
- case '\u2C7C':
- // â±¼ [LATIN SUBSCRIPT SMALL LETTER J]
- case '\uFF4A': // j[FULLWIDTH LATIN SMALL LETTER J]
- output[opos++] = 'j';
- break;
-
- case '\u24A5': // â’¥ [PARENTHESIZED LATIN SMALL LETTER J]
- output[opos++] = '(';
- output[opos++] = 'j';
- output[opos++] = ')';
- break;
-
- case '\u0136':
- // Ķ [LATIN CAPITAL LETTER K WITH CEDILLA]
- case '\u0198':
- // Ƙ [LATIN CAPITAL LETTER K WITH HOOK]
- case '\u01E8':
- // Ǩ [LATIN CAPITAL LETTER K WITH CARON]
- case '\u1D0B':
- // á´‹ [LATIN LETTER SMALL CAPITAL K]
- case '\u1E30':
- // Ḱ [LATIN CAPITAL LETTER K WITH ACUTE]
- case '\u1E32':
- // Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW]
- case '\u1E34':
- // Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW]
- case '\u24C0':
- // â“€ [CIRCLED LATIN CAPITAL LETTER K]
- case '\u2C69':
- // Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER]
- case '\uA740':
- // � [LATIN CAPITAL LETTER K WITH STROKE]
- case '\uA742':
- // � [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
- case '\uA744':
- // � [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
- case '\uFF2B': // K [FULLWIDTH LATIN CAPITAL LETTER K]
- output[opos++] = 'K';
- break;
-
- case '\u0137':
- // Ä· [LATIN SMALL LETTER K WITH CEDILLA]
- case '\u0199':
- // Æ™ [LATIN SMALL LETTER K WITH HOOK]
- case '\u01E9':
- // Ç© [LATIN SMALL LETTER K WITH CARON]
- case '\u029E':
- // Êž [LATIN SMALL LETTER TURNED K]
- case '\u1D84':
- // á¶„ [LATIN SMALL LETTER K WITH PALATAL HOOK]
- case '\u1E31':
- // ḱ [LATIN SMALL LETTER K WITH ACUTE]
- case '\u1E33':
- // ḳ [LATIN SMALL LETTER K WITH DOT BELOW]
- case '\u1E35':
- // ḵ [LATIN SMALL LETTER K WITH LINE BELOW]
- case '\u24DA':
- // ⓚ [CIRCLED LATIN SMALL LETTER K]
- case '\u2C6A':
- // ⱪ [LATIN SMALL LETTER K WITH DESCENDER]
- case '\uA741':
- // � [LATIN SMALL LETTER K WITH STROKE]
- case '\uA743':
- // � [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
- case '\uA745':
- // � [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
- case '\uFF4B': // k [FULLWIDTH LATIN SMALL LETTER K]
- output[opos++] = 'k';
- break;
-
- case '\u24A6': // â’¦ [PARENTHESIZED LATIN SMALL LETTER K]
- output[opos++] = '(';
- output[opos++] = 'k';
- output[opos++] = ')';
- break;
-
- case '\u0139':
- // Ĺ [LATIN CAPITAL LETTER L WITH ACUTE]
- case '\u013B':
- // Ä» [LATIN CAPITAL LETTER L WITH CEDILLA]
- case '\u013D':
- // Ľ [LATIN CAPITAL LETTER L WITH CARON]
- case '\u013F':
- // Ä¿ [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
- case '\u0141':
- // � [LATIN CAPITAL LETTER L WITH STROKE]
- case '\u023D':
- // Ƚ [LATIN CAPITAL LETTER L WITH BAR]
- case '\u029F':
- // ÊŸ [LATIN LETTER SMALL CAPITAL L]
- case '\u1D0C':
- // ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE]
- case '\u1E36':
- // Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW]
- case '\u1E38':
- // Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
- case '\u1E3A':
- // Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW]
- case '\u1E3C':
- // Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
- case '\u24C1':
- // � [CIRCLED LATIN CAPITAL LETTER L]
- case '\u2C60':
- // â± [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
- case '\u2C62':
- // â±¢ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
- case '\uA746':
- // �[LATIN CAPITAL LETTER BROKEN L]
- case '\uA748':
- // � [LATIN CAPITAL LETTER L WITH HIGH STROKE]
- case '\uA780':
- // Ꞁ [LATIN CAPITAL LETTER TURNED L]
- case '\uFF2C': // L [FULLWIDTH LATIN CAPITAL LETTER L]
- output[opos++] = 'L';
- break;
-
- case '\u013A':
- // ĺ [LATIN SMALL LETTER L WITH ACUTE]
- case '\u013C':
- // ļ [LATIN SMALL LETTER L WITH CEDILLA]
- case '\u013E':
- // ľ [LATIN SMALL LETTER L WITH CARON]
- case '\u0140':
- // ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT]
- case '\u0142':
- // Å‚ [LATIN SMALL LETTER L WITH STROKE]
- case '\u019A':
- // Æš [LATIN SMALL LETTER L WITH BAR]
- case '\u0234':
- // È´ [LATIN SMALL LETTER L WITH CURL]
- case '\u026B':
- // É« [LATIN SMALL LETTER L WITH MIDDLE TILDE]
- case '\u026C':
- // ɬ [LATIN SMALL LETTER L WITH BELT]
- case '\u026D':
- // É [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
- case '\u1D85':
- // á¶… [LATIN SMALL LETTER L WITH PALATAL HOOK]
- case '\u1E37':
- // ḷ [LATIN SMALL LETTER L WITH DOT BELOW]
- case '\u1E39':
- // ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
- case '\u1E3B':
- // ḻ [LATIN SMALL LETTER L WITH LINE BELOW]
- case '\u1E3D':
- // ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
- case '\u24DB':
- // â“› [CIRCLED LATIN SMALL LETTER L]
- case '\u2C61':
- // ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR]
- case '\uA747':
- // � [LATIN SMALL LETTER BROKEN L]
- case '\uA749':
- // � [LATIN SMALL LETTER L WITH HIGH STROKE]
- case '\uA781':
- // � [LATIN SMALL LETTER TURNED L]
- case '\uFF4C': // l [FULLWIDTH LATIN SMALL LETTER L]
- output[opos++] = 'l';
- break;
-
- case '\u01C7': // LJ [LATIN CAPITAL LETTER LJ]
- output[opos++] = 'L';
- output[opos++] = 'J';
- break;
-
- case '\u1EFA': // Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
- output[opos++] = 'L';
- output[opos++] = 'L';
- break;
-
- case '\u01C8': // Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
- output[opos++] = 'L';
- output[opos++] = 'j';
- break;
-
- case '\u24A7': // â’§ [PARENTHESIZED LATIN SMALL LETTER L]
- output[opos++] = '(';
- output[opos++] = 'l';
- output[opos++] = ')';
- break;
-
- case '\u01C9': // lj [LATIN SMALL LETTER LJ]
- output[opos++] = 'l';
- output[opos++] = 'j';
- break;
-
- case '\u1EFB': // á»» [LATIN SMALL LETTER MIDDLE-WELSH LL]
- output[opos++] = 'l';
- output[opos++] = 'l';
- break;
-
- case '\u02AA': // ʪ [LATIN SMALL LETTER LS DIGRAPH]
- output[opos++] = 'l';
- output[opos++] = 's';
- break;
-
- case '\u02AB': // Ê« [LATIN SMALL LETTER LZ DIGRAPH]
- output[opos++] = 'l';
- output[opos++] = 'z';
- break;
-
- case '\u019C':
- // Ɯ [LATIN CAPITAL LETTER TURNED M]
- case '\u1D0D':
- // á´� [LATIN LETTER SMALL CAPITAL M]
- case '\u1E3E':
- // Ḿ [LATIN CAPITAL LETTER M WITH ACUTE]
- case '\u1E40':
- // á¹€ [LATIN CAPITAL LETTER M WITH DOT ABOVE]
- case '\u1E42':
- // Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW]
- case '\u24C2':
- // â“‚ [CIRCLED LATIN CAPITAL LETTER M]
- case '\u2C6E':
- // â±® [LATIN CAPITAL LETTER M WITH HOOK]
- case '\uA7FD':
- // ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M]
- case '\uA7FF':
- // ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M]
- case '\uFF2D': // ï¼ [FULLWIDTH LATIN CAPITAL LETTER M]
- output[opos++] = 'M';
- break;
-
- case '\u026F':
- // ɯ [LATIN SMALL LETTER TURNED M]
- case '\u0270':
- // ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG]
- case '\u0271':
- // ɱ [LATIN SMALL LETTER M WITH HOOK]
- case '\u1D6F':
- // ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE]
- case '\u1D86':
- // ᶆ[LATIN SMALL LETTER M WITH PALATAL HOOK]
- case '\u1E3F':
- // ḿ [LATIN SMALL LETTER M WITH ACUTE]
- case '\u1E41':
- // � [LATIN SMALL LETTER M WITH DOT ABOVE]
- case '\u1E43':
- // ṃ [LATIN SMALL LETTER M WITH DOT BELOW]
- case '\u24DC':
- // ⓜ [CIRCLED LATIN SMALL LETTER M]
- case '\uFF4D': // � [FULLWIDTH LATIN SMALL LETTER M]
- output[opos++] = 'm';
- break;
-
- case '\u24A8': // â’¨ [PARENTHESIZED LATIN SMALL LETTER M]
- output[opos++] = '(';
- output[opos++] = 'm';
- output[opos++] = ')';
- break;
-
- case '\u00D1':
- // Ñ [LATIN CAPITAL LETTER N WITH TILDE]
- case '\u0143':
- // Ã…Æ’ [LATIN CAPITAL LETTER N WITH ACUTE]
- case '\u0145':
- // Å… [LATIN CAPITAL LETTER N WITH CEDILLA]
- case '\u0147':
- // Ň [LATIN CAPITAL LETTER N WITH CARON]
- case '\u014A':
- // Ã…Å http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG]
- case '\u019D':
- // � [LATIN CAPITAL LETTER N WITH LEFT HOOK]
- case '\u01F8':
- // Ǹ [LATIN CAPITAL LETTER N WITH GRAVE]
- case '\u0220':
- // È [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
- case '\u0274':
- // É´ [LATIN LETTER SMALL CAPITAL N]
- case '\u1D0E':
- // á´Ž [LATIN LETTER SMALL CAPITAL REVERSED N]
- case '\u1E44':
- // Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE]
- case '\u1E46':
- // Ṇ[LATIN CAPITAL LETTER N WITH DOT BELOW]
- case '\u1E48':
- // Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW]
- case '\u1E4A':
- // Ṋ[LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
- case '\u24C3':
- // Ⓝ [CIRCLED LATIN CAPITAL LETTER N]
- case '\uFF2E': // ï¼® [FULLWIDTH LATIN CAPITAL LETTER N]
- output[opos++] = 'N';
- break;
-
- case '\u00F1':
- // ñ [LATIN SMALL LETTER N WITH TILDE]
- case '\u0144':
- // Å„ [LATIN SMALL LETTER N WITH ACUTE]
- case '\u0146':
- // ņ[LATIN SMALL LETTER N WITH CEDILLA]
- case '\u0148':
- // ň [LATIN SMALL LETTER N WITH CARON]
- case '\u0149':
- // ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
- case '\u014B':
- // Å‹ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG]
- case '\u019E':
- // Æž [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
- case '\u01F9':
- // ǹ [LATIN SMALL LETTER N WITH GRAVE]
- case '\u0235':
- // ȵ [LATIN SMALL LETTER N WITH CURL]
- case '\u0272':
- // ɲ [LATIN SMALL LETTER N WITH LEFT HOOK]
- case '\u0273':
- // ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
- case '\u1D70':
- // áµ° [LATIN SMALL LETTER N WITH MIDDLE TILDE]
- case '\u1D87':
- // ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK]
- case '\u1E45':
- // á¹… [LATIN SMALL LETTER N WITH DOT ABOVE]
- case '\u1E47':
- // ṇ [LATIN SMALL LETTER N WITH DOT BELOW]
- case '\u1E49':
- // ṉ [LATIN SMALL LETTER N WITH LINE BELOW]
- case '\u1E4B':
- // ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
- case '\u207F':
- // � [SUPERSCRIPT LATIN SMALL LETTER N]
- case '\u24DD':
- // � [CIRCLED LATIN SMALL LETTER N]
- case '\uFF4E': // n [FULLWIDTH LATIN SMALL LETTER N]
- output[opos++] = 'n';
- break;
-
- case '\u01CA': // ÇŠ[LATIN CAPITAL LETTER NJ]
- output[opos++] = 'N';
- output[opos++] = 'J';
- break;
-
- case '\u01CB': // Ç‹ [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
- output[opos++] = 'N';
- output[opos++] = 'j';
- break;
-
- case '\u24A9': // â’© [PARENTHESIZED LATIN SMALL LETTER N]
- output[opos++] = '(';
- output[opos++] = 'n';
- output[opos++] = ')';
- break;
-
- case '\u01CC': // nj [LATIN SMALL LETTER NJ]
- output[opos++] = 'n';
- output[opos++] = 'j';
- break;
-
- case '\u00D2':
- // Ã’ [LATIN CAPITAL LETTER O WITH GRAVE]
- case '\u00D3':
- // Ó [LATIN CAPITAL LETTER O WITH ACUTE]
- case '\u00D4':
- // �? [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
- case '\u00D5':
- // Õ [LATIN CAPITAL LETTER O WITH TILDE]
- case '\u00D6':
- // Ö [LATIN CAPITAL LETTER O WITH DIAERESIS]
- case '\u00D8':
- // Ø [LATIN CAPITAL LETTER O WITH STROKE]
- case '\u014C':
- // Ã…Å’ [LATIN CAPITAL LETTER O WITH MACRON]
- case '\u014E':
- // ÅŽ [LATIN CAPITAL LETTER O WITH BREVE]
- case '\u0150':
- // � [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
- case '\u0186':
- // Ɔ[LATIN CAPITAL LETTER OPEN O]
- case '\u019F':
- // ÆŸ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
- case '\u01A0':
- // Æ [LATIN CAPITAL LETTER O WITH HORN]
- case '\u01D1':
- // Ç‘ [LATIN CAPITAL LETTER O WITH CARON]
- case '\u01EA':
- // Ǫ [LATIN CAPITAL LETTER O WITH OGONEK]
- case '\u01EC':
- // Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
- case '\u01FE':
- // Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
- case '\u020C':
- // Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
- case '\u020E':
- // ÈŽ [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
- case '\u022A':
- // Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
- case '\u022C':
- // Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
- case '\u022E':
- // È® [LATIN CAPITAL LETTER O WITH DOT ABOVE]
- case '\u0230':
- // Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
- case '\u1D0F':
- // á´� [LATIN LETTER SMALL CAPITAL O]
- case '\u1D10':
- // á´� [LATIN LETTER SMALL CAPITAL OPEN O]
- case '\u1E4C':
- // Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
- case '\u1E4E':
- // Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
- case '\u1E50':
- // � [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
- case '\u1E52':
- // á¹’ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
- case '\u1ECC':
- // Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW]
- case '\u1ECE':
- // Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
- case '\u1ED0':
- // � [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
- case '\u1ED2':
- // á»’ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
- case '\u1ED4':
- // �? [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
- case '\u1ED6':
- // á»– [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
- case '\u1ED8':
- // Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
- case '\u1EDA':
- // Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
- case '\u1EDC':
- // Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
- case '\u1EDE':
- // Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
- case '\u1EE0':
- // á» [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
- case '\u1EE2':
- // Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
- case '\u24C4':
- // â“„ [CIRCLED LATIN CAPITAL LETTER O]
- case '\uA74A':
- // �[LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
- case '\uA74C':
- // � [LATIN CAPITAL LETTER O WITH LOOP]
- case '\uFF2F': // O [FULLWIDTH LATIN CAPITAL LETTER O]
- output[opos++] = 'O';
- break;
-
- case '\u00F2':
- // ò [LATIN SMALL LETTER O WITH GRAVE]
- case '\u00F3':
- // ó [LATIN SMALL LETTER O WITH ACUTE]
- case '\u00F4':
- // ô [LATIN SMALL LETTER O WITH CIRCUMFLEX]
- case '\u00F5':
- // õ [LATIN SMALL LETTER O WITH TILDE]
- case '\u00F6':
- // ö [LATIN SMALL LETTER O WITH DIAERESIS]
- case '\u00F8':
- // ø [LATIN SMALL LETTER O WITH STROKE]
- case '\u014D':
- // � [LATIN SMALL LETTER O WITH MACRON]
- case '\u014F':
- // � [LATIN SMALL LETTER O WITH BREVE]
- case '\u0151':
- // Å‘ [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
- case '\u01A1':
- // Æ¡ [LATIN SMALL LETTER O WITH HORN]
- case '\u01D2':
- // Ç’ [LATIN SMALL LETTER O WITH CARON]
- case '\u01EB':
- // Ç« [LATIN SMALL LETTER O WITH OGONEK]
- case '\u01ED':
- // Ç [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
- case '\u01FF':
- // Ç¿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
- case '\u020D':
- // � [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
- case '\u020F':
- // � [LATIN SMALL LETTER O WITH INVERTED BREVE]
- case '\u022B':
- // È« [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
- case '\u022D':
- // È [LATIN SMALL LETTER O WITH TILDE AND MACRON]
- case '\u022F':
- // ȯ [LATIN SMALL LETTER O WITH DOT ABOVE]
- case '\u0231':
- // ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
- case '\u0254':
- // �? [LATIN SMALL LETTER OPEN O]
- case '\u0275':
- // ɵ [LATIN SMALL LETTER BARRED O]
- case '\u1D16':
- // á´– [LATIN SMALL LETTER TOP HALF O]
- case '\u1D17':
- // á´— [LATIN SMALL LETTER BOTTOM HALF O]
- case '\u1D97':
- // á¶— [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
- case '\u1E4D':
- // � [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
- case '\u1E4F':
- // � [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
- case '\u1E51':
- // ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
- case '\u1E53':
- // ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
- case '\u1ECD':
- // � [LATIN SMALL LETTER O WITH DOT BELOW]
- case '\u1ECF':
- // � [LATIN SMALL LETTER O WITH HOOK ABOVE]
- case '\u1ED1':
- // ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
- case '\u1ED3':
- // ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
- case '\u1ED5':
- // ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
- case '\u1ED7':
- // á»— [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
- case '\u1ED9':
- // á»™ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
- case '\u1EDB':
- // á»› [LATIN SMALL LETTER O WITH HORN AND ACUTE]
- case '\u1EDD':
- // � [LATIN SMALL LETTER O WITH HORN AND GRAVE]
- case '\u1EDF':
- // ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
- case '\u1EE1':
- // ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE]
- case '\u1EE3':
- // ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
- case '\u2092':
- // â‚’ [LATIN SUBSCRIPT SMALL LETTER O]
- case '\u24DE':
- // ⓞ [CIRCLED LATIN SMALL LETTER O]
- case '\u2C7A':
- // ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE]
- case '\uA74B':
- // � [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
- case '\uA74D':
- // � [LATIN SMALL LETTER O WITH LOOP]
- case '\uFF4F': // � [FULLWIDTH LATIN SMALL LETTER O]
- output[opos++] = 'o';
- break;
-
- case '\u0152':
- // Å’ [LATIN CAPITAL LIGATURE OE]
- case '\u0276': // ɶ [LATIN LETTER SMALL CAPITAL OE]
- output[opos++] = 'O';
- output[opos++] = 'E';
- break;
-
- case '\uA74E': // � [LATIN CAPITAL LETTER OO]
- output[opos++] = 'O';
- output[opos++] = 'O';
- break;
-
- case '\u0222':
- // Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU]
- case '\u1D15': // á´• [LATIN LETTER SMALL CAPITAL OU]
- output[opos++] = 'O';
- output[opos++] = 'U';
- break;
-
- case '\u24AA': // â’ª [PARENTHESIZED LATIN SMALL LETTER O]
- output[opos++] = '(';
- output[opos++] = 'o';
- output[opos++] = ')';
- break;
-
- case '\u0153':
- // Å“ [LATIN SMALL LIGATURE OE]
- case '\u1D14': // á´�? [LATIN SMALL LETTER TURNED OE]
- output[opos++] = 'o';
- output[opos++] = 'e';
- break;
-
- case '\uA74F': // � [LATIN SMALL LETTER OO]
- output[opos++] = 'o';
- output[opos++] = 'o';
- break;
-
- case '\u0223': // ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU]
- output[opos++] = 'o';
- output[opos++] = 'u';
- break;
-
- case '\u01A4':
- // Ƥ [LATIN CAPITAL LETTER P WITH HOOK]
- case '\u1D18':
- // á´˜ [LATIN LETTER SMALL CAPITAL P]
- case '\u1E54':
- // �? [LATIN CAPITAL LETTER P WITH ACUTE]
- case '\u1E56':
- // á¹– [LATIN CAPITAL LETTER P WITH DOT ABOVE]
- case '\u24C5':
- // â“… [CIRCLED LATIN CAPITAL LETTER P]
- case '\u2C63':
- // â±£ [LATIN CAPITAL LETTER P WITH STROKE]
- case '\uA750':
- // � [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
- case '\uA752':
- // � [LATIN CAPITAL LETTER P WITH FLOURISH]
- case '\uA754':
- // �? [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
- case '\uFF30': // ï¼° [FULLWIDTH LATIN CAPITAL LETTER P]
- output[opos++] = 'P';
- break;
-
- case '\u01A5':
- // ƥ [LATIN SMALL LETTER P WITH HOOK]
- case '\u1D71':
- // áµ± [LATIN SMALL LETTER P WITH MIDDLE TILDE]
- case '\u1D7D':
- // áµ½ [LATIN SMALL LETTER P WITH STROKE]
- case '\u1D88':
- // ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK]
- case '\u1E55':
- // ṕ [LATIN SMALL LETTER P WITH ACUTE]
- case '\u1E57':
- // á¹— [LATIN SMALL LETTER P WITH DOT ABOVE]
- case '\u24DF':
- // ⓟ [CIRCLED LATIN SMALL LETTER P]
- case '\uA751':
- // � [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
- case '\uA753':
- // � [LATIN SMALL LETTER P WITH FLOURISH]
- case '\uA755':
- // � [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
- case '\uA7FC':
- // ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P]
- case '\uFF50': // � [FULLWIDTH LATIN SMALL LETTER P]
- output[opos++] = 'p';
- break;
-
- case '\u24AB': // â’« [PARENTHESIZED LATIN SMALL LETTER P]
- output[opos++] = '(';
- output[opos++] = 'p';
- output[opos++] = ')';
- break;
-
- case '\u024A':
- // ÉŠ[LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
- case '\u24C6':
- // Ⓠ[CIRCLED LATIN CAPITAL LETTER Q]
- case '\uA756':
- // � [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
- case '\uA758':
- // � [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
- case '\uFF31': // ï¼± [FULLWIDTH LATIN CAPITAL LETTER Q]
- output[opos++] = 'Q';
- break;
-
- case '\u0138':
- // ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA]
- case '\u024B':
- // É‹ [LATIN SMALL LETTER Q WITH HOOK TAIL]
- case '\u02A0':
- // Ê [LATIN SMALL LETTER Q WITH HOOK]
- case '\u24E0':
- // â“ [CIRCLED LATIN SMALL LETTER Q]
- case '\uA757':
- // � [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
- case '\uA759':
- // � [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
- case '\uFF51': // q [FULLWIDTH LATIN SMALL LETTER Q]
- output[opos++] = 'q';
- break;
-
- case '\u24AC': // â’¬ [PARENTHESIZED LATIN SMALL LETTER Q]
- output[opos++] = '(';
- output[opos++] = 'q';
- output[opos++] = ')';
- break;
-
- case '\u0239': // ȹ [LATIN SMALL LETTER QP DIGRAPH]
- output[opos++] = 'q';
- output[opos++] = 'p';
- break;
-
- case '\u0154':
- // �? [LATIN CAPITAL LETTER R WITH ACUTE]
- case '\u0156':
- // Å– [LATIN CAPITAL LETTER R WITH CEDILLA]
- case '\u0158':
- // Ř [LATIN CAPITAL LETTER R WITH CARON]
- case '\u0210':
- // È’ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
- case '\u0212':
- // È’ [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
- case '\u024C':
- // Ɍ [LATIN CAPITAL LETTER R WITH STROKE]
- case '\u0280':
- // ʀ [LATIN LETTER SMALL CAPITAL R]
- case '\u0281':
- // � [LATIN LETTER SMALL CAPITAL INVERTED R]
- case '\u1D19':
- // á´™ [LATIN LETTER SMALL CAPITAL REVERSED R]
- case '\u1D1A':
- // á´š [LATIN LETTER SMALL CAPITAL TURNED R]
- case '\u1E58':
- // Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE]
- case '\u1E5A':
- // Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW]
- case '\u1E5C':
- // Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
- case '\u1E5E':
- // Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW]
- case '\u24C7':
- // Ⓡ [CIRCLED LATIN CAPITAL LETTER R]
- case '\u2C64':
- // Ɽ [LATIN CAPITAL LETTER R WITH TAIL]
- case '\uA75A':
- // � [LATIN CAPITAL LETTER R ROTUNDA]
- case '\uA782':
- // êž‚ [LATIN CAPITAL LETTER INSULAR R]
- case '\uFF32': // ï¼² [FULLWIDTH LATIN CAPITAL LETTER R]
- output[opos++] = 'R';
- break;
-
- case '\u0155':
- // Å• [LATIN SMALL LETTER R WITH ACUTE]
- case '\u0157':
- // Å— [LATIN SMALL LETTER R WITH CEDILLA]
- case '\u0159':
- // Ã…â„¢ [LATIN SMALL LETTER R WITH CARON]
- case '\u0211':
- // È‘ [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
- case '\u0213':
- // È“ [LATIN SMALL LETTER R WITH INVERTED BREVE]
- case '\u024D':
- // � [LATIN SMALL LETTER R WITH STROKE]
- case '\u027C':
- // ɼ [LATIN SMALL LETTER R WITH LONG LEG]
- case '\u027D':
- // ɽ [LATIN SMALL LETTER R WITH TAIL]
- case '\u027E':
- // ɾ [LATIN SMALL LETTER R WITH FISHHOOK]
- case '\u027F':
- // É¿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
- case '\u1D63':
- // áµ£ [LATIN SUBSCRIPT SMALL LETTER R]
- case '\u1D72':
- // áµ² [LATIN SMALL LETTER R WITH MIDDLE TILDE]
- case '\u1D73':
- // áµ³ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
- case '\u1D89':
- // ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK]
- case '\u1E59':
- // á¹™ [LATIN SMALL LETTER R WITH DOT ABOVE]
- case '\u1E5B':
- // á¹› [LATIN SMALL LETTER R WITH DOT BELOW]
- case '\u1E5D':
- // � [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
- case '\u1E5F':
- // ṟ [LATIN SMALL LETTER R WITH LINE BELOW]
- case '\u24E1':
- // â“¡ [CIRCLED LATIN SMALL LETTER R]
- case '\uA75B':
- // � [LATIN SMALL LETTER R ROTUNDA]
- case '\uA783':
- // ꞃ [LATIN SMALL LETTER INSULAR R]
- case '\uFF52': // ï½’ [FULLWIDTH LATIN SMALL LETTER R]
- output[opos++] = 'r';
- break;
-
- case '\u24AD': // â’ [PARENTHESIZED LATIN SMALL LETTER R]
- output[opos++] = '(';
- output[opos++] = 'r';
- output[opos++] = ')';
- break;
-
- case '\u015A':
- // Ã…Å¡ [LATIN CAPITAL LETTER S WITH ACUTE]
- case '\u015C':
- // Ã…Å“ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
- case '\u015E':
- // Åž [LATIN CAPITAL LETTER S WITH CEDILLA]
- case '\u0160':
- // Ã…Â [LATIN CAPITAL LETTER S WITH CARON]
- case '\u0218':
- // Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW]
- case '\u1E60':
- // á¹ [LATIN CAPITAL LETTER S WITH DOT ABOVE]
- case '\u1E62':
- // á¹¢ [LATIN CAPITAL LETTER S WITH DOT BELOW]
- case '\u1E64':
- // Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
- case '\u1E66':
- // Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
- case '\u1E68':
- // Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
- case '\u24C8':
- // Ⓢ [CIRCLED LATIN CAPITAL LETTER S]
- case '\uA731':
- // ꜱ [LATIN LETTER SMALL CAPITAL S]
- case '\uA785':
- // êž… [LATIN SMALL LETTER INSULAR S]
- case '\uFF33': // ï¼³ [FULLWIDTH LATIN CAPITAL LETTER S]
- output[opos++] = 'S';
- break;
-
- case '\u015B':
- // Å› [LATIN SMALL LETTER S WITH ACUTE]
- case '\u015D':
- // � [LATIN SMALL LETTER S WITH CIRCUMFLEX]
- case '\u015F':
- // ÅŸ [LATIN SMALL LETTER S WITH CEDILLA]
- case '\u0161':
- // Å¡ [LATIN SMALL LETTER S WITH CARON]
- case '\u017F':
- // Å¿ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S]
- case '\u0219':
- // È™ [LATIN SMALL LETTER S WITH COMMA BELOW]
- case '\u023F':
- // È¿ [LATIN SMALL LETTER S WITH SWASH TAIL]
- case '\u0282':
- // Ê‚ [LATIN SMALL LETTER S WITH HOOK]
- case '\u1D74':
- // áµ´ [LATIN SMALL LETTER S WITH MIDDLE TILDE]
- case '\u1D8A':
- // á¶Š[LATIN SMALL LETTER S WITH PALATAL HOOK]
- case '\u1E61':
- // ṡ [LATIN SMALL LETTER S WITH DOT ABOVE]
- case '\u1E63':
- // á¹£ [LATIN SMALL LETTER S WITH DOT BELOW]
- case '\u1E65':
- // á¹¥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
- case '\u1E67':
- // á¹§ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
- case '\u1E69':
- // ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
- case '\u1E9C':
- // ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
- case '\u1E9D':
- // � [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
- case '\u24E2':
- // â“¢ [CIRCLED LATIN SMALL LETTER S]
- case '\uA784':
- // êž„ [LATIN CAPITAL LETTER INSULAR S]
- case '\uFF53': // s [FULLWIDTH LATIN SMALL LETTER S]
- output[opos++] = 's';
- break;
-
- case '\u1E9E': // ẞ [LATIN CAPITAL LETTER SHARP S]
- output[opos++] = 'S';
- output[opos++] = 'S';
- break;
-
- case '\u24AE': // â’® [PARENTHESIZED LATIN SMALL LETTER S]
- output[opos++] = '(';
- output[opos++] = 's';
- output[opos++] = ')';
- break;
-
- case '\u00DF': // ß [LATIN SMALL LETTER SHARP S]
- output[opos++] = 's';
- output[opos++] = 's';
- break;
-
- case '\uFB06': // st[LATIN SMALL LIGATURE ST]
- output[opos++] = 's';
- output[opos++] = 't';
- break;
-
- case '\u0162':
- // Ţ [LATIN CAPITAL LETTER T WITH CEDILLA]
- case '\u0164':
- // Ť [LATIN CAPITAL LETTER T WITH CARON]
- case '\u0166':
- // Ŧ [LATIN CAPITAL LETTER T WITH STROKE]
- case '\u01AC':
- // Ƭ [LATIN CAPITAL LETTER T WITH HOOK]
- case '\u01AE':
- // Æ® [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
- case '\u021A':
- // Èš [LATIN CAPITAL LETTER T WITH COMMA BELOW]
- case '\u023E':
- // Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
- case '\u1D1B':
- // á´› [LATIN LETTER SMALL CAPITAL T]
- case '\u1E6A':
- // Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE]
- case '\u1E6C':
- // Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW]
- case '\u1E6E':
- // á¹® [LATIN CAPITAL LETTER T WITH LINE BELOW]
- case '\u1E70':
- // á¹° [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
- case '\u24C9':
- // Ⓣ [CIRCLED LATIN CAPITAL LETTER T]
- case '\uA786':
- // Ꞇ[LATIN CAPITAL LETTER INSULAR T]
- case '\uFF34': // ï¼´ [FULLWIDTH LATIN CAPITAL LETTER T]
- output[opos++] = 'T';
- break;
-
- case '\u0163':
- // ţ [LATIN SMALL LETTER T WITH CEDILLA]
- case '\u0165':
- // Ã…Â¥ [LATIN SMALL LETTER T WITH CARON]
- case '\u0167':
- // ŧ [LATIN SMALL LETTER T WITH STROKE]
- case '\u01AB':
- // Æ« [LATIN SMALL LETTER T WITH PALATAL HOOK]
- case '\u01AD':
- // Æ [LATIN SMALL LETTER T WITH HOOK]
- case '\u021B':
- // È› [LATIN SMALL LETTER T WITH COMMA BELOW]
- case '\u0236':
- // ȶ [LATIN SMALL LETTER T WITH CURL]
- case '\u0287':
- // ʇ [LATIN SMALL LETTER TURNED T]
- case '\u0288':
- // ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
- case '\u1D75':
- // áµµ [LATIN SMALL LETTER T WITH MIDDLE TILDE]
- case '\u1E6B':
- // ṫ [LATIN SMALL LETTER T WITH DOT ABOVE]
- case '\u1E6D':
- // á¹ [LATIN SMALL LETTER T WITH DOT BELOW]
- case '\u1E6F':
- // ṯ [LATIN SMALL LETTER T WITH LINE BELOW]
- case '\u1E71':
- // á¹± [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
- case '\u1E97':
- // ẗ [LATIN SMALL LETTER T WITH DIAERESIS]
- case '\u24E3':
- // â“£ [CIRCLED LATIN SMALL LETTER T]
- case '\u2C66':
- // ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
- case '\uFF54': // �? [FULLWIDTH LATIN SMALL LETTER T]
- output[opos++] = 't';
- break;
-
- case '\u00DE':
- // Þ [LATIN CAPITAL LETTER THORN]
- case '\uA766': // � [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
- output[opos++] = 'T';
- output[opos++] = 'H';
- break;
-
- case '\uA728': // Ꜩ [LATIN CAPITAL LETTER TZ]
- output[opos++] = 'T';
- output[opos++] = 'Z';
- break;
-
- case '\u24AF': // â’¯ [PARENTHESIZED LATIN SMALL LETTER T]
- output[opos++] = '(';
- output[opos++] = 't';
- output[opos++] = ')';
- break;
-
- case '\u02A8': // ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
- output[opos++] = 't';
- output[opos++] = 'c';
- break;
-
- case '\u00FE':
- // þ [LATIN SMALL LETTER THORN]
- case '\u1D7A':
- // ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
- case '\uA767': // � [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
- output[opos++] = 't';
- output[opos++] = 'h';
- break;
-
- case '\u02A6': // ʦ [LATIN SMALL LETTER TS DIGRAPH]
- output[opos++] = 't';
- output[opos++] = 's';
- break;
-
- case '\uA729': // ꜩ [LATIN SMALL LETTER TZ]
- output[opos++] = 't';
- output[opos++] = 'z';
- break;
-
- case '\u00D9':
- // Ù [LATIN CAPITAL LETTER U WITH GRAVE]
- case '\u00DA':
- // Ú [LATIN CAPITAL LETTER U WITH ACUTE]
- case '\u00DB':
- // Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
- case '\u00DC':
- // Ü [LATIN CAPITAL LETTER U WITH DIAERESIS]
- case '\u0168':
- // Ũ [LATIN CAPITAL LETTER U WITH TILDE]
- case '\u016A':
- // Ū [LATIN CAPITAL LETTER U WITH MACRON]
- case '\u016C':
- // Ŭ [LATIN CAPITAL LETTER U WITH BREVE]
- case '\u016E':
- // Å® [LATIN CAPITAL LETTER U WITH RING ABOVE]
- case '\u0170':
- // Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
- case '\u0172':
- // Ų [LATIN CAPITAL LETTER U WITH OGONEK]
- case '\u01AF':
- // Ư [LATIN CAPITAL LETTER U WITH HORN]
- case '\u01D3':
- // Ç“ [LATIN CAPITAL LETTER U WITH CARON]
- case '\u01D5':
- // Ç• [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
- case '\u01D7':
- // Ç— [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
- case '\u01D9':
- // Ç™ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
- case '\u01DB':
- // Ç› [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
- case '\u0214':
- // �? [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
- case '\u0216':
- // È– [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
- case '\u0244':
- // É„ [LATIN CAPITAL LETTER U BAR]
- case '\u1D1C':
- // ᴜ [LATIN LETTER SMALL CAPITAL U]
- case '\u1D7E':
- // áµ¾ [LATIN SMALL CAPITAL LETTER U WITH STROKE]
- case '\u1E72':
- // á¹² [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
- case '\u1E74':
- // á¹´ [LATIN CAPITAL LETTER U WITH TILDE BELOW]
- case '\u1E76':
- // á¹¶ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
- case '\u1E78':
- // Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
- case '\u1E7A':
- // Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
- case '\u1EE4':
- // Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW]
- case '\u1EE6':
- // Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
- case '\u1EE8':
- // Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
- case '\u1EEA':
- // Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
- case '\u1EEC':
- // Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
- case '\u1EEE':
- // á»® [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
- case '\u1EF0':
- // á»° [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
- case '\u24CA':
- // Ⓤ[CIRCLED LATIN CAPITAL LETTER U]
- case '\uFF35': // ï¼µ [FULLWIDTH LATIN CAPITAL LETTER U]
- output[opos++] = 'U';
- break;
-
- case '\u00F9':
- // ù [LATIN SMALL LETTER U WITH GRAVE]
- case '\u00FA':
- // ú [LATIN SMALL LETTER U WITH ACUTE]
- case '\u00FB':
- // û [LATIN SMALL LETTER U WITH CIRCUMFLEX]
- case '\u00FC':
- // ü [LATIN SMALL LETTER U WITH DIAERESIS]
- case '\u0169':
- // Å© [LATIN SMALL LETTER U WITH TILDE]
- case '\u016B':
- // Å« [LATIN SMALL LETTER U WITH MACRON]
- case '\u016D':
- // Ã…Â [LATIN SMALL LETTER U WITH BREVE]
- case '\u016F':
- // ů [LATIN SMALL LETTER U WITH RING ABOVE]
- case '\u0171':
- // ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
- case '\u0173':
- // ų [LATIN SMALL LETTER U WITH OGONEK]
- case '\u01B0':
- // ư [LATIN SMALL LETTER U WITH HORN]
- case '\u01D4':
- // �? [LATIN SMALL LETTER U WITH CARON]
- case '\u01D6':
- // Ç– [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
- case '\u01D8':
- // ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
- case '\u01DA':
- // Çš [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
- case '\u01DC':
- // ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
- case '\u0215':
- // È• [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
- case '\u0217':
- // È— [LATIN SMALL LETTER U WITH INVERTED BREVE]
- case '\u0289':
- // ʉ [LATIN SMALL LETTER U BAR]
- case '\u1D64':
- // ᵤ [LATIN SUBSCRIPT SMALL LETTER U]
- case '\u1D99':
- // á¶™ [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
- case '\u1E73':
- // á¹³ [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
- case '\u1E75':
- // á¹µ [LATIN SMALL LETTER U WITH TILDE BELOW]
- case '\u1E77':
- // á¹· [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
- case '\u1E79':
- // á¹¹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
- case '\u1E7B':
- // á¹» [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
- case '\u1EE5':
- // ụ [LATIN SMALL LETTER U WITH DOT BELOW]
- case '\u1EE7':
- // á»§ [LATIN SMALL LETTER U WITH HOOK ABOVE]
- case '\u1EE9':
- // ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE]
- case '\u1EEB':
- // ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE]
- case '\u1EED':
- // á» [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
- case '\u1EEF':
- // ữ [LATIN SMALL LETTER U WITH HORN AND TILDE]
- case '\u1EF1':
- // á»± [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
- case '\u24E4':
- // ⓤ [CIRCLED LATIN SMALL LETTER U]
- case '\uFF55': // u [FULLWIDTH LATIN SMALL LETTER U]
- output[opos++] = 'u';
- break;
-
- case '\u24B0': // â’° [PARENTHESIZED LATIN SMALL LETTER U]
- output[opos++] = '(';
- output[opos++] = 'u';
- output[opos++] = ')';
- break;
-
- case '\u1D6B': // ᵫ [LATIN SMALL LETTER UE]
- output[opos++] = 'u';
- output[opos++] = 'e';
- break;
-
- case '\u01B2':
- // Ʋ [LATIN CAPITAL LETTER V WITH HOOK]
- case '\u0245':
- // É… [LATIN CAPITAL LETTER TURNED V]
- case '\u1D20':
- // á´ [LATIN LETTER SMALL CAPITAL V]
- case '\u1E7C':
- // á¹¼ [LATIN CAPITAL LETTER V WITH TILDE]
- case '\u1E7E':
- // á¹¾ [LATIN CAPITAL LETTER V WITH DOT BELOW]
- case '\u1EFC':
- // Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V]
- case '\u24CB':
- // â“‹ [CIRCLED LATIN CAPITAL LETTER V]
- case '\uA75E':
- // � [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
- case '\uA768':
- // � [LATIN CAPITAL LETTER VEND]
- case '\uFF36': // ï¼¶ [FULLWIDTH LATIN CAPITAL LETTER V]
- output[opos++] = 'V';
- break;
-
- case '\u028B':
- // Ê‹ [LATIN SMALL LETTER V WITH HOOK]
- case '\u028C':
- // ʌ [LATIN SMALL LETTER TURNED V]
- case '\u1D65':
- // áµ¥ [LATIN SUBSCRIPT SMALL LETTER V]
- case '\u1D8C':
- // ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK]
- case '\u1E7D':
- // á¹½ [LATIN SMALL LETTER V WITH TILDE]
- case '\u1E7F':
- // ṿ [LATIN SMALL LETTER V WITH DOT BELOW]
- case '\u24E5':
- // â“¥ [CIRCLED LATIN SMALL LETTER V]
- case '\u2C71':
- // â±± [LATIN SMALL LETTER V WITH RIGHT HOOK]
- case '\u2C74':
- // â±´ [LATIN SMALL LETTER V WITH CURL]
- case '\uA75F':
- // � [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
- case '\uFF56': // ï½– [FULLWIDTH LATIN SMALL LETTER V]
- output[opos++] = 'v';
- break;
-
- case '\uA760': // �[LATIN CAPITAL LETTER VY]
- output[opos++] = 'V';
- output[opos++] = 'Y';
- break;
-
- case '\u24B1': // â’± [PARENTHESIZED LATIN SMALL LETTER V]
- output[opos++] = '(';
- output[opos++] = 'v';
- output[opos++] = ')';
- break;
-
- case '\uA761': // � [LATIN SMALL LETTER VY]
- output[opos++] = 'v';
- output[opos++] = 'y';
- break;
-
- case '\u0174':
- // Å´ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
- case '\u01F7':
- // Ç· http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN]
- case '\u1D21':
- // á´¡ [LATIN LETTER SMALL CAPITAL W]
- case '\u1E80':
- // Ẁ [LATIN CAPITAL LETTER W WITH GRAVE]
- case '\u1E82':
- // Ẃ [LATIN CAPITAL LETTER W WITH ACUTE]
- case '\u1E84':
- // Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS]
- case '\u1E86':
- // Ẇ[LATIN CAPITAL LETTER W WITH DOT ABOVE]
- case '\u1E88':
- // Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW]
- case '\u24CC':
- // Ⓦ [CIRCLED LATIN CAPITAL LETTER W]
- case '\u2C72':
- // â±² [LATIN CAPITAL LETTER W WITH HOOK]
- case '\uFF37': // ï¼· [FULLWIDTH LATIN CAPITAL LETTER W]
- output[opos++] = 'W';
- break;
-
- case '\u0175':
- // ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX]
- case '\u01BF':
- // Æ¿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN]
- case '\u028D':
- // � [LATIN SMALL LETTER TURNED W]
- case '\u1E81':
- // � [LATIN SMALL LETTER W WITH GRAVE]
- case '\u1E83':
- // ẃ [LATIN SMALL LETTER W WITH ACUTE]
- case '\u1E85':
- // ẅ [LATIN SMALL LETTER W WITH DIAERESIS]
- case '\u1E87':
- // ẇ [LATIN SMALL LETTER W WITH DOT ABOVE]
- case '\u1E89':
- // ẉ [LATIN SMALL LETTER W WITH DOT BELOW]
- case '\u1E98':
- // ẘ [LATIN SMALL LETTER W WITH RING ABOVE]
- case '\u24E6':
- // ⓦ [CIRCLED LATIN SMALL LETTER W]
- case '\u2C73':
- // â±³ [LATIN SMALL LETTER W WITH HOOK]
- case '\uFF57': // ï½— [FULLWIDTH LATIN SMALL LETTER W]
- output[opos++] = 'w';
- break;
-
- case '\u24B2': // â’² [PARENTHESIZED LATIN SMALL LETTER W]
- output[opos++] = '(';
- output[opos++] = 'w';
- output[opos++] = ')';
- break;
-
- case '\u1E8A':
- // Ẋ[LATIN CAPITAL LETTER X WITH DOT ABOVE]
- case '\u1E8C':
- // Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS]
- case '\u24CD':
- // � [CIRCLED LATIN CAPITAL LETTER X]
- case '\uFF38': // X [FULLWIDTH LATIN CAPITAL LETTER X]
- output[opos++] = 'X';
- break;
-
- case '\u1D8D':
- // � [LATIN SMALL LETTER X WITH PALATAL HOOK]
- case '\u1E8B':
- // ẋ [LATIN SMALL LETTER X WITH DOT ABOVE]
- case '\u1E8D':
- // � [LATIN SMALL LETTER X WITH DIAERESIS]
- case '\u2093':
- // â‚“ [LATIN SUBSCRIPT SMALL LETTER X]
- case '\u24E7':
- // â“§ [CIRCLED LATIN SMALL LETTER X]
- case '\uFF58': // x [FULLWIDTH LATIN SMALL LETTER X]
- output[opos++] = 'x';
- break;
-
- case '\u24B3': // â’³ [PARENTHESIZED LATIN SMALL LETTER X]
- output[opos++] = '(';
- output[opos++] = 'x';
- output[opos++] = ')';
- break;
-
- case '\u00DD':
- // � [LATIN CAPITAL LETTER Y WITH ACUTE]
- case '\u0176':
- // Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
- case '\u0178':
- // Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS]
- case '\u01B3':
- // Ƴ [LATIN CAPITAL LETTER Y WITH HOOK]
- case '\u0232':
- // Ȳ [LATIN CAPITAL LETTER Y WITH MACRON]
- case '\u024E':
- // ÉŽ [LATIN CAPITAL LETTER Y WITH STROKE]
- case '\u028F':
- // � [LATIN LETTER SMALL CAPITAL Y]
- case '\u1E8E':
- // Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
- case '\u1EF2':
- // Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE]
- case '\u1EF4':
- // á»´ [LATIN CAPITAL LETTER Y WITH DOT BELOW]
- case '\u1EF6':
- // á»¶ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
- case '\u1EF8':
- // Ỹ [LATIN CAPITAL LETTER Y WITH TILDE]
- case '\u1EFE':
- // Ỿ [LATIN CAPITAL LETTER Y WITH LOOP]
- case '\u24CE':
- // Ⓨ [CIRCLED LATIN CAPITAL LETTER Y]
- case '\uFF39': // ï¼¹ [FULLWIDTH LATIN CAPITAL LETTER Y]
- output[opos++] = 'Y';
- break;
-
- case '\u00FD':
- // ý [LATIN SMALL LETTER Y WITH ACUTE]
- case '\u00FF':
- // ÿ [LATIN SMALL LETTER Y WITH DIAERESIS]
- case '\u0177':
- // Å· [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
- case '\u01B4':
- // Æ´ [LATIN SMALL LETTER Y WITH HOOK]
- case '\u0233':
- // ȳ [LATIN SMALL LETTER Y WITH MACRON]
- case '\u024F':
- // � [LATIN SMALL LETTER Y WITH STROKE]
- case '\u028E':
- // ÊŽ [LATIN SMALL LETTER TURNED Y]
- case '\u1E8F':
- // � [LATIN SMALL LETTER Y WITH DOT ABOVE]
- case '\u1E99':
- // ẙ [LATIN SMALL LETTER Y WITH RING ABOVE]
- case '\u1EF3':
- // ỳ [LATIN SMALL LETTER Y WITH GRAVE]
- case '\u1EF5':
- // ỵ [LATIN SMALL LETTER Y WITH DOT BELOW]
- case '\u1EF7':
- // á»· [LATIN SMALL LETTER Y WITH HOOK ABOVE]
- case '\u1EF9':
- // ỹ [LATIN SMALL LETTER Y WITH TILDE]
- case '\u1EFF':
- // ỿ [LATIN SMALL LETTER Y WITH LOOP]
- case '\u24E8':
- // ⓨ [CIRCLED LATIN SMALL LETTER Y]
- case '\uFF59': // ï½™ [FULLWIDTH LATIN SMALL LETTER Y]
- output[opos++] = 'y';
- break;
-
- case '\u24B4': // â’´ [PARENTHESIZED LATIN SMALL LETTER Y]
- output[opos++] = '(';
- output[opos++] = 'y';
- output[opos++] = ')';
- break;
-
- case '\u0179':
- // Ź [LATIN CAPITAL LETTER Z WITH ACUTE]
- case '\u017B':
- // Å» [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
- case '\u017D':
- // Ž [LATIN CAPITAL LETTER Z WITH CARON]
- case '\u01B5':
- // Ƶ [LATIN CAPITAL LETTER Z WITH STROKE]
- case '\u021C':
- // Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH]
- case '\u0224':
- // Ȥ [LATIN CAPITAL LETTER Z WITH HOOK]
- case '\u1D22':
- // á´¢ [LATIN LETTER SMALL CAPITAL Z]
- case '\u1E90':
- // � [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
- case '\u1E92':
- // Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW]
- case '\u1E94':
- // �? [LATIN CAPITAL LETTER Z WITH LINE BELOW]
- case '\u24CF':
- // � [CIRCLED LATIN CAPITAL LETTER Z]
- case '\u2C6B':
- // Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER]
- case '\uA762':
- // � [LATIN CAPITAL LETTER VISIGOTHIC Z]
- case '\uFF3A': // Z [FULLWIDTH LATIN CAPITAL LETTER Z]
- output[opos++] = 'Z';
- break;
-
- case '\u017A':
- // ź [LATIN SMALL LETTER Z WITH ACUTE]
- case '\u017C':
- // ż [LATIN SMALL LETTER Z WITH DOT ABOVE]
- case '\u017E':
- // ž [LATIN SMALL LETTER Z WITH CARON]
- case '\u01B6':
- // ƶ [LATIN SMALL LETTER Z WITH STROKE]
- case '\u021D':
- // � http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH]
- case '\u0225':
- // ȥ [LATIN SMALL LETTER Z WITH HOOK]
- case '\u0240':
- // ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL]
- case '\u0290':
- // � [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
- case '\u0291':
- // Ê‘ [LATIN SMALL LETTER Z WITH CURL]
- case '\u1D76':
- // áµ¶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
- case '\u1D8E':
- // á¶Ž [LATIN SMALL LETTER Z WITH PALATAL HOOK]
- case '\u1E91':
- // ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
- case '\u1E93':
- // ẓ [LATIN SMALL LETTER Z WITH DOT BELOW]
- case '\u1E95':
- // ẕ [LATIN SMALL LETTER Z WITH LINE BELOW]
- case '\u24E9':
- // â“© [CIRCLED LATIN SMALL LETTER Z]
- case '\u2C6C':
- // ⱬ [LATIN SMALL LETTER Z WITH DESCENDER]
- case '\uA763':
- // � [LATIN SMALL LETTER VISIGOTHIC Z]
- case '\uFF5A': // z [FULLWIDTH LATIN SMALL LETTER Z]
- output[opos++] = 'z';
- break;
-
- case '\u24B5': // â’µ [PARENTHESIZED LATIN SMALL LETTER Z]
- output[opos++] = '(';
- output[opos++] = 'z';
- output[opos++] = ')';
- break;
-
- case '\u2070':
- // � [SUPERSCRIPT ZERO]
- case '\u2080':
- // â‚€ [SUBSCRIPT ZERO]
- case '\u24EA':
- // ⓪ [CIRCLED DIGIT ZERO]
- case '\u24FF':
- // â“¿ [NEGATIVE CIRCLED DIGIT ZERO]
- case '\uFF10': // � [FULLWIDTH DIGIT ZERO]
- output[opos++] = '0';
- break;
-
- case '\u00B9':
- // ¹ [SUPERSCRIPT ONE]
- case '\u2081':
- // � [SUBSCRIPT ONE]
- case '\u2460':
- // â‘ [CIRCLED DIGIT ONE]
- case '\u24F5':
- // ⓵ [DOUBLE CIRCLED DIGIT ONE]
- case '\u2776':
- // � [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
- case '\u2780':
- // ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
- case '\u278A':
- // ➊[DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
- case '\uFF11': // 1 [FULLWIDTH DIGIT ONE]
- output[opos++] = '1';
- break;
-
- case '\u2488': // â’ˆ [DIGIT ONE FULL STOP]
- output[opos++] = '1';
- output[opos++] = '.';
- break;
-
- case '\u2474': // â‘´ [PARENTHESIZED DIGIT ONE]
- output[opos++] = '(';
- output[opos++] = '1';
- output[opos++] = ')';
- break;
-
- case '\u00B2':
- // ² [SUPERSCRIPT TWO]
- case '\u2082':
- // â‚‚ [SUBSCRIPT TWO]
- case '\u2461':
- // â‘¡ [CIRCLED DIGIT TWO]
- case '\u24F6':
- // â“¶ [DOUBLE CIRCLED DIGIT TWO]
- case '\u2777':
- // � [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
- case '\u2781':
- // � [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
- case '\u278B':
- // âž‹ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
- case '\uFF12': // ï¼’ [FULLWIDTH DIGIT TWO]
- output[opos++] = '2';
- break;
-
- case '\u2489': // â’‰ [DIGIT TWO FULL STOP]
- output[opos++] = '2';
- output[opos++] = '.';
- break;
-
- case '\u2475': // ⑵ [PARENTHESIZED DIGIT TWO]
- output[opos++] = '(';
- output[opos++] = '2';
- output[opos++] = ')';
- break;
-
- case '\u00B3':
- // ³ [SUPERSCRIPT THREE]
- case '\u2083':
- // ₃ [SUBSCRIPT THREE]
- case '\u2462':
- // â‘¢ [CIRCLED DIGIT THREE]
- case '\u24F7':
- // â“· [DOUBLE CIRCLED DIGIT THREE]
- case '\u2778':
- // � [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
- case '\u2782':
- // âž‚ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
- case '\u278C':
- // ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
- case '\uFF13': // 3 [FULLWIDTH DIGIT THREE]
- output[opos++] = '3';
- break;
-
- case '\u248A': // â’Š[DIGIT THREE FULL STOP]
- output[opos++] = '3';
- output[opos++] = '.';
- break;
-
- case '\u2476': // â‘¶ [PARENTHESIZED DIGIT THREE]
- output[opos++] = '(';
- output[opos++] = '3';
- output[opos++] = ')';
- break;
-
- case '\u2074':
- // � [SUPERSCRIPT FOUR]
- case '\u2084':
- // â‚„ [SUBSCRIPT FOUR]
- case '\u2463':
- // â‘£ [CIRCLED DIGIT FOUR]
- case '\u24F8':
- // ⓸ [DOUBLE CIRCLED DIGIT FOUR]
- case '\u2779':
- // � [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
- case '\u2783':
- // ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
- case '\u278D':
- // � [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
- case '\uFF14': // �? [FULLWIDTH DIGIT FOUR]
- output[opos++] = '4';
- break;
-
- case '\u248B': // â’‹ [DIGIT FOUR FULL STOP]
- output[opos++] = '4';
- output[opos++] = '.';
- break;
-
- case '\u2477': // â‘· [PARENTHESIZED DIGIT FOUR]
- output[opos++] = '(';
- output[opos++] = '4';
- output[opos++] = ')';
- break;
-
- case '\u2075':
- // � [SUPERSCRIPT FIVE]
- case '\u2085':
- // â‚… [SUBSCRIPT FIVE]
- case '\u2464':
- // ⑤ [CIRCLED DIGIT FIVE]
- case '\u24F9':
- // ⓹ [DOUBLE CIRCLED DIGIT FIVE]
- case '\u277A':
- // � [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
- case '\u2784':
- // âž„ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
- case '\u278E':
- // ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
- case '\uFF15': // 5 [FULLWIDTH DIGIT FIVE]
- output[opos++] = '5';
- break;
-
- case '\u248C': // ⒌ [DIGIT FIVE FULL STOP]
- output[opos++] = '5';
- output[opos++] = '.';
- break;
-
- case '\u2478': // ⑸ [PARENTHESIZED DIGIT FIVE]
- output[opos++] = '(';
- output[opos++] = '5';
- output[opos++] = ')';
- break;
-
- case '\u2076':
- // � [SUPERSCRIPT SIX]
- case '\u2086':
- // ₆[SUBSCRIPT SIX]
- case '\u2465':
- // â‘¥ [CIRCLED DIGIT SIX]
- case '\u24FA':
- // ⓺ [DOUBLE CIRCLED DIGIT SIX]
- case '\u277B':
- // � [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
- case '\u2785':
- // âž… [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
- case '\u278F':
- // � [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
- case '\uFF16': // ï¼– [FULLWIDTH DIGIT SIX]
- output[opos++] = '6';
- break;
-
- case '\u248D': // â’� [DIGIT SIX FULL STOP]
- output[opos++] = '6';
- output[opos++] = '.';
- break;
-
- case '\u2479': // ⑹ [PARENTHESIZED DIGIT SIX]
- output[opos++] = '(';
- output[opos++] = '6';
- output[opos++] = ')';
- break;
-
- case '\u2077':
- // � [SUPERSCRIPT SEVEN]
- case '\u2087':
- // ₇ [SUBSCRIPT SEVEN]
- case '\u2466':
- // ⑦ [CIRCLED DIGIT SEVEN]
- case '\u24FB':
- // â“» [DOUBLE CIRCLED DIGIT SEVEN]
- case '\u277C':
- // � [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
- case '\u2786':
- // ➆[DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
- case '\u2790':
- // � [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
- case '\uFF17': // ï¼— [FULLWIDTH DIGIT SEVEN]
- output[opos++] = '7';
- break;
-
- case '\u248E': // â’Ž [DIGIT SEVEN FULL STOP]
- output[opos++] = '7';
- output[opos++] = '.';
- break;
-
- case '\u247A': // ⑺ [PARENTHESIZED DIGIT SEVEN]
- output[opos++] = '(';
- output[opos++] = '7';
- output[opos++] = ')';
- break;
-
- case '\u2078':
- // � [SUPERSCRIPT EIGHT]
- case '\u2088':
- // ₈ [SUBSCRIPT EIGHT]
- case '\u2467':
- // â‘§ [CIRCLED DIGIT EIGHT]
- case '\u24FC':
- // ⓼ [DOUBLE CIRCLED DIGIT EIGHT]
- case '\u277D':
- // � [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
- case '\u2787':
- // ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
- case '\u2791':
- // âž‘ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
- case '\uFF18': // 8 [FULLWIDTH DIGIT EIGHT]
- output[opos++] = '8';
- break;
-
- case '\u248F': // â’� [DIGIT EIGHT FULL STOP]
- output[opos++] = '8';
- output[opos++] = '.';
- break;
-
- case '\u247B': // â‘» [PARENTHESIZED DIGIT EIGHT]
- output[opos++] = '(';
- output[opos++] = '8';
- output[opos++] = ')';
- break;
-
- case '\u2079':
- // � [SUPERSCRIPT NINE]
- case '\u2089':
- // ₉ [SUBSCRIPT NINE]
- case '\u2468':
- // ⑨ [CIRCLED DIGIT NINE]
- case '\u24FD':
- // ⓽ [DOUBLE CIRCLED DIGIT NINE]
- case '\u277E':
- // � [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
- case '\u2788':
- // ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
- case '\u2792':
- // âž’ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
- case '\uFF19': // ï¼™ [FULLWIDTH DIGIT NINE]
- output[opos++] = '9';
- break;
-
- case '\u2490': // â’� [DIGIT NINE FULL STOP]
- output[opos++] = '9';
- output[opos++] = '.';
- break;
-
- case '\u247C': // ⑼ [PARENTHESIZED DIGIT NINE]
- output[opos++] = '(';
- output[opos++] = '9';
- output[opos++] = ')';
- break;
-
- case '\u2469':
- // â‘© [CIRCLED NUMBER TEN]
- case '\u24FE':
- // ⓾ [DOUBLE CIRCLED NUMBER TEN]
- case '\u277F':
- // � [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
- case '\u2789':
- // ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
- case '\u2793': // âž“ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
- output[opos++] = '1';
- output[opos++] = '0';
- break;
-
- case '\u2491': // â’‘ [NUMBER TEN FULL STOP]
- output[opos++] = '1';
- output[opos++] = '0';
- output[opos++] = '.';
- break;
-
- case '\u247D': // ⑽ [PARENTHESIZED NUMBER TEN]
- output[opos++] = '(';
- output[opos++] = '1';
- output[opos++] = '0';
- output[opos++] = ')';
- break;
-
- case '\u246A':
- // ⑪ [CIRCLED NUMBER ELEVEN]
- case '\u24EB': // â“« [NEGATIVE CIRCLED NUMBER ELEVEN]
- output[opos++] = '1';
- output[opos++] = '1';
- break;
-
- case '\u2492': // â’’ [NUMBER ELEVEN FULL STOP]
- output[opos++] = '1';
- output[opos++] = '1';
- output[opos++] = '.';
- break;
-
- case '\u247E': // ⑾ [PARENTHESIZED NUMBER ELEVEN]
- output[opos++] = '(';
- output[opos++] = '1';
- output[opos++] = '1';
- output[opos++] = ')';
- break;
-
- case '\u246B':
- // â‘« [CIRCLED NUMBER TWELVE]
- case '\u24EC': // ⓬ [NEGATIVE CIRCLED NUMBER TWELVE]
- output[opos++] = '1';
- output[opos++] = '2';
- break;
-
- case '\u2493': // â’“ [NUMBER TWELVE FULL STOP]
- output[opos++] = '1';
- output[opos++] = '2';
- output[opos++] = '.';
- break;
-
- case '\u247F': // â‘¿ [PARENTHESIZED NUMBER TWELVE]
- output[opos++] = '(';
- output[opos++] = '1';
- output[opos++] = '2';
- output[opos++] = ')';
- break;
-
- case '\u246C':
- // ⑬ [CIRCLED NUMBER THIRTEEN]
- case '\u24ED': // â“ [NEGATIVE CIRCLED NUMBER THIRTEEN]
- output[opos++] = '1';
- output[opos++] = '3';
- break;
-
- case '\u2494': // â’�? [NUMBER THIRTEEN FULL STOP]
- output[opos++] = '1';
- output[opos++] = '3';
- output[opos++] = '.';
- break;
-
- case '\u2480': // â’€ [PARENTHESIZED NUMBER THIRTEEN]
- output[opos++] = '(';
- output[opos++] = '1';
- output[opos++] = '3';
- output[opos++] = ')';
- break;
-
- case '\u246D':
- // â‘ [CIRCLED NUMBER FOURTEEN]
- case '\u24EE': // â“® [NEGATIVE CIRCLED NUMBER FOURTEEN]
- output[opos++] = '1';
- output[opos++] = '4';
- break;
-
- case '\u2495': // â’• [NUMBER FOURTEEN FULL STOP]
- output[opos++] = '1';
- output[opos++] = '4';
- output[opos++] = '.';
- break;
-
- case '\u2481': // â’� [PARENTHESIZED NUMBER FOURTEEN]
- output[opos++] = '(';
- output[opos++] = '1';
- output[opos++] = '4';
- output[opos++] = ')';
- break;
-
- case '\u246E':
- // â‘® [CIRCLED NUMBER FIFTEEN]
- case '\u24EF': // ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN]
- output[opos++] = '1';
- output[opos++] = '5';
- break;
-
- case '\u2496': // â’– [NUMBER FIFTEEN FULL STOP]
- output[opos++] = '1';
- output[opos++] = '5';
- output[opos++] = '.';
- break;
-
- case '\u2482': // â’‚ [PARENTHESIZED NUMBER FIFTEEN]
- output[opos++] = '(';
- output[opos++] = '1';
- output[opos++] = '5';
- output[opos++] = ')';
- break;
-
- case '\u246F':
- // ⑯ [CIRCLED NUMBER SIXTEEN]
- case '\u24F0': // â“° [NEGATIVE CIRCLED NUMBER SIXTEEN]
- output[opos++] = '1';
- output[opos++] = '6';
- break;
-
- case '\u2497': // â’— [NUMBER SIXTEEN FULL STOP]
- output[opos++] = '1';
- output[opos++] = '6';
- output[opos++] = '.';
- break;
-
- case '\u2483': // â’ƒ [PARENTHESIZED NUMBER SIXTEEN]
- output[opos++] = '(';
- output[opos++] = '1';
- output[opos++] = '6';
- output[opos++] = ')';
- break;
-
- case '\u2470':
- // â‘° [CIRCLED NUMBER SEVENTEEN]
- case '\u24F1': // ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN]
- output[opos++] = '1';
- output[opos++] = '7';
- break;
-
- case '\u2498': // â’˜ [NUMBER SEVENTEEN FULL STOP]
- output[opos++] = '1';
- output[opos++] = '7';
- output[opos++] = '.';
- break;
-
- case '\u2484': // â’„ [PARENTHESIZED NUMBER SEVENTEEN]
- output[opos++] = '(';
- output[opos++] = '1';
- output[opos++] = '7';
- output[opos++] = ')';
- break;
-
- case '\u2471':
- // ⑱ [CIRCLED NUMBER EIGHTEEN]
- case '\u24F2': // ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN]
- output[opos++] = '1';
- output[opos++] = '8';
- break;
-
- case '\u2499': // â’™ [NUMBER EIGHTEEN FULL STOP]
- output[opos++] = '1';
- output[opos++] = '8';
- output[opos++] = '.';
- break;
-
- case '\u2485': // â’… [PARENTHESIZED NUMBER EIGHTEEN]
- output[opos++] = '(';
- output[opos++] = '1';
- output[opos++] = '8';
- output[opos++] = ')';
- break;
-
- case '\u2472':
- // ⑲ [CIRCLED NUMBER NINETEEN]
- case '\u24F3': // ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN]
- output[opos++] = '1';
- output[opos++] = '9';
- break;
-
- case '\u249A': // â’š [NUMBER NINETEEN FULL STOP]
- output[opos++] = '1';
- output[opos++] = '9';
- output[opos++] = '.';
- break;
-
- case '\u2486': // â’†[PARENTHESIZED NUMBER NINETEEN]
- output[opos++] = '(';
- output[opos++] = '1';
- output[opos++] = '9';
- output[opos++] = ')';
- break;
-
- case '\u2473':
- // ⑳ [CIRCLED NUMBER TWENTY]
- case '\u24F4': // â“´ [NEGATIVE CIRCLED NUMBER TWENTY]
- output[opos++] = '2';
- output[opos++] = '0';
- break;
-
- case '\u249B': // â’› [NUMBER TWENTY FULL STOP]
- output[opos++] = '2';
- output[opos++] = '0';
- output[opos++] = '.';
- break;
-
- case '\u2487': // â’‡ [PARENTHESIZED NUMBER TWENTY]
- output[opos++] = '(';
- output[opos++] = '2';
- output[opos++] = '0';
- output[opos++] = ')';
- break;
-
- case '\u00AB':
- // « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
- case '\u00BB':
- // » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
- case '\u201C':
- // “ [LEFT DOUBLE QUOTATION MARK]
- case '\u201D':
- // � [RIGHT DOUBLE QUOTATION MARK]
- case '\u201E':
- // „ [DOUBLE LOW-9 QUOTATION MARK]
- case '\u2033':
- // ″ [DOUBLE PRIME]
- case '\u2036':
- // ‶ [REVERSED DOUBLE PRIME]
- case '\u275D':
- // � [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
- case '\u275E':
- // � [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
- case '\u276E':
- // � [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
- case '\u276F':
- // � [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
- case '\uFF02': // " [FULLWIDTH QUOTATION MARK]
- output[opos++] = '"';
- break;
-
- case '\u2018':
- // ‘ [LEFT SINGLE QUOTATION MARK]
- case '\u2019':
- // ’ [RIGHT SINGLE QUOTATION MARK]
- case '\u201A':
- // ‚ [SINGLE LOW-9 QUOTATION MARK]
- case '\u201B':
- // ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
- case '\u2032':
- // ′ [PRIME]
- case '\u2035':
- // ‵ [REVERSED PRIME]
- case '\u2039':
- // ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
- case '\u203A':
- // › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
- case '\u275B':
- // � [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
- case '\u275C':
- // � [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
- case '\uFF07': // ' [FULLWIDTH APOSTROPHE]
- output[opos++] = '\'';
- break;
-
- case '\u2010':
- // � [HYPHEN]
- case '\u2011':
- // ‑ [NON-BREAKING HYPHEN]
- case '\u2012':
- // ‒ [FIGURE DASH]
- case '\u2013':
- // – [EN DASH]
- case '\u2014':
- // �? [EM DASH]
- case '\u207B':
- // � [SUPERSCRIPT MINUS]
- case '\u208B':
- // â‚‹ [SUBSCRIPT MINUS]
- case '\uFF0D': // � [FULLWIDTH HYPHEN-MINUS]
- output[opos++] = '-';
- break;
-
- case '\u2045':
- // � [LEFT SQUARE BRACKET WITH QUILL]
- case '\u2772':
- // � [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
- case '\uFF3B': // ï¼» [FULLWIDTH LEFT SQUARE BRACKET]
- output[opos++] = '[';
- break;
-
- case '\u2046':
- // �[RIGHT SQUARE BRACKET WITH QUILL]
- case '\u2773':
- // � [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
- case '\uFF3D': // ï¼½ [FULLWIDTH RIGHT SQUARE BRACKET]
- output[opos++] = ']';
- break;
-
- case '\u207D':
- // � [SUPERSCRIPT LEFT PARENTHESIS]
- case '\u208D':
- // � [SUBSCRIPT LEFT PARENTHESIS]
- case '\u2768':
- // � [MEDIUM LEFT PARENTHESIS ORNAMENT]
- case '\u276A':
- // � [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
- case '\uFF08': // ( [FULLWIDTH LEFT PARENTHESIS]
- output[opos++] = '(';
- break;
-
- case '\u2E28': // ⸨ [LEFT DOUBLE PARENTHESIS]
- output[opos++] = '(';
- output[opos++] = '(';
- break;
-
- case '\u207E':
- // � [SUPERSCRIPT RIGHT PARENTHESIS]
- case '\u208E':
- // ₎ [SUBSCRIPT RIGHT PARENTHESIS]
- case '\u2769':
- // � [MEDIUM RIGHT PARENTHESIS ORNAMENT]
- case '\u276B':
- // � [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
- case '\uFF09': // ) [FULLWIDTH RIGHT PARENTHESIS]
- output[opos++] = ')';
- break;
-
- case '\u2E29': // ⸩ [RIGHT DOUBLE PARENTHESIS]
- output[opos++] = ')';
- output[opos++] = ')';
- break;
-
- case '\u276C':
- // � [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
- case '\u2770':
- // � [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
- case '\uFF1C': // < [FULLWIDTH LESS-THAN SIGN]
- output[opos++] = '<';
- break;
-
- case '\u276D':
- // �[MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
- case '\u2771':
- // � [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
- case '\uFF1E': // > [FULLWIDTH GREATER-THAN SIGN]
- output[opos++] = '>';
- break;
-
- case '\u2774':
- // � [MEDIUM LEFT CURLY BRACKET ORNAMENT]
- case '\uFF5B': // ï½› [FULLWIDTH LEFT CURLY BRACKET]
- output[opos++] = '{';
- break;
-
- case '\u2775':
- // � [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
- case '\uFF5D': // � [FULLWIDTH RIGHT CURLY BRACKET]
- output[opos++] = '}';
- break;
-
- case '\u207A':
- // � [SUPERSCRIPT PLUS SIGN]
- case '\u208A':
- // ₊[SUBSCRIPT PLUS SIGN]
- case '\uFF0B': // + [FULLWIDTH PLUS SIGN]
- output[opos++] = '+';
- break;
-
- case '\u207C':
- // � [SUPERSCRIPT EQUALS SIGN]
- case '\u208C':
- // ₌ [SUBSCRIPT EQUALS SIGN]
- case '\uFF1D': // � [FULLWIDTH EQUALS SIGN]
- output[opos++] = '=';
- break;
-
- case '\uFF01': // � [FULLWIDTH EXCLAMATION MARK]
- output[opos++] = '!';
- break;
-
- case '\u203C': // ‼ [DOUBLE EXCLAMATION MARK]
- output[opos++] = '!';
- output[opos++] = '!';
- break;
-
- case '\u2049': // � [EXCLAMATION QUESTION MARK]
- output[opos++] = '!';
- output[opos++] = '?';
- break;
-
- case '\uFF03': // # [FULLWIDTH NUMBER SIGN]
- output[opos++] = '#';
- break;
-
- case '\uFF04': // $ [FULLWIDTH DOLLAR SIGN]
- output[opos++] = '$';
- break;
-
- case '\u2052':
- // � [COMMERCIAL MINUS SIGN]
- case '\uFF05': // ï¼… [FULLWIDTH PERCENT SIGN]
- output[opos++] = '%';
- break;
-
- case '\uFF06': // &[FULLWIDTH AMPERSAND]
- output[opos++] = '&';
- break;
-
- case '\u204E':
- // � [LOW ASTERISK]
- case '\uFF0A': // *[FULLWIDTH ASTERISK]
- output[opos++] = '*';
- break;
-
- case '\uFF0C': // , [FULLWIDTH COMMA]
- output[opos++] = ',';
- break;
-
- case '\uFF0E': // . [FULLWIDTH FULL STOP]
- output[opos++] = '.';
- break;
-
- case '\u2044':
- // � [FRACTION SLASH]
- case '\uFF0F': // � [FULLWIDTH SOLIDUS]
- output[opos++] = '/';
- break;
-
- case '\uFF1A': // : [FULLWIDTH COLON]
- output[opos++] = ':';
- break;
-
- case '\u204F':
- // � [REVERSED SEMICOLON]
- case '\uFF1B': // ï¼› [FULLWIDTH SEMICOLON]
- output[opos++] = ';';
- break;
-
- case '\uFF1F': // ? [FULLWIDTH QUESTION MARK]
- output[opos++] = '?';
- break;
-
- case '\u2047': // � [DOUBLE QUESTION MARK]
- output[opos++] = '?';
- output[opos++] = '?';
- break;
-
- case '\u2048': // � [QUESTION EXCLAMATION MARK]
- output[opos++] = '?';
- output[opos++] = '!';
- break;
-
- case '\uFF20': // ï¼ [FULLWIDTH COMMERCIAL AT]
- output[opos++] = '@';
- break;
-
- case '\uFF3C': // ï¼¼ [FULLWIDTH REVERSE SOLIDUS]
- output[opos++] = '\\';
- break;
-
- case '\u2038':
- // ‸ [CARET]
- case '\uFF3E': // ï¼¾ [FULLWIDTH CIRCUMFLEX ACCENT]
- output[opos++] = '^';
- break;
-
- case '\uFF3F': // _ [FULLWIDTH LOW LINE]
- output[opos++] = '_';
- break;
-
- case '\u2053':
- // � [SWUNG DASH]
- case '\uFF5E': // ~ [FULLWIDTH TILDE]
- output[opos++] = '~';
- break;
-
- // BEGIN CUSTOM TRANSLITERATION OF CYRILIC CHARS
-
- // russian uppercase "А Б В Г Д Е Ё Ж З И Й К Л М Н О П Р С Т У Ф Х Ц Ч Ш Щ Ъ Ы Ь Э Ю Я"
- // russian lowercase "а б в г д е ё ж з и й к л м н о п р с т у ф х ц ч ш щ ъ ы ь э ю я"
-
- // notes
- // read http://www.vesic.org/english/blog/c-sharp/transliteration-easy-way-microsoft-transliteration-utility/
- // should we look into MS Transliteration Utility (http://msdn.microsoft.com/en-US/goglobal/bb688104.aspx)
- // also UnicodeSharpFork https://bitbucket.org/DimaStefantsov/unidecodesharpfork
- // also Transliterator http://transliterator.codeplex.com/
- //
- // in any case it would be good to generate all those "case" statements instead of writing them by hand
- // time for a T4 template?
- // also we should support extensibility so ppl can register more cases in external code
-
- // TODO: transliterates Анастасия as Anastasiya, and not Anastasia
- // Ольга --> Ol'ga, Татьяна --> Tat'yana -- that's bad (?)
- // Note: should ä (German umlaut) become a or ae ?
- case '\u0410': // А
- output[opos++] = 'A';
- break;
- case '\u0430': // а
- output[opos++] = 'a';
- break;
- case '\u0411': // Б
- output[opos++] = 'B';
- break;
- case '\u0431': // б
- output[opos++] = 'b';
- break;
- case '\u0412': // В
- output[opos++] = 'V';
- break;
- case '\u0432': // в
- output[opos++] = 'v';
- break;
- case '\u0413': // Г
- output[opos++] = 'G';
- break;
- case '\u0433': // г
- output[opos++] = 'g';
- break;
- case '\u0414': // Д
- output[opos++] = 'D';
- break;
- case '\u0434': // д
- output[opos++] = 'd';
- break;
- case '\u0415': // Е
- output[opos++] = 'E';
- break;
- case '\u0435': // е
- output[opos++] = 'e';
- break;
- case '\u0401': // Ё
- output[opos++] = 'E'; // alt. Yo
- break;
- case '\u0451': // ё
- output[opos++] = 'e'; // alt. yo
- break;
- case '\u0416': // Ж
- output[opos++] = 'Z';
- output[opos++] = 'h';
- break;
- case '\u0436': // ж
- output[opos++] = 'z';
- output[opos++] = 'h';
- break;
- case '\u0417': // З
- output[opos++] = 'Z';
- break;
- case '\u0437': // з
- output[opos++] = 'z';
- break;
- case '\u0418': // И
- output[opos++] = 'I';
- break;
- case '\u0438': // и
- output[opos++] = 'i';
- break;
- case '\u0419': // Й
- output[opos++] = 'I'; // alt. Y, J
- break;
- case '\u0439': // й
- output[opos++] = 'i'; // alt. y, j
- break;
- case '\u041A': // К
- output[opos++] = 'K';
- break;
- case '\u043A': // к
- output[opos++] = 'k';
- break;
- case '\u041B': // Л
- output[opos++] = 'L';
- break;
- case '\u043B': // л
- output[opos++] = 'l';
- break;
- case '\u041C': // М
- output[opos++] = 'M';
- break;
- case '\u043C': // м
- output[opos++] = 'm';
- break;
- case '\u041D': // Н
- output[opos++] = 'N';
- break;
- case '\u043D': // н
- output[opos++] = 'n';
- break;
- case '\u041E': // О
- output[opos++] = 'O';
- break;
- case '\u043E': // о
- output[opos++] = 'o';
- break;
- case '\u041F': // П
- output[opos++] = 'P';
- break;
- case '\u043F': // п
- output[opos++] = 'p';
- break;
- case '\u0420': // Р
- output[opos++] = 'R';
- break;
- case '\u0440': // р
- output[opos++] = 'r';
- break;
- case '\u0421': // С
- output[opos++] = 'S';
- break;
- case '\u0441': // с
- output[opos++] = 's';
- break;
- case '\u0422': // Т
- output[opos++] = 'T';
- break;
- case '\u0442': // т
- output[opos++] = 't';
- break;
- case '\u0423': // У
- output[opos++] = 'U';
- break;
- case '\u0443': // у
- output[opos++] = 'u';
- break;
- case '\u0424': // Ф
- output[opos++] = 'F';
- break;
- case '\u0444': // ф
- output[opos++] = 'f';
- break;
- case '\u0425': // Х
- output[opos++] = 'K'; // alt. X
- output[opos++] = 'h';
- break;
- case '\u0445': // х
- output[opos++] = 'k'; // alt. x
- output[opos++] = 'h';
- break;
- case '\u0426': // Ц
- output[opos++] = 'F';
- break;
- case '\u0446': // ц
- output[opos++] = 'f';
- break;
- case '\u0427': // Ч
- output[opos++] = 'C'; // alt. Ts, C
- output[opos++] = 'h';
- break;
- case '\u0447': // ч
- output[opos++] = 'c'; // alt. ts, c
- output[opos++] = 'h';
- break;
- case '\u0428': // Ш
- output[opos++] = 'S'; // alt. Ch, S
- output[opos++] = 'h';
- break;
- case '\u0448': // ш
- output[opos++] = 's'; // alt. ch, s
- output[opos++] = 'h';
- break;
- case '\u0429': // Щ
- output[opos++] = 'S'; // alt. Shch, Sc
- output[opos++] = 'h';
- break;
- case '\u0449': // щ
- output[opos++] = 's'; // alt. shch, sc
- output[opos++] = 'h';
- break;
- case '\u042A': // Ъ
- output[opos++] = '"'; // "
- break;
- case '\u044A': // ъ
- output[opos++] = '"'; // "
- break;
- case '\u042B': // Ы
- output[opos++] = 'Y';
- break;
- case '\u044B': // ы
- output[opos++] = 'y';
- break;
- case '\u042C': // Ь
- output[opos++] = '\''; // '
- break;
- case '\u044C': // ь
- output[opos++] = '\''; // '
- break;
- case '\u042D': // Э
- output[opos++] = 'E';
- break;
- case '\u044D': // э
- output[opos++] = 'e';
- break;
- case '\u042E': // Ю
- output[opos++] = 'Y'; // alt. Ju
- output[opos++] = 'u';
- break;
- case '\u044E': // ю
- output[opos++] = 'y'; // alt. ju
- output[opos++] = 'u';
- break;
- case '\u042F': // Я
- output[opos++] = 'Y'; // alt. Ja
- output[opos++] = 'a';
- break;
- case '\u044F': // я
- output[opos++] = 'y'; // alt. ja
- output[opos++] = 'a';
- break;
-
- // BEGIN EXTRA
- /*
- case '£':
- output[opos++] = 'G';
- output[opos++] = 'B';
- output[opos++] = 'P';
- break;
-
- case '€':
- output[opos++] = 'E';
- output[opos++] = 'U';
- output[opos++] = 'R';
- break;
-
- case '©':
- output[opos++] = '(';
- output[opos++] = 'C';
- output[opos++] = ')';
- break;
- */
- default:
- // if (ToMoreAscii(input, ipos, output, ref opos))
- // break;
-
- // if (!char.IsLetterOrDigit(c)) // that would not catch eg 汉 unfortunately
- // output[opos++] = '?';
- // else
- // output[opos++] = c;
-
- // strict ASCII
- output[opos++] = fail;
-
- break;
- }
- }
+ // 5. Fallback for unmapped characters
+ output[0] = fallback;
+ return 1;
}
- // private static bool ToMoreAscii(char[] input, int ipos, char[] output, ref int opos)
- // {
- // var c = input[ipos];
+ private static int TryNormalize(char c, Span<char> output)
+ {
+ // Characters below U+00C0 are not decomposed here: report "nothing written" (0)
+ if (c < '\u00C0')
+ {
+ return 0;
+ }
- // switch (c)
- // {
- // case '£':
- // output[opos++] = 'G';
- // output[opos++] = 'B';
- // output[opos++] = 'P';
- // break;
+ // Normalize to FormD (decomposed form): base character followed by its combining marks.
+ // Normalize needs a string, so convert the char directly (no intermediate stackalloc span).
+ var normalized = c.ToString().Normalize(NormalizationForm.FormD);
- // case '€':
- // output[opos++] = 'E';
- // output[opos++] = 'U';
- // output[opos++] = 'R';
- // break;
+ if (normalized.Length == 0)
+ {
+ return 0;
+ }
- // case '©':
- // output[opos++] = '(';
- // output[opos++] = 'C';
- // output[opos++] = ')';
- // break;
+ // Copy only ASCII base characters into output; len counts the characters written
+ var len = 0;
+ foreach (var ch in normalized)
+ {
+ var category = CharUnicodeInfo.GetUnicodeCategory(ch);
- // default:
- // return false;
- // }
+ // Skip combining marks (the diacritics split off by the decomposition)
+ if (category == UnicodeCategory.NonSpacingMark ||
+ category == UnicodeCategory.SpacingCombiningMark ||
+ category == UnicodeCategory.EnclosingMark)
+ {
+ continue;
+ }
- // return true;
- // }
+ // Keep the character only if it is ASCII; anything else is dropped silently
+ if (ch < '\u0080')
+ {
+ output[len++] = ch;
+ }
+ }
+
+ return len;
+ }
}
diff --git a/src/Umbraco.Core/Strings/Utf8ToAsciiConverterNew.cs b/src/Umbraco.Core/Strings/Utf8ToAsciiConverterNew.cs
deleted file mode 100644
index d1327bab6f..0000000000
--- a/src/Umbraco.Core/Strings/Utf8ToAsciiConverterNew.cs
+++ /dev/null
@@ -1,209 +0,0 @@
-using System.Buffers;
-using System.Collections.Frozen;
-using System.Globalization;
-using System.Text;
-
-namespace Umbraco.Cms.Core.Strings;
-
-///
-/// SIMD-optimized UTF-8 to ASCII converter with extensible character mappings.
-///
-///
-///
-/// This converter uses a multi-step fallback strategy:
-/// 1. Dictionary lookup for special cases (ligatures, Cyrillic, special Latin)
-/// 2. Unicode normalization (FormD) for accented Latin characters
-/// 3. Control character stripping
-/// 4. Whitespace normalization
-/// 5. Fallback character for unmapped characters
-///
-///
-/// Most accented Latin characters (À, é, ñ, etc.) are handled automatically via
-/// Unicode normalization. Dictionary mappings are only needed for characters that
-/// don't decompose correctly (ligatures like Æ→AE, Cyrillic, special Latin like Ø→O).
-///
-///
-public sealed class Utf8ToAsciiConverterNew : IUtf8ToAsciiConverter
-{
- ///
- /// Maximum expansion ratio for output buffer sizing.
- /// Worst case: single char becomes 4 chars (e.g., Щ→Shch in standard transliteration).
- ///
- private const int MaxExpansionRatio = 4;
-
- // SIMD-optimized ASCII detection (uses AVX-512 when available)
- private static readonly SearchValues AsciiPrintable =
- SearchValues.Create(" !\"#$%&'()*+,-./0123456789:;<=>?@" +
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`" +
- "abcdefghijklmnopqrstuvwxyz{|}~");
-
- private readonly FrozenDictionary _mappings;
-
- public Utf8ToAsciiConverterNew(ICharacterMappingLoader mappingLoader)
- {
- _mappings = mappingLoader.LoadMappings();
- }
-
- ///
- public string Convert(string? text, char fallback = '?')
- {
- if (string.IsNullOrEmpty(text))
- {
- return string.Empty;
- }
-
- var input = text.AsSpan();
-
- // Fast path: all ASCII - no conversion needed
- if (input.IndexOfAnyExcept(AsciiPrintable) == -1)
- {
- return text;
- }
-
- // Allocate output buffer for worst-case expansion
- var maxLen = text.Length * MaxExpansionRatio;
- char[] arrayBuffer = ArrayPool.Shared.Rent(maxLen);
- try
- {
- var written = Convert(input, arrayBuffer.AsSpan(), fallback);
- return new string(arrayBuffer, 0, written);
- }
- finally
- {
- ArrayPool.Shared.Return(arrayBuffer);
- }
- }
-
- ///
- public int Convert(ReadOnlySpan input, Span output, char fallback = '?')
- {
- if (input.IsEmpty)
- {
- return 0;
- }
-
- var opos = 0;
- var ipos = 0;
-
- while (ipos < input.Length)
- {
- // Find next non-ASCII character using SIMD
- var remaining = input[ipos..];
- var asciiLen = remaining.IndexOfAnyExcept(AsciiPrintable);
-
- if (asciiLen == -1)
- {
- // Rest is all ASCII - bulk copy
- remaining.CopyTo(output[opos..]);
- return opos + remaining.Length;
- }
-
- if (asciiLen > 0)
- {
- // Copy ASCII prefix
- remaining[..asciiLen].CopyTo(output[opos..]);
- opos += asciiLen;
- ipos += asciiLen;
- }
-
- // Process non-ASCII character
- var c = input[ipos];
-
- // Handle surrogate pairs (emoji, etc.)
- if (char.IsSurrogate(c))
- {
- output[opos++] = fallback;
- ipos++;
- if (ipos < input.Length && char.IsLowSurrogate(input[ipos]))
- {
- ipos++; // Skip low surrogate
- }
- continue;
- }
-
- opos += ProcessNonAscii(c, output[opos..], fallback);
- ipos++;
- }
-
- return opos;
- }
-
- private int ProcessNonAscii(char c, Span output, char fallback)
- {
- // 1. Check special cases dictionary (ligatures, Cyrillic, etc.)
- if (_mappings.TryGetValue(c, out var mapped))
- {
- if (mapped.Length == 0)
- {
- return 0; // Empty mapping = strip character
- }
- mapped.AsSpan().CopyTo(output);
- return mapped.Length;
- }
-
- // 2. Try Unicode normalization (handles most accented chars)
- var normLen = TryNormalize(c, output);
- if (normLen > 0)
- {
- return normLen;
- }
-
- // 3. Control character handling
- if (char.IsControl(c))
- {
- return 0; // Strip control characters
- }
-
- // 4. Whitespace normalization
- if (char.IsWhiteSpace(c))
- {
- output[0] = ' ';
- return 1;
- }
-
- // 5. Fallback for unmapped characters
- output[0] = fallback;
- return 1;
- }
-
- private static int TryNormalize(char c, Span output)
- {
- // Skip characters that won't normalize to ASCII
- if (c < '\u00C0')
- {
- return 0;
- }
-
- // Normalize to FormD (decomposed form)
- ReadOnlySpan input = stackalloc char[] { c };
- var normalized = input.ToString().Normalize(NormalizationForm.FormD);
-
- if (normalized.Length == 0)
- {
- return 0;
- }
-
- // Copy only base characters (skip combining marks)
- var len = 0;
- foreach (var ch in normalized)
- {
- var category = CharUnicodeInfo.GetUnicodeCategory(ch);
-
- // Skip combining marks (diacritics)
- if (category == UnicodeCategory.NonSpacingMark ||
- category == UnicodeCategory.SpacingCombiningMark ||
- category == UnicodeCategory.EnclosingMark)
- {
- continue;
- }
-
- // Only keep if it's now ASCII
- if (ch < '\u0080')
- {
- output[len++] = ch;
- }
- }
-
- return len;
- }
-}
diff --git a/src/Umbraco.Core/Strings/Utf8ToAsciiConverterOriginal.cs b/src/Umbraco.Core/Strings/Utf8ToAsciiConverterOriginal.cs
new file mode 100644
index 0000000000..b6cb1e04b1
--- /dev/null
+++ b/src/Umbraco.Core/Strings/Utf8ToAsciiConverterOriginal.cs
@@ -0,0 +1,3633 @@
+#if false // Kept for historical reference only - not compiled
+namespace Umbraco.Cms.Core.Strings;
+
+///
+/// Provides methods to convert Utf8 text to Ascii.
+///
+///
+/// Tries to match characters such as accented eg "é" to Ascii equivalent eg "e".
+/// Converts all "whitespace" characters to a single whitespace.
+/// Removes all non-Utf8 (unicode) characters, so in fact it can sort-of "convert" Unicode to Ascii.
+/// Replaces symbols with '?'.
+///
+public static class Utf8ToAsciiConverter
+{
+ /// <summary>
+ /// Converts an Utf8 string into an Ascii string.
+ /// </summary>
+ /// <param name="text">The text to convert.</param>
+ /// <param name="fail">The character to use to replace characters that cannot properly be converted.</param>
+ /// <returns>The converted text.</returns>
+ public static string ToAsciiString(string text, char fail = '?') => ToAsciiString(text.AsSpan(), fail);
+
+ /// <summary>
+ /// Converts an Utf8 string into an Ascii string.
+ /// </summary>
+ /// <param name="text">The text to convert.</param>
+ /// <param name="fail">The character to use to replace characters that cannot properly be converted.</param>
+ /// <returns>The converted text.</returns>
+ public static string ToAsciiString(ReadOnlySpan<char> text, char fail = '?')
+ {
+ // this is faster although it uses more memory
+ // but... we should be filtering short strings only...
+
+ var totalSize = text.Length * 3;
+ Span<char> output = totalSize <= 1024 ? stackalloc char[totalSize] : new char[totalSize]; // *3 because of things such as OE
+ var len = ToAscii(text, output, fail);
+ return new string(output[..len]);
+
+ // var output = new StringBuilder(input.Length + 16); // default is 16, start with at least input length + little extra
+ // ToAscii(input, output);
+ // return output.ToString();
+ }
+
+ /// <summary>
+ /// Converts an Utf8 string into an array of Ascii characters.
+ /// </summary>
+ /// <param name="text">The text to convert.</param>
+ /// <param name="fail">The character to use to replace characters that cannot properly be converted.</param>
+ /// <returns>The converted text.</returns>
+ public static char[] ToAsciiCharArray(string text, char fail = '?')
+ {
+ var input = text.ToCharArray();
+
+ // this is faster although it uses more memory
+ // but... we should be filtering short strings only...
+ int outputLength = input.Length * 3; // *3 because of things such as OE
+ Span<char> output = outputLength <= 1024 ? stackalloc char[outputLength] : new char[outputLength];
+ var len = ToAscii(input, output, fail);
+ return output[..len].ToArray();
+
+ // var temp = new StringBuilder(input.Length + 16); // default is 16, start with at least input length + little extra
+ // ToAscii(input, temp);
+ // var output = new char[temp.Length];
+ // temp.CopyTo(0, output, 0, temp.Length);
+ // return output;
+ }
+
+ /// <summary>
+ /// Converts an array of Utf8 characters into an array of Ascii characters.
+ /// </summary>
+ /// <param name="input">The input array.</param>
+ /// <param name="output">The output array.</param>
+ /// <param name="fail">The character to use to replace characters that cannot properly be converted.</param>
+ /// <returns>The number of characters in the output array.</returns>
+ /// <remarks>The caller must ensure that the output array is big enough.</remarks>
+ /// <exception cref="IndexOutOfRangeException">The output array is not big enough.</exception>
+ private static int ToAscii(ReadOnlySpan<char> input, Span<char> output, char fail = '?')
+ {
+ var opos = 0;
+
+ for (var ipos = 0; ipos < input.Length; ipos++)
+ {
+ // any surrogate (high or low) is replaced by a single 'fail' character
+ if (char.IsSurrogate(input[ipos]))
+ {
+ ipos++; // and skip low surrogate (NOTE: the next char is skipped unconditionally, even if it is not a low surrogate)
+ output[opos++] = fail;
+ }
+ else
+ {
+ ToAscii(input, ipos, output, ref opos, fail);
+ }
+ }
+
+ return opos;
+ }
+
+ // private static void ToAscii(char[] input, StringBuilder output)
+ // {
+ // var chars = new char[5];
+
+ // for (var ipos = 0; ipos < input.Length; ipos++)
+ // {
+ // var opos = 0;
+ // if (char.IsSurrogate(input[ipos]))
+ // ipos++;
+ // else
+ // {
+ // ToAscii(input, ipos, chars, ref opos);
+ // output.Append(chars, 0, opos);
+ // }
+ // }
+ // }
+
+ ///
+ /// Converts the character at position in input array of Utf8 characters
+ ///
+ /// and writes the converted value to output array of Ascii characters at position
+ /// ,
+ /// and increments that position accordingly.
+ ///
+ /// The input array.
+ /// The input position.
+ /// The output array.
+ /// The output position.
+ /// The character to use to replace characters that cannot properly be converted.
+ ///
+ /// Adapted from various sources on the 'net including Lucene.Net.Analysis.ASCIIFoldingFilter.
+ /// Input should contain Utf8 characters exclusively and NOT Unicode.
+ /// Removes controls, normalizes whitespaces, replaces symbols by '?'.
+ ///
+ private static void ToAscii(ReadOnlySpan input, int ipos, Span output, ref int opos, char fail = '?')
+ {
+ var c = input[ipos];
+
+ if (char.IsControl(c))
+ {
+ // Control characters are non-printing and formatting characters, such as ACK, BEL, CR, FF, LF, and VT.
+ // The Unicode standard assigns the following code points to control characters: from \U0000 to \U001F,
+ // \U007F, and from \U0080 to \U009F. According to the Unicode standard, these values are to be
+ // interpreted as control characters unless their use is otherwise defined by an application. Valid
+ // control characters are members of the UnicodeCategory.Control category.
+
+ // we don't want them
+ }
+
+ // else if (char.IsSeparator(c))
+ // {
+ // // The Unicode standard recognizes three subcategories of separators:
+ // // - Space separators (the UnicodeCategory.SpaceSeparator category), which includes characters such as \u0020.
+ // // - Line separators (the UnicodeCategory.LineSeparator category), which includes \u2028.
+ // // - Paragraph separators (the UnicodeCategory.ParagraphSeparator category), which includes \u2029.
+ // //
+ // // Note: The Unicode standard classifies the characters \u000A (LF), \u000C (FF), and \u000A (CR) as control
+ // // characters (members of the UnicodeCategory.Control category), not as separator characters.
+
+ // // better do it via WhiteSpace
+ // }
+ else if (char.IsWhiteSpace(c))
+ {
+ // White space characters are the following Unicode characters:
+ // - Members of the SpaceSeparator category, which includes the characters SPACE (U+0020),
+ // OGHAM SPACE MARK (U+1680), MONGOLIAN VOWEL SEPARATOR (U+180E), EN QUAD (U+2000), EM QUAD (U+2001),
+ // EN SPACE (U+2002), EM SPACE (U+2003), THREE-PER-EM SPACE (U+2004), FOUR-PER-EM SPACE (U+2005),
+ // SIX-PER-EM SPACE (U+2006), FIGURE SPACE (U+2007), PUNCTUATION SPACE (U+2008), THIN SPACE (U+2009),
+ // HAIR SPACE (U+200A), NARROW NO-BREAK SPACE (U+202F), MEDIUM MATHEMATICAL SPACE (U+205F),
+ // and IDEOGRAPHIC SPACE (U+3000).
+ // - Members of the LineSeparator category, which consists solely of the LINE SEPARATOR character (U+2028).
+ // - Members of the ParagraphSeparator category, which consists solely of the PARAGRAPH SEPARATOR character (U+2029).
+ // - The characters CHARACTER TABULATION (U+0009), LINE FEED (U+000A), LINE TABULATION (U+000B),
+ // FORM FEED (U+000C), CARRIAGE RETURN (U+000D), NEXT LINE (U+0085), and NO-BREAK SPACE (U+00A0).
+
+ // make it a whitespace
+ output[opos++] = ' ';
+ }
+ else if (c < '\u0080')
+ {
+ // safe
+ output[opos++] = c;
+ }
+ else
+ {
+ switch (c)
+ {
+ case '\u00C0':
+ // À [LATIN CAPITAL LETTER A WITH GRAVE]
+ case '\u00C1':
+ // � [LATIN CAPITAL LETTER A WITH ACUTE]
+ case '\u00C2':
+ // Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
+ case '\u00C3':
+ // Ã [LATIN CAPITAL LETTER A WITH TILDE]
+ case '\u00C4':
+ // Ä [LATIN CAPITAL LETTER A WITH DIAERESIS]
+ case '\u00C5':
+ // Ã… [LATIN CAPITAL LETTER A WITH RING ABOVE]
+ case '\u0100':
+ // Ā [LATIN CAPITAL LETTER A WITH MACRON]
+ case '\u0102':
+ // Ä‚ [LATIN CAPITAL LETTER A WITH BREVE]
+ case '\u0104':
+ // Ä„ [LATIN CAPITAL LETTER A WITH OGONEK]
+ case '\u018F':
+ // � http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA]
+ case '\u01CD':
+ // � [LATIN CAPITAL LETTER A WITH CARON]
+ case '\u01DE':
+ // Çž [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
+ case '\u01E0':
+ // Ç [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
+ case '\u01FA':
+ // Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
+ case '\u0200':
+ // Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
+ case '\u0202':
+ // È‚ [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
+ case '\u0226':
+ // Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE]
+ case '\u023A':
+ // Ⱥ [LATIN CAPITAL LETTER A WITH STROKE]
+ case '\u1D00':
+ // á´€ [LATIN LETTER SMALL CAPITAL A]
+ case '\u1E00':
+ // Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW]
+ case '\u1EA0':
+ // Ạ[LATIN CAPITAL LETTER A WITH DOT BELOW]
+ case '\u1EA2':
+ // Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
+ case '\u1EA4':
+ // Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
+ case '\u1EA6':
+ // Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
+ case '\u1EA8':
+ // Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
+ case '\u1EAA':
+ // Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
+ case '\u1EAC':
+ // Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
+ case '\u1EAE':
+ // Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
+ case '\u1EB0':
+ // Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
+ case '\u1EB2':
+ // Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
+ case '\u1EB4':
+ // Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
+ case '\u1EB6':
+ // Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
+ case '\u24B6':
+ // â’¶ [CIRCLED LATIN CAPITAL LETTER A]
+ case '\uFF21': // A [FULLWIDTH LATIN CAPITAL LETTER A]
+ output[opos++] = 'A';
+ break;
+
+ case '\u00E0':
+ // à[LATIN SMALL LETTER A WITH GRAVE]
+ case '\u00E1':
+ // á [LATIN SMALL LETTER A WITH ACUTE]
+ case '\u00E2':
+ // â [LATIN SMALL LETTER A WITH CIRCUMFLEX]
+ case '\u00E3':
+ // ã [LATIN SMALL LETTER A WITH TILDE]
+ case '\u00E4':
+ // ä [LATIN SMALL LETTER A WITH DIAERESIS]
+ case '\u00E5':
+ // å [LATIN SMALL LETTER A WITH RING ABOVE]
+ case '\u0101':
+ // � [LATIN SMALL LETTER A WITH MACRON]
+ case '\u0103':
+ // ă [LATIN SMALL LETTER A WITH BREVE]
+ case '\u0105':
+ // Ä… [LATIN SMALL LETTER A WITH OGONEK]
+ case '\u01CE':
+ // ÇŽ [LATIN SMALL LETTER A WITH CARON]
+ case '\u01DF':
+ // ÇŸ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
+ case '\u01E1':
+ // Ç¡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
+ case '\u01FB':
+ // Ç» [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
+ case '\u0201':
+ // � [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
+ case '\u0203':
+ // ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE]
+ case '\u0227':
+ // ȧ [LATIN SMALL LETTER A WITH DOT ABOVE]
+ case '\u0250':
+ // � [LATIN SMALL LETTER TURNED A]
+ case '\u0259':
+ // É™ [LATIN SMALL LETTER SCHWA]
+ case '\u025A':
+ // Éš [LATIN SMALL LETTER SCHWA WITH HOOK]
+ case '\u1D8F':
+ // � [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
+ case '\u1D95':
+ // á¶• [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
+ case '\u1E01':
+ // ạ [LATIN SMALL LETTER A WITH RING BELOW]
+ case '\u1E9A':
+ // ả [LATIN SMALL LETTER A WITH RIGHT HALF RING]
+ case '\u1EA1':
+ // ạ [LATIN SMALL LETTER A WITH DOT BELOW]
+ case '\u1EA3':
+ // ả [LATIN SMALL LETTER A WITH HOOK ABOVE]
+ case '\u1EA5':
+ // ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
+ case '\u1EA7':
+ // ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
+ case '\u1EA9':
+ // ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
+ case '\u1EAB':
+ // ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
+ case '\u1EAD':
+ // Ạ[LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
+ case '\u1EAF':
+ // ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
+ case '\u1EB1':
+ // ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
+ case '\u1EB3':
+ // ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
+ case '\u1EB5':
+ // ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE]
+ case '\u1EB7':
+ // ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
+ case '\u2090':
+ // � [LATIN SUBSCRIPT SMALL LETTER A]
+ case '\u2094':
+ // �? [LATIN SUBSCRIPT SMALL LETTER SCHWA]
+ case '\u24D0':
+ // � [CIRCLED LATIN SMALL LETTER A]
+ case '\u2C65':
+ // â±¥ [LATIN SMALL LETTER A WITH STROKE]
+ case '\u2C6F':
+ // Ɐ [LATIN CAPITAL LETTER TURNED A]
+ case '\uFF41': // � [FULLWIDTH LATIN SMALL LETTER A]
+ output[opos++] = 'a';
+ break;
+
+ case '\uA732': // Ꜳ [LATIN CAPITAL LETTER AA]
+ output[opos++] = 'A';
+ output[opos++] = 'A';
+ break;
+
+ case '\u00C6':
+ // Æ[LATIN CAPITAL LETTER AE]
+ case '\u01E2':
+ // Ǣ [LATIN CAPITAL LETTER AE WITH MACRON]
+ case '\u01FC':
+ // Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE]
+ case '\u1D01': // á´� [LATIN LETTER SMALL CAPITAL AE]
+ output[opos++] = 'A';
+ output[opos++] = 'E';
+ break;
+
+ case '\uA734': // Ꜵ [LATIN CAPITAL LETTER AO]
+ output[opos++] = 'A';
+ output[opos++] = 'O';
+ break;
+
+ case '\uA736': // Ꜷ [LATIN CAPITAL LETTER AU]
+ output[opos++] = 'A';
+ output[opos++] = 'U';
+ break;
+
+ case '\uA738':
+ // Ꜹ [LATIN CAPITAL LETTER AV]
+ case '\uA73A': // Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
+ output[opos++] = 'A';
+ output[opos++] = 'V';
+ break;
+
+ case '\uA73C': // Ꜽ [LATIN CAPITAL LETTER AY]
+ output[opos++] = 'A';
+ output[opos++] = 'Y';
+ break;
+
+ case '\u249C': // ⒜ [PARENTHESIZED LATIN SMALL LETTER A]
+ output[opos++] = '(';
+ output[opos++] = 'a';
+ output[opos++] = ')';
+ break;
+
+ case '\uA733': // ꜳ [LATIN SMALL LETTER AA]
+ output[opos++] = 'a';
+ output[opos++] = 'a';
+ break;
+
+ case '\u00E6':
+ // æ [LATIN SMALL LETTER AE]
+ case '\u01E3':
+ // ǣ [LATIN SMALL LETTER AE WITH MACRON]
+ case '\u01FD':
+ // ǽ [LATIN SMALL LETTER AE WITH ACUTE]
+ case '\u1D02': // á´‚ [LATIN SMALL LETTER TURNED AE]
+ output[opos++] = 'a';
+ output[opos++] = 'e';
+ break;
+
+ case '\uA735': // ꜵ [LATIN SMALL LETTER AO]
+ output[opos++] = 'a';
+ output[opos++] = 'o';
+ break;
+
+ case '\uA737': // ꜷ [LATIN SMALL LETTER AU]
+ output[opos++] = 'a';
+ output[opos++] = 'u';
+ break;
+
+ case '\uA739':
+ // ꜹ [LATIN SMALL LETTER AV]
+ case '\uA73B': // ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
+ output[opos++] = 'a';
+ output[opos++] = 'v';
+ break;
+
+ case '\uA73D': // ꜽ [LATIN SMALL LETTER AY]
+ output[opos++] = 'a';
+ output[opos++] = 'y';
+ break;
+
+ case '\u0181':
+ // � [LATIN CAPITAL LETTER B WITH HOOK]
+ case '\u0182':
+ // Æ‚ [LATIN CAPITAL LETTER B WITH TOPBAR]
+ case '\u0243':
+ // Ƀ [LATIN CAPITAL LETTER B WITH STROKE]
+ case '\u0299':
+ // Ê™ [LATIN LETTER SMALL CAPITAL B]
+ case '\u1D03':
+ // á´ƒ [LATIN LETTER SMALL CAPITAL BARRED B]
+ case '\u1E02':
+ // Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE]
+ case '\u1E04':
+ // Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW]
+ case '\u1E06':
+ // Ḇ[LATIN CAPITAL LETTER B WITH LINE BELOW]
+ case '\u24B7':
+ // â’· [CIRCLED LATIN CAPITAL LETTER B]
+ case '\uFF22': // ï¼¢ [FULLWIDTH LATIN CAPITAL LETTER B]
+ output[opos++] = 'B';
+ break;
+
+ case '\u0180':
+ // ƀ [LATIN SMALL LETTER B WITH STROKE]
+ case '\u0183':
+ // ƃ [LATIN SMALL LETTER B WITH TOPBAR]
+ case '\u0253':
+ // É“ [LATIN SMALL LETTER B WITH HOOK]
+ case '\u1D6C':
+ // ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE]
+ case '\u1D80':
+ // á¶€ [LATIN SMALL LETTER B WITH PALATAL HOOK]
+ case '\u1E03':
+ // ḃ [LATIN SMALL LETTER B WITH DOT ABOVE]
+ case '\u1E05':
+ // ḅ [LATIN SMALL LETTER B WITH DOT BELOW]
+ case '\u1E07':
+ // ḇ [LATIN SMALL LETTER B WITH LINE BELOW]
+ case '\u24D1':
+ // â“‘ [CIRCLED LATIN SMALL LETTER B]
+ case '\uFF42': // b [FULLWIDTH LATIN SMALL LETTER B]
+ output[opos++] = 'b';
+ break;
+
+ case '\u249D': // â’� [PARENTHESIZED LATIN SMALL LETTER B]
+ output[opos++] = '(';
+ output[opos++] = 'b';
+ output[opos++] = ')';
+ break;
+
+ case '\u00C7':
+ // Ç [LATIN CAPITAL LETTER C WITH CEDILLA]
+ case '\u0106':
+ // Ć[LATIN CAPITAL LETTER C WITH ACUTE]
+ case '\u0108':
+ // Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
+ case '\u010A':
+ // ÄŠ[LATIN CAPITAL LETTER C WITH DOT ABOVE]
+ case '\u010C':
+ // Č [LATIN CAPITAL LETTER C WITH CARON]
+ case '\u0187':
+ // Ƈ [LATIN CAPITAL LETTER C WITH HOOK]
+ case '\u023B':
+ // È» [LATIN CAPITAL LETTER C WITH STROKE]
+ case '\u0297':
+ // Ê— [LATIN LETTER STRETCHED C]
+ case '\u1D04':
+ // á´„ [LATIN LETTER SMALL CAPITAL C]
+ case '\u1E08':
+ // Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
+ case '\u24B8':
+ // â’¸ [CIRCLED LATIN CAPITAL LETTER C]
+ case '\uFF23': // ï¼£ [FULLWIDTH LATIN CAPITAL LETTER C]
+ output[opos++] = 'C';
+ break;
+
+ case '\u00E7':
+ // ç [LATIN SMALL LETTER C WITH CEDILLA]
+ case '\u0107':
+ // ć [LATIN SMALL LETTER C WITH ACUTE]
+ case '\u0109':
+ // ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX]
+ case '\u010B':
+ // Ä‹ [LATIN SMALL LETTER C WITH DOT ABOVE]
+ case '\u010D':
+ // č [LATIN SMALL LETTER C WITH CARON]
+ case '\u0188':
+ // ƈ [LATIN SMALL LETTER C WITH HOOK]
+ case '\u023C':
+ // ȼ [LATIN SMALL LETTER C WITH STROKE]
+ case '\u0255':
+ // É• [LATIN SMALL LETTER C WITH CURL]
+ case '\u1E09':
+ // ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
+ case '\u2184':
+ // ↄ [LATIN SMALL LETTER REVERSED C]
+ case '\u24D2':
+ // â“’ [CIRCLED LATIN SMALL LETTER C]
+ case '\uA73E':
+ // Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT]
+ case '\uA73F':
+ // ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT]
+ case '\uFF43': // c [FULLWIDTH LATIN SMALL LETTER C]
+ output[opos++] = 'c';
+ break;
+
+ case '\u249E': // â’ž [PARENTHESIZED LATIN SMALL LETTER C]
+ output[opos++] = '(';
+ output[opos++] = 'c';
+ output[opos++] = ')';
+ break;
+
+ case '\u00D0':
+ // Ð [LATIN CAPITAL LETTER ETH]
+ case '\u010E':
+ // ÄŽ [LATIN CAPITAL LETTER D WITH CARON]
+ case '\u0110':
+ // Đ [LATIN CAPITAL LETTER D WITH STROKE]
+ case '\u0189':
+ // Ɖ [LATIN CAPITAL LETTER AFRICAN D]
+ case '\u018A':
+ // ÆŠ[LATIN CAPITAL LETTER D WITH HOOK]
+ case '\u018B':
+ // Æ‹ [LATIN CAPITAL LETTER D WITH TOPBAR]
+ case '\u1D05':
+ // á´… [LATIN LETTER SMALL CAPITAL D]
+ case '\u1D06':
+ // á´†[LATIN LETTER SMALL CAPITAL ETH]
+ case '\u1E0A':
+ // Ḋ[LATIN CAPITAL LETTER D WITH DOT ABOVE]
+ case '\u1E0C':
+ // Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW]
+ case '\u1E0E':
+ // Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW]
+ case '\u1E10':
+ // Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA]
+ case '\u1E12':
+ // Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
+ case '\u24B9':
+ // â’¹ [CIRCLED LATIN CAPITAL LETTER D]
+ case '\uA779':
+ // � [LATIN CAPITAL LETTER INSULAR D]
+ case '\uFF24': // D [FULLWIDTH LATIN CAPITAL LETTER D]
+ output[opos++] = 'D';
+ break;
+
+ case '\u00F0':
+ // ð [LATIN SMALL LETTER ETH]
+ case '\u010F':
+ // ď [LATIN SMALL LETTER D WITH CARON]
+ case '\u0111':
+ // Ä‘ [LATIN SMALL LETTER D WITH STROKE]
+ case '\u018C':
+ // ƌ [LATIN SMALL LETTER D WITH TOPBAR]
+ case '\u0221':
+ // È¡ [LATIN SMALL LETTER D WITH CURL]
+ case '\u0256':
+ // É– [LATIN SMALL LETTER D WITH TAIL]
+ case '\u0257':
+ // É— [LATIN SMALL LETTER D WITH HOOK]
+ case '\u1D6D':
+ // áµ [LATIN SMALL LETTER D WITH MIDDLE TILDE]
+ case '\u1D81':
+ // � [LATIN SMALL LETTER D WITH PALATAL HOOK]
+ case '\u1D91':
+ // á¶‘ [LATIN SMALL LETTER D WITH HOOK AND TAIL]
+ case '\u1E0B':
+ // ḋ [LATIN SMALL LETTER D WITH DOT ABOVE]
+ case '\u1E0D':
+ // ḍ [LATIN SMALL LETTER D WITH DOT BELOW]
+ case '\u1E0F':
+ // ḏ [LATIN SMALL LETTER D WITH LINE BELOW]
+ case '\u1E11':
+ // ḑ [LATIN SMALL LETTER D WITH CEDILLA]
+ case '\u1E13':
+ // ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
+ case '\u24D3':
+ // â““ [CIRCLED LATIN SMALL LETTER D]
+ case '\uA77A':
+ // � [LATIN SMALL LETTER INSULAR D]
+ case '\uFF44': // d [FULLWIDTH LATIN SMALL LETTER D]
+ output[opos++] = 'd';
+ break;
+
+ case '\u01C4':
+ // Ç„ [LATIN CAPITAL LETTER DZ WITH CARON]
+ case '\u01F1': // DZ [LATIN CAPITAL LETTER DZ]
+ output[opos++] = 'D';
+ output[opos++] = 'Z';
+ break;
+
+ case '\u01C5':
+ // Ç… [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
+ case '\u01F2': // Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
+ output[opos++] = 'D';
+ output[opos++] = 'z';
+ break;
+
+ case '\u249F': // â’Ÿ [PARENTHESIZED LATIN SMALL LETTER D]
+ output[opos++] = '(';
+ output[opos++] = 'd';
+ output[opos++] = ')';
+ break;
+
+ case '\u0238': // ȸ [LATIN SMALL LETTER DB DIGRAPH]
+ output[opos++] = 'd';
+ output[opos++] = 'b';
+ break;
+
+ case '\u01C6':
+ // dž[LATIN SMALL LETTER DZ WITH CARON]
+ case '\u01F3':
+ // dz [LATIN SMALL LETTER DZ]
+ case '\u02A3':
+ // ʣ [LATIN SMALL LETTER DZ DIGRAPH]
+ case '\u02A5': // ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
+ output[opos++] = 'd';
+ output[opos++] = 'z';
+ break;
+
+ case '\u00C8':
+ // È [LATIN CAPITAL LETTER E WITH GRAVE]
+ case '\u00C9':
+ // É [LATIN CAPITAL LETTER E WITH ACUTE]
+ case '\u00CA':
+ // Ê[LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
+ case '\u00CB':
+ // Ë [LATIN CAPITAL LETTER E WITH DIAERESIS]
+ case '\u0112':
+ // Ä’ [LATIN CAPITAL LETTER E WITH MACRON]
+ case '\u0114':
+ // Ĕ [LATIN CAPITAL LETTER E WITH BREVE]
+ case '\u0116':
+ // Ä– [LATIN CAPITAL LETTER E WITH DOT ABOVE]
+ case '\u0118':
+ // Ę [LATIN CAPITAL LETTER E WITH OGONEK]
+ case '\u011A':
+ // Äš [LATIN CAPITAL LETTER E WITH CARON]
+ case '\u018E':
+ // ÆŽ [LATIN CAPITAL LETTER REVERSED E]
+ case '\u0190':
+ // Ɛ [LATIN CAPITAL LETTER OPEN E]
+ case '\u0204':
+ // È„ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
+ case '\u0206':
+ // Ȇ[LATIN CAPITAL LETTER E WITH INVERTED BREVE]
+ case '\u0228':
+ // Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA]
+ case '\u0246':
+ // Ɇ[LATIN CAPITAL LETTER E WITH STROKE]
+ case '\u1D07':
+ // á´‡ [LATIN LETTER SMALL CAPITAL E]
+ case '\u1E14':
+ // Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
+ case '\u1E16':
+ // Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
+ case '\u1E18':
+ // Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
+ case '\u1E1A':
+ // Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW]
+ case '\u1E1C':
+ // Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
+ case '\u1EB8':
+ // Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW]
+ case '\u1EBA':
+ // Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
+ case '\u1EBC':
+ // Ẽ [LATIN CAPITAL LETTER E WITH TILDE]
+ case '\u1EBE':
+ // Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
+ case '\u1EC0':
+ // Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
+ case '\u1EC2':
+ // Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
+ case '\u1EC4':
+ // Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
+ case '\u1EC6':
+ // Ệ[LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
+ case '\u24BA':
+ // â’º [CIRCLED LATIN CAPITAL LETTER E]
+ case '\u2C7B':
+ // â±» [LATIN LETTER SMALL CAPITAL TURNED E]
+ case '\uFF25': // ï¼¥ [FULLWIDTH LATIN CAPITAL LETTER E]
+ output[opos++] = 'E';
+ break;
+
+ case '\u00E8':
+ // è [LATIN SMALL LETTER E WITH GRAVE]
+ case '\u00E9':
+ // é [LATIN SMALL LETTER E WITH ACUTE]
+ case '\u00EA':
+ // ê [LATIN SMALL LETTER E WITH CIRCUMFLEX]
+ case '\u00EB':
+ // ë [LATIN SMALL LETTER E WITH DIAERESIS]
+ case '\u0113':
+ // Ä“ [LATIN SMALL LETTER E WITH MACRON]
+ case '\u0115':
+ // Ä• [LATIN SMALL LETTER E WITH BREVE]
+ case '\u0117':
+ // Ä— [LATIN SMALL LETTER E WITH DOT ABOVE]
+ case '\u0119':
+ // Ä™ [LATIN SMALL LETTER E WITH OGONEK]
+ case '\u011B':
+ // Ä› [LATIN SMALL LETTER E WITH CARON]
+ case '\u01DD':
+ // ǝ [LATIN SMALL LETTER TURNED E]
+ case '\u0205':
+ // È… [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
+ case '\u0207':
+ // ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE]
+ case '\u0229':
+ // È© [LATIN SMALL LETTER E WITH CEDILLA]
+ case '\u0247':
+ // ɇ [LATIN SMALL LETTER E WITH STROKE]
+ case '\u0258':
+ // ɘ [LATIN SMALL LETTER REVERSED E]
+ case '\u025B':
+ // É› [LATIN SMALL LETTER OPEN E]
+ case '\u025C':
+ // ɜ [LATIN SMALL LETTER REVERSED OPEN E]
+ case '\u025D':
+ // ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
+ case '\u025E':
+ // Éž [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
+ case '\u029A':
+ // Êš [LATIN SMALL LETTER CLOSED OPEN E]
+ case '\u1D08':
+ // á´ˆ [LATIN SMALL LETTER TURNED OPEN E]
+ case '\u1D92':
+ // á¶’ [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
+ case '\u1D93':
+ // á¶“ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
+ case '\u1D94':
+ // �? [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
+ case '\u1E15':
+ // ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
+ case '\u1E17':
+ // ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
+ case '\u1E19':
+ // ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
+ case '\u1E1B':
+ // ḛ [LATIN SMALL LETTER E WITH TILDE BELOW]
+ case '\u1E1D':
+ // ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
+ case '\u1EB9':
+ // ẹ [LATIN SMALL LETTER E WITH DOT BELOW]
+ case '\u1EBB':
+ // ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE]
+ case '\u1EBD':
+ // ẽ [LATIN SMALL LETTER E WITH TILDE]
+ case '\u1EBF':
+ // ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
+ case '\u1EC1':
+ // ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
+ case '\u1EC3':
+ // ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
+ case '\u1EC5':
+ // á»… [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
+ case '\u1EC7':
+ // ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
+ case '\u2091':
+ // â‚‘ [LATIN SUBSCRIPT SMALL LETTER E]
+ case '\u24D4':
+ // ⓔ [CIRCLED LATIN SMALL LETTER E]
+ case '\u2C78':
+ // ⱸ [LATIN SMALL LETTER E WITH NOTCH]
+ case '\uFF45': // ï½… [FULLWIDTH LATIN SMALL LETTER E]
+ output[opos++] = 'e';
+ break;
+
+ case '\u24A0': // â’ [PARENTHESIZED LATIN SMALL LETTER E]
+ output[opos++] = '(';
+ output[opos++] = 'e';
+ output[opos++] = ')';
+ break;
+
+ case '\u0191':
+ // Æ‘ [LATIN CAPITAL LETTER F WITH HOOK]
+ case '\u1E1E':
+ // Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE]
+ case '\u24BB':
+ // â’» [CIRCLED LATIN CAPITAL LETTER F]
+ case '\uA730':
+ // ꜰ [LATIN LETTER SMALL CAPITAL F]
+ case '\uA77B':
+ // � [LATIN CAPITAL LETTER INSULAR F]
+ case '\uA7FB':
+ // ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F]
+ case '\uFF26': // F [FULLWIDTH LATIN CAPITAL LETTER F]
+ output[opos++] = 'F';
+ break;
+
+ case '\u0192':
+ // Æ’ [LATIN SMALL LETTER F WITH HOOK]
+ case '\u1D6E':
+ // áµ® [LATIN SMALL LETTER F WITH MIDDLE TILDE]
+ case '\u1D82':
+ // á¶‚ [LATIN SMALL LETTER F WITH PALATAL HOOK]
+ case '\u1E1F':
+ // ḟ [LATIN SMALL LETTER F WITH DOT ABOVE]
+ case '\u1E9B':
+ // ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
+ case '\u24D5':
+ // â“• [CIRCLED LATIN SMALL LETTER F]
+ case '\uA77C':
+ // � [LATIN SMALL LETTER INSULAR F]
+ case '\uFF46': // f[FULLWIDTH LATIN SMALL LETTER F]
+ output[opos++] = 'f';
+ break;
+
+ case '\u24A1': // â’¡ [PARENTHESIZED LATIN SMALL LETTER F]
+ output[opos++] = '(';
+ output[opos++] = 'f';
+ output[opos++] = ')';
+ break;
+
+ case '\uFB00': // ff [LATIN SMALL LIGATURE FF]
+ output[opos++] = 'f';
+ output[opos++] = 'f';
+ break;
+
+ case '\uFB03': // ffi [LATIN SMALL LIGATURE FFI]
+ output[opos++] = 'f';
+ output[opos++] = 'f';
+ output[opos++] = 'i';
+ break;
+
+ case '\uFB04': // ffl [LATIN SMALL LIGATURE FFL]
+ output[opos++] = 'f';
+ output[opos++] = 'f';
+ output[opos++] = 'l';
+ break;
+
+ case '\uFB01': // � [LATIN SMALL LIGATURE FI]
+ output[opos++] = 'f';
+ output[opos++] = 'i';
+ break;
+
+ case '\uFB02': // fl [LATIN SMALL LIGATURE FL]
+ output[opos++] = 'f';
+ output[opos++] = 'l';
+ break;
+
+ case '\u011C':
+ // Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
+ case '\u011E':
+ // Äž [LATIN CAPITAL LETTER G WITH BREVE]
+ case '\u0120':
+ // Ä [LATIN CAPITAL LETTER G WITH DOT ABOVE]
+ case '\u0122':
+ // Ģ [LATIN CAPITAL LETTER G WITH CEDILLA]
+ case '\u0193':
+ // Æ“ [LATIN CAPITAL LETTER G WITH HOOK]
+ case '\u01E4':
+ // Ǥ [LATIN CAPITAL LETTER G WITH STROKE]
+ case '\u01E5':
+ // ǥ [LATIN SMALL LETTER G WITH STROKE]
+ case '\u01E6':
+ // Ǧ [LATIN CAPITAL LETTER G WITH CARON]
+ case '\u01E7':
+ // ǧ [LATIN SMALL LETTER G WITH CARON]
+ case '\u01F4':
+ // Ç´ [LATIN CAPITAL LETTER G WITH ACUTE]
+ case '\u0262':
+ // ɢ [LATIN LETTER SMALL CAPITAL G]
+ case '\u029B':
+ // Ê› [LATIN LETTER SMALL CAPITAL G WITH HOOK]
+ case '\u1E20':
+ // Ḡ[LATIN CAPITAL LETTER G WITH MACRON]
+ case '\u24BC':
+ // â’¼ [CIRCLED LATIN CAPITAL LETTER G]
+ case '\uA77D':
+ // � [LATIN CAPITAL LETTER INSULAR G]
+ case '\uA77E':
+ // � [LATIN CAPITAL LETTER TURNED INSULAR G]
+ case '\uFF27': // ï¼§ [FULLWIDTH LATIN CAPITAL LETTER G]
+ output[opos++] = 'G';
+ break;
+
+ case '\u011D':
+ // ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX]
+ case '\u011F':
+ // ÄŸ [LATIN SMALL LETTER G WITH BREVE]
+ case '\u0121':
+ // Ä¡ [LATIN SMALL LETTER G WITH DOT ABOVE]
+ case '\u0123':
+ // ģ [LATIN SMALL LETTER G WITH CEDILLA]
+ case '\u01F5':
+ // ǵ [LATIN SMALL LETTER G WITH ACUTE]
+ case '\u0260':
+ // É [LATIN SMALL LETTER G WITH HOOK]
+ case '\u0261':
+ // É¡ [LATIN SMALL LETTER SCRIPT G]
+ case '\u1D77':
+ // áµ· [LATIN SMALL LETTER TURNED G]
+ case '\u1D79':
+ // áµ¹ [LATIN SMALL LETTER INSULAR G]
+ case '\u1D83':
+ // ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK]
+ case '\u1E21':
+ // ḡ [LATIN SMALL LETTER G WITH MACRON]
+ case '\u24D6':
+ // â“– [CIRCLED LATIN SMALL LETTER G]
+ case '\uA77F':
+ // � [LATIN SMALL LETTER TURNED INSULAR G]
+ case '\uFF47': // g [FULLWIDTH LATIN SMALL LETTER G]
+ output[opos++] = 'g';
+ break;
+
+ case '\u24A2': // â’¢ [PARENTHESIZED LATIN SMALL LETTER G]
+ output[opos++] = '(';
+ output[opos++] = 'g';
+ output[opos++] = ')';
+ break;
+
+ case '\u0124':
+ // Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
+ case '\u0126':
+ // Ħ [LATIN CAPITAL LETTER H WITH STROKE]
+ case '\u021E':
+ // Èž [LATIN CAPITAL LETTER H WITH CARON]
+ case '\u029C':
+ // ʜ [LATIN LETTER SMALL CAPITAL H]
+ case '\u1E22':
+ // Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE]
+ case '\u1E24':
+ // Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW]
+ case '\u1E26':
+ // Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS]
+ case '\u1E28':
+ // Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA]
+ case '\u1E2A':
+ // Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW]
+ case '\u24BD':
+ // â’½ [CIRCLED LATIN CAPITAL LETTER H]
+ case '\u2C67':
+ // â±§ [LATIN CAPITAL LETTER H WITH DESCENDER]
+ case '\u2C75':
+ // â±µ [LATIN CAPITAL LETTER HALF H]
+ case '\uFF28': // H [FULLWIDTH LATIN CAPITAL LETTER H]
+ output[opos++] = 'H';
+ break;
+
+ case '\u0125':
+ // ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX]
+ case '\u0127':
+ // ħ [LATIN SMALL LETTER H WITH STROKE]
+ case '\u021F':
+ // ÈŸ [LATIN SMALL LETTER H WITH CARON]
+ case '\u0265':
+ // ɥ [LATIN SMALL LETTER TURNED H]
+ case '\u0266':
+ // ɦ [LATIN SMALL LETTER H WITH HOOK]
+ case '\u02AE':
+ // Ê® [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
+ case '\u02AF':
+ // ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
+ case '\u1E23':
+ // ḣ [LATIN SMALL LETTER H WITH DOT ABOVE]
+ case '\u1E25':
+ // ḥ [LATIN SMALL LETTER H WITH DOT BELOW]
+ case '\u1E27':
+ // ḧ [LATIN SMALL LETTER H WITH DIAERESIS]
+ case '\u1E29':
+ // ḩ [LATIN SMALL LETTER H WITH CEDILLA]
+ case '\u1E2B':
+ // ḫ [LATIN SMALL LETTER H WITH BREVE BELOW]
+ case '\u1E96':
+ // ẖ [LATIN SMALL LETTER H WITH LINE BELOW]
+ case '\u24D7':
+ // â“— [CIRCLED LATIN SMALL LETTER H]
+ case '\u2C68':
+ // ⱨ [LATIN SMALL LETTER H WITH DESCENDER]
+ case '\u2C76':
+ // â±¶ [LATIN SMALL LETTER HALF H]
+ case '\uFF48': // h [FULLWIDTH LATIN SMALL LETTER H]
+ output[opos++] = 'h';
+ break;
+
+ case '\u01F6': // Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR]
+ output[opos++] = 'H';
+ output[opos++] = 'V';
+ break;
+
+ case '\u24A3': // â’£ [PARENTHESIZED LATIN SMALL LETTER H]
+ output[opos++] = '(';
+ output[opos++] = 'h';
+ output[opos++] = ')';
+ break;
+
+ case '\u0195': // Æ• [LATIN SMALL LETTER HV]
+ output[opos++] = 'h';
+ output[opos++] = 'v';
+ break;
+
+ case '\u00CC':
+ // Ì [LATIN CAPITAL LETTER I WITH GRAVE]
+ case '\u00CD':
+ // Í [LATIN CAPITAL LETTER I WITH ACUTE]
+ case '\u00CE':
+ // ÃŽ [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
+ case '\u00CF':
+ // Ï [LATIN CAPITAL LETTER I WITH DIAERESIS]
+ case '\u0128':
+ // Ĩ [LATIN CAPITAL LETTER I WITH TILDE]
+ case '\u012A':
+ // Ī [LATIN CAPITAL LETTER I WITH MACRON]
+ case '\u012C':
+ // Ĭ [LATIN CAPITAL LETTER I WITH BREVE]
+ case '\u012E':
+ // Ä® [LATIN CAPITAL LETTER I WITH OGONEK]
+ case '\u0130':
+ // İ [LATIN CAPITAL LETTER I WITH DOT ABOVE]
+ case '\u0196':
+ // Æ– [LATIN CAPITAL LETTER IOTA]
+ case '\u0197':
+ // Æ— [LATIN CAPITAL LETTER I WITH STROKE]
+ case '\u01CF':
+ // Ǐ [LATIN CAPITAL LETTER I WITH CARON]
+ case '\u0208':
+ // Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
+ case '\u020A':
+ // ÈŠ[LATIN CAPITAL LETTER I WITH INVERTED BREVE]
+ case '\u026A':
+ // ɪ [LATIN LETTER SMALL CAPITAL I]
+ case '\u1D7B':
+ // áµ» [LATIN SMALL CAPITAL LETTER I WITH STROKE]
+ case '\u1E2C':
+ // Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW]
+ case '\u1E2E':
+ // Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
+ case '\u1EC8':
+ // Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
+ case '\u1ECA':
+ // Ị[LATIN CAPITAL LETTER I WITH DOT BELOW]
+ case '\u24BE':
+ // â’¾ [CIRCLED LATIN CAPITAL LETTER I]
+ case '\uA7FE':
+ // ꟾ [LATIN EPIGRAPHIC LETTER I LONGA]
+ case '\uFF29': // I [FULLWIDTH LATIN CAPITAL LETTER I]
+ output[opos++] = 'I';
+ break;
+
+ case '\u00EC':
+ // ì [LATIN SMALL LETTER I WITH GRAVE]
+ case '\u00ED':
+ // í [LATIN SMALL LETTER I WITH ACUTE]
+ case '\u00EE':
+ // î [LATIN SMALL LETTER I WITH CIRCUMFLEX]
+ case '\u00EF':
+ // ï [LATIN SMALL LETTER I WITH DIAERESIS]
+ case '\u0129':
+ // Ä© [LATIN SMALL LETTER I WITH TILDE]
+ case '\u012B':
+ // Ä« [LATIN SMALL LETTER I WITH MACRON]
+ case '\u012D':
+ // Ä [LATIN SMALL LETTER I WITH BREVE]
+ case '\u012F':
+ // į [LATIN SMALL LETTER I WITH OGONEK]
+ case '\u0131':
+ // ı [LATIN SMALL LETTER DOTLESS I]
+ case '\u01D0':
+ // ǐ [LATIN SMALL LETTER I WITH CARON]
+ case '\u0209':
+ // ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
+ case '\u020B':
+ // È‹ [LATIN SMALL LETTER I WITH INVERTED BREVE]
+ case '\u0268':
+ // ɨ [LATIN SMALL LETTER I WITH STROKE]
+ case '\u1D09':
+ // á´‰ [LATIN SMALL LETTER TURNED I]
+ case '\u1D62':
+ // áµ¢ [LATIN SUBSCRIPT SMALL LETTER I]
+ case '\u1D7C':
+ // áµ¼ [LATIN SMALL LETTER IOTA WITH STROKE]
+ case '\u1D96':
+ // á¶– [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
+ case '\u1E2D':
+ // Ḡ[LATIN SMALL LETTER I WITH TILDE BELOW]
+ case '\u1E2F':
+ // ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
+ case '\u1EC9':
+ // ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE]
+ case '\u1ECB':
+ // ị [LATIN SMALL LETTER I WITH DOT BELOW]
+ case '\u2071':
+ // ⁱ [SUPERSCRIPT LATIN SMALL LETTER I]
+ case '\u24D8':
+ // ⓘ [CIRCLED LATIN SMALL LETTER I]
+ case '\uFF49': // i [FULLWIDTH LATIN SMALL LETTER I]
+ output[opos++] = 'i';
+ break;
+
+ case '\u0132': // IJ [LATIN CAPITAL LIGATURE IJ]
+ output[opos++] = 'I';
+ output[opos++] = 'J';
+ break;
+
+ case '\u24A4': // â’¤ [PARENTHESIZED LATIN SMALL LETTER I]
+ output[opos++] = '(';
+ output[opos++] = 'i';
+ output[opos++] = ')';
+ break;
+
+ case '\u0133': // ij [LATIN SMALL LIGATURE IJ]
+ output[opos++] = 'i';
+ output[opos++] = 'j';
+ break;
+
+ case '\u0134':
+ // Ä´ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
+ case '\u0248':
+ // Ɉ [LATIN CAPITAL LETTER J WITH STROKE]
+ case '\u1D0A':
+ // á´Š[LATIN LETTER SMALL CAPITAL J]
+ case '\u24BF':
+ // â’¿ [CIRCLED LATIN CAPITAL LETTER J]
+ case '\uFF2A': // J [FULLWIDTH LATIN CAPITAL LETTER J]
+ output[opos++] = 'J';
+ break;
+
+ case '\u0135':
+ // ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX]
+ case '\u01F0':
+ // ǰ [LATIN SMALL LETTER J WITH CARON]
+ case '\u0237':
+ // È· [LATIN SMALL LETTER DOTLESS J]
+ case '\u0249':
+ // ɉ [LATIN SMALL LETTER J WITH STROKE]
+ case '\u025F':
+ // ÉŸ [LATIN SMALL LETTER DOTLESS J WITH STROKE]
+ case '\u0284':
+ // Ê„ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
+ case '\u029D':
+ // ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL]
+ case '\u24D9':
+ // â“™ [CIRCLED LATIN SMALL LETTER J]
+ case '\u2C7C':
+ // â±¼ [LATIN SUBSCRIPT SMALL LETTER J]
+ case '\uFF4A': // j[FULLWIDTH LATIN SMALL LETTER J]
+ output[opos++] = 'j';
+ break;
+
+ case '\u24A5': // â’¥ [PARENTHESIZED LATIN SMALL LETTER J]
+ output[opos++] = '(';
+ output[opos++] = 'j';
+ output[opos++] = ')';
+ break;
+
+ case '\u0136':
+ // Ķ [LATIN CAPITAL LETTER K WITH CEDILLA]
+ case '\u0198':
+ // Ƙ [LATIN CAPITAL LETTER K WITH HOOK]
+ case '\u01E8':
+ // Ǩ [LATIN CAPITAL LETTER K WITH CARON]
+ case '\u1D0B':
+ // á´‹ [LATIN LETTER SMALL CAPITAL K]
+ case '\u1E30':
+ // Ḱ [LATIN CAPITAL LETTER K WITH ACUTE]
+ case '\u1E32':
+ // Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW]
+ case '\u1E34':
+ // Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW]
+ case '\u24C0':
+ // â“€ [CIRCLED LATIN CAPITAL LETTER K]
+ case '\u2C69':
+ // Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER]
+ case '\uA740':
+ // � [LATIN CAPITAL LETTER K WITH STROKE]
+ case '\uA742':
+ // � [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
+ case '\uA744':
+ // � [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
+ case '\uFF2B': // K [FULLWIDTH LATIN CAPITAL LETTER K]
+ output[opos++] = 'K';
+ break;
+
+ case '\u0137':
+ // Ä· [LATIN SMALL LETTER K WITH CEDILLA]
+ case '\u0199':
+ // Æ™ [LATIN SMALL LETTER K WITH HOOK]
+ case '\u01E9':
+ // Ç© [LATIN SMALL LETTER K WITH CARON]
+ case '\u029E':
+ // Êž [LATIN SMALL LETTER TURNED K]
+ case '\u1D84':
+ // á¶„ [LATIN SMALL LETTER K WITH PALATAL HOOK]
+ case '\u1E31':
+ // ḱ [LATIN SMALL LETTER K WITH ACUTE]
+ case '\u1E33':
+ // ḳ [LATIN SMALL LETTER K WITH DOT BELOW]
+ case '\u1E35':
+ // ḵ [LATIN SMALL LETTER K WITH LINE BELOW]
+ case '\u24DA':
+ // ⓚ [CIRCLED LATIN SMALL LETTER K]
+ case '\u2C6A':
+ // ⱪ [LATIN SMALL LETTER K WITH DESCENDER]
+ case '\uA741':
+ // � [LATIN SMALL LETTER K WITH STROKE]
+ case '\uA743':
+ // � [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
+ case '\uA745':
+ // � [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
+ case '\uFF4B': // k [FULLWIDTH LATIN SMALL LETTER K]
+ output[opos++] = 'k';
+ break;
+
+ case '\u24A6': // â’¦ [PARENTHESIZED LATIN SMALL LETTER K]
+ output[opos++] = '(';
+ output[opos++] = 'k';
+ output[opos++] = ')';
+ break;
+
+ case '\u0139':
+ // Ĺ [LATIN CAPITAL LETTER L WITH ACUTE]
+ case '\u013B':
+ // Ä» [LATIN CAPITAL LETTER L WITH CEDILLA]
+ case '\u013D':
+ // Ľ [LATIN CAPITAL LETTER L WITH CARON]
+ case '\u013F':
+ // Ä¿ [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
+ case '\u0141':
+ // Ł [LATIN CAPITAL LETTER L WITH STROKE]
+ case '\u023D':
+ // Ƚ [LATIN CAPITAL LETTER L WITH BAR]
+ case '\u029F':
+ // ÊŸ [LATIN LETTER SMALL CAPITAL L]
+ case '\u1D0C':
+ // ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE]
+ case '\u1E36':
+ // Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW]
+ case '\u1E38':
+ // Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
+ case '\u1E3A':
+ // Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW]
+ case '\u1E3C':
+ // Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
+ case '\u24C1':
+ // Ⓛ [CIRCLED LATIN CAPITAL LETTER L]
+ case '\u2C60':
+ // â± [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
+ case '\u2C62':
+ // â±¢ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
+ case '\uA746':
+ // �[LATIN CAPITAL LETTER BROKEN L]
+ case '\uA748':
+ // � [LATIN CAPITAL LETTER L WITH HIGH STROKE]
+ case '\uA780':
+ // Ꞁ [LATIN CAPITAL LETTER TURNED L]
+ case '\uFF2C': // L [FULLWIDTH LATIN CAPITAL LETTER L]
+ output[opos++] = 'L';
+ break;
+
+ case '\u013A':
+ // ĺ [LATIN SMALL LETTER L WITH ACUTE]
+ case '\u013C':
+ // ļ [LATIN SMALL LETTER L WITH CEDILLA]
+ case '\u013E':
+ // ľ [LATIN SMALL LETTER L WITH CARON]
+ case '\u0140':
+ // ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT]
+ case '\u0142':
+ // Å‚ [LATIN SMALL LETTER L WITH STROKE]
+ case '\u019A':
+ // Æš [LATIN SMALL LETTER L WITH BAR]
+ case '\u0234':
+ // È´ [LATIN SMALL LETTER L WITH CURL]
+ case '\u026B':
+ // É« [LATIN SMALL LETTER L WITH MIDDLE TILDE]
+ case '\u026C':
+ // ɬ [LATIN SMALL LETTER L WITH BELT]
+ case '\u026D':
+ // É [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
+ case '\u1D85':
+ // á¶… [LATIN SMALL LETTER L WITH PALATAL HOOK]
+ case '\u1E37':
+ // ḷ [LATIN SMALL LETTER L WITH DOT BELOW]
+ case '\u1E39':
+ // ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
+ case '\u1E3B':
+ // ḻ [LATIN SMALL LETTER L WITH LINE BELOW]
+ case '\u1E3D':
+ // ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
+ case '\u24DB':
+ // â“› [CIRCLED LATIN SMALL LETTER L]
+ case '\u2C61':
+ // ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR]
+ case '\uA747':
+ // � [LATIN SMALL LETTER BROKEN L]
+ case '\uA749':
+ // � [LATIN SMALL LETTER L WITH HIGH STROKE]
+ case '\uA781':
+ // � [LATIN SMALL LETTER TURNED L]
+ case '\uFF4C': // l [FULLWIDTH LATIN SMALL LETTER L]
+ output[opos++] = 'l';
+ break;
+
+ case '\u01C7': // LJ [LATIN CAPITAL LETTER LJ]
+ output[opos++] = 'L';
+ output[opos++] = 'J';
+ break;
+
+ case '\u1EFA': // Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
+ output[opos++] = 'L';
+ output[opos++] = 'L';
+ break;
+
+ case '\u01C8': // Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
+ output[opos++] = 'L';
+ output[opos++] = 'j';
+ break;
+
+ case '\u24A7': // â’§ [PARENTHESIZED LATIN SMALL LETTER L]
+ output[opos++] = '(';
+ output[opos++] = 'l';
+ output[opos++] = ')';
+ break;
+
+ case '\u01C9': // lj [LATIN SMALL LETTER LJ]
+ output[opos++] = 'l';
+ output[opos++] = 'j';
+ break;
+
+ case '\u1EFB': // á»» [LATIN SMALL LETTER MIDDLE-WELSH LL]
+ output[opos++] = 'l';
+ output[opos++] = 'l';
+ break;
+
+ case '\u02AA': // ʪ [LATIN SMALL LETTER LS DIGRAPH]
+ output[opos++] = 'l';
+ output[opos++] = 's';
+ break;
+
+ case '\u02AB': // Ê« [LATIN SMALL LETTER LZ DIGRAPH]
+ output[opos++] = 'l';
+ output[opos++] = 'z';
+ break;
+
+ case '\u019C':
+ // Ɯ [LATIN CAPITAL LETTER TURNED M]
+ case '\u1D0D':
+ // á´� [LATIN LETTER SMALL CAPITAL M]
+ case '\u1E3E':
+ // Ḿ [LATIN CAPITAL LETTER M WITH ACUTE]
+ case '\u1E40':
+ // á¹€ [LATIN CAPITAL LETTER M WITH DOT ABOVE]
+ case '\u1E42':
+ // Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW]
+ case '\u24C2':
+ // â“‚ [CIRCLED LATIN CAPITAL LETTER M]
+ case '\u2C6E':
+ // â±® [LATIN CAPITAL LETTER M WITH HOOK]
+ case '\uA7FD':
+ // ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M]
+ case '\uA7FF':
+ // ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M]
+ case '\uFF2D': // ï¼ [FULLWIDTH LATIN CAPITAL LETTER M]
+ output[opos++] = 'M';
+ break;
+
+ case '\u026F':
+ // ɯ [LATIN SMALL LETTER TURNED M]
+ case '\u0270':
+ // ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG]
+ case '\u0271':
+ // ɱ [LATIN SMALL LETTER M WITH HOOK]
+ case '\u1D6F':
+ // ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE]
+ case '\u1D86':
+ // ᶆ[LATIN SMALL LETTER M WITH PALATAL HOOK]
+ case '\u1E3F':
+ // ḿ [LATIN SMALL LETTER M WITH ACUTE]
+ case '\u1E41':
+ // ṁ [LATIN SMALL LETTER M WITH DOT ABOVE]
+ case '\u1E43':
+ // ṃ [LATIN SMALL LETTER M WITH DOT BELOW]
+ case '\u24DC':
+ // ⓜ [CIRCLED LATIN SMALL LETTER M]
+ case '\uFF4D': // � [FULLWIDTH LATIN SMALL LETTER M]
+ output[opos++] = 'm';
+ break;
+
+ case '\u24A8': // â’¨ [PARENTHESIZED LATIN SMALL LETTER M]
+ output[opos++] = '(';
+ output[opos++] = 'm';
+ output[opos++] = ')';
+ break;
+
+ case '\u00D1':
+ // Ñ [LATIN CAPITAL LETTER N WITH TILDE]
+ case '\u0143':
+ // Ń [LATIN CAPITAL LETTER N WITH ACUTE]
+ case '\u0145':
+ // Å… [LATIN CAPITAL LETTER N WITH CEDILLA]
+ case '\u0147':
+ // Ň [LATIN CAPITAL LETTER N WITH CARON]
+ case '\u014A':
+ // Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG]
+ case '\u019D':
+ // Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK]
+ case '\u01F8':
+ // Ǹ [LATIN CAPITAL LETTER N WITH GRAVE]
+ case '\u0220':
+ // È [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
+ case '\u0274':
+ // É´ [LATIN LETTER SMALL CAPITAL N]
+ case '\u1D0E':
+ // á´Ž [LATIN LETTER SMALL CAPITAL REVERSED N]
+ case '\u1E44':
+ // Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE]
+ case '\u1E46':
+ // Ṇ[LATIN CAPITAL LETTER N WITH DOT BELOW]
+ case '\u1E48':
+ // Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW]
+ case '\u1E4A':
+ // Ṋ[LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
+ case '\u24C3':
+ // Ⓝ [CIRCLED LATIN CAPITAL LETTER N]
+ case '\uFF2E': // ï¼® [FULLWIDTH LATIN CAPITAL LETTER N]
+ output[opos++] = 'N';
+ break;
+
+ case '\u00F1':
+ // ñ [LATIN SMALL LETTER N WITH TILDE]
+ case '\u0144':
+ // Å„ [LATIN SMALL LETTER N WITH ACUTE]
+ case '\u0146':
+ // ņ[LATIN SMALL LETTER N WITH CEDILLA]
+ case '\u0148':
+ // ň [LATIN SMALL LETTER N WITH CARON]
+ case '\u0149':
+ // ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
+ case '\u014B':
+ // Å‹ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG]
+ case '\u019E':
+ // Æž [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
+ case '\u01F9':
+ // ǹ [LATIN SMALL LETTER N WITH GRAVE]
+ case '\u0235':
+ // ȵ [LATIN SMALL LETTER N WITH CURL]
+ case '\u0272':
+ // ɲ [LATIN SMALL LETTER N WITH LEFT HOOK]
+ case '\u0273':
+ // ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
+ case '\u1D70':
+ // áµ° [LATIN SMALL LETTER N WITH MIDDLE TILDE]
+ case '\u1D87':
+ // ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK]
+ case '\u1E45':
+ // á¹… [LATIN SMALL LETTER N WITH DOT ABOVE]
+ case '\u1E47':
+ // ṇ [LATIN SMALL LETTER N WITH DOT BELOW]
+ case '\u1E49':
+ // ṉ [LATIN SMALL LETTER N WITH LINE BELOW]
+ case '\u1E4B':
+ // ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
+ case '\u207F':
+ // ⁿ [SUPERSCRIPT LATIN SMALL LETTER N]
+ case '\u24DD':
+ // ⓝ [CIRCLED LATIN SMALL LETTER N]
+ case '\uFF4E': // n [FULLWIDTH LATIN SMALL LETTER N]
+ output[opos++] = 'n';
+ break;
+
+ case '\u01CA': // ÇŠ[LATIN CAPITAL LETTER NJ]
+ output[opos++] = 'N';
+ output[opos++] = 'J';
+ break;
+
+ case '\u01CB': // Ç‹ [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
+ output[opos++] = 'N';
+ output[opos++] = 'j';
+ break;
+
+ case '\u24A9': // â’© [PARENTHESIZED LATIN SMALL LETTER N]
+ output[opos++] = '(';
+ output[opos++] = 'n';
+ output[opos++] = ')';
+ break;
+
+ case '\u01CC': // nj [LATIN SMALL LETTER NJ]
+ output[opos++] = 'n';
+ output[opos++] = 'j';
+ break;
+
+ case '\u00D2':
+ // Ã’ [LATIN CAPITAL LETTER O WITH GRAVE]
+ case '\u00D3':
+ // Ó [LATIN CAPITAL LETTER O WITH ACUTE]
+ case '\u00D4':
+ // Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
+ case '\u00D5':
+ // Õ [LATIN CAPITAL LETTER O WITH TILDE]
+ case '\u00D6':
+ // Ö [LATIN CAPITAL LETTER O WITH DIAERESIS]
+ case '\u00D8':
+ // Ø [LATIN CAPITAL LETTER O WITH STROKE]
+ case '\u014C':
+ // Ō [LATIN CAPITAL LETTER O WITH MACRON]
+ case '\u014E':
+ // ÅŽ [LATIN CAPITAL LETTER O WITH BREVE]
+ case '\u0150':
+ // Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
+ case '\u0186':
+ // Ɔ[LATIN CAPITAL LETTER OPEN O]
+ case '\u019F':
+ // ÆŸ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
+ case '\u01A0':
+ // Æ [LATIN CAPITAL LETTER O WITH HORN]
+ case '\u01D1':
+ // Ç‘ [LATIN CAPITAL LETTER O WITH CARON]
+ case '\u01EA':
+ // Ǫ [LATIN CAPITAL LETTER O WITH OGONEK]
+ case '\u01EC':
+ // Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
+ case '\u01FE':
+ // Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
+ case '\u020C':
+ // Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
+ case '\u020E':
+ // ÈŽ [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
+ case '\u022A':
+ // Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
+ case '\u022C':
+ // Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
+ case '\u022E':
+ // È® [LATIN CAPITAL LETTER O WITH DOT ABOVE]
+ case '\u0230':
+ // Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
+ case '\u1D0F':
+ // á´� [LATIN LETTER SMALL CAPITAL O]
+ case '\u1D10':
+ // á´� [LATIN LETTER SMALL CAPITAL OPEN O]
+ case '\u1E4C':
+ // Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
+ case '\u1E4E':
+ // Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
+ case '\u1E50':
+ // Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
+ case '\u1E52':
+ // á¹’ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
+ case '\u1ECC':
+ // Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW]
+ case '\u1ECE':
+ // Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
+ case '\u1ED0':
+ // Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
+ case '\u1ED2':
+ // á»’ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
+ case '\u1ED4':
+ // Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
+ case '\u1ED6':
+ // á»– [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
+ case '\u1ED8':
+ // Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
+ case '\u1EDA':
+ // Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
+ case '\u1EDC':
+ // Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
+ case '\u1EDE':
+ // Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
+ case '\u1EE0':
+ // á» [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
+ case '\u1EE2':
+ // Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
+ case '\u24C4':
+ // â“„ [CIRCLED LATIN CAPITAL LETTER O]
+ case '\uA74A':
+ // �[LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
+ case '\uA74C':
+ // � [LATIN CAPITAL LETTER O WITH LOOP]
+ case '\uFF2F': // O [FULLWIDTH LATIN CAPITAL LETTER O]
+ output[opos++] = 'O';
+ break;
+
+ case '\u00F2':
+ // ò [LATIN SMALL LETTER O WITH GRAVE]
+ case '\u00F3':
+ // ó [LATIN SMALL LETTER O WITH ACUTE]
+ case '\u00F4':
+ // ô [LATIN SMALL LETTER O WITH CIRCUMFLEX]
+ case '\u00F5':
+ // õ [LATIN SMALL LETTER O WITH TILDE]
+ case '\u00F6':
+ // ö [LATIN SMALL LETTER O WITH DIAERESIS]
+ case '\u00F8':
+ // ø [LATIN SMALL LETTER O WITH STROKE]
+ case '\u014D':
+ // ō [LATIN SMALL LETTER O WITH MACRON]
+ case '\u014F':
+ // ŏ [LATIN SMALL LETTER O WITH BREVE]
+ case '\u0151':
+ // ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
+ case '\u01A1':
+ // ơ [LATIN SMALL LETTER O WITH HORN]
+ case '\u01D2':
+ // ǒ [LATIN SMALL LETTER O WITH CARON]
+ case '\u01EB':
+ // Ç« [LATIN SMALL LETTER O WITH OGONEK]
+ case '\u01ED':
+ // Ç [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
+ case '\u01FF':
+ // Ç¿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
+ case '\u020D':
+ // � [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
+ case '\u020F':
+ // � [LATIN SMALL LETTER O WITH INVERTED BREVE]
+ case '\u022B':
+ // È« [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
+ case '\u022D':
+ // È [LATIN SMALL LETTER O WITH TILDE AND MACRON]
+ case '\u022F':
+ // ȯ [LATIN SMALL LETTER O WITH DOT ABOVE]
+ case '\u0231':
+ // ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
+ case '\u0254':
+ // �? [LATIN SMALL LETTER OPEN O]
+ case '\u0275':
+ // ɵ [LATIN SMALL LETTER BARRED O]
+ case '\u1D16':
+ // á´– [LATIN SMALL LETTER TOP HALF O]
+ case '\u1D17':
+ // á´— [LATIN SMALL LETTER BOTTOM HALF O]
+ case '\u1D97':
+ // á¶— [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
+ case '\u1E4D':
+ // � [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
+ case '\u1E4F':
+ // � [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
+ case '\u1E51':
+ // ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
+ case '\u1E53':
+ // ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
+ case '\u1ECD':
+ // � [LATIN SMALL LETTER O WITH DOT BELOW]
+ case '\u1ECF':
+ // � [LATIN SMALL LETTER O WITH HOOK ABOVE]
+ case '\u1ED1':
+ // ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
+ case '\u1ED3':
+ // ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
+ case '\u1ED5':
+ // ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
+ case '\u1ED7':
+ // á»— [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
+ case '\u1ED9':
+ // á»™ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
+ case '\u1EDB':
+ // á»› [LATIN SMALL LETTER O WITH HORN AND ACUTE]
+ case '\u1EDD':
+ // � [LATIN SMALL LETTER O WITH HORN AND GRAVE]
+ case '\u1EDF':
+ // ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
+ case '\u1EE1':
+ // ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE]
+ case '\u1EE3':
+ // ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
+ case '\u2092':
+ // â‚’ [LATIN SUBSCRIPT SMALL LETTER O]
+ case '\u24DE':
+ // ⓞ [CIRCLED LATIN SMALL LETTER O]
+ case '\u2C7A':
+ // ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE]
+ case '\uA74B':
+ // � [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
+ case '\uA74D':
+ // � [LATIN SMALL LETTER O WITH LOOP]
+ case '\uFF4F': // � [FULLWIDTH LATIN SMALL LETTER O]
+ output[opos++] = 'o';
+ break;
+
+ case '\u0152':
+ // Œ [LATIN CAPITAL LIGATURE OE]
+ case '\u0276': // ɶ [LATIN LETTER SMALL CAPITAL OE]
+ output[opos++] = 'O';
+ output[opos++] = 'E';
+ break;
+
+ case '\uA74E': // � [LATIN CAPITAL LETTER OO]
+ output[opos++] = 'O';
+ output[opos++] = 'O';
+ break;
+
+ case '\u0222':
+ // Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU]
+ case '\u1D15': // á´• [LATIN LETTER SMALL CAPITAL OU]
+ output[opos++] = 'O';
+ output[opos++] = 'U';
+ break;
+
+ case '\u24AA': // â’ª [PARENTHESIZED LATIN SMALL LETTER O]
+ output[opos++] = '(';
+ output[opos++] = 'o';
+ output[opos++] = ')';
+ break;
+
+ case '\u0153':
+ // œ [LATIN SMALL LIGATURE OE]
+ case '\u1D14': // á´�? [LATIN SMALL LETTER TURNED OE]
+ output[opos++] = 'o';
+ output[opos++] = 'e';
+ break;
+
+ case '\uA74F': // � [LATIN SMALL LETTER OO]
+ output[opos++] = 'o';
+ output[opos++] = 'o';
+ break;
+
+ case '\u0223': // ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU]
+ output[opos++] = 'o';
+ output[opos++] = 'u';
+ break;
+
+ case '\u01A4':
+ // Ƥ [LATIN CAPITAL LETTER P WITH HOOK]
+ case '\u1D18':
+ // á´˜ [LATIN LETTER SMALL CAPITAL P]
+ case '\u1E54':
+ // �? [LATIN CAPITAL LETTER P WITH ACUTE]
+ case '\u1E56':
+ // á¹– [LATIN CAPITAL LETTER P WITH DOT ABOVE]
+ case '\u24C5':
+ // â“… [CIRCLED LATIN CAPITAL LETTER P]
+ case '\u2C63':
+ // â±£ [LATIN CAPITAL LETTER P WITH STROKE]
+ case '\uA750':
+ // � [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
+ case '\uA752':
+ // � [LATIN CAPITAL LETTER P WITH FLOURISH]
+ case '\uA754':
+ // �? [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
+ case '\uFF30': // ï¼° [FULLWIDTH LATIN CAPITAL LETTER P]
+ output[opos++] = 'P';
+ break;
+
+ case '\u01A5':
+ // ƥ [LATIN SMALL LETTER P WITH HOOK]
+ case '\u1D71':
+ // áµ± [LATIN SMALL LETTER P WITH MIDDLE TILDE]
+ case '\u1D7D':
+ // áµ½ [LATIN SMALL LETTER P WITH STROKE]
+ case '\u1D88':
+ // ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK]
+ case '\u1E55':
+ // ṕ [LATIN SMALL LETTER P WITH ACUTE]
+ case '\u1E57':
+ // á¹— [LATIN SMALL LETTER P WITH DOT ABOVE]
+ case '\u24DF':
+ // ⓟ [CIRCLED LATIN SMALL LETTER P]
+ case '\uA751':
+ // � [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
+ case '\uA753':
+ // � [LATIN SMALL LETTER P WITH FLOURISH]
+ case '\uA755':
+ // � [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
+ case '\uA7FC':
+ // ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P]
+ case '\uFF50': // � [FULLWIDTH LATIN SMALL LETTER P]
+ output[opos++] = 'p';
+ break;
+
+ case '\u24AB': // â’« [PARENTHESIZED LATIN SMALL LETTER P]
+ output[opos++] = '(';
+ output[opos++] = 'p';
+ output[opos++] = ')';
+ break;
+
+ case '\u024A':
+ // ÉŠ[LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
+ case '\u24C6':
+ // Ⓠ[CIRCLED LATIN CAPITAL LETTER Q]
+ case '\uA756':
+ // � [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
+ case '\uA758':
+ // � [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
+ case '\uFF31': // ï¼± [FULLWIDTH LATIN CAPITAL LETTER Q]
+ output[opos++] = 'Q';
+ break;
+
+ case '\u0138':
+ // ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA]
+ case '\u024B':
+ // É‹ [LATIN SMALL LETTER Q WITH HOOK TAIL]
+ case '\u02A0':
+ // Ê [LATIN SMALL LETTER Q WITH HOOK]
+ case '\u24E0':
+ // â“ [CIRCLED LATIN SMALL LETTER Q]
+ case '\uA757':
+ // � [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
+ case '\uA759':
+ // � [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
+ case '\uFF51': // q [FULLWIDTH LATIN SMALL LETTER Q]
+ output[opos++] = 'q';
+ break;
+
+ case '\u24AC': // â’¬ [PARENTHESIZED LATIN SMALL LETTER Q]
+ output[opos++] = '(';
+ output[opos++] = 'q';
+ output[opos++] = ')';
+ break;
+
+ case '\u0239': // ȹ [LATIN SMALL LETTER QP DIGRAPH]
+ output[opos++] = 'q';
+ output[opos++] = 'p';
+ break;
+
+ case '\u0154':
+ // Ŕ [LATIN CAPITAL LETTER R WITH ACUTE]
+ case '\u0156':
+ // Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA]
+ case '\u0158':
+ // Ř [LATIN CAPITAL LETTER R WITH CARON]
+ case '\u0210':
+ // Ȑ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
+ case '\u0212':
+ // Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
+ case '\u024C':
+ // Ɍ [LATIN CAPITAL LETTER R WITH STROKE]
+ case '\u0280':
+ // ʀ [LATIN LETTER SMALL CAPITAL R]
+ case '\u0281':
+ // � [LATIN LETTER SMALL CAPITAL INVERTED R]
+ case '\u1D19':
+ // á´™ [LATIN LETTER SMALL CAPITAL REVERSED R]
+ case '\u1D1A':
+ // á´š [LATIN LETTER SMALL CAPITAL TURNED R]
+ case '\u1E58':
+ // Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE]
+ case '\u1E5A':
+ // Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW]
+ case '\u1E5C':
+ // Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
+ case '\u1E5E':
+ // Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW]
+ case '\u24C7':
+ // Ⓡ [CIRCLED LATIN CAPITAL LETTER R]
+ case '\u2C64':
+ // Ɽ [LATIN CAPITAL LETTER R WITH TAIL]
+ case '\uA75A':
+ // � [LATIN CAPITAL LETTER R ROTUNDA]
+ case '\uA782':
+ // êž‚ [LATIN CAPITAL LETTER INSULAR R]
+ case '\uFF32': // ï¼² [FULLWIDTH LATIN CAPITAL LETTER R]
+ output[opos++] = 'R';
+ break;
+
+ case '\u0155':
+ // ŕ [LATIN SMALL LETTER R WITH ACUTE]
+ case '\u0157':
+ // ŗ [LATIN SMALL LETTER R WITH CEDILLA]
+ case '\u0159':
+ // ř [LATIN SMALL LETTER R WITH CARON]
+ case '\u0211':
+ // È‘ [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
+ case '\u0213':
+ // È“ [LATIN SMALL LETTER R WITH INVERTED BREVE]
+ case '\u024D':
+ // � [LATIN SMALL LETTER R WITH STROKE]
+ case '\u027C':
+ // ɼ [LATIN SMALL LETTER R WITH LONG LEG]
+ case '\u027D':
+ // ɽ [LATIN SMALL LETTER R WITH TAIL]
+ case '\u027E':
+ // ɾ [LATIN SMALL LETTER R WITH FISHHOOK]
+ case '\u027F':
+ // É¿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
+ case '\u1D63':
+ // áµ£ [LATIN SUBSCRIPT SMALL LETTER R]
+ case '\u1D72':
+ // áµ² [LATIN SMALL LETTER R WITH MIDDLE TILDE]
+ case '\u1D73':
+ // áµ³ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
+ case '\u1D89':
+ // ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK]
+ case '\u1E59':
+ // á¹™ [LATIN SMALL LETTER R WITH DOT ABOVE]
+ case '\u1E5B':
+ // á¹› [LATIN SMALL LETTER R WITH DOT BELOW]
+ case '\u1E5D':
+ // � [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
+ case '\u1E5F':
+ // ṟ [LATIN SMALL LETTER R WITH LINE BELOW]
+ case '\u24E1':
+ // â“¡ [CIRCLED LATIN SMALL LETTER R]
+ case '\uA75B':
+ // � [LATIN SMALL LETTER R ROTUNDA]
+ case '\uA783':
+ // ꞃ [LATIN SMALL LETTER INSULAR R]
+ case '\uFF52': // ï½’ [FULLWIDTH LATIN SMALL LETTER R]
+ output[opos++] = 'r';
+ break;
+
+ case '\u24AD': // â’ [PARENTHESIZED LATIN SMALL LETTER R]
+ output[opos++] = '(';
+ output[opos++] = 'r';
+ output[opos++] = ')';
+ break;
+
+ case '\u015A':
+ // Ś [LATIN CAPITAL LETTER S WITH ACUTE]
+ case '\u015C':
+ // Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
+ case '\u015E':
+ // Ş [LATIN CAPITAL LETTER S WITH CEDILLA]
+ case '\u0160':
+ // Š [LATIN CAPITAL LETTER S WITH CARON]
+ case '\u0218':
+ // Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW]
+ case '\u1E60':
+ // á¹ [LATIN CAPITAL LETTER S WITH DOT ABOVE]
+ case '\u1E62':
+ // á¹¢ [LATIN CAPITAL LETTER S WITH DOT BELOW]
+ case '\u1E64':
+ // Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
+ case '\u1E66':
+ // Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
+ case '\u1E68':
+ // Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
+ case '\u24C8':
+ // Ⓢ [CIRCLED LATIN CAPITAL LETTER S]
+ case '\uA731':
+ // ꜱ [LATIN LETTER SMALL CAPITAL S]
+ case '\uA785':
+ // ꞅ [LATIN SMALL LETTER INSULAR S] — NOTE(review): a small letter folded to uppercase 'S' here; confirm whether this is intentional
+ case '\uFF33': // ï¼³ [FULLWIDTH LATIN CAPITAL LETTER S]
+ output[opos++] = 'S';
+ break;
+
+ case '\u015B':
+ // ś [LATIN SMALL LETTER S WITH ACUTE]
+ case '\u015D':
+ // ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX]
+ case '\u015F':
+ // ş [LATIN SMALL LETTER S WITH CEDILLA]
+ case '\u0161':
+ // š [LATIN SMALL LETTER S WITH CARON]
+ case '\u017F':
+ // ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S]
+ case '\u0219':
+ // È™ [LATIN SMALL LETTER S WITH COMMA BELOW]
+ case '\u023F':
+ // È¿ [LATIN SMALL LETTER S WITH SWASH TAIL]
+ case '\u0282':
+ // Ê‚ [LATIN SMALL LETTER S WITH HOOK]
+ case '\u1D74':
+ // áµ´ [LATIN SMALL LETTER S WITH MIDDLE TILDE]
+ case '\u1D8A':
+ // á¶Š[LATIN SMALL LETTER S WITH PALATAL HOOK]
+ case '\u1E61':
+ // ṡ [LATIN SMALL LETTER S WITH DOT ABOVE]
+ case '\u1E63':
+ // á¹£ [LATIN SMALL LETTER S WITH DOT BELOW]
+ case '\u1E65':
+ // á¹¥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
+ case '\u1E67':
+ // á¹§ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
+ case '\u1E69':
+ // ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
+ case '\u1E9C':
+ // ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
+ case '\u1E9D':
+ // � [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
+ case '\u24E2':
+ // â“¢ [CIRCLED LATIN SMALL LETTER S]
+ case '\uA784':
+ // Ꞅ [LATIN CAPITAL LETTER INSULAR S] — NOTE(review): a capital letter folded to lowercase 's' here; confirm whether this is intentional
+ case '\uFF53': // s [FULLWIDTH LATIN SMALL LETTER S]
+ output[opos++] = 's';
+ break;
+
+ case '\u1E9E': // ẞ [LATIN CAPITAL LETTER SHARP S]
+ output[opos++] = 'S';
+ output[opos++] = 'S';
+ break;
+
+ case '\u24AE': // â’® [PARENTHESIZED LATIN SMALL LETTER S]
+ output[opos++] = '(';
+ output[opos++] = 's';
+ output[opos++] = ')';
+ break;
+
+ case '\u00DF': // ß [LATIN SMALL LETTER SHARP S]
+ output[opos++] = 's';
+ output[opos++] = 's';
+ break;
+
+ case '\uFB06': // st[LATIN SMALL LIGATURE ST]
+ output[opos++] = 's';
+ output[opos++] = 't';
+ break;
+
+ case '\u0162':
+ // Ţ [LATIN CAPITAL LETTER T WITH CEDILLA]
+ case '\u0164':
+ // Ť [LATIN CAPITAL LETTER T WITH CARON]
+ case '\u0166':
+ // Ŧ [LATIN CAPITAL LETTER T WITH STROKE]
+ case '\u01AC':
+ // Ƭ [LATIN CAPITAL LETTER T WITH HOOK]
+ case '\u01AE':
+ // Æ® [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
+ case '\u021A':
+ // Èš [LATIN CAPITAL LETTER T WITH COMMA BELOW]
+ case '\u023E':
+ // Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
+ case '\u1D1B':
+ // á´› [LATIN LETTER SMALL CAPITAL T]
+ case '\u1E6A':
+ // Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE]
+ case '\u1E6C':
+ // Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW]
+ case '\u1E6E':
+ // á¹® [LATIN CAPITAL LETTER T WITH LINE BELOW]
+ case '\u1E70':
+ // á¹° [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
+ case '\u24C9':
+ // Ⓣ [CIRCLED LATIN CAPITAL LETTER T]
+ case '\uA786':
+ // Ꞇ[LATIN CAPITAL LETTER INSULAR T]
+ case '\uFF34': // ï¼´ [FULLWIDTH LATIN CAPITAL LETTER T]
+ output[opos++] = 'T';
+ break;
+
+ case '\u0163':
+ // ţ [LATIN SMALL LETTER T WITH CEDILLA]
+ case '\u0165':
+ // ť [LATIN SMALL LETTER T WITH CARON]
+ case '\u0167':
+ // ŧ [LATIN SMALL LETTER T WITH STROKE]
+ case '\u01AB':
+ // Æ« [LATIN SMALL LETTER T WITH PALATAL HOOK]
+ case '\u01AD':
+ // Æ [LATIN SMALL LETTER T WITH HOOK]
+ case '\u021B':
+ // È› [LATIN SMALL LETTER T WITH COMMA BELOW]
+ case '\u0236':
+ // ȶ [LATIN SMALL LETTER T WITH CURL]
+ case '\u0287':
+ // ʇ [LATIN SMALL LETTER TURNED T]
+ case '\u0288':
+ // ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
+ case '\u1D75':
+ // áµµ [LATIN SMALL LETTER T WITH MIDDLE TILDE]
+ case '\u1E6B':
+ // ṫ [LATIN SMALL LETTER T WITH DOT ABOVE]
+ case '\u1E6D':
+ // á¹ [LATIN SMALL LETTER T WITH DOT BELOW]
+ case '\u1E6F':
+ // ṯ [LATIN SMALL LETTER T WITH LINE BELOW]
+ case '\u1E71':
+ // á¹± [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
+ case '\u1E97':
+ // ẗ [LATIN SMALL LETTER T WITH DIAERESIS]
+ case '\u24E3':
+ // â“£ [CIRCLED LATIN SMALL LETTER T]
+ case '\u2C66':
+ // ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
+ case '\uFF54': // �? [FULLWIDTH LATIN SMALL LETTER T]
+ output[opos++] = 't';
+ break;
+
+ case '\u00DE':
+ // Þ [LATIN CAPITAL LETTER THORN]
+ case '\uA766': // � [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
+ output[opos++] = 'T';
+ output[opos++] = 'H';
+ break;
+
+ case '\uA728': // Ꜩ [LATIN CAPITAL LETTER TZ]
+ output[opos++] = 'T';
+ output[opos++] = 'Z';
+ break;
+
+ case '\u24AF': // â’¯ [PARENTHESIZED LATIN SMALL LETTER T]
+ output[opos++] = '(';
+ output[opos++] = 't';
+ output[opos++] = ')';
+ break;
+
+ case '\u02A8': // ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
+ output[opos++] = 't';
+ output[opos++] = 'c';
+ break;
+
+ case '\u00FE':
+ // þ [LATIN SMALL LETTER THORN]
+ case '\u1D7A':
+ // ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
+ case '\uA767': // � [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
+ output[opos++] = 't';
+ output[opos++] = 'h';
+ break;
+
+ case '\u02A6': // ʦ [LATIN SMALL LETTER TS DIGRAPH]
+ output[opos++] = 't';
+ output[opos++] = 's';
+ break;
+
+ case '\uA729': // ꜩ [LATIN SMALL LETTER TZ]
+ output[opos++] = 't';
+ output[opos++] = 'z';
+ break;
+
+ case '\u00D9':
+ // Ù [LATIN CAPITAL LETTER U WITH GRAVE]
+ case '\u00DA':
+ // Ú [LATIN CAPITAL LETTER U WITH ACUTE]
+ case '\u00DB':
+ // Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
+ case '\u00DC':
+ // Ü [LATIN CAPITAL LETTER U WITH DIAERESIS]
+ case '\u0168':
+ // Ũ [LATIN CAPITAL LETTER U WITH TILDE]
+ case '\u016A':
+ // Ū [LATIN CAPITAL LETTER U WITH MACRON]
+ case '\u016C':
+ // Ŭ [LATIN CAPITAL LETTER U WITH BREVE]
+ case '\u016E':
+ // Ů [LATIN CAPITAL LETTER U WITH RING ABOVE]
+ case '\u0170':
+ // Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
+ case '\u0172':
+ // Ų [LATIN CAPITAL LETTER U WITH OGONEK]
+ case '\u01AF':
+ // Ư [LATIN CAPITAL LETTER U WITH HORN]
+ case '\u01D3':
+ // Ç“ [LATIN CAPITAL LETTER U WITH CARON]
+ case '\u01D5':
+ // Ç• [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
+ case '\u01D7':
+ // Ç— [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
+ case '\u01D9':
+ // Ç™ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
+ case '\u01DB':
+ // Ç› [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
+ case '\u0214':
+ // �? [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
+ case '\u0216':
+ // È– [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
+ case '\u0244':
+ // É„ [LATIN CAPITAL LETTER U BAR]
+ case '\u1D1C':
+ // ᴜ [LATIN LETTER SMALL CAPITAL U]
+ case '\u1D7E':
+ // áµ¾ [LATIN SMALL CAPITAL LETTER U WITH STROKE]
+ case '\u1E72':
+ // á¹² [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
+ case '\u1E74':
+ // á¹´ [LATIN CAPITAL LETTER U WITH TILDE BELOW]
+ case '\u1E76':
+ // á¹¶ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
+ case '\u1E78':
+ // Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
+ case '\u1E7A':
+ // Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
+ case '\u1EE4':
+ // Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW]
+ case '\u1EE6':
+ // Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
+ case '\u1EE8':
+ // Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
+ case '\u1EEA':
+ // Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
+ case '\u1EEC':
+ // Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
+ case '\u1EEE':
+ // á»® [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
+ case '\u1EF0':
+ // á»° [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
+ case '\u24CA':
+ // Ⓤ[CIRCLED LATIN CAPITAL LETTER U]
+ case '\uFF35': // ï¼µ [FULLWIDTH LATIN CAPITAL LETTER U]
+ output[opos++] = 'U';
+ break;
+
+ case '\u00F9':
+ // ù [LATIN SMALL LETTER U WITH GRAVE]
+ case '\u00FA':
+ // ú [LATIN SMALL LETTER U WITH ACUTE]
+ case '\u00FB':
+ // û [LATIN SMALL LETTER U WITH CIRCUMFLEX]
+ case '\u00FC':
+ // ü [LATIN SMALL LETTER U WITH DIAERESIS]
+ case '\u0169':
+ // Å© [LATIN SMALL LETTER U WITH TILDE]
+ case '\u016B':
+ // Å« [LATIN SMALL LETTER U WITH MACRON]
+ case '\u016D':
+ // Ã…Â [LATIN SMALL LETTER U WITH BREVE]
+ case '\u016F':
+ // ů [LATIN SMALL LETTER U WITH RING ABOVE]
+ case '\u0171':
+ // ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
+ case '\u0173':
+ // ų [LATIN SMALL LETTER U WITH OGONEK]
+ case '\u01B0':
+ // ư [LATIN SMALL LETTER U WITH HORN]
+ case '\u01D4':
+ // �? [LATIN SMALL LETTER U WITH CARON]
+ case '\u01D6':
+ // Ç– [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
+ case '\u01D8':
+ // ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
+ case '\u01DA':
+ // Çš [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
+ case '\u01DC':
+ // ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
+ case '\u0215':
+ // È• [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
+ case '\u0217':
+ // È— [LATIN SMALL LETTER U WITH INVERTED BREVE]
+ case '\u0289':
+ // ʉ [LATIN SMALL LETTER U BAR]
+ case '\u1D64':
+ // ᵤ [LATIN SUBSCRIPT SMALL LETTER U]
+ case '\u1D99':
+ // á¶™ [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
+ case '\u1E73':
+ // á¹³ [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
+ case '\u1E75':
+ // á¹µ [LATIN SMALL LETTER U WITH TILDE BELOW]
+ case '\u1E77':
+ // á¹· [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
+ case '\u1E79':
+ // á¹¹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
+ case '\u1E7B':
+ // á¹» [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
+ case '\u1EE5':
+ // ụ [LATIN SMALL LETTER U WITH DOT BELOW]
+ case '\u1EE7':
+ // á»§ [LATIN SMALL LETTER U WITH HOOK ABOVE]
+ case '\u1EE9':
+ // ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE]
+ case '\u1EEB':
+ // ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE]
+ case '\u1EED':
+ // á» [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
+ case '\u1EEF':
+ // ữ [LATIN SMALL LETTER U WITH HORN AND TILDE]
+ case '\u1EF1':
+ // á»± [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
+ case '\u24E4':
+ // ⓤ [CIRCLED LATIN SMALL LETTER U]
+ case '\uFF55': // u [FULLWIDTH LATIN SMALL LETTER U]
+ output[opos++] = 'u';
+ break;
+
+ case '\u24B0': // â’° [PARENTHESIZED LATIN SMALL LETTER U]
+ output[opos++] = '(';
+ output[opos++] = 'u';
+ output[opos++] = ')';
+ break;
+
+ case '\u1D6B': // ᵫ [LATIN SMALL LETTER UE]
+ output[opos++] = 'u';
+ output[opos++] = 'e';
+ break;
+
+ case '\u01B2':
+ // Ʋ [LATIN CAPITAL LETTER V WITH HOOK]
+ case '\u0245':
+ // É… [LATIN CAPITAL LETTER TURNED V]
+ case '\u1D20':
+ // á´ [LATIN LETTER SMALL CAPITAL V]
+ case '\u1E7C':
+ // á¹¼ [LATIN CAPITAL LETTER V WITH TILDE]
+ case '\u1E7E':
+ // á¹¾ [LATIN CAPITAL LETTER V WITH DOT BELOW]
+ case '\u1EFC':
+ // Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V]
+ case '\u24CB':
+ // â“‹ [CIRCLED LATIN CAPITAL LETTER V]
+ case '\uA75E':
+ // � [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
+ case '\uA768':
+ // � [LATIN CAPITAL LETTER VEND]
+ case '\uFF36': // ï¼¶ [FULLWIDTH LATIN CAPITAL LETTER V]
+ output[opos++] = 'V';
+ break;
+
+ case '\u028B':
+ // Ê‹ [LATIN SMALL LETTER V WITH HOOK]
+ case '\u028C':
+ // ʌ [LATIN SMALL LETTER TURNED V]
+ case '\u1D65':
+ // áµ¥ [LATIN SUBSCRIPT SMALL LETTER V]
+ case '\u1D8C':
+ // ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK]
+ case '\u1E7D':
+ // á¹½ [LATIN SMALL LETTER V WITH TILDE]
+ case '\u1E7F':
+ // ṿ [LATIN SMALL LETTER V WITH DOT BELOW]
+ case '\u24E5':
+ // â“¥ [CIRCLED LATIN SMALL LETTER V]
+ case '\u2C71':
+ // â±± [LATIN SMALL LETTER V WITH RIGHT HOOK]
+ case '\u2C74':
+ // â±´ [LATIN SMALL LETTER V WITH CURL]
+ case '\uA75F':
+ // � [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
+ case '\uFF56': // ï½– [FULLWIDTH LATIN SMALL LETTER V]
+ output[opos++] = 'v';
+ break;
+
+ case '\uA760': // �[LATIN CAPITAL LETTER VY]
+ output[opos++] = 'V';
+ output[opos++] = 'Y';
+ break;
+
+ case '\u24B1': // â’± [PARENTHESIZED LATIN SMALL LETTER V]
+ output[opos++] = '(';
+ output[opos++] = 'v';
+ output[opos++] = ')';
+ break;
+
+ case '\uA761': // � [LATIN SMALL LETTER VY]
+ output[opos++] = 'v';
+ output[opos++] = 'y';
+ break;
+
+ case '\u0174':
+ // Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
+ case '\u01F7':
+ // Ç· http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN]
+ case '\u1D21':
+ // á´¡ [LATIN LETTER SMALL CAPITAL W]
+ case '\u1E80':
+ // Ẁ [LATIN CAPITAL LETTER W WITH GRAVE]
+ case '\u1E82':
+ // Ẃ [LATIN CAPITAL LETTER W WITH ACUTE]
+ case '\u1E84':
+ // Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS]
+ case '\u1E86':
+ // Ẇ[LATIN CAPITAL LETTER W WITH DOT ABOVE]
+ case '\u1E88':
+ // Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW]
+ case '\u24CC':
+ // Ⓦ [CIRCLED LATIN CAPITAL LETTER W]
+ case '\u2C72':
+ // â±² [LATIN CAPITAL LETTER W WITH HOOK]
+ case '\uFF37': // ï¼· [FULLWIDTH LATIN CAPITAL LETTER W]
+ output[opos++] = 'W';
+ break;
+
+ case '\u0175':
+ // ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX]
+ case '\u01BF':
+ // Æ¿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN]
+ case '\u028D':
+ // � [LATIN SMALL LETTER TURNED W]
+ case '\u1E81':
+ // � [LATIN SMALL LETTER W WITH GRAVE]
+ case '\u1E83':
+ // ẃ [LATIN SMALL LETTER W WITH ACUTE]
+ case '\u1E85':
+ // ẅ [LATIN SMALL LETTER W WITH DIAERESIS]
+ case '\u1E87':
+ // ẇ [LATIN SMALL LETTER W WITH DOT ABOVE]
+ case '\u1E89':
+ // ẉ [LATIN SMALL LETTER W WITH DOT BELOW]
+ case '\u1E98':
+ // ẘ [LATIN SMALL LETTER W WITH RING ABOVE]
+ case '\u24E6':
+ // ⓦ [CIRCLED LATIN SMALL LETTER W]
+ case '\u2C73':
+ // â±³ [LATIN SMALL LETTER W WITH HOOK]
+ case '\uFF57': // ï½— [FULLWIDTH LATIN SMALL LETTER W]
+ output[opos++] = 'w';
+ break;
+
+ case '\u24B2': // â’² [PARENTHESIZED LATIN SMALL LETTER W]
+ output[opos++] = '(';
+ output[opos++] = 'w';
+ output[opos++] = ')';
+ break;
+
+ case '\u1E8A':
+ // Ẋ[LATIN CAPITAL LETTER X WITH DOT ABOVE]
+ case '\u1E8C':
+ // Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS]
+ case '\u24CD':
+ // � [CIRCLED LATIN CAPITAL LETTER X]
+ case '\uFF38': // X [FULLWIDTH LATIN CAPITAL LETTER X]
+ output[opos++] = 'X';
+ break;
+
+ case '\u1D8D':
+ // � [LATIN SMALL LETTER X WITH PALATAL HOOK]
+ case '\u1E8B':
+ // ẋ [LATIN SMALL LETTER X WITH DOT ABOVE]
+ case '\u1E8D':
+ // � [LATIN SMALL LETTER X WITH DIAERESIS]
+ case '\u2093':
+ // â‚“ [LATIN SUBSCRIPT SMALL LETTER X]
+ case '\u24E7':
+ // â“§ [CIRCLED LATIN SMALL LETTER X]
+ case '\uFF58': // x [FULLWIDTH LATIN SMALL LETTER X]
+ output[opos++] = 'x';
+ break;
+
+ case '\u24B3': // â’³ [PARENTHESIZED LATIN SMALL LETTER X]
+ output[opos++] = '(';
+ output[opos++] = 'x';
+ output[opos++] = ')';
+ break;
+
+ case '\u00DD':
+ // Ý [LATIN CAPITAL LETTER Y WITH ACUTE]
+ case '\u0176':
+ // Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
+ case '\u0178':
+ // Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS]
+ case '\u01B3':
+ // Ƴ [LATIN CAPITAL LETTER Y WITH HOOK]
+ case '\u0232':
+ // Ȳ [LATIN CAPITAL LETTER Y WITH MACRON]
+ case '\u024E':
+ // ÉŽ [LATIN CAPITAL LETTER Y WITH STROKE]
+ case '\u028F':
+ // � [LATIN LETTER SMALL CAPITAL Y]
+ case '\u1E8E':
+ // Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
+ case '\u1EF2':
+ // Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE]
+ case '\u1EF4':
+ // á»´ [LATIN CAPITAL LETTER Y WITH DOT BELOW]
+ case '\u1EF6':
+ // á»¶ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
+ case '\u1EF8':
+ // Ỹ [LATIN CAPITAL LETTER Y WITH TILDE]
+ case '\u1EFE':
+ // Ỿ [LATIN CAPITAL LETTER Y WITH LOOP]
+ case '\u24CE':
+ // Ⓨ [CIRCLED LATIN CAPITAL LETTER Y]
+ case '\uFF39': // ï¼¹ [FULLWIDTH LATIN CAPITAL LETTER Y]
+ output[opos++] = 'Y';
+ break;
+
+ case '\u00FD':
+ // ý [LATIN SMALL LETTER Y WITH ACUTE]
+ case '\u00FF':
+ // ÿ [LATIN SMALL LETTER Y WITH DIAERESIS]
+ case '\u0177':
+ // ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
+ case '\u01B4':
+ // Æ´ [LATIN SMALL LETTER Y WITH HOOK]
+ case '\u0233':
+ // ȳ [LATIN SMALL LETTER Y WITH MACRON]
+ case '\u024F':
+ // � [LATIN SMALL LETTER Y WITH STROKE]
+ case '\u028E':
+ // ÊŽ [LATIN SMALL LETTER TURNED Y]
+ case '\u1E8F':
+ // � [LATIN SMALL LETTER Y WITH DOT ABOVE]
+ case '\u1E99':
+ // ẙ [LATIN SMALL LETTER Y WITH RING ABOVE]
+ case '\u1EF3':
+ // ỳ [LATIN SMALL LETTER Y WITH GRAVE]
+ case '\u1EF5':
+ // ỵ [LATIN SMALL LETTER Y WITH DOT BELOW]
+ case '\u1EF7':
+ // á»· [LATIN SMALL LETTER Y WITH HOOK ABOVE]
+ case '\u1EF9':
+ // ỹ [LATIN SMALL LETTER Y WITH TILDE]
+ case '\u1EFF':
+ // ỿ [LATIN SMALL LETTER Y WITH LOOP]
+ case '\u24E8':
+ // ⓨ [CIRCLED LATIN SMALL LETTER Y]
+ case '\uFF59': // ï½™ [FULLWIDTH LATIN SMALL LETTER Y]
+ output[opos++] = 'y';
+ break;
+
+ case '\u24B4': // â’´ [PARENTHESIZED LATIN SMALL LETTER Y]
+ output[opos++] = '(';
+ output[opos++] = 'y';
+ output[opos++] = ')';
+ break;
+
+ case '\u0179':
+ // Ź [LATIN CAPITAL LETTER Z WITH ACUTE]
+ case '\u017B':
+ // Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
+ case '\u017D':
+ // Ž [LATIN CAPITAL LETTER Z WITH CARON]
+ case '\u01B5':
+ // Ƶ [LATIN CAPITAL LETTER Z WITH STROKE]
+ case '\u021C':
+ // Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH]
+ case '\u0224':
+ // Ȥ [LATIN CAPITAL LETTER Z WITH HOOK]
+ case '\u1D22':
+ // á´¢ [LATIN LETTER SMALL CAPITAL Z]
+ case '\u1E90':
+ // � [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
+ case '\u1E92':
+ // Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW]
+ case '\u1E94':
+ // �? [LATIN CAPITAL LETTER Z WITH LINE BELOW]
+ case '\u24CF':
+ // � [CIRCLED LATIN CAPITAL LETTER Z]
+ case '\u2C6B':
+ // Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER]
+ case '\uA762':
+ // � [LATIN CAPITAL LETTER VISIGOTHIC Z]
+ case '\uFF3A': // Z [FULLWIDTH LATIN CAPITAL LETTER Z]
+ output[opos++] = 'Z';
+ break;
+
+ case '\u017A':
+ // ź [LATIN SMALL LETTER Z WITH ACUTE]
+ case '\u017C':
+ // ż [LATIN SMALL LETTER Z WITH DOT ABOVE]
+ case '\u017E':
+ // ž [LATIN SMALL LETTER Z WITH CARON]
+ case '\u01B6':
+ // ƶ [LATIN SMALL LETTER Z WITH STROKE]
+ case '\u021D':
+ // � http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH]
+ case '\u0225':
+ // ȥ [LATIN SMALL LETTER Z WITH HOOK]
+ case '\u0240':
+ // ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL]
+ case '\u0290':
+ // � [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
+ case '\u0291':
+ // Ê‘ [LATIN SMALL LETTER Z WITH CURL]
+ case '\u1D76':
+ // áµ¶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
+ case '\u1D8E':
+ // á¶Ž [LATIN SMALL LETTER Z WITH PALATAL HOOK]
+ case '\u1E91':
+ // ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
+ case '\u1E93':
+ // ẓ [LATIN SMALL LETTER Z WITH DOT BELOW]
+ case '\u1E95':
+ // ẕ [LATIN SMALL LETTER Z WITH LINE BELOW]
+ case '\u24E9':
+ // â“© [CIRCLED LATIN SMALL LETTER Z]
+ case '\u2C6C':
+ // ⱬ [LATIN SMALL LETTER Z WITH DESCENDER]
+ case '\uA763':
+ // � [LATIN SMALL LETTER VISIGOTHIC Z]
+ case '\uFF5A': // z [FULLWIDTH LATIN SMALL LETTER Z]
+ output[opos++] = 'z';
+ break;
+
+ case '\u24B5': // â’µ [PARENTHESIZED LATIN SMALL LETTER Z]
+ output[opos++] = '(';
+ output[opos++] = 'z';
+ output[opos++] = ')';
+ break;
+
+ case '\u2070':
+ // � [SUPERSCRIPT ZERO]
+ case '\u2080':
+ // â‚€ [SUBSCRIPT ZERO]
+ case '\u24EA':
+ // ⓪ [CIRCLED DIGIT ZERO]
+ case '\u24FF':
+ // â“¿ [NEGATIVE CIRCLED DIGIT ZERO]
+ case '\uFF10': // � [FULLWIDTH DIGIT ZERO]
+ output[opos++] = '0';
+ break;
+
+ case '\u00B9':
+ // ¹ [SUPERSCRIPT ONE]
+ case '\u2081':
+ // � [SUBSCRIPT ONE]
+ case '\u2460':
+ // â‘ [CIRCLED DIGIT ONE]
+ case '\u24F5':
+ // ⓵ [DOUBLE CIRCLED DIGIT ONE]
+ case '\u2776':
+ // � [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
+ case '\u2780':
+ // ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
+ case '\u278A':
+ // ➊[DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
+ case '\uFF11': // 1 [FULLWIDTH DIGIT ONE]
+ output[opos++] = '1';
+ break;
+
+ case '\u2488': // â’ˆ [DIGIT ONE FULL STOP]
+ output[opos++] = '1';
+ output[opos++] = '.';
+ break;
+
+ case '\u2474': // â‘´ [PARENTHESIZED DIGIT ONE]
+ output[opos++] = '(';
+ output[opos++] = '1';
+ output[opos++] = ')';
+ break;
+
+ case '\u00B2':
+ // ² [SUPERSCRIPT TWO]
+ case '\u2082':
+ // â‚‚ [SUBSCRIPT TWO]
+ case '\u2461':
+ // â‘¡ [CIRCLED DIGIT TWO]
+ case '\u24F6':
+ // â“¶ [DOUBLE CIRCLED DIGIT TWO]
+ case '\u2777':
+ // � [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
+ case '\u2781':
+ // � [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
+ case '\u278B':
+ // âž‹ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
+ case '\uFF12': // ï¼’ [FULLWIDTH DIGIT TWO]
+ output[opos++] = '2';
+ break;
+
+ case '\u2489': // â’‰ [DIGIT TWO FULL STOP]
+ output[opos++] = '2';
+ output[opos++] = '.';
+ break;
+
+ case '\u2475': // ⑵ [PARENTHESIZED DIGIT TWO]
+ output[opos++] = '(';
+ output[opos++] = '2';
+ output[opos++] = ')';
+ break;
+
+ case '\u00B3':
+ // ³ [SUPERSCRIPT THREE]
+ case '\u2083':
+ // ₃ [SUBSCRIPT THREE]
+ case '\u2462':
+ // â‘¢ [CIRCLED DIGIT THREE]
+ case '\u24F7':
+ // â“· [DOUBLE CIRCLED DIGIT THREE]
+ case '\u2778':
+ // � [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
+ case '\u2782':
+ // âž‚ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
+ case '\u278C':
+ // ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
+ case '\uFF13': // 3 [FULLWIDTH DIGIT THREE]
+ output[opos++] = '3';
+ break;
+
+ case '\u248A': // â’Š[DIGIT THREE FULL STOP]
+ output[opos++] = '3';
+ output[opos++] = '.';
+ break;
+
+ case '\u2476': // â‘¶ [PARENTHESIZED DIGIT THREE]
+ output[opos++] = '(';
+ output[opos++] = '3';
+ output[opos++] = ')';
+ break;
+
+ case '\u2074':
+ // � [SUPERSCRIPT FOUR]
+ case '\u2084':
+ // â‚„ [SUBSCRIPT FOUR]
+ case '\u2463':
+ // â‘£ [CIRCLED DIGIT FOUR]
+ case '\u24F8':
+ // ⓸ [DOUBLE CIRCLED DIGIT FOUR]
+ case '\u2779':
+ // � [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
+ case '\u2783':
+ // ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
+ case '\u278D':
+ // � [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
+ case '\uFF14': // �? [FULLWIDTH DIGIT FOUR]
+ output[opos++] = '4';
+ break;
+
+ case '\u248B': // â’‹ [DIGIT FOUR FULL STOP]
+ output[opos++] = '4';
+ output[opos++] = '.';
+ break;
+
+ case '\u2477': // â‘· [PARENTHESIZED DIGIT FOUR]
+ output[opos++] = '(';
+ output[opos++] = '4';
+ output[opos++] = ')';
+ break;
+
+ case '\u2075':
+ // � [SUPERSCRIPT FIVE]
+ case '\u2085':
+ // â‚… [SUBSCRIPT FIVE]
+ case '\u2464':
+ // ⑤ [CIRCLED DIGIT FIVE]
+ case '\u24F9':
+ // ⓹ [DOUBLE CIRCLED DIGIT FIVE]
+ case '\u277A':
+ // � [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
+ case '\u2784':
+ // âž„ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
+ case '\u278E':
+ // ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
+ case '\uFF15': // 5 [FULLWIDTH DIGIT FIVE]
+ output[opos++] = '5';
+ break;
+
+ case '\u248C': // ⒌ [DIGIT FIVE FULL STOP]
+ output[opos++] = '5';
+ output[opos++] = '.';
+ break;
+
+ case '\u2478': // ⑸ [PARENTHESIZED DIGIT FIVE]
+ output[opos++] = '(';
+ output[opos++] = '5';
+ output[opos++] = ')';
+ break;
+
+ case '\u2076':
+ // � [SUPERSCRIPT SIX]
+ case '\u2086':
+ // ₆[SUBSCRIPT SIX]
+ case '\u2465':
+ // â‘¥ [CIRCLED DIGIT SIX]
+ case '\u24FA':
+ // ⓺ [DOUBLE CIRCLED DIGIT SIX]
+ case '\u277B':
+ // � [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
+ case '\u2785':
+ // âž… [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
+ case '\u278F':
+ // � [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
+ case '\uFF16': // ï¼– [FULLWIDTH DIGIT SIX]
+ output[opos++] = '6';
+ break;
+
+ case '\u248D': // â’� [DIGIT SIX FULL STOP]
+ output[opos++] = '6';
+ output[opos++] = '.';
+ break;
+
+ case '\u2479': // ⑹ [PARENTHESIZED DIGIT SIX]
+ output[opos++] = '(';
+ output[opos++] = '6';
+ output[opos++] = ')';
+ break;
+
+ case '\u2077':
+ // � [SUPERSCRIPT SEVEN]
+ case '\u2087':
+ // ₇ [SUBSCRIPT SEVEN]
+ case '\u2466':
+ // ⑦ [CIRCLED DIGIT SEVEN]
+ case '\u24FB':
+ // â“» [DOUBLE CIRCLED DIGIT SEVEN]
+ case '\u277C':
+ // � [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
+ case '\u2786':
+ // ➆[DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
+ case '\u2790':
+ // � [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
+ case '\uFF17': // ï¼— [FULLWIDTH DIGIT SEVEN]
+ output[opos++] = '7';
+ break;
+
+ case '\u248E': // â’Ž [DIGIT SEVEN FULL STOP]
+ output[opos++] = '7';
+ output[opos++] = '.';
+ break;
+
+ case '\u247A': // ⑺ [PARENTHESIZED DIGIT SEVEN]
+ output[opos++] = '(';
+ output[opos++] = '7';
+ output[opos++] = ')';
+ break;
+
+ case '\u2078':
+ // � [SUPERSCRIPT EIGHT]
+ case '\u2088':
+ // ₈ [SUBSCRIPT EIGHT]
+ case '\u2467':
+ // â‘§ [CIRCLED DIGIT EIGHT]
+ case '\u24FC':
+ // ⓼ [DOUBLE CIRCLED DIGIT EIGHT]
+ case '\u277D':
+ // � [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
+ case '\u2787':
+ // ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
+ case '\u2791':
+ // âž‘ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
+ case '\uFF18': // 8 [FULLWIDTH DIGIT EIGHT]
+ output[opos++] = '8';
+ break;
+
+ case '\u248F': // â’� [DIGIT EIGHT FULL STOP]
+ output[opos++] = '8';
+ output[opos++] = '.';
+ break;
+
+ case '\u247B': // â‘» [PARENTHESIZED DIGIT EIGHT]
+ output[opos++] = '(';
+ output[opos++] = '8';
+ output[opos++] = ')';
+ break;
+
+ case '\u2079':
+ // � [SUPERSCRIPT NINE]
+ case '\u2089':
+ // ₉ [SUBSCRIPT NINE]
+ case '\u2468':
+ // ⑨ [CIRCLED DIGIT NINE]
+ case '\u24FD':
+ // ⓽ [DOUBLE CIRCLED DIGIT NINE]
+ case '\u277E':
+ // � [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
+ case '\u2788':
+ // ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
+ case '\u2792':
+ // âž’ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
+ case '\uFF19': // ï¼™ [FULLWIDTH DIGIT NINE]
+ output[opos++] = '9';
+ break;
+
+ case '\u2490': // â’� [DIGIT NINE FULL STOP]
+ output[opos++] = '9';
+ output[opos++] = '.';
+ break;
+
+ case '\u247C': // ⑼ [PARENTHESIZED DIGIT NINE]
+ output[opos++] = '(';
+ output[opos++] = '9';
+ output[opos++] = ')';
+ break;
+
+ case '\u2469':
+ // â‘© [CIRCLED NUMBER TEN]
+ case '\u24FE':
+ // ⓾ [DOUBLE CIRCLED NUMBER TEN]
+ case '\u277F':
+ // � [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
+ case '\u2789':
+ // ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
+ case '\u2793': // âž“ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
+ output[opos++] = '1';
+ output[opos++] = '0';
+ break;
+
+ case '\u2491': // â’‘ [NUMBER TEN FULL STOP]
+ output[opos++] = '1';
+ output[opos++] = '0';
+ output[opos++] = '.';
+ break;
+
+ case '\u247D': // ⑽ [PARENTHESIZED NUMBER TEN]
+ output[opos++] = '(';
+ output[opos++] = '1';
+ output[opos++] = '0';
+ output[opos++] = ')';
+ break;
+
+ case '\u246A':
+ // ⑪ [CIRCLED NUMBER ELEVEN]
+ case '\u24EB': // â“« [NEGATIVE CIRCLED NUMBER ELEVEN]
+ output[opos++] = '1';
+ output[opos++] = '1';
+ break;
+
+ case '\u2492': // â’’ [NUMBER ELEVEN FULL STOP]
+ output[opos++] = '1';
+ output[opos++] = '1';
+ output[opos++] = '.';
+ break;
+
+ case '\u247E': // ⑾ [PARENTHESIZED NUMBER ELEVEN]
+ output[opos++] = '(';
+ output[opos++] = '1';
+ output[opos++] = '1';
+ output[opos++] = ')';
+ break;
+
+ case '\u246B':
+ // â‘« [CIRCLED NUMBER TWELVE]
+ case '\u24EC': // ⓬ [NEGATIVE CIRCLED NUMBER TWELVE]
+ output[opos++] = '1';
+ output[opos++] = '2';
+ break;
+
+ case '\u2493': // â’“ [NUMBER TWELVE FULL STOP]
+ output[opos++] = '1';
+ output[opos++] = '2';
+ output[opos++] = '.';
+ break;
+
+ case '\u247F': // â‘¿ [PARENTHESIZED NUMBER TWELVE]
+ output[opos++] = '(';
+ output[opos++] = '1';
+ output[opos++] = '2';
+ output[opos++] = ')';
+ break;
+
+ case '\u246C':
+ // ⑬ [CIRCLED NUMBER THIRTEEN]
+ case '\u24ED': // â“ [NEGATIVE CIRCLED NUMBER THIRTEEN]
+ output[opos++] = '1';
+ output[opos++] = '3';
+ break;
+
+ case '\u2494': // â’�? [NUMBER THIRTEEN FULL STOP]
+ output[opos++] = '1';
+ output[opos++] = '3';
+ output[opos++] = '.';
+ break;
+
+ case '\u2480': // â’€ [PARENTHESIZED NUMBER THIRTEEN]
+ output[opos++] = '(';
+ output[opos++] = '1';
+ output[opos++] = '3';
+ output[opos++] = ')';
+ break;
+
+ case '\u246D':
+ // â‘ [CIRCLED NUMBER FOURTEEN]
+ case '\u24EE': // â“® [NEGATIVE CIRCLED NUMBER FOURTEEN]
+ output[opos++] = '1';
+ output[opos++] = '4';
+ break;
+
+ case '\u2495': // â’• [NUMBER FOURTEEN FULL STOP]
+ output[opos++] = '1';
+ output[opos++] = '4';
+ output[opos++] = '.';
+ break;
+
+ case '\u2481': // â’� [PARENTHESIZED NUMBER FOURTEEN]
+ output[opos++] = '(';
+ output[opos++] = '1';
+ output[opos++] = '4';
+ output[opos++] = ')';
+ break;
+
+ case '\u246E':
+ // â‘® [CIRCLED NUMBER FIFTEEN]
+ case '\u24EF': // ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN]
+ output[opos++] = '1';
+ output[opos++] = '5';
+ break;
+
+ case '\u2496': // â’– [NUMBER FIFTEEN FULL STOP]
+ output[opos++] = '1';
+ output[opos++] = '5';
+ output[opos++] = '.';
+ break;
+
+ case '\u2482': // â’‚ [PARENTHESIZED NUMBER FIFTEEN]
+ output[opos++] = '(';
+ output[opos++] = '1';
+ output[opos++] = '5';
+ output[opos++] = ')';
+ break;
+
+ case '\u246F':
+ // ⑯ [CIRCLED NUMBER SIXTEEN]
+ case '\u24F0': // â“° [NEGATIVE CIRCLED NUMBER SIXTEEN]
+ output[opos++] = '1';
+ output[opos++] = '6';
+ break;
+
+ case '\u2497': // â’— [NUMBER SIXTEEN FULL STOP]
+ output[opos++] = '1';
+ output[opos++] = '6';
+ output[opos++] = '.';
+ break;
+
+ case '\u2483': // â’ƒ [PARENTHESIZED NUMBER SIXTEEN]
+ output[opos++] = '(';
+ output[opos++] = '1';
+ output[opos++] = '6';
+ output[opos++] = ')';
+ break;
+
+ case '\u2470':
+ // â‘° [CIRCLED NUMBER SEVENTEEN]
+ case '\u24F1': // ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN]
+ output[opos++] = '1';
+ output[opos++] = '7';
+ break;
+
+ case '\u2498': // â’˜ [NUMBER SEVENTEEN FULL STOP]
+ output[opos++] = '1';
+ output[opos++] = '7';
+ output[opos++] = '.';
+ break;
+
+ case '\u2484': // â’„ [PARENTHESIZED NUMBER SEVENTEEN]
+ output[opos++] = '(';
+ output[opos++] = '1';
+ output[opos++] = '7';
+ output[opos++] = ')';
+ break;
+
+ case '\u2471':
+ // ⑱ [CIRCLED NUMBER EIGHTEEN]
+ case '\u24F2': // ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN]
+ output[opos++] = '1';
+ output[opos++] = '8';
+ break;
+
+ case '\u2499': // â’™ [NUMBER EIGHTEEN FULL STOP]
+ output[opos++] = '1';
+ output[opos++] = '8';
+ output[opos++] = '.';
+ break;
+
+ case '\u2485': // â’… [PARENTHESIZED NUMBER EIGHTEEN]
+ output[opos++] = '(';
+ output[opos++] = '1';
+ output[opos++] = '8';
+ output[opos++] = ')';
+ break;
+
+ case '\u2472':
+ // ⑲ [CIRCLED NUMBER NINETEEN]
+ case '\u24F3': // ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN]
+ output[opos++] = '1';
+ output[opos++] = '9';
+ break;
+
+ case '\u249A': // â’š [NUMBER NINETEEN FULL STOP]
+ output[opos++] = '1';
+ output[opos++] = '9';
+ output[opos++] = '.';
+ break;
+
+ case '\u2486': // â’†[PARENTHESIZED NUMBER NINETEEN]
+ output[opos++] = '(';
+ output[opos++] = '1';
+ output[opos++] = '9';
+ output[opos++] = ')';
+ break;
+
+ case '\u2473':
+ // ⑳ [CIRCLED NUMBER TWENTY]
+ case '\u24F4': // â“´ [NEGATIVE CIRCLED NUMBER TWENTY]
+ output[opos++] = '2';
+ output[opos++] = '0';
+ break;
+
+ case '\u249B': // â’› [NUMBER TWENTY FULL STOP]
+ output[opos++] = '2';
+ output[opos++] = '0';
+ output[opos++] = '.';
+ break;
+
+ case '\u2487': // â’‡ [PARENTHESIZED NUMBER TWENTY]
+ output[opos++] = '(';
+ output[opos++] = '2';
+ output[opos++] = '0';
+ output[opos++] = ')';
+ break;
+
+ case '\u00AB':
+ // « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
+ case '\u00BB':
+ // » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
+ case '\u201C':
+ // “ [LEFT DOUBLE QUOTATION MARK]
+ case '\u201D':
+ // ” [RIGHT DOUBLE QUOTATION MARK]
+ case '\u201E':
+ // „ [DOUBLE LOW-9 QUOTATION MARK]
+ case '\u2033':
+ // ″ [DOUBLE PRIME]
+ case '\u2036':
+ // ‶ [REVERSED DOUBLE PRIME]
+ case '\u275D':
+ // � [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
+ case '\u275E':
+ // � [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
+ case '\u276E':
+ // � [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
+ case '\u276F':
+ // � [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
+ case '\uFF02': // " [FULLWIDTH QUOTATION MARK]
+ output[opos++] = '"';
+ break;
+
+ case '\u2018':
+ // ‘ [LEFT SINGLE QUOTATION MARK]
+ case '\u2019':
+ // ’ [RIGHT SINGLE QUOTATION MARK]
+ case '\u201A':
+ // ‚ [SINGLE LOW-9 QUOTATION MARK]
+ case '\u201B':
+ // ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
+ case '\u2032':
+ // ′ [PRIME]
+ case '\u2035':
+ // ‵ [REVERSED PRIME]
+ case '\u2039':
+ // ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
+ case '\u203A':
+ // › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
+ case '\u275B':
+ // � [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
+ case '\u275C':
+ // � [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
+ case '\uFF07': // ' [FULLWIDTH APOSTROPHE]
+ output[opos++] = '\'';
+ break;
+
+ case '\u2010':
+ // ‐ [HYPHEN]
+ case '\u2011':
+ // ‑ [NON-BREAKING HYPHEN]
+ case '\u2012':
+ // ‒ [FIGURE DASH]
+ case '\u2013':
+ // – [EN DASH]
+ case '\u2014':
+ // — [EM DASH]
+ case '\u207B':
+ // � [SUPERSCRIPT MINUS]
+ case '\u208B':
+ // â‚‹ [SUBSCRIPT MINUS]
+ case '\uFF0D': // � [FULLWIDTH HYPHEN-MINUS]
+ output[opos++] = '-';
+ break;
+
+ case '\u2045':
+ // � [LEFT SQUARE BRACKET WITH QUILL]
+ case '\u2772':
+ // � [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
+ case '\uFF3B': // ï¼» [FULLWIDTH LEFT SQUARE BRACKET]
+ output[opos++] = '[';
+ break;
+
+ case '\u2046':
+ // �[RIGHT SQUARE BRACKET WITH QUILL]
+ case '\u2773':
+ // � [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
+ case '\uFF3D': // ï¼½ [FULLWIDTH RIGHT SQUARE BRACKET]
+ output[opos++] = ']';
+ break;
+
+ case '\u207D':
+ // � [SUPERSCRIPT LEFT PARENTHESIS]
+ case '\u208D':
+ // � [SUBSCRIPT LEFT PARENTHESIS]
+ case '\u2768':
+ // � [MEDIUM LEFT PARENTHESIS ORNAMENT]
+ case '\u276A':
+ // � [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
+ case '\uFF08': // ( [FULLWIDTH LEFT PARENTHESIS]
+ output[opos++] = '(';
+ break;
+
+ case '\u2E28': // ⸨ [LEFT DOUBLE PARENTHESIS]
+ output[opos++] = '(';
+ output[opos++] = '(';
+ break;
+
+ case '\u207E':
+ // � [SUPERSCRIPT RIGHT PARENTHESIS]
+ case '\u208E':
+ // ₎ [SUBSCRIPT RIGHT PARENTHESIS]
+ case '\u2769':
+ // � [MEDIUM RIGHT PARENTHESIS ORNAMENT]
+ case '\u276B':
+ // � [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
+ case '\uFF09': // ) [FULLWIDTH RIGHT PARENTHESIS]
+ output[opos++] = ')';
+ break;
+
+ case '\u2E29': // ⸩ [RIGHT DOUBLE PARENTHESIS]
+ output[opos++] = ')';
+ output[opos++] = ')';
+ break;
+
+ case '\u276C':
+ // � [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
+ case '\u2770':
+ // � [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
+ case '\uFF1C': // < [FULLWIDTH LESS-THAN SIGN]
+ output[opos++] = '<';
+ break;
+
+ case '\u276D':
+ // �[MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
+ case '\u2771':
+ // � [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
+ case '\uFF1E': // > [FULLWIDTH GREATER-THAN SIGN]
+ output[opos++] = '>';
+ break;
+
+ case '\u2774':
+ // � [MEDIUM LEFT CURLY BRACKET ORNAMENT]
+ case '\uFF5B': // ï½› [FULLWIDTH LEFT CURLY BRACKET]
+ output[opos++] = '{';
+ break;
+
+ case '\u2775':
+ // � [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
+ case '\uFF5D': // � [FULLWIDTH RIGHT CURLY BRACKET]
+ output[opos++] = '}';
+ break;
+
+ case '\u207A':
+ // � [SUPERSCRIPT PLUS SIGN]
+ case '\u208A':
+ // ₊[SUBSCRIPT PLUS SIGN]
+ case '\uFF0B': // + [FULLWIDTH PLUS SIGN]
+ output[opos++] = '+';
+ break;
+
+ case '\u207C':
+ // � [SUPERSCRIPT EQUALS SIGN]
+ case '\u208C':
+ // ₌ [SUBSCRIPT EQUALS SIGN]
+ case '\uFF1D': // � [FULLWIDTH EQUALS SIGN]
+ output[opos++] = '=';
+ break;
+
+ case '\uFF01': // � [FULLWIDTH EXCLAMATION MARK]
+ output[opos++] = '!';
+ break;
+
+ case '\u203C': // ‼ [DOUBLE EXCLAMATION MARK]
+ output[opos++] = '!';
+ output[opos++] = '!';
+ break;
+
+ case '\u2049': // � [EXCLAMATION QUESTION MARK]
+ output[opos++] = '!';
+ output[opos++] = '?';
+ break;
+
+ case '\uFF03': // # [FULLWIDTH NUMBER SIGN]
+ output[opos++] = '#';
+ break;
+
+ case '\uFF04': // $ [FULLWIDTH DOLLAR SIGN]
+ output[opos++] = '$';
+ break;
+
+ case '\u2052':
+ // � [COMMERCIAL MINUS SIGN]
+ case '\uFF05': // ï¼… [FULLWIDTH PERCENT SIGN]
+ output[opos++] = '%';
+ break;
+
+ case '\uFF06': // &[FULLWIDTH AMPERSAND]
+ output[opos++] = '&';
+ break;
+
+ case '\u204E':
+ // � [LOW ASTERISK]
+ case '\uFF0A': // *[FULLWIDTH ASTERISK]
+ output[opos++] = '*';
+ break;
+
+ case '\uFF0C': // , [FULLWIDTH COMMA]
+ output[opos++] = ',';
+ break;
+
+ case '\uFF0E': // . [FULLWIDTH FULL STOP]
+ output[opos++] = '.';
+ break;
+
+ case '\u2044':
+ // � [FRACTION SLASH]
+ case '\uFF0F': // � [FULLWIDTH SOLIDUS]
+ output[opos++] = '/';
+ break;
+
+ case '\uFF1A': // : [FULLWIDTH COLON]
+ output[opos++] = ':';
+ break;
+
+ case '\u204F':
+ // � [REVERSED SEMICOLON]
+ case '\uFF1B': // ï¼› [FULLWIDTH SEMICOLON]
+ output[opos++] = ';';
+ break;
+
+ case '\uFF1F': // ? [FULLWIDTH QUESTION MARK]
+ output[opos++] = '?';
+ break;
+
+ case '\u2047': // � [DOUBLE QUESTION MARK]
+ output[opos++] = '?';
+ output[opos++] = '?';
+ break;
+
+ case '\u2048': // � [QUESTION EXCLAMATION MARK]
+ output[opos++] = '?';
+ output[opos++] = '!';
+ break;
+
+ case '\uFF20': // ï¼ [FULLWIDTH COMMERCIAL AT]
+ output[opos++] = '@';
+ break;
+
+ case '\uFF3C': // ï¼¼ [FULLWIDTH REVERSE SOLIDUS]
+ output[opos++] = '\\';
+ break;
+
+ case '\u2038':
+ // ‸ [CARET]
+ case '\uFF3E': // ï¼¾ [FULLWIDTH CIRCUMFLEX ACCENT]
+ output[opos++] = '^';
+ break;
+
+ case '\uFF3F': // _ [FULLWIDTH LOW LINE]
+ output[opos++] = '_';
+ break;
+
+ case '\u2053':
+ // � [SWUNG DASH]
+ case '\uFF5E': // ~ [FULLWIDTH TILDE]
+ output[opos++] = '~';
+ break;
+
+ // BEGIN CUSTOM TRANSLITERATION OF CYRILLIC CHARS
+
+ // russian uppercase "А Б В Г Д Е Ё Ж З И Й К Л М Н О П Р С Т У Ф Х Ц Ч Ш Щ Ъ Ы Ь Э Ю Я"
+ // russian lowercase "а б в г д е ё ж з и й к л м н о п р с т у ф х ц ч ш щ ъ ы ь э ю я"
+
+ // notes
+ // read http://www.vesic.org/english/blog/c-sharp/transliteration-easy-way-microsoft-transliteration-utility/
+ // should we look into MS Transliteration Utility (http://msdn.microsoft.com/en-US/goglobal/bb688104.aspx)?
+ // also UnidecodeSharpFork https://bitbucket.org/DimaStefantsov/unidecodesharpfork
+ // also Transliterator http://transliterator.codeplex.com/
+ //
+ // in any case it would be good to generate all those "case" statements instead of writing them by hand
+ // time for a T4 template?
+ // also we should support extensibility so people can register more cases in external code
+
+ // TODO: transliterates Анастасия as Anastasiya, and not Anastasia
+ // Ольга --> Ol'ga, Татьяна --> Tat'yana -- that's bad (?)
+ // Note: should ä (German umlaut) become a or ae ?
+ case '\u0410': // А
+ output[opos++] = 'A';
+ break;
+ case '\u0430': // а
+ output[opos++] = 'a';
+ break;
+ case '\u0411': // Б
+ output[opos++] = 'B';
+ break;
+ case '\u0431': // б
+ output[opos++] = 'b';
+ break;
+ case '\u0412': // В
+ output[opos++] = 'V';
+ break;
+ case '\u0432': // в
+ output[opos++] = 'v';
+ break;
+ case '\u0413': // Г
+ output[opos++] = 'G';
+ break;
+ case '\u0433': // г
+ output[opos++] = 'g';
+ break;
+ case '\u0414': // Д
+ output[opos++] = 'D';
+ break;
+ case '\u0434': // д
+ output[opos++] = 'd';
+ break;
+ case '\u0415': // Е
+ output[opos++] = 'E';
+ break;
+ case '\u0435': // е
+ output[opos++] = 'e';
+ break;
+ case '\u0401': // Ё
+ output[opos++] = 'E'; // alt. Yo
+ break;
+ case '\u0451': // ё
+ output[opos++] = 'e'; // alt. yo
+ break;
+ case '\u0416': // Ж
+ output[opos++] = 'Z';
+ output[opos++] = 'h';
+ break;
+ case '\u0436': // ж
+ output[opos++] = 'z';
+ output[opos++] = 'h';
+ break;
+ case '\u0417': // З
+ output[opos++] = 'Z';
+ break;
+ case '\u0437': // з
+ output[opos++] = 'z';
+ break;
+ case '\u0418': // И
+ output[opos++] = 'I';
+ break;
+ case '\u0438': // и
+ output[opos++] = 'i';
+ break;
+ case '\u0419': // Й
+ output[opos++] = 'I'; // alt. Y, J
+ break;
+ case '\u0439': // й
+ output[opos++] = 'i'; // alt. y, j
+ break;
+ case '\u041A': // К
+ output[opos++] = 'K';
+ break;
+ case '\u043A': // к
+ output[opos++] = 'k';
+ break;
+ case '\u041B': // Л
+ output[opos++] = 'L';
+ break;
+ case '\u043B': // л
+ output[opos++] = 'l';
+ break;
+ case '\u041C': // М
+ output[opos++] = 'M';
+ break;
+ case '\u043C': // м
+ output[opos++] = 'm';
+ break;
+ case '\u041D': // Н
+ output[opos++] = 'N';
+ break;
+ case '\u043D': // н
+ output[opos++] = 'n';
+ break;
+ case '\u041E': // О
+ output[opos++] = 'O';
+ break;
+ case '\u043E': // о
+ output[opos++] = 'o';
+ break;
+ case '\u041F': // П
+ output[opos++] = 'P';
+ break;
+ case '\u043F': // п
+ output[opos++] = 'p';
+ break;
+ case '\u0420': // Р
+ output[opos++] = 'R';
+ break;
+ case '\u0440': // р
+ output[opos++] = 'r';
+ break;
+ case '\u0421': // С
+ output[opos++] = 'S';
+ break;
+ case '\u0441': // с
+ output[opos++] = 's';
+ break;
+ case '\u0422': // Т
+ output[opos++] = 'T';
+ break;
+ case '\u0442': // т
+ output[opos++] = 't';
+ break;
+ case '\u0423': // У
+ output[opos++] = 'U';
+ break;
+ case '\u0443': // у
+ output[opos++] = 'u';
+ break;
+ case '\u0424': // Ф
+ output[opos++] = 'F';
+ break;
+ case '\u0444': // ф
+ output[opos++] = 'f';
+ break;
+ case '\u0425': // Х
+ output[opos++] = 'K'; // alt. X
+ output[opos++] = 'h';
+ break;
+ case '\u0445': // х
+ output[opos++] = 'k'; // alt. x
+ output[opos++] = 'h';
+ break;
+ case '\u0426': // Ц
+ output[opos++] = 'F';
+ break;
+ case '\u0446': // ц
+ output[opos++] = 'f';
+ break;
+ case '\u0427': // Ч
+ output[opos++] = 'C'; // alt. Ts, C
+ output[opos++] = 'h';
+ break;
+ case '\u0447': // ч
+ output[opos++] = 'c'; // alt. ts, c
+ output[opos++] = 'h';
+ break;
+ case '\u0428': // Ш
+ output[opos++] = 'S'; // alt. Ch, S
+ output[opos++] = 'h';
+ break;
+ case '\u0448': // ш
+ output[opos++] = 's'; // alt. ch, s
+ output[opos++] = 'h';
+ break;
+ case '\u0429': // Щ
+ output[opos++] = 'S'; // alt. Shch, Sc
+ output[opos++] = 'h';
+ break;
+ case '\u0449': // щ
+ output[opos++] = 's'; // alt. shch, sc
+ output[opos++] = 'h';
+ break;
+ case '\u042A': // Ъ
+ output[opos++] = '"'; // "
+ break;
+ case '\u044A': // ъ
+ output[opos++] = '"'; // "
+ break;
+ case '\u042B': // Ы
+ output[opos++] = 'Y';
+ break;
+ case '\u044B': // ы
+ output[opos++] = 'y';
+ break;
+ case '\u042C': // Ь
+ output[opos++] = '\''; // '
+ break;
+ case '\u044C': // ь
+ output[opos++] = '\''; // '
+ break;
+ case '\u042D': // Э
+ output[opos++] = 'E';
+ break;
+ case '\u044D': // э
+ output[opos++] = 'e';
+ break;
+ case '\u042E': // Ю
+ output[opos++] = 'Y'; // alt. Ju
+ output[opos++] = 'u';
+ break;
+ case '\u044E': // ю
+ output[opos++] = 'y'; // alt. ju
+ output[opos++] = 'u';
+ break;
+ case '\u042F': // Я
+ output[opos++] = 'Y'; // alt. Ja
+ output[opos++] = 'a';
+ break;
+ case '\u044F': // я
+ output[opos++] = 'y'; // alt. ja
+ output[opos++] = 'a';
+ break;
+
+ // BEGIN EXTRA
+ /*
+ case '£':
+ output[opos++] = 'G';
+ output[opos++] = 'B';
+ output[opos++] = 'P';
+ break;
+
+ case '€':
+ output[opos++] = 'E';
+ output[opos++] = 'U';
+ output[opos++] = 'R';
+ break;
+
+ case '©':
+ output[opos++] = '(';
+ output[opos++] = 'C';
+ output[opos++] = ')';
+ break;
+ */
+ default:
+ // if (ToMoreAscii(input, ipos, output, ref opos))
+ // break;
+
+ // if (!char.IsLetterOrDigit(c)) // that would not catch eg 汉 unfortunately
+ // output[opos++] = '?';
+ // else
+ // output[opos++] = c;
+
+ // strict ASCII
+ output[opos++] = fail;
+
+ break;
+ }
+ }
+ }
+
+ // private static bool ToMoreAscii(char[] input, int ipos, char[] output, ref int opos)
+ // {
+ // var c = input[ipos];
+
+ // switch (c)
+ // {
+ // case '£':
+ // output[opos++] = 'G';
+ // output[opos++] = 'B';
+ // output[opos++] = 'P';
+ // break;
+
+ // case '€':
+ // output[opos++] = 'E';
+ // output[opos++] = 'U';
+ // output[opos++] = 'R';
+ // break;
+
+ // case '©':
+ // output[opos++] = '(';
+ // output[opos++] = 'C';
+ // output[opos++] = ')';
+ // break;
+
+ // default:
+ // return false;
+ // }
+
+ // return true;
+ // }
+}
+#endif
diff --git a/src/Umbraco.Core/Strings/Utf8ToAsciiConverterStatic.cs b/src/Umbraco.Core/Strings/Utf8ToAsciiConverterStatic.cs
new file mode 100644
index 0000000000..17618ca887
--- /dev/null
+++ b/src/Umbraco.Core/Strings/Utf8ToAsciiConverterStatic.cs
@@ -0,0 +1,50 @@
+using Microsoft.Extensions.FileProviders;
+using Microsoft.Extensions.Hosting;
+using Microsoft.Extensions.Logging.Abstractions;
+
+namespace Umbraco.Cms.Core.Strings;
+
+/// <summary>
+/// Static wrapper around a lazily created <see cref="Utf8ToAsciiConverter"/>, kept for backward compatibility with existing code.
+/// </summary>
+/// <remarks>
+/// Use <see cref="IUtf8ToAsciiConverter"/> via dependency injection for new code.
+/// </remarks>
+public static class Utf8ToAsciiConverterStatic
+{
+    private static readonly Lazy<Utf8ToAsciiConverter> DefaultConverter = new(() =>
+    {
+        var hostEnv = new SimpleHostEnvironment { ContentRootPath = AppContext.BaseDirectory };
+        var loader = new CharacterMappingLoader(hostEnv, NullLogger<CharacterMappingLoader>.Instance);
+        return new Utf8ToAsciiConverter(loader);
+    });
+
+    // Minimal IHostEnvironment for static initialization: content root is the app base directory, no file provider is supplied.
+    private sealed class SimpleHostEnvironment : IHostEnvironment
+    {
+        public string EnvironmentName { get; set; } = "Production";
+        public string ApplicationName { get; set; } = "Umbraco";
+        public string ContentRootPath { get; set; } = string.Empty;
+        public IFileProvider ContentRootFileProvider { get; set; } = null!;
+    }
+
+    /// <summary>
+    /// Converts a UTF-8 string into an ASCII string.
+    /// </summary>
+    /// <param name="text">The text to convert.</param>
+    /// <param name="fail">The character to use to replace characters that cannot be converted.</param>
+    /// <returns>The converted text.</returns>
+    [Obsolete("Use IUtf8ToAsciiConverter via dependency injection. This will be removed in v15.")]
+    public static string ToAsciiString(string text, char fail = '?')
+        => DefaultConverter.Value.Convert(text, fail);
+
+    /// <summary>
+    /// Converts a UTF-8 string into an array of ASCII characters.
+    /// </summary>
+    /// <param name="text">The text to convert.</param>
+    /// <param name="fail">The character to use to replace characters that cannot be converted.</param>
+    /// <returns>The converted text as char array.</returns>
+    [Obsolete("Use IUtf8ToAsciiConverter via dependency injection. This will be removed in v15.")]
+    public static char[] ToAsciiCharArray(string text, char fail = '?')
+        => DefaultConverter.Value.Convert(text, fail).ToCharArray();
+}
diff --git a/tests/Umbraco.Tests.UnitTests/Umbraco.Core/ShortStringHelper/DefaultShortStringHelperTestsWithoutSetup.cs b/tests/Umbraco.Tests.UnitTests/Umbraco.Core/ShortStringHelper/DefaultShortStringHelperTestsWithoutSetup.cs
index 30ab94f3f2..91f9fe4039 100644
--- a/tests/Umbraco.Tests.UnitTests/Umbraco.Core/ShortStringHelper/DefaultShortStringHelperTestsWithoutSetup.cs
+++ b/tests/Umbraco.Tests.UnitTests/Umbraco.Core/ShortStringHelper/DefaultShortStringHelperTestsWithoutSetup.cs
@@ -343,7 +343,7 @@ public class DefaultShortStringHelperTestsWithoutSetup
public void Utf8ToAsciiConverter()
{
const string str = "a\U00010F00z\uA74Ftéô";
- var output = global::Umbraco.Cms.Core.Strings.Utf8ToAsciiConverter.ToAsciiString(str);
+ var output = global::Umbraco.Cms.Core.Strings.Utf8ToAsciiConverterStatic.ToAsciiString(str);
Assert.AreEqual("a?zooteo", output);
}
diff --git a/tests/Umbraco.Tests.UnitTests/Umbraco.Core/Strings/Utf8ToAsciiConverterGoldenTests.cs b/tests/Umbraco.Tests.UnitTests/Umbraco.Core/Strings/Utf8ToAsciiConverterGoldenTests.cs
index d833a95e16..7a8c1c52dc 100644
--- a/tests/Umbraco.Tests.UnitTests/Umbraco.Core/Strings/Utf8ToAsciiConverterGoldenTests.cs
+++ b/tests/Umbraco.Tests.UnitTests/Umbraco.Core/Strings/Utf8ToAsciiConverterGoldenTests.cs
@@ -53,7 +53,7 @@ public class Utf8ToAsciiConverterGoldenTests
hostEnv.Object,
NullLogger.Instance);
- _newConverter = new Utf8ToAsciiConverterNew(loader);
+ _newConverter = new Utf8ToAsciiConverter(loader);
}
public static IEnumerable GetGoldenMappings()
@@ -74,23 +74,8 @@ public class Utf8ToAsciiConverterGoldenTests
[TestCaseSource(nameof(GetGoldenMappings))]
public void NewConverter_MatchesOriginalBehavior(string input, string expected)
{
- // Compare new implementation against original
- // Note: Original has buffer overflow bugs for chars that expand to 4+ chars (e.g., ⑽→(10))
- string? originalResult;
- try
- {
- originalResult = Utf8ToAsciiConverter.ToAsciiString(input);
- }
- catch (IndexOutOfRangeException)
- {
- // Original converter has known buffer bugs for high-expansion characters
- // New converter fixes these - verify it produces the expected golden mapping
- var newResult = _newConverter.Convert(input);
- Assert.That(newResult, Is.EqualTo(expected),
- $"Original throws IndexOutOfRangeException, but new converter should match golden mapping");
- return;
- }
-
+ // Compare new implementation against static wrapper (which uses new implementation)
+ var originalResult = Utf8ToAsciiConverterStatic.ToAsciiString(input);
var result = _newConverter.Convert(input);
Assert.That(result, Is.EqualTo(originalResult));
}
diff --git a/tests/Umbraco.Tests.UnitTests/Umbraco.Core/Strings/Utf8ToAsciiConverterNewTests.cs b/tests/Umbraco.Tests.UnitTests/Umbraco.Core/Strings/Utf8ToAsciiConverterTests.cs
similarity index 98%
rename from tests/Umbraco.Tests.UnitTests/Umbraco.Core/Strings/Utf8ToAsciiConverterNewTests.cs
rename to tests/Umbraco.Tests.UnitTests/Umbraco.Core/Strings/Utf8ToAsciiConverterTests.cs
index db2369648d..761b9e12db 100644
--- a/tests/Umbraco.Tests.UnitTests/Umbraco.Core/Strings/Utf8ToAsciiConverterNewTests.cs
+++ b/tests/Umbraco.Tests.UnitTests/Umbraco.Core/Strings/Utf8ToAsciiConverterTests.cs
@@ -7,7 +7,7 @@ using Umbraco.Cms.Core.Strings;
namespace Umbraco.Cms.Tests.UnitTests.Umbraco.Core.Strings;
[TestFixture]
-public class Utf8ToAsciiConverterNewTests
+public class Utf8ToAsciiConverterTests
{
private IUtf8ToAsciiConverter _converter = null!;
@@ -21,7 +21,7 @@ public class Utf8ToAsciiConverterNewTests
hostEnv.Object,
NullLogger.Instance);
- _converter = new Utf8ToAsciiConverterNew(loader);
+ _converter = new Utf8ToAsciiConverter(loader);
}
// === Null/Empty ===