feat(strings): add complete character mappings from golden test data

Adds missing character mappings to ensure behavioral equivalence with the
original Utf8ToAsciiConverter implementation. Creates extended-mappings.json
with 1,213 additional characters covering punctuation, symbols, extended
Latin, Greek, and other Unicode blocks, and registers it in
CharacterMappingLoader's built-in file list.

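Also changes 8 Cyrillic character mappings (upper- and lowercase Ё, Й, Ц, Щ)
to match the original converter's output, even where that differs from the
previous transliteration (e.g. Ц: "Ts" → "F").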

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-12-13 00:31:26 +00:00
parent b9ba2bd043
commit dff0f68b39
3 changed files with 1229 additions and 9 deletions

View File

@@ -11,7 +11,7 @@ namespace Umbraco.Cms.Core.Strings;
public sealed class CharacterMappingLoader : ICharacterMappingLoader
{
private static readonly string[] BuiltInFiles =
["ligatures.json", "special-latin.json", "cyrillic.json"];
["ligatures.json", "special-latin.json", "cyrillic.json", "extended-mappings.json"];
private static readonly JsonSerializerOptions JsonOptions = new()
{

View File

@@ -15,16 +15,16 @@
"д": "d",
"Е": "E",
"е": "e",
"Ё": "Yo",
"ё": "yo",
"Ё": "E",
"ё": "e",
"Ж": "Zh",
"ж": "zh",
"З": "Z",
"з": "z",
"И": "I",
"и": "i",
"Й": "Y",
"й": "y",
"Й": "I",
"й": "i",
"К": "K",
"к": "k",
"Л": "L",
@@ -49,14 +49,14 @@
"ф": "f",
"Х": "Kh",
"х": "kh",
"Ц": "Ts",
"ц": "ts",
"Ц": "F",
"ц": "f",
"Ч": "Ch",
"ч": "ch",
"Ш": "Sh",
"ш": "sh",
"Щ": "Shch",
"щ": "shch",
"Щ": "Sh",
"щ": "sh",
"Ъ": "\"",
"ъ": "\"",
"Ы": "Y",

File diff suppressed because it is too large Load Diff