Skip to content

Commit 6eb1f31

Browse files
authored
Modifier Sinological extensions to the IPA (#847)
* UnicodeData.txt lines from Kirk * lb=AL * Latin * Diacritic * Regenerate UCD * Other_Lowercase * Regenerate UCD * a test. * Ignore IDNA2008_Category
1 parent 4e0f6a5 commit 6eb1f31

20 files changed

+175
-76
lines changed

unicodetools/data/ucd/dev/DerivedAge.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# DerivedAge-18.0.0.txt
2-
# Date: 2025-11-22, 18:10:01 GMT
2+
# Date: 2025-11-22, 22:10:53 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -2141,12 +2141,12 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG
21412141
18D1F..18D20 ; 18.0 # [2] TANGUT IDEOGRAPH-18D1F..TANGUT IDEOGRAPH-18D20
21422142
1DF1F..1DF24 ; 18.0 # [6] LATIN SMALL LETTER D-ETH DIGRAPH..LATIN SMALL LETTER T-THETA DIGRAPH
21432143
1DF2B..1DF56 ; 18.0 # [44] LATIN SMALL LETTER DEZH DIGRAPH WITH CURL..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE
2144-
1DFD2..1DFE8 ; 18.0 # [23] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL CAPITAL D
2144+
1DFD2..1DFF2 ; 18.0 # [33] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL T WITH CURL
21452145
1F7DB ; 18.0 # BULLET IN DOUBLE CIRCLE
21462146
1F7F1..1F7FF ; 18.0 # [15] CIRCLE WITH DOUBLE VERTICAL AND HORIZONTAL LINE..RHOMBUS
21472147
2B81E ; 18.0 # CJK UNIFIED IDEOGRAPH-2B81E
21482148
3D000..3FC3F ; 18.0 # [11328] SEAL CHARACTER-3D000..SEAL CHARACTER-3FC3F
21492149

2150-
# Total code points: 11826
2150+
# Total code points: 11836
21512151

21522152
# EOF

unicodetools/data/ucd/dev/DerivedCoreProperties.txt

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# DerivedCoreProperties-18.0.0.txt
2-
# Date: 2025-11-22, 18:10:36 GMT
2+
# Date: 2025-11-22, 22:11:17 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -1394,7 +1394,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
13941394
1DF00..1DF09 ; Alphabetic # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
13951395
1DF0A ; Alphabetic # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
13961396
1DF0B..1DF56 ; Alphabetic # L& [76] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE
1397-
1DFD2..1DFE8 ; Alphabetic # Lm [23] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL CAPITAL D
1397+
1DFD2..1DFF2 ; Alphabetic # Lm [33] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL T WITH CURL
13981398
1E000..1E006 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
13991399
1E008..1E018 ; Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
14001400
1E01B..1E021 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
@@ -1476,7 +1476,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
14761476
31350..33479 ; Alphabetic # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479
14771477
3D000..3FC3F ; Alphabetic # Lo [11328] SEAL CHARACTER-3D000..SEAL CHARACTER-3FC3F
14781478

1479-
# Total code points: 159210
1479+
# Total code points: 159220
14801480

14811481
# ================================================
14821482

@@ -2179,11 +2179,11 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
21792179
1DF4B..1DF4C ; Lowercase # L& [2] LATIN SMALL LETTER BARRED M..LATIN SMALL LETTER BARRED M WITH HOOK
21802180
1DF4E..1DF50 ; Lowercase # L& [3] LATIN SMALL LETTER BARRED N..LATIN SMALL LETTER TURNED R WITH STROKE
21812181
1DF52..1DF56 ; Lowercase # L& [5] LATIN SMALL LETTER BARRED V..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE
2182-
1DFD2..1DFE8 ; Lowercase # Lm [23] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL CAPITAL D
2182+
1DFD2..1DFF2 ; Lowercase # Lm [33] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL T WITH CURL
21832183
1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
21842184
1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
21852185

2186-
# Total code points: 2666
2186+
# Total code points: 2676
21872187

21882188
# ================================================
21892189

@@ -3035,14 +3035,14 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
30353035
1D7C4..1D7CB ; Cased # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
30363036
1DF00..1DF09 ; Cased # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
30373037
1DF0B..1DF56 ; Cased # L& [76] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE
3038-
1DFD2..1DFE8 ; Cased # Lm [23] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL CAPITAL D
3038+
1DFD2..1DFF2 ; Cased # Lm [33] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL T WITH CURL
30393039
1E030..1E06D ; Cased # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
30403040
1E900..1E943 ; Cased # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA
30413041
1F130..1F149 ; Cased # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z
30423042
1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
30433043
1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
30443044

3045-
# Total code points: 4708
3045+
# Total code points: 4718
30463046

30473047
# ================================================
30483048

@@ -3547,7 +3547,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI
35473547
1DA84 ; Case_Ignorable # Mn SIGNWRITING LOCATION HEAD NECK
35483548
1DA9B..1DA9F ; Case_Ignorable # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6
35493549
1DAA1..1DAAF ; Case_Ignorable # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16
3550-
1DFD2..1DFE8 ; Case_Ignorable # Lm [23] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL CAPITAL D
3550+
1DFD2..1DFF2 ; Case_Ignorable # Lm [33] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL T WITH CURL
35513551
1E000..1E006 ; Case_Ignorable # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
35523552
1E008..1E018 ; Case_Ignorable # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
35533553
1E01B..1E021 ; Case_Ignorable # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
@@ -3575,7 +3575,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG
35753575
E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG
35763576
E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
35773577

3578-
# Total code points: 2832
3578+
# Total code points: 2842
35793579

35803580
# ================================================
35813581

@@ -7030,7 +7030,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
70307030
1DF00..1DF09 ; ID_Start # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
70317031
1DF0A ; ID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
70327032
1DF0B..1DF56 ; ID_Start # L& [76] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE
7033-
1DFD2..1DFE8 ; ID_Start # Lm [23] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL CAPITAL D
7033+
1DFD2..1DFF2 ; ID_Start # Lm [33] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL T WITH CURL
70347034
1E030..1E06D ; ID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
70357035
1E100..1E12C ; ID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
70367036
1E137..1E13D ; ID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
@@ -7098,7 +7098,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
70987098
31350..33479 ; ID_Start # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479
70997099
3D000..3FC3F ; ID_Start # Lo [11328] SEAL CHARACTER-3D000..SEAL CHARACTER-3FC3F
71007100

7101-
# Total code points: 157702
7101+
# Total code points: 157712
71027102

71037103
# ================================================
71047104

@@ -8450,7 +8450,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
84508450
1DF00..1DF09 ; ID_Continue # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
84518451
1DF0A ; ID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
84528452
1DF0B..1DF56 ; ID_Continue # L& [76] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE
8453-
1DFD2..1DFE8 ; ID_Continue # Lm [23] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL CAPITAL D
8453+
1DFD2..1DFF2 ; ID_Continue # Lm [33] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL T WITH CURL
84548454
1E000..1E006 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
84558455
1E008..1E018 ; ID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
84568456
1E01B..1E021 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
@@ -8542,7 +8542,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
85428542
3D000..3FC3F ; ID_Continue # Lo [11328] SEAL CHARACTER-3D000..SEAL CHARACTER-3FC3F
85438543
E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
85448544

8545-
# Total code points: 161048
8545+
# Total code points: 161058
85468546

85478547
# ================================================
85488548

@@ -9273,7 +9273,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
92739273
1DF00..1DF09 ; XID_Start # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
92749274
1DF0A ; XID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
92759275
1DF0B..1DF56 ; XID_Start # L& [76] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE
9276-
1DFD2..1DFE8 ; XID_Start # Lm [23] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL CAPITAL D
9276+
1DFD2..1DFF2 ; XID_Start # Lm [33] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL T WITH CURL
92779277
1E030..1E06D ; XID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
92789278
1E100..1E12C ; XID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
92799279
1E137..1E13D ; XID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
@@ -9341,7 +9341,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
93419341
31350..33479 ; XID_Start # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479
93429342
3D000..3FC3F ; XID_Start # Lo [11328] SEAL CHARACTER-3D000..SEAL CHARACTER-3FC3F
93439343

9344-
# Total code points: 157679
9344+
# Total code points: 157689
93459345

93469346
# ================================================
93479347

@@ -10694,7 +10694,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
1069410694
1DF00..1DF09 ; XID_Continue # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
1069510695
1DF0A ; XID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
1069610696
1DF0B..1DF56 ; XID_Continue # L& [76] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE
10697-
1DFD2..1DFE8 ; XID_Continue # Lm [23] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL CAPITAL D
10697+
1DFD2..1DFF2 ; XID_Continue # Lm [33] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL T WITH CURL
1069810698
1E000..1E006 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
1069910699
1E008..1E018 ; XID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
1070010700
1E01B..1E021 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
@@ -10786,7 +10786,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
1078610786
3D000..3FC3F ; XID_Continue # Lo [11328] SEAL CHARACTER-3D000..SEAL CHARACTER-3FC3F
1078710787
E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
1078810788

10789-
# Total code points: 161029
10789+
# Total code points: 161039
1079010790

1079110791
# ================================================
1079210792

@@ -12957,7 +12957,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
1295712957
1DF00..1DF09 ; Grapheme_Base # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
1295812958
1DF0A ; Grapheme_Base # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
1295912959
1DF0B..1DF56 ; Grapheme_Base # L& [76] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE
12960-
1DFD2..1DFE8 ; Grapheme_Base # Lm [23] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL CAPITAL D
12960+
1DFD2..1DFF2 ; Grapheme_Base # Lm [33] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL T WITH CURL
1296112961
1E030..1E06D ; Grapheme_Base # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
1296212962
1E100..1E12C ; Grapheme_Base # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
1296312963
1E137..1E13D ; Grapheme_Base # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
@@ -13086,7 +13086,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
1308613086
31350..33479 ; Grapheme_Base # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479
1308713087
3D000..3FC3F ; Grapheme_Base # Lo [11328] SEAL CHARACTER-3D000..SEAL CHARACTER-3FC3F
1308813088

13089-
# Total code points: 169308
13089+
# Total code points: 169318
1309013090

1309113091
# ================================================
1309213092

0 commit comments

Comments
 (0)