Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 99 additions & 37 deletions unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.VersionInfo;
import java.util.Arrays;
import java.util.Comparator;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
Expand Down Expand Up @@ -48,12 +49,34 @@ enum SpecialProperty {
public final UcdProperty property;
public final SpecialProperty special;

/**
 * Pair of field indices telling the parser where, on a split data line, to find the key and the
 * value of a property.
 *
 * <p>The single-argument constructor uses a {@code keyField} of 0. Instances are immutable and
 * are ordered by {@code keyField}, then by {@code valueField}; {@link #equals(Object)} and
 * {@link #hashCode()} are consistent with that ordering, so instances behave predictably in both
 * sorted and hashed collections.
 */
public static class FieldMapping implements Comparable<FieldMapping> {
    /**
     * Ordering used by {@link #compareTo(FieldMapping)}. Built once instead of on every call, and
     * based on int comparisons so that neither field is boxed.
     */
    private static final Comparator<FieldMapping> ORDER =
            Comparator.<FieldMapping>comparingInt(m -> m.keyField)
                    .thenComparingInt(m -> m.valueField);

    /** Index of the field holding the key; 0 unless given explicitly. */
    final int keyField;

    /** Index of the field holding the property value. */
    final int valueField;

    /**
     * Creates a mapping whose key field is 0.
     *
     * @param valueField index of the field holding the property value
     */
    FieldMapping(int valueField) {
        this(0, valueField);
    }

    /**
     * Creates a mapping with explicit key and value fields.
     *
     * @param keyField index of the field holding the key
     * @param valueField index of the field holding the property value
     */
    FieldMapping(int keyField, int valueField) {
        this.keyField = keyField;
        this.valueField = valueField;
    }

    /**
     * Orders mappings by {@code keyField}, then by {@code valueField}.
     *
     * @throws NullPointerException if {@code other} is null
     */
    @Override
    public int compareTo(FieldMapping other) {
        return ORDER.compare(this, other);
    }

    /** Two mappings are equal exactly when both field indices match. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof FieldMapping)) {
            return false;
        }
        final FieldMapping that = (FieldMapping) obj;
        return keyField == that.keyField && valueField == that.valueField;
    }

    @Override
    public int hashCode() {
        return 31 * keyField + valueField;
    }

    /**
     * Renders the mapping as {@code key ↦ value}, so that printing a collection of mappings
     * shows the field indices rather than object identity hashes. The arrow is written as a
     * Unicode escape to keep this source independent of the compiler's file encoding.
     */
    @Override
    public String toString() {
        return keyField + " \u21A6 " + valueField;
    }
}
;

/**
* Maps from Unicode versions to field number. A property whose field number depends on the
* version has more than one entry. A particular field number applies to the Unicode versions
* Maps from Unicode versions to field mapping. A property whose field mapping depends on the
* version has more than one entry. A particular field mapping applies to the Unicode versions
* after the previous-version entry, up to and including its own version.
*/
TreeMap<VersionInfo, Integer> fieldNumbers;
TreeMap<VersionInfo, FieldMapping> fieldMappings;

/**
* Maps from Unicode versions to files. A property whose file depends on the version has more
Expand Down Expand Up @@ -105,16 +128,17 @@ enum SpecialProperty {
Relation.of(new HashMap<String, Set<PropertyParsingInfo>>(), HashSet.class);

/**
 * Creates the parsing record for one property, registering the given file and field mapping
 * under {@code Settings.LATEST_VERSION_INFO} in freshly created version-keyed maps.
 *
 * <p>NOTE(review): this hunk is shown as a merged diff, so the superseded {@code int fieldNumber}
 * parameter line and {@code fieldNumbers} statements appear next to their {@code FieldMapping}
 * replacements.
 *
 * @param file file name recorded for the latest version
 * @param property the property this record describes
 * @param fieldMapping field mapping recorded for the latest version
 * @param special the special-handling marker stored on this record
 */
public PropertyParsingInfo(
String file, UcdProperty property, int fieldNumber, SpecialProperty special) {
String file, UcdProperty property, FieldMapping fieldMapping, SpecialProperty special) {
// Each version-keyed map starts out with a single entry for the latest version.
this.files = new TreeMap<>();
files.put(Settings.LATEST_VERSION_INFO, file);
this.property = property;
this.fieldNumbers = new TreeMap<>();
fieldNumbers.put(Settings.LATEST_VERSION_INFO, fieldNumber);
this.fieldMappings = new TreeMap<>();
fieldMappings.put(Settings.LATEST_VERSION_INFO, fieldMapping);
this.special = special;
}

static final Pattern VERSION = Pattern.compile("v\\d+(\\.\\d+)+");
static final Pattern FIELD_MAPPING = Pattern.compile("(\\d+)\\s*↦\\s*(\\d+)");

private static void fromStrings(String... propertyInfo) {
if (propertyInfo.length < 2 || propertyInfo.length > 4) {
Expand All @@ -130,13 +154,20 @@ private static void fromStrings(String... propertyInfo) {

String last = propertyInfo[propertyInfo.length - 1];

int temp = 1;
var fieldMapping = new FieldMapping(1);
if (propertyInfo.length > 2
&& !propertyInfo[2].isEmpty()
&& !VERSION.matcher(propertyInfo[2]).matches()) {
temp = Integer.parseInt(propertyInfo[2]);
final var matcher = FIELD_MAPPING.matcher(propertyInfo[2]);
if (matcher.matches()) {
fieldMapping =
new FieldMapping(
Integer.parseInt(matcher.group(1)),
Integer.parseInt(matcher.group(2)));
} else {
fieldMapping = new FieldMapping(Integer.parseInt(propertyInfo[2]));
}
}
int _fieldNumber = temp;

if (VERSION.matcher(last).matches()) {
propertyInfo[propertyInfo.length - 1] = "";
Expand All @@ -146,7 +177,7 @@ private static void fromStrings(String... propertyInfo) {
"No modern info for property with old file record: " + propName);
}
result.files.put(VersionInfo.getInstance(last.substring(1)), _file);
result.fieldNumbers.put(VersionInfo.getInstance(last.substring(1)), _fieldNumber);
result.fieldMappings.put(VersionInfo.getInstance(last.substring(1)), fieldMapping);
file2PropertyInfoSet.put(_file, result);
return;
}
Expand All @@ -156,7 +187,7 @@ private static void fromStrings(String... propertyInfo) {
? SpecialProperty.None
: SpecialProperty.valueOf(propertyInfo[3]);
PropertyParsingInfo result =
new PropertyParsingInfo(_file, _property, _fieldNumber, _special);
new PropertyParsingInfo(_file, _property, fieldMapping, _special);

try {
PropertyUtilities.putNew(property2PropertyInfo, _property, result);
Expand All @@ -173,7 +204,9 @@ private static void fromUnihanProperty(UcdProperty prop) {
}
PropertyParsingInfo info = property2PropertyInfo.get(prop);
if (info == null) {
info = new PropertyParsingInfo(filename, prop, 1, SpecialProperty.None);
info =
new PropertyParsingInfo(
filename, prop, new FieldMapping(1), SpecialProperty.None);
property2PropertyInfo.put(prop, info);
}
file2PropertyInfoSet.put(filename, info);
Expand All @@ -185,7 +218,7 @@ public String toString() {
+ " ;\t"
+ property
+ " ;\t"
+ fieldNumbers
+ fieldMappings
+ " ;\t"
+ special
+ " ;\t"
Expand All @@ -212,8 +245,9 @@ public int compareTo(PropertyParsingInfo arg0) {
if (0 != (result = property.toString().compareTo(arg0.property.toString()))) {
return result;
}
return fieldNumbers.get(Settings.LATEST_VERSION_INFO)
- arg0.fieldNumbers.get(Settings.LATEST_VERSION_INFO);
return fieldMappings
.get(Settings.LATEST_VERSION_INFO)
.compareTo(arg0.fieldMappings.get(Settings.LATEST_VERSION_INFO));
}

public static String getFullFileName(UcdProperty prop, VersionInfo ucdVersion) {
Expand All @@ -240,18 +274,18 @@ public String getFileName(VersionInfo ucdVersionRequested) {
}
}

/**
 * Returns the field mapping that applies to the requested Unicode version.
 *
 * <p>If only one mapping is recorded it is returned whatever the requested version. Otherwise
 * the entries are scanned in ascending version order and the first one whose version is not
 * lower than the requested version is returned.
 *
 * <p>NOTE(review): this hunk is shown as a merged diff, so the superseded {@code getFieldNumber}
 * lines appear next to their {@code getFieldMapping} replacements. When no entry matches, the
 * old code fell through to 0 whereas the new code falls through to {@code null}; the visible
 * callers dereference the result directly ({@code .keyField} / {@code .valueField}), so confirm
 * that a version later than every recorded entry can never be requested.
 *
 * @param ucdVersionRequested the Unicode version being parsed
 * @return the applicable mapping, or {@code null} if more than one mapping is recorded and none
 *     has a version at or after the requested one
 */
public int getFieldNumber(VersionInfo ucdVersionRequested) {
int fieldNumber = 0;
if (fieldNumbers.size() == 1) {
return fieldNumbers.values().iterator().next();
public FieldMapping getFieldMapping(VersionInfo ucdVersionRequested) {
FieldMapping fieldMapping = null;
// Fast path: a property whose mapping never changed has a single entry.
if (fieldMappings.size() == 1) {
return fieldMappings.values().iterator().next();
}
for (final var entry : fieldNumbers.entrySet()) {
for (final var entry : fieldMappings.entrySet()) {
// The map is a TreeMap, so the first key >= the requested version is the closest one.
if (ucdVersionRequested.compareTo(entry.getKey()) <= 0) {
fieldNumber = entry.getValue();
fieldMapping = entry.getValue();
break;
}
}
return fieldNumber;
return fieldMapping;
}

private static final VersionInfo V13 = VersionInfo.getInstance(13);
Expand Down Expand Up @@ -665,7 +699,11 @@ static void parseSourceFile(
if (propInfoSet.size() == 1
&& (propInfo = propInfoSet.iterator().next()).special
== SpecialProperty.None
&& propInfo.getFieldNumber(indexUnicodeProperties.ucdVersion) == 1) {
&& propInfo.getFieldMapping(indexUnicodeProperties.ucdVersion).keyField
== 0
&& propInfo.getFieldMapping(indexUnicodeProperties.ucdVersion)
.valueField
== 1) {
if (fileName.equals("math/*/MathClass")
&& indexUnicodeProperties.ucdVersion.compareTo(
VersionInfo.UNICODE_6_3)
Expand Down Expand Up @@ -1490,9 +1528,12 @@ private static void parseFields(
throw new UnicodePropertyException();
}
String value =
propInfo.getFieldNumber(indexUnicodeProperties.ucdVersion) >= parts.length
propInfo.getFieldMapping(indexUnicodeProperties.ucdVersion).valueField
>= parts.length
? null
: parts[propInfo.getFieldNumber(indexUnicodeProperties.ucdVersion)];
: parts[
propInfo.getFieldMapping(indexUnicodeProperties.ucdVersion)
.valueField];
if (propInfo.property == UcdProperty.Joining_Group
&& indexUnicodeProperties.ucdVersion.compareTo(VersionInfo.UNICODE_4_0_1)
<= 0
Expand Down Expand Up @@ -1562,22 +1603,43 @@ private static void parseFields(
// 21EA..21F3;;⇪..⇳;;;; 21EA-21F3 are keyboard
value = "None";
}
propInfo.put(
data,
line.getMissingSet(),
line.getRange(),
value,
merger,
hackHangul && propInfo.property == UcdProperty.Decomposition_Mapping,
nextProperties == null
? null
: nextProperties.getProperty(propInfo.property));
if (propInfo.getFieldMapping(indexUnicodeProperties.ucdVersion).keyField == 0) {
propInfo.put(
data,
line.getMissingSet(),
line.getRange(),
value,
merger,
hackHangul && propInfo.property == UcdProperty.Decomposition_Mapping,
nextProperties == null
? null
: nextProperties.getProperty(propInfo.property));
} else {
final var key = new IntRange();
key.set(
parts[
propInfo.getFieldMapping(indexUnicodeProperties.ucdVersion)
.keyField]);
propInfo.put(
data,
line.getMissingSet(),
key,
value,
IndexUnicodeProperties.MULTIVALUED_JOINER,
hackHangul && propInfo.property == UcdProperty.Decomposition_Mapping,
nextProperties == null
? null
: nextProperties.getProperty(propInfo.property));
}
}
} else {
for (final PropertyParsingInfo propInfo : propInfoSet) {
final String value =
propInfo.getFieldNumber(indexUnicodeProperties.ucdVersion) < parts.length
? parts[propInfo.getFieldNumber(indexUnicodeProperties.ucdVersion)]
propInfo.getFieldMapping(indexUnicodeProperties.ucdVersion).valueField
< parts.length
? parts[
propInfo.getFieldMapping(indexUnicodeProperties.ucdVersion)
.valueField]
: null;
setPropDefault(
propInfo.property,
Expand Down
13 changes: 13 additions & 0 deletions unicodetools/src/main/java/org/unicode/props/UcdProperty.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.unicode.props.UcdPropertyValues.Block_Values;
import org.unicode.props.UcdPropertyValues.Canonical_Combining_Class_Values;
import org.unicode.props.UcdPropertyValues.Decomposition_Type_Values;
import org.unicode.props.UcdPropertyValues.Do_Not_Emit_Dispreferred_Type_Values;
import org.unicode.props.UcdPropertyValues.Do_Not_Emit_Type_Values;
import org.unicode.props.UcdPropertyValues.East_Asian_Width_Values;
import org.unicode.props.UcdPropertyValues.General_Category_Values;
Expand Down Expand Up @@ -86,6 +87,12 @@ public enum UcdProperty {
Confusable_SA(PropertyType.String, DerivedPropertyStatus.NonUCDNonProperty, "ConfSA"),
Confusable_SL(PropertyType.String, DerivedPropertyStatus.NonUCDNonProperty, "ConfSL"),
Decomposition_Mapping(PropertyType.String, DerivedPropertyStatus.Approved, "dm"),
Do_Not_Emit_Dispreferred(
PropertyType.String,
DerivedPropertyStatus.UCDNonProperty,
null,
ValueCardinality.Unordered,
"Do_Not_Emit_Dispreferred"),
Do_Not_Emit_Preferred(
PropertyType.String, DerivedPropertyStatus.UCDNonProperty, "Do_Not_Emit_Preferred"),
Equivalent_Unified_Ideograph(PropertyType.String, DerivedPropertyStatus.Approved, "EqUIdeo"),
Expand Down Expand Up @@ -646,6 +653,12 @@ public enum UcdProperty {
Decomposition_Type_Values.class,
null,
"dt"),
Do_Not_Emit_Dispreferred_Type(
PropertyType.Enumerated,
DerivedPropertyStatus.UCDNonProperty,
Do_Not_Emit_Dispreferred_Type_Values.class,
ValueCardinality.Unordered,
"Do_Not_Emit_Dispreferred_Type"),
Do_Not_Emit_Type(
PropertyType.Enumerated,
DerivedPropertyStatus.UCDNonProperty,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,52 @@ public static Decomposition_Type_Values forName(String name) {
}
}

// Do_Not_Emit_Dispreferred
/**
 * Values of the {@code Do_Not_Emit_Dispreferred_Type} property. Every constant registers its own
 * identifier as its short name and declares no additional names.
 */
public enum Do_Not_Emit_Dispreferred_Type_Values implements Named {
    None("None"),
    Indic_Atomic_Consonant("Indic_Atomic_Consonant"),
    Indic_Consonant_Conjunct("Indic_Consonant_Conjunct"),
    Indic_Vowel_Letter("Indic_Vowel_Letter"),
    Bengali_Khanda_Ta("Bengali_Khanda_Ta"),
    Malayalam_Chillu("Malayalam_Chillu"),
    Tamil_Shrii("Tamil_Shrii"),
    Dotless_Form("Dotless_Form"),
    Hamza_Form("Hamza_Form"),
    Precomposed_Hieroglyph("Precomposed_Hieroglyph"),
    Precomposed_Form("Precomposed_Form"),
    Deprecated("Deprecated"),
    Discouraged("Discouraged"),
    Preferred_Spelling("Preferred_Spelling"),
    Arabic_Tashkil("Arabic_Tashkil");

    /** Lookup from a name to its constant, obtained from {@code PropertyNames}. */
    private static final NameMatcher<Do_Not_Emit_Dispreferred_Type_Values> NAME_MATCHER =
            PropertyNames.getNameToEnums(Do_Not_Emit_Dispreferred_Type_Values.class);

    /** Name record (short name plus any other names) for this constant. */
    private final PropertyNames<Do_Not_Emit_Dispreferred_Type_Values> names;

    /**
     * @param primaryName the short name of the constant
     * @param aliases any other names of the constant
     */
    Do_Not_Emit_Dispreferred_Type_Values(String primaryName, String... aliases) {
        this.names =
                new PropertyNames<Do_Not_Emit_Dispreferred_Type_Values>(
                        Do_Not_Emit_Dispreferred_Type_Values.class, this, primaryName, aliases);
    }

    /**
     * Looks a constant up by name.
     *
     * @param name the name to resolve
     * @return whatever the name matcher yields for {@code name}
     */
    public static Do_Not_Emit_Dispreferred_Type_Values forName(String name) {
        return NAME_MATCHER.get(name);
    }

    /** Returns the short name held in this constant's name record. */
    @Override
    public String getShortName() {
        return names.getShortName();
    }

    /** Returns the name record of this constant. */
    @Override
    public PropertyNames<Do_Not_Emit_Dispreferred_Type_Values> getNames() {
        return names;
    }
}

// Do_Not_Emit_Preferred
public enum Do_Not_Emit_Type_Values implements Named {
None("None"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ IDNA2008_Category ; IDNA2008_Category ; NonUCDProperty
Other_Joining_Type ; Other_Joining_Type ; UCDNonProperty

Do_Not_Emit_Type ; Do_Not_Emit_Type ; UCDNonProperty
Do_Not_Emit_Dispreferred_Type ; Do_Not_Emit_Dispreferred_Type ; UCDNonProperty

kEH_Core ; kEH_Core ; Provisional

Expand Down Expand Up @@ -75,6 +76,7 @@ cjkTraditionalVariant ; kTraditionalVariant ; Provisional
cjkSpoofingVariant ; kSpoofingVariant ; Provisional

Do_Not_Emit_Preferred ; Do_Not_Emit_Preferred ; UCDNonProperty
Do_Not_Emit_Dispreferred ; Do_Not_Emit_Dispreferred ; UCDNonProperty

normalization_correction_original ; normalization_correction_original ; UCDNonProperty
normalization_correction_corrected ; normalization_correction_corrected ; UCDNonProperty
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,24 @@ Do_Not_Emit_Type ; Discouraged ; Discouraged
Do_Not_Emit_Type ; Preferred_Spelling ; Preferred_Spelling
Do_Not_Emit_Type ; Arabic_Tashkil ; Arabic_Tashkil

# TODO(egg): This is really the same enumeration as Do_Not_Emit_Type, cf. Script and Script_Extensions.
# @missing: 0000..10FFFF; Do_Not_Emit_Dispreferred_Type ; None
Do_Not_Emit_Dispreferred_Type ; None ; None
Do_Not_Emit_Dispreferred_Type ; Indic_Atomic_Consonant ; Indic_Atomic_Consonant
Do_Not_Emit_Dispreferred_Type ; Indic_Consonant_Conjunct ; Indic_Consonant_Conjunct
Do_Not_Emit_Dispreferred_Type ; Indic_Vowel_Letter ; Indic_Vowel_Letter
Do_Not_Emit_Dispreferred_Type ; Bengali_Khanda_Ta ; Bengali_Khanda_Ta
Do_Not_Emit_Dispreferred_Type ; Malayalam_Chillu ; Malayalam_Chillu
Do_Not_Emit_Dispreferred_Type ; Tamil_Shrii ; Tamil_Shrii
Do_Not_Emit_Dispreferred_Type ; Dotless_Form ; Dotless_Form
Do_Not_Emit_Dispreferred_Type ; Hamza_Form ; Hamza_Form
Do_Not_Emit_Dispreferred_Type ; Precomposed_Hieroglyph ; Precomposed_Hieroglyph
Do_Not_Emit_Dispreferred_Type ; Precomposed_Form ; Precomposed_Form
Do_Not_Emit_Dispreferred_Type ; Deprecated ; Deprecated
Do_Not_Emit_Dispreferred_Type ; Discouraged ; Discouraged
Do_Not_Emit_Dispreferred_Type ; Preferred_Spelling ; Preferred_Spelling
Do_Not_Emit_Dispreferred_Type ; Arabic_Tashkil ; Arabic_Tashkil

# @missing: 0000..10FFFF; normalization_correction_original; <none>
# @missing: 0000..10FFFF; normalization_correction_corrected; <none>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,9 @@ Names_List_Alias ; MULTI_VALUED ; .*
Names_List_Comment ; MULTI_VALUED ; .*
Names_List_Cross_Ref ; MULTI_VALUED ; .*

Do_Not_Emit_Dispreferred ; MULTI_VALUED ; .*
Do_Not_Emit_Dispreferred_Type ; MULTI_VALUED ; .*

# Regex patterns from UAX #57

kEH_Cat ; SINGLE_VALUED ; ([A-IK-Z]|AA)-\d{2}-\d{3}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,8 @@ NamedSequencesProv ; Named_Sequences_Prov
StandardizedVariants ; Standardized_Variant
emoji-variation-sequences ; emoji-variation-sequence
DoNotEmit ; Do_Not_Emit_Preferred ; 1
DoNotEmit ; Do_Not_Emit_Dispreferred ; 1 ↦ 0
DoNotEmit ; Do_Not_Emit_Dispreferred_Type ; 1 ↦ 2
DoNotEmit ; Do_Not_Emit_Type ; 2

idna/*/IdnaMappingTable; Idn_Status;
Expand Down
Loading
Loading