Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 113 additions & 37 deletions unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.VersionInfo;
import java.util.Arrays;
import java.util.Comparator;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
Expand Down Expand Up @@ -49,11 +50,45 @@ enum SpecialProperty {
public final SpecialProperty special;

/**
* Maps from Unicode versions to field number. A property whose field number depends on the
* version has more than one entry. A particular field number applies to the Unicode versions
* Represents a mapping from one field of a UCD file to another. For instance, given the data
* line {@code ABCD ; Value ; 1234}, {@code FieldMapping(1)} maps U+ABCD to Value;
* {@code FieldMapping(2)} maps U+ABCD to 1234 (which, depending on the property type, may be
* interpreted as U+1234); and {@code FieldMapping(2, 1)} maps U+1234 to Value.
*/
public static class FieldMapping implements Comparable<FieldMapping> {
    /** Index of the field whose content is used as the key of the mapping. */
    final int keyField;

    /** Index of the field whose content is used as the value of the mapping. */
    final int valueField;

    /**
     * Orders mappings by key field, then by value field. Uses the primitive-int comparator
     * steps throughout so that no boxing takes place.
     */
    static final Comparator<FieldMapping> comparator =
            Comparator.<FieldMapping>comparingInt(m -> m.keyField)
                    .thenComparingInt(m -> m.valueField);

    /** A mapping from field 0 to field `valueField`. This is the most common case. */
    FieldMapping(int valueField) {
        this(0, valueField);
    }

    /**
     * A mapping from field `keyField` to field `valueField`.
     *
     * @param keyField index of the field providing the key
     * @param valueField index of the field providing the value
     */
    FieldMapping(int keyField, int valueField) {
        this.keyField = keyField;
        this.valueField = valueField;
    }

    /** Compares by key field first, then by value field. */
    @Override
    public int compareTo(FieldMapping other) {
        return comparator.compare(this, other);
    }

    /**
     * Two mappings are equal when both field indices match, which keeps equality consistent
     * with {@link #compareTo(FieldMapping)}.
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof FieldMapping)) {
            return false;
        }
        final FieldMapping that = (FieldMapping) other;
        return keyField == that.keyField && valueField == that.valueField;
    }

    /** Hash derived from both field indices, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return 31 * keyField + valueField;
    }

    /** Renders the mapping as {@code keyField ↦ valueField}. */
    @Override
    public String toString() {
        return keyField + " ↦ " + valueField;
    }
}

/**
* Maps from Unicode versions to field mapping. A property whose field mapping depends on the
* version has more than one entry. A particular field mapping applies to the Unicode versions
* after the previous-version entry, up to and including its own version.
*/
TreeMap<VersionInfo, Integer> fieldNumbers;
TreeMap<VersionInfo, FieldMapping> fieldMappings;

/**
* Maps from Unicode versions to files. A property whose file depends on the version has more
Expand Down Expand Up @@ -105,16 +140,17 @@ enum SpecialProperty {
Relation.of(new HashMap<String, Set<PropertyParsingInfo>>(), HashSet.class);

public PropertyParsingInfo(
String file, UcdProperty property, int fieldNumber, SpecialProperty special) {
String file, UcdProperty property, FieldMapping fieldMapping, SpecialProperty special) {
this.files = new TreeMap<>();
files.put(Settings.LATEST_VERSION_INFO, file);
this.property = property;
this.fieldNumbers = new TreeMap<>();
fieldNumbers.put(Settings.LATEST_VERSION_INFO, fieldNumber);
this.fieldMappings = new TreeMap<>();
fieldMappings.put(Settings.LATEST_VERSION_INFO, fieldMapping);
this.special = special;
}

static final Pattern VERSION = Pattern.compile("v\\d+(\\.\\d+)+");
static final Pattern FIELD_MAPPING = Pattern.compile("(\\d+)\\s*↦\\s*(\\d+)");

private static void fromStrings(String... propertyInfo) {
if (propertyInfo.length < 2 || propertyInfo.length > 4) {
Expand All @@ -130,13 +166,20 @@ private static void fromStrings(String... propertyInfo) {

String last = propertyInfo[propertyInfo.length - 1];

int temp = 1;
var fieldMapping = new FieldMapping(1);
if (propertyInfo.length > 2
&& !propertyInfo[2].isEmpty()
&& !VERSION.matcher(propertyInfo[2]).matches()) {
temp = Integer.parseInt(propertyInfo[2]);
final var matcher = FIELD_MAPPING.matcher(propertyInfo[2]);
if (matcher.matches()) {
fieldMapping =
new FieldMapping(
Integer.parseInt(matcher.group(1)),
Integer.parseInt(matcher.group(2)));
} else {
fieldMapping = new FieldMapping(Integer.parseInt(propertyInfo[2]));
}
}
int _fieldNumber = temp;

if (VERSION.matcher(last).matches()) {
propertyInfo[propertyInfo.length - 1] = "";
Expand All @@ -146,7 +189,7 @@ private static void fromStrings(String... propertyInfo) {
"No modern info for property with old file record: " + propName);
}
result.files.put(VersionInfo.getInstance(last.substring(1)), _file);
result.fieldNumbers.put(VersionInfo.getInstance(last.substring(1)), _fieldNumber);
result.fieldMappings.put(VersionInfo.getInstance(last.substring(1)), fieldMapping);
file2PropertyInfoSet.put(_file, result);
return;
}
Expand All @@ -156,7 +199,7 @@ private static void fromStrings(String... propertyInfo) {
? SpecialProperty.None
: SpecialProperty.valueOf(propertyInfo[3]);
PropertyParsingInfo result =
new PropertyParsingInfo(_file, _property, _fieldNumber, _special);
new PropertyParsingInfo(_file, _property, fieldMapping, _special);

try {
PropertyUtilities.putNew(property2PropertyInfo, _property, result);
Expand All @@ -173,7 +216,9 @@ private static void fromUnihanProperty(UcdProperty prop) {
}
PropertyParsingInfo info = property2PropertyInfo.get(prop);
if (info == null) {
info = new PropertyParsingInfo(filename, prop, 1, SpecialProperty.None);
info =
new PropertyParsingInfo(
filename, prop, new FieldMapping(1), SpecialProperty.None);
property2PropertyInfo.put(prop, info);
}
file2PropertyInfoSet.put(filename, info);
Expand All @@ -185,7 +230,7 @@ public String toString() {
+ " ;\t"
+ property
+ " ;\t"
+ fieldNumbers
+ fieldMappings
+ " ;\t"
+ special
+ " ;\t"
Expand All @@ -212,8 +257,9 @@ public int compareTo(PropertyParsingInfo arg0) {
if (0 != (result = property.toString().compareTo(arg0.property.toString()))) {
return result;
}
return fieldNumbers.get(Settings.LATEST_VERSION_INFO)
- arg0.fieldNumbers.get(Settings.LATEST_VERSION_INFO);
return fieldMappings
.get(Settings.LATEST_VERSION_INFO)
.compareTo(arg0.fieldMappings.get(Settings.LATEST_VERSION_INFO));
}

public static String getFullFileName(UcdProperty prop, VersionInfo ucdVersion) {
Expand All @@ -240,18 +286,18 @@ public String getFileName(VersionInfo ucdVersionRequested) {
}
}

public int getFieldNumber(VersionInfo ucdVersionRequested) {
int fieldNumber = 0;
if (fieldNumbers.size() == 1) {
return fieldNumbers.values().iterator().next();
public FieldMapping getFieldMapping(VersionInfo ucdVersionRequested) {
FieldMapping fieldMapping = null;
if (fieldMappings.size() == 1) {
return fieldMappings.values().iterator().next();
}
for (final var entry : fieldNumbers.entrySet()) {
for (final var entry : fieldMappings.entrySet()) {
if (ucdVersionRequested.compareTo(entry.getKey()) <= 0) {
fieldNumber = entry.getValue();
fieldMapping = entry.getValue();
break;
}
}
return fieldNumber;
return fieldMapping;
}

private static final VersionInfo V13 = VersionInfo.getInstance(13);
Expand Down Expand Up @@ -662,10 +708,16 @@ static void parseSourceFile(
propInfoSet);
break;
case Field:
FieldMapping mapping;
if (propInfoSet.size() == 1
&& (propInfo = propInfoSet.iterator().next()).special
== SpecialProperty.None
&& propInfo.getFieldNumber(indexUnicodeProperties.ucdVersion) == 1) {
&& (mapping =
propInfo.getFieldMapping(
indexUnicodeProperties.ucdVersion))
.keyField
== 0
&& mapping.valueField == 1) {
if (fileName.equals("math/*/MathClass")
&& indexUnicodeProperties.ucdVersion.compareTo(
VersionInfo.UNICODE_6_3)
Expand Down Expand Up @@ -1490,9 +1542,12 @@ private static void parseFields(
throw new UnicodePropertyException();
}
String value =
propInfo.getFieldNumber(indexUnicodeProperties.ucdVersion) >= parts.length
propInfo.getFieldMapping(indexUnicodeProperties.ucdVersion).valueField
>= parts.length
? null
: parts[propInfo.getFieldNumber(indexUnicodeProperties.ucdVersion)];
: parts[
propInfo.getFieldMapping(indexUnicodeProperties.ucdVersion)
.valueField];
if (propInfo.property == UcdProperty.Joining_Group
&& indexUnicodeProperties.ucdVersion.compareTo(VersionInfo.UNICODE_4_0_1)
<= 0
Expand Down Expand Up @@ -1562,22 +1617,43 @@ private static void parseFields(
// 21EA..21F3;;⇪..⇳;;;; 21EA-21F3 are keyboard
value = "None";
}
propInfo.put(
data,
line.getMissingSet(),
line.getRange(),
value,
merger,
hackHangul && propInfo.property == UcdProperty.Decomposition_Mapping,
nextProperties == null
? null
: nextProperties.getProperty(propInfo.property));
if (propInfo.getFieldMapping(indexUnicodeProperties.ucdVersion).keyField == 0) {
propInfo.put(
data,
line.getMissingSet(),
line.getRange(),
value,
merger,
hackHangul && propInfo.property == UcdProperty.Decomposition_Mapping,
nextProperties == null
? null
: nextProperties.getProperty(propInfo.property));
} else {
final var key = new IntRange();
key.set(
parts[
propInfo.getFieldMapping(indexUnicodeProperties.ucdVersion)
.keyField]);
propInfo.put(
data,
line.getMissingSet(),
key,
value,
IndexUnicodeProperties.MULTIVALUED_JOINER,
hackHangul && propInfo.property == UcdProperty.Decomposition_Mapping,
nextProperties == null
? null
: nextProperties.getProperty(propInfo.property));
}
}
} else {
for (final PropertyParsingInfo propInfo : propInfoSet) {
final String value =
propInfo.getFieldNumber(indexUnicodeProperties.ucdVersion) < parts.length
? parts[propInfo.getFieldNumber(indexUnicodeProperties.ucdVersion)]
propInfo.getFieldMapping(indexUnicodeProperties.ucdVersion).valueField
< parts.length
? parts[
propInfo.getFieldMapping(indexUnicodeProperties.ucdVersion)
.valueField]
: null;
setPropDefault(
propInfo.property,
Expand Down
13 changes: 13 additions & 0 deletions unicodetools/src/main/java/org/unicode/props/UcdProperty.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.unicode.props.UcdPropertyValues.Block_Values;
import org.unicode.props.UcdPropertyValues.Canonical_Combining_Class_Values;
import org.unicode.props.UcdPropertyValues.Decomposition_Type_Values;
import org.unicode.props.UcdPropertyValues.Do_Not_Emit_Dispreferred_Type_Values;
import org.unicode.props.UcdPropertyValues.Do_Not_Emit_Type_Values;
import org.unicode.props.UcdPropertyValues.East_Asian_Width_Values;
import org.unicode.props.UcdPropertyValues.General_Category_Values;
Expand Down Expand Up @@ -86,6 +87,12 @@ public enum UcdProperty {
Confusable_SA(PropertyType.String, DerivedPropertyStatus.NonUCDNonProperty, "ConfSA"),
Confusable_SL(PropertyType.String, DerivedPropertyStatus.NonUCDNonProperty, "ConfSL"),
Decomposition_Mapping(PropertyType.String, DerivedPropertyStatus.Approved, "dm"),
Do_Not_Emit_Dispreferred(
PropertyType.String,
DerivedPropertyStatus.UCDNonProperty,
null,
ValueCardinality.Unordered,
"Do_Not_Emit_Dispreferred"),
Do_Not_Emit_Preferred(
PropertyType.String, DerivedPropertyStatus.UCDNonProperty, "Do_Not_Emit_Preferred"),
Equivalent_Unified_Ideograph(PropertyType.String, DerivedPropertyStatus.Approved, "EqUIdeo"),
Expand Down Expand Up @@ -646,6 +653,12 @@ public enum UcdProperty {
Decomposition_Type_Values.class,
null,
"dt"),
Do_Not_Emit_Dispreferred_Type(
PropertyType.Enumerated,
DerivedPropertyStatus.UCDNonProperty,
Do_Not_Emit_Dispreferred_Type_Values.class,
ValueCardinality.Unordered,
"Do_Not_Emit_Dispreferred_Type"),
Do_Not_Emit_Type(
PropertyType.Enumerated,
DerivedPropertyStatus.UCDNonProperty,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,52 @@ public static Decomposition_Type_Values forName(String name) {
}
}

// Do_Not_Emit_Dispreferred
/** Values of the Do_Not_Emit_Dispreferred_Type property; each constant carries its names. */
public enum Do_Not_Emit_Dispreferred_Type_Values implements Named {
    None("None"),
    Indic_Atomic_Consonant("Indic_Atomic_Consonant"),
    Indic_Consonant_Conjunct("Indic_Consonant_Conjunct"),
    Indic_Vowel_Letter("Indic_Vowel_Letter"),
    Bengali_Khanda_Ta("Bengali_Khanda_Ta"),
    Malayalam_Chillu("Malayalam_Chillu"),
    Tamil_Shrii("Tamil_Shrii"),
    Dotless_Form("Dotless_Form"),
    Hamza_Form("Hamza_Form"),
    Precomposed_Hieroglyph("Precomposed_Hieroglyph"),
    Precomposed_Form("Precomposed_Form"),
    Deprecated("Deprecated"),
    Discouraged("Discouraged"),
    Preferred_Spelling("Preferred_Spelling"),
    Arabic_Tashkil("Arabic_Tashkil");

    /** Lookup from a name to the matching constant, built once for the whole enum. */
    private static final NameMatcher<Do_Not_Emit_Dispreferred_Type_Values> NAME_MATCHER =
            PropertyNames.getNameToEnums(Do_Not_Emit_Dispreferred_Type_Values.class);

    /** The names (short name plus any others) registered for this constant. */
    private final PropertyNames<Do_Not_Emit_Dispreferred_Type_Values> names;

    // Enum constructors are implicitly private.
    Do_Not_Emit_Dispreferred_Type_Values(String primaryName, String... aliases) {
        this.names =
                new PropertyNames<Do_Not_Emit_Dispreferred_Type_Values>(
                        Do_Not_Emit_Dispreferred_Type_Values.class, this, primaryName, aliases);
    }

    /** Looks the given name up in the name matcher of this enum. */
    public static Do_Not_Emit_Dispreferred_Type_Values forName(String name) {
        return NAME_MATCHER.get(name);
    }

    @Override
    public PropertyNames<Do_Not_Emit_Dispreferred_Type_Values> getNames() {
        return this.names;
    }

    @Override
    public String getShortName() {
        return this.names.getShortName();
    }
}

// Do_Not_Emit_Preferred
public enum Do_Not_Emit_Type_Values implements Named {
None("None"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ IDNA2008_Category ; IDNA2008_Category ; NonUCDProperty
Other_Joining_Type ; Other_Joining_Type ; UCDNonProperty

Do_Not_Emit_Type ; Do_Not_Emit_Type ; UCDNonProperty
Do_Not_Emit_Dispreferred_Type ; Do_Not_Emit_Dispreferred_Type ; UCDNonProperty

kEH_Core ; kEH_Core ; Provisional

Expand Down Expand Up @@ -75,6 +76,7 @@ cjkTraditionalVariant ; kTraditionalVariant ; Provisional
cjkSpoofingVariant ; kSpoofingVariant ; Provisional

Do_Not_Emit_Preferred ; Do_Not_Emit_Preferred ; UCDNonProperty
Do_Not_Emit_Dispreferred ; Do_Not_Emit_Dispreferred ; UCDNonProperty

normalization_correction_original ; normalization_correction_original ; UCDNonProperty
normalization_correction_corrected ; normalization_correction_corrected ; UCDNonProperty
Expand Down
Loading
Loading