Created
June 11, 2019 06:20
-
-
Save onigoetz/616fc7426fb6c286667279fab313e9c3 to your computer and use it in GitHub Desktop.
CLDR Data subset
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;

import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
/** | |
* (c) Swissquote 02.05.17 | |
* | |
* @author sgoetz | |
*/ | |
public class CLDRGenerator { | |
private final CldrFileGetter cldrFileGetter; | |
public CLDRGenerator() throws IOException { | |
cldrFileGetter = new CldrFileGetter(); | |
} | |
public JsonObject getCLDRBaseData() throws IOException { | |
JsonObject results = new JsonObject(); | |
// Modules: currency, date, message, number, plural, relativeTime, unit | |
// Disabled; we'll get only the likelySubtags that might be needed with the current locale | |
//results = mergeRecursive(results, getCLDRFile("supplemental/likelySubtags.json")); | |
// Modules: currency, date, number, relativeTime, unit | |
// Disabled; we'll get only the numberingSystems used in the current locale | |
//results = mergeRecursive(results, getCLDRFile("supplemental/numberingSystems.json")); | |
// Modules: currency, plural, relativeTime, unit | |
// Disabled; we'll get only the plurals needed for the current locale | |
//results = mergeRecursive(results, getCLDRFile("supplemental/plurals.json")); | |
//results = mergeRecursive(results, getCLDRFile("supplemental/ordinals.json")); | |
// Modules: currency | |
results = mergeRecursive(results, getCLDRFile("supplemental/currencyData.json")); | |
results.getAsJsonObject("supplemental").getAsJsonObject("currencyData").remove("region"); | |
// Modules: date | |
results = mergeRecursive(results, getCLDRFile("supplemental/timeData.json")); | |
results = mergeRecursive(results, getCLDRFile("supplemental/weekData.json")); | |
return results; | |
} | |
public JsonObject getCLDRLocaleData(String locale) throws IOException { | |
String cldrLocale = convertToCDRLocale(locale); | |
//Modules: currency, date, number, relativeTime, unit | |
JsonObject results = getCLDRFile("main/" + cldrLocale + "/numbers.json"); | |
// Get only the numbering systems actually used in this locale | |
// Modules: currency, date, number, relativeTime, unit | |
results = mergeRecursive(results, getNumberingSystems(results, cldrLocale)); | |
// Get only the locales that are needed | |
// Modules: currency, plural, relativeTime, unit | |
results = mergeRecursive(results, getPlurals(cldrLocale)); | |
// Get only the likelySubtags that might be needed | |
// Modules: currency, date, message, number, plural, relativeTime, unit | |
results = mergeRecursive(results, getLikelySubtags(cldrLocale)); | |
//Modules: currency | |
//results = mergeRecursive(results, getCLDRFile("main/" + cldrLocale + "/currencies.json")); | |
//Modules: date | |
results = mergeRecursive(results, getCLDRFile("main/" + cldrLocale + "/ca-gregorian.json")); | |
results = mergeRecursive(results, getCLDRFile("main/" + cldrLocale + "/timeZoneNames.json")); | |
//Modules: relativeTime | |
results = mergeRecursive(results, getCLDRFile("main/" + cldrLocale + "/dateFields.json")); | |
//Modules: unit | |
// We don't load this module on the front | |
//results = mergeRecursive(results, getCLDRFile("main/" + cldrLocale + "/units.json")); | |
return results; | |
} | |
void addValid(JsonObject source, JsonObject destination, final Set<String> validkeys) { | |
addValid(source, destination, (Entry<String, JsonElement> entry) -> validkeys.contains(entry.getKey())); | |
} | |
void addValid(JsonObject source, JsonObject destination, Function<Entry<String, JsonElement>, Boolean> callback) { | |
for (Map.Entry<String, JsonElement> entry : source.entrySet()) { | |
if (callback.apply(entry)) { | |
destination.add(entry.getKey(), entry.getValue()); | |
} | |
} | |
} | |
JsonObject getLikelySubtags(String cldrLocale) throws IOException { | |
Set<String> validLocales = getValidLocales(cldrLocale).stream() | |
.map(item -> item.toLowerCase()) | |
.collect(Collectors.toSet()); | |
JsonObject supplemental = new JsonObject(); | |
JsonObject likelySubtagsUsed = new JsonObject(); | |
supplemental.add("likelySubtags", likelySubtagsUsed); | |
JsonObject likelySubtags = getCLDRFile("supplemental/likelySubtags.json") | |
.getAsJsonObject("supplemental") | |
.getAsJsonObject("likelySubtags"); | |
addValid(likelySubtags, likelySubtagsUsed, | |
entry -> validLocales.stream().anyMatch(item -> entry.getKey().toLowerCase().indexOf(item) > -1)); | |
JsonObject root = new JsonObject(); | |
root.add("supplemental", supplemental); | |
return root; | |
} | |
Set<String> getValidLocales(String cldrLocale) { | |
Set<String> validLocales = new HashSet<>(); | |
validLocales.add(cldrLocale); | |
String moreLikelyToBeATag = cldrLocale.replaceAll("_", "-"); | |
// If the language tag was generated with "Locale.toString" chances are we have this wrong | |
validLocales.add(moreLikelyToBeATag); | |
// recursive bcp-47 fallback using a less specific tag, for example if | |
// we only have values for "en" and "fr" but "fr-FR" was requested. | |
// it is generally preferable to use "fr" | |
String primaryLanguage = moreLikelyToBeATag; | |
while (primaryLanguage.contains("-")) { | |
primaryLanguage = primaryLanguage.substring(0, primaryLanguage.lastIndexOf('-')); | |
validLocales.add(primaryLanguage); | |
} | |
return validLocales; | |
} | |
/** | |
* Pluralization rules are stored in a common place, | |
* but we only need the ones for the current locale. | |
*/ | |
JsonObject getPlurals(String cldrLocale) throws IOException { | |
Set<String> validLocales = getValidLocales(cldrLocale); | |
JsonObject supplemental = new JsonObject(); | |
JsonObject cardinalUsed = new JsonObject(); | |
supplemental.add("plurals-type-cardinal", cardinalUsed); | |
JsonObject ordinalUsed = new JsonObject(); | |
supplemental.add("plurals-type-ordinal", ordinalUsed); | |
JsonObject cardinal = getCLDRFile("supplemental/plurals.json") | |
.getAsJsonObject("supplemental") | |
.getAsJsonObject("plurals-type-cardinal"); | |
addValid(cardinal, cardinalUsed, validLocales); | |
JsonObject ordinal = getCLDRFile("supplemental/ordinals.json") | |
.getAsJsonObject("supplemental") | |
.getAsJsonObject("plurals-type-ordinal"); | |
addValid(ordinal, ordinalUsed, validLocales); | |
JsonObject root = new JsonObject(); | |
root.add("supplemental", supplemental); | |
return root; | |
} | |
/** | |
* Instead of adding all numbering systems, we only | |
* load the ones that are declared in the current locale | |
*/ | |
JsonObject getNumberingSystems(JsonObject results, String cldrLocale) throws IOException { | |
JsonObject numbers = results.getAsJsonObject("main").getAsJsonObject(cldrLocale).getAsJsonObject("numbers"); | |
String defaultNumberingSystem = numbers.getAsJsonPrimitive("defaultNumberingSystem").getAsString(); | |
Set<String> set = new HashSet<>(); | |
set.add(defaultNumberingSystem); | |
JsonObject otherNumberingSystems = numbers.getAsJsonObject("otherNumberingSystems"); | |
for (Entry<String, JsonElement> entry : otherNumberingSystems.entrySet()) { | |
set.add(entry.getValue().getAsString()); | |
} | |
JsonObject allNumberingSystems = getCLDRFile("supplemental/numberingSystems.json") | |
.getAsJsonObject("supplemental") | |
.getAsJsonObject("numberingSystems"); | |
JsonObject usedNumberingSystems = new JsonObject(); | |
addValid(allNumberingSystems, usedNumberingSystems, set); | |
JsonObject supplemental = new JsonObject(); | |
JsonObject root = new JsonObject(); | |
supplemental.add("numberingSystems", usedNumberingSystems); | |
root.add("supplemental", supplemental); | |
return root; | |
} | |
String convertToCDRLocale(String locale) { | |
switch (locale.toLowerCase()) { | |
case "cn": | |
case "zh-cn": | |
case "zh_cn": | |
return "zh"; | |
case "tw": | |
case "zh-tw": | |
case "zh_tw": | |
return "zh-Hant"; | |
default: | |
return locale; | |
} | |
} | |
JsonObject getCLDRFile(String file) throws IOException { | |
return cldrFileGetter.getFile(file).getAsJsonObject(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment