Skip to content

Instantly share code, notes, and snippets.

@khaledhosny
Last active February 10, 2025 16:37
Show Gist options
  • Save khaledhosny/7c120b9795edb11dca9e12287cdb3b10 to your computer and use it in GitHub Desktop.
Save khaledhosny/7c120b9795edb11dca9e12287cdb3b10 to your computer and use it in GitHub Desktop.
Calculate font script and language coverage based on ICU exemplar data
# Copyright 2020 Khaled Hosny
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from icu import Locale, LocaleData, Script
import unicodedata
def get_coverage(font, threshold=0.5):
    """Work out which scripts and languages a font covers.

    The exemplar characters of every available ICU locale are compared with
    the characters mapped by the font's best cmap.

    Args:
        font: Object whose ``getBestCmap()`` returns a mapping keyed by code
            point, or ``None`` (for example a fontTools ``TTFont``).
        threshold: Largest fraction (0-1) of a locale's exemplar characters
            that may be missing for its language to still be reported as
            partially covered.

    Returns:
        A ``(scripts, full_languages, partial_languages)`` tuple: a set of
        script display names, a set of display names of languages whose
        exemplar characters are all present, and a dict mapping language
        display names to the set of exemplar characters the font lacks.
    """
    # getBestCmap() returns None when the font has no Unicode cmap subtable;
    # treat that as a font covering nothing instead of raising TypeError.
    cmap = {chr(c) for c in (font.getBestCmap() or {})}

    scripts = set()
    full_languages = set()
    partial_languages = {}

    for locale_id in Locale.getAvailableLocales():
        # Exemplar entries may be multi-character strings; flatten them into
        # individual characters before comparing with the cmap.
        exemplar = set("".join(LocaleData(locale_id).getExemplarSet()))
        covered = exemplar & cmap
        if not covered:
            # The font has none of this locale's exemplar characters.
            continue

        locale = Locale(locale_id)
        # Presumably fills in the likely script subtag so getDisplayScript()
        # has a script to name for IDs that do not spell one out.
        locale.addLikelySubtags()
        missing = exemplar - cmap

        # Only count the script when at least one covered character belongs
        # to a real script rather than to Common/Inherited.
        if any(
            Script.getScript(c).getName() not in ("Common", "Inherited")
            for c in covered
        ):
            scripts.add(locale.getDisplayScript())

        if not missing:
            full_languages.add(locale.getDisplayLanguage())
        elif len(missing) / len(exemplar) <= threshold:
            partial_languages[locale.getDisplayLanguage()] = missing

    return scripts, full_languages, partial_languages
def make_report(scripts, full_languages, partial_languages):
    """Format coverage results as a plain-text report.

    Args:
        scripts: Iterable of script names.
        full_languages: Iterable of names of fully covered languages.
        partial_languages: Mapping of language name to the collection of
            characters missing for that language.

    Returns:
        The report as a single string. Each non-empty input produces one
        section (a ``#`` heading followed by its content); sections are
        separated by a blank line. An empty string is returned when all
        three inputs are empty.
    """
    sections = []

    if scripts:
        sections.append(["# Script coverage", ", ".join(sorted(scripts))])

    if full_languages:
        sections.append(
            ["# Full language coverage", ", ".join(sorted(full_languages))]
        )

    if partial_languages:
        lines = ["# Partial Language coverage (missing characters shown)"]
        # Sorted like the other two sections so the report is deterministic.
        for language in sorted(partial_languages):
            shown = ", ".join(
                # Show every mark (general category Mn/Mc/Me) on a dotted
                # circle. The canonical combining class is 0 for many marks
                # (e.g. Indic vowel signs), so it cannot be used to detect
                # them.
                f"\u25cc{c}" if unicodedata.category(c).startswith("M") else c
                for c in sorted(partial_languages[language])
            )
            lines.append(f"* {language}: {shown}")
        sections.append(lines)

    return "\n\n".join("\n".join(section) for section in sections)
def main(argv=None):
    """Parse command-line arguments and print the coverage report of a font.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.
    """
    # Imported here, as in the original script section, so that importing
    # this module for its functions does not require fontTools.
    import argparse

    from fontTools.ttLib import TTFont

    parser = argparse.ArgumentParser(
        description=" Calculate font script and language coverage based on ICU exemplar data"
    )
    parser.add_argument("font", help="Input font file")
    parser.add_argument(
        "-n",
        "--font-number",
        type=int,
        default=0,
        help="Font number in a font collection (default: 0)",
    )
    parser.add_argument(
        "-t",
        "--threshold",
        type=int,
        default=50,
        help="Threshold for partial coverage percentage (default: 50)",
    )
    args = parser.parse_args(argv)

    # Use the font as a context manager so the underlying file is closed
    # once the coverage has been computed.
    with TTFont(args.font, fontNumber=args.font_number) as font:
        # The command line takes a percentage; get_coverage() takes a fraction.
        coverage = get_coverage(font, args.threshold / 100)

    print(make_report(*coverage))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment