# Copyright 2020 Khaled Hosny
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from icu import Locale, LocaleData, Script
import unicodedata


def get_coverage(font, threshold=0.5):
    """Work out which scripts and languages a font covers, using ICU data.

    For every locale ICU reports as available, the locale's exemplar
    characters are compared with the characters present in the font's best
    Unicode cmap. Locales that share no character with the font are ignored.

    Args:
        font: Object exposing ``getBestCmap()``; iterating the result must
            yield integer code points (e.g. a fontTools ``TTFont``).
        threshold: Largest fraction (0..1) of a locale's exemplar characters
            that may be missing for its language to still be reported as
            partially covered.

    Returns:
        A ``(scripts, full_languages, partial_languages)`` tuple where
        ``scripts`` is a set of script display names, ``full_languages`` is a
        set of display names of languages whose exemplar characters are all
        in the font, and ``partial_languages`` maps a language display name
        to the set of exemplar characters the font lacks. When several
        locales share a display language, the entry of the locale processed
        last replaces the earlier ones.
    """
    best_cmap = font.getBestCmap()
    if not best_cmap:
        # No usable Unicode cmap (None) or an empty one: nothing can be
        # covered, so skip the ICU scan instead of failing on iteration.
        return set(), set(), {}
    cmap = {chr(codepoint) for codepoint in best_cmap}

    scripts = set()
    full_languages = set()
    partial_languages = {}

    for locale_id in Locale.getAvailableLocales():
        # Exemplar entries are joined and re-split so that the comparison
        # with the cmap is done on individual characters.
        exemplar = set("".join(LocaleData(locale_id).getExemplarSet()))
        covered = exemplar & cmap
        if not covered:
            continue

        # Fill in likely subtags so a display script can be derived for the
        # locale.
        locale = Locale(locale_id)
        locale.addLikelySubtags()

        # Characters in the Common/Inherited scripts alone do not count as
        # evidence that the locale's script is supported.
        if any(
            Script.getScript(c).getName() not in ("Common", "Inherited")
            for c in covered
        ):
            scripts.add(locale.getDisplayScript())

        missing = exemplar - covered
        if not missing:
            full_languages.add(locale.getDisplayLanguage())
        elif len(missing) / len(exemplar) <= threshold:
            partial_languages[locale.getDisplayLanguage()] = missing

    return scripts, full_languages, partial_languages


def make_report(scripts, full_languages, partial_languages):
    """Format coverage results as a plain-text report.

    Args:
        scripts: Iterable of script names; listed sorted on a single line.
        full_languages: Iterable of fully covered language names; listed
            sorted on a single line.
        partial_languages: Mapping of language name to an iterable of missing
            characters; one bullet line per language, in mapping order, with
            the characters sorted.

    Returns:
        The report text. Each non-empty input contributes one headed section,
        sections are separated by a blank line, and the result is an empty
        string when all inputs are empty.
    """
    sections = []

    if scripts:
        sections.append(["# Script coverage", ", ".join(sorted(scripts))])

    if full_languages:
        sections.append(
            ["# Full language coverage", ", ".join(sorted(full_languages))]
        )

    if partial_languages:
        lines = ["# Partial Language coverage (missing characters shown)"]
        for name, chars in partial_languages.items():
            # Combining marks are shown on a dotted circle (U+25CC) so they
            # have a base to attach to and stay legible.
            shown = [
                f"\u25cc{ch}" if unicodedata.combining(ch) else ch
                for ch in sorted(chars)
            ]
            listing = ", ".join(shown)
            lines.append(f"* {name}: {listing}")
        sections.append(lines)

    return "\n\n".join("\n".join(section) for section in sections)


if __name__ == "__main__":
    # Command-line entry point: load a font, compute its coverage and print
    # the report to standard output.
    import argparse
    from fontTools.ttLib import TTFont

    parser = argparse.ArgumentParser(
        description=" Calculate font script and language coverage based on ICU exemplar data"
    )
    parser.add_argument("font", help="Input font file")
    parser.add_argument(
        "-n",
        "--font-number",
        type=int,
        default=0,
        help="Font number in a font collection (default: 0)",
    )
    parser.add_argument(
        "-t",
        "--threshold",
        type=int,
        default=50,
        help="Threshold for partial coverage percentage (default: 50)",
    )

    args = parser.parse_args()

    # Use the font as a context manager so its file is closed as soon as the
    # coverage data (plain sets of strings) has been extracted.
    with TTFont(args.font, fontNumber=args.font_number) as font:
        # --threshold is given as a percentage; get_coverage() takes a
        # fraction.
        coverage = get_coverage(font, args.threshold / 100)
    report = make_report(*coverage)
    print(report)