Created
September 21, 2018 15:24
-
-
Save updateing/a8b546df19c5ff429103093668557d40 to your computer and use it in GitHub Desktop.
subset_noto_cjk.py in multiple processes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# coding=UTF-8 | |
# | |
# Copyright 2016 Google Inc. All rights reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
"""Create a curated subset of Noto CJK for Android.""" | |
from multiprocessing import Process | |
import os | |
from fontTools import ttLib | |
from nototools import font_data | |
from nototools import tool_utils | |
from nototools import ttc_utils | |
# Code points covered by the Noto CJK fonts for which UTR #51 recommends
# emoji-style presentation by default.
EMOJI_IN_CJK = {
    0x26BD,   # ⚽ SOCCER BALL
    0x26BE,   # ⚾ BASEBALL

    0x1F18E,  # 🆎 NEGATIVE SQUARED AB
    0x1F191,  # 🆑 SQUARED CL
    0x1F192,  # 🆒 SQUARED COOL
    0x1F193,  # 🆓 SQUARED FREE
    0x1F194,  # 🆔 SQUARED ID
    0x1F195,  # 🆕 SQUARED NEW
    0x1F196,  # 🆖 SQUARED NG
    0x1F197,  # 🆗 SQUARED OK
    0x1F198,  # 🆘 SQUARED SOS
    0x1F199,  # 🆙 SQUARED UP WITH EXCLAMATION MARK
    0x1F19A,  # 🆚 SQUARED VS

    0x1F201,  # 🈁 SQUARED KATAKANA KOKO
    0x1F21A,  # 🈚 SQUARED CJK UNIFIED IDEOGRAPH-7121
    0x1F22F,  # 🈯 SQUARED CJK UNIFIED IDEOGRAPH-6307
    0x1F232,  # 🈲 SQUARED CJK UNIFIED IDEOGRAPH-7981
    0x1F233,  # 🈳 SQUARED CJK UNIFIED IDEOGRAPH-7A7A
    0x1F234,  # 🈴 SQUARED CJK UNIFIED IDEOGRAPH-5408
    0x1F235,  # 🈵 SQUARED CJK UNIFIED IDEOGRAPH-6E80
    0x1F236,  # 🈶 SQUARED CJK UNIFIED IDEOGRAPH-6709
    0x1F238,  # 🈸 SQUARED CJK UNIFIED IDEOGRAPH-7533
    0x1F239,  # 🈹 SQUARED CJK UNIFIED IDEOGRAPH-5272
    0x1F23A,  # 🈺 SQUARED CJK UNIFIED IDEOGRAPH-55B6

    0x1F250,  # 🉐 CIRCLED IDEOGRAPH ADVANTAGE
    0x1F251,  # 🉑 CIRCLED IDEOGRAPH ACCEPT
}
# Code points that Android renders emoji-style by its own decision, even
# though UTR #51 recommends otherwise.
ANDROID_EMOJI = {
    0x2600,  # ☀ BLACK SUN WITH RAYS
    0x2601,  # ☁ CLOUD
    0x260E,  # ☎ BLACK TELEPHONE
    0x261D,  # ☝ WHITE UP POINTING INDEX
    0x263A,  # ☺ WHITE SMILING FACE

    0x2660,  # ♠ BLACK SPADE SUIT
    0x2663,  # ♣ BLACK CLUB SUIT
    0x2665,  # ♥ BLACK HEART SUIT
    0x2666,  # ♦ BLACK DIAMOND SUIT

    0x270C,  # ✌ VICTORY HAND
    0x2744,  # ❄ SNOWFLAKE
    0x2764,  # ❤ HEAVY BLACK HEART
}
# ASCII control characters (the range 0000-001F) are not wanted in the
# subsetted fonts.
CONTROL_CHARS = tool_utils.parse_int_ranges('0000-001F')

# Every code point to drop from the cmap, as a sorted list.
# NOTE(review): the union assumes parse_int_ranges returns a set -- confirm.
EXCLUDED_CODEPOINTS = sorted(
    EMOJI_IN_CJK | ANDROID_EMOJI | CONTROL_CHARS
)
def remove_from_cmap(infile, outfile, exclude=frozenset()):
    """Save a copy of a font with the given characters dropped from its cmap.

    Args:
      infile: path of the font file to load.
      outfile: path the modified font is written to.
      exclude: the characters passed to font_data.delete_from_cmap for
        removal; defaults to an empty frozenset.
    """
    loaded_font = ttLib.TTFont(infile)
    font_data.delete_from_cmap(loaded_font, exclude)
    loaded_font.save(outfile)
# Directory that receives the fonts extracted from each TTC; the function
# below makes it the working directory while it subsets and rebuilds.
TEMP_DIR = 'subsetted'


def remove_codepoints_from_ttc(ttc_name):
    """Remove EXCLUDED_CODEPOINTS from every font of a TTC, in parallel.

    The collection is extracted into TEMP_DIR, each extracted font is
    rewritten in place by remove_from_cmap running in its own child process,
    and the collection is then rebuilt from the rewritten fonts. The
    extracted fonts are deleted once the rebuild has succeeded.

    Args:
      ttc_name: name of the .ttc file to extract; the same name is passed to
        ttc_utils.ttcfile_build while TEMP_DIR is the working directory.

    Raises:
      RuntimeError: if any subsetting child process exits with a non-zero
        status. The collection is not rebuilt in that case and the extracted
        fonts are left in TEMP_DIR for inspection.
    """
    otf_names = ttc_utils.ttcfile_extract(ttc_name, TEMP_DIR)

    with tool_utils.temp_chdir(TEMP_DIR):
        workers = []
        for otf_name in otf_names:
            # Single-argument parenthesised form prints the same text under
            # both Python 2 and Python 3.
            print('Subsetting %s...' % otf_name)
            worker = Process(
                target=remove_from_cmap,
                args=(otf_name, otf_name),
                kwargs={"exclude": EXCLUDED_CODEPOINTS})
            worker.start()
            workers.append((otf_name, worker))

        # Wait for every worker before judging the outcome so that no child
        # is left running when an error is reported.
        failed = []
        for otf_name, worker in workers:
            worker.join()
            if worker.exitcode != 0:
                failed.append(otf_name)

        # An exception inside a child only shows up as its exit code; without
        # this check the TTC would be rebuilt from fonts that were never
        # subsetted.
        if failed:
            raise RuntimeError(
                'Subsetting failed for: %s' % ', '.join(failed))

        ttc_utils.ttcfile_build(ttc_name, otf_names)
        for otf_name in otf_names:
            os.remove(otf_name)
#remove_codepoints_from_ttc('NotoSansCJK-Thin.ttc') | |
#remove_codepoints_from_ttc('NotoSansCJK-Light.ttc') | |
#remove_codepoints_from_ttc('NotoSansCJK-Regular.ttc') | |
#remove_codepoints_from_ttc('NotoSansCJK-Medium.ttc') | |
#remove_codepoints_from_ttc('NotoSansCJK-Bold.ttc') | |
#remove_codepoints_from_ttc('NotoSansCJK-Black.ttc') | |
#remove_codepoints_from_ttc('NotoSerifCJK-Light.ttc') | |
#remove_codepoints_from_ttc('NotoSerifCJK-Medium.ttc') | |
#remove_codepoints_from_ttc('NotoSerifCJK-Regular.ttc') | |
#remove_codepoints_from_ttc('NotoSerifCJK-SemiBold.ttc') | |
#remove_codepoints_from_ttc('NotoSerifCJK-Bold.ttc') | |
#remove_codepoints_from_ttc('NotoSerifCJK-Black.ttc') | |
# Collections to subset; each one is processed in its own child process.
FONT_LIST = [
    'NotoSerifCJK-SemiBold.ttc',
    'NotoSerifCJK-Light.ttc',
    'NotoSerifCJK-Bold.ttc',
    'NotoSerifCJK-Black.ttc',
    'NotoSerifCJK-Medium.ttc',
]


def main():
    """Subset every collection in FONT_LIST concurrently.

    Starts one process per collection running remove_codepoints_from_ttc,
    waits for all of them, and exits with an error naming the collections
    whose process did not finish cleanly.

    Raises:
      SystemExit: if any per-collection process exits with a non-zero status.
    """
    workers = []
    for font_name in FONT_LIST:
        worker = Process(target=remove_codepoints_from_ttc, args=(font_name,))
        worker.start()
        workers.append((font_name, worker))

    failed = []
    for font_name, worker in workers:
        worker.join()
        if worker.exitcode != 0:
            failed.append(font_name)

    if failed:
        raise SystemExit('Failed to subset: %s' % ', '.join(failed))


# The guard is required for multiprocessing: under the "spawn" start method
# every child re-imports this module, and unguarded top-level code would try
# to start the whole job again inside each child.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment