#!/usr/bin/env python3

"""usage: ./gen-emoji-table.py [--rust] emoji-data.txt emoji-test.txt

Input file:
* https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
* https://www.unicode.org/Public/emoji/latest/emoji-test.txt
"""

# NOTE: the module docstring above doubles as the usage message printed by
# sys.exit(__doc__), so explanatory notes live in comments instead.
#
# The script does two things:
#   1. Reads emoji-data.txt and prints, on stdout, a packed lookup table for
#      the Extended_Pictographic property (C by default, Rust with --rust).
#   2. Reads emoji-test.txt and, when the test directory exists next to this
#      script, writes one shaping test per multi-codepoint sequence.

import os
import sys
from collections import OrderedDict

import packTab

# An optional leading "--rust" switches the output language; it is removed
# from argv so the positional arguments keep the same indices either way.
if len(sys.argv) > 1 and sys.argv[1] == "--rust":
    del sys.argv[1]
    language = packTab.languages["rust"]
else:
    language = packTab.languages["c"]

if len(sys.argv) != 3:
    sys.exit(__doc__)

# Maps property name -> list of inclusive (start, end) code point ranges, in
# file order, with directly adjacent ranges merged.
ranges = OrderedDict()

# Read explicitly as UTF-8 so the result does not depend on the locale, and
# use a context manager so the file is always closed.
with open(sys.argv[1], encoding="utf-8") as f:
    # The first ten lines are echoed into the generated file's banner.
    header = [f.readline() for _ in range(10)]

    for line in f:
        line = line.strip()
        if not line or line[0] == "#":
            continue

        # Data lines look like "<range> ; <property> # comment".
        rang, typ = [s.strip() for s in line.split("#")[0].split(";")[:2]]

        # A range is either a single code point or "start..end", in hex.
        rang = [int(s, 16) for s in rang.split("..")]
        if len(rang) > 1:
            start, end = rang
        else:
            start = end = rang[0]

        spans = ranges.setdefault(typ, [])
        if spans and spans[-1][1] == start - 1:
            # Extends the previous range without a gap: merge the two.
            spans[-1] = (spans[-1][0], end)
        else:
            spans.append((start, end))

print("/* == Start of generated table == */")
print("/*")
print(" * The following tables are generated by running:")
print(" *")
print(" * ./gen-emoji-table.py %semoji-data.txt" % ("--rust " if language.name == "rust" else ""))
print(" *")
print(" * on file with this header:")
print(" *")
for header_line in header:
    print(" * %s" % (header_line.strip()))
print(" */")
print()

# Language-specific preamble.
if language.name == "c":
    print("#ifndef HB_UNICODE_EMOJI_TABLE_HH")
    print("#define HB_UNICODE_EMOJI_TABLE_HH")
    print()
    print('#include "hb-unicode.hh"')
    print()
elif language.name == "rust":
    print("#![allow(unused_parens)]")
    print("#![allow(clippy::unnecessary_cast, clippy::unreadable_literal, clippy::double_parens)]")
    print()
    print("use crate::hb::unicode::Codepoint;")
    print()
else:
    # Raised explicitly (rather than `assert False`) so the check is not
    # stripped when Python runs with -O.
    raise AssertionError("Unknown language: %s" % language.name)

for typ, s in ranges.items():
    # Only the Extended_Pictographic property gets a table.
    if typ != "Extended_Pictographic":
        continue

    # Expand the ranges into a sparse {code point: 1} mapping for packTab.
    arr = dict()
    for start, end in s:
        for i in range(start, end + 1):
            arr[i] = 1

    # NOTE(review): the second argument is presumably the default value for
    # code points missing from `arr` -- confirm against the packTab docs.
    sol = packTab.pack_table(arr, 0, compression=9)
    code = packTab.Code('_hb_emoji')
    if language.name == "c":
        sol.genCode(code, 'is_' + typ, language=language)
        code.print_code(language=language)
        print()
    elif language.name == "rust":
        sol.genCode(code, 'is_' + typ + '_u8', language=language, private=False)
        code.print_code(language=language, private=False)
        print()
        # Thin bool-returning wrapper around the generated u8 lookup.
        print("#[inline]")
        print("pub(crate) fn is_%s (u: Codepoint) -> bool" % typ)
        print("{")
        print(" _hb_emoji_is_%s_u8 (u as usize) != 0" % typ)
        print("}")
        print()
    else:
        raise AssertionError("Unknown language: %s" % language.name)

print()
if language.name == "c":
    print("#endif /* HB_UNICODE_EMOJI_TABLE_HH */")
    print()
print("/* == End of generated table == */")

# Generate test file.
sequences = []
with open(sys.argv[2], encoding="utf-8") as f:
    for line in f:
        # Keep only the text before the first '#' and the first ';', i.e. the
        # space-separated code point field.
        fields = line.partition("#")[0].partition(";")[0].strip().split(" ")
        # Single code points (and blank/comment lines) are not sequences.
        if len(fields) < 2:
            continue
        sequences.append(fields)

test_path = os.path.join(os.path.dirname(__file__), "..", "test", "shape", "data", "in-house", "tests", "emoji-clusters.tests")
if os.path.isdir(os.path.dirname(test_path)):
    try:
        with open(test_path, "w") as f:
            for sequence in sequences:
                f.write("../fonts/AdobeBlank2.ttf;--no-glyph-names --no-positions --font-funcs=ot")
                f.write(";" + ",".join(sequence))
                # One "1=0" entry per code point in the sequence.
                f.write(";[" + "|".join("1=0" for _ in sequence) + "]\n")
    except OSError:
        # Deliberately best-effort: the table on stdout is the primary
        # output, so failing to write the test file must not fail the run.
        pass