/**
 * Unistring: breaks a native string into an array of grapheme clusters,
 * and provides native-string-like manipulation methods.
 * =============================================================================
 *
 *
 * @author akahuku@gmail.com
 * @license MIT
 */

/*
 * data table, taken from:
 * http://www.unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakProperty.txt
 */

// GENERATED CODE START <<<1
// This data was generated by the command 'node mkdata.js -g'.
// NOTE(review): the other generated sections declare a packed `*_PROPS` string
// right before their `*_PROP_UNIT_LENGTH` constant. No such table is present
// here (only a stray duplicated `const` keyword was), so GRAPHEME_BREAK_PROPS
// presumably has to be regenerated with mkdata.js -- confirm before release.
const GRAPHEME_BREAK_PROP_UNIT_LENGTH = 5;

// Maps each grapheme-cluster-break property name to its numeric code.
// The letter in the leading comment of each entry is the generator's
// one-character tag for that code.
const GBP = {
	/* ` */'Other': 0,
	/* a */'SOT': 1,
	/* b */'EOT': 2,
	/* c */'Prepend': 3,
	/* d */'CR': 4,
	/* e */'LF': 5,
	/* f */'Control': 6,
	/* g */'Extend': 7,
	/* h */'Regional_Indicator': 8,
	/* i */'SpacingMark': 9,
	/* j */'L': 10,
	/* k */'V': 11,
	/* l */'T': 12,
	/* m */'LV': 13,
	/* n */'LVT': 14,
	/* o */'ZWJ': 15
};

// Property names indexed by numeric code. Captured before the manual
// Extended_Pictographic entry below is added, so that name is not included.
const GBP_NAMES = Object.keys(GBP);
// GENERATED CODE END
// >>>

/*
 * Extended_Pictographic is not emitted by the generator above; a value is
 * assigned to it by hand here so that it can be referenced like the other
 * properties.
 */
GBP['Extended_Pictographic'] = 16; /* p */

/*
 * data table, taken from:
 * http://www.unicode.org/Public/14.0.0/ucd/auxiliary/WordBreakProperty.txt
 */

// GENERATED CODE START <<<1
// This data was generated by the command 'node mkdata.js -w'.
// Packed Word_Break property table: one continuous run of upper-case
// hexadecimal digits. Every row ends with a backslash line continuation so
// that no whitespace ends up inside the string.
// NOTE(review): the record layout is not visible in this part of the file;
// WORD_BREAK_PROP_UNIT_LENGTH is presumably the record size used by the
// decoder -- confirm against the code that reads this table.
const WORD_BREAK_PROPS = '\
1309002000070A002000080B004000060D0020001320002000032200200004270020000E2C002000\
0F2E00200010300040010D3A0020000E3B0020000C41004003115F0020000C610040030885002000\
0CAA0020000BAD0020000CB50020000DB70020000CBA0020000CC000E0020CD800E0030CF800003C\
0CDE024004090003000E0C7003A0000C760340000C7A0380000E7E0320000C7F0320000C86032000\
0D870320000C880360000C8C0320000C8E0380020CA303600A0CF7036011098304E0000C8A04C014\
0C3105C0040C590580000C5E0520000D5F0520000C600520050E890520000C8A052000099105A005\
09BF05200009C105400009C405400009C705200005D005600305EF0580000CF30520000DF4052000\
0B0006C0000E0C06400009100660010B1C0620000C20066005094B06A0021060064001106B062000\
0E6C0620000C6E06400009700620000C7106600C0CD506200009D606E0000BDD06200009DF06C000\
0CE506400009E706400009EA0680000CEE06400010F00640010CFA0660000CFF0620000B0F072000\
0C1007200009110720000C1207C00309300760030C4D07200B09A60760010CB107200010C0074001\
0CCA07200409EB0720010CF40740000EF80720000CFA07200009FD0720000C0008C0020916088000\
0C1A082000091B0820010C2408200009250860000C28082000092908A0000C400820030959086000\
0C600860010C700800030C8908C0000B9008400009980800010CA008400509CA0800030BE2082000\
09E30820040C0409C006093A0960000C3D092000093E0940020C50092000095109E0000C58094001\
096209400010660940010C7109000209810960000C850900010C8F0940000C9309C0020CAA09E000\
0CB20920000CB609800009BC0920000CBD09200009BE09E00009C709400009CB0960000CCE092000\
09D70920000CDC0940000CDF09600009E209400010E60940010CF00940000CFC09200009FE092000\
09010A60000C050AC0000C0F0A40000C130AC0020C2A0AE0000C320A40000C350A40000C380A4000\
093C0A2000093E0AA00009470A4000094B0A600009510A20000C590A80000C5E0A200010660A4001\
09700A40000C720A600009750A200009810A60000C850A20010C8F0A60000C930AC0020CAA0AE000\
0CB20A40000CB50AA00009BC0A20000CBD0A200009BE0A000109C70A600009CB0A60000CD00A2000\
0CE00A400009E20A400010E60A40010CF90A200009FA0AC00009010B60000C050B00010C0F0B4000\
0C130BC0020C2A0BE0000C320B40000C350BA000093C0B20000C3D0B2000093E0BE00009470B4000\
094B0B600009550B60000C5C0B40000C5F0B600009620B400010660B40010C710B200009820B2000\
0C830B20000C850BC0000C8E0B60000C920B80000C990B40000C9C0B20000C9E0B40000CA30B4000\
0CA80B60000CAE0B800109BE0BA00009C60B600009CA0B80000CD00B200009D70B200010E60B4001\
09000CA0000C050C00010C0E0C60000C120CE0020C2A0C0002093C0C20000C3D0C2000093E0CE000\
09460C6000094A0C800009550C40000C580C60000C5D0C20000C600C400009620C400010660C4001\
0C800C200009810C60000C850C00010C8E0C60000C920CE0020CAA0C40010CB50CA00009BC0C2000\
0CBD0C200009BE0CE00009C60C600009CA0C800009D50C40000CDD0C40000CE00C400009E20C4000\
10E60C40010CF10C400009000D80000C040D20010C0E0D60000C120D2005093B0D40000C3D0D2000\
093E0DE00009460D6000094A0D80000C4E0D20000C540D600009570D20000C5F0D600009620D4000\
10660D40010C7A0DC00009810D60000C850D40020C9A0D00030CB30D20010CBD0D20000CC00DE000\
09CA0D200009CF0DC00009D60D200009D80D000110E60D400109F20D400009310E200009340EE000\
09470E000110500E400109B10E200009B40E200109C80EC00010D00E40010C000F200009180F4000\
10200F400109350F200009370F200009390F2000093E0F40000C400F00010C490F800409710F8002\
09860F40000C880FA000098D0F600109990F800409C60F2000092B10800210401040010956108000\
095E1060000962106000096710E00009711080000982108001098F1020001090104001099A108000\
0CA010C0040CC71020000CCD1020000CD01060050CFC10A0290C4A1280000C5012E0000C58122000\
0C5A1280000C601220050C8A1280000C901220040CB21280000CB812E0000CC01220000CC2128000\
0CC812E0010CD81220070C121380000C18136008095D1360000C801300020CA013C00A0CF813C000\
0C0114804D0C6F16200213801620000C811640030CA01660090CEE1660010C001740020912178000\
0C1F17600209321760000C4017400209521740000C6017A0010C6E176000097217400009B4170004\
09DD17200010E0174001090B1860000B0E182000090F18200010101840010C2018200B0C8018A000\
09851840000C8718400409A91820000CAA1820000CB018C0080C0019E00309201980010930198001\
104619400110D01940010C001AE00209171AA00009551A400109601AA003097F1A200010801A4001\
10901A400109B01AE00309001BA0000C051BE00509341B20020C451B000110501B4001096B1B2001\
09801B60000C831BC00309A11BA0010CAE1B400010B01B40010CBA1B800509E61BC0010C001C8004\
09241C800210401C40010C4D1C600010501C40010C5A1C80040C801C20010C901C60050CBD1C6000\
09D01C600009D41CA0020CE91C800009ED1C20000CEE1CC00009F41C20000CF51C400009F71C6000\
0CFA1C20000C001D001809C01D00080C001EC0220C181FC0000C201FC0040C481FC0000C501F0001\
0C591F20000C5B1F20000C5D1F20000C5F1FE0030C801FA0060CB61FE0000CBE1F20000CC21F6000\
0CC61FE0000CD01F80000CD61FC0000CE01FA0010CF21F60000CF61FE000130020E0001308206000\
090C202000120D2020000B0E2040000F182040000F242020000D2720200008282040000B2A20A000\
112F202000113F2040000E442020001154202000135F2020000B6020A0000B662040010C71202000\
0C7F2020000C9020A00109D02020040C022120000C072120000C0A2140010C152120000C1921A000\
0C242120000C262120000C282120000C2A2180000C2F2160010C3C2180000C4521A0000C4E212000\
0C602120050CB62480060C002CA01C0CEB2C800009EF2C60000CF22C40000C002DC0040C272D2000\
0C2D2D20000C302D00070C6F2D2000097F2D20000C802DE0020CA02DE0000CA82DE0000CB02DE000\
0CB82DE0000CC02DE0000CC82DE0000CD02DE0000CD82DE00009E02D00040C2F2E20001300302000\
0C05302000092A30C000163130A0000C3B304000154130C00A0999304000169B308000159F302000\
16A030200014A130400B16FC30600014FF3020000C053160050C3131C00B0CA031000414F0310002\
14D032E005140033000B0C00A0A0910CD0A4C0050C00A5A0210C10A600021020A640010C2AA64000\
0C40A6E005096FA680000974A640010C7FA6E003099EA640000CA0A6000A09F0A640000C08A76018\
0CD0A740000CD3A720000CD5A7A0000CF2A700020902A820000C03A860000906A820000C07A88000\
090BA820000C0CA8E0020923A8A000092CA820000C40A880060980A840000C82A8400609B4A84002\
10D0A8400109E0A840020CF2A8C0000CFBA820000CFDA8400009FFA820001000A940010C0AA98003\
0926A900010C30A9E0020947A9A0010C60A9A0030980A980000C84A9E00509B3A9C0010CCFA92000\
10D0A9400109E5A9200010F0A940010C00AA20050929AAC0010C40AA60000943AA20000C44AA0001\
094CAA40001050AA4001097BAA600009B0AA200009B2AA600009B7AA400009BEAA400009C1AA2000\
0CE0AA600109EBAAA0000CF2AA600009F5AA40000C01ABC0000C09ABC0000C11ABC0000C20ABE000\
0C28ABE0000C30AB40070C70AB600E09E3AB000109ECAB400010F0AB40010C00ACE0FF0CFFB3E0FF\
0CFEBBE0FF0CFDC3E0FF0CFCCBE0FF0CFBD320750CB0D7E0020CCBD720060C00FBE0000C13FBA000\
051DFB2000091EFB2000051FFB4001052AFBA0010538FBA000053EFB20000540FB40000543FB4000\
0546FB40010C50FB400C0CD3FB602D0C50FD00080C92FDC0060CF0FD80010900FE00020E10FE2000\
0D13FE20000E14FE20000920FE00021133FE4000114DFE60000E50FE20000F52FE20000E54FE2000\
0D55FE20000C70FEA0000C76FEE0100BFFFE20000F07FF20000E0CFF20000F0EFF20001010FF4001\
0D1AFF20000E1BFF20000C21FF4003113FFF20000C41FF40031466FF40011670FF20001471FFA005\
099EFF40000CA0FFE0030CC2FFC0000CCAFFC0000CD2FFC0000CDAFF60000BF9FF60000C00008101\
0C0D0041030C280061020C3C0041000C3F00E1010C5000C1010C8000610F0C4001A10609FD012100\
0C8002A1030CA002210609E00221000C000301040C2D03C1030C5003C104097603A1000C8003C103\
0CA00381040CC80301010CD103A1000C0004C11310A00441010CB00481040CD80481040C00050105\
0C300581060C700561010C7C05E1010C8C05E1000C940541000C970561010CA305E1010CB305E100\
0CBB0541000C0006E1260C4007C1020C600701010C8007C1000C870741050CB20721010C0008C100\
0C080821000C0A0881050C370841000C3C0821000C3F08E1020C6008E1020C8008E1030CE0086102\
0CF40841000C0009C1020C200941030C800901070CBE0941000C000A210009010A610009050A4100\
090C0A81000C100A81000C150A61000C190AA10309380A6100093F0A21000C600AA1030C800AA103\
0CC00A01010CC90A810309E50A41000C000BC1060C400BC1020C600B61020C800B41020C000C2109\
0C800C61060CC00C61060C000D810409240D810010300D41010C800E410509AB0E41000CB00E4100\
0C000FA1030C270F21000C300FC10209460F61010C700F410209820F81000CB00FA1020CE00FE102\
09001061000C0310A106093810E101106610410109701021000C7110410009731041000C75102100\
097F1081000C8310A10509B01061010BBD10210009C21021000BCD1021000CD010210310F0104101\
09001161000C03118104092711C10110361141010C4411210009451141000C471121000C50116104\
09731121000C7611210009801161000C8311010609B311C1010CC111810009C911810009CE114100\
10D01141010CDA1121000CDC1121000C001241020C13122103092C128101093E1221000C8012E100\
0C881221000C8A1281000C8F12E1010C9F1241010CB012E10509DF12810110F01241010900138100\
0C051301010C0F1341000C1313C1020C2A13E1000C321341000C3513A100093B1341000C3D132100\
093E13E1000947134100094B1361000C5013210009571321000C5D13A1000962134100096613E100\
097013A1000C0014A10609351441020C471481001050144101095E1421000C5F1461000C80140106\
09B01481020CC41441000CC714210010D01441010C8015E10509AF15E10009B81521010CD8158100\
09DC1541000C0016010609301621020C4416210010501641010C8016610509AB16A1010CB8162100\
10C0164101091D17E10110301741010C00188105092C18E1010CA018010810E01841010CFF180101\
0C091921000C0C1901010C151941000C18190103093019C1000937194100093B1981000C3F192100\
09401921000C41192100094219410010501941010CA01901010CAA19E10409D119E10009DA19E100\
0CE11921000CE319210009E41921000C001A210009011A41010C0B1A010509331AE1000C3A1A2100\
093B1A810009471A21000C501A210009511A61010C5C1AC105098A1A01020C9D1A21000CB01A2109\
0C001C21010C0A1CA104092F1C010109381C01010C401C210010501C41010C721CC10309921CC102\
09A91CC1010C001DE1000C081D41000C0B1DC10409311DC100093A1D2100093C1D4100093F1DE100\
0C461D210009471D210010501D41010C601DC1000C671D41000C6A1D0104098A1DA10009901D4100\
09931DA1000C981D210010A01D41010CE01E610209F31E81000CB01F21000C002041730C0024E10D\
0C802481180C902F210C0C0030E1850B303421010C0044E1480C006821470C406AE10310606A4101\
0C706AE10910C06A41010CD06AC10309F06AA1000C006B010609306BE1000C406B810010506B4101\
0C636BA1020C7D6B61020C406E01080C006F6109094F6F21000C506F210009516FE106098F6F8100\
0C936FA1010CE06F41000CE36F210009E46F210009F06F41001400B021000C00BC610D0C70BCA101\
0C80BC21010C90BC4101099DBC41000BA0BC81000900CFC1050930CFE1020965D1A100096DD1C100\
0B73D10101097BD101010985D1E10009AAD181000942D261000C00D4A10A0C56D4E1080C9ED44100\
0CA2D421000CA5D441000CA9D481000CAED481010CBBD421000CBDD4E1000CC5D421080C07D58100\
0C0DD501010C16D5E1000C1ED581030C3BD581000C40D5A1000C46D521000C4AD5E1000C52D5812A\
0CA8D621030CC2D621030CDCD6E1030CFCD621030C16D7E1030C36D721030C50D7E1030C70D72103\
0C8AD7E1030CAAD721030CC4D7010110CED741060900DAE106093BDA41060975DA21000984DA2100\
099BDAA10009A1DAE1010C00DFE1030900E0E1000908E02102091BE0E1000923E041000926E0A100\
0C00E1A1050930E1E1000C37E1E1001040E141010C4EE121000C90E2C10309AEE221000CC0E28105\
09ECE2810010F0E241010CE0E7E1000CE8E781000CEDE741000CF0E7E1010C00E8A11809D0E8E100\
0C00E981080944E9E1000C4BE921001050E941010C00EE81000C05EE61030C21EE41000C24EE2100\
0C27EE21000C29EE41010C34EE81000C39EE21000C3BEE21000C42EE21000C47EE21000C49EE2100\
0C4BEE21000C4DEE61000C51EE41000C54EE21000C57EE21000C59EE21000C5BEE21000C5DEE2100\
0C5FEE21000C61EE41000C64EE21000C67EE81000C6CEEE1000C74EE81000C79EE81000C7EEE2100\
0C80EE41010C8BEE21020CA1EE61000CA5EEA1000CABEE21020C30F141030C50F141030C70F14103\
0AE6F1410309FBF3A10010F0FB41010B01002E000920000E0C0900010E1E';
const WORD_BREAK_PROP_UNIT_LENGTH = 5;

// Maps each word-break property name to its numeric code. The letter in the
// leading comment of each entry is the generator's one-character tag.
const WBP = {
	/* ` */'Other': 0,
	/* a */'SOT': 1,
	/* b */'EOT': 2,
	/* c */'Double_Quote': 3,
	/* d */'Single_Quote': 4,
	/* e */'Hebrew_Letter': 5,
	/* f */'CR': 6,
	/* g */'LF': 7,
	/* h */'Newline': 8,
	/* i */'Extend': 9,
	/* j */'Regional_Indicator': 10,
	/* k */'Format': 11,
	/* l */'ALetter': 12,
	/* m */'MidLetter': 13,
	/* n */'MidNum': 14,
	/* o */'MidNumLet': 15,
	/* p */'Numeric': 16,
	/* q */'ExtendNumLet': 17,
	/* r */'ZWJ': 18,
	/* s */'WSegSpace': 19,
	/* t */'Katakana': 20,
	/* u */'Hiragana': 21,
	/* v */'KanaExtension': 22
};

// Property names indexed by numeric code. Captured before the manual
// Extended_Pictographic entry below is added, so that name is not included.
const WBP_NAMES = Object.keys(WBP);
// GENERATED CODE END
// >>>

/*
 * Extended_Pictographic is not emitted by the generator above; a value is
 * assigned to it by hand here so that it can be referenced like the other
 * properties.
 */
WBP['Extended_Pictographic'] = 23; /* w */

/*
 * data table, taken from:
 * http://www.unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakProperty.txt
 */

// GENERATED CODE START <<<1
// This data was generated by the command 'node mkdata.js -e'.
// NOTE(review): the word-break and script sections declare a packed `*_PROPS`
// string right before their unit-length constant. No such table is present
// here (only a stray duplicated `const` keyword was), so SENTENCE_BREAK_PROPS
// presumably has to be regenerated with mkdata.js -- confirm before release.
const SENTENCE_BREAK_PROP_UNIT_LENGTH = 5;

// Maps each sentence-break property name to its numeric code. The letter in
// the leading comment of each entry is the generator's one-character tag.
const SBP = {
	/* ` */'Other': 0,
	/* a */'SOT': 1,
	/* b */'EOT': 2,
	/* c */'CR': 3,
	/* d */'LF': 4,
	/* e */'Extend': 5,
	/* f */'Sep': 6,
	/* g */'Format': 7,
	/* h */'Sp': 8,
	/* i */'Lower': 9,
	/* j */'Upper': 10,
	/* k */'OLetter': 11,
	/* l */'Numeric': 12,
	/* m */'ATerm': 13,
	/* n */'STerm': 14,
	/* o */'Close': 15,
	/* p */'SContinue': 16
};

// Sentence-break property names indexed by numeric code.
const SBP_NAMES = Object.keys(SBP);
// GENERATED CODE END
// >>>

/*
 * data table, taken from:
 * http://www.unicode.org/Public/14.0.0/ucd/Scripts.txt
 */

// GENERATED CODE START <<<1
// This data was generated by the command 'node mkdata.js -s'.
// Packed script table: one continuous run of upper-case hexadecimal digits.
// Every row ends with a backslash line continuation so that no whitespace
// ends up inside the string.
// NOTE(review): the record layout is not visible in this part of the file;
// SCRIPTS_PROP_UNIT_LENGTH is presumably the record size used by the decoder
// -- confirm against the code that reads this table.
const SCRIPTS = '\
01000020080241004003015B00C0000261004003017B00E00502AA00200001AB00E00102BA002000\
01BB00A00002C000E00201D700200002D800E00301F700200002F800203801B902E00402E002A000\
01E502A00023EA02400001EC028002290003000E037003800001740320000375036000037A038000\
017E032000037F03200003840320000185032000038603200001870320000388036000038C032000\
038E03800203A303E00737E203C00103F0030002040004A01029850440000487042015053105C004\
0559054006058D056000069105E00606D005600306EF05C000070006A0000105062000070606C000\
010C062000070D06C001011B062000071C066000011F062000072006000401400620000741064001\
294B06600107560640032970062000077106800D01DD06200007DE064004080007C001080F078007\
084D0760000750070006098007400642C007600742FD076000520008C005523008E0015F40088003\
5F5E0820000860086001077008E0030790084000079808400901E208200007E308A0030A0009200A\
29510980000A5509E00101640940000A660940030B800980000B850900010B8F0940000B9309C002\
0BAA09E0000BB20920000BB60980000BBC0920010BC70940000BCB0980000BD70920000BDC094000\
0BDF09A0000BE60920030C010A60000C050AC0000C0F0A40000C130AC0020C2A0AE0000C320A4000\
0C350A40000C380A40000C3C0A20000C3E0AA0000C470A40000C4B0A60000C510A20000C590A8000\
0C5E0A20000C660A20020D810A60000D850A20010D8F0A60000D930AC0020DAA0AE0000DB20A4000\
0DB50AA0000DBC0A40010DC70A60000DCB0A60000DD00A20000DE00A80000DE60A80010DF90AE000\
0E010B60000E050B00010E0F0B40000E130BC0020E2A0BE0000E320B40000E350BA0000E3C0B2001\
0E470B40000E4B0B60000E550B60000E5C0B40000E5F0BA0000E660B40020F820B40000F850BC000\
0F8E0B60000F920B80000F990B40000F9C0B20000F9E0B40000FA30B40000FA80B60000FAE0B8001\
0FBE0BA0000FC60B60000FCA0B80000FD00B20000FD70B20000FE60BA00210000CA001100E0C6000\
10120CE002102A0C0002103C0C200110460C6000104A0C800010550C400010580C6000105D0C2000\
10600C800010660C400110770C200111800CA001118E0C600011920CE00211AA0C400111B50CA000\
11BC0C200111C60C600011CA0C800011D50C400011DD0C400011E00C800011E60C400111F10C4000\
12000DA001120E0D600012120D600612460D6000124A0DC00012540D000212660D400313810D6000\
13850D4002139A0D000313B30D200113BD0D200013C00DE00013CA0D200013CF0DC00013D60D2000\
13D80D000113E60D400113F20D600014010E4007013F0E200014400E800315810E400015840E2000\
15860EA000158C0E000315A50E200015A70EE00215C00EA00015C60E200015C80EC00015D00E4001\
15DC0E800016000F000916490F800416710FE00416990F800416BE0FE00116CE0FE00001D50F8000\
16D90F4000170010001418A010C00418C710200018CD10200018D010600501FB10200018FC108000\
19001100201A001220091A4A1280001A5012E0001A581220001A5A1280001A601220051A8A128000\
1A901220041AB21280001AB812E0001AC01220001AC21280001AC812E0011AD81220071A12138000\
1A181360081A5D1300041A801340031BA013C00A1BF813C0001C001400501D8016A0031EA0166009\
01EB1660001EEE1660012A0017C0022A1F1720002B2017A00201351740002C401780022D6017A001\
2D6E1760002D721740001F8017C00B1FE01740011FF0174001200018400001021840002004182000\
01051820002006188002202018200B20801860051CB018C0082E0019E0032E201980012E30198001\
2E401920002E441980012F5019C0032F7019A000388019800538B019400338D019600138DE194000\
1FE019000436001A8003361E1A40004E201AE0074E601AA0034E7F1A60014E901A40014EA01AC001\
29B01AE0033E001BA0093E501BE00543801B00085DC01B80065DFC1B800044001C0007443B1CE001\
444D1C600045501C000604801C200118901C600518BD1C600043C01C000129D01C600001D31C2000\
29D41CA00101E11C200029E21CE00001E91C800029ED1C200001EE1CC00029F41C200001F51C6000\
29F81C400001FA1C200002001DC00403261DA000042B1D2000022C1D2006035D1DA00002621D8000\
03661DA000026B1DA00104781D200002791DC00803BF1D200029C01D000802001E002003001FC002\
03181FC00003201FC00403481FC00003501F000103591F2000035B1F2000035D1F2000035F1FE003\
03801FA00603B61FE00103C61FC00103D61FC00003DD1F600203F21F600003F61F20010100208001\
290C204000010E20E00A016620600102712020000174206001027F202000018020E001029020A001\
01A020200429D0202004010021C00403262120000127216000022A214000012C21C0000232212000\
0133216003024E212000014F21200202602120050189216000019021E05201402460010160240074\
3500280020010029804E01762B000401972B200D39002C000C02602C000437802C800E37F92CE000\
18002DC00418272D2000182D2D20003A302D00073A6F2D40003A7F2D20001A802DE0021AA02DE000\
1AA82DE0001AB02DE0001AB82DE0001AC02DE0001AC82DE0001AD02DE0001AD82DE00004E02D0004\
01002EC00B24802E4003249B2E200B24002FC01A01F02F8001010030A00024053020000106302000\
240730200001083020032421302001292A308000192E30400001303000012438308000013C308000\
214130C00A2999304000019B304000219D30600001A030200022A130400B01FB30400022FD306000\
2305316005193131C00B019031000223A031000401C031800422F0310002190032E0030120320008\
196032E003017F32200A22D032E00501FF322000220033000B0158330015240034E0FF24FF3BE0FF\
24FE43E0FF24FD4B603801C04D000824004EE0FF24FF55E0FF24FE5DE0FF24FD65E0FF24FC6DE0FF\
24FB75E0FF24FA7DE0FF24F985E0FF24F88DE0FF24F795E0FF24F69D40412500A0A0912590A4E006\
53D0A400064600A580250440A6000C54A0A6000B0100A740040222A7C00C0188A76000028BA70008\
02D0A7400002D3A7200002D5A7A00002F2A7C0013B00A8A0050130A840014140A800074780A8C008\
47CEA880010AE0A800044800A9C005012EA92000482FA920004930A98004495FA920001960A9A003\
5580A9C00901CFA9200055D0A9400155DEA9400017E0A9E0034D00AAE0064D40AAC0014D50AA4001\
4D5CAA80001760AA00044F80AA60084FDBAAA00056E0AAE0021A01ABC0001A09ABC0001A11ABC000\
1A20ABE0001A28ABE0000230AB6005015BAB2000025CAB20010365AB20000266AB8000016AAB4000\
1B70AB000A56C0ABC00556F0AB40011900ACE0FF19FFB3E0FF19FEBBE0FF19FDC3E0FF19FCCBE0FF\
19FBD3207519B0D7E00219CBD720062400F9C02D2470FA400D0200FBE0000513FBA000061DFB4003\
0638FBA000063EFB20000640FB40000643FB40000646FB40010750FB600E07D3FB602D013EFD4000\
0740FD000A0792FDC00607CFFD200007F0FD00022900FE00020110FE40012920FEC001042EFE4000\
0130FE60040154FE60020168FE80000770FEA0000776FEE01001FFFE20000101FF00040221FF4003\
013BFFC0000241FF4003015BFF60012266FF40010170FF20002271FFA005019EFF400019A0FFE003\
19C2FFC00019CAFFC00019D2FFC00019DAFF600001E0FFE00001E8FFE00001F9FFA0003000008101\
300D0041033028006102303C004100303F00E101305000C101308000610F0100016100010701A105\
0137012101034001E109019001A10103A001210001D001A10529FD0121004A8002A1034BA0022106\
29E002210001E10261032600038104262D03610027300361037850036105318003C103319F032100\
3CA00381043CC803C101280004010A3250040106338004C10333A004410188B004810488D8048104\
6A000501056730058106676F052100A170056101A17C05E101A18C05E100A194054100A197056101\
A1A305E101A1B305E100A1BB0541006E0006E1266E4007C1026E60070101028007C1000287074105\
02B2072101340008C1003408082100340A0881053437084100343C082100343F082100574008C102\
57570821017660080104758008E10375A708210180E008610280F408410080FB08A1004000098103\
401F0921004C200941034C3F092100628009010461A009010361BC09810261D209C1053D000A8100\
3D050A41003D0C0A01013D150A61003D190AA1033D380A61003D3F0A41013D500A210158600A0104\
74800A010470C00AE10470EB0A810150000BC10650390BE10059400BC10259580B01015A600B6102\
5A780B010179800B410279990B810079A90BE1005B000C210982800C610682C00C610682FA0CC100\
92000D010592300D410107600EE1039C800E41059CAB0E61009CB00E410094000F010593300F4105\
9E700F410399B00F810395E00FE1025E0010C1095E521081045E7F1021005C801061085CCD102100\
65D010210365F0104101600011A10660361141026F5011E104648011010C13E11181026D00124102\
6D13128105818012E1008188122100818A128100818F12E101819F1261017BB01261077BF0124101\
6B001381006B051301016B0F1341006B1313C1026B2A13E1006B321341006B3513A100293B132100\
6B3C1321016B471341006B4B1361006B501321006B571321006B5D13E1006B6613E1006B7013A100\
870014810B875D14A1007C801401097CD01441017A8015C1067AB815C104720016A1087250164101\
206016A101668016410766C01641017E001761037E1D17E1017E3017E1028E001881077DA018610A\
7DFF1821009A0019E1009A091921009A0C1901019A151941009A1819C1039A371941009A3B198101\
9A5019410196A019010196AA19C10596DA1961018D001A01098C501A610A1CB01A010277C01A2107\
85001C2101850A1CA10585381CC10185501CA10386701C010486921CC10286A91CC1018A001DE100\
8A081D41008A0B1D81058A3A1D21008A3C1D41008A3F1D21018A501D41018F601DC1008F671D4100\
8F6A1DA1048F901D41008F931DC1008FA01D410190E01E210353B01F21000FC01F41060FFF1F2100\
3F002041733F0024E10D3F7024A1003F802481189D902F610C510030E18551303421017F0044E148\
540068214773406AE10373606A4101736E6A41009F706AE1099FC06A410168D06AC10368F06AC100\
6C006BC1086C506B41016C5B6BE1006C636BA1026C7D6B610291406E610B63006F6109634F6F2107\
638F6F210289E06F21008BE16F210024E26F41009BE46F210024F06F4100890070E1FF89FF77E1FF\
89FE7F41FF89008801609B008BC13A89008D210122F0AF810022F5AFE10022FDAF41002200B02100\
2101B0E1232220B161002150B161002264B181008B70B181316900BC610D6970BCA1016980BC2101\
6990BC4101699CBC810001A0BC81002900CFC1052930CFE1020150CF810E0100D0C11E0100D1E104\
0129D1C1072967D16100016AD12102297BD101010183D141002985D1E100018CD1C10329AAD18100\
01AED1A1070300D2C10801E0D281020100D3E10A0160D321030100D4A10A0156D4E108019ED44100\
01A2D4210001A5D4410001A9D4810001AED4810101BBD4210001BDD4E10001C5D421080107D58100\
010DD501010116D5E100011ED58103013BD581000140D5A1000146D52100014AD5E1000152D5812A\
01A8D6812401CED741068300D88151839BDAA10083A1DAE1010200DFE1033900E0E1003908E02102\
391BE0E1003923E041003926E0A1009700E1A1059730E1C1019740E14101974EE14100A090E2E103\
98C0E2410798FFE221001AE0E7E1001AE8E781001AEDE741001AF0E7E1017100E8A11871C7E80102\
8400E981098450E94101845EE941000171EC81080101EDA1070700EE81000705EE61030721EE4100\
0724EE21000727EE21000729EE41010734EE81000739EE2100073BEE21000742EE21000747EE2100\
0749EE2100074BEE2100074DEE61000751EE41000754EE21000757EE21000759EE2100075BEE2100\
075DEE2100075FEE21000761EE41000764EE21000767EE8100076CEEE1000774EE81000779EE8100\
077EEE21000780EE4101078BEE210207A1EE610007A5EEA10007ABEE210207F0EE41000100F08105\
0130F0810C01A0F0E10101B1F0E10101C1F0E10101D1F0A1040100F1C11501E6F141032100F22100\
0101F241000110F281050140F221010150F241000160F2C1000100F3017B01DDF6010201F0F6A101\
0100F7810E0180F7210B01E0F7810101F0F721000100F881010110F801070150F841010160F80105\
0190F8C10301B0F841000100F9812A0160FAC1010170FAA1000178FAA1000180FAE1000190FAA103\
01B0FA610101C0FAC10001D0FA410101E0FA010101F0FAE1000100FB61120194FBE10601F0FB4101\
240000E2FF24FF07E2FF24FE0FE2FF24FD17E2FF24FC1FE2FF24FB27E2FF24FA2FE2FF24F937E2FF\
24F83FE2FF24F747E2FF24F64FE2FF24F557E2FF24F45FE2FF24F367E2FF24F26FE2FF24F177E2FF\
24F07FE2FF24EF87E2FF24EE8FE2FF24ED97E2FF24EC9F82DE2400A7E2FF24FFAEE2FF24FEB66207\
2440B7C21B2420B8E2FF241FC0E2FF241EC882D024B0CEE2FF24AFD6E2FF24AEDEE2FF24ADE682A6\
2400F8C243240000E3FF24FF07E3FF24FE0FA3690101002E000120000E0C2900010E1E';
const SCRIPTS_PROP_UNIT_LENGTH = 5;

// Maps each script name to its numeric code.
const SCRIPT = {
	'Unknown': 0,
	'Common': 1,
	'Latin': 2,
	'Greek': 3,
	'Cyrillic': 4,
	'Armenian': 5,
	'Hebrew': 6,
	'Arabic': 7,
	'Syriac': 8,
	'Thaana': 9,
	'Devanagari': 10,
	'Bengali': 11,
	'Gurmukhi': 12,
	'Gujarati': 13,
	'Oriya': 14,
	'Tamil': 15,
	'Telugu': 16,
	'Kannada': 17,
	'Malayalam': 18,
	'Sinhala': 19,
	'Thai': 20,
	'Lao': 21,
	'Tibetan': 22,
	'Myanmar': 23,
	'Georgian': 24,
	'Hangul': 25,
	'Ethiopic': 26,
	'Cherokee': 27,
	'Canadian_Aboriginal': 28,
	'Ogham': 29,
	'Runic': 30,
	'Khmer': 31,
	'Mongolian': 32,
	'Hiragana': 33,
	'Katakana': 34,
	'Bopomofo': 35,
	'Han': 36,
	'Yi': 37,
	'Old_Italic': 38,
	'Gothic': 39,
	'Deseret': 40,
	'Inherited': 41,
	'Tagalog': 42,
	'Hanunoo': 43,
	'Buhid': 44,
	'Tagbanwa': 45,
	'Limbu': 46,
	'Tai_Le': 47,
	'Linear_B': 48,
	'Ugaritic': 49,
	'Shavian': 50,
	'Osmanya': 51,
	'Cypriot': 52,
	'Braille': 53,
	'Buginese': 54,
	'Coptic': 55,
	'New_Tai_Lue': 56,
	'Glagolitic': 57,
	'Tifinagh': 58,
	'Syloti_Nagri': 59,
	'Old_Persian': 60,
	'Kharoshthi': 61,
	'Balinese': 62,
	'Cuneiform': 63,
	'Phoenician': 64,
	'Phags_Pa': 65,
	'Nko': 66,
	'Sundanese': 67,
	'Lepcha': 68,
	'Ol_Chiki': 69,
	'Vai': 70,
	'Saurashtra': 71,
	'Kayah_Li': 72,
	'Rejang': 73,
	'Lycian': 74,
	'Carian': 75,
	'Lydian': 76,
	'Cham': 77,
	'Tai_Tham': 78,
	'Tai_Viet': 79,
	'Avestan': 80,
	'Egyptian_Hieroglyphs': 81,
	'Samaritan': 82,
	'Lisu': 83,
	'Bamum': 84,
	'Javanese': 85,
	'Meetei_Mayek': 86,
	'Imperial_Aramaic': 87,
	'Old_South_Arabian': 88,
	'Inscriptional_Parthian': 89,
	'Inscriptional_Pahlavi': 90,
	'Old_Turkic': 91,
	'Kaithi': 92,
	'Batak': 93,
	'Brahmi': 94,
	'Mandaic': 95,
	'Chakma': 96,
	'Meroitic_Cursive': 97,
	'Meroitic_Hieroglyphs': 98,
	'Miao': 99,
	'Sharada': 100,
	'Sora_Sompeng': 101,
	'Takri': 102,
	'Caucasian_Albanian': 103,
	'Bassa_Vah': 104,
	'Duployan': 105,
	'Elbasan': 106,
	'Grantha': 107,
	'Pahawh_Hmong': 108,
	'Khojki': 109,
	'Linear_A': 110,
	'Mahajani': 111,
	'Manichaean': 112,
	'Mende_Kikakui': 113,
	'Modi': 114,
	'Mro': 115,
	'Old_North_Arabian': 116,
	'Nabataean': 117,
	'Palmyrene': 118,
	'Pau_Cin_Hau': 119,
	'Old_Permic': 120,
	'Psalter_Pahlavi': 121,
	'Siddham': 122,
	'Khudawadi': 123,
	'Tirhuta': 124,
	'Warang_Citi': 125,
	'Ahom': 126,
	'Anatolian_Hieroglyphs': 127,
	'Hatran': 128,
	'Multani': 129,
	'Old_Hungarian': 130,
	'SignWriting': 131,
	'Adlam': 132,
	'Bhaiksuki': 133,
	'Marchen': 134,
	'Newa': 135,
	'Osage': 136,
	'Tangut': 137,
	'Masaram_Gondi': 138,
	'Nushu': 139,
	'Soyombo': 140,
	'Zanabazar_Square': 141,
	'Dogra': 142,
	'Gunjala_Gondi': 143,
	'Makasar': 144,
	'Medefaidrin': 145,
	'Hanifi_Rohingya': 146,
	'Sogdian': 147,
	'Old_Sogdian': 148,
	'Elymaic': 149,
	'Nandinagari': 150,
	'Nyiakeng_Puachue_Hmong': 151,
	'Wancho': 152,
	'Chorasmian': 153,
	'Dives_Akuru': 154,
	'Khitan_Small_Script': 155,
	'Yezidi': 156,
	'Cypro_Minoan': 157,
	'Old_Uyghur': 158,
	'Tangsa': 159,
	'Toto': 160,
	'Vithkuqi': 161
};

// Script names indexed by numeric code.
const SCRIPT_NAMES = Object.keys(SCRIPT);
// GENERATED CODE END
// >>>

/*
 * data table, taken from:
 * http://www.unicode.org/Public/14.0.0/ucd/LineBreak.txt
 */

// GENERATED CODE START <<<1
// This data was generated by the command 'node mkdata.js -u 14.0.0 -i'.
// NOTE(review): as with the sentence-break section, no packed LINE_BREAK_PROPS
// table is present here (only a stray duplicated `const` keyword was); it
// presumably has to be regenerated with mkdata.js -- confirm before release.
const LINE_BREAK_PROP_UNIT_LENGTH = 5;

// Maps each line-break class abbreviation to its numeric code. The character
// in the leading comment of each entry is the generator's one-character tag.
const LBP = {
	/* @ */'XX': 0,
	/* A */'SOT': 1,
	/* B */'EOT': 2,
	/* C */'CM': 3,
	/* D */'BA': 4,
	/* E */'LF': 5,
	/* F */'BK': 6,
	/* G */'CR': 7,
	/* H */'SP': 8,
	/* I */'EX': 9,
	/* J */'QU': 10,
	/* K */'AL': 11,
	/* L */'PR': 12,
	/* M */'PO': 13,
	/* N */'OP': 14,
	/* O */'CP': 15,
	/* P */'IS': 16,
	/* Q */'HY': 17,
	/* R */'SY': 18,
	/* S */'NU': 19,
	/* T */'CL': 20,
	/* U */'NL': 21,
	/* V */'GL': 22,
	/* W */'AI': 23,
	/* X */'BB': 24,
	/* Y */'HL': 25,
	/* Z */'SA': 26,
	/* [ */'JL': 27,
	/* \ */'JV': 28,
	/* ] */'JT': 29,
	/* ^ */'NS': 30,
	/* _ */'ZW': 31,
	/* ` */'ZWJ': 32,
	/* a */'B2': 33,
	/* b */'IN': 34,
	/* c */'WJ': 35,
	/* d */'ID': 36,
	/* e */'EB': 37,
	/* f */'CJ': 38,
	/* g */'H2': 39,
	/* h */'H3': 40,
	/* i */'SG': 41,
	/* j */'CB': 42,
	/* k */'RI': 43,
	/* l */'EM': 44
};

// Line-break class abbreviations indexed by numeric code.
const LBP_NAMES = Object.keys(LBP);
// GENERATED CODE END
// >>>

/*
 * data table, taken from:
 * http://www.unicode.org/Public/14.0.0/ucd/EastAsianWidth.txt
 */

// GENERATED CODE START <<<1
// This data was generated by the command 'node mkdata.js -a'.
const EAST_ASIAN_WIDTH_PROPS = '\ 0000000004012000E00B007F00400402A100200001A200400002A400200001A500400002A7004000\ 00A900200002AA00200000AB00200001AC00200002AD00400001AF00200002B000A00000B5002000\ 02B600A00000BB00200002BC00800000C000C00002C600200000C700200102D000200000D100C000\ 02D700400000D900A00002DE00800000E200800002E600200000E700200002E800600000EB002000\ 02EC00400000EE00400002F000200000F100200002F200400000F400600002F700800000FB002000\ 02FC00200000FD00200002FE00200000FF0040000201012000000201E00102110120000012012000\ 0213012000001401E000021B012000001C01400102260140000028016000022B012000002C01A000\ 023101600000340180000238012000003901C000023F018000004301200002440120000045016000\ 0248018000004C012000024D012000004E0180000252014000005401400202660140000068016000\ 026B012000006C01400C02CE01200000CF01200002D001200000D101200002D201200000D3012000\ 02D401200000D501200002D601200000D701200002D801200000D901200002DA01200000DB012000\ 02DC01200000DD01800E0251022000005202E0010261022000006202400C02C402200000C5024000\ 02C702200000C802200002C902600000CC02200002CD02200000CE02400002D002200000D102E000\ 02D802800000DC02200002DD02200000DE02200002DF02200000E0020004020003000E0070030001\ 007A03C000008403E000008C032000008E036000029103200202A303E00000AA03E00002B1032002\ 00C203200002C303E00000CA03E0060201042000000204C001021004000800500420000251042000\ 005204C01B003105C0040059054006008D056000009105E00600D005600300EF05C000000006C021\ 000F078007004D07A00C00C007600700FD072006003008E0010040088003005E0820000060086001\ 007008E0030090084000009808801D0085090001008F094000009309C00200AA09E00000B2092000\ 00B609800000BC09200100C709400000CB09800000D709200000DC09400000DF09A00000E6092003\ 00010A600000050AC000000F0A400000130AC002002A0AE00000320A400000350A400000380A4000\ 003C0A2000003E0AA00000470A4000004B0A600000510A200000590A8000005E0A200000660A2002\ 00810A600000850A2001008F0A600000930AC00200AA0AE00000B20A400000B50AA00000BC0A4001\ 
00C70A600000CB0A600000D00A200000E00A800000E60A800100F90AE00000010B600000050B0001\ 000F0B400000130BC002002A0BE00000320B400000350BA000003C0B200100470B4000004B0B6000\ 00550B6000005C0B4000005F0BA00000660B400200820B400000850BC000008E0B600000920B8000\ 00990B4000009C0B2000009E0B400000A30B400000A80B600000AE0B800100BE0BA00000C60B6000\ 00CA0B800000D00B200000D70B200000E60BA00200000CA001000E0C600000120CE002002A0C0002\ 003C0C200100460C6000004A0C800000550C400000580C6000005D0C200000600C800000660C4001\ 00770CC002008E0C600000920CE00200AA0C400100B50CA00000BC0C200100C60C600000CA0C8000\ 00D50C400000DD0C400000E00C800000E60C400100F10C400000000DA001000E0D600000120D6006\ 00460D6000004A0DC00000540D000200660D400300810D600000850D4002009A0D000300B30D2001\ 00BD0D200000C00DE00000CA0D200000CF0DC00000D60D200000D80D000100E60D400100F20D6000\ 00010E4007003F0EA00300810E400000840E200000860EA000008C0E000300A50E200000A70EE002\ 00C00EA00000C60E200000C80EC00000D00E400100DC0E800000000F000900490F800400710FE004\ 00990F800400BE0FE00100CE0FA001000010C01800C710200000CD10200000D0100006030011000C\ 006011201D004A128000005012E0000058122000005A1280000060122005008A1280000090122004\ 00B212800000B812E00000C012200000C212800000C812E00100D812200700121380000018136008\ 005D130004008013400300A013C00A00F813C000000014A05300A016200B000017C002001F170003\ 0040178002006017A001006E1760000072174000008017C00B00E017400100F01740010000184003\ 002018200B008018600500B018C008000019E0030020198001003019800100401920000044194005\ 007019A000008019800500B019400300D019600100DE19C007001E1A200800601AA003007F1A6001\ 00901A400100A01AC00100B01AE00300001BA00900501BE00500801B800E00FC1B8007003B1CE001\ 004D1C800700901C600500BD1C600100D01C600500001DC04200181FC00000201FC00400481FC000\ 00501F000100591F2000005B1F2000005D1F2000005F1FE00300801FA00600B61FE00100C61FC001\ 00D61FC00000DD1F600200F21F600000F61F20010000200002021020200000112040000213208000\ 00172020000218204000001A204000021C204000001E204000022020600000232020000224208000\ 
002820000102302020000031202000023220400000342020000235202000003620A000023B202000\ 003C204000023E202000003F20C004006620800102742020000075204001027F2020000080202000\ 02812080000085204001009020A00100A020200104A920200000AA20400002AC20200000AD208002\ 00D0202004000021600002032120000004212000020521200000062160000209212000000A212001\ 02132120000014214000021621200000172140010221214000002321600002262120000027218000\ 022B212000002C21E0040253214000005521C000025B218000005F2120000260218001006C218000\ 0270214001007A21E0010289212000008A2140000290214001009A21C00302B821400000BA210003\ 02D221200000D321200002D421200000D521400202E721200000E821000302002220000001222000\ 0202224000000422600002072240000009224000020B222000000C226000020F2220000010222000\ 0211222000001222600002152220000016228000021A222000001B224000021D2280000021224000\ 0223222000002422200002252220000026222000022722C000002D222000022E222000002F22A000\ 02342280000038228000023C224000003E22400102482220000049226000024C222000004D22A000\ 0252222000005322A0010260224000006222400002642280000068224000026A224000006C224000\ 026E2240000070224002028222400000842240000286224000008822A00102952220000096226000\ 0299222000009A22600102A522200000A622200302BF22200000C022400A0212232000001323E000\ 031A234000001C23A0010329234000002B23C01703E923800000ED23600003F023200000F1234000\ 03F323200000F42360060040246001026024401100EA24200002EB24200C004C2580000250258004\ 0074258001028025000200902540000292258000009625400102A025400000A225200002A325E000\ 00AA25000102B225400000B425400002B625400000B825800002BC25400000BE25400002C0254000\ 00C225800002C625600000C925400002CB25200000CC25400002CE25800000D225000202E2258000\ 00E625200102EF25200000F025A00103FD25400000FF25C000020526400000072640000209262000\ 000A268000020E26400000102680000314264000001626C000021C262000001D262000021E262000\ 001F262004024026200000412620000242262000004326A000034826800100542680010260264000\ 0062262000026326600000662620000267268000006B262000026C264000006E262000026F262000\ 
007026E001037F262000008026600203932620000094264001029E26400000A026200003A1262000\ 00A226000103AA26400000AC26200203BD26400002BF26200000C026800003C426400002C6260001\ 03CE26200002CF26A00003D426200002D526A00100E226200002E326200000E426800002E8264000\ 03EA26200002EB26E00003F226400002F426200003F526200002F626800003FA26200002FB264000\ 03FD26200002FE264000000027A00003052720000006278000030A274000000C2780030328272000\ 0029278002023D272000003E27C001034C272000004D272000034E272000004F2780000353276000\ 00562720000357272000005827C0030276274001008027A0020395276000009827000303B0272000\ 00B127C00103BF27200000C027C00401E627000100EE27E03201852940000087298032031B2B4000\ 001D2B600603502B200000512B800003552B200002562B8000005A2B400300762B000400972BA02B\ 00F92CA00500272D2000002D2D200000302D0007006F2D4000007F2D000300A02DE00000A82DE000\ 00B02DE00000B82DE00000C02DE00000C82DE00000D02DE00000D82DE00000E02DC00F03802E4003\ 039B2E200B03002FC01A03F02F80010500302000030130C007003F302000034130C00A039930E00C\ 0305316005033131C00B039031800A03F031E00503203200050248320001035032E0FF034F3AE0FF\ 034E42E0FF034D4A606E00C04D000803004EE0FF03FF55E0FF03FE5DE0FF03FD65E0FF03FC6DE0FF\ 03FB75E0FF03FA7DE0FF03F985E0FF03F88DE0FF03F795E0FF03F69DE0D20390A4E00600D0A4802B\ 0040A600170000A7601900D0A7400000D3A7200000D5A7A00000F2A760070030A840010040A80007\ 0080A8C00800CEA8800100E0A8800E005FA920000360A9A0030080A9C00900CFA9600100DEA92004\ 0000AAE0060040AAC0010050AA4001005CAAE00C00DBAA80030001ABC0000009ABC0000011ABC000\ 0020ABE0000028ABE0000030AB80070070ABC00F00F0AB40010300ACE0FF03FFB3E0FF03FEBBE0FF\ 03FDC3E0FF03FCCBE0FF03FBD3207500B0D7E00200CBD720060000D8E0FF00FFDF20000200E0E0FF\ 02FFE7E0FF02FEEFE0FF02FDF760200300F900400000FBE0000013FBA000001DFB40030038FBA000\ 003EFB20000040FB40000043FB40000046FBA00F00D3FBA0370092FDC00600CFFD200000F0FD0002\ 0200FE00020310FE40010020FE00020330FE60040354FE60020368FE80000070FEA0000076FEE010\ 00FFFE20000501FF000C0461FFC00B04C2FFC00004CAFFC00004D2FFC00004DAFF600005E0FFE000\ 
04E8FFE00000F9FF800002FDFF20000000008101000D0041030028006102003C004100003F00E101\ 005000C101008000610F0000016100000701A105003701010B009001A10100A001210000D001C105\ 008002A10300A002210600E00281030000038104002D03C1030050036105008003C103009F03A104\ 00C803C101000004C11300A004410100B004810400D804810400000501050030058106006F058101\ 007C05E101008C05E1000094054100009705610100A305E10100B305E10000BB054100000006E126\ 004007C1020060070101008007C100008707410500B2072101000008C1000008082100000A088105\ 0037084100003C082100003F08E102005708010900A708210100E008610200F408410000FB082104\ 001F096103003F092100008009010700BC09810200D209410600050A4100000C0A010100150A6100\ 00190AA10300380A6100003F0A410100500A210100600A010800C00AE10400EB0A810100000BC106\ 00390BA10300580B610300780B410300990B810000A90BE10000000C210900800C610600C00C6106\ 00FA0CC10500300D410100600EE10300800E410500AB0E610000B00E410000000F010500300F4105\ 00700F410300B00F810300E00FE102000010C1090052108104007F10810800CD10210000D0102103\ 00F0104101000011A1060036114102005011E104008011010C00E111810200001241020013128105\ 008012E1000088122100008A128100008F12E101009F12610100B012610700F01241010000138100\ 0005130101000F134100001313C102002A13E1000032134100003513A100003B1341010047134100\ 004B13610000501321000057132100005D13E100006613E100007013A100000014810B005D14A100\ 008014010900D0144101008015C10600B815C104000016A1080050164101006016A1010080164107\ 00C01641010000176103001D17E101003017E102000018810700A018610A00FF1801010009192100\ 000C1901010015194100001819C1030037194100003B198101005019410100A019010100AA19C105\ 00DA19610100001A010900501A610A00B01A210900001C2101000A1CA10500381CC10100501CA103\ 00701C010400921CC10200A91CC10100001DE10000081D4100000B1D8105003A1D2100003C1D4100\ 003F1D210100501D410100601DC10000671D4100006A1DA10400901D410000931DC10000A01D4101\ 00E01E210300B01F210000C01F410600FF1F6173000024E10D007024A100008024811800902F610C\ 000030E1850030342101000044E148000068214700406AE10300606A4101006E6A210A00C06A4101\ 
00D06AC10300F06AC10000006BC10800506B4101005B6BE10000636BA102007D6B610200406E610B\ 00006F6109004F6F2107008F6F210203E06FA10003F06F4100030070E1FF03FF77E1FF03FE7F41FF\ 030088C19A03008D210103F0AF810003F5AFE10003FDAF41000300B061240350B161000364B18100\ 0370B181310000BC610D0070BCA1010080BC21010090BC4101009CBC01010000CFC1050030CFE102\ 0050CF810E0000D0C11E0000D1E1040029D141180000D2C10800E0D281020000D3E10A0060D32103\ 0000D4A10A0056D4E108009ED4410000A2D4210000A5D4410000A9D4810000AED4810100BBD42100\ 00BDD4E10000C5D421080007D58100000DD501010016D5E100001ED58103003BD581000040D5A100\ 0046D52100004AD5E1000052D5812A00A8D6812400CED7C157009BDAA10000A1DAE1010000DFE103\ 0000E0E1000008E02102001BE0E1000023E041000026E0A1000000E1A1050030E1C1010040E14101\ 004EE141000090E2E10300C0E2410700FFE2210000E0E7E10000E8E7810000EDE7410000F0E7E101\ 0000E8A11800C7E801020000E981090050E94101005EE941000071EC81080001EDA1070000EE8100\ 0005EE61030021EE41000024EE21000027EE21000029EE41010034EE81000039EE2100003BEE2100\ 0042EE21000047EE21000049EE2100004BEE2100004DEE61000051EE41000054EE21000057EE2100\ 0059EE2100005BEE2100005DEE2100005FEE21000061EE41000064EE21000067EE8100006CEEE100\ 0074EE81000079EE8100007EEE21000080EE4101008BEE210200A1EE610000A5EEA10000ABEE2102\ 00F0EE41000000F081000304F021000005F0E1040030F0810C00A0F0E10100B1F0E10100C1F0C101\ 03CFF0210000D1F0A1040200F16101000BF1A1000210F1C103002EF141000230F14107006AF1C100\ 0270F1C103038EF12100028FF141000391F14101029BF1410200ADF1210000E6F141030300F26100\ 0310F281050340F221010350F241000360F2C1000300F321040021F38101032DF321010036F32100\ 0337F3C108007DF32100037EF3C1020094F3810103A0F3610500CBF3810003CFF3A10000D4F38101\ 03E0F3210200F1F3610003F4F3210000F5F3610003F8F3E108003FF421000340F421000041F42100\ 0342F4611700FDF4410003FFF4E107003EF5A101034BF58100004FF521000350F501030068F54102\ 037AF52100007BF541030395F541000097F5A10103A4F5210000A5F5C10A03FBF5A10A0050F60106\ 0380F6C10800C6F6C10003CCF6210000CDF6610003D0F6610000D3F6410003D5F6610003DDF66100\ 
00E0F6610103EBF6410000F0F6810003F4F621010000F7810E0080F7210B03E0F7810103F0F72100\ 0000F881010010F801070050F841010060F801050090F8C10300B0F841000000F98101030CF9E105\ 003BF92100033CF941010046F921000347F921170000FA810A0060FAC1010370FAA1000378FAA100\ 0380FAE1000390FAA10303B0FA610103C0FAC10003D0FA410103E0FA010103F0FAE1000000FB6112\ 0094FBE10600F0FB4101030000E2FF03FF07E2FF03FE0FE2FF03FD17E2FF03FC1FE2FF03FB27E2FF\ 03FA2FE2FF03F937E2FF03F83FE2FF03F747E2FF03F64FE2FF03F557E2FF03F45FE2FF03F367E2FF\ 03F26FE2FF03F177E2FF03F07FE2FF03EF87E2FF03EE8FE2FF03ED97E2FF03EC9FE2FF03EBA7E2FF\ 03EAAFE2FF03E9B7E2FF03E8BFE2FF03E7C7E2FF03E6CFE2FF03E5D7E2FF03E4DFE2FF03E3E7E2FF\ 03E2EFE2FF03E1F7E2FF03E0FFC203030000E3FF03FF07E3FF03FE0FE3FF03FD17E3FF03FC1FE3FF\ 03FB27E3FF03FA2FE3FF03F937E3FF03F83FE3FF03F747E3FF03F64FE3FF03F557E3FF03F45FE3FF\ 03F367E3FF03F26FE3FF03F177E3FF03F07FE3FF03EF87E3FF03EE8FE3FF03ED97E3FF03EC9FE3FF\ 03EBA7E3FF03EAAFE3FF03E9B7E3FF03E8BFE3FF03E7C7E3FF03E6CFE3FF03E5D7E3FF03E4DFE3FF\ 03E3E7E3FF03E2EFE3FF03E1F7E3FF03E0FFC3030001002E000020000E0C0200010E1E020000EFFF\ 02FF07EFFF02FE0FEFFF02FD17EFFF02FC1FEFFF02FB27EFFF02FA2FEFFF02F937EFFF02F83FEFFF\ 02F747EFFF02F64FEFFF02F557EFFF02F45FEFFF02F367EFFF02F26FEFFF02F177EFFF02F07FEFFF\ 02EF87EFFF02EE8FEFFF02ED97EFFF02EC9FEFFF02EBA7EFFF02EAAFEFFF02E9B7EFFF02E8BFEFFF\ 02E7C7EFFF02E6CFEFFF02E5D7EFFF02E4DFEFFF02E3E7EFFF02E2EFEFFF02E1F7EFFF02E0FFCF03\ 020000F0FF02FF07F0FF02FE0FF0FF02FD17F0FF02FC1FF0FF02FB27F0FF02FA2FF0FF02F937F0FF\ 02F83FF0FF02F747F0FF02F64FF0FF02F557F0FF02F45FF0FF02F367F0FF02F26FF0FF02F177F0FF\ 02F07FF0FF02EF87F0FF02EE8FF0FF02ED97F0FF02EC9FF0FF02EBA7F0FF02EAAFF0FF02E9B7F0FF\ 02E8BFF0FF02E7C7F0FF02E6CFF0FF02E5D7F0FF02E4DFF0FF02E3E7F0FF02E2EFF0FF02E1F7F0FF\ 02E0FFD003'; const EAST_ASIAN_WIDTH_PROP_UNIT_LENGTH = 5; const EAW = { /* ` */'N': 0, /* a */'Na': 1, /* b */'A': 2, /* c */'W': 3, /* d */'H': 4, /* e */'F': 5 }; const EAW_NAMES = Object.keys(EAW); // GENERATED CODE END // >>> /* * classes */ function TimelimitCache () { 
this.lastCleared = Date.now(); this.cache = new Map; } TimelimitCache.prototype = { TTL_MSECS: 1000 * 60, has: function (key) { return this.cache.has(key); }, get: function (key) { const result = this.cache.get(key); if (Date.now() - this.lastCleared >= this.TTL_MSECS) { this.cache.clear; this.lastCleared = Date.now(); } return result; }, set: function (key, value) { this.cache.set(key, value); } }; /* * variables */ function stub (label, creator) { return new Proxy(() => {}, { apply: (obj, thisArg, args) => { return creator().apply(thisArg, args); } }); } let graphemeFinder = stub('grapheme', () => { return graphemeFinder = createFinderWithEmoji( Uint8Array.from( GRAPHEME_BREAK_PROPS.match(/[0-9A-F]{2}/g), a => parseInt(a, 16)), GRAPHEME_BREAK_PROP_UNIT_LENGTH, GBP.Other, GBP_NAMES.length ); }); let wordFinder = stub('word', () => { return wordFinder = createFinderWithEmoji( Uint8Array.from( WORD_BREAK_PROPS.match(/[0-9A-F]{2}/g), a => parseInt(a, 16)), WORD_BREAK_PROP_UNIT_LENGTH, WBP.Other, WBP_NAMES.length ); }); let sentenceFinder = stub('sentence', () => { return sentenceFinder = createFinder( Uint8Array.from( SENTENCE_BREAK_PROPS.match(/[0-9A-F]{2}/g), a => parseInt(a, 16)), SENTENCE_BREAK_PROP_UNIT_LENGTH, SBP.Other ); }); let scriptFinder = stub('script', () => { return scriptFinder = createFinder( Uint8Array.from( SCRIPTS.match(/[0-9A-F]{2}/g), a => parseInt(a, 16)), SCRIPTS_PROP_UNIT_LENGTH, SCRIPT.Unknown ); }); let lineBreakFinder = stub('script', () => { return lineBreakFinder = createFinder( Uint8Array.from( LINE_BREAK_PROPS.match(/[0-9A-F]{2}/g), a => parseInt(a, 16)), LINE_BREAK_PROP_UNIT_LENGTH, LBP.XX ); }); let eastAsianWidthFinder = stub('script', () => { return eastAsianWidthFinder = createFinder( Uint8Array.from( EAST_ASIAN_WIDTH_PROPS.match(/[0-9A-F]{2}/g), a => parseInt(a, 16)), EAST_ASIAN_WIDTH_PROP_UNIT_LENGTH, EAW.N); }); let linkCount = 0; let graphemeClusterCache = new TimelimitCache; let wordClusterCache = new Map([[false, new 
TimelimitCache], [true, new TimelimitCache]]); let lineBreakableClusterCache = new TimelimitCache; const eawMap = [ /* Neutral */ 1, /* Narrow */ 1, /* Ambiguous */ 2, /* Wide */ 2, /* Half Width */ 1, /* Full Width */ 2, ]; /* * utility functions */ function pick2 (data, index) { return data[index] | data[index + 1] << 8; } function pick3 (data, index) { return data[index] | data[index + 1] << 8 | data[index + 2] << 16; } function pick4 (data, index) { return data[index] | data[index + 1] << 8 | data[index + 2] << 16 | data[index + 3] << 24; } function resolveSurrogates (s) { return Array.from(s, ch => ch.codePointAt(0)); } function find (cp, table, units, otherValue) { let left = 0, right = ((table.length / units) >> 0) - 1; let middle, index, middlecp, length; while (left <= right) { middle = ((left + right) / 2) >> 0; index = middle * units; middlecp = pick4(table, index + 1); length = (middlecp >> 21) & 0x7ff; middlecp = middlecp & 0x1fffff; if (middlecp + length - 1 < cp) { left = middle + 1; } else if (cp < middlecp) { right = middle - 1; } else { return table[index]; } } return otherValue; } function createFinder (table, units, otherValue) { const cache = {}; return cp => { if (cp in cache) { return cache[cp]; } else { return cache[cp] = find(cp, table, units, otherValue); } }; } function createFinderWithEmoji (table, units, otherValue, emojiValue) { const cache = {}; return cp => { if (cp in cache) { return cache[cp]; } else { if (/^\p{Extended_Pictographic}$/u.test(String.fromCodePoint(cp))) { return cache[cp] = emojiValue; } else { return cache[cp] = find(cp, table, units, otherValue); } } }; } function startsWith (subject, candidates) { return candidates.includes(subject.substr(0, 1)); } function endsWith (subject, candidates) { return candidates.includes(subject.substr(-1)); } function getUTF16FromCodePoint (cp) { const p = (cp & 0x1f0000) >> 16; const o = cp & 0xffff; if (p) { return String.fromCharCode(0xd800 | ((p - 1) << 6) | ((o & 0xfc00) >> 10)) 
+ String.fromCharCode(0xdc00 | (o & 0x03ff)); } else { return String.fromCharCode(o); } } function getCodePointString (cp, type) { let result = ''; if (cp < 0x10000) { result = ('0000' + cp.toString(16).toUpperCase()).substr(-4); } else { result = cp.toString(16).toUpperCase(); } switch (type) { case 'entity': result = '&#x' + result + ';' break; case 'unicode': result = 'U+' + result; break; } return result; } /* * grapheme cluster handling functions */ function buildGraphemeClusters (codePoints) { const CODE_OFFSET = 96; const result = []; let propString = ''; let prevIndex = 0; let rawIndex = 0; let i = 0; for (let goal = codePoints.length; i < goal; i++) { const nextProp = String.fromCharCode(CODE_OFFSET + graphemeFinder(codePoints[i])); if (canBreak(propString, nextProp) > 0) { if (prevIndex < i) { const grapheme = new Grapheme(codePoints.slice(prevIndex, i), rawIndex); result.push(grapheme); rawIndex += grapheme.rawString.length; } prevIndex = i; } propString += nextProp; } if (canBreak(propString, String.fromCharCode(CODE_OFFSET + GBP.EOT)) > 0) { if (prevIndex < i) { result.push( new Grapheme(codePoints.slice(prevIndex, i), rawIndex) ); } } return result; } function canBreak (prev, next) { /* * This rules are taken from: * http://unicode.org/reports/tr29/, Version 14.0.0, 2021-08-24 * ============================================================ */ // Break at the start and end of text. // GB1: sot ÷ if (prev == '') return 1; // GB2: ÷ eot if (next == 'b') return 2; // Do not break between a CR and LF. // Otherwise, break before and after controls. // GB3: CR × LF if (endsWith(prev, 'd') && next == 'e') return -3; // GB4: ( Control | CR | LF ) ÷ if (endsWith(prev, 'fde')) return 4; // GB5: ÷ ( Control | CR | LF ) if (startsWith(next, 'fde')) return 5; // Do not break Hangul syllable sequences. 
// GB6: L × ( L | V | LV | LVT )
	// (property letters: j=L, k=V, l=T, m=LV, n=LVT)
	if (endsWith(prev, 'j') && startsWith(next, 'jkmn')) return -6;
	// GB7: ( LV | V ) × ( V | T )
	if (endsWith(prev, 'mk') && startsWith(next, 'kl')) return -7;
	// GB8: ( LVT | T) × T
	if (endsWith(prev, 'nl') && next == 'l') return -8;

	// Do not break before extending characters.
	// GB9: × ( Extend | ZWJ )
	// (g=Extend, o=ZWJ)
	if (startsWith(next, 'go')) return -9;

	// The GB9a and GB9b rules only apply to extended grapheme clusters:
	// Do not break before SpacingMarks, or after Prepend characters.
	// GB9a: × SpacingMark
	if (next == 'i') return -9.1;
	// GB9b: Prepend ×
	if (endsWith(prev, 'c')) return -9.2;

	// GB10 is removed.

	// Do not break within emoji modifier sequences or emoji zwj sequences.
	// GB11: \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic}
	// (p=Extended_Pictographic)
	if (/pg*o$/u.test(prev) && next == 'p') return -11;

	// Do not break within emoji flag sequences. That is, do not break
	// between regional indicator (RI) symbols if there is an odd number of
	// RI characters before the break point.
	// GB12: ^ ( RI RI )* RI × RI
	// (h=Regional_Indicator; prev starts at the start of text)
	if (/^(hh)*h$/.test(prev) && next == 'h') return -12;
	// GB13: [^RI] ( RI RI )* RI × RI
	if (/[^h](hh)*h$/.test(prev) && next == 'h') return -13;

	// Otherwise, break everywhere.
	// GB999: Any ÷ Any
	return 999;
}

/*
 * word boundary handling functions
 */

/*
 * Splits an array of code points into word segments.
 *
 * Every code point is mapped to a property letter
 * (String.fromCharCode(96 + wordFinder(cp))) and the EOT letter is
 * appended. The loop then visits each position i with prevProps holding
 * the letters before i and nextProps the letters from i on. A position
 * is skipped when canBreakWord() returns a negative number, or, when
 * useScripts is truthy, when isInScriptWord() returns true.
 *
 * Each segment is an object with:
 *   text     - the segment as a UTF-16 string
 *   index    - index of its first code point
 *   rawIndex - offset of the segment in UTF-16 code units
 *   length   - number of code points
 *   type     - word break property value of its first code point
 */
function buildWordClusters (codePoints, useScripts) {
	const CODE_OFFSET = 96;
	const result = [];
	let prevIndex = 0;
	let prevProps = '';
	let nextProps = codePoints.map(cp => {
		return String.fromCharCode(CODE_OFFSET + wordFinder(cp));
	}).join('') + String.fromCharCode(CODE_OFFSET + WBP.EOT);
	let rawIndex = 0;

	for (
		let i = 0, goal = nextProps.length;
		i < goal;
		// move one letter from the head of nextProps to the tail of prevProps
		i++, prevProps += nextProps.charAt(0), nextProps = nextProps.substring(1)
	) {
		if (canBreakWord(prevProps, nextProps) < 0) continue;
		// i > 0 guards the codePoints[i - 1] access
		if (useScripts && i > 0 && isInScriptWord(prevProps, nextProps, codePoints[i - 1], codePoints[i])) continue;

		if (prevIndex < i) {
			const text = codePoints.slice(prevIndex, i).map(getUTF16FromCodePoint).join('');
			result.push({
				text,
				index: prevIndex,
				rawIndex,
				length: i - prevIndex,
				type: prevProps.charCodeAt(prevIndex) - CODE_OFFSET
			});
			rawIndex += text.length;
		}
		prevIndex = i;
	}
	return result;
}

/*
 * Decides whether a word boundary lies between prev and next.
 *
 * prev holds the property letters of all preceding code points, next the
 * letters of all following ones including the trailing EOT letter.
 * Returns a positive number to break and a negative number to keep
 * together; the magnitude identifies the rule that decided.
 *
 * NOTE(review): the WBP table is not defined in this part of the file.
 * Judging from the rule comments below the letters appear to mean:
 * b=EOT c=Double_Quote d=Single_Quote e=Hebrew_Letter f=CR g=LF
 * h=Newline j=Regional_Indicator l=ALetter n=MidNum p=Numeric
 * q=ExtendNumLet r=ZWJ s=WSegSpace t=Katakana v=KanaExtension
 * w=Extended_Pictographic, with i/k covering Extend and Format, m/o
 * covering MidLetter and MidNumLet, and u being Hiragana -- verify
 * against WBP.
 */
function canBreakWord (prev, next) {
	/*
	 * These rules are taken from:
	 * http://unicode.org/reports/tr29/, Version 14.0.0, 2021-08-24
	 * ============================================================
	 */

	// Break at the start and end of text.
	// WB1: sot ÷
	if (prev == '') return 1;
	// WB2: ÷ eot
	if (startsWith(next, 'b')) return 2;

	// Do not break within CRLF.
	// WB3: CR × LF
	if (endsWith(prev, 'f') && startsWith(next, 'g')) return -3;

	// Otherwise break before and after Newlines (including CR and LF)
	// WB3a: (Newline | CR | LF) ÷
	if (endsWith(prev, 'hfg')) return 3.1;
	// WB3b: ÷ (Newline | CR | LF)
	if (startsWith(next, 'hfg')) return 3.2;

	// Do not break within emoji zwj sequences.
	// WB3c: ZWJ × \p{Extended_Pictographic}
	if (endsWith(prev, 'r') && startsWith(next, 'w')) return -3.3;

	// Keep horizontal whitespace together.
	// WB3d: WSegSpace × WSegSpace
	if (endsWith(prev, 's') && startsWith(next, 's')) return -3.4;

	// Ignore Format and Extend characters, except after sot, CR, LF, and
	// Newline. (See Section 6.2, Replacing Ignore Rules.) This also has
	// the effect of: Any × (Format | Extend | ZWJ)
	// WB4: X (Extend | Format | ZWJ)* → X
	if (startsWith(next, 'ikr')) return -4;
	// from here on the rules see prev/next with every run of [ikr] that
	// follows a letter other than a, f, g, h removed
	prev = prev.replace(/([^afgh])[ikr]+/g, '$1');
	next = next.replace(/([^afgh])[ikr]+/g, '$1');

	// Do not break between most letters.
	// WB5: AHLetter × AHLetter
	//   * AHLetter represents (ALetter | Hebrew_Letter)
	if (endsWith(prev, 'le') && startsWith(next, 'le')) return -5;

	// Do not break letters across certain punctuation.
	// WB6: AHLetter × (MidLetter | MidNumLetQ) AHLetter
	//   * MidNumLetQ represents (MidNumLet | Single_Quote)
	if (endsWith(prev, 'le') && /^[mod][le]/.test(next)) return -6;
	// WB7: AHLetter (MidLetter | MidNumLetQ) × AHLetter
	if (/[le][mod]$/.test(prev) && startsWith(next, 'le')) return -7;
	// WB7a: Hebrew_Letter × Single_Quote
	if (endsWith(prev, 'e') && startsWith(next, 'd')) return -7.1;
	// WB7b: Hebrew_Letter × Double_Quote Hebrew_Letter
	if (endsWith(prev, 'e') && /^ce/.test(next)) return -7.2;
	// WB7c: Hebrew_Letter Double_Quote × Hebrew_Letter
	if (/ec$/.test(prev) && startsWith(next, 'e')) return -7.3;

	// Do not break within sequences of digits, or digits adjacent to
	// letters (“3a”, or “A3”).
	// WB8: Numeric × Numeric
	if (endsWith(prev, 'p') && startsWith(next, 'p')) return -8;
	// WB9: AHLetter × Numeric
	if (endsWith(prev, 'le') && startsWith(next, 'p')) return -9;
	// WB10: Numeric × AHLetter
	if (endsWith(prev, 'p') && startsWith(next, 'le')) return -10;

	// Do not break within sequences, such as “3.2” or “3,456.789”.
	// WB11: Numeric (MidNum | MidNumLetQ) × Numeric
	if (/p[nod]$/.test(prev) && startsWith(next, 'p')) return -11;
	// WB12: Numeric × (MidNum | MidNumLetQ) Numeric
	if (endsWith(prev, 'p') && /^[nod]p/.test(next)) return -12;

	// Do not break between Katakana.
	// WB13: Katakana × Katakana
	// [unistring extension]: this rule is not used; WB13-unistring-1 is
	// used instead.
	//if (endsWith(prev, 't') && startsWith(next, 't')) return -13;

	// [unistring extension]: Do not break between Katakana, Hiragana, KanaExtension
	// WB13-unistring-1: Katakana × Katakana
	//                   Hiragana × Hiragana
	//                   KanaExtension × KanaExtension
	// (the last letter of prev must equal the first letter of next)
	if (endsWith(prev, 'tuv') && prev.substr(-1) == next.charAt(0)) return -13.1;

	// [unistring extension]: Do not break between Kana and its extension
	// WB13-unistring-2: (Katakana | Hiragana) × KanaExtension
	if (endsWith(prev, 'tu') && startsWith(next, 'v')) return -13.2;

	// [unistring extension]: Do not break between Kana and its extension
	// WB13-unistring-3: KanaExtension × (Katakana | Hiragana)
	if (endsWith(prev, 'v') && startsWith(next, 'tu')) return -13.3;

	// Do not break from extenders.
	// WB13a: (AHLetter | Numeric | Katakana | Hiragana | KanaExtension | ExtendNumLet) × ExtendNumLet
	// [unistring extension]: added Hiragana and KanaExtension
	if (endsWith(prev, 'leptuvq') && startsWith(next, 'q')) return -13.4;
	// WB13b: ExtendNumLet × (AHLetter | Numeric | Katakana | Hiragana | KanaExtension)
	// [unistring extension]: added Hiragana and KanaExtension
	if (endsWith(prev, 'q') && startsWith(next, 'leptuvq')) return -13.5;

	// WB14 is removed

	// Do not break within emoji flag sequences. That is, do not break
	// between regional indicator (RI) symbols if there is an odd number of
	// RI characters before the break point.
	// WB15: ^ (RI RI)* RI × RI
	if (/^(jj)*j$/.test(prev) && startsWith(next, 'j')) return -15;
	// WB16: [^RI] (RI RI)* RI × RI
	if (/[^j](jj)*j$/.test(prev) && startsWith(next, 'j')) return -16;

	// Otherwise, break everywhere (including around ideographs).
// WB999: Any ÷ Any return 999; } function wordIndexOf (utf16Index) { let left = 0, right = this.length - 1; let middle, rawIndex, length; while (left <= right) { middle = ((left + right) / 2) >> 0; rawIndex = this[middle].rawIndex; length = this[middle].text.length; if (rawIndex + length - 1 < utf16Index) { left = middle + 1; } else if (utf16Index < rawIndex) { right = middle - 1; } else { return middle; } } return -1; } function getWords (s, useScripts) { const cache = wordClusterCache.get(!!useScripts); let result; if (cache.has(s)) { result = cache.get(s).map(c => {return {...c}}); } else { result = buildWordClusters(resolveSurrogates(s), useScripts); cache.set(s, result); } Object.defineProperty(result, 'wordIndexOf', { value: wordIndexOf }); return result; } function isInScriptWord (prev, next, prevcp, nextcp) { prev = prev.substr(-1); next = next.charAt(0); // Space × Space if (prev == 's' && next == 's') return true; // !Space ÷ Space if (prev != 's' && next == 's') return false; // Space ÷ !Space if (prev == 's' && next != 's') return false; if (/[ab]/.test(prev) || /[ab]/.test(next)) return false; return scriptFinder(prevcp) == scriptFinder(nextcp); } /* * sentence boundary handling functions/ */ function buildSentenceClusters (codePoints) { const CODE_OFFSET = 96; const result = []; let prevIndex = 0; let prevProps = ''; let nextProps = codePoints.map(cp => { return String.fromCharCode(CODE_OFFSET + sentenceFinder(cp)); }).join('') + String.fromCharCode(CODE_OFFSET + SBP.EOT); let rawIndex = 0; for ( let i = 0, goal = nextProps.length; i < goal; i++, prevProps += nextProps.charAt(0), nextProps = nextProps.substring(1) ) { if (canBreakSentence(prevProps, nextProps) < 0) continue; if (prevIndex < i) { const text = codePoints.slice(prevIndex, i).map(getUTF16FromCodePoint).join(''); result.push({ text, index: prevIndex, rawIndex, length: i - prevIndex, type: prevProps.charCodeAt(prevIndex) - CODE_OFFSET, punctuated: 
/[mn]o*h*[fcd]?$/.test(prevProps.substring(prevIndex).replace(/([^afcd])[ge]+/g, '$1')) }); rawIndex += text.length; } prevIndex = i; } return result; } function canBreakSentence (prev, next) { /* * This rules are taken from: * http://unicode.org/reports/tr29/, Version 14.0.0, 2021-08-24 * ============================================================ */ // Break at the start and end of text, unless the text is empty. // SB1 sot ÷ Any if (prev == '' && next != '') return 1; // SB2 Any ÷ eot if (prev != '' && startsWith(next, 'b')) return 2; // Do not break within CRLF. // SB3 CR × LF if (endsWith(prev, 'c') && startsWith(next, 'd')) return -3; // Break after paragraph separators. // SB4 ParaSep ÷ if (endsWith(prev, 'fcd')) return 4; // Ignore Format and Extend characters, except after sot, ParaSep, and // within CRLF. (See Section 6.2, Replacing Ignore Rules.) This also // has the effect of: Any × (Format | Extend) // SB5 X (Extend | Format)* → X if (startsWith(next, 'ge')) return -5; next = next.replace(/([^afcd])[ge]+/g, '$1'); prev = prev.replace(/([^afcd])[ge]+/g, '$1'); // Do not break after full stop in certain contexts. [See note below.] // SB6 ATerm × Numeric if (endsWith(prev, 'm') && startsWith(next, 'l')) return -6; // SB7 (Upper | Lower) ATerm × Upper if (/[ij]m$/.test(prev) && startsWith(next, 'j')) return -7; // SB8 ATerm Close* Sp* × ( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )* Lower if (/mo*h*$/.test(prev) && /^[^kjifcdmn]*i/.test(next)) return -8; // SB8a SATerm Close* Sp* × (SContinue | SATerm) if (/[mn]o*h*$/.test(prev) && startsWith(next, 'pmn')) return -8.1; // Break after sentence terminators, but include closing punctuation, // trailing spaces, and any paragraph separator. [See note below.] 
// SB9 SATerm Close* × (Close | Sp | ParaSep) if (/[mn]o*$/.test(prev) && startsWith(next, 'ohfcd')) return -9; // SB10 SATerm Close* Sp* × (Sp | ParaSep) if (/[mn]o*h*$/.test(prev) && startsWith(next, 'hfcd')) return -10; // SB11 SATerm Close* Sp* ParaSep? ÷ if (/[mn]o*h*[fcd]?$/.test(prev)) return 11; // Otherwise, do not break. // SB998 Any × Any return -998; } function getSentences (s) { const result = buildSentenceClusters(resolveSurrogates(s)); Object.defineProperty(result, 'sentenceIndexOf', { value: wordIndexOf }); return result; } /* * line breaking handling functions */ function buildLineBreakableClusters (codePoints) { const CODE_OFFSET = 64; const result = []; let prevIndex = 0; let prevProps = ''; let nextProps = codePoints.map(cp => { let prop = lineBreakFinder(cp); // Assign a line breaking class to each code point of the input. // Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes // depending on criteria outside the scope of this algorithm. // // AI, SG, XX -> AL // SA (Only Mn or Mc) -> CM // SA (Except Mn or MC) -> AL // CJ -> NS switch (prop) { case 23: case 41: case 0: prop = 11; break; case 26: prop = /\p{gc=Mn}|\p{gc=Mc}/u.test(String.fromCodePoint(cp)) ? 
3 : 11;
				break;
			case 38:
				prop = 30;
				break;
		}
		return String.fromCharCode(CODE_OFFSET + prop);
	}).join('') + String.fromCharCode(CODE_OFFSET + LBP.EOT);

	// (tail of the preceding cluster builder, kept verbatim)
	// Walk every code point boundary; a cluster is emitted each time
	// canBreakLine() reports a non-negative value (a break opportunity).
	let rawIndex = 0;
	for (
		let i = 0, goal = nextProps.length;
		i < goal;
		i++, prevProps += nextProps.charAt(0), nextProps = nextProps.substring(1)
	) {
		if (canBreakLine(prevProps, nextProps, codePoints[i - 1], codePoints[i]) < 0) continue;

		if (prevIndex < i) {
			const text = codePoints.slice(prevIndex, i).map(getUTF16FromCodePoint).join('');
			result.push({
				text,
				index: prevIndex,
				rawIndex,
				length: i - prevIndex,
				type: prevProps.charCodeAt(prevIndex) - CODE_OFFSET
			});
			rawIndex += text.length;
		}

		prevIndex = i;
	}

	return result;
}

/**
 * Decides whether a line break is allowed at one position of a text.
 *
 * The rules are evaluated strictly in order and the first one that matches
 * wins, so the statement order below must not be changed.
 *
 * @param {string} prev - encoded line break classes (one character per code
 *   point) of everything before the position; '' at the start of text
 * @param {string} next - encoded line break classes of everything from the
 *   position onward
 * @param {number} prevcp - code point immediately before the position
 *   (undefined at the start of text)
 * @param {number} nextcp - code point immediately after the position
 * @returns {number} a negative value when a break is prohibited, a positive
 *   value when it is allowed; the magnitude is the number of the rule that
 *   decided (e.g. -5.1 for the first clause of LB5, 999 for LB31)
 *
 * NOTE(review): startsWith()/endsWith() are defined elsewhere in this file;
 * from their use here they presumably test whether the first/last class
 * character of the string is one of the characters listed in the second
 * argument -- confirm against their definition.
 */
function canBreakLine (prev, next, prevcp, nextcp) {
	/*
	 * These rules are taken from:
	 * https://www.unicode.org/reports/tr14/tr14.html, Version 14.0.0, 2021-08-24
	 * ============================================================
	 */

	// LB2 Never break at the start of text.
	//   sot ×
	if (prev == '') return -2;

	// LB3 Always break at the end of text.
	//   ! eot
	if (startsWith(next, 'B')) return 3;

	// LB4 Always break after hard line breaks.
	//   BK !
	if (endsWith(prev, 'F')) return 4;

	// LB5 Treat CR followed by LF, as well as CR, LF, and NL as hard line breaks.
	//   CR × LF
	//   CR !
	//   LF !
	//   NL !
	if (endsWith(prev, 'G') && startsWith(next, 'E')) return -5.1;
	if (endsWith(prev, 'GEU')) return 5.2;

	// LB6 Do not break before hard line breaks.
	//   × ( BK | CR | LF | NL )
	if (startsWith(next, 'FGEU')) return -6;

	// LB7 Do not break before spaces or zero width space.
	//   × SP
	//   × ZW
	if (startsWith(next, 'H_')) return -7;

	// LB8 Break before any character following a zero-width space,
	//     even if one or more spaces intervene.
	//   ZW SP* ÷
	if (/_H*$/.test(prev)) return 8;

	// LB8a Do not break after a zero width joiner.
	//   ZWJ ×
	//
	// A ZWJ will prevent breaks between most pairs of characters.
	// This behavior is used to prevent breaks within emoji zwj sequences.
	if (endsWith(prev, '`')) return -8.1;

	// LB9 Do not break a combining character sequence; treat it as if it has
	//     the line breaking class of the base character in all of the
	//     following rules. Treat ZWJ as if it were CM.
	//   Treat X (CM | ZWJ)* as if it were X.
	//   where X is any line break class except BK, CR, LF, NL, SP, or ZW.
	//
	// At any possible break opportunity between CM and a following character,
	// CM behaves as if it had the type of its base character. Note that
	// despite the summary title, this rule is not limited to standard combining
	// character sequences. For the purposes of line breaking, sequences
	// containing most of the control codes or layout control characters are
	// treated like combining sequences.
	//
	// From here on prev/next are rewritten in place: every run of 'C' / '`'
	// that follows an eligible base character is dropped, so later rules only
	// see the base. prevcp/nextcp are NOT adjusted by this rewriting.
	next = next.replace(/([^FGEUH_])[C`]+/g, '$1');
	prev = prev.replace(/([^FGEUH_])[C`]+/g, '$1');
	if (/[^FGEUH_]$/.test(prev) && /^[C`]+/.test(next)) {
		return -9;
	}

	// LB10 Treat any remaining combining mark or ZWJ as AL.
	//   Treat any remaining CM or ZWJ as it if were AL.
	//
	// This catches the case where a CM is the first character on the line or
	// follows SP, BK, CR, LF, NL, or ZW.
	next = next.replace(/[C`]/g, 'K');
	prev = prev.replace(/[C`]/g, 'K');

	// LB11 Do not break before or after Word joiner and related characters.
	//   × WJ
	//   WJ ×
	if (startsWith(next, 'c')) return -11;
	if (endsWith(prev, 'c')) return -11.1;

	// LB12 Do not break after NBSP and related characters.
	//   GL ×
	if (endsWith(prev, 'V')) return -12;

	// LB12a Do not break before NBSP and related characters, except after
	//       spaces and hyphens.
	//   [^SP BA HY] × GL
	//
	// The expression [^SP, BA, HY] designates any line break class other
	// than SP, BA or HY. The symbol ^ is used, instead of !, to avoid
	// confusion with the use of ! to indicate an explicit break. Unlike the
	// case for WJ, inserting a SP overrides the non-breaking nature of a GL.
	// Allowing a break after BA or HY matches widespread implementation
	// practice and supports a common way of handling special line breaking of
	// explicit hyphens, such as in Polish and Portuguese. See Section 5.3,
	// Use of Hyphen.
	if (/[^HDQ]$/.test(prev) && startsWith(next, 'V')) return -12.1;

	/*
	// LB13 Do not break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces.
	//   × CL
	//   × CP
	//   × EX
	//   × IS
	//   × SY
	if (startsWith(next, 'TOIPR')) return -13;
	*/

	// LB13 Rules modified according to regular expression-based LB25
	//   [^NU] × CL
	//   [^NU] × CP
	//         × EX
	//   [^NU] × IS
	//   [^NU] × SY
	if (/[^S]$/.test(prev) && startsWith(next, 'TOPR')) return -13;
	if (startsWith(next, 'I')) return -13.1;

	// LB14 Do not break after ‘[’, even after spaces.
	//   OP SP* ×
	if (/NH*$/.test(prev)) return -14;

	// LB15 Do not break within ‘”[’, even with intervening spaces.
	//   QU SP* × OP
	//
	// For more information on this rule, see the note in the description for
	// the QU class.
	if (/JH*$/.test(prev) && startsWith(next, 'N')) return -15;

	// LB16 Do not break between closing punctuation and a nonstarter (lb=NS),
	//      even with intervening spaces.
	//   (CL | CP) SP* × NS
	if (/[TO]H*$/.test(prev) && startsWith(next, '^')) return -16;

	// LB17 Do not break within ‘——’, even with intervening spaces.
	//   B2 SP* × B2
	if (/aH*$/.test(prev) && startsWith(next, 'a')) return -17;

	// LB18 Break after spaces.
	//   SP ÷
	if (endsWith(prev, 'H')) return 18;

	// LB19 Do not break before or after quotation marks, such as ‘ ” ’.
	//   × QU
	//   QU ×
	if (startsWith(next, 'J')) return -19;
	if (endsWith(prev, 'J')) return -19.1;

	// LB20 Break before and after unresolved CB.
	//   ÷ CB
	//   CB ÷
	//
	// Conditional breaks should be resolved external to the line breaking
	// rules. However, the default action is to treat unresolved CB as breaking
	// before and after.
	if (startsWith(next, 'j')) return 20;
	if (endsWith(prev, 'j')) return 20.1;

	// LB21 Do not break before hyphen-minus, other hyphens, fixed-width
	//      spaces, small kana, and other non-starters, or after acute accents.
	//   × BA
	//   × HY
	//   × NS
	//   BB ×
	if (startsWith(next, 'DQ^')) return -21;
	if (endsWith(prev, 'X')) return -21.1;

	// LB21a Don't break after Hebrew + Hyphen.
	//   HL (HY | BA) ×
	if (/Y[QD]$/.test(prev)) return -21.2;

	// LB21b Don’t break between Solidus and Hebrew letters.
	//   SY × HL
	if (endsWith(prev, 'R') && startsWith(next, 'Y')) return -21.3;

	// LB22 Do not break before ellipses.
	//   × IN
	//
	// Examples: ‘9...’, ‘a...’, ‘H...’
	if (startsWith(next, 'b')) return -22;

	// LB23 Do not break between digits and letters.
	//   (AL | HL) × NU
	//   NU × (AL | HL)
	if (endsWith(prev, 'KY') && startsWith(next, 'S')) return -23;
	if (endsWith(prev, 'S') && startsWith(next, 'KY')) return -23.1;

	// LB23a Do not break between numeric prefixes and ideographs, or between
	//       ideographs and numeric postfixes.
	//   PR × (ID | EB | EM)
	//   (ID | EB | EM) × PO
	if (endsWith(prev, 'L') && startsWith(next, 'del')) return -23.2;
	if (endsWith(prev, 'del') && startsWith(next, 'M')) return -23.3;

	// LB24 Do not break between numeric prefix/postfix and letters, or
	//      between letters and prefix/postfix.
	//   (PR | PO) × (AL | HL)
	//   (AL | HL) × (PR | PO)
	//
	// In general, it is recommended to not break lines inside numbers of the
	// form described by the following regular expression:
	//
	//   (PR | PO)? (OP | HY )? NU (NU | SY | IS)* (CL | CP)? (PR | PO)?
	//
	// Examples: $(12.35) 2,1234 (12)¢ 12.54¢
	//
	// The default line breaking algorithm approximates this with the following
	// rule. Note that some cases have already been handled, such as ‘9,’, ‘[9’.
	// For a tailoring that supports the regular expression directly, as well
	// as a key to the notation see Section 8.2, Examples of Customization.
	if (endsWith(prev, 'LM') && startsWith(next, 'KY')) return -24;
	if (endsWith(prev, 'KY') && startsWith(next, 'LM')) return -24.1;

	/*
	// LB25 Do not break between the following pairs of classes relevant to
	//      numbers:
	//   CL × PO
	//   CP × PO
	//   CL × PR
	//   CP × PR
	//   NU × PO
	//   NU × PR
	//   PO × OP
	//   PO × NU
	//   PR × OP
	//   PR × NU
	//   HY × NU
	//   IS × NU
	//   NU × NU
	//   SY × NU
	//
	// Example pairs: ‘$9’, ‘$[’, ‘$-’, ‘-9’, ‘/9’, ‘99’, ‘,9’, ‘9%’ ‘]%’
	switch (prev.substr(-1) + next.substr(0, 1)) {
	case 'TM': return -25.00;
	case 'OM': return -25.01;
	case 'TL': return -25.02;
	case 'OL': return -25.03;
	case 'SM': return -25.04;
	case 'SL': return -25.05;
	case 'MN': return -25.06;
	case 'MS': return -25.07;
	case 'LN': return -25.08;
	case 'LS': return -25.09;
	case 'QS': return -25.10;
	case 'PS': return -25.11;
	case 'SS': return -25.12;
	case 'RS': return -25.13;
	}
	*/

	// LB25 Regex-Number: Do not break numbers.
	//   (PR | PO) × ( OP | HY )? NU
	//   ( OP | HY ) × NU
	//   NU × (NU | SY | IS)
	//   NU (NU | SY | IS)* × (NU | SY | IS | CL | CP )
	//   NU (NU | SY | IS)* (CL | CP)? × (PO | PR)
	if (endsWith(prev, 'LM') && /^[NQ]?S/.test(next)) return -25;
	if (endsWith(prev, 'NQ') && startsWith(next, 'S')) return -25.1;
	if (endsWith(prev, 'S') && startsWith(next, 'SRP')) return -25.2;
	if (/S[SRP]*$/.test(prev) && startsWith(next, 'SRPTO')) return -25.3;
	if (/S[SRP]*[TO]?$/.test(prev) && startsWith(next, 'ML')) return -25.4;

	// LB26 Do not break a Korean syllable.
	//   JL × (JL | JV | H2 | H3)
	//   (JV | H2) × (JV | JT)
	//   (JT | H3) × JT
	//
	// where the notation (JT | H3) means JT or H3. The effective line breaking
	// class for the syllable block matches the line breaking class for Hangul
	// syllables, which is ID by default. This is achieved by the following
	// rule:
	if (endsWith(prev, '[') && startsWith(next, '[\\gh')) return -26;
	if (endsWith(prev, '\\g') && startsWith(next, '\\]')) return -26.1;
	if (endsWith(prev, ']h') && startsWith(next, ']')) return -26.2;

	// LB27 Treat a Korean Syllable Block the same as ID.
	//   (JL | JV | JT | H2 | H3) × PO
	//   PR × (JL | JV | JT | H2 | H3)
	//
	// When Korean uses SPACE for line breaking, the classes in rule LB26, as
	// well as characters of class ID, are often tailored to AL; see Section 8,
	// Customization.
	if (endsWith(prev, '[\\]gh') && startsWith(next, 'M')) return -27;
	if (endsWith(prev, 'L') && startsWith(next, '[\\]gh')) return -27.1;

	// LB28 Do not break between alphabetics (“at”).
	//   (AL | HL) × (AL | HL)
	if (endsWith(prev, 'KY') && startsWith(next, 'KY')) return -28;

	// LB29 Do not break between numeric punctuation and alphabetics (“e.g.”).
	//   IS × (AL | HL)
	if (endsWith(prev, 'P') && startsWith(next, 'KY')) return -29;

	// LB30 Do not break between letters, numbers, or ordinary symbols and
	//      opening or closing parentheses.
	//   (AL | HL | NU) × [OP-[\p{ea=F}\p{ea=W}\p{ea=H}]]
	//   [CP-[\p{ea=F}\p{ea=W}\p{ea=H}]] × (AL | HL | NU)
	//
	// The purpose of this rule is to prevent breaks in common cases where a
	// part of a word appears between delimiters—for example, in “person(s)”.
	//
	// The excluded set ([\p{ea=F}\p{ea=W}\p{ea=H}]) refines the behavior of
	// this rule, to enable a break before an East Asian OP or after an East
	// Asian CP. Those cases are identified by excluding East_Asian_Width
	// values of Fullwidth, Wide, or Halfwidth. This is illustrated by the
	// following example, which shows East Asian corner brackets immediately
	// following a Latin letter in Japanese text. In such a case, the preferred
	// line break is between the Latin letter and the opening angle bracket.
	//
	//   Preferred              Bad Break
	//   -------------------    ------------------
	//   日中韓統合漢字拡張G    日中韓統合漢字拡張
	//   「ユニコード」         G「ユニコード」
	//
	// NOTE(review): 5, 3 and 4 are presumably the eastAsianWidthFinder()
	// results for the excluded F/W/H widths -- confirm against the EAW table.
	if (endsWith(prev, 'KYS') && startsWith(next, 'N')) {
		const eaw = eastAsianWidthFinder(nextcp);
		if (eaw != 5 && eaw != 3 && eaw != 4) return -30;
	}
	if (endsWith(prev, 'O') && startsWith(next, 'KYS')) {
		const eaw = eastAsianWidthFinder(prevcp);
		if (eaw != 5 && eaw != 3 && eaw != 4) return -30.1;
	}

	// LB30a Break between two regional indicator symbols if and only if there
	//       are an even number of regional indicators preceding the position
	//       of the break.
	//   sot (RI RI)* RI × RI
	//   [^RI] (RI RI)* RI × RI
	if (/^(kk)*k$/.test(prev) && startsWith(next, 'k')) return -30.2;
	if (/[^k](kk)*k$/.test(prev) && startsWith(next, 'k')) return -30.3;

	// LB30b Do not break between an emoji base (or potential emoji) and an
	//       emoji modifier.
	//   EB × EM
	//   [\p{Extended_Pictographic}&\p{Cn}] × EM
	//
	// The second clause inspects the actual preceding code point (prevcp)
	// with Unicode property escapes rather than the encoded class string.
	if (endsWith(prev, 'e') && startsWith(next, 'l')) return -30.4;
	if (/\p{Extended_Pictographic}/u.test(String.fromCodePoint(prevcp))
	 && /\p{Cn}/u.test(String.fromCodePoint(prevcp))
	 && startsWith(next, 'l')) return -30.5;

	// LB31 Break everywhere else.
	//   ALL ÷
	//   ÷ ALL
	return 999;
}
/**
 * Returns the number of display columns a string occupies.
 *
 * @param {string} s - text to measure
 * @param {object} [options]
 * @param {boolean} [options.characterReference] - decode numeric character
 *   references (`&#65;`, `&#x41;`) before measuring. References that do not
 *   denote a valid code point are left untouched instead of throwing.
 * @param {boolean} [options.ansi] - skip ANSI escape sequences (CSI, OSC/DCS
 *   and other Fe sequences) so that they contribute no columns
 * @param {number} [options.awidth] - forwarded to getColumnsFor.plain()
 * @returns {number} total number of columns
 */
function getColumnsFor (s, options = {}) {
	let result = 0;

	if (options.characterReference) {
		s = s.replace(/&#([xX])?([^;]+);/g, ($0, $1, $2) => {
			const codePoint = parseInt($2, $1 ? 16 : 10);
			// String.fromCodePoint() throws a RangeError for NaN, negative
			// or too large values; keep such references as literal text.
			return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff ?
				String.fromCodePoint(codePoint) :
				$0;
		});
	}

	if (options.ansi) {
		const pattern = /\u001b\[.*?[\u0040-\u007e]|\u001b[\]P].+?(?:\u0007|\u001b\\)|\u001b[\u0040-\u005f]/g;
		let re, plainIndex = 0;

		// measure only the plain text found between escape sequences
		while ((re = pattern.exec(s)) !== null) {
			if (re.index > plainIndex) {
				result += getColumnsFor.plain(s.substring(plainIndex, re.index), options.awidth);
			}
			plainIndex = re.index + re[0].length;
		}
		if (plainIndex < s.length) {
			result += getColumnsFor.plain(s.substring(plainIndex), options.awidth);
		}
	}
	else {
		result += getColumnsFor.plain(s, options.awidth);
	}

	return result;
}

/**
 * Measures a string that contains neither escape sequences nor character
 * references: sums, per grapheme cluster, the width that eawMap assigns to
 * the East Asian Width class of the cluster's first code point.
 *
 * @param {string} s - text to measure
 * @param {number} [awidth] - when truthy, temporarily replaces eawMap[2] for
 *   the duration of this call (presumably the width of "ambiguous"
 *   characters -- confirm against the eawMap definition)
 * @returns {number} number of columns
 */
getColumnsFor.plain = (s, awidth) => {
	/*
	 * special handling of halfwidth katakana voiced/semi-voiced marks:
	 * should be considered as a single grapheme, not a modifier.
	 *
	 * TBD: there may be other characters that require special handling
	 */
	s = s.replace(/[\uff9e\uff9f]/g, '_');

	const oldawidth = eawMap[2];
	let result = 0;

	if (awidth) {
		eawMap[2] = awidth;
	}

	try {
		Unistring(s).forEach(cluster => {
			result += eawMap[eastAsianWidthFinder(cluster.codePoints[0])] || 0;
		});
	}
	finally {
		// always undo the override, even if measuring throws, so that the
		// shared table is never left modified
		eawMap[2] = oldawidth;
	}

	return result;
};
/**
 * Rewrites OSC 8 hyperlinks that span several lines so that each line
 * carries a complete start/end pair of its own.
 *
 * A link start is `ESC ] 8 ; params ; uri BEL` with a non-empty uri, a link
 * end is the same sequence with an empty uri. When a start and its end are
 * found on different lines, the start line is closed, every line in between
 * is wrapped, and the end line is re-opened. All pieces share one id: the
 * params of the start sequence if it had any, otherwise a generated one.
 *
 * @param {string[]} lines - lines to fix; the array is modified in place
 * @returns {string[]} the same array
 */
function normalizeHyperlinks (lines) {
	const osc8 = /\x1b\]8;([^;]*);([^\x07]*)\x07/g;

	// the most recent link start that has not been closed yet, or null
	let opened = null;

	for (let lineNo = 0; lineNo < lines.length; lineNo++) {
		osc8.lastIndex = 0;

		for (let m = osc8.exec(lines[lineNo]); m; m = osc8.exec(lines[lineNo])) {
			const [sequence, params, uri] = m;

			if (uri != '') {
				// link start
				opened = {
					line: lineNo,
					index: m.index,
					length: sequence.length,
					p2: params.length ? params : null,
					p3: uri.length ? uri : null
				};
				continue;
			}

			if (opened === null) {
				// link end without a start: nothing to do
				continue;
			}

			if (opened.line != lineNo) {
				// the link crosses a line boundary
				const linkId = opened.p2 || `id=_${Date.now()}_${linkCount++}`;
				const linkStart = `\x1b]8;${linkId};${opened.p3}\x07`;
				const linkEnd = `\x1b]8;;\x07`;
				const head = lines[opened.line];

				// head line: swap in the start sequence carrying the id, close at the end
				lines[opened.line] =
					head.substring(0, opened.index) +
					linkStart +
					head.substring(opened.index + opened.length) +
					linkEnd;

				// lines strictly in between: wrap completely
				for (let mid = opened.line + 1; mid < lineNo; mid++) {
					lines[mid] = `${linkStart}${lines[mid]}${linkEnd}`;
				}

				// bottom line: re-open; skip the scan position over the
				// text that was just prepended
				lines[lineNo] = `${linkStart}${lines[lineNo]}`;
				osc8.lastIndex += linkStart.length;
			}

			opened = null;
		}
	}

	return lines;
}
/**
 * Splits a string in two at a column limit.
 *
 * @param {string} s - text to split
 * @param {number} columns - maximum number of columns of the first part
 * @param {object} [options]
 * @param {boolean} [options.characterReference] - decode numeric character
 *   references first; references that do not denote a valid code point are
 *   left untouched instead of throwing
 * @param {boolean} [options.ansi] - treat ANSI escape sequences as zero-width
 *   units and pass the result through normalizeHyperlinks()
 * @param {number} [options.awidth] - forwarded to getColumnsFor.plain()
 * @returns {string[]} `[lead, rest]`; `['', s]` when columns <= 0 and
 *   `[s, '']` when everything fits
 */
function divideByColumns (s, columns, options = {}) {
	if (options.characterReference) {
		s = s.replace(/&#([xX])?([^;]+);/g, ($0, $1, $2) => {
			const codePoint = parseInt($2, $1 ? 16 : 10);
			// String.fromCodePoint() throws a RangeError for NaN, negative
			// or too large values; keep such references as literal text.
			return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff ?
				String.fromCodePoint(codePoint) :
				$0;
		});
	}

	if (columns <= 0) {
		return ['', s];
	}

	if (!options.ansi) {
		return divideByColumns.plain(s, columns, options.awidth);
	}

	// list of [text, columns]; escape sequences are kept as zero-width items
	const clusters = [];
	const pushPlain = text => {
		Unistring(text).forEach(cluster => {
			clusters.push([
				cluster.rawString,
				getColumnsFor.plain(cluster.rawString, options.awidth)
			]);
		});
	};

	const pattern = /\u001b\[.*?[\u0040-\u007e]|\u001b[\]P].+?(?:\u0007|\u001b\\)|\u001b[\u0040-\u005f]/g;
	let re, plainIndex = 0;
	while ((re = pattern.exec(s)) !== null) {
		if (re.index > plainIndex) {
			pushPlain(s.substring(plainIndex, re.index));
		}
		clusters.push([re[0], 0]);
		plainIndex = re.index + re[0].length;
	}
	if (plainIndex < s.length) {
		pushPlain(s.substring(plainIndex));
	}

	let leftColumns = 0;
	for (let i = 0; i < clusters.length; i++) {
		const graphemeColumn = clusters[i][1];
		if (leftColumns + graphemeColumn > columns) {
			return normalizeHyperlinks([
				clusters.slice(0, i).map(c => c[0]).join(''),
				clusters.slice(i).map(c => c[0]).join('')
			]);
		}
		leftColumns += graphemeColumn;
	}

	return [s, ''];
}

/**
 * Splits a string without escape sequences in two at a column limit, never
 * cutting through a grapheme cluster.
 *
 * @param {string} s - text to split
 * @param {number} columns - maximum number of columns of the first part
 * @param {number} [awidth] - forwarded to getColumnsFor.plain()
 * @returns {string[]} `[lead, rest]`
 */
divideByColumns.plain = (s, columns, awidth) => {
	if (columns <= 0) {
		return ['', s];
	}

	let leftColumns = 0;
	const u = Unistring(s);
	for (let i = 0; i < u.clusters.length; i++) {
		const graphemeColumn = getColumnsFor.plain(u.clusters[i].rawString, awidth);
		if (leftColumns + graphemeColumn > columns) {
			return [
				u.slice(0, i).toString(),
				u.slice(i).toString()
			];
		}
		leftColumns += graphemeColumn;
	}

	return [s, ''];
};

/**
 * Returns the line breakable clusters of a string, using
 * lineBreakableClusterCache to avoid rebuilding them.
 *
 * The caller always receives shallow copies of the cluster records, on a
 * cache miss as well as on a hit, so modifying the returned array or its
 * records can never alter what later calls get.
 *
 * @param {string} s - text to analyze
 * @returns {object[]} cluster records as produced by
 *   buildLineBreakableClusters()
 */
function getLineBreakableClusters (s) {
	let cached;
	if (lineBreakableClusterCache.has(s)) {
		cached = lineBreakableClusterCache.get(s);
	}
	else {
		cached = buildLineBreakableClusters(resolveSurrogates(s));
		lineBreakableClusterCache.set(s, cached);
	}
	return cached.map(c => ({...c}));
}
/**
 * Folds a text into lines that fit a column limit, breaking only at line
 * break opportunities where possible.
 *
 * @param {string} s - text to fold; may contain LF or CRLF newlines
 * @param {object} [options]
 * @param {number|number[]} [options.columns=80] - column limit. An array
 *   gives the limit per output line; its last element is reused for all
 *   further lines.
 * @param {number} [options.fallbackColumns] - used when the selected element
 *   of an array `columns` is falsy (then 80)
 * @param {boolean} [options.ansi] - treat ANSI escape sequences as
 *   zero-width; active SGR attributes are reset at the end of a folded line
 *   and re-applied at the start of the next one, and the result is passed
 *   through normalizeHyperlinks()
 * @param {boolean} [options.characterReference] - decode numeric character
 *   references first; references that do not denote a valid code point are
 *   left untouched instead of throwing
 * @param {number} [options.awidth] - forwarded to getColumnsFor.plain()
 * @returns {string[]} folded lines; a source newline stays attached to the
 *   last fragment of its line
 */
function getFoldedLines (s, options = {}) {
	// Replaces &#N; / &#xH; with the character it denotes.
	function decodeCharRefs (line) {
		return line.replace(/&#([xX])?([^;]+);/g, ($0, $1, $2) => {
			const codePoint = parseInt($2, $1 ? 16 : 10);
			// String.fromCodePoint() throws a RangeError for NaN, negative
			// or too large values; keep such references as literal text.
			return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff ?
				String.fromCodePoint(codePoint) :
				$0;
		});
	}

	// Appends [text, columns] for every breakable cluster of plain text.
	function pushPlainClusters (result, text) {
		for (const cluster of getLineBreakableClusters(text)) {
			result.push([cluster.text, getColumnsFor.plain(cluster.text, options.awidth)]);
		}
		return result;
	}

	function fetchPlainClusters (line) {
		return pushPlainClusters([], line);
	}

	function fetchAnsiClusters (line) {
		const result = [];
		/*
		 * group 1: SGR reset sequence
		 *     ESC [ m
		 *
		 * group 2: SGR (Select Graphics Rendition) sequences
		 *     ESC [ ... m
		 *
		 * group 3-1: Other CSI (Control Sequence Introducer) sequences, except SGR
		 *     ESC [ ...
		 *
		 * group 3-2: OSC (Operation System Command) sequences
		 *            or DCS (Device Control String) sequences
		 *     ST (String Terminator):
		 *         BEL | ( ESC \ )
		 *     OSC sequences
		 *         ESC ] ... ST
		 *     DCS sequences
		 *         ESC P ... ST
		 *
		 * group 3-3: Other Fe sequences, except OSC,DCS
		 *     ESC ...
		 */
		const pattern = /(\u001b\[0*m)|(\u001b\[.*?m)|(\u001b\[.*?[\u0040-\u007e]|\u001b[\]P].+?(?:\u0007|\u001b\\)|\u001b[\u0040-\u005f])/g;
		let re, plainIndex = 0;

		while ((re = pattern.exec(line)) !== null) {
			if (re.index > plainIndex) {
				pushPlainClusters(result, line.substring(plainIndex, re.index));
			}

			// third element: 2 = SGR reset, 1 = other SGR, absent = anything else
			if (re[1]) {
				result.push([re[1], 0, 2]);
			}
			else if (re[2]) {
				result.push([re[2], 0, 1]);
			}
			else {
				result.push([re[3], 0]);
			}

			plainIndex = re.index + re[0].length;
		}

		if (plainIndex < line.length) {
			pushPlainClusters(result, line.substring(plainIndex));
		}

		return result;
	}

	function fetchCharRefClusters (line) {
		return fetchPlainClusters(decodeCharRefs(line));
	}

	function fetchAnsiCharRefClusters (line) {
		return fetchAnsiClusters(decodeCharRefs(line));
	}

	const columnsSource = options.columns || 80;
	const result = [];
	let fetchClusters = fetchPlainClusters;
	let fetchColumns;

	if (Array.isArray(columnsSource)) {
		// the limit of the line about to be produced is selected by the
		// number of lines produced so far; an empty array or a falsy element
		// falls back to fallbackColumns / 80
		fetchColumns = () => {
			const index = Math.min(result.length, columnsSource.length - 1);
			return columnsSource[index] || options.fallbackColumns || 80;
		};
	}
	else {
		fetchColumns = () => columnsSource;
	}

	if (options.ansi && options.characterReference) {
		fetchClusters = fetchAnsiCharRefClusters;
	}
	else if (options.ansi) {
		fetchClusters = fetchAnsiClusters;
	}
	else if (options.characterReference) {
		fetchClusters = fetchCharRefClusters;
	}

	while (s != '') {
		// cut the next source line (and its newline, if any) off the input
		let line = /^(.*?)(\r?\n)/.exec(s), newline;
		if (line) {
			s = s.substring(line[0].length);
			newline = line[2];
			line = line[1];
		}
		else {
			newline = '';
			line = s;
			s = '';
		}

		const breakableClusters = fetchClusters(line);
		let lineColumns = 0;
		let lineFragment = '';
		let sgrSequence = '';
		let columns = fetchColumns();

		for (let i = 0; i < breakableClusters.length; i++) {
			const [clusterText, clusterColumns] = breakableClusters[i];

			// keep track of the SGR attributes in effect
			if (clusterColumns == 0) {
				switch (breakableClusters[i][2]) {
				case 1:
					sgrSequence += clusterText;
					break;
				case 2:
					sgrSequence = '';
					break;
				}
			}

			if (lineColumns + clusterColumns > columns) {
				if (clusterColumns > columns) {
					// the cluster alone is too wide for any line: fill up
					// the current line and re-queue the remainder
					let [lead, rest] = divideByColumns.plain(
						clusterText, columns - lineColumns, options.awidth);

					if (lead === '' && lineColumns === 0) {
						// Not even the first grapheme fits into an empty
						// line (e.g. a wide character with columns = 1).
						// Place it anyway; otherwise the same cluster would
						// be re-queued forever.
						const graphemes = Unistring(clusterText);
						lead = graphemes.slice(0, 1).toString();
						rest = graphemes.slice(1).toString();
					}

					if (sgrSequence !== '') {
						result.push(lineFragment + lead + '\u001b[m');
					}
					else {
						result.push(lineFragment + lead);
					}

					if (rest != '') {
						breakableClusters.splice(
							i + 1, 0,
							[rest, getColumnsFor.plain(rest, options.awidth)]);
					}

					lineColumns = 0;
					lineFragment = sgrSequence;
				}
				else {
					// the cluster fits a line of its own: start a new line
					if (sgrSequence !== '') {
						result.push(lineFragment + '\u001b[m');
					}
					else {
						result.push(lineFragment);
					}

					lineColumns = clusterColumns;
					lineFragment = sgrSequence + clusterText;
				}

				columns = fetchColumns();
			}
			else {
				lineColumns += clusterColumns;
				lineFragment += clusterText;
			}
		}

		if (lineFragment !== '') {
			result.push(lineFragment);
		}

		if (newline !== '') {
			if (result.length && breakableClusters.length) {
				result[result.length - 1] += newline;
			}
			else {
				result.push(newline);
			}
		}
	}

	return options.ansi ? normalizeHyperlinks(result) : result;
}
/*
 * Grapheme class
 */

/**
 * One grapheme cluster: its code points, the equivalent UTF-16 string and
 * the UTF-16 offset at which it starts in the string it belongs to.
 *
 * @param {number[]} [codePoints] - code points of the cluster; when given,
 *   rawString is derived from them
 * @param {number} [rawIndex] - UTF-16 offset of the cluster
 */
function Grapheme (codePoints, rawIndex) {
	if (codePoints != null) {
		this.codePoints = codePoints;
		this.updateRawString();
	}
	if (rawIndex != null) {
		this.rawIndex = rawIndex;
	}
}

Grapheme.prototype = {
	/** @returns {string} the cluster as a native string */
	toString () {
		return this.rawString;
	},

	/** @returns {Grapheme} a copy that owns its own code point array */
	clone () {
		const copy = new Grapheme;
		copy.codePoints = this.codePoints.slice();
		copy.rawString = this.rawString;
		copy.rawIndex = this.rawIndex;
		return copy;
	},

	/** Rebuilds rawString from the current code points. */
	updateRawString () {
		let raw = '';
		for (const cp of this.codePoints) {
			raw += getUTF16FromCodePoint(cp);
		}
		this.rawString = raw;
	},

	/**
	 * @param {boolean} [detail] - truthy for a multi-line description
	 * @returns {string} the code points joined by ' × ', or, in detail mode,
	 *   code points, rawIndex and rawString on separate lines
	 */
	dump (detail) {
		if (!detail) {
			return this.codePoints
				.map(getCodePointString)
				.join(' × ');
		}

		const codePointList = this.codePoints
			.map(cp => getCodePointString(cp, 'unicode'))
			.join(', ');

		return [
			`codePoints: [${codePointList}]`,
			` rawIndex: ${this.rawIndex}`,
			` rawString: (${this.rawString.length}) "${this.rawString}"`
		].join('\n');
	}
};
/*
 * Unistring class
 */

/**
 * A string held as an array of grapheme clusters (`this.clusters`), with
 * native-string-like methods whose indexes count clusters, not UTF-16 units.
 * May be called with or without `new`.
 *
 * The clusters built for a given text are kept in graphemeClusterCache.
 * Every instance works on its own clones, so editing an instance
 * (delete/insert/append/concat) never changes what later instances built
 * from the same text contain.
 *
 * @param {string|Grapheme[]} s - text to split, or clusters to copy
 * @throws {Error} when s is neither a string nor an array of Grapheme
 */
function Unistring (s) {
	if (!(this instanceof Unistring)) {
		return new Unistring(s);
	}

	if (typeof s == 'string') {
		let pristine;
		if (graphemeClusterCache.has(s)) {
			pristine = graphemeClusterCache.get(s);
		}
		else {
			pristine = buildGraphemeClusters(resolveSurrogates(s));
			graphemeClusterCache.set(s, pristine);
		}
		// never hand out the cached objects themselves
		this.clusters = pristine.map(g => g.clone());
	}
	else if (s instanceof Array) {
		this.clusters = [];
		let rawIndex = 0;
		for (let i = 0, goal = s.length; i < goal; i++) {
			if (!(s[i] instanceof Grapheme)) {
				throw new Error(
					'Unistring: invalid cluster class: ' +
					Object.prototype.toString.call(s[i]));
			}
			// copy and renumber so that raw indexes start at 0
			this.clusters[i] = s[i].clone();
			this.clusters[i].rawIndex = rawIndex;
			rawIndex += this.clusters[i].rawString.length;
		}
	}
	else {
		throw new Error('Unistring: invalid argument');
	}
}

Unistring.prototype = {
	/**
	 * Normalizes a cluster index: undefined becomes 0 (or the length when
	 * isEnd is truthy), negative values count from the end, and the result
	 * is clamped to [0, length].
	 */
	_ensureIndex: function (index, isEnd) {
		if (index == undefined) {
			index = isEnd ? this.clusters.length : 0;
		}
		if (index < 0) {
			index = this.clusters.length + index;
		}
		return Math.max(0, Math.min(index, this.clusters.length));
	},

	/**
	 * Converts a string to a Unistring and passes a Unistring through.
	 * @throws {Error} for any other argument; `caller` names the method in
	 *   the message
	 */
	_toUnistring: function (s, caller) {
		if (typeof s == 'string') {
			return new Unistring(s);
		}
		else if (s instanceof Unistring) {
			return s;
		}
		else {
			throw new Error(
				'Unistring#' + (caller || '') + ': invalid argument');
		}
	},

	/*
	 * methods
	 */

	/** @returns {Unistring} an independent copy */
	clone: function () {
		return new Unistring(this.clusters.slice());
	},

	/**
	 * @param {boolean} [detail] - truthy for one section per cluster
	 * @returns {string} description of the clusters
	 */
	dump: function (detail) {
		const log = [];
		if (detail) {
			this.clusters.forEach((g, index) => {
				log.push('*** Grapheme Cluster #' + index + ' ***');
				log.push(g.dump(detail));
			});
			return log.join('\n');
		}
		else {
			return '÷ ' + this.clusters
				.map(g => g.dump(detail))
				.join(' ÷ ') + ' ÷';
		}
	},

	/** @returns {string} the native string */
	toString: function () {
		return this.clusters.reduce((result, g) => result + g.toString(), '');
	},

	/**
	 * Removes `length` clusters starting at `start`, in place.
	 * @returns {Unistring} this
	 */
	delete: function (start, length) {
		start = this._ensureIndex(start);
		if (length == undefined || start + length > this.clusters.length) {
			length = this.clusters.length - start;
		}
		length = Math.max(0, length);

		// UTF-16 length of the removed part
		let delta = 0;
		for (let i = start, goal = start + length; i < goal; i++) {
			delta += this.clusters[i].rawString.length;
		}
		// shift the raw indexes of everything behind it
		for (let i = start + length, goal = this.clusters.length; i < goal; i++) {
			this.clusters[i].rawIndex -= delta;
		}
		this.clusters.splice(start, length);
		return this;
	},

	/**
	 * Inserts `s` before cluster `start`, in place. `s` itself is left
	 * untouched: its clusters are cloned before their raw indexes are
	 * rebased.
	 * @param {string|Unistring} s
	 * @param {number} [start=0]
	 * @returns {Unistring} this
	 */
	insert: function (s, start) {
		start = this._ensureIndex(start);
		const inserted = this._toUnistring(s, 'insert').clusters.map(g => g.clone());

		// raw index at which the inserted text begins
		let srcDelta = 0;
		if (start == this.clusters.length) {
			if (this.clusters.length) {
				const last = this.clusters[this.clusters.length - 1];
				srcDelta = last.rawIndex + last.rawString.length;
			}
		}
		else {
			srcDelta = this.clusters[start].rawIndex;
		}

		// UTF-16 length of the inserted text
		let dstDelta = 0;
		for (const g of inserted) {
			g.rawIndex += srcDelta;
			dstDelta += g.rawString.length;
		}
		for (let i = start, goal = this.clusters.length; i < goal; i++) {
			this.clusters[i].rawIndex += dstDelta;
		}

		this.clusters.splice(start, 0, ...inserted);
		return this;
	},

	/** Appends `s`, in place. @returns {Unistring} this */
	append: function (s) {
		return this.insert(s, this.clusters.length);
	},

	/** @returns {number[]|undefined} code points of the cluster at index */
	codePointsAt: function (index) {
		index = this._ensureIndex(index);
		if (index < 0 || index >= this.clusters.length) return undefined;
		return this.clusters[index].codePoints;
	},

	/** Alias of rawStringAt(). */
	clusterAt: function (...args) {
		return this.rawStringAt(...args);
	},

	/** @returns {string} the cluster at index as a string, '' if out of range */
	rawStringAt: function (index) {
		index = this._ensureIndex(index);
		if (index < 0 || index >= this.clusters.length) return '';
		return this.clusters[index].rawString;
	},

	/**
	 * @returns {number} UTF-16 offset of the cluster at index; for
	 *   index == length the offset just behind the last cluster; NaN for an
	 *   empty Unistring
	 */
	rawIndexAt: function (index) {
		index = this._ensureIndex(index);
		if (index < 0 || this.clusters.length == 0 || index > this.clusters.length) return NaN;
		if (index == this.clusters.length) {
			return this.clusters[index - 1].rawIndex + this.clusters[index - 1].rawString.length;
		}
		return this.clusters[index].rawIndex;
	},

	/** Same as Array.prototype.forEach over the clusters. */
	forEach: function (...args) {
		this.clusters.forEach(...args);
	},

	/** Same as Array.prototype.map over the clusters. */
	map: function (...args) {
		return this.clusters.map(...args);
	},

	/**
	 * Binary search for the cluster that contains a UTF-16 offset.
	 * @returns {number} cluster index; `length` for the offset just behind
	 *   the last cluster; -1 when the offset is outside the string
	 */
	getClusterIndexFromUTF16Index: function (index) {
		let left = 0, right = this.clusters.length - 1;
		let middle, rawIndex, length;

		if (right >= 0 && index == this.clusters[right].rawIndex + this.clusters[right].rawString.length) {
			return right + 1;
		}

		while (left <= right) {
			middle = ((left + right) / 2) >> 0;
			rawIndex = this.clusters[middle].rawIndex;
			length = this.clusters[middle].rawString.length;

			if (rawIndex + length - 1 < index) {
				left = middle + 1;
			}
			else if (index < rawIndex) {
				right = middle - 1;
			}
			else {
				return middle;
			}
		}

		return -1;
	},

	/*
	 * string like properties and methods
	 */

	/** Number of grapheme clusters. */
	get length () {
		return this.clusters.length;
	},

	/**
	 * @returns {string} the first UTF-16 code unit of the cluster at index
	 *   ('' if out of range); use rawStringAt() for the whole cluster
	 */
	charAt: function (index) {
		if (index < 0 || index >= this.clusters.length) return '';
		return this.clusters[index].rawString.charAt(0);
	},

	/** @returns {number} first code point of the cluster at index, or NaN */
	charCodeAt: function (index) {
		if (index < 0 || index >= this.clusters.length) return NaN;
		return this.clusters[index].codePoints[0];
	},

	/** Like String#substring, in cluster units. @returns {Unistring} */
	substring: function (start, end) {
		if (start == undefined) {
			start = 0;
		}
		if (end == undefined) {
			end = this.clusters.length;
		}
		start = Math.max(0, Math.min(start, this.clusters.length));
		end = Math.max(0, Math.min(end, this.clusters.length));
		if (start > end) {
			[start, end] = [end, start];
		}
		return new Unistring(this.clusters.slice(start, end));
	},

	/** Like String#substr, in cluster units. @returns {Unistring} */
	substr: function (start, length) {
		start = this._ensureIndex(start);
		if (length == undefined || start + length > this.clusters.length) {
			length = this.clusters.length - start;
		}
		if (length < 0) {
			length = 0;
		}
		return new Unistring(this.clusters.slice(start, start + length));
	},

	/** Like String#slice, in cluster units. @returns {Unistring} */
	slice: function (start, end) {
		start = this._ensureIndex(start);
		end = this._ensureIndex(end, true);
		return new Unistring(this.clusters.slice(start, end));
	},

	/**
	 * Appends `s` to this instance. Unlike String#concat this modifies the
	 * receiver. @returns {Unistring} this
	 */
	concat: function (s) {
		return this.insert(s, this.clusters.length);
	},

	/**
	 * @param {string|Unistring} s
	 * @returns {number} index of the first cluster at which `s` occurs as a
	 *   sequence of whole clusters, or -1
	 */
	indexOf: function (s) {
		s = this._toUnistring(s, 'indexOf');

		const whole = this.toString();
		const part = s.toString();
		let rawIndex = 0;
		let clusterIndex = 0;

		while ((rawIndex = whole.indexOf(part, rawIndex)) >= 0) {
			// advance to the first cluster that starts at or behind the hit
			while (clusterIndex < this.clusters.length
			 && this.clusters[clusterIndex].rawIndex < rawIndex) {
				clusterIndex++;
			}
			if (clusterIndex >= this.clusters.length) {
				return -1;
			}
			// accept the hit only if it is aligned to cluster boundaries
			if (this.substr(clusterIndex, s.length).toString() == part) {
				return clusterIndex;
			}
			rawIndex++;
		}

		return -1;
	},

	/**
	 * @param {string|Unistring} s
	 * @returns {number} index of the last cluster at which `s` occurs as a
	 *   sequence of whole clusters, or -1
	 */
	lastIndexOf: function (s) {
		s = this._toUnistring(s, 'lastIndexOf');

		const whole = this.toString();
		const part = s.toString();
		let rawIndex = whole.length - 1;
		let clusterIndex = this.clusters.length - 1;

		while (rawIndex >= 0 && (rawIndex = whole.lastIndexOf(part, rawIndex)) >= 0) {
			// step back to the last cluster that starts at or before the hit
			while (clusterIndex >= 0
			 && this.clusters[clusterIndex].rawIndex > rawIndex) {
				clusterIndex--;
			}
			if (clusterIndex < 0) {
				return -1;
			}
			if (this.substr(clusterIndex, s.length).toString() == part) {
				return clusterIndex;
			}
			rawIndex--;
		}

		return -1;
	},

	/** @param {boolean} [useLocale] @returns {Unistring} lower-cased copy */
	toLowerCase: function (useLocale) {
		const method = useLocale && 'toLocaleLowerCase' in String.prototype ?
			'toLocaleLowerCase' : 'toLowerCase';
		return new Unistring(this.toString()[method]());
	},

	/** @param {boolean} [useLocale] @returns {Unistring} upper-cased copy */
	toUpperCase: function (useLocale) {
		const method = useLocale && 'toLocaleUpperCase' in String.prototype ?
			'toLocaleUpperCase' : 'toUpperCase';
		return new Unistring(this.toString()[method]());
	}
};
/*
 * exporting
 */

// Attach the public helpers, property tables and settings to the Unistring
// constructor. They are defined through property descriptors that only set
// `value` (or get/set), so they are neither writable, enumerable nor
// configurable.
Object.defineProperties(Unistring, {
	// code point utilities
	getCodePointArray: {value: resolveSurrogates},
	getUTF16FromCodePoint: {value: getUTF16FromCodePoint},
	getCodePointString: {value: getCodePointString},

	// per code point property lookup
	getGraphemeBreakProp: {value: graphemeFinder},
	getWordBreakProp: {value: wordFinder},
	getSentenceBreakProp: {value: sentenceFinder},
	getScriptProp: {value: scriptFinder},
	getLineBreakProp: {value: lineBreakFinder},
	getEAWProp: {value: eastAsianWidthFinder},

	// text segmentation and column handling
	getWords: {value: getWords},
	getSentences: {value: getSentences},
	getLineBreakableClusters: {value: getLineBreakableClusters},
	getColumnsFor: {value: getColumnsFor},
	divideByColumns: {value: divideByColumns},
	getFoldedLines: {value: getFoldedLines},

	// property value tables
	GBP: {value: GBP},
	WBP: {value: WBP},
	SBP: {value: SBP},
	SCRIPT: {value: SCRIPT},
	LBP: {value: LBP},
	EAW: {value: EAW},

	// property name tables
	GBP_NAMES: {value: GBP_NAMES},
	WBP_NAMES: {value: WBP_NAMES},
	SBP_NAMES: {value: SBP_NAMES},
	SCRIPT_NAMES: {value: SCRIPT_NAMES},
	LBP_NAMES: {value: LBP_NAMES},
	EAW_NAMES: {value: EAW_NAMES},

	// Reads/writes eawMap[2], the entry that getColumnsFor.plain() overrides
	// with its `awidth` argument. Only the values 1 and 2 are accepted; any
	// other assignment is silently ignored.
	awidth: {
		get: () => {
			return eawMap[2];
		},
		set: value => {
			if (value === 1 || value === 2) {
				eawMap[2] = value;
			}
		}
	},

	// Prints cache statistics to the console.
	// NOTE(review): clusterCache, clusterCacheRequestCount,
	// clusterCacheHitCount and clusterCacheMissCount are not defined in the
	// part of the file visible here (the caches used above are
	// graphemeClusterCache and lineBreakableClusterCache); if they are not
	// declared elsewhere, calling this throws a ReferenceError -- confirm.
	printCacheStatus: {
		value: () => {
			console.log([
				`clusterCache.size: ${clusterCache.size}`,
				` request count: ${clusterCacheRequestCount}`,
				` hit count: ${clusterCacheHitCount}`,
				` miss count: ${clusterCacheMissCount}`
			].join('\n'));
		}
	}
});

export default Unistring;

// vim:set ts=4 sw=4 fenc=UTF-8 ff=unix ft=javascript fdm=marker fmr=<<<,>>> :