// generated by diplomat-tool
import { DataError } from "./DataError.mjs";
import { DataProvider } from "./DataProvider.mjs";
import { Locale } from "./Locale.mjs";
import { WordBreakIteratorUtf16 } from "./WordBreakIteratorUtf16.mjs";
import wasm from "./diplomat-wasm.mjs";
import * as diplomatRuntime from "./diplomat-runtime.mjs";

// Calls the native destructor with the registered pointer once an owning
// JS wrapper has been garbage collected.
const WordSegmenter_box_destroy_registry = new FinalizationRegistry((ptr) => {
    wasm.icu4x_WordSegmenter_destroy_mv1(ptr);
});

/**
 * An ICU4X word-break segmenter, capable of finding word breakpoints in strings.
 *
 * See the [Rust documentation for `WordSegmenter`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html) for more information.
 */
export class WordSegmenter {
    // Internal ptr reference:
    #ptr = null;

    // Lifetimes are only to keep dependencies alive.
    // Since JS won't garbage collect until there are no incoming edges.
    #selfEdge = [];

    /**
     * Shared initialisation used by the public constructor.
     *
     * Logs an error and returns `undefined` (leaving the instance without a
     * pointer) when `symbol` is not the runtime's internal-constructor token.
     *
     * @param {*} symbol - Must be `diplomatRuntime.internalConstructor`.
     * @param {*} ptr - Native pointer stored as this wrapper's `ffiValue`.
     * @param {Array} selfEdge - Objects this instance borrows from; an empty
     *     array means the wrapper owns `ptr`.
     * @returns {WordSegmenter|undefined}
     */
    #internalConstructor(symbol, ptr, selfEdge) {
        if (symbol !== diplomatRuntime.internalConstructor) {
            console.error("WordSegmenter is an Opaque type. You cannot call its constructor.");
            return;
        }
        this.#ptr = ptr;
        this.#selfEdge = selfEdge;

        // Are we being borrowed? If not, we can register.
        if (this.#selfEdge.length === 0) {
            WordSegmenter_box_destroy_registry.register(this, this.#ptr);
        }

        return this;
    }

    /**
     * Runs a fallible native constructor and wraps its outcome.
     *
     * The native call is made inside the `try` so that the receive buffer is
     * freed even when `invoke` throws (for example because an argument's
     * `ffiValue` could not be read).
     *
     * NOTE(review): `5, 4, true` are passed positionally to
     * `DiplomatReceiveBuf`; presumably byte size, alignment and a
     * "has result flag" marker - confirm against diplomat-runtime.
     *
     * @param {function(*): *} invoke - Called with the receive buffer; must
     *     perform the wasm call that writes its result into that buffer.
     * @returns {WordSegmenter} A wrapper around the pointer read from the buffer.
     * @throws {Error} With message `DataError.<value>` and a `DataError`
     *     `cause` when the buffer's result flag is not set.
     */
    static #createFallible(invoke) {
        const diplomatReceive = new diplomatRuntime.DiplomatReceiveBuf(wasm, 5, 4, true);

        try {
            invoke(diplomatReceive.buffer);

            if (!diplomatReceive.resultFlag) {
                const cause = new DataError(diplomatRuntime.internalConstructor, diplomatRuntime.enumDiscriminant(wasm, diplomatReceive.buffer));
                throw new globalThis.Error('DataError.' + cause.value, { cause });
            }
            return new WordSegmenter(diplomatRuntime.internalConstructor, diplomatRuntime.ptrRead(wasm, diplomatReceive.buffer), []);
        } finally {
            diplomatReceive.free();
        }
    }

    /** @internal */
    get ffiValue() {
        return this.#ptr;
    }

    /**
     * Construct a {@link WordSegmenter} with automatically selecting the best available LSTM
     * or dictionary payload data, using compiled data. This does not assume any content locale.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_auto`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.new_auto) for more information.
     *
     * @returns {WordSegmenter}
     */
    static createAuto() {
        const result = wasm.icu4x_WordSegmenter_create_auto_mv1();
        return new WordSegmenter(diplomatRuntime.internalConstructor, result, []);
    }

    /**
     * Construct a {@link WordSegmenter} with automatically selecting the best available LSTM
     * or dictionary payload data, using compiled data.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     *
     * @param {Locale} locale - Content locale; its `ffiValue` is passed to the native call.
     * @returns {WordSegmenter}
     * @throws {Error} With a `DataError` cause when the native call reports failure.
     */
    static createAutoWithContentLocale(locale) {
        return WordSegmenter.#createFallible((buffer) =>
            wasm.icu4x_WordSegmenter_create_auto_with_content_locale_mv1(buffer, locale.ffiValue));
    }

    /**
     * Construct a {@link WordSegmenter} with automatically selecting the best available LSTM
     * or dictionary payload data, using a particular data source.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     *
     * @param {DataProvider} provider - Data source; its `ffiValue` is passed to the native call.
     * @param {Locale} locale - Content locale; its `ffiValue` is passed to the native call.
     * @returns {WordSegmenter}
     * @throws {Error} With a `DataError` cause when the native call reports failure.
     */
    static createAutoWithContentLocaleAndProvider(provider, locale) {
        return WordSegmenter.#createFallible((buffer) =>
            wasm.icu4x_WordSegmenter_create_auto_with_content_locale_and_provider_mv1(buffer, provider.ffiValue, locale.ffiValue));
    }

    /**
     * Construct a {@link WordSegmenter} with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using compiled data. This does not assume any content locale.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_lstm`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.new_lstm) for more information.
     *
     * @returns {WordSegmenter}
     */
    static createLstm() {
        const result = wasm.icu4x_WordSegmenter_create_lstm_mv1();
        return new WordSegmenter(diplomatRuntime.internalConstructor, result, []);
    }

    /**
     * Construct a {@link WordSegmenter} with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using compiled data.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
     *
     * @param {Locale} locale - Content locale; its `ffiValue` is passed to the native call.
     * @returns {WordSegmenter}
     * @throws {Error} With a `DataError` cause when the native call reports failure.
     */
    static createLstmWithContentLocale(locale) {
        return WordSegmenter.#createFallible((buffer) =>
            wasm.icu4x_WordSegmenter_create_lstm_with_content_locale_mv1(buffer, locale.ffiValue));
    }

    /**
     * Construct a {@link WordSegmenter} with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using a particular data source.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
     *
     * @param {DataProvider} provider - Data source; its `ffiValue` is passed to the native call.
     * @param {Locale} locale - Content locale; its `ffiValue` is passed to the native call.
     * @returns {WordSegmenter}
     * @throws {Error} With a `DataError` cause when the native call reports failure.
     */
    static createLstmWithContentLocaleAndProvider(provider, locale) {
        return WordSegmenter.#createFallible((buffer) =>
            wasm.icu4x_WordSegmenter_create_lstm_with_content_locale_and_provider_mv1(buffer, provider.ffiValue, locale.ffiValue));
    }

    /**
     * Construct a {@link WordSegmenter} with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using compiled data. This does not assume any content locale.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_dictionary`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.new_dictionary) for more information.
     *
     * @returns {WordSegmenter}
     */
    static createDictionary() {
        const result = wasm.icu4x_WordSegmenter_create_dictionary_mv1();
        return new WordSegmenter(diplomatRuntime.internalConstructor, result, []);
    }

    /**
     * Construct a {@link WordSegmenter} with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using compiled data.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
     *
     * @param {Locale} locale - Content locale; its `ffiValue` is passed to the native call.
     * @returns {WordSegmenter}
     * @throws {Error} With a `DataError` cause when the native call reports failure.
     */
    static createDictionaryWithContentLocale(locale) {
        return WordSegmenter.#createFallible((buffer) =>
            wasm.icu4x_WordSegmenter_create_dictionary_with_content_locale_mv1(buffer, locale.ffiValue));
    }

    /**
     * Construct a {@link WordSegmenter} with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using a particular data source.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
     *
     * @param {DataProvider} provider - Data source; its `ffiValue` is passed to the native call.
     * @param {Locale} locale - Content locale; its `ffiValue` is passed to the native call.
     * @returns {WordSegmenter}
     * @throws {Error} With a `DataError` cause when the native call reports failure.
     */
    static createDictionaryWithContentLocaleAndProvider(provider, locale) {
        return WordSegmenter.#createFallible((buffer) =>
            wasm.icu4x_WordSegmenter_create_dictionary_with_content_locale_and_provider_mv1(buffer, provider.ffiValue, locale.ffiValue));
    }

    /**
     * Segments a string.
     *
     * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
     * to the WHATWG Encoding Standard.
     *
     * See the [Rust documentation for `segment_utf16`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenterBorrowed.html#method.segment_utf16) for more information.
     *
     * @param {string} input - Text to segment; it is copied into a UTF-16 buffer for the native call.
     * @returns {WordBreakIteratorUtf16} Iterator that keeps this segmenter and the
     *     input buffer alive through its lifetime edges.
     */
    segment(input) {
        const functionGarbageCollectorGrip = new diplomatRuntime.GarbageCollectorGrip();
        const inputSlice = functionGarbageCollectorGrip.alloc(diplomatRuntime.DiplomatBuf.sliceWrapper(wasm, diplomatRuntime.DiplomatBuf.str16(wasm, input)));

        // This lifetime edge depends on lifetimes 'a
        const aEdges = [this, inputSlice];

        try {
            // The native call lives inside the `try` so the grip is released
            // even if it throws.
            const result = wasm.icu4x_WordSegmenter_segment_utf16_mv1(this.ffiValue, inputSlice.ptr);
            return new WordBreakIteratorUtf16(diplomatRuntime.internalConstructor, result, [], aEdges);
        } finally {
            functionGarbageCollectorGrip.releaseToGarbageCollector();
        }
    }

    /**
     * Not for direct use: obtain instances through the static `create*` factories.
     * All arguments are forwarded to the internal constructor, which rejects
     * any `symbol` other than the runtime's internal-constructor token.
     */
    constructor(symbol, ptr, selfEdge) {
        return this.#internalConstructor(symbol, ptr, selfEdge);
    }
}