// generated by diplomat-tool
import type { DataError } from "./DataError"
import type { DataProvider } from "./DataProvider"
import type { Locale } from "./Locale"
import type { WordBreakIteratorUtf16 } from "./WordBreakIteratorUtf16"
import type { pointer, codepoint } from "./diplomat-runtime.d.ts";


/**
 * An ICU4X word-break segmenter, capable of finding word breakpoints in strings.
 *
 * Instances are obtained through the static `create*` factory methods; the
 * constructor and `ffiValue` are marked internal.
 *
 * See the [Rust documentation for `WordSegmenter`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html) for more information.
 */
export class WordSegmenter {
    /** @internal */
    get ffiValue(): pointer;
    /** @internal */
    constructor();

    /**
     * Construct a {@link WordSegmenter} that automatically selects the best available LSTM
     * or dictionary payload data, using compiled data. This does not assume any content locale.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_auto`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.new_auto) for more information.
     *
     * @returns a new {@link WordSegmenter}.
     */
    static createAuto(): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} that automatically selects the best available LSTM
     * or dictionary payload data, using compiled data.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * NOTE(review): the `try_` prefix on the Rust constructor suggests this can fail,
     * presumably by throwing a {@link DataError} — confirm against the implementation.
     *
     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     *
     * @param locale - the content locale to construct the segmenter for.
     * @returns a new {@link WordSegmenter}.
     */
    static createAutoWithContentLocale(locale: Locale): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} that automatically selects the best available LSTM
     * or dictionary payload data, using a particular data source.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * NOTE(review): the `try_` prefix on the Rust constructor suggests this can fail,
     * presumably by throwing a {@link DataError} — confirm against the implementation.
     *
     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     *
     * @param provider - the data source to use instead of compiled data.
     * @param locale - the content locale to construct the segmenter for.
     * @returns a new {@link WordSegmenter}.
     */
    static createAutoWithContentLocaleAndProvider(provider: DataProvider, locale: Locale): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using compiled data. This does not assume any content locale.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_lstm`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.new_lstm) for more information.
     *
     * @returns a new {@link WordSegmenter}.
     */
    static createLstm(): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using compiled data.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * NOTE(review): the `try_` prefix on the Rust constructor suggests this can fail,
     * presumably by throwing a {@link DataError} — confirm against the implementation.
     *
     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
     *
     * @param locale - the content locale to construct the segmenter for.
     * @returns a new {@link WordSegmenter}.
     */
    static createLstmWithContentLocale(locale: Locale): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using a particular data source.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * NOTE(review): the `try_` prefix on the Rust constructor suggests this can fail,
     * presumably by throwing a {@link DataError} — confirm against the implementation.
     *
     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
     *
     * @param provider - the data source to use instead of compiled data.
     * @param locale - the content locale to construct the segmenter for.
     * @returns a new {@link WordSegmenter}.
     */
    static createLstmWithContentLocaleAndProvider(provider: DataProvider, locale: Locale): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using compiled data. This does not assume any content locale.
     *
     * Note: currently, it uses dictionary for Chinese, Japanese, Burmese, Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_dictionary`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.new_dictionary) for more information.
     *
     * @returns a new {@link WordSegmenter}.
     */
    static createDictionary(): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using compiled data.
     *
     * Note: currently, it uses dictionary for Chinese, Japanese, Burmese, Khmer, Lao, and Thai.
     *
     * NOTE(review): the `try_` prefix on the Rust constructor suggests this can fail,
     * presumably by throwing a {@link DataError} — confirm against the implementation.
     *
     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
     *
     * @param locale - the content locale to construct the segmenter for.
     * @returns a new {@link WordSegmenter}.
     */
    static createDictionaryWithContentLocale(locale: Locale): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using a particular data source.
     *
     * Note: currently, it uses dictionary for Chinese, Japanese, Burmese, Khmer, Lao, and Thai.
     *
     * NOTE(review): the `try_` prefix on the Rust constructor suggests this can fail,
     * presumably by throwing a {@link DataError} — confirm against the implementation.
     *
     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
     *
     * @param provider - the data source to use instead of compiled data.
     * @param locale - the content locale to construct the segmenter for.
     * @returns a new {@link WordSegmenter}.
     */
    static createDictionaryWithContentLocaleAndProvider(provider: DataProvider, locale: Locale): WordSegmenter;

    /**
     * Segments a string.
     *
     * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
     * to the WHATWG Encoding Standard.
     *
     * See the [Rust documentation for `segment_utf16`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenterBorrowed.html#method.segment_utf16) for more information.
     *
     * @param input - the string to segment.
     * @returns a {@link WordBreakIteratorUtf16} over `input`.
     */
    segment(input: string): WordBreakIteratorUtf16;
}