// generated by diplomat-tool
import type { DataError } from "./DataError"
import type { DataProvider } from "./DataProvider"
import type { Locale } from "./Locale"
import type { WordBreakIteratorUtf16 } from "./WordBreakIteratorUtf16"
import type { pointer, codepoint } from "./diplomat-runtime.d.ts";


/**
 * An ICU4X word-break segmenter, capable of finding word breakpoints in strings.
 *
 * Instances are obtained through the static `create*` factories below; they differ in which
 * payload data (automatic selection, LSTM, or dictionary) is used, whether a content locale is
 * supplied, and whether the data comes from compiled data or from a {@link DataProvider}.
 *
 * See the [Rust documentation for `WordSegmenter`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html) for more information.
 */
export class WordSegmenter {
    /**
     * Pointer-typed handle used by the generated bindings; not part of the public API.
     *
     * @internal
     */
    get ffiValue(): pointer;
    /**
     * Not part of the public API; use one of the static `create*` factories instead.
     *
     * @internal
     */
    constructor();


    /**
     * Construct a {@link WordSegmenter} that automatically selects the best available LSTM
     * or dictionary payload data, using compiled data. This does not assume any content locale.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_auto`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.new_auto) for more information.
     *
     * @returns A new {@link WordSegmenter}.
     */
    static createAuto(): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} that automatically selects the best available LSTM
     * or dictionary payload data, using compiled data.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     *
     * @param locale - The content locale to construct the segmenter for.
     * @returns A new {@link WordSegmenter}.
     * @throws Presumably a {@link DataError} when the data cannot be loaded (the Rust
     * counterpart is the fallible `try_new_auto`) — TODO confirm against the implementation.
     */
    static createAutoWithContentLocale(locale: Locale): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} that automatically selects the best available LSTM
     * or dictionary payload data, using a particular data source.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     *
     * @param provider - The data source to use instead of compiled data.
     * @param locale - The content locale to construct the segmenter for.
     * @returns A new {@link WordSegmenter}.
     * @throws Presumably a {@link DataError} when the data cannot be loaded (the Rust
     * counterpart is the fallible `try_new_auto`) — TODO confirm against the implementation.
     */
    static createAutoWithContentLocaleAndProvider(provider: DataProvider, locale: Locale): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using compiled data. This does not assume any content locale.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * NOTE(review): the note above is worded identically to the one on the `createAuto*`
     * factories; confirm that the Chinese/Japanese dictionary claim applies to this LSTM variant.
     *
     * See the [Rust documentation for `new_lstm`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.new_lstm) for more information.
     *
     * @returns A new {@link WordSegmenter}.
     */
    static createLstm(): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using compiled data.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * NOTE(review): the note above is worded identically to the one on the `createAuto*`
     * factories; confirm that the Chinese/Japanese dictionary claim applies to this LSTM variant.
     *
     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
     *
     * @param locale - The content locale to construct the segmenter for.
     * @returns A new {@link WordSegmenter}.
     * @throws Presumably a {@link DataError} when the data cannot be loaded (the Rust
     * counterpart is the fallible `try_new_lstm`) — TODO confirm against the implementation.
     */
    static createLstmWithContentLocale(locale: Locale): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using a particular data source.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * NOTE(review): the note above is worded identically to the one on the `createAuto*`
     * factories; confirm that the Chinese/Japanese dictionary claim applies to this LSTM variant.
     *
     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
     *
     * @param provider - The data source to use instead of compiled data.
     * @param locale - The content locale to construct the segmenter for.
     * @returns A new {@link WordSegmenter}.
     * @throws Presumably a {@link DataError} when the data cannot be loaded (the Rust
     * counterpart is the fallible `try_new_lstm`) — TODO confirm against the implementation.
     */
    static createLstmWithContentLocaleAndProvider(provider: DataProvider, locale: Locale): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using compiled data. This does not assume any content locale.
     *
     * Note: currently, it uses dictionary data for Chinese, Japanese, Burmese, Khmer, Lao,
     * and Thai.
     *
     * See the [Rust documentation for `new_dictionary`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.new_dictionary) for more information.
     *
     * @returns A new {@link WordSegmenter}.
     */
    static createDictionary(): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using compiled data.
     *
     * Note: currently, it uses dictionary data for Chinese, Japanese, Burmese, Khmer, Lao,
     * and Thai.
     *
     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
     *
     * @param locale - The content locale to construct the segmenter for.
     * @returns A new {@link WordSegmenter}.
     * @throws Presumably a {@link DataError} when the data cannot be loaded (the Rust
     * counterpart is the fallible `try_new_dictionary`) — TODO confirm against the implementation.
     */
    static createDictionaryWithContentLocale(locale: Locale): WordSegmenter;

    /**
     * Construct a {@link WordSegmenter} with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using a particular data source.
     *
     * Note: currently, it uses dictionary data for Chinese, Japanese, Burmese, Khmer, Lao,
     * and Thai.
     *
     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
     *
     * @param provider - The data source to use instead of compiled data.
     * @param locale - The content locale to construct the segmenter for.
     * @returns A new {@link WordSegmenter}.
     * @throws Presumably a {@link DataError} when the data cannot be loaded (the Rust
     * counterpart is the fallible `try_new_dictionary`) — TODO confirm against the implementation.
     */
    static createDictionaryWithContentLocaleAndProvider(provider: DataProvider, locale: Locale): WordSegmenter;

    /**
     * Segments a string.
     *
     * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
     * to the WHATWG Encoding Standard.
     *
     * See the [Rust documentation for `segment_utf16`](https://docs.rs/icu/2.1.1/icu/segmenter/struct.WordSegmenterBorrowed.html#method.segment_utf16) for more information.
     *
     * @param input - The string to segment.
     * @returns A {@link WordBreakIteratorUtf16} for `input`; presumably it yields the word
     * breakpoints as UTF-16 code unit offsets — TODO confirm against `WordBreakIteratorUtf16`.
     */
    segment(input: string): WordBreakIteratorUtf16;
}