module.exports = function str_word_count(str, format, charlist) {
  //  discuss at: https://locutus.io/php/str_word_count/
  // original by: Ole Vrijenhoek
  // bugfixed by: Kevin van Zonneveld (https://kvz.io)
  // bugfixed by: Brett Zamir (https://brett-zamir.me)
  // bugfixed by: Brett Zamir (https://brett-zamir.me)
  //    input by: Bug?
  // improved by: Brett Zamir (https://brett-zamir.me)
  //   example 1: str_word_count("Hello fri3nd, you're\r\n       looking          good today!", 1)
  //   returns 1: ['Hello', 'fri', 'nd', "you're", 'looking', 'good', 'today']
  //   example 2: str_word_count("Hello fri3nd, you're\r\n       looking          good today!", 2)
  //   returns 2: {0: 'Hello', 6: 'fri', 10: 'nd', 14: "you're", 29: 'looking', 46: 'good', 51: 'today'}
  //   example 3: str_word_count("Hello fri3nd, you're\r\n       looking          good today!", 1, '\u00e0\u00e1\u00e3\u00e73')
  //   returns 3: ['Hello', 'fri3nd', "you're", 'looking', 'good', 'today']
  //   example 4: str_word_count('hey', 2)
  //   returns 4: {0: 'hey'}

  const ctypeAlpha = require('../ctype/ctype_alpha')
  const len = str.length
  const cl = charlist && charlist.length
  let chr = ''
  let tmpStr = ''
  let i = 0
  let c = ''
  const wArr = []
  let wC = 0
  const assoc = {}
  let aC = 0
  let reg = ''
  let match = false

  const _pregQuote = function (str) {
    return (str + '').replace(/([\\.+*?[^\]$(){}=!<>|:])/g, '\\$1')
  }
  const _getWholeChar = function (str, i) {
    // Use for rare cases of non-BMP characters
    const code = str.charCodeAt(i)
    if (code < 0xd800 || code > 0xdfff) {
      return str.charAt(i)
    }
    if (code >= 0xd800 && code <= 0xdbff) {
      // High surrogate (could change last hex to 0xDB7F to treat high private surrogates as single
      // characters)
      if (str.length <= i + 1) {
        throw new Error('High surrogate without following low surrogate')
      }
      const next = str.charCodeAt(i + 1)
      if (next < 0xdc00 || next > 0xdfff) {
        throw new Error('High surrogate without following low surrogate')
      }
      return str.charAt(i) + str.charAt(i + 1)
    }
    // Low surrogate (0xDC00 <= code && code <= 0xDFFF)
    if (i === 0) {
      throw new Error('Low surrogate without preceding high surrogate')
    }
    const prev = str.charCodeAt(i - 1)
    if (prev < 0xd800 || prev > 0xdbff) {
      // (could change last hex to 0xDB7F to treat high private surrogates as single characters)
      throw new Error('Low surrogate without preceding high surrogate')
    }
    // We can pass over low surrogates now as the second component in a pair which we have already
    // processed
    return false
  }

  if (cl) {
    reg = '^(' + _pregQuote(_getWholeChar(charlist, 0))
    for (i = 1; i < cl; i++) {
      if ((chr = _getWholeChar(charlist, i)) === false) {
        continue
      }
      reg += '|' + _pregQuote(chr)
    }
    reg += ')$'
    reg = new RegExp(reg)
  }

  for (i = 0; i < len; i++) {
    if ((c = _getWholeChar(str, i)) === false) {
      continue
    }
    // No hyphen at beginning or end unless allowed in charlist (or locale)
    // No apostrophe at beginning unless allowed in charlist (or locale)
    // @todo: Make this more readable
    match =
      ctypeAlpha(c) ||
      (reg && c.search(reg) !== -1) ||
      (i !== 0 && i !== len - 1 && c === '-') ||
      (i !== 0 && c === "'")
    if (match) {
      if (tmpStr === '' && format === 2) {
        aC = i
      }
      tmpStr = tmpStr + c
    }
    if (i === len - 1 || (!match && tmpStr !== '')) {
      if (format !== 2) {
        wArr[wArr.length] = tmpStr
      } else {
        assoc[aC] = tmpStr
      }
      tmpStr = ''
      wC++
    }
  }

  if (!format) {
    return wC
  } else if (format === 1) {
    return wArr
  } else if (format === 2) {
    return assoc
  }

  throw new Error('You have supplied an incorrect format')
}