; HTML Entities Encoding
; https://www.autohotkey.com
; Similar to the Transform's HTML sub-command, this function converts a
; string into its HTML equivalent by translating characters whose ASCII
; values are above 127 to their HTML names (e.g. £ becomes &pound;). In
; addition, the four characters "&<> are translated to &quot;&amp;&lt;&gt;.
; Finally, each linefeed (`n) is translated to <br>`n (i.e. <br> followed
; by a linefeed).
; In addition to the functionality above, Flags can be zero or a
; combination (sum) of the following values. If omitted, it defaults to 1.
; - 1: Converts certain characters to named expressions. e.g. € is
; converted to &euro;
; - 2: Converts certain characters to numbered expressions. e.g. € is
; converted to &#8364;
; Only non-ASCII characters are affected. If Flags is the number 3,
; numbered expressions are used only where a named expression is not
; available. The following characters are always converted: <>"& and `n
; (line feed).
; EncodeHTML(String, Flags := 1)
; Returns a copy of String that is safe to embed in HTML.
;   String - the text to encode.
;   Flags  - bit field: 1 = emit named entities (e.g. &pound;),
;            2 = emit numbered entities (e.g. &#163;). With 3, a numbered
;            entity is used only where no named one is known; with 0,
;            such characters are copied through unchanged.
; Always converted, regardless of Flags: " & < > (to &quot; &amp; &lt; &gt;),
; linefeed (to <br> followed by a linefeed), and code points 128..159
; (to the corresponding entry of the ansi table).
EncodeHTML(String, Flags := 1)
{
    static TRANS_HTML_NAMED := 1
    static TRANS_HTML_NUMBERED := 2
    ; Entity names for code points 128..255; the entry for a given code is
    ; ansi[code - 127]. Entries starting with "#" are numeric references.
    static ansi := ["euro", "#129", "sbquo", "fnof", "bdquo", "hellip", "dagger", "Dagger", "circ", "permil", "Scaron", "lsaquo", "OElig", "#141", "#381", "#143", "#144", "lsquo", "rsquo", "ldquo", "rdquo", "bull", "ndash", "mdash", "tilde", "trade", "scaron", "rsaquo", "oelig", "#157", "#382", "Yuml", "nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar", "sect", "uml", "copy", "ordf", "laquo", "not", "shy", "reg", "macr", "deg", "plusmn", "sup2", "sup3", "acute", "micro", "para", "middot", "cedil", "sup1", "ordm", "raquo", "frac14", "frac12", "frac34", "iquest", "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil", "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml", "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times", "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig", "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil", "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml", "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide", "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml"]
    ; Code points above 255 that have a named entity, mapped to that name's
    ; index in ansi. A Map is used so the integer returned by Ord() is
    ; matched by value rather than by the spelling of a property name.
    static unicode := Map(0x20AC, 1, 0x201A, 3, 0x0192, 4, 0x201E, 5, 0x2026, 6, 0x2020, 7, 0x2021, 8, 0x02C6, 9, 0x2030, 10, 0x0160, 11, 0x2039, 12, 0x0152, 13, 0x2018, 18, 0x2019, 19, 0x201C, 20, 0x201D, 21, 0x2022, 22, 0x2013, 23, 0x2014, 24, 0x02DC, 25, 0x2122, 26, 0x0161, 27, 0x203A, 28, 0x0153, 29, 0x0178, 32)

    out := ""
    for i, char in StrSplit(String)
    {
        code := Ord(char)
        switch code
        {
            ; Markup-significant characters and linefeed are always escaped.
            case 10: out .= "<br>`n"
            case 34: out .= "&quot;"
            case 38: out .= "&amp;"
            case 60: out .= "&lt;"
            case 62: out .= "&gt;"
            default:
                if (code >= 160 && code <= 255)
                {
                    ; Every code point in this range has a named entity,
                    ; so the named form wins whenever it is requested.
                    if (Flags & TRANS_HTML_NAMED)
                        out .= "&" ansi[code - 127] ";"
                    else if (Flags & TRANS_HTML_NUMBERED)
                        out .= "&#" code ";"
                    else
                        out .= char
                }
                else if (code > 255)
                {
                    ; Only the code points listed in the unicode table have
                    ; a name; everything else falls back to the numbered
                    ; form if it was requested.
                    if ((Flags & TRANS_HTML_NAMED) && unicode.Has(code))
                        out .= "&" ansi[unicode[code]] ";"
                    else if (Flags & TRANS_HTML_NUMBERED)
                        out .= "&#" code ";"
                    else
                        out .= char
                }
                else
                {
                    ; 128..159 are converted unconditionally; plain ASCII
                    ; is copied through as is.
                    if (code >= 128 && code <= 159)
                        out .= "&" ansi[code - 127] ";"
                    else
                        out .= char
                }
        }
    }
    return out
}