)?)-/vg, "“$1—");
xml = xml.replace(/-[,.]?”/vg, "—”");
xml = xml.replace(/-(!|\?)”/vg, "—$1”");
xml = xml.replace(/-[,.]?<\/([a-z]+)>”/vg, "—$1>”");
xml = xml.replace(/-“/vg, "—”");
xml = xml.replace(/-/vg, "
—");
xml = xml.replace(/-<\/p>/vg, "—
");
xml = xml.replace(/-
/vg, "—
");
xml = xml.replace(/-<\/([a-z]+)><\/p>/vg, "—$1>");
xml = xml.replace(/\s?\s?–\s?\s?/vg, "—");
xml = xml.replace(/-\s\s?/vg, "—");
xml = xml.replace(/\s?\s-/vg, "—");
xml = xml.replace(/\s+—”/vg, "—”");
xml = xml.replace(/I-I/vg, "I—I");
xml = xml.replace(/I-uh/vg, "I—uh");
xml = xml.replace(/-\?/vg, "—?");
return xml;
}
function enDashJointNames(xml) {
// Joint names should use en dashes
xml = xml.replace(/(A|a)gent-parahuman/vg, "$1gent–parahuman");
xml = xml.replace(/(P|p)arahuman-agent/vg, "$1arahuman–agent");
xml = xml.replace(/(Chinese|Japanese|Asian)-American/vg, "$1–American");
xml = xml.replace(/Alexandria-Pretender/vg, "Alexandria–Pretender");
xml = xml.replace(/Antares-Anelace/vg, "Antares–Anelace");
xml = xml.replace(/Armsmaster-Defiant/vg, "Armsmaster–Defiant");
xml = xml.replace(/Astaroth-Nidhug/vg, "Astaroth–Nidhug");
xml = xml.replace(/Bet-Gimel/vg, "Bet–Gimel");
xml = xml.replace(/Capricorn-Byron/vg, "Capricorn–Byron");
xml = xml.replace(/Capulet-Montague/vg, "Capulet–Montague");
xml = xml.replace(/Challenger-Gallant/vg, "Challenger–Gallant");
xml = xml.replace(/Cheit-Gimel/vg, "Bet–Gimel");
xml = xml.replace(/Crawler-Breed/vg, "Crawler–Breed");
xml = xml.replace(/Creutzfeldt-Jakob/vg, "Creutzfeldt–Jakob");
xml = xml.replace(/Dallon-Pelham/vg, "Dallon–Pelham");
xml = xml.replace(/Damsel-Ashley/vg, "Damsel–Ashley");
xml = xml.replace(/East-West/vg, "east–west");
xml = xml.replace(/G-N/vg, "G–N");
xml = xml.replace(/Gaea-Eden/vg, "Gaea–Eden");
xml = xml.replace(/Gimel-Europe/vg, "Gimel–Europe");
xml = xml.replace(/Imp-Damsel/vg, "Imp–Damsel");
xml = xml.replace(/Matryoshka-Valentin/vg, "Matryoshka–Valentin");
xml = xml.replace(/Night Hag-Nyx/vg, "Night Hag–Nyx");
xml = xml.replace(/Norwalk-Fairfield/vg, "Norwalk–Fairfield");
xml = xml.replace(/Simurgh-Myrddin-plant/vg, "Simurgh–Myrddin–plant");
xml = xml.replace(/Tristan-Byron/vg, "Tristan–Byron");
xml = xml.replace(/Tristan-Capricorn/vg, "Tristan–Capricorn");
xml = xml.replace(/Undersider(s?)-(Breakthrough|Ambassador)/vg, "Undersider$1–$2");
xml = xml.replace(/Weaver-Clockblocker/vg, "Weaver–Clockblocker");
xml = xml.replace(/Winter-Mannequin/vg, "Winter–Mannequin");
return xml;
}
// NOTE(review): this block appears damaged in this copy of the file. The regex literal opened on the line after the
// first `xml.replace(` is never closed and runs straight into prose about scene breaks, so the end of the possessive
// fix and the start of a scene-break clean-up function seem to have been lost. The template-literal replacements
// below likewise look as though markup was stripped from them. Restore from the canonical source before editing;
// the code lines are left untouched here.
//
// What is still visible: paragraphs ending in "■" or in one or more bold "⊙" characters are replaced by the
// template strings that follow each pattern, and the resulting text is returned.
function fixPossessives(xml) {
// Fix possessive of names ending in "s".
xml = xml.replace(
/(? would be more semantically appropriate, but loses the author's intent. This is
// especially the case in Ward, which uses a variety of different scene breaks.
xml = xml.replace(/]*)>■<\/p>/vg, `
■
`);
xml = xml.replace(
/⊙<\/strong><\/p>/vg,
`⊙
`
);
xml = xml.replace(
/⊙<\/strong><\/em><\/p>/vg,
`⊙
`
);
xml = xml.replace(
/⊙⊙<\/strong><\/p>/vg,
`⊙
`
);
xml = xml.replace(
/⊙ *⊙ *⊙ *⊙ *⊙<\/strong><\/p>/vg,
`⊙ ⊙ ⊙ ⊙ ⊙
`
);
return xml;
}
// Normalizes capitalization of names, places, brands, and classification terms via a long sequence of regex
// replacements, applied in order, and returns the transformed text. `bookTitle` is compared against "Ward" to enable
// a few book-specific rules.
//
// NOTE(review): several patterns below contain `(?)` or `(?|...)` where a lookbehind such as `(?<!...)` would be
// expected; the `<...>` portions appear to have been stripped from this copy of the file. Restore them from the
// canonical source before changing any of these rules. The code lines are left untouched here.
function fixCapitalization(xml, bookTitle) {
// This occurs enough times it's better to do here than in one-off fixes. There are some cases that get corrected
// back in the substitutions file. Note that Ward contains much talk of "the clairvoyants", so we don't want to
// capitalize plurals.
xml = xml.replace(/([Tt])he clairvoyant(?!s)/vg, "$1he Clairvoyant");
// Cape or other parahuman-ish names
xml = xml.replace(/Butcher one/vg, "Butcher One");
xml = xml.replace(/Butcher six/vg, "Butcher Six");
xml = xml.replace(/Butcher twelve/vg, "Butcher Twelve");
xml = xml.replace(/Butcher two/vg, "Butcher Two");
xml = xml.replace(/doormaker/vg, "Doormaker");
xml = xml.replace(/Dragon’s teeth/vg, "Dragon’s Teeth");
xml = xml.replace(/faerie queen/vg, "Faerie Queen");
xml = xml.replace(/Glory girl/vg, "Glory Girl");
xml = xml.replace(/goblin king/vg, "Goblin King");
xml = xml.replace(/Gray boy/vg, "Gray Boy");
xml = xml.replace(/Harbinger zero/vg, "Harbinger Zero");
xml = xml.replace(/Hatchet face/vg, "Hatchet Face");
xml = xml.replace(/machine army/vg, "Machine Army");
xml = xml.replace(/Number man/vg, "Number Man");
xml = xml.replace(/Resound/vg, "ReSound");
xml = xml.replace(/speedrunners/vg, "Speedrunners");
xml = xml.replace(/the blasphemies/vg, "the Blasphemies");
xml = xml.replace(/three blasphemies/vg, "Three Blasphemies");
// Proper noun/adjectives
xml = xml.replace(/molotov/vg, "Molotov");
xml = xml.replace(/olympic/vg, "Olympic");
xml = xml.replace(/\blatin\b/vg, "Latin");
xml = xml.replace(/\bfreud/vg, "Freud"); // also gets "freudian", but not "schadenfreude"
xml = xml.replace(/\bindian/vg, "Indian");
xml = xml.replace(/statue of liberty/vg, "Statue of Liberty");
xml = xml.replace(/milky way galaxy/vg, "Milky Way galaxy");
xml = xml.replace(/hail mary/vg, "Hail Mary");
// "tesla" could be lowercase if used as a unit of magnetic flux density, so be slightly conservative.
xml = xml.replace(/tesla (coil|tower)/vg, "Tesla $1");
// "Coke" should be capitalized when referring to Coca-Cola, but not when referring to cocaine.
// It's also lowercased if used as a generic term for cola, but plausibly that doesn't happen in
// the books. Here, we fix cases that are definitely not cocaine, and handle other instances via
// one-offs in the substitutions files.
xml = xml.replace(/a coke/vg, "a Coke");
xml = xml.replace(/a can of coke/vg, "a can of Coke");
xml = xml.replace(/cherry coke/vg, "Cherry Coke");
xml = xml.replace(/coke bottle/vg, "Coke bottle");
// Other brand names
xml = xml.replace(/Youtube/vg, "YouTube");
xml = xml.replace(/ken doll/vg, "Ken doll");
xml = xml.replace(/mercedes/vg, "Mercedes");
// Channel names
xml = xml.replace(/channel twelve/vig, "Channel Twelve");
xml = xml.replace(/channel twelve news/vig, "Channel Twelve News");
xml = xml.replace(/channel four/vig, "Channel Four");
// Weapon names should not be capitalized
xml = xml.replace(/(?)Halberd/vg, "halberd");
xml = xml.replace(/(?)cannonblade/vg, "cannonblade");
xml = xml.replace(/(?)Loft/vg, "loft");
// Martial arts or sports names are treated as common nouns and not traditionally capitalized. "Krav Maga" remains
// capitalized, interestingly (according to dictionaries and Wikipedia).
xml = xml.replace(/(?)Judo/vg, "judo");
xml = xml.replace(/(?)Aikido/vg, "aikido");
xml = xml.replace(/(?)Karate/vg, "karate");
xml = xml.replace(/(?)Tae Kwon Do/vg, "tae kwon do");
xml = xml.replace(/(?)Track and Field/vg, "track and field");
// "yakuza" is like "gangster" or "mafia", not a proper name.
xml = xml.replace(/(?)Yakuza/vg, "yakuza");
// There's no reason why university should be capitalized in most contexts, although sometimes it's used as part of
// a compound noun or at the beginning of a sentence.
xml = xml.replace(/(?|Cornell |Nilles )University(?! Road)/vg, "university");
// Organ names (e.g. brain, arm) or scientific names are not capitalized, so the "corona pollentia" and friends should
// not be either. The books are inconsistent.
xml = xml.replace(/(?|-)Corona/vg, "corona");
xml = xml.replace(/Pollentia/vg, "pollentia");
xml = xml.replace(/Radiata/vg, "radiata");
xml = xml.replace(/Gemma/vg, "gemma");
// Place names
xml = xml.replace(/(Stonemast|Shale) avenue/vg, "$1 Avenue");
xml = xml.replace(/(Lord|Slater) street/vg, "$1 Street");
xml = xml.replace(/(Hollow|Cedar) point/vg, "$1 Point");
xml = xml.replace(/(Norwalk|Fenway|Stratford) station/vg, "$1 Station");
xml = xml.replace(/downtown Brockton Bay/vg, "Downtown Brockton Bay");
xml = xml.replace(/the megalopolis/vg, "the Megalopolis");
xml = xml.replace(/earths(?![a-z])/vg, "Earths");
xml = xml.replace(/bible belt/vg, "Bible Belt");
xml = xml.replace(/the birdcage/vg, "the Birdcage");
xml = xml.replace(/Captain’s hill/vg, "Captain’s Hill");
xml = xml.replace(/Weymouth shopping center/vg, "Weymouth Shopping Center");
xml = xml.replace(/rocky mountains/vg, "Rocky Mountains");
xml = xml.replace(/United states/vg, "United States");
if (bookTitle === "Ward") {
xml = xml.replace(/the bunker/vg, "the Bunker");
xml = xml.replace(/‘bunker’/vg, "‘Bunker’");
}
// These seem to be used more as generic terms than as place names.
xml = xml.replace(/the Market/vg, "the market");
xml = xml.replace(/the Bay(?! [A-Z])/vg, "the bay"); // this one gets corrected back a few times in the substitutions file
xml = xml.replace(/the College(?! [A-Z])/vg, "the college");
xml = xml.replace(/(?)North (?:E|e)nd/vg, "north end");
// NOTE(review): the comment below describes "Mom"/"Dad" handling, but the statement that follows it only lowercases
// "Flock"; the Mom/Dad rules appear to be missing from this copy of the file.
// "Mom" and "Dad" should be capitalized when used as a proper name. These regexps are tuned to catch a good amount of
// instances, without over-correcting for non-proper-name-like cases. Many other instances are handled in
// the substitutions file.
xml = xml.replace(/(?)Flock/vg, "flock");
// "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". "patrol
// group" is always lowercased. It seems like "Patrol" is a proper name, and is used as a capitalized modifier in
// other contexts (e.g. Patrol leader). So let's standardize on "Patrol ".
xml = xml.replace(
/patrol (block|group|leader|guard|student|uniform|squad|soldier|officer|crew|girl|bus|training)/vig,
(_, $1) => `Patrol ${$1.toLowerCase()}`
);
// This usually works in Ward (some instances corrected back in the substitutions file), and has a few false positives
// in Worm, where it is never needed:
if (bookTitle === "Ward") {
xml = xml.replace(/the patrol(?!s|ling)/vg, "the Patrol");
}
// Especially early in Worm, PRT designations are capitalized; they should not be. This fixes the cases where we
// can be reasonably sure they don't start a sentence, although more specific instances are done in the substitutions
// file, and some need to be back-corrected.
//
// Note: "Master" is specifically omitted because it fails poorly on Worm Interlude 4. Other instances need to be
// corrected via the substitutions file.
//
// This also over-de-capitalizes "The Stranger" in Ward (a titan name). Those also get fixed in the substitutions
// file.
xml = xml.replace(
/(?|\n|: )(Mover|Shaker|Brute|Breaker|Tinker|Blaster|Thinker|Striker|Changer|Trump|Stranger|Shifter|Shaper)(?! [A-Z])/vg,
(_, designation) => designation.toLowerCase()
);
// "<designation>-<number>" becomes "<designation> <number>" (hyphen replaced by a space).
xml = xml.replace(
/(mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)-(\d+)/vig,
"$1 $2"
);
// Two designations joined by a separator are re-joined with an en dash.
// NOTE(review): inside the character class, ` -\/` is a range from space (U+0020) through "/" (U+002F), so it also
// matches characters such as "." "," "'" and "!". If only space, hyphen, and slash were intended, the hyphen needs
// to be escaped or moved to the edge of the class — confirm before changing.
xml = xml.replace(
/(mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)[ -\/](mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)/vig,
"$1–$2"
);
// Capitalization is inconsistent, but shard names seem to usually be capitalized.
// Note this must happen after the above, since the above de-capitalizes stranger and shaper.
xml = xml.replace(/Grasping self/vg, "Grasping Self");
xml = xml.replace(/Cloven stranger/vg, "Cloven Stranger");
xml = xml.replace(/Princess shaper/vg, "Princess Shaper");
xml = xml.replace(/Fragile one/vg, "Fragile One");
// I was very torn on what to do with capitalization for "Titan" and "Titans". In general you don't capitalize species
// names or other classifications, e.g. style guides are quite clear you don't capitalize "gods". The author
// capitalizes them more often than not (e.g., 179 raw "Titans" to 49 "titans"), but is quite inconsistent.
//
// In the end, I decided against de-capitalization, based on the precedent set by "Endbringers" (which are
// conceptually paired with Titans several times in the text). However, we only capitalize the class after they are
// _introduced_ as a class in Sundown 17.y. (Before then we still capitalize individual names like "Dauntless Titan"
// or "Kronos Titan".)
if (bookTitle === "Ward") {
// All plural discussions of "Titans" are after Sundown 17.y.
xml = xml.replace(/titans/vg, "Titans");
// Since we can't safely change all instances of "titan", most are in the substitutions file. We can do a few here,
// though.
xml = xml.replace(/dauntless titan/vig, "Dauntless Titan"); // Sometimes "Dauntless" isn't even capitalized.
xml = xml.replace(/Kronos titan/vg, "Kronos Titan");
}
// For the giants, the prevailing usage seems to be to keep the term lowercase, but capitalize when used as a name.
xml = xml.replace(/(?<=Mathers |Goddess )giant/vg, "Giant");
xml = xml.replace(/mother giant/vig, "Mother Giant");
xml = xml.replace(/(?)Giants/vg, "giants");
return xml;
}
function fixMispellings(xml) {
xml = xml.replace(/(S|s)houlderblade/vg, "$1houlder blade");
xml = xml.replace(/(S|s)cott(?: |-)free/vg, "$1cot-free");
xml = xml.replace(/(N|n)on(?: |-)fiction/vg, "$1onfiction");
xml = xml.replace(/(C|c)hanged tacks/vg, "$1hanged tack");
xml = xml.replace(/(E|e)ye witness/vg, "$1yewitness");
xml = xml.replace(/(G|g)asmask/vg, "$1as mask");
xml = xml.replace(/(S|s)oiree/vg, "$1oirée");
xml = xml.replace(/(A|a)ccomodations/vg, "$1ccommodations");
xml = xml.replace(/(L|l)oin cloth/vg, "$1oincloth");
xml = xml.replace(/(S|s)hip(?: |-)shape/vg, "$1hipshape");
return xml;
}
// Normalizes hyphenation of specific compounds: some lose their hyphen, some get a space instead, and
// "X year old" style phrases are rewritten with hyphens. Returns the transformed text.
//
// NOTE(review): the final `xml.replace(` statement is damaged in this copy of the file. Its regex literal is cut off
// after `(?` and is fused with the tail of a callback that builds a "Case ..." string, so the "X-foot-tall" rule and
// whatever followed it (apparently a separate function) cannot be read from here. Restore from the canonical source
// before editing; the code lines are left untouched.
function fixHyphens(xml) {
// Compounds which need hyphens removed
xml = xml.replace(/(above)-(board)/vig, "$1$2");
xml = xml.replace(/(flood)-(lamp)/vig, "$1$2");
xml = xml.replace(/(foot)-(ball)/vig, "$1$2");
xml = xml.replace(/(pre)-(emptive|made)/vig, "$1$2");
xml = xml.replace(/(re)-(establish)/vig, "$1$2");
xml = xml.replace(/(skin)-(tight)/vig, "$1$2");
xml = xml.replace(/(super)-(ego)/vig, "$1$2");
// Compounds which need hyphens made into spaces
xml = xml.replace(/(de)-(facto)/vig, "$1 $2");
xml = xml.replace(/(foster)-/vig, "$1 ");
xml = xml.replace(/(golf)-(ball)/vig, "$1 $2");
xml = xml.replace(/(non)-(sequitur)/vig, "$1 $2");
xml = xml.replace(/(tendril)-(girl)/vig, "$1 $2");
xml = xml.replace(/Middle-Eastern/vig, "Middle Eastern");
// "X-year-old" should use hyphens; all grammar guides agree. The books are very inconsistent but most often omit
// them.
xml = xml.replace(/(\w+)[ \-]year[ \-]old(s?)(?!\w)/vg, "$1-year-old$2");
xml = xml.replace(/(\w+) or (\w+)-year-old/vg, "$1- or $2-year-old");
// "X-foot-tall" should use hyphens, but we need to avoid "foot taller", "a foot tall", etc.
xml = xml.replace(/(? `Case ${caseNumber[0].toUpperCase()}${caseNumber.substring(1)}`
);
return xml;
}
function fixClassNames(xml) {
// Class names are very inconsistent:
// * 9 class S
// * 15 class-S
// * 8 Class S
// * 13 Class-S
// We standardize on Class-S when used as an adjective, Class S when used as a noun. The heuristic
// that works for the book is that if it's followed by a period, it's a noun.
xml = xml.replace(/\b[Cc]lass[ \-](S|A|B|C|D)\b/vg, "Class-$1");
xml = xml.replace(/\bClass-(S|A|B|C|D)\./vg, "Class $1.");
xml = xml.replace(/\bClass-(S|A|B|C|D)-\b/vg, "Class-$1 ");
return xml;
}
// Applies a series of fixes to the "Parahumans Online" message-board passages (login banner, "Replied on ..." lines,
// "Welcome back to ..." lines, "♦" headings) and returns the transformed text.
//
// NOTE(review): this block appears damaged in this copy of the file; markup seems to have been stripped from the
// string and regex literals. As written here:
//   * the first three `replaceAll` calls have identical search and replacement text, so they would be no-ops;
//   * the "Replied on" regex literal and its replacement string are split across lines;
//   * the "marked yourself as back" pattern contains an empty `(?)` group where a lookahead would be expected.
// Restore from the canonical source before editing; the code lines are left untouched.
function fixParahumansOnline(xml) {
xml = xml.replaceAll("Using identity “", "Using identity “");
xml = xml.replaceAll(
`Forum thread.`,
`Forum thread.`
);
xml = xml.replaceAll(
`Edit that list Here.`,
`Edit that list Here.`
);
xml = xml.replaceAll(
`Welcome to the Parahumans Online message boards.
`,
`
Welcome to the Parahumans Online message boards.
`
);
xml = xml.replace(
/You are currently logged in, ([^<]+)<\/span>/vg,
`You are currently logged in, $1`
);
// Most cases have the colon but some don't.
xml = xml.replace(/(Replied on \w+ \d{1,2}(?:st|nd|rd|th),? ?Y?\d*)
/vg, "$1:
");
// "You have marked yourself as away." has a period, so this one should too.
xml = xml.replace(/(You have marked yourself as back)(?)/vg, "$1.");
// It's inconsistent to exclude the punctuation from the bolding; fix it.
xml = xml.replace(/Welcome back to (.+?)<\/strong>!/vg, "Welcome back to $1!");
xml = xml.replace(/♦ (.*)<\/strong><\/p>/vg, `♦ $1
`);
return xml;
}
// Reports whether an element carries no useful content: its trimmed text is either empty or begins with one of the
// chapter-navigation labels ("Last Chapter", "Previous Chapter", "Next Chapter").
function isEmptyOrGarbage(el) {
  const trimmed = el.textContent.trim();
  if (trimmed === "") {
    return true;
  }

  const navigationLabels = ["Last Chapter", "Previous Chapter", "Next Chapter"];
  return navigationLabels.some(label => trimmed.startsWith(label));
}
// Backslash-escapes the characters [ ] / { } ( ) * + ? . \ ^ $ | in `str` so the result can be embedded in a regular
// expression and match the input literally.
function escapeRegExp(str) {
  const specialCharacters = /[[\]/{}()*+?.\\^$|]/ug;
  return str.replace(specialCharacters, character => `\\${character}`);
}
// Decodes a hex-encoded, XOR-obfuscated string: the first two hex digits of `hash` are the XOR key, and every
// following pair of hex digits is one character code XORed with that key. Returns the decoded string.
function decodeCloudFlareEmail(hash) {
  const hexPairAt = offset => parseInt(hash.substring(offset, offset + 2), 16);

  const key = hexPairAt(0);
  const decodedCharacters = [];
  for (let offset = 2; offset < hash.length; offset += 2) {
    decodedCharacters.push(String.fromCharCode(hexPairAt(offset) ^ key));
  }
  return decodedCharacters.join("");
}