// ==UserScript==
// @name               双击选中文短语 | CJK Double-Click Phrase Select (Firefox)
// @name:zh-CN         双击选中文短语(Firefox)
// @name:ja            CJKダブルクリック語句選択(Firefox)
// @name:en            CJK Double-Click Phrase Select (Firefox)
// @namespace          https://github.com/lzblack
// @homepageURL        https://github.com/lzblack/userscripts
// @version            0.2.3
// @author             lzblack
// @description        Restore the pre-ICU4X Firefox double-click behavior for CJK text: double-clicking a CJK character (Chinese, Japanese kana, Bopomofo, Hangul) selects the contiguous run of CJK characters up to the nearest non-CJK boundary (punctuation, space, Latin letters, etc.), instead of just one character. Firefox only — Chrome / Edge / Safari keep their native word-level selection.
// @description:zh-CN  恢复 Firefox 双击 CJK 文本的旧行为:双击 CJK 字符(中文、日文假名、注音、谚文)时选中"连续整段 CJK 字符",直到下一个非 CJK 边界(标点、空格、字母等)为止。修复 ICU4X 引入的"双击只选单字"回归。仅 Firefox 生效,Chrome / Edge / Safari 保留原生分词。
// @description:ja     ICU4X導入前のFirefoxにおけるCJKテキストのダブルクリック動作を復元します。CJK文字(漢字・かな・注音・ハングル)をダブルクリックすると、1文字だけでなく、最も近い非CJK境界(句読点・空白・ラテン文字など)まで連続するCJK文字をまとめて選択します。Firefox専用——Chrome / Edge / Safari ではネイティブの単語選択を維持します。
// @description:en     Restore the pre-ICU4X Firefox double-click behavior for CJK text: double-clicking a CJK character (Chinese, Japanese kana, Bopomofo, Hangul) selects the contiguous run of CJK characters up to the nearest non-CJK boundary (punctuation, space, Latin letters, etc.), instead of just one character. Firefox only — Chrome / Edge / Safari keep their native word-level selection.
// @match              *://*/*
// @run-at             document-end
// @all-frames         true
// @grant              none
// @license            MIT
// @supportURL         https://github.com/lzblack/userscripts/issues
// @updateURL          https://raw.githubusercontent.com/lzblack/userscripts/main/cjk-dblclick-select/cjk-dblclick-select.user.js
// @downloadURL        https://raw.githubusercontent.com/lzblack/userscripts/main/cjk-dblclick-select/cjk-dblclick-select.user.js
// ==/UserScript==

/*
 * What & why
 * ----------
 * Pre-ICU4X Firefox treated a double-click on a CJK character as "select the
 * contiguous run of CJK characters up to the nearest punctuation/space". ICU4X
 * replaced that with per-character selection (a single character). This script
 * restores the old behavior: when a dblclick's default selection contains any
 * CJK character (Han, kana, Bopomofo, Hangul), expand the selection outward to
 * the maximal run of contiguous CJK characters around the cursor. Japanese
 * kanji+kana mixes count as one run, so kana no longer breaks a phrase apart.
 *
 * Upstream bug: https://bugzilla.mozilla.org/show_bug.cgi?id=2040746
 * (uninstall this script once that bug is fixed upstream)
 *
 * Manual test cases (each is one <p> in an HTML document; click target shown):
 *
 *   <p>今天天气很好我们一起去公园吧</p>
 *       click any char → 今天天气很好我们一起去公园吧
 *   <p>今天天气很好,我们一起去公园吧。</p>
 *       click 好 → 今天天气很好
 *   <p>今天天气很好,我们一起去公园吧。</p>
 *       click 园 → 我们一起去公园吧
 *   <p>Today 今天 is 晴天 weather</p>
 *       click 今 → 今天
 *   <p>今日はいい天気ですね</p>
 *       click は → 今日はいい天気ですね (kanji+kana run)
 *   <p>東京に行きました。</p>
 *       click 東 → 東京に行きました
 *   <p>안녕하세요 반갑습니다</p>
 *       click 녕 → 안녕하세요 (stops at space)
 *   <p>价格是 100 元整</p>
 *       click 元 → 元整
 *   <p>The quick brown fox</p>
 *       click quick → quick (no CJK, untouched)
 *   <input value="今天">
 *       click 天 → native (form control, skipped)
 *   <p>今天<span>天气</span>很好</p>
 *       click 好 → 今天天气很好 (crosses spans)
 *   <div>今天<p>新段落</p>剩余</div>
 *       click 余 → 剩余 (stops at nested block)
 *   <p>第一行<br>第二行</p>
 *       click 行(1st) → 第一行 (stops at <br>; never merges across line break)
 */

(function () {
  "use strict";

  // Firefox-only: Chrome/Edge/Safari use ICU BreakIterator which gives
  // reasonable CJK word-level selection; only Firefox post-ICU4X regressed
  // to single-char selection, and only Firefox is what this script fixes.
  if (!navigator.userAgent.includes("Firefox")) return;

  // A "CJK character" is any Han ideograph, Japanese kana, Bopomofo, or Hangul,
  // plus the iteration / long-vowel marks that live inside CJK runs but carry
  // Script=Common (々ー〆 etc.). \p{Script=Han} covers every ideograph plane
  // (basic, Ext A–F, compatibility) without a hand-maintained range list, so
  // supplementary-plane characters work for free. Everything else — CJK
  // punctuation (,。「」), full-width digits, ASCII, whitespace, Emoji —
  // falls outside and therefore acts as a boundary with no allowlist to keep.
  const CJK_CHAR_RE =
    /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Bopomofo}\p{Script=Hangul}々〆〇ヶヵー]/u;

  // True if the single code point `cp` matches CJK_CHAR_RE.
  function isCJKCodePoint(cp) {
    return CJK_CHAR_RE.test(String.fromCodePoint(cp));
  }

  // True if string `s` contains at least one CJK code point. Iterates by code
  // point (stepping 2 UTF-16 units over astral characters), not by code unit.
  function containsCJK(s) {
    for (let i = 0; i < s.length; ) {
      const cp = s.codePointAt(i);
      if (isCJKCodePoint(cp)) return true;
      i += cp > 0xffff ? 2 : 1;
    }
    return false;
  }

  // Returns { cp, start } for the code point ending at UTF-16 index `i`, or
  // null when `i` is at (or before) the start of `text`. If the unit before
  // `i` is a low surrogate preceded by a high surrogate, `start` is moved back
  // one more unit so the pair is read as a single code point.
  function prevCodePoint(text, i) {
    if (i <= 0) return null;
    let start = i - 1;
    const c = text.charCodeAt(start);
    if (c >= 0xdc00 && c <= 0xdfff && start > 0) {
      const h = text.charCodeAt(start - 1);
      if (h >= 0xd800 && h <= 0xdbff) start -= 1;
    }
    return { cp: text.codePointAt(start), start };
  }

  // Starting at `offset` inside text node `node`, step left while the
  // preceding code point is CJK. Returns the offset where the run begins
  // (0 if the run reaches the start of the node).
  function walkLeftInNode(node, offset) {
    const text = node.data;
    let i = offset;
    while (i > 0) {
      const prev = prevCodePoint(text, i);
      if (!prev || !isCJKCodePoint(prev.cp)) return i;
      i = prev.start;
    }
    return 0;
  }

  // Starting at `offset` inside text node `node`, step right while the code
  // point at the cursor is CJK. Returns the offset just past the run
  // (text.length if the run reaches the end of the node).
  function walkRightInNode(node, offset) {
    const text = node.data;
    let i = offset;
    while (i < text.length) {
      const cp = text.codePointAt(i);
      if (!isCJKCodePoint(cp)) return i;
      i += cp > 0xffff ? 2 : 1;
    }
    return text.length;
  }

  // True if `el`'s computed display is neither inline-ish ("inline",
  // "inline-block", ...) nor "contents". An empty display string counts as
  // "not block-like". Shared by every boundary test below so they all agree
  // on what a block-level box is.
  function hasBlockLikeDisplay(el) {
    const display = window.getComputedStyle(el).display;
    return (
      Boolean(display) &&
      !display.startsWith("inline") &&
      display !== "contents"
    );
  }

  // Closest ancestor whose computed display is not inline-ish. Bounds the
  // cross-node walk so we don't merge runs across <p>, <div>, list items, etc.
  // Falls back to <body> (or the document element) when no such ancestor
  // exists on the parentElement chain.
  function getBlockAncestor(node) {
    let cur = node.parentElement;
    while (cur) {
      if (hasBlockLikeDisplay(cur)) return cur;
      cur = cur.parentElement;
    }
    return node.ownerDocument.body || node.ownerDocument.documentElement;
  }

  // True if `node`'s ancestor chain up to (not including) blockAncestor stays
  // inline — i.e. no nested block element sits between them. The cross-node
  // walks STOP (not skip) at the first node where this is false: a nested
  // block is a hard boundary, and in document order everything past it is on
  // the far side of that block, so it must not merge into the run.
  function isInlineWithin(node, blockAncestor) {
    let cur = node.parentElement;
    while (cur && cur !== blockAncestor) {
      if (hasBlockLikeDisplay(cur)) return false;
      cur = cur.parentElement;
    }
    return true;
  }

  // A <br> or any non-inline sibling element encountered mid-walk is a visual
  // line/block break: stop, never merge across it. <br> computes to
  // display:inline, so the nodeName test must come BEFORE the display test.
  function isBoundaryElement(el) {
    if (el.nodeName === "BR") return true;
    return hasBlockLikeDisplay(el);
  }

  // Walk left across text nodes within the block ancestor. In real Chinese
  // text the run is bounded by punctuation within ~dozens of chars, so an
  // unbounded walk is fine; no cap is enforced.
  // Returns { node, offset } for the start of the CJK run.
  function walkLeftAcrossNodes(anchorNode, anchorOffset) {
    const block = getBlockAncestor(anchorNode);
    let startNode = anchorNode;
    let startOffset = walkLeftInNode(anchorNode, anchorOffset);
    // The run ended inside the anchor node: no need to look at neighbours.
    if (startOffset > 0) return { node: startNode, offset: startOffset };

    const tw = anchorNode.ownerDocument.createTreeWalker(
      block,
      NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT
    );
    tw.currentNode = anchorNode;
    // Keep going only while the run reaches the very start of the current
    // node (startOffset === 0).
    while (startOffset === 0) {
      const prev = tw.previousNode();
      if (!prev) break;
      if (prev.nodeType === Node.ELEMENT_NODE) {
        if (isBoundaryElement(prev)) break;
        continue; // inline element: keep walking to reach its text
      }
      if (!isInlineWithin(prev, block)) break;
      const len = prev.data.length;
      if (len === 0) {
        // Empty text node: adopt it and keep looking further left.
        startNode = prev;
        continue;
      }
      const last = prevCodePoint(prev.data, len);
      if (!last || !isCJKCodePoint(last.cp)) break;
      startNode = prev;
      startOffset = walkLeftInNode(prev, len);
    }
    return { node: startNode, offset: startOffset };
  }

  // Mirror image of walkLeftAcrossNodes: walk right across text nodes within
  // the block ancestor. Returns { node, offset } for the end of the CJK run.
  function walkRightAcrossNodes(anchorNode, anchorOffset) {
    const block = getBlockAncestor(anchorNode);
    let endNode = anchorNode;
    let endOffset = walkRightInNode(anchorNode, anchorOffset);
    // The run ended inside the anchor node: no need to look at neighbours.
    if (endOffset < anchorNode.data.length) {
      return { node: endNode, offset: endOffset };
    }

    const tw = anchorNode.ownerDocument.createTreeWalker(
      block,
      NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT
    );
    tw.currentNode = anchorNode;
    // Keep going only while the run reaches the very end of the current node.
    while (endOffset === endNode.data.length) {
      const next = tw.nextNode();
      if (!next) break;
      if (next.nodeType === Node.ELEMENT_NODE) {
        if (isBoundaryElement(next)) break;
        continue; // inline element: keep walking to reach its text
      }
      if (!isInlineWithin(next, block)) break;
      const len = next.data.length;
      if (len === 0) {
        // Empty text node: adopt it and keep looking further right.
        endNode = next;
        endOffset = 0;
        continue;
      }
      const firstCp = next.data.codePointAt(0);
      if (!isCJKCodePoint(firstCp)) break;
      endNode = next;
      endOffset = walkRightInNode(next, 0);
    }
    return { node: endNode, offset: endOffset };
  }

  // Resolve viewport coordinates (x, y) to a caret position { node, offset },
  // or null if neither API yields one. Prefers caretPositionFromPoint (passing
  // the shadow root when the target lives in one) and falls back to
  // caretRangeFromPoint if the first call is unavailable, throws, or returns
  // nothing.
  function getCaretFromPoint(x, y, doc, root) {
    if (typeof doc.caretPositionFromPoint === "function") {
      try {
        const p =
          root instanceof ShadowRoot
            ? doc.caretPositionFromPoint(x, y, { shadowRoots: [root] })
            : doc.caretPositionFromPoint(x, y);
        if (p) return { node: p.offsetNode, offset: p.offset };
      } catch (_) {
        /* fall through to legacy API */
      }
    }
    if (typeof doc.caretRangeFromPoint === "function") {
      const r = doc.caretRangeFromPoint(x, y);
      if (r) return { node: r.startContainer, offset: r.startOffset };
    }
    return null;
  }

  // Selection object to operate on for `target`: the shadow root's own
  // getSelection() when the target is inside a shadow root that exposes one,
  // otherwise window.getSelection().
  function pickSelection(target) {
    const root = target && target.getRootNode && target.getRootNode();
    if (root instanceof ShadowRoot && typeof root.getSelection === "function") {
      return root.getSelection();
    }
    return window.getSelection();
  }

  // Set while this script is replacing the selection, so a re-entrant
  // invocation of the handler during that window is ignored.
  let isApplying = false;

  // dblclick handler: if the browser's default selection contains CJK and the
  // click landed next to a CJK character in a text node, replace the selection
  // with the full contiguous CJK run around the caret.
  function handleDblClick(e) {
    if (isApplying) return;

    const composedTarget =
      (typeof e.composedPath === "function" && e.composedPath()[0]) || e.target;
    if (!composedTarget) return;

    // Form controls have their own selection model; leave them alone.
    if (
      composedTarget instanceof HTMLInputElement ||
      composedTarget instanceof HTMLTextAreaElement
    ) {
      return;
    }

    const sel = pickSelection(composedTarget);
    if (!sel || sel.rangeCount === 0) return;
    if (!containsCJK(sel.toString())) return;

    const root = composedTarget.getRootNode && composedTarget.getRootNode();
    const doc = (root && root.ownerDocument) || document;
    const caret = getCaretFromPoint(e.clientX, e.clientY, doc, root);
    if (!caret || caret.node.nodeType !== Node.TEXT_NODE) return;

    const text = caret.node.data;
    const offset = caret.offset;

    // Require that at least one side of the caret is a CJK character.
    // Without this, a click that lands on whitespace between CJK runs
    // would still trigger expansion in an unintuitive direction.
    const right = offset < text.length ? text.codePointAt(offset) : null;
    const leftInfo = prevCodePoint(text, offset);
    const leftCJK = leftInfo && isCJKCodePoint(leftInfo.cp);
    const rightCJK = right !== null && isCJKCodePoint(right);
    if (!leftCJK && !rightCJK) return;

    const start = walkLeftAcrossNodes(caret.node, offset);
    const end = walkRightAcrossNodes(caret.node, offset);
    // Collapsed result: nothing to select.
    if (start.node === end.node && start.offset === end.offset) return;

    // Re-entry guard: if our computed range already matches the current
    // selection, do nothing — prevents loops if another handler re-fires
    // dblclick in response to selectionchange.
    const cur = sel.getRangeAt(0);
    if (
      cur.startContainer === start.node &&
      cur.startOffset === start.offset &&
      cur.endContainer === end.node &&
      cur.endOffset === end.offset
    ) {
      return;
    }

    const range = doc.createRange();
    try {
      range.setStart(start.node, start.offset);
      range.setEnd(end.node, end.offset);
    } catch (_) {
      // Boundary points the DOM rejects: keep the browser's own selection.
      return;
    }

    isApplying = true;
    try {
      sel.removeAllRanges();
      sel.addRange(range);
    } finally {
      isApplying = false;
    }
  }

  // Registered in the capture phase (third argument `true`).
  document.addEventListener("dblclick", handleDblClick, true);
})();