import { parseSrcset, stringifySrcset } from 'srcset';
import replaceElementType from './replace-element-type.js';
import { isImageURL } from './util/file-mimetype.js';

/*
	Convert AMP markup to HTML markup
	(naive / incomplete implementation)

	`doc` is the DOM document to modify in place.
*/
function ampToHtml(doc) {
	// Convert <amp-img> to <img>
	Array.from(doc.querySelectorAll('amp-img')).forEach(ampImg => {
		replaceElementType(ampImg, 'img', doc);
	});
}

/*
	Make lazy-loaded images load eagerly:

	* copy `data-src`, `data-srcset`, `data-lazy-src` and
	  `data-lazy-srcset` onto the real `src` / `srcset` attributes
	  (when both variants are present, the `data-lazy-*` one is
	  applied last and therefore wins);
	* remove the `loading="lazy"` attribute from any element;
	* remove the `sizes` attribute from <img> elements.

	`doc` is the DOM document to modify in place.
*/
function fixLazyLoadedImages(doc) {
	Array.from(
		doc.querySelectorAll(`
			img[data-src],
			img[data-srcset],
			img[data-lazy-src],
			img[data-lazy-srcset]
		`)
	).forEach(img => {
		// `dataset` keys are the camelCased `data-*` attribute names.
		['src', 'srcset', 'lazySrc', 'lazySrcset'].forEach(attr => {
			if (attr in img.dataset) {
				img.setAttribute(
					// lazySrc -> src, lazySrcset -> srcset
					attr.replace(/^lazyS/, 's'),
					img.dataset[attr]
				);
			}
		});
	});

	Array.from(doc.querySelectorAll('[loading="lazy"]')).forEach(el => {
		el.removeAttribute('loading');
	});

	Array.from(doc.querySelectorAll('img[sizes]')).forEach(img => {
		img.removeAttribute('sizes');
	});
}

/*
	Patterns for link targets that must NOT be used
	as the full-size version of an image.
*/
const IMAGE_EXCLUDE_PATTERNS = [
	/*
		Exclude Wikipedia links to image file pages
	*/
	/wikipedia\.org\/wiki\/[a-z]+:/i,

	/*
		Exclude images embedded in Markdown files
		hosted on github.com.
		See: https://github.com/danburzo/percollate/issues/84
	*/
	/github\.com/
];

/*
	Some image URLs can be used but must be adjusted
*/
const IMAGE_URL_MAPPERS = [
	/*
		Some Blogger images use a /s1600-h/ segment that loads
		an HTML file, while /s1600/ loads the image itself.
		The mapper drops the `-h` suffix from that segment.
	*/
	{
		// Dots are escaped so only the literal host name matches.
		matcher: /\/blogger\.googleusercontent\.com\/img\//,
		replace: url => url.replace(/\/(s\d+)-h\//, '/$1/')
	}
];

/*
	Replace:
		<a href='original-size.png'>
			<img src='small-size.png'/>
		</a>

	With:
		<img src='original-size.png'/>

	Afterwards, remove the `width` / `height` attributes
	from all <img> elements.

	`doc` is the DOM document to modify in place.
*/
function imagesAtFullSize(doc) {
	Array.from(doc.querySelectorAll('a > img:only-child')).forEach(img => {
		let anchor = img.parentNode;

		/*
			Handle cases where the `href` to the full-size image
			includes query parameters, eg. `image.png?w=1024`.
			The query is only stripped for the file-type check below.
		*/
		let href = anchor.href;
		try {
			let url = new URL(anchor.href, doc.baseURI);
			url.search = '';
			href = url.href;
		} catch (err) {
			// no-op, probably due to bad `doc.baseURI`.
		}

		// Only replace if the `href` matches an image file
		if (
			isImageURL(href, doc) &&
			!IMAGE_EXCLUDE_PATTERNS.some(pattern => pattern.test(href))
		) {
			// Use the original link target, query parameters included.
			href = anchor.href;
			IMAGE_URL_MAPPERS.forEach(it => {
				if (it.matcher.test(href)) {
					href = it.replace(href);
				}
			});
			img.setAttribute('src', href);
			anchor.parentNode.replaceChild(img, anchor);
		}
	});

	/*
		Remove width/height attributes from <img> elements
		and style them in CSS. (Should we do this, actually?)
	*/
	Array.from(doc.querySelectorAll('img')).forEach(img => {
		img.removeAttribute('width');
		img.removeAttribute('height');
	});
}

/*
	Remove some screen-only things from wikipedia pages:
	- edit links next to headings

	`doc` is the DOM document to modify in place.
*/
function wikipediaSpecific(doc) {
	Array.from(doc.querySelectorAll('.mw-editsection')).forEach(el =>
		el.remove()
	);
}

/*
	Fix heading links
	See: https://github.com/danburzo/percollate/issues/49

	For each <a> that is a direct child of a heading and whose `id`
	is its `href` fragment prefixed with `user-content-`, drop that
	prefix from the `id` so that the in-page link resolves.

	`doc` is the DOM document to modify in place.
*/
function githubSpecific(doc) {
	Array.from(
		doc.querySelectorAll('h1 > a, h2 > a, h3 > a, h4 > a, h5 > a, h6 > a')
	).forEach(el => {
		let href = el.getAttribute('href');
		// Anchors without an `href` (eg. <a name="...">) can't match.
		if (href === null) {
			return;
		}
		let id = el.id;
		if (id === href.replace(/^#/, 'user-content-')) {
			el.id = id.replace('user-content-', '');
			/*
				`aria-hidden` causes Readability to remove the element,
				so we remove the attribute.
			*/
			if (el.getAttribute('aria-hidden')) {
				el.removeAttribute('aria-hidden');
			}
		}
	});
}

/*
	Mark some links as not needing their HREF appended,
	by adding the `no-href` class to them.

	`doc` is the DOM document to modify in place.
*/
function noUselessHref(doc) {
	Array.from(doc.querySelectorAll('a'))
		.filter(function (el) {
			let href = el.getAttribute('href') || '';

			// in-page anchors
			if (href.match(/^#/)) {
				return true;
			}

			let textContent = el.textContent.trim();

			// links whose text content is the HREF
			// or which don't have any content.
			return !textContent || textContent === href;
		})
		.forEach(el => el.classList.add('no-href'));
}

/*
	Convert relative URIs to absolute URIs:

	* the `href` attribute of <a> elements (except for in-page anchors)
	* the `src` attribute of <img> elements
	* the `srcset` attribute of <img> elements,
	  and of <source> elements inside <picture> elements

	`doc` is the DOM document to modify in place.
*/
function relativeToAbsoluteURIs(doc) {
	// Resolve each candidate URL of a `srcset` against the document base.
	function absoluteSrcset(str) {
		return stringifySrcset(
			parseSrcset(str).map(item => ({
				...item,
				url: new URL(item.url, doc.baseURI).href
			}))
		);
	}

	/*
		Only touch elements that actually carry the attribute:
		`el.href` / `el.src` are empty strings when it is missing,
		and writing that back would add an empty `href` / `src`.
	*/
	Array.from(doc.querySelectorAll('a[href]:not([href^="#"])')).forEach(
		el => {
			el.setAttribute('href', el.href);
		}
	);

	Array.from(doc.querySelectorAll('img[src]')).forEach(el => {
		el.setAttribute('src', el.src);
	});

	Array.from(
		doc.querySelectorAll('picture source[srcset], img[srcset]')
	).forEach(el => {
		try {
			el.setAttribute(
				'srcset',
				absoluteSrcset(el.getAttribute('srcset'))
			);
		} catch (err) {
			// Leave the original `srcset` in place and report the problem.
			console.error(err);
		}
	});
}

/*
	Wraps single images into <figure> elements,
	adding the image's `alt` attribute as <figcaption>

	`doc` is the DOM document to modify in place.
*/
function singleImgToFigure(doc) {
	Array.from(doc.querySelectorAll('img:only-child')).forEach(image => {
		/*
			Some images have been left as the only children
			of <a> elements by exclusion rules in `imagesAtFullSize()`
			(eg. on Wikipedia). If that's the case, include
			the <a> as well in the <figure>.
		*/
		const content =
			image.parentNode.tagName === 'A' ? image.parentNode : image;

		let fig = doc.createElement('figure');
		fig.appendChild(content.cloneNode(true));

		let alt = image.getAttribute('alt');
		if (alt) {
			let figcaption = doc.createElement('figcaption');
			figcaption.textContent = alt;
			fig.appendChild(figcaption);
		}

		// When the content is its parent's only element child,
		// the <figure> takes the place of that parent as well.
		if (content.parentNode.children.length === 1) {
			content.parentNode.replaceWith(fig);
		} else {
			content.replaceWith(fig);
		}
	});
}

/*
	Expands <details> elements

	`doc` is the DOM document to modify in place.
*/
function expandDetailsElements(doc) {
	Array.from(doc.querySelectorAll('details')).forEach(el =>
		el.setAttribute('open', true)
	);
}

/*
	Wrap <pre> blocks in <figure> elements,
	to make sure Readability preserves them.

	`doc` is the DOM document to modify in place.
*/
function wrapPreBlocks(doc) {
	Array.from(doc.querySelectorAll('pre')).forEach(pre => {
		/*
			Avoid processing nested <pre> elements (#165)
		*/
		if (pre.querySelector('pre')) {
			return;
		}
		if (pre.parentNode && !pre.parentNode.matches('figure')) {
			let fig = doc.createElement('figure');
			fig.appendChild(pre.cloneNode(true));

			/*
				If the <pre> is the only child of its parent element,
				also remove this parent in the process.
			*/
			let toReplace = pre.matches(':only-child') ? pre.parentNode : pre;
			toReplace.parentNode.replaceChild(fig, toReplace);
		}
	});
}

export {
	ampToHtml,
	fixLazyLoadedImages,
	imagesAtFullSize,
	noUselessHref,
	wikipediaSpecific,
	relativeToAbsoluteURIs,
	singleImgToFigure,
	expandDetailsElements,
	githubSpecific,
	wrapPreBlocks
};