/* globals silo */
(function(){
"use strict";
// Abort script if running outside GitHub
if("github.com" !== document.location.host)
return alert("This script must be run whilst on github.com");
// Don't clobber any existing globals
if(window.harvest || window.silo) return;
// URL constants for different search modes
const BY_MATCH = "";
const BY_NEWEST = "&s=indexed&o=desc";
const BY_OLDEST = "&s=indexed&o=asc";
window.silo = {};
window.harvest = harvest;
// Ensure the silo's reap method can't be reassigned
Object.defineProperties(window.silo, {
badApples: {
value: [],
writable: false,
configurable: false,
},
reap: {
value: reap,
writable: false,
configurable: false,
},
});
// An easier method of accessing the last successful search result
let lastHarvest = null;
Object.defineProperty(window, "that", {
get: () => lastHarvest ? reap(lastHarvest) : "",
});
// Prevent accidental page navigation from interrupting a harvest
let harvesting = false;
window.addEventListener("beforeunload", e => harvesting
? e.returnValue = "Your harvest hasn't finished. Are you sure you wish to cancel?"
: undefined);
// Request permission to show desktop notifications, if needed
Notification.requestPermission();
/**
 * Collate a list of URLs of public search results for a filetype.
 *
 * When called without a query while on a GitHub search page, the query is
 * recovered from the page's `q` parameter: the first `filename:` or
 * `extension:` term becomes the real query, and whatever is left over
 * becomes the bogus one.
 *
 * @param {String} realQuery - What we're really searching for. Prefixed with
 *   "extension:" unless it already contains `extension:`, `filename:`,
 *   `in:file` or `in:path`.
 * @param {String} [bogusQuery=null] - What GitHub thinks we're searching for.
 *   Defaults to the result of `nothack()`.
 * @example harvest("extension:pic")
 * @return {Promise} Resolves (without a value) once every search pass has
 *   finished; rejects with whatever error interrupted the harvest.
 * @throws {TypeError} If no query was given and none could be recovered from the current URL
 * @public
 */
async function harvest(realQuery, bogusQuery = null){
	if(!realQuery){
		const {pathname, search} = window.location;
		if("/search" === pathname && /[?&]q=[^?%\s]/.test(search)){
			bogusQuery = new URL(window.location)
				.searchParams.get("q")
				.replace(/\s+NOT\s+nothack\s*$/, "")
				.replace(/(?:^|\s+)(filename|extension):(\S+)/, (_, key, value) => {
					realQuery = key + ":" + value;
					return "";
				}).trim();

			// Check if we still need a nothack (such as if bogusQuery only contains modifier keywords)
			if(!isplit(bogusQuery, /((?:^|\s)NOT)\s+(?!NOT)\S+/).some(chunk => /(?:\s|^)NOT\s+/.test(chunk)))
				bogusQuery += " " + nothack();

			if(realQuery)
				return harvest(realQuery, bogusQuery || null);
		}
		throw new TypeError("Missing query parameter");
	}

	if(!/extension:|filename:|in:file|in:path/.test(realQuery))
		realQuery = `extension:${realQuery}`;

	// Default to the usual "nothack" with a random number attached
	if(null == bogusQuery)
		bogusQuery = nothack();

	const query = encodeURIComponent(`${realQuery} ${bogusQuery}`).replace(/%20/g, "+");
	const url = `https://github.com/search?q=${query}&type=Code`;

	// Raise the flag only once the URL has been built: everything above can
	// throw (encodeURIComponent rejects lone surrogates, for one), and a flag
	// raised too early would never be lowered again.
	harvesting = true;
	try{
		const numResults = await runSearch(url, BY_MATCH, realQuery);

		// Too many results for one pass: run the newest-first and oldest-first orderings as well
		if(numResults > 1000){
			await runSearch(url, BY_NEWEST, realQuery);
			await runSearch(url, BY_OLDEST, realQuery);
		}

		// Record the result before notifying, so a failure to display the
		// notification can't make a finished harvest unreachable via `that`.
		lastHarvest = realQuery;
		const body = "Run `copy(that);` in your console to copy the URLs to your clipboard.";
		new Notification(`Harvest complete for ${realQuery}`, {body});
	}
	catch(error){
		console.error(error);
		if(parseHTML.lastResult)
			console.log({lastPageSnapshot: parseHTML.lastResult});
		throw error;
	}
	finally{
		// Always release the navigation guard, whichever way the harvest ended
		harvesting = false;
	}
}
/**
* Load each page of results for a file search.
*
* @param {String} url - Absolute URL, sans page variable ("&p=1")
* @param {String} vars - Additional search variables, if any
* @param {String} query - File-related half of search's query ("extension:pic")
* @return {Promise}
* @internal
*/
async function runSearch(url, vars, query){
const results = silo[query] || (silo[query] = {length: 0});
let page = 0;
let pageCount;
let resultCount;
return resultCount = await next();
async function loadNextPage(){
++page;
// No more pages to load
if(page >= pageCount)
return resultCount;
// Throttle the next request so GitHub doesn't bite our head off
await wait(2000);
return next();
}
async function next(){
const pageURL = url + vars + (page ? `&p=${page+1}` : "");
const response = await grab(pageURL);
const htmlTree = await response.text().then(html => {
html = html.replace(/ htmlTree.querySelector(s);
const $$ = s => htmlTree.querySelectorAll(s);
// No results. Reject.
if($("div.blankslate")){
const notice = "Must include at least one user, organization, or repository";
const match = notice.split(" ").join("\\s+");
new RegExp(match, "i").test(htmlTree.textContent)
? ["Failed.", "GitHub's doing that weird thing again:", `\t> "${notice}"`].join("\n\n")
: "No results";
console.error(`Skipping this one: ${pageURL}\n`);
console.error("Find it in window.silo.badApples");
window.silo.badApples.push(pageURL);
loadNextPage();
}
// Extract the result-entry row from this page of results
const listContainer = $("#code_search_results > .code-list") || die("Search-result list not found");
const listItems = listContainer.querySelectorAll(".code-list-item");
if(listItems.length < 1) die("Expected at least one entry to match `.code-list-item`");
for(const item of listItems){
const match = /^((?:\/[^/]+){2})\/blob(?=\/)/gmi;
const link = [...item.querySelectorAll("a")].find(link => {
const href = link.getAttribute("href");
return href && match.test(href);
});
if(link && !results[link.href]){
++results.length;
results[link.href] = link.href.replace(
match,
"https://raw.githubusercontent.com$1"
);
}
}
// Examine how many pages there are
if(undefined === pageCount){
// Get all numeric links in pagination footer
const pageLinks = $$(".pagination > a[href]");
// Two or more pages: Find out how many results we're lookin' at.
if(pageLinks.length){
const pageIndexes = Array.from(pageLinks)
.filter(a => /^\s*[0-9]+\s*$/.test(a.textContent))
.map(a => parseInt(a.textContent.trim()));
pageCount = Math.max(...pageIndexes);
// Title that says "Showing 263,443,068 code results"
const h3 = $(".codesearch-results h3");
if(h3 && h3.textContent.match(/\b([0-9.,\s]+)\s/)){
resultCount = +(RegExp.$1.replace(/\D/g, ""));
// If the matched text doesn't include "code results", then it's too
// high a risk we've extracted a number from a different heading.
if(!/\b(code\s+results?)\b/.test(h3.textContent)){
let message = `Missing text found where "${resultCount} code results" expected. `;
message += "Please double-check