#!/usr/bin/php
[test]\nnote: settings- /", "", $b);
$b = preg_replace("#.*?
#ms', ' (...) ', $e);
$e = preg_replace('#(.*?)
#ms', " $1 ", $e);
$e = str_replace('
", "\n\n", $b);
$b = str_replace("
", "\n", $b);
$b = str_replace("
", "\n", $b);
$b = preg_replace('#.*?(.*)#s', "$1", $b);
$b = str_replace(" ", " ", str_replace(" ", " ", str_replace("", "", $b))); # weird invis char
$b = preg_replace("/ +/", " ", $b);
$b = str_replace("https://truthsocial.com/tags/", "#", $b);
$b = trim(preg_replace('/\s+/', ' ', $b));
// save quote link
$ql = !empty($r->quote) ? ' (re: ' . make_short_url($r->quote->url) . ')' : '';
// shorten and add hint for links
$hl = 0; // track hint lengths to increase max tweet length so never cut off
if (preg_match_all('#.*?#', $b, $m) && !empty($m[0])) {
foreach ($m[0] as $v) {
preg_match('#(.*)#', $v, $m2); // m2[0] full anchor [1] href [2] text
// shorten displayed link if possible, add hint if needed
$fu = get_final_url($m2[1], ['no_body' => 1]);
$s = make_short_url($fu);
if (mb_strlen($s) < mb_strlen($m2[1])) {
$m2[1] = $s;
}
$h = get_url_hint($fu);
if ($h <> get_url_hint($m2[1])) {
if (mb_strlen("$m2[1] ($h)") < mb_strlen($fu)) {
$b = str_replace($m2[0], "$m2[1] ($h)", $b);
$hl += mb_strlen($h) + 3;
} else {
$b = str_replace($m2[0], $fu, $b);
} // no hint, final url < short+hint
} else {
$b = str_replace($m2[0], $m2[1], $b);
} // no hint, same as displayed domain
}
}
// pre-finalize
$t = "{$r->account->display_name}: $b";
$t = str_shorten($t, mb_strlen($r->account->display_name) + 282 + $hl);
// count attachments
foreach (['image', 'gifv', 'tv', 'video'] as $m) {
$n = 0;
foreach ($r->media_attachments as $ma) {
if ($ma->type == $m) {
$n++;
}
}
if ($m == 'gifv') {
$m = 'gif';
}
if ($n > 0) {
$t = trim($t) . ($n == 1 ? " ($m)" : " ($n {$m}s)");
}
}
$t .= $ql; // add quote link, no hint
// finalize and output
$t = "[ $t ]";
send("PRIVMSG $channel :$title_bold$t$title_bold\n");
if ($title_cache_enabled) {
add_to_title_cache($u, $t);
}
} elseif (isset($r->error) and $r->error == 'Record not found') {
send("PRIVMSG $channel : Post does not exist.\n");
} else {
echo "Error getting Truth Social post. Result: " . print_r($r, true) . "\n";
}
} else {
echo "Truth Social links require \$curl_impersonate_enabled\n";
}
continue;
}
// bluesky
// TODO convert to at-uri without loading page?
if (preg_match('#^https?://bsky.app/profile/[^/]+/post/[^/]+#', $u)) {
$html = curlget([CURLOPT_URL => $u]);
if ($curl_info['RESPONSE_CODE'] == 200) {
$dom = new DomDocument();
@$dom->loadHTML('' . $html);
$f = new DomXPath($dom);
$n = $f->query("/html/head/link[starts-with(@href,'at://')]");
if (!empty($n) && $n->length > 0) {
$at = $n[0]->getAttribute('href');
// echo "found bluesky at-uri $at\n";
$r = @json_decode(curlget([CURLOPT_URL => "https://public.api.bsky.app/xrpc/app.bsky.feed.getPosts?uris=$at"]));
if (!empty($r)) {
// print_r($r);
if (isset($r->posts[0])) {
// clean up content
$b = $r->posts[0]->record->text;
$b = html_entity_decode($b, ENT_QUOTES | ENT_HTML5, 'UTF-8');
$b = str_replace(["\r\n", "\n", "\t"], ' ', $b);
$b = str_replace('…', '...', $b);
$b = str_replace("‘", "'", str_replace("’", "'", $b)); # fancy quotes
$b = str_replace("“", '"', str_replace("”", '"', $b));
$b = preg_replace('#(?posts[0]->author->displayName}: $b";
$t = str_shorten($t, mb_strlen($r->posts[0]->author->displayName) + 282); // + $hl
$ql = '';
if (!empty($r->posts[0]->record->embed)) {
$et = explode('.', $r->posts[0]->record->embed->{'$type'})[3];
if (($et == 'record' || $et == 'recordWithMedia') && strpos('/app.bsky.feed.post/', $r->posts[0]->record->embed->record->uri) !== -1) {
if (isset($r->posts[0]->record->embed->record->uri)) {
$uri = explode('/', $r->posts[0]->record->embed->record->uri);
} else {
$uri = explode('/', $r->posts[0]->record->embed->record->record->uri);
}
$ql = ' (re: ' . make_short_url("https://bsky.app/profile/{$uri[2]}/post/{$uri[4]}") . ')';
}
if ($et == 'images' || ($et == 'recordWithMedia' && !empty($r->posts[0]->record->embed->media->images))) {
if (isset($r->posts[0]->record->embed->images)) {
$n = count($r->posts[0]->record->embed->images);
} else {
$n = count($r->posts[0]->record->embed->media->images);
}
$t = rtrim($t) . ' (' . ($n > 1 ? "$n " : '') . 'image' . ($n > 1 ? 's' : '') . ')';
}
if ($et == 'video' || ($et == 'recordWithMedia' && !empty($r->posts[0]->record->embed->media->video))) {
$t = rtrim($t) . ' (video)';
}
if ($et == 'external' || ($et == 'recordWithMedia' && !empty($r->posts[0]->record->embed->media->external))) {
if (isset($r->posts[0]->record->embed->external->uri)) {
$uri = $r->posts[0]->record->embed->external->uri;
} else {
$uri = $r->posts[0]->record->embed->media->external->uri;
}
// if post text ends in part of the embed link, get rid of it
$tmp = explode(' ', $r->posts[0]->record->text);
$tmp = rtrim(trim($tmp[count($tmp) - 1]), '.');
$tmp2 = preg_replace('#^https?://#', '', $uri);
if (substr($tmp2, 0, strlen($tmp)) == $tmp) {
$t = trim(preg_replace('#' . preg_quote($tmp) . '\.+#', '', $t));
}
// add link at the end (post-shorten; not like twitter, etc)
$fu = get_final_url($uri, ['no_body' => 1]);
$s = make_short_url($fu);
if ($s <> $fu) {
$h = get_url_hint($fu);
if (mb_strlen("$s ($h)") < mb_strlen($fu)) {
$t = rtrim($t) . " $s ($h)";
} else {
$t = rtrim($t) . " $fu";
}
} else {
$t = rtrim($t) . ' (link)';
} // no short url, could be very long
}
// TODO facets, so mid-text / non-external embed links are processed properly, excluding duplicate externals. e.g. https://bsky.app/profile/propublica.org/post/3lmky7ypvhs2k https://bsky.app/profile/joshuajfriedman.com/post/3lmikawq2ds2j
}
$t = rtrim($t) . $ql; // add quote link, no hint
// finalize and output
$t = "[ $t ]";
send("PRIVMSG $channel :$title_bold$t$title_bold\n");
if ($title_cache_enabled) {
add_to_title_cache($u, $t);
}
continue;
}
} else {
// post not found
echo "error reading bluesky post\n";
}
}
}
}
// tiktok
if (preg_match('#^https?://(?:www\.)?tiktok\.com/@[A-Za-z0-9._]+/video/\d+#', $u, $m)) {
$r = curlget([CURLOPT_URL => "https://www.tiktok.com/oembed?url=$m[0]"]);
$j = @json_decode($r);
if (isset($j->title) && isset($j->author_name)) {
$j->title = str_shorten(trim($j->title), 160);
$t = "{$j->author_name}: {$j->title}"; // author_name <= 30
$t = "[ $t ]";
send("PRIVMSG $channel :$title_bold$t$title_bold\n");
if ($title_cache_enabled) {
add_to_title_cache($u, $t);
}
continue;
} else {
echo "Error getting TikTok video URL details, got:\n" . trim($r) . "\n";
}
}
// instagram
if (preg_match('#https?://(?:www\.)?instagram\.com/p/([A-Za-z0-9-_]*)#', $u, $m)) {
echo "getting instagram post info\n";
if (!empty($m[1])) {
$t = '';
$r = @json_decode(file_get_contents("https://www.instagram.com/p/$m[1]/?__a=1"));
if (!empty($r) && !empty($r->graphql->shortcode_media)) {
$m = $r->graphql->shortcode_media;
$i = 0;
$v = 0;
if ($m->__typename == 'GraphImage') {
$i = 1;
} elseif ($m->__typename == 'GraphVideo') {
$v = 1;
} elseif ($m->__typename == 'GraphSidecar') {
foreach ($m->edge_sidecar_to_children->edges as $a) {
if ($a->node->__typename == 'GraphImage') {
$i++;
} elseif ($a->node->__typename == 'GraphVideo') {
$v++;
}
}
}
if ($i > 0 || $v > 0) {
if ($i > 0 && $v > 0) {
$p = "$i image" . ($i > 1 ? 's' : '') . ", $v video" . ($v > 1 ? 's' : '');
} else {
if ($i > 0) {
$p = $i == 1 ? 'image' : "$i images";
} elseif ($v > 0) {
$p = $v == 1 ? 'video' : "$v videos";
}
}
} else {
$p = '';
}
$c = $m->edge_media_to_caption->edges[0]->node->text;
// $n=$m->owner->username;
$f = $r->graphql->shortcode_media->owner->full_name;
if (!empty($n)) {
if (!empty($c)) {
$t = str_replace(["\r\n", "\n", "\t", "\xC2\xA0"], ' ', "$f: $c");
$t = trim(preg_replace('/\s+/', ' ', $t));
$t = str_shorten($t, 280);
} else {
$t = "$n:";
}
if (!empty($p)) {
$t .= " ($p)";
}
$t = "[ $t ]";
send("PRIVMSG $channel :$title_bold$t$title_bold\n");
if ($title_cache_enabled) {
add_to_title_cache($u, $t);
}
continue;
}
}
}
}
# Facebook
if (preg_match('#^https?://(?:www\.)?facebook\.com/reel/(\d+)#', $u, $m)) {
$use_meta_tag = 'description';
}
if (preg_match('#^https?://(?:www\.)?facebook\.com/photo/#', $u, $m)) {
$use_meta_tag = 'description';
}
// twitch via api
if (!empty($twitch_client_id) && preg_match('#https?://(?:www\.)?twitch\.tv/(\w+)(/\w+)?#', $u, $m)) {
// get token, don't revalidate because won't be revoked - https://dev.twitch.tv/docs/authentication
echo "Getting Twitch token.. ";
if (empty($twitch_token) || $twitch_token_expires < time()) {
$r = json_decode(curlget([CURLOPT_URL => "https://id.twitch.tv/oauth2/token?client_id=$twitch_client_id=&client_secret=$twitch_client_secret&grant_type=client_credentials", CURLOPT_POST => 1, CURLOPT_HTTPHEADER => ["Client-ID: $twitch_client_id"]]));
if (!empty($r) && !empty($r->access_token)) {
echo "ok.\n";
$twitch_token = $r->access_token;
$twitch_token_expires = time() + $r->expires_in - 30;
} else {
if (isset($r->message)) {
echo "error: $r->message\n";
} else {
echo "error, r=" . print_r($r, true);
}
$t = '[ API error ]';
send("PRIVMSG $channel :$title_bold$t$title_bold\n");
$twitch_token = '';
$twitch_token_expires = 0;
continue;
}
} else {
echo "ok.\n";
}
if (!empty($twitch_token)) {
// get user info - https://dev.twitch.tv/docs/api/reference#get-users
echo "Getting user info for \"$m[1]\".. ";
$r = json_decode(curlget([CURLOPT_URL => "https://api.twitch.tv/helix/users?login=$m[1]", CURLOPT_HTTPHEADER => ["Client-ID: $twitch_client_id", "Authorization: Bearer $twitch_token"]]));
if (!empty($r) && isset($r->data)) {
if (isset($r->data[0])) {
echo "ok.\n";
$un = $r->data[0]->display_name;
$ud = $r->data[0]->description; // shorten
if (!empty($m[2])) {
// just show subdir
$t = "[ $un: " . ucfirst(substr($m[2], 1)) . " ]";
send("PRIVMSG $channel :$title_bold$t$title_bold\n");
continue;
} else {
// get live stream info - https://dev.twitch.tv/docs/api/reference#get-streams-metadata
echo "Getting live stream info.. ";
$r = json_decode(curlget([CURLOPT_URL => "https://api.twitch.tv/helix/streams?user_login=$m[1]", CURLOPT_HTTPHEADER => ["Client-ID: $twitch_client_id", "Authorization: Bearer $twitch_token"]]));
if (!empty($r) && isset($r->data)) {
if (count($r->data) > 0) {
// check for live stream
foreach ($r->data as $d) {
if ($d->type == 'live') {
echo "ok.\n";
$t = str_replace(["\r\n", "\n", "\t", "\xC2\xA0"], ' ', $d->title);
$t = trim(preg_replace('/\s+/', ' ', $t));
$t = str_shorten($t, 424);
$t = "[ $t ]";
send("PRIVMSG $channel :$title_bold$t$title_bold\n");
continue(2);
}
}
}
// no streams, show user info
echo "not streaming\n";
$t = str_replace(["\r\n", "\n", "\t", "\xC2\xA0"], ' ', "$un: $ud");
$t = trim(preg_replace('/\s+/', ' ', $t));
$t = str_shorten($t, 424);
$t = "[ $t ]";
send("PRIVMSG $channel :$title_bold$t$title_bold\n");
continue;
} else {
if (isset($r->message)) {
echo "error: $r->message\n";
} else {
echo "error, r=" . print_r($r, true);
}
}
}
} else {
echo "not found, abort\n";
}
} else {
// api or connection error, shouldnt usually happen, continue silently
if (isset($r->message)) {
echo "error: $r->message\n";
} else {
echo "error, r=" . print_r($r, true);
}
continue;
}
}
}
// gab social
if (preg_match('#https://(?:www\.)?gab\.com/[^/]+/posts/(\d+)#', $u)) {
$gab_post = true;
$use_meta_tag = 'og:description';
} else {
$gab_post = false;
}
// telegram (todo: use api to get message details i.e. whether has a video or image)
if (preg_match("#^https?://t\.me/#", $u, $m)) {
$use_meta_tag = 'og:description';
$meta_skip_blank = true;
}
// poa.st
if (preg_match("#^https?://poa\.st/@[^/]+/posts/#", $u) || preg_match("#^https?://poa\.st/notice/#", $u)) {
$use_meta_tag = 'og:description';
$meta_skip_blank = true;
}
// msn.com articles - title via ajax
if (preg_match("#^(?:www\.)?msn\.com/.*?/ar-([^/]*)$#", $parse_url['host'] . $parse_url['path'], $m)) {
$r = json_decode(curlget([CURLOPT_URL => "https://assets.msn.com/content/view/v2/Detail/en-us/$m[1]"]));
if (isset($r->title)) {
$t = "[ " . str_shorten($r->title, 424) . " ]";
send("PRIVMSG $channel :$title_bold$t$title_bold\n");
if ($title_cache_enabled) {
add_to_title_cache($u, $t);
}
continue;
}
}
// militarywatchmagazine.com articles - title via ajax
if (preg_match("#^https?://(?:www\.)?militarywatchmagazine\.com/article/([^?\#]*)#", $u, $m)) {
$r = json_decode(curlget([CURLOPT_URL => "https://militarywatchmagazine.com/i_s/api/records/articles?filter=article_identifier,eq,$m[1]"]));
if (isset($r->records[0]->article_title)) {
$t = "[ " . str_shorten($r->records[0]->article_title, 424) . " ]";
send("PRIVMSG $channel :$title_bold$t$title_bold\n");
if ($title_cache_enabled) {
add_to_title_cache($u, $t);
}
continue;
}
}
$og_title_urls_regex = ['#https?://(?:www\.)?brighteon\.com#', '#https?://(?:www\.)?campusreform\.org#',];
foreach ($og_title_urls_regex as $r) {
if (preg_match($r, $u)) {
$use_meta_tag = 'og:title';
}
}
// ai media summaries
$ai_image_title_done = false;
if (!empty($ai_media_titles_enabled) && preg_match("#^https?://[^ ]+?\.(?:jpg|jpeg|png|webp|gif" . ($ai_media_titles_more_types ? $amt_mt_regex : "") . ")$#i", $u)) {
echo "Using AI to summarize\n";
$t = get_ai_media_title($u);
if (!empty($t)) {
$t = str_shorten($t);
$t = "[ $t ]";
send("PRIVMSG $channel :$title_bold$t$title_bold\n");
if ($title_cache_enabled) {
add_to_title_cache($u, $t);
}
continue;
}
$ai_image_title_done = true;
}
// skips
$pathinfo = pathinfo($u);
if (in_array($pathinfo['extension'], ['gif', 'gifv', 'mp4', 'webm', 'jpg', 'jpeg', 'png', 'csv', 'pdf', 'xls', 'doc', 'txt', 'xml', 'json', 'zip', 'gz', 'bz2', '7z', 'jar'])) {
echo "skipping url due to extension \"{$pathinfo['extension']}\"\n";
continue;
}
if (!isset($header)) {
$header = [];
}
if (!empty($tor_enabled) && (preg_match('#^https?://.*?\.onion(?:$|/)#', $u) || !empty($tor_all))) {
echo "getting url title via tor\n";
/** @noinspection HttpUrlsUsage */
$html = curlget([CURLOPT_URL => $u, CURLOPT_PROXYTYPE => CURLPROXY_SOCKS5_HOSTNAME, CURLOPT_PROXY => "$tor_host:$tor_port", CURLOPT_CONNECTTIMEOUT => 60, CURLOPT_TIMEOUT => 60, CURLOPT_HTTPHEADER => $header]);
if (empty($html)) {
if (strpos($curl_error, "Failed to connect to $tor_host port $tor_port") !== false) {
send("PRIVMSG $channel :Tor error - is it running?\n");
} elseif (strpos($curl_error, "Connection timed out after") !== false) {
send("PRIVMSG $channel :Tor connection timed out\n");
}
// else send("PRIVMSG $channel :Tor error or site down\n");
continue;
}
} else {
if (!empty($scrapingbee_enabled)) {
$html = curlget([CURLOPT_URL => $u, CURLOPT_HTTPHEADER => $header], ['scrapingbee_support' => 1]);
} else {
$html = curlget([CURLOPT_URL => $u, CURLOPT_HTTPHEADER => $header]);
}
}
// echo "response[2048/".strlen($html)."]=".print_r(substr($html,0,2048),true)."\n";
if (empty($html)) {
if (strpos($curl_error, 'SSL certificate problem') !== false) {
echo "set \$allow_invalid_certs=true; in settings to skip certificate checking\n";
$t = '[ SSL certificate problem ]';
send("PRIVMSG $channel :$title_bold$t$title_bold\n");
continue;
}
echo "Error: response blank\n";
continue;
}
// check if it's an image for ai
if ($ai_media_titles_enabled && !$ai_image_title_done) {
$finfo = new finfo(FILEINFO_MIME);
$mime = explode(';', $finfo->buffer($html))[0];
if (preg_match("#(?:jpeg|png|webp|avif|gif" . ($ai_media_titles_more_types ? $amt_mt_regex : "") . ")$#", $mime)) {
echo "Using AI to summarize\n";
$t = get_ai_media_title($u, $html, $mime);
if (!empty($t)) {
$t = str_shorten($t);
$t = "[ $t ]";
send("PRIVMSG $channel :$title_bold$t$title_bold\n");
if ($title_cache_enabled) {
add_to_title_cache($u, $t);
}
continue;
}
}
}
// default
$title = '';
$html = str_replace('<<', '<<', $html); // rottentomatoes bad title html
$dom = new DOMDocument();
if ($dom->loadHTML('' . $html)) {
if ($use_meta_tag) {
$list = $dom->getElementsByTagName("meta");
foreach ($list as $l) {
if (((!empty($l->attributes->getNamedItem('name')) && $l->attributes->getNamedItem('name')->value == $use_meta_tag) || (!empty($l->attributes->getNamedItem('property')) && $l->attributes->getNamedItem('property')->value == $use_meta_tag)) && !empty($l->attributes->getNamedItem('content')->value)) {
$title = $l->attributes->getNamedItem('content')->value;
}
}
if ($gab_post) {
$title = rtrim(preg_replace('/' . preg_quote(": '", '/') . '/', ': ', $title, 1), "'");
}
if (empty($title) && $meta_skip_blank) {
continue;
}
}
if (empty($title)) {
$list = $dom->getElementsByTagName("title");
if ($list->length > 0) {
$title = $list->item(0)->textContent;
}
}
// auto translate title
if (!empty($auto_translate_titles) && !empty($gcloud_translate_keyfile) && !empty($title)) {
$h = $dom->getElementsByTagName('html')[0];
if (!empty($h) && !empty($h->attributes->getNamedItem('lang'))) {
$lc = strtolower(explode('-', $h->attributes->getNamedItem('lang')->value)[0]);
if ($lc <> 'en' && get_lang($lc) <> 'Unknown') {
$r = google_translate(['text' => $title, 'from_lang' => $lc, 'to_lang' => 'en']);
if (!empty($r->text) && !isset($r->error)) {
$l = make_short_url("https://translate.google.com/translate?js=n&sl=$lc&tl=en&u=" . urlencode($u));
$title = '(' . get_lang($lc) . ") $r->text (→EN: $l)";
}
}
}
}
}
$orig_title = $title;
// if potential invidious mirror rewrite URL and jump back to yt/invidious. if not an api URL will continue past here and parse already-fetched html. yewtu.be has a captcha so handle manually
if (!empty($youtube_api_key) && (preg_match('/ - Invidious$/', $title) || preg_match('#^https://yewtu.be/#', $u)) && empty($invidious_mirror) && !preg_match('#^https://invidio\.us#', $u)) {
$u = preg_replace('#^https?://.*?/(.*)#', 'https://invidio.us/$1', $u);
$invidious_mirror = true;
goto invidious;
}
invidious_continue:
// echo "orig title= ".print_r($title,true)."\n";
$title = html_entity_decode($title, ENT_QUOTES | ENT_HTML5, 'UTF-8');
// strip numeric entities that don't seem to display right on IRC when converted
$title = preg_replace('/([0-9]+;)/', '', $title);
$title = str_replace(["\r\n", "\n", "\t", "\xC2\xA0"], ' ', $title);
$title = preg_replace('/\s+/', ' ', $title);
$tmp = " \u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{202F}\u{205F}\u{3000}\u{200E}\u{200F}"; // unicode spaces, ltr, rtl
$title = preg_replace("/^[$tmp]+|[$tmp]+$/u", '', $title);
$notitletitles = [$parse_url["host"], 'Imgur', 'Imgur: The .*', 'Login • Instagram', 'Access denied .* used Cloudflare to restrict access', 'Amazon.* Something Went Wrong.*', 'Sorry! Something went wrong!', 'Bloomberg - Are you a robot?', 'Attention Required! | Cloudflare', 'Access denied', 'Access Denied', 'Please Wait... | Cloudflare', 'Log into Facebook', 'DDOS-GUARD', 'Just a moment...', 'Amazon.com', 'Amazon.ca', 'Blocked - 4plebs', 'MSN', 'Access to this page has been denied', 'You are being redirected...', 'Instagram', 'The Donald', 'Facebook', 'Discord', 'Cloudflare capcha page', 'ChatGPT', 'Before you continue', 'Blocked', 'Verification Required', 'Log into Facebook.*', 'Captcha Page'];
foreach ($notitletitles as $ntt) {
if (preg_match('/^' . str_replace('\.\*', '.*', preg_quote($ntt)) . '$/', $title)) {
echo "Skipping output of title: $title\n";
continue(2);
}
}
if ($title == get_base_domain($parse_url['host'])) {
echo "Skipping output of title: $title\n";
continue;
}
foreach ($title_replaces as $k => $v) {
$title = str_replace($k, $v, $title);
}
if (strpos($u, '//x.com/') !== false) {
$title = str_replace_one(' on X: "', ': "', $title);
}
if ($title && $outline) {
preg_match('#.*?>(.*)›.*?#', $html, $m);
if (!empty($m[1])) {
$title .= ' - ' . trim($m[1]);
}
}
$title = str_shorten($title, 438);
if ($title) {
$title = "[ $title ]";
send("PRIVMSG $channel :$title_bold$title$title_bold\n");
if ($title_cache_enabled) {
add_to_title_cache($u, $title);
}
} else {
if (preg_match('#^https://x.com/#', $u)) { // retry non-api X
if ($u_tries < 2) {
echo "No title found, retrying..\n";
sleep(1);
$ui--;
} else {
echo "No title found.\n";
}
} else {
echo "No title found.\n";
}
}
}
}
// flood protection
if ($flood_protection_on) {
// process all PRIVMSG to $channel
if ($ex[1] == 'PRIVMSG' && $ex[2] == $channel) {
list($tmpnick, $tmphost) = parsemask($ex[0]);
$flood_lines[] = [$tmphost, $msg, microtime()];
if (count($flood_lines) > $flood_max_buffer_size) {
$tmp = array_shift($flood_lines);
}
// if X consequtive lines by one person, quiet for X secs
if (count($flood_lines) >= $flood_max_conseq_lines) {
$flooding = true;
$index = count($flood_lines) - 1;
for ($i = 1; $i <= ($flood_max_conseq_lines - 1); $i++) {
$index2 = $index - $i;
if ($flood_lines[$index2][0] <> $flood_lines[$index][0]) {
$flooding = false;
}
}
if ($flooding && !isme() && !isadmin()) {
$tmphost = str_replace('@gateway/web/freenode/ip.', '@', $tmphost);
timedquiet($flood_max_conseq_time, "*!*@$tmphost");
}
}
// todo: if X within X micro seconds, quiet
// if X of the same lines in a row by one person, quiet for 15 mins
if (count($flood_lines) >= $flood_max_dupe_lines) {
$flooding = true;
$index = count($flood_lines) - 1;
for ($i = 1; $i <= ($flood_max_dupe_lines - 1); $i++) {
$index2 = $index - $i;
if ($flood_lines[$index2][0] <> $flood_lines[$index][0] || $flood_lines[($index2)][1] <> $flood_lines[$index][1]) {
$flooding = false;
}
}
if ($flooding && !isme() && !isadmin()) {
$tmphost = str_replace('@gateway/web/freenode/ip.', '@', $tmphost);
timedquiet($flood_max_dupe_time, "*!*@$tmphost");
// $flood_lines=[];
}
}
}
}
if (empty($data) || ($ex[1] == 'NOTICE' && strpos($data, ':Server Terminating. Received SIGTERM') !== false) || (isme() && $ex[1] == 'QUIT' && strpos($data, ':Ping timeout') !== false)) {
break;
}
if (!empty($nitter_links_via_twitter) && ($time - $nitter_hosts_time) >= 10800) {
nitter_hosts_update();
}
}
echo "Stream closed or timed out, reconnecting..\n";
$connect = 1;
}
// End Loop
/**
 * Fetch a URL, either by shelling out to a curl-impersonate binary or via PHP's curl extension.
 *
 * Besides returning the body, every call overwrites three globals that callers inspect afterwards:
 *   $curl_response - the response body (same value as the return value)
 *   $curl_info     - ['EFFECTIVE_URL' => ..., 'RESPONSE_CODE' => ...] plus 'SIZE_ABORT' => true
 *                    when the transfer was aborted for exceeding $max_download_size
 *   $curl_error    - error message, empty when there was none
 *
 * @param array $opts      CURLOPT_* => value map; CURLOPT_URL is required. In the PHP curl path the
 *                         whole array is applied last, so it overrides the defaults set here. In the
 *                         curl-impersonate path only the options translated below are honoured.
 * @param array $more_opts Extra switches read here: 'scrapingbee_support' (route through ScrapingBee
 *                         when enabled for the host) and 'no_curl_impersonate' (force PHP curl).
 * @return string|false    Response body; '' when the impersonate process could not be started,
 *                         false when curl_exec() fails.
 */
function curlget($opts = [], $more_opts = [])
{
    global $custom_curl_iface, $curl_iface, $user_agent, $allow_invalid_certs, $curl_response, $curl_info, $curl_error, $max_download_size, $curl_impersonate_enabled, $curl_impersonate_binary, $curl_impersonate_skip_hosts, $proxy_by_host_enabled, $proxy_by_host_iface, $proxy_by_hosts, $rapidapi_key, $scrapingbee_enabled, $scrapingbee_hosts;
    // parsed before any ScrapingBee rewrite, so the skip-host check below sees the original host
    $parse_url = parse_url($opts[CURLOPT_URL]);
    $curl_info = [];
    $curl_error = '';
    $is_scrapingbee = false;
    // optionally rewrite the request so it goes through the ScrapingBee RapidAPI endpoint
    if (!empty($more_opts['scrapingbee_support']) && !empty($scrapingbee_enabled) && ($scrapingbee_hosts == 'all' || in_array(parse_url($opts[CURLOPT_URL], PHP_URL_HOST), $scrapingbee_hosts))) {
        $opts[CURLOPT_URL] = 'https://scrapingbee.p.rapidapi.com/?url=' . urlencode($opts[CURLOPT_URL]) . '&render_js=true';
        $opts[CURLOPT_HTTPHEADER][] = 'x-rapidapi-host: scrapingbee.p.rapidapi.com';
        $opts[CURLOPT_HTTPHEADER][] = 'x-rapidapi-key: ' . $rapidapi_key;
        $opts[CURLOPT_TIMEOUT] = 31;
        $is_scrapingbee = true;
    }
    if ($curl_impersonate_enabled && !empty($curl_impersonate_skip_hosts) && in_array($parse_url['host'], $curl_impersonate_skip_hosts)) {
        echo "skipping impersonate for host " . $parse_url['host'] . " in \$curl_impersonate_skip_hosts\n";
        $more_opts['no_curl_impersonate'] = true;
    }
    // determine interface
    if ($proxy_by_host_enabled && in_array(parse_url($opts[CURLOPT_URL], PHP_URL_HOST), $proxy_by_hosts)) {
        $set_iface = $proxy_by_host_iface;
    } elseif ($custom_curl_iface && !in_array(parse_url($opts[CURLOPT_URL], PHP_URL_HOST), ['localhost', '127.0.0.1']) && !(isset($opts[CURLOPT_PROXY]) && in_array(parse_url($opts[CURLOPT_PROXY], PHP_URL_HOST), ['localhost', '127.0.0.1']))) {
        // custom interface is not used for local targets or local proxies
        $set_iface = $curl_iface;
    } else {
        $set_iface = false;
    }
    if ($curl_impersonate_enabled && empty($more_opts['no_curl_impersonate'])) {
        // commandline impersonate: transfer stats are written to stderr as JSON, body to stdout
        $cmd = "$curl_impersonate_binary -Ls -w '%{stderr}%{json}' --retry 1 --max-redirs 7 -b cookies.txt -c cookies.txt --ipv4";
        $cmd .= ' --connect-timeout ' . (!empty($opts[CURLOPT_CONNECTTIMEOUT]) ? $opts[CURLOPT_CONNECTTIMEOUT] : 15);
        $cmd .= ' --max-time ' . (!empty($opts[CURLOPT_TIMEOUT]) ? $opts[CURLOPT_TIMEOUT] : 15);
        if (!empty($set_iface)) {
            // escaped like every other interpolated argument
            $cmd .= ' --interface ' . escapeshellarg($set_iface);
        }
        if (!empty($opts[CURLOPT_PROXY]) && !empty($opts[CURLOPT_PROXYTYPE])) {
            // lookup table indexed by the numeric CURLOPT_PROXYTYPE value to get the proxy URL scheme
            $cmd .= ' --proxy ' . escapeshellarg(['http', 'http', 'https', '', 'socks4', 'socks5', 'socks4a', 'socks5h'][$opts[CURLOPT_PROXYTYPE]] . '://' . $opts[CURLOPT_PROXY]);
        }
        if (!empty($allow_invalid_certs)) {
            $cmd .= " --insecure";
        }
        if (!empty($opts[CURLOPT_HTTPHEADER])) {
            foreach ($opts[CURLOPT_HTTPHEADER] as $h) {
                $cmd .= ' -H ' . escapeshellarg($h);
            }
        }
        if (!empty($opts[CURLOPT_USERPWD])) {
            $cmd .= ' -u ' . escapeshellarg($opts[CURLOPT_USERPWD]);
        }
        if (!empty($opts[CURLOPT_CUSTOMREQUEST])) {
            $cmd .= ' -X ' . escapeshellarg($opts[CURLOPT_CUSTOMREQUEST]);
        } elseif (!empty($opts[CURLOPT_POST])) {
            $cmd .= " -X POST";
        }
        if (!empty($opts[CURLOPT_POSTFIELDS])) {
            $cmd .= ' -d ' . escapeshellarg($opts[CURLOPT_POSTFIELDS]);
        }
        if (!empty($opts[CURLOPT_NOBODY])) {
            $cmd .= ' -I';
        }
        $cmd .= " --max-filesize $max_download_size";
        $cmd .= ' ' . escapeshellarg($opts[CURLOPT_URL]);
        // get stdout and stderr separately https://stackoverflow.com/a/25879953
        $tries = 0; // retry on rare error
        while (1) {
            $proc = proc_open($cmd, [1 => ['pipe', 'w'], 2 => ['pipe', 'w']], $pipes);
            $tries++;
            if (!is_resource($pipes[1]) || !is_resource($pipes[2])) {
                if ($tries == 5) {
                    $curl_info = ['EFFECTIVE_URL' => $opts[CURLOPT_URL], 'RESPONSE_CODE' => 0];
                    $curl_error = 'proc_open error';
                    echo "Error: curl_impersonate max tries reached.\nstdout: " . print_r($pipes[1], true) . "\nstderr: " . print_r($pipes[2], true) . "\n";
                    return '';
                }
                echo "Error: curl_impersonate did not return a resource.\nstdout: " . print_r($pipes[1], true) . "\nstderr: " . print_r($pipes[2], true) . "\nretrying\n";
                sleep(2);
                continue;
            }
            break;
        }
        $curl_response = stream_get_contents($pipes[1]); // stdout
        fclose($pipes[1]);
        $info = json_decode(stream_get_contents($pipes[2])); // stderr
        fclose($pipes[2]);
        proc_close($proc);
        // $info is null when stderr was not valid JSON (e.g. the binary is missing or printed a
        // shell error), so fall back to safe defaults instead of dereferencing null
        $curl_info = [
            'EFFECTIVE_URL' => $info->url_effective ?? $opts[CURLOPT_URL],
            'RESPONSE_CODE' => $info->http_code ?? 0
        ];
        if (($info->exitcode ?? null) == CURLE_FILESIZE_EXCEEDED) {
            $curl_info['SIZE_ABORT'] = true;
        }
        $curl_error = $info->errormsg ?? '';
    } else {
        // PHP curl
        $curl_response = '';
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 15);
        curl_setopt($ch, CURLOPT_TIMEOUT, 15);
        if (!empty($set_iface)) {
            curl_setopt($ch, CURLOPT_INTERFACE, $set_iface);
        }
        curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
        curl_setopt($ch, CURLOPT_COOKIEFILE, 'cookies.txt');
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        // curl_setopt($ch,CURLOPT_VERBOSE,1);
        // curl_setopt($ch,CURLOPT_HEADER,1);
        curl_setopt($ch, CURLOPT_MAXREDIRS, 7);
        if (!empty($allow_invalid_certs)) {
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        }
        curl_setopt($ch, CURLOPT_ENCODING, ''); // avoid gzipped result per http://stackoverflow.com/a/28295417
        curl_setopt($ch, CURLOPT_MAXFILESIZE, $max_download_size);
        // caller options go last so they override the defaults above
        curl_setopt_array($ch, $opts);
        $curl_response = curl_exec($ch);
        $curl_info = [
            'EFFECTIVE_URL' => curl_getinfo($ch, CURLINFO_EFFECTIVE_URL),
            'RESPONSE_CODE' => curl_getinfo($ch, CURLINFO_RESPONSE_CODE)
        ];
        $curl_error = curl_error($ch);
        // strpos() returns false (never -1) when the needle is absent; comparing against -1
        // flagged every transfer as a size abort
        if (curl_errno($ch) == CURLE_FILESIZE_EXCEEDED || strpos($curl_error, 'Exceeded the maximum allowed file size') !== false) {
            $curl_info['SIZE_ABORT'] = true;
        } // str check for PHP<8.4
        curl_close($ch);
    }
    if ($is_scrapingbee && $curl_info['RESPONSE_CODE'] <> 200) {
        echo "ScrapingBee error: " . trim($curl_response) . "\n";
    }
    // both methods
    // if redirected to the YouTube consent page, report the intended target instead
    if (parse_url($curl_info['EFFECTIVE_URL'], PHP_URL_HOST) == 'consent.youtube.com') {
        parse_str(parse_url($curl_info['EFFECTIVE_URL'], PHP_URL_QUERY), $q);
        if (isset($q['continue'])) {
            $curl_info['EFFECTIVE_URL'] = $q['continue'];
        }
    }
    if (!empty($curl_error)) {
        echo "curl error: $curl_error\n";
    }
    return $curl_response;
}
/**
 * Tell whether the sender of the current message is a bot admin.
 *
 * Looks up $incnick in the global $users list via search_multi() and checks the matching
 * entry's 'account' against $admins (strict comparison).
 *
 * @return bool false when the nick is not in $users or its account is not listed in $admins
 */
function isadmin()
{
    // todo: verify admins that send commands without being in channel user list
    global $admins, $incnick, $users;
    $idx = search_multi($users, 'nick', $incnick);
    // strict check against false because a match at index 0 is a valid hit
    return $idx !== false && in_array($users[$idx]['account'], $admins, true);
}
/**
 * Tell whether the current line originated from the bot itself.
 *
 * Takes the text between the first character of $ex[0] and the first '!' and
 * compares it (loosely) with the global $nick.
 *
 * @return bool
 */
function isme()
{
    global $ex, $nick;
    $prefix = $ex[0];
    $bang = strpos($prefix, '!');
    // skip the first character, stop right before the '!'
    $sender = substr($prefix, 1, $bang - 1);
    return $sender == $nick;
}
/**
 * Run every queued operator action in $opqueue, then empty the queue.
 *
 * Each queue entry is [$what, $who, $opts]. Handled actions: kick, remove, remove_quiet,
 * topic, invite, +q, -q, +b, -b. Unless $always_opped is set, each action also drops the
 * bot's own op status (a separate "-o" line, or "-o" folded into the same MODE line).
 * Does nothing while $doopdop_lock is held or the queue is empty. After the queue is
 * processed, $opped is reset, the function sleeps 2 seconds, and the queue and lock are cleared.
 *
 * @return void
 */
function doopdop()
{
    global $datafile, $nick, $channel, $opped, $opqueue, $doopdop_lock, $always_opped;
    if ($doopdop_lock || empty($opqueue)) {
        return;
    }
    $doopdop_lock = true;

    // give up op status after an action, unless the bot is meant to stay opped
    $drop_ops = function () use ($always_opped, $channel, $nick) {
        if (empty($always_opped)) {
            send("MODE $channel -o $nick\n");
        }
    };
    // optional " :reason" suffix for KICK / REMOVE
    $reason_suffix = function ($opts) {
        return $opts['msg'] ? ' :' . $opts['msg'] : '';
    };
    // remove a list mode ('q' or 'b') from several masks in one MODE line; the number of masks
    // is capped at 3 when "-o" shares the line and at 4 otherwise
    $clear_modes = function ($letter, $targets) use ($always_opped, $channel, $nick) {
        if (empty($always_opped)) {
            if (count($targets) > 3) {
                $targets = array_slice($targets, 0, 3);
            }
            $mode = '-' . str_repeat($letter, count($targets)) . 'o';
            send("MODE $channel $mode " . implode(' ', $targets) . " $nick\n");
        } else {
            if (count($targets) > 4) {
                $targets = array_slice($targets, 0, 4);
            }
            $mode = '-' . str_repeat($letter, count($targets));
            send("MODE $channel $mode " . implode(' ', $targets) . "\n");
        }
    };

    foreach ($opqueue as $job) {
        list($what, $who, $opts) = $job;
        switch ($what) {
            case 'kick':
                send("KICK $channel $who" . $reason_suffix($opts) . "\n");
                $drop_ops();
                break;

            case 'remove':
                send("REMOVE $channel $who" . $reason_suffix($opts) . "\n");
                $drop_ops();
                break;

            case 'remove_quiet':
                // here $who is the mask to quiet; the nick to remove comes from $opts
                send("REMOVE $channel {$opts['nick']}" . $reason_suffix($opts) . "\n");
                $drop_ops();
                if ($opts['timed']) {
                    timedquiet($opts['tqtime'], $who);
                } else {
                    send("PRIVMSG chanserv :QUIET $channel $who\n");
                }
                break;

            case 'topic':
                // nothing to set, skip this entry
                if (!empty($opts['msg'])) {
                    send("TOPIC $channel :{$opts['msg']}\n");
                    $drop_ops();
                }
                break;

            case 'invite':
                // nobody to invite, skip this entry
                if (!empty($who)) {
                    send("INVITE $who $channel\n");
                    $drop_ops();
                }
                break;

            case '+q':
                if (empty($always_opped)) {
                    send("MODE $channel +q-o $who $nick\n");
                } else {
                    send("MODE $channel +q $who\n");
                }
                break;

            case '-q':
                $clear_modes('q', $who);
                break;

            case '+b':
                list($tmpmask, $tmpreason, $tmpnick) = $who;
                if (empty($always_opped)) {
                    if (!empty($tmpnick)) {
                        send("MODE $channel +b $tmpmask\n");
                        send("KICK $channel $tmpnick :$tmpreason\n");
                        send("MODE $channel -o $nick\n");
                    } else {
                        send("MODE $channel +b-o $tmpmask $nick\n");
                    } // todo: find nick by mask and kick
                } else {
                    send("MODE $channel +b $tmpmask\n");
                    if (!empty($tmpnick)) {
                        send("KICK $channel $tmpnick :$tmpreason\n");
                    }
                }
                break;

            case '-b':
                $clear_modes('b', $who);
                break;
        }
    }

    $opped = false;
    sleep(2);
    $opqueue = [];
    $doopdop_lock = false;
}
/**
 * Obtain operator status so the queued actions can run.
 *
 * When $always_opped is set the queue is processed right away via doopdop().
 * Otherwise ChanServ is asked for ops once; $getops_lock stops repeat requests
 * while one is pending.
 *
 * @return void
 */
function getops()
{
    global $channel, $getops_lock, $always_opped;
    if ($always_opped) {
        // just run the queue
        doopdop();
        return;
    }
    if (!$getops_lock) {
        $getops_lock = true;
        send("PRIVMSG ChanServ :OP $channel\n");
        // wait for ops in main loop
    }
}
/**
 * Write a raw line to the IRC socket, echoing it to stdout first.
 *
 * With $skip_dupe_output enabled, a line equal to the previously sent one is
 * dropped without being written (and still reported as success).
 *
 * @param string $a Complete line to send, including its trailing newline
 * @return bool true if sent or skipped as a duplicate, false if the socket timed out
 */
function send($a)
{
    global $socket, $skip_dupe_output, $last_send;
    if ($skip_dupe_output) {
        if ($a == $last_send) {
            return true;
        }
        $last_send = $a;
    }
    echo "> $a";
    fputs($socket, "$a");
    return !timedout();
}
/**
 * Write a raw line to the IRC socket with no duplicate suppression, echoing it to stdout first.
 *
 * @param string $a Complete line to send, including its trailing newline
 * @return bool false if the socket reports a timeout after the write, true otherwise
 */
function send_no_filter($a)
{
    global $socket;
    echo "> $a";
    fputs($socket, "$a");
    return !timedout();
}
/**
 * Report whether the IRC socket's stream metadata has its 'timed_out' flag set.
 *
 * @return bool
 */
function timedout()
{
    global $socket;
    $status = stream_get_meta_data($socket);
    return (bool) $status['timed_out'];
}
// Shorten $url with the service named in $short_url_service
// ('tiny.cc', 'tinyurl', 'bit.ly' or 'da.gd').
// Returns the short URL on success. On any failure returns $fail_url, which
// defaults to the original $url when not supplied.
// $short_url_token may be a single token or an array of tokens; with an array,
// each attempt takes the next token via the global $short_url_token_index.
function make_short_url($url, $fail_url = '')
{
global $short_url_service, $short_url_token, $bitly_token, $short_url_token_index;
if (empty($fail_url)) {
$fail_url = $url;
}
$tries = 0;
// Each pass picks a token and makes one request; a pass either returns or,
// for the token-array retry case, falls through to loop again.
while (true) {
if (!empty($short_url_token)) {
if (is_array($short_url_token)) {
// rotate through the token list, wrapping back to the first entry
if ($short_url_token_index == count($short_url_token)) {
$short_url_token_index = 0;
}
$use_token = $short_url_token[$short_url_token_index];
$short_url_token_index++;
} else {
$use_token = $short_url_token;
}
} elseif (!empty($bitly_token) && empty($short_url_service)) { # deprecated $bitly_token
// legacy config: only $bitly_token set, so switch the service to bit.ly
$short_url_service = 'bit.ly';
$use_token = $bitly_token;
$short_url_token = '';
} else {
$use_token = '';
}
if ($short_url_service == 'tiny.cc') {
// request body is {"urls":[{"long_url": ...}]}; token is sent base64-encoded as Basic auth
$body = new stdClass();
$body->urls = array();
$body2 = new stdClass();
$body2->long_url = $url;
$body->urls[0] = $body2;
// NOTE(review): $query is always empty here, so nothing is appended to the endpoint URL
$query = [];
$r = json_decode(curlget([CURLOPT_URL => 'https://tiny.cc/tiny/api/3/urls' . http_build_query($query), CURLOPT_CUSTOMREQUEST => 'POST', CURLOPT_POSTFIELDS => json_encode($body), CURLOPT_HTTPHEADER => ['Authorization: Basic ' . base64_encode($use_token), 'Content-Type: application/json', 'Accept: application/json', 'Cache-Control: no-cache']]));
// success requires an error code of 0 and a short_url in the first result
if (empty($r) || !isset($r->urls[0]->error->code) || $r->urls[0]->error->code <> 0 || !isset($r->urls[0]->short_url)) {
echo 'tiny.cc error. Response: ' . print_r($r, true);
// retry (with the next token) only when a token array is configured
if (is_array($short_url_token) && $tries <> count($short_url_token)) {
$tries++;
echo "retrying\n";
} else {
return $fail_url;
}
} else {
return 'https://' . $r->urls[0]->short_url;
}
} elseif ($short_url_service == 'tinyurl') {
$r = json_decode(curlget([CURLOPT_URL => 'https://api.tinyurl.com/create', CURLOPT_CUSTOMREQUEST => 'POST', CURLOPT_POSTFIELDS => json_encode(['url' => $url]), CURLOPT_HTTPHEADER => ['Authorization: Bearer ' . $use_token, 'Content-Type: application/json', 'Accept: application/json']]));
// success requires code 0 and a non-empty data->tiny_url
if (empty($r) || !isset($r->code) || $r->code <> 0 || empty($r->data) || empty($r->data->tiny_url)) {
echo 'TinyURL error. Response: ' . print_r($r, true);
if (is_array($short_url_token) && $tries <> count($short_url_token)) {
$tries++;
echo "retrying\n";
} else {
return $fail_url;
}
} else {
return $r->data->tiny_url;
}
} elseif ($short_url_service == 'bit.ly') {
$r = json_decode(curlget([CURLOPT_URL => 'https://api-ssl.bitly.com/v4/shorten', CURLOPT_CUSTOMREQUEST => 'POST', CURLOPT_POSTFIELDS => json_encode(['long_url' => $url]), CURLOPT_HTTPHEADER => ['Authorization: Bearer ' . $use_token, 'Content-Type: application/json', 'Accept: application/json']]));
// the response id is used as the short link, prefixed with https://
if (empty($r->id)) {
echo 'Bitly error. Response: ' . print_r($r, true);
if (is_array($short_url_token) && $tries <> count($short_url_token)) {
$tries++;
echo "retrying\n";
} else {
return $fail_url;
}
} else {
return 'https://' . $r->id;
}
} elseif ($short_url_service == 'da.gd') {
// plain-text GET API, no token used; this branch never retries
$r = curlget([CURLOPT_URL => 'https://da.gd/s?url=' . rawurlencode($url), CURLOPT_HTTPHEADER => ['Accept: text/plain']], ['no_curl_impersonate' => 1]);
if (empty($r) || !preg_match('#^https://da\.gd#', $r)) {
echo 'da.gd error. Response: ' . print_r($r, true);
return $fail_url;
} else {
return rtrim($r);
}
} else {
// no (known) service configured
echo "Warning: Can't make short URL. Configure \$short_url_service / \$short_url_token in the settings file.\n";
return $fail_url;
}
}
}
// Domain hint for a URL: the host part reduced to its base domain,
// e.g. https://one.microsoft.com -> microsoft.com, https://www.telegraph.co.uk -> telegraph.co.uk
function get_url_hint($u)
{
    $host = parse_url($u, PHP_URL_HOST);
    return get_base_domain($host);
}
// Request $u through curlget() and return the effective URL that curlget()
// leaves in the global $curl_info; falls back to $u when none is recorded.
// $more_opts: 'no_body' (truthy = set CURLOPT_NOBODY) and 'header' (list of
// request header lines); both are optional.
function get_final_url($u, $more_opts = ['no_body' => false, 'header' => []])
{
    global $curl_info;
    // nobody failed for e.g. http://help.urbanup.com/14769269
    $head_only = empty($more_opts['no_body']) ? 0 : 1;
    $headers = isset($more_opts['header']) ? $more_opts['header'] : [];
    $curl_opts = [
        CURLOPT_URL => $u,
        CURLOPT_NOBODY => $head_only,
        CURLOPT_HTTPHEADER => $headers
    ];
    // impersonate -L doesn't seem to get effective URL properly
    curlget($curl_opts, ['no_curl_impersonate' => 1]);
    if (empty($curl_info['EFFECTIVE_URL'])) {
        return $u;
    }
    return $curl_info['EFFECTIVE_URL'];
}
// Get the base domain of host $d, taking the public suffix list from
// https://publicsuffix.org/list/ into account (e.g. www.telegraph.co.uk -> telegraph.co.uk).
// The list is downloaded once, stored via set_data() and then kept in the
// global $public_suffixes for the lifetime of the process.
// Returns the lowercased host unchanged when no public suffix matches, when the
// host contains no dot, or when the list cannot be downloaded.
function get_base_domain($d)
{
    global $public_suffixes;
    $d = strtolower((string)$d); // parse_url() may hand us null/false
    if (empty($public_suffixes)) {
        // todo: refresh like once a month on bot start; for now, delete entry manually from data.db
        if (!get_data('public_suffix_list', '*')) {
            echo "Updating public suffix list\n";
            $f = file_get_contents('https://publicsuffix.org/list/public_suffix_list.dat');
            if (!empty($f)) {
                $lines = explode("\n", $f);
                $f = "// Source: https://publicsuffix.org/list/ (modified) License: https://mozilla.org/MPL/2.0/\n";
                foreach ($lines as $l) {
                    $l = rtrim($l, "\r");
                    // skip comments and blank lines (after explode() a blank line is '', never "\n")
                    if (substr($l, 0, 2) == '//' || trim($l) === '') {
                        continue;
                    } elseif (substr($l, 0, 2) == '*.') {
                        $l = substr($l, 2); // wildcard rule: keep the suffix itself
                    } elseif (substr($l, 0, 1) == '!') {
                        $l = substr($l, 1); // exception rule: keep the name itself
                    }
                    $f .= "$l\n";
                }
                unset($lines);
                set_data('public_suffix_list', json_encode([time(), $f]), '*');
                unset($f);
            } else {
                echo "Error downloading public_suffix_list.dat\n";
                return $d;
            }
        }
        // store in memory (fastest); drop empty entries left by blank/trailing lines in older caches
        $public_suffixes = array_values(array_filter(
            explode("\n", json_decode(get_data('public_suffix_list', '*'), true)[1]),
            'strlen'
        ));
    }
    // Strip one leading label at a time; the first remainder that is a public
    // suffix gives "<last stripped label>.<suffix>". Only real labels are
    // stripped, so dot-less hosts and exhausted remainders never match.
    $c = $d;
    while (($dot = strpos($c, '.')) !== false) {
        $l = substr($c, 0, $dot); // save last stripped sub/dom
        $c = substr($c, $dot + 1);
        if ($c !== '' && in_array($c, $public_suffixes, true)) {
            if (substr($c, 0, 4) == 'www.' && $d <> "www.$c") {
                $c = preg_replace('/^www\./', '', $c);
            } // strip www if not main domain
            return "$l.$c";
        }
    }
    return $d; // not found
}
// Restart the bot process and exit the current one.
// Windows: a detached "php <script> <args>" is spawned through cmd.
// Elsewhere: a shutdown function re-executes the binary found in $_SERVER['_']
// with the original $argv via pcntl_exec().
// $msg is the QUIT message ('restart' when empty); it is only sent when
// $sendquit is truthy. This function does not return.
function dorestart($msg, $sendquit = true)
{
    echo "Restarting...\n";
    $on_windows = strncasecmp(PHP_OS, 'WIN', 3) == 0;
    if ($on_windows) {
        $script = escapeshellarg($_SERVER['PHP_SELF']);
        $args = array_map('escapeshellarg', array_slice($_SERVER['argv'], 1));
        $cmd = 'cmd /C start "" /B "php" ' . $script . ' ' . implode(' ', $args);
        $process = proc_open($cmd, [], $pipes, null, $_SERVER);
        if (is_resource($process)) {
            proc_close($process);
        }
    } else { // linux
        global $_, $argv;
        $_ = $_SERVER['_'];
        // the exec happens at shutdown, i.e. after the exit below
        register_shutdown_function(function () {
            global $_, $argv;
            pcntl_exec($_, $argv);
        });
    }
    if (!$msg) {
        $msg = 'restart';
    }
    if ($sendquit) {
        send("QUIT :$msg\n");
    }
    exit;
}
// Convert a YouTube v3 API duration (e.g. PT1M3S) to [H]H:MM:SS / M:SS,
// per https://stackoverflow.com/a/35836604
// Days are folded into the hour count; hours are omitted when there are none.
function covtime($yt)
{
    $rest = str_replace(['P', 'T'], '', $yt);
    $parts = ['D' => 0, 'H' => 0, 'M' => 0, 'S' => 0];
    foreach (['D', 'H', 'M', 'S'] as $unit) {
        $at = strpos($rest, $unit);
        if ($at === false) {
            continue; // unit absent, stays 0
        }
        $parts[$unit] = substr($rest, 0, $at);
        $rest = substr($rest, $at + 1);
    }
    $two = function ($v) {
        return str_pad($v, 2, '0', STR_PAD_LEFT);
    };
    if ($parts['D'] > 0) {
        // add days to hours
        return ($parts['H'] + (24 * $parts['D'])) . ':' . $two($parts['M']) . ':' . $two($parts['S']);
    }
    if ($parts['H'] > 0) {
        return $parts['H'] . ':' . $two($parts['M']) . ':' . $two($parts['S']);
    }
    return $parts['M'] . ':' . $two($parts['S']);
}
// Search a list of arrays for the first row whose [$key] loosely equals $val.
// Returns that row's key in $arr, or false when no row matches.
function search_multi($arr, $key, $val)
{
    foreach (array_keys($arr) as $id) {
        $row = $arr[$id];
        if ($row[$key] == $val) {
            return $id;
        }
    }
    return false;
}
// Split a prefix of the form "<c>nick!user@host" into [nick, host].
// The first character of the part before '!' is dropped, and the host is the
// piece between the first and second '@'.
function parsemask($mask)
{
    $before_bang = explode('!', $mask)[0];
    $after_at = explode('@', $mask)[1];
    return [substr($before_bang, 1), $after_at];
}
// disabled
//function check_dnsbl($nick, $host, $skip = false)
//{
// global $dnsbls, $opqueue;
// $ignores = []; // nicks to ignore for this
// if (in_array($nick, $ignores)) {
// echo "DNSBL: ignoring nick $nick\n";
// return;
// }
// $dnsbls = ['all.s5h.net',
// 'cbl.abuseat.org',
// 'dnsbl.sorbs.net',
// 'bl.spamcop.net'];
// // ip check
// if (substr($host, 0, 8) == 'gateway/' && strpos($host, '/ip.') !== false) $ip = gethostbyname(substr($host, strpos($host, '/ip.') + 4));
// else $ip = gethostbyname($host);
// if (filter_var($ip, FILTER_VALIDATE_IP) !== false) {
// echo "IP $ip detected.\n";
// echo ".. checking against " . count($dnsbls) . " DNSBLs\n";
// $rip = implode('.', array_reverse(explode('.', $ip)));
// foreach ($dnsbls as $bl) {
// $result = dns_get_record("$rip.$bl");
// echo "$bl result: " . print_r($result, true) . "\n";
// if (!empty($result)) {
// if (!$skip) {
// echo "found in dnsbl. taking action.\n";
// $opqueue[] = ['+b', ["*!*@$ip", "IP found in DNSBL. Please don't spam.", $nick]];
// getops();
// // timedquiet($host_blacklist_time,"*!*@$ip");
// dnsbl_msg($nick);
// return;
// } else echo "found in dnsbl, but action skipped.\n";
// } else echo "not found in dnsbl.\n";
// }
// }
//}
//function dnsbl_msg($nick)
//{
// global $channel;
// send("PRIVMSG $nick :You have been automatically banned in $channel due to abuse from spammers. If this is a mistake please contact an op seen in /msg chanserv access $channel list\n");
//}
// Check a user's host against the configured blacklists and quiet on a match.
// First the host is resolved with gethostbyname() (for "gateway/.../ip.X" hosts
// only the part after "/ip." is resolved) and compared against the CIDR ranges
// in $host_blacklist_ips; then the raw host is checked for any substring in
// $host_blacklist_strings. The first hit triggers timedquiet() for
// $host_blacklist_time plus a notice to the nick, and ends the check.
function check_blacklist($nick, $host)
{
    global $host_blacklist_strings, $host_blacklist_ips, $host_blacklist_time;
    echo "Checking blacklist, nick: $nick host: $host\n";
    // ip check
    $lookup = $host;
    if (substr($host, 0, 8) == 'gateway/') {
        $marker = strpos($host, '/ip.');
        if ($marker !== false) {
            $lookup = substr($host, $marker + 4);
        }
    }
    $ip = gethostbyname($lookup);
    if (filter_var($ip, FILTER_VALIDATE_IP) !== false) {
        echo "IP $ip detected.\n";
        echo ".. checking against " . count($host_blacklist_ips) . " IP blacklists\n";
        foreach ($host_blacklist_ips as $range) {
            if (!cidr_match($ip, $range)) {
                continue;
            }
            echo "* IP $ip matched blacklisted $range\n";
            timedquiet($host_blacklist_time, "*!*@$ip");
            blacklisted_msg($nick);
            return;
        }
    }
    // host check
    echo ".. checking against " . count($host_blacklist_strings) . " string blacklists\n";
    foreach ($host_blacklist_strings as $needle) {
        if (strpos($host, $needle) === false) {
            continue;
        }
        echo "* Host $host matched blacklisted $needle\n";
        timedquiet($host_blacklist_time, "*!*@$host");
        blacklisted_msg($nick);
        return;
    }
}
// Privately tell $nick that they were automatically quieted in the channel
// and how to find an op to contact.
function blacklisted_msg($nick)
{
    global $channel;
    $line = "PRIVMSG $nick :You have been automatically quieted in $channel due to abuse. If this is a mistake please contact an op seen in /msg chanserv access $channel list\n";
    send($line);
}
// Test whether IPv4 address $ip lies inside $range ("a.b.c.d/bits", or a bare
// address, which is treated as /32). Based on http://stackoverflow.com/a/594134
// Returns false for anything that is not a valid IPv4 address / prefix length
// (0-32) instead of silently treating it as 0.
function cidr_match($ip, $range)
{
    $parts = explode('/', $range, 2);
    $bits = 32; // a missing or empty prefix means an exact-address match
    if (isset($parts[1]) && $parts[1] !== '') {
        if (!ctype_digit($parts[1])) {
            return false;
        }
        $bits = (int)$parts[1]; // note: "/0" is a valid prefix and must stay 0
    }
    if ($bits > 32) {
        return false;
    }
    $ip = ip2long($ip);
    $subnet = ip2long($parts[0]);
    if ($ip === false || $subnet === false) {
        return false;
    }
    // the & is supposedly needed for 64 bit machines per http://tinyurl.com/oxz4lrw
    $mask = $bits === 0 ? 0 : ((-1 << (32 - $bits)) & ip2long('255.255.255.255'));
    $subnet &= $mask; // nb: in case the supplied subnet wasn't correctly aligned
    return ($ip & $mask) == $subnet;
}
// Quiet $mask in the channel and, for a positive numeric $secs, record the
// quiet under the 'timed_quiets' data key as "<unix time>|<secs>|<mask>".
// On 'freenode' the quiet goes through ChanServ; on 'libera' a '+q' entry is
// queued and ops are requested. Any earlier record for the same mask is dropped
// before the new one is appended.
function timedquiet($secs, $mask)
{
    global $network, $channel, $datafile, $opqueue;
    switch ($network) {
        case 'freenode':
            send("PRIVMSG chanserv :QUIET $channel $mask\n");
            break;
        case 'libera':
            $opqueue[] = ['+q', $mask];
            getops();
            break;
    }
    if (!(is_numeric($secs) && $secs > 0)) {
        return; // untimed quiet, nothing to record
    }
    $tqs = @json_decode(get_data('timed_quiets'), true) ?: [];
    foreach ($tqs as $idx => $entry) {
        $fields = explode('|', $entry);
        echo "tq[2]=$fields[2] mask=$mask";
        if ($fields[2] == $mask) {
            echo "removing dupe\n";
            unset($tqs[$idx]);
        }
    }
    $tqs[] = time() . "|$secs|$mask";
    set_data('timed_quiets', json_encode($tqs));
}
// Fetch the plain-text extract of an English Wikipedia page and return it
// shortened to $len characters via format_extract().
// $q is a (possibly URL-encoded) title/path; query variables are stripped. If
// $q carries a #fragment, the extract starts at the section with that heading.
// When the full fragment is not a heading, trailing words are dropped one at a
// time and the search is retried (e.g. "Dynamic microphone" -> "Dynamic").
// Note: an empty HTTP response is retried indefinitely.
function get_wiki_extract($q, $len = 280)
{
    $q = urldecode($q);
    $pu = parse_url($q) ?: [];
    $path = isset($pu['path']) ? $pu['path'] : '';
    $fragment = !empty($pu['fragment']) ? $pu['fragment'] : '';
    $q = $path . ($fragment !== '' ? '#' . $fragment : ''); # strip query vars like ?useskin
    $url = "https://en.wikipedia.org/w/api.php?action=query&titles=" . urlencode($q) . "&prop=extracts&format=json&redirects&formatversion=2&explaintext";
    while (1) {
        $tmp = curlget([CURLOPT_URL => $url]);
        if (empty($tmp)) {
            echo "No response from Wikipedia, retrying..\n";
            continue;
        }
        break;
    }
    $tmp = json_decode($tmp);
    // missing pages / malformed responses carry no extract; treat as empty text
    $extract = isset($tmp->query->pages[0]->extract) ? (string)$tmp->query->pages[0]->extract : '';
    unset($tmp);
    $foundfrag = false;
    $frag = '';
    if ($fragment !== '') { // jump to fragment
        $frag = trim(str_replace('_', ' ', $fragment));
        // normalise every heading level to "== Heading =="
        $extract = str_replace(['======', '=====', '====', '==='], '==', $extract);
        // try to find some sections with multiple ids, e.g. https://en.wikipedia.org/wiki/Microphone#Dynamic
        // https://en.wikipedia.org/wiki/Microphone#Dynamic_microphone by removing additional words from
        // the fragment - useful for hidden search-friendly ids that have a shorter version in the table
        // of contents e.g. !w dynamic microphone
        $frags = explode(' ', $frag);
        while (1) {
            $pos = mb_stripos($extract, "\n== $frag ==\n");
            if ($pos !== false) {
                $extract = mb_substr($extract, $pos);
                $foundfrag = true;
                break;
            }
            if (count($frags) == 1) {
                break;
            }
            array_pop($frags);
            $frag = implode(' ', $frags); // actually search for the shortened heading next round
        }
    }
    $arr = explode("\n", trim($extract));
    $unset = false;
    foreach ($arr as $k => $v) { // reformat section headers
        if (substr($v, 0, 3) == '== ') {
            if ($foundfrag && $v == "== $frag ==") {
                $unset = $k; // remove current header
            } else {
                $arr[$k] = trim(str_replace('==', '', $v)) . ': ';
            }
        }
    }
    if ($unset !== false) {
        unset($arr[$unset]);
        $arr = array_values($arr);
    }
    $e = implode("\n", $arr);
    $e = str_replace(' ()', '', $e); // phonetics are often missing from extract
    $e = preg_replace('/\.(\w{2,}|A )/', ". $1", $e); // wikipedia seems to omit a space between paragraphs often
    return format_extract($e, $len);
}
// Flatten a text extract to one line and shorten it to $len characters:
// newlines/tabs become spaces, HTML entities are decoded, tags are stripped,
// whitespace runs collapse to one space, then str_shorten() is applied.
// Unless $opts['keep_quotes'] is set, surrounding double quotes are trimmed
// (callers wrap the result in quotes themselves).
function format_extract($e, $len = 280, $opts = [])
{
    $text = str_replace(["\n", "\t"], ' ', $e);
    $text = html_entity_decode($text, ENT_QUOTES);
    // NOTE(review): the pattern matches bare "digits;" runs (no leading "&#"),
    // presumably meant for numeric entities - confirm against the original intent
    $convert = function ($m) {
        return mb_convert_encoding($m[1], 'UTF-8', 'HTML-ENTITIES');
    };
    $text = preg_replace_callback("/([0-9]+;)/", $convert, $text);
    $text = preg_replace('/\s+/m', ' ', strip_tags($text));
    $text = str_shorten($text, $len);
    if (isset($opts['keep_quotes'])) {
        return $text;
    }
    // remove outside quotes because we wrap in quotes
    return trim(trim($text, '"'));
}
// Perform a signed GET against the Twitter 1.1 API (OAuth 1.0, HMAC-SHA1),
// see https://stackoverflow.com/a/12939923
// $u is the endpoint path appended to https://api.twitter.com/1.1, $op the
// request parameters. Returns the decoded JSON response (null when undecodable).
function twitter_api($u, $op)
{
    global $twitter_consumer_key, $twitter_consumer_secret, $twitter_access_token, $twitter_access_token_secret;
    $endpoint = "https://api.twitter.com/1.1$u";
    // oauth params merged with the request params, sorted by name for signing
    $params = array_merge([
        'oauth_consumer_key' => $twitter_consumer_key,
        'oauth_nonce' => uniqid('', true),
        'oauth_signature_method' => 'HMAC-SHA1',
        'oauth_token' => $twitter_access_token,
        'oauth_timestamp' => time(),
        'oauth_version' => '1.0'
    ], $op);
    ksort($params);
    // signature base string: METHOD & url & encoded "k=v&k=v..." list
    $pairs = [];
    foreach ($params as $name => $value) {
        $pairs[] = "$name=" . rawurlencode($value);
    }
    $base = 'GET&' . rawurlencode($endpoint) . '&' . rawurlencode(implode('&', $pairs));
    $signing_key = rawurlencode($twitter_consumer_secret) . '&' . rawurlencode($twitter_access_token_secret);
    $params['oauth_signature'] = base64_encode(hash_hmac('sha1', $base, $signing_key, true));
    // Authorization header carries every param as k="v"
    $quoted = [];
    foreach ($params as $name => $value) {
        $quoted[] = "$name=\"" . rawurlencode($value) . "\"";
    }
    $headers = ['Authorization: OAuth ' . implode(', ', $quoted)];
    // query string holds only the caller's own params
    $query = [];
    foreach ($op as $name => $value) {
        $query[] = "$name=" . rawurlencode($value);
    }
    $response = curlget([CURLOPT_URL => "$endpoint?" . implode('&', $query), CURLOPT_HTTPHEADER => $headers]);
    return @json_decode($response);
}
// Fetch $num random integers in [$min, $max] from random.org and return them
// as one space-separated string passed through str_shorten().
// Out-of-range arguments fall back to defaults: $max < 1 becomes 100, a $min
// that is not below $max becomes 1, $num < 1 becomes 1.
function get_true_random($min = 1, $max = 100, $num = 1)
{
    $max = (int)$max;
    if ($max < 1) {
        $max = 100;
    }
    $min = (int)$min;
    if ($min >= $max) {
        $min = 1;
    }
    $num = (int)$num;
    if ($num < 1) {
        $num = 1;
    }
    $body = curlget([CURLOPT_URL => "https://www.random.org/integers/?num=$num&min=$min&max=$max&col=1&base=10&format=plain&rnd=new"]);
    $numbers = trim(str_replace("\n", ' ', $body));
    return str_shorten($numbers);
}
// Translate text with the Google Translate v2 API. Requires the gcloud
// command-line tool to be installed and $gcloud_translate_keyfile to be set.
// $opts: 'text', 'from_lang', 'to_lang'.
// Returns an object with ->text, ->from_lang, ->to_lang on success, or an
// object with ->error (a message when the monthly limit is hit, true otherwise).
// The byte length of translated text is tallied per calendar month under the
// 'google_translate_count' data key and capped by $gcloud_translate_max_chars.
function google_translate($opts = ['text' => '', 'from_lang' => '', 'to_lang' => ''])
{
    global $datafile, $gcloud_translate_keyfile, $gcloud_translate_max_chars;
    $result = new stdClass();
    // check limit, only store current year-month
    $this_month = date("Y-m");
    list($ym, $cnt) = json_decode(get_data('google_translate_count'), true) ?: [$this_month, 0];
    if ($ym <> date("Y-m")) {
        $ym = date("Y-m");
        $cnt = 0;
    }
    echo "Translating ($cnt/$gcloud_translate_max_chars" . ")...\n";
    $cost = strlen($opts['text']);
    if ($cnt + $cost > $gcloud_translate_max_chars) {
        $result->error = 'Monthly translate limit exceeded';
        return $result;
    }
    // get a token
    passthru("gcloud auth activate-service-account --key-file=$gcloud_translate_keyfile");
    $token = rtrim(shell_exec("gcloud auth print-access-token"));
    $body = json_encode(['q' => $opts['text'], 'source' => $opts['from_lang'], 'target' => $opts['to_lang']]);
    $orig_r = curlget([CURLOPT_URL => 'https://translation.googleapis.com/language/translate/v2', CURLOPT_CUSTOMREQUEST => 'POST', CURLOPT_POSTFIELDS => $body, CURLOPT_HTTPHEADER => ['Content-Type: application/json', 'Authorization: Bearer ' . $token]]);
    $r = json_decode($orig_r);
    if (!isset($r->data->translations[0])) {
        echo "Translation error." . (!empty($orig_r) ? " Response: $orig_r" : "") . "\n";
        $result->error = true;
        return $result;
    }
    // only successful translations count against the monthly total
    set_data('google_translate_count', json_encode([$ym, $cnt + strlen($opts['text'])]));
    $first = $r->data->translations[0];
    $result->text = html_entity_decode($first->translatedText, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    $result->from_lang = $first->detectedSourceLanguage ?? $opts['from_lang'];
    $result->to_lang = $opts['to_lang'];
    return $result;
}
// Map an ISO 639-1 language code to its English name.
// Only the part before the first '-' is used (e.g. "en-US" -> "en") and the
// lookup is case-insensitive. Returns 'Unknown' for unlisted codes.
// Both the deprecated codes (iw, ji, jw, in) and their current replacements
// (he, yi, jv, id) are listed.
function get_lang($c)
{
    global $language_codes;
    list($c) = explode('-', $c);
    $c = strtolower($c);
    if (!isset($language_codes)) {
        $language_codes = [
            'en' => 'English', 'aa' => 'Afar', 'ab' => 'Abkhazian', 'af' => 'Afrikaans', 'am' => 'Amharic',
            'ar' => 'Arabic', 'as' => 'Assamese', 'ay' => 'Aymara', 'az' => 'Azerbaijani', 'ba' => 'Bashkir',
            'be' => 'Byelorussian', 'bg' => 'Bulgarian', 'bh' => 'Bihari', 'bi' => 'Bislama', 'bn' => 'Bengali/Bangla',
            'bo' => 'Tibetan', 'br' => 'Breton', 'ca' => 'Catalan', 'co' => 'Corsican', 'cs' => 'Czech',
            'cy' => 'Welsh', 'da' => 'Danish', 'de' => 'German', 'dz' => 'Bhutani', 'el' => 'Greek',
            'eo' => 'Esperanto', 'es' => 'Spanish', 'et' => 'Estonian', 'eu' => 'Basque', 'fa' => 'Persian',
            'fi' => 'Finnish', 'fj' => 'Fiji', 'fo' => 'Faeroese', 'fr' => 'French', 'fy' => 'Frisian',
            'ga' => 'Irish', 'gd' => 'Scots/Gaelic', 'gl' => 'Galician', 'gn' => 'Guarani', 'gu' => 'Gujarati',
            'ha' => 'Hausa', 'he' => 'Hebrew', 'hi' => 'Hindi', 'hr' => 'Croatian', 'hu' => 'Hungarian',
            'hy' => 'Armenian', 'ia' => 'Interlingua', 'id' => 'Indonesian', 'ie' => 'Interlingue', 'ik' => 'Inupiak',
            'in' => 'Indonesian', 'is' => 'Icelandic', 'it' => 'Italian', 'iw' => 'Hebrew', 'ja' => 'Japanese',
            'ji' => 'Yiddish', 'jv' => 'Javanese', 'jw' => 'Javanese', 'ka' => 'Georgian', 'kk' => 'Kazakh',
            'kl' => 'Greenlandic', 'km' => 'Cambodian', 'kn' => 'Kannada', 'ko' => 'Korean', 'ks' => 'Kashmiri',
            'ku' => 'Kurdish', 'ky' => 'Kirghiz', 'la' => 'Latin', 'ln' => 'Lingala', 'lo' => 'Laothian',
            'lt' => 'Lithuanian', 'lv' => 'Latvian/Lettish', 'mg' => 'Malagasy', 'mi' => 'Maori', 'mk' => 'Macedonian',
            'ml' => 'Malayalam', 'mn' => 'Mongolian', 'mo' => 'Moldavian', 'mr' => 'Marathi', 'ms' => 'Malay',
            'mt' => 'Maltese', 'my' => 'Burmese', 'na' => 'Nauru', 'ne' => 'Nepali', 'nl' => 'Dutch',
            'no' => 'Norwegian', 'oc' => 'Occitan', 'om' => 'Oromo', 'or' => 'Oriya', 'pa' => 'Punjabi',
            'pl' => 'Polish', 'ps' => 'Pashto/Pushto', 'pt' => 'Portuguese', 'qu' => 'Quechua', 'rm' => 'Rhaeto-Romance',
            'rn' => 'Kirundi', 'ro' => 'Romanian', 'ru' => 'Russian', 'rw' => 'Kinyarwanda', 'sa' => 'Sanskrit',
            'sd' => 'Sindhi', 'sg' => 'Sango', 'sh' => 'Serbo-Croatian', 'si' => 'Singhalese', 'sk' => 'Slovak',
            'sl' => 'Slovenian', 'sm' => 'Samoan', 'sn' => 'Shona', 'so' => 'Somali', 'sq' => 'Albanian',
            'sr' => 'Serbian', 'ss' => 'Siswati', 'st' => 'Sesotho', 'su' => 'Sundanese', 'sv' => 'Swedish',
            'sw' => 'Swahili', 'ta' => 'Tamil', 'te' => 'Telugu', 'tg' => 'Tajik', 'th' => 'Thai',
            'ti' => 'Tigrinya', 'tk' => 'Turkmen', 'tl' => 'Tagalog', 'tn' => 'Setswana', 'to' => 'Tonga',
            'tr' => 'Turkish', 'ts' => 'Tsonga', 'tt' => 'Tatar', 'tw' => 'Twi', 'uk' => 'Ukrainian',
            'ur' => 'Urdu', 'uz' => 'Uzbek', 'vi' => 'Vietnamese', 'vo' => 'Volapuk', 'wo' => 'Wolof',
            'xh' => 'Xhosa', 'yi' => 'Yiddish', 'yo' => 'Yoruba', 'zh' => 'Chinese', 'zu' => 'Zulu',
        ];
    }
    return isset($language_codes[$c]) ? $language_codes[$c] : 'Unknown';
}
// Replace only the first occurrence of $needle in $haystack with $replace.
// Returns $haystack unchanged when $needle does not occur.
function str_replace_one($needle, $replace, $haystack)
{
    $at = strpos($haystack, $needle);
    if ($at === false) {
        return $haystack;
    }
    return substr_replace($haystack, $replace, $at, strlen($needle));
}
// Shorten a string to the last whole word within $len characters and within
// the byte budget of one IRC line (502 - $baselen bytes by default).
// $opts flags (all optional, default off):
//   nowordcut  - cut exactly at the limit instead of at the last space
//   nodots     - don't append " ..." (also frees 4 bytes of budget)
//   nobrackets - caller adds no brackets (frees 4 bytes)
//   nobold     - caller adds no bold codes (frees 2 bytes)
//   keeppunc   - keep trailing ";.," when trimming before the ellipsis
// Text without any usable space is cut at the limit rather than being reduced
// to its first character.
function str_shorten($s, $len = 999, $opts = [])
{
    global $baselen;
    $opts = (array)$opts + ['nowordcut' => false, 'nodots' => false, 'nobrackets' => false, 'nobold' => false, 'keeppunc' => false];
    // cut back to the last space; leave the text alone when there is no space to cut at
    $cut_to_word = function ($str) {
        $sp = mb_strrpos($str, ' ');
        return ($sp !== false && $sp > 0) ? mb_substr($str, 0, $sp + 1) : $str;
    };
    $e = false; // whether anything was cut off
    if (mb_strlen($s) > $len) { // desired max chars
        $s = mb_substr($s, 0, $len);
        if (!$opts['nowordcut']) {
            $s = $cut_to_word($s);
        }
        $e = true;
    }
    $m = 502 - (int)$baselen; // max 512 - 4(ellipses) - 4(brackets) - 2(bold) - baselen bytes
    if ($opts['nodots']) {
        $m += 4;
    }
    if ($opts['nobrackets']) {
        $m += 4;
    }
    if ($opts['nobold']) {
        $m += 2;
    }
    if (strlen($s) > $m) {
        $s = mb_strcut($s, 0, $m); // mb-safe cut to bytes
        if (!$opts['nowordcut']) {
            $s = $cut_to_word($s);
        }
        $e = true;
    }
    if ($e) {
        // trim punc & add ellipses
        $s = ($opts['keeppunc'] ? rtrim($s, ' ') : rtrim($s, ' ;.,')) . (!$opts['nodots'] ? ' ...' : '');
    }
    return $s;
}
// Add $f to the global $custom_loop_functions list (created on first use).
// A function that is already listed is not added again; either outcome is
// logged to stdout.
function register_loop_function($f)
{
    global $custom_loop_functions;
    if (!isset($custom_loop_functions)) {
        $custom_loop_functions = [];
    }
    if (in_array($f, $custom_loop_functions)) {
        echo "Skipping duplicate custom loop function \"$f\"\n";
        return;
    }
    echo "Adding custom loop function \"$f\"\n";
    $custom_loop_functions[] = $f;
}
// Store (or overwrite) the title for URL $u in the title_cache table, then
// prune the table: rows beyond the newest $title_cache_size (by ROWID) are deleted.
function add_to_title_cache($u, $t)
{
    global $db, $title_cache_size;
    $insert = $db->prepare('INSERT OR REPLACE INTO title_cache (url, title) VALUES (:url, :title)');
    foreach ([':url' => $u, ':title' => $t] as $placeholder => $value) {
        $insert->bindValue($placeholder, $value);
    }
    $insert->execute();
    // LIMIT -1 = no limit, so everything after the first $title_cache_size newest rows goes
    $db->query("DELETE FROM title_cache WHERE ROWID IN (SELECT ROWID FROM title_cache ORDER BY ROWID DESC LIMIT -1 OFFSET $title_cache_size)");
}
// Look up the cached title for URL $u in the title_cache table.
// Returns the title, or false when the URL is not cached.
function get_from_title_cache($u)
{
    global $db;
    $select = $db->prepare('SELECT title FROM title_cache WHERE url = :url LIMIT 1;');
    $select->bindValue(':url', $u);
    $row = $select->execute()->fetchArray(SQLITE3_NUM);
    if (!$row) {
        return false;
    }
    return $row[0];
}
// Refresh the global $nitter_hosts regex fragment (used for link titles).
// The list is cached under the shared 'nitter_hosts' data key as [time, hosts].
// A cache younger than 12 hours (43200 s) is used as-is; otherwise the instance
// list is fetched and every instance that looks alive, or was healthy within
// the last 30 days, is included. On a failed fetch the cache timestamp is
// back-dated so the next attempt happens in about 15 minutes.
function nitter_hosts_update()
{
    global $nitter_hosts, $nitter_hosts_time, $run_dir;
    $time = time();
    list($ctime, $chosts) = json_decode(get_data('nitter_hosts', '*'), true) ?: [0, '']; // shared cache
    if ($time - $ctime < 43200) {
        // cache is still fresh
        $nitter_hosts = $chosts;
        $nitter_hosts_time = $time;
        return;
    }
    // pseudo-lock. note on boot should sleep a few secs after loading first bot to update
    set_data('nitter_hosts', json_encode([$time, $nitter_hosts]), '*');
    echo "Updating list of nitter hosts (for link titles)... ";
    $html = curlget([CURLOPT_URL => 'https://status.d420.de/api/v1/instances']);
    $json = @json_decode($html);
    if (!isset($json->hosts)) {
        echo "Failed to get instance info. Will retry in 15 mins.\n";
        set_data('nitter_hosts', json_encode([$time - 42300, $nitter_hosts]), '*');
        $nitter_hosts_time += 900;
        return;
    }
    $hosts = ['nitter.net'];
    foreach ($json->hosts as $host) {
        $usable = $host->healthy
            || $host->points > 0
            || $host->rss == 1
            || array_sum($host->recent_pings) > 0
            || $time - strtotime($host->last_healthy) <= 86400 * 30;
        if ($usable) {
            $hosts[] = explode('://', $host->url)[1];
        }
    }
    echo "Success:\n" . implode(', ', $hosts) . "\n";
    # for direct insertion into preg_replace
    $alternatives = str_replace('\|', '|', preg_quote(implode('|', $hosts)));
    $nitter_hosts = '(?:' . $alternatives . ')';
    set_data('nitter_hosts', json_encode([$time, $nitter_hosts]), '*');
    $nitter_hosts_time = $time;
}
function get_ai_media_title($url, $image_data = null, $mime = null)
{
global $ai_media_titles_key, $ai_media_titles_baseurl, $ai_media_titles_model, $ai_media_titles_prompt, $ai_media_titles_dl_hosts, $ai_media_titles_more_types, $amt_is_gemini, $amt_mt_regex, $amt_debug, $parse_url, $curl_error;
$orig_url = $url;
if (!preg_match("#^https?://[^ ]+?\.(?:jpg|jpeg|png)$#i", $url) || (!empty($ai_media_titles_dl_hosts) && ($ai_media_titles_dl_hosts == "all" || in_array($parse_url['host'], $ai_media_titles_dl_hosts))) || $amt_is_gemini) { // download to check mime type, convert, create data uri if necessary. skip urls with image extension
if (!$image_data) {
$image_data = curlget([CURLOPT_URL => $url], ['scrapingbee_support' => 1]);
if (empty($image_data)) {
if (!empty($curl_error)) {
return false;
} // curlget will output the error
echo "[get_ai_media_title] Failed to download, response blank\n";
return false;
}
}
if (!$mime) {
$finfo = new finfo(FILEINFO_MIME);
$mime = explode(';', $finfo->buffer($image_data))[0];
}
if (!preg_match("#(?:jpeg|png|webp|avif|gif" . ($ai_media_titles_more_types ? $amt_mt_regex : "") . ")$#", $mime)) {
echo "[get_ai_media_title] Only jpg, png, webp, avif, gif" . ($ai_media_titles_more_types ? str_replace('|', ', ', $amt_mt_regex) : "") . " links supported (got $mime)\n";
return false;
}
if (preg_match("#image/(?:webp|avif|gif)#", $mime)) { // convert to png and use data-uri
$im = imagecreatefromstring($image_data);
if (!$im) {
echo "[get_ai_media_title] Error converting image. Corrupt image, missing php-gd or no $mime support?\n";
return false;
}
ob_start();
imagepng($im);
$image_data = ob_get_clean();
$mime = "image/png";
}
$url = "data:$mime;base64," . base64_encode($image_data);
}
$img_obj = new stdClass();
$img_obj->type = "image_url";
$i = new stdClass();
$i->url = $url;
$i->detail = "high";
$img_obj->image_url = $i;
// query
$data = new stdClass();
$data->messages = [];
$msg_obj = new stdClass();
$msg_obj->role = "user";
$c = new stdClass();
$c->type = "text";
$c->text = !empty($ai_media_titles_prompt) ? $ai_media_titles_prompt : 'very short summary on one line. dont describe the format e.g. "the image", "the chart", "a meme", just the subject/content/data. dont add unnecessary moral judgments like "outdated", "controversial", "offensive", "antisemitic". keep it short!';
$msg_obj->content[] = $c;
$data->messages[] = $msg_obj;
// separate message for now due to gemini bug https://tinyurl.com/2v45b99a
$msg_obj = new stdClass();
$msg_obj->role = "user";
$msg_obj->content[] = $img_obj;
$data->messages[] = $msg_obj;
$data->model = $ai_media_titles_model;
$data->stream = false;
$data->temperature = 0;
$r = curlget([
CURLOPT_URL => $ai_media_titles_baseurl . "/chat/completions",
CURLOPT_HTTPHEADER => ["Content-Type: application/json", "Authorization: Bearer " . $ai_media_titles_key],
CURLOPT_CUSTOMREQUEST => "POST",
CURLOPT_POSTFIELDS => json_encode($data),
CURLOPT_CONNECTTIMEOUT => 45,
CURLOPT_TIMEOUT => 45
], ["no_curl_impersonate" => 1]); // image data uris too big for escapeshellarg with curl_impersonate
if ($amt_debug && substr($data->messages[1]->content[0]->image_url->url, 0, 5) == "data:") { // after req to avoid large copy
$data->messages[1]->content[0]->image_url->url = substr($data->messages[1]->content[0]->image_url->url, 0, strpos($data->messages[1]->content[0]->image_url->url, ',') + 17) . '