#!/bin/bash
# HNyoutubes
# Get youtube links and stuff from some web page and append a markdown
# summary (title, thumbnail, start of description) for each one to a file.
#
# usage example:
#   HNyoutubes "https://youtube/somelink/to/video"
# If no arguments are given, the default url below is used.

# config: output locations (edit these to match your system)
rnd="$RANDOM"
final="/mnt/d/tmp/final${rnd}.md"
debuglist="/mnt/d/tmp/page${rnd}.txt"

# config test: bail out early when the output file cannot be created
touch "${final}" || { echo "Configure output dir/file.md in script itself." ; exit 1; }

# required external tools
command -v yt-dlp >/dev/null 2>&1 || { >&2 echo "yt-dlp: see https://github.com/yt-dlp/yt-dlp#installation" ; exit 1; }
command -v curl >/dev/null 2>&1 || { >&2 echo "I need curl (in repos)" ; exit 1; }
command -v awk >/dev/null 2>&1 || { >&2 echo "I need awk (in repos)" ; exit 1; }

# if no argument, use default url
if (( $# == 0 )); then
  echo "No argument supplied, using default url"
  url="https://news.ycombinator.com/item?id=32220192"
  echo "$url"
else
  url="$1"
fi

# tmp dir: created by mktemp (unpredictable name), removed on any exit path
tmp="$(mktemp -d "${TMPDIR:-/tmp}/HNyoutubes.XXXXXX")" \
  || { echo '!! unable to create a tmpdir' >&2; exit 1; }
trap '[ -n "$tmp" ] && rm -fr -- "$tmp"' EXIT

# get page to disk; fetch the selected url (not a hard-coded one) and stop
# when the download fails, since nothing below can work without the page
curl --fail "$url" --output "${tmp}/page.htm" \
  || { echo "!! unable to download ${url}" >&2; exit 1; }

# filter stuff to only youtube like urls and remove duplicates without sorting
# (awk '!x[$0]++' prints a line only the first time it is seen).
# NOTE(review): the slash-decoding sed expression was unparsable as found
# ('s///\//g'); it is restored here on the assumption that the page encodes
# "/" inside href values as the HTML entity &#x2F; -- confirm against the page.
grep -Po '(?<=href=\")[^\"]*(?=\")' "${tmp}/page.htm" \
  | grep '\.you' \
  | sed 's:&#x2F;:/:g' \
  | sed 's:/*$::' \
  | awk '!x[$0]++' \
  | tee "${tmp}/page.txt"

# how many lines
wc -l "${tmp}/page.txt"
echo; echo

# tmp debug, lets just take first 10 urls for testing
#tail -30 "${tmp}/page.txt" > "${tmp}/page10.txt"
#cp "${tmp}/page10.txt" "${tmp}/page.txt"

# tmp debug: keep a copy of the url list outside the tmp dir
cp "${tmp}/page.txt" "${debuglist}"

#######################################
# Print one markdown entry for the current video.
# Globals (read): title, count, line, thumbnail, description
# Arguments: none
# Outputs: markdown on stdout: a heading, a linked thumbnail and, as
#          block quotes, the description lines longer than 20 characters
#######################################
superecho () {
  echo "## $title [$count]"
  echo "[![$line]($thumbnail)]($line)  "
  echo "${description}..." \
    | while read -r desc; do
        if [[ ${#desc} -gt 20 ]]; then
          echo "> $desc"
        fi
      done
  echo; echo
}

# main: one yt-dlp metadata lookup per url, appended to the final markdown
count="1"
while read -r line; do
  # stdin is redirected so yt-dlp cannot consume the url list feeding this loop
  yt-dlp --no-abort-on-error --get-title --get-thumbnail --get-description \
    --playlist-end 1 "$line" > "${tmp}/meta.txt" < /dev/null

  title="$(sed -n '1p' "${tmp}/meta.txt")"      # assumes title is on single line
  thumbnail="$(sed -n '2p' "${tmp}/meta.txt")"  # assumes thumb url is on single line
  # NOTE(review): lines 1-3 are dropped although only lines 1-2 are consumed
  # above, so line 3 of the metadata never appears -- confirm this is intended.
  description="$(sed -e '1,3d' "${tmp}/meta.txt" | head -c500)"

  # skip entries for which no title could be read
  if [[ -n "$title" ]]; then
    superecho | tee -a "${final}"
  fi
  count=$(( count + 1 ))
done < "${tmp}/page.txt"

echo "file ${final} is where the newly generated markdown is."