#!/bin/bash

# HNyoutubes

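# Collect the YouTube links found on a web page and write a markdown
# summary (title, thumbnail, description excerpt) for each of them.
# Usage example:
#   HNyoutubes "https://news.ycombinator.com/item?id=32220192"
# If no argument is given, a default url is used.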

# config
# Output locations. The random suffix keeps successive runs from
# overwriting each other's files.
rnd="$RANDOM"
final="/mnt/d/tmp/final${rnd}.md"       # generated markdown
debuglist="/mnt/d/tmp/page${rnd}.txt"   # copy of the extracted url list

# config test
# Fail early when the markdown file cannot be created. The message goes to
# stderr, like the dependency checks below.
touch -- "${final}" || { >&2 echo "Configure output dir/file.md in script itself." ; exit 1; }

# required
# Stop with a hint on stderr as soon as one of the external tools is missing.
if ! command -v yt-dlp >/dev/null 2>&1; then
  echo "yt-dlp: see https://github.com/yt-dlp/yt-dlp#installation" >&2
  exit 1
fi
if ! command -v curl >/dev/null 2>&1; then
  echo "I need curl (in repos)" >&2
  exit 1
fi
if ! command -v awk >/dev/null 2>&1; then
  echo "I need awk (in repos)" >&2
  exit 1
fi

# Select the url: the first argument when one is given, otherwise the
# default (which is announced on stdout).
if (( $# > 0 )); then
  url="$1"
else
  echo "No argument supplied, using default url"
  url="https://news.ycombinator.com/item?id=32220192"
  echo "$url"
fi

# tmp dir
# Private scratch directory for the downloaded page and intermediate files.
# mktemp picks an unpredictable name and creates the directory atomically,
# accessible only to the current user.
tmp="$(mktemp -d "${TMPDIR:-/tmp}/HNyoutubes.XXXXXX")" || { echo '!! unable to create a tmpdir' >&2; exit 1; }
# Install the cleanup only once the directory exists, so the trap can never
# remove something this script did not create.
trap '[ -n "$tmp" ] && rm -fr -- "$tmp"' EXIT

# get page to disk
# Download the selected url (previously a hard-coded address was fetched and
# the script argument was ignored). Without a page there is nothing to
# parse, so a failed download ends the run.
curl "$url" --output "${tmp}/page".htm || { >&2 echo "!! unable to download $url" ; exit 1; }

# filter stuff to only youtube like urls and remove duplicates (magic awk) without sorting
#   grep -Po : the contents of every href="..." attribute, one per line
#   grep     : keep only the hrefs containing ".you"
#   sed      : decode &#x2F; into "/" and strip trailing slashes
#   awk      : drop repeated lines, keeping the first occurrence in page order
#   tee      : show the list and store it for the main loop
# (swap the awk stage for `sort -u` if a sorted list is preferred)
grep -Po '(?<=href=\")[^\"]*(?=\")' "${tmp}/page".htm \
  | grep "\.you" \
  | sed -e 's/&#x2F;/\//g' -e 's:/*$::' \
  | awk '!x[$0]++' \
  | tee "${tmp}/page".txt

# how many lines
wc -l "${tmp}/page".txt
echo; echo

# tmp debug: uncomment to limit a test run to the last 30 urls
#tail -30 "${tmp}/page".txt > "${tmp}/page10".txt
#cp "${tmp}/page10".txt "${tmp}/page".txt

# tmp debug: keep a copy of the url list outside the tmp dir, which is removed on exit
cp "${tmp}/page".txt "${debuglist}"

# print func
# Write one markdown entry for the current video to stdout.
# Globals read: title, count, line, thumbnail, description
# Output: a heading with the running number, the thumbnail linked to the
#         url, the description lines as a block quote, then two blank lines.
superecho () {
    local desc

    printf '## %s  [%s]\n' "$title" "$count"
    printf '[![%s](%s)](%s)  \n' "$line" "$thumbnail" "$line"

    # Only description lines longer than 20 characters are quoted; "..." is
    # appended to the end of the description before it is split into lines.
    while read -r desc; do
        (( ${#desc} > 20 )) && printf '> %s\n' "$desc"
    done <<< "${description}..."

    printf '\n\n'
}

# main
# Go through the collected urls in order. count numbers every url, including
# the ones that end up without an entry because no title was returned.
count=1
while read -r line; do

    # Metadata for this url is written to a scratch file, overwritten each round.
    yt-dlp --no-abort-on-error --get-title --get-thumbnail --get-description --playlist-end 1 "$line" > "${tmp}/meta".txt

    # Line 1 is taken as the title and line 2 as the thumbnail url (both are
    # assumed to fit on a single line).
    title="$(head -n 1 "${tmp}/meta".txt)"
    thumbnail="$(sed -n '2{p;q;}' "${tmp}/meta".txt)"
    # The description is everything from line 4 on, cut to 500 bytes.
    # NOTE(review): line 3 is skipped as well - confirm that is intended.
    description="$(tail -n +4 "${tmp}/meta".txt | head -c 500)"

    # No title means nothing usable came back: skip the entry.
    if [[ -n "$title" ]]; then
        superecho | tee -a "${final}"
    fi

    count=$(( count + 1 ))

done < "${tmp}/page".txt

printf 'file %s is where the newly generated markdown is.\n' "${final}"