{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# YouTube video to audio"
      ],
      "metadata": {
        "id": "kNt1V_xZCYzb"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "- Author: [Pierre Guillou](https://www.linkedin.com/in/pierreguillou/)\n",
        "- Blog post: [Video-to-Audio | A free Web APP to get mp3 audio file from any YouTube video](https://medium.com/@pierre_guillou/video-to-audio-a-notebook-and-web-app-to-get-mp3-audio-file-from-any-youtube-video-bb6a1c85390d)\n",
        "- Date: 06/12/2023"
      ],
      "metadata": {
        "id": "Fa6V8oEynFe-"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "%%capture\n",
        "# %pip (rather than !pip) installs into the environment of the running kernel\n",
        "%pip install -U yt-dlp\n",
        "%pip install unidecode\n",
        "%pip install gradio\n",
        "%pip install pydub"
      ],
      "metadata": {
        "id": "S4yB5r9RCdkH"
      },
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import gradio as gr\n",
        "import re, unidecode\n",
        "from unidecode import unidecode\n",
        "import yt_dlp\n",
        "import os\n",
        "import pydub\n",
        "import numpy as np\n",
        "\n",
        "# slugify a string: lowercase, no accent, no punctuation, spaces -> underscores\n",
        "def cleanString(string):\n",
        "    \"\"\"Return a lowercase, accent-free, punctuation-free slug of `string`.\n",
        "\n",
        "    Accents are transliterated with unidecode, every character that is\n",
        "    neither a word character nor whitespace is removed, and each space\n",
        "    is turned into an underscore.\n",
        "    \"\"\"\n",
        "    ascii_text = unidecode(string)\n",
        "    without_punctuation = re.sub(r'[^\\w\\s]', '', ascii_text)\n",
        "    return without_punctuation.replace(\" \", \"_\").lower()\n",
        "\n",
        "# from YouTube url to the path of the downloaded mp3 file (returned twice)\n",
        "def download_audio(url):\n",
        "    \"\"\"Download the audio track of a YouTube video and save it as an mp3 file.\n",
        "\n",
        "    Args:\n",
        "        url: URL of the YouTube video.\n",
        "\n",
        "    Returns:\n",
        "        A tuple (file_path, file_path): the path of the mp3 file, twice, so\n",
        "        that a callback with two output components receives one value each.\n",
        "\n",
        "    Raises:\n",
        "        gr.Error: if `url` is empty, or if no mp3 file can be found on disk\n",
        "            once the download has finished.\n",
        "    \"\"\"\n",
        "    if not url or not url.strip():\n",
        "        raise gr.Error(\"Please enter a YouTube video URL.\")\n",
        "\n",
        "    path_to_folder_audio_mp3 = \"./\"\n",
        "    ydl_opts = {\n",
        "        'format': 'm4a/bestaudio/best',\n",
        "        # a video URL that also carries a playlist id must fetch that video only\n",
        "        'noplaylist': True,\n",
        "        'outtmpl': f'{path_to_folder_audio_mp3}%(title)s',\n",
        "        'postprocessors': [{\n",
        "            'key': 'FFmpegExtractAudio',\n",
        "            'preferredcodec': 'mp3',\n",
        "        }]\n",
        "    }\n",
        "\n",
        "    with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
        "        info_dict = ydl.extract_info(url.strip(), download=True)\n",
        "    video_title = info_dict['title']\n",
        "\n",
        "    # Rename the audio file to a cleaned, predictable name\n",
        "    local_link = video_title + \".mp3\"\n",
        "    new_local_link = cleanString(video_title) + \".mp3\"\n",
        "    file_path = os.path.join(path_to_folder_audio_mp3, new_local_link)\n",
        "\n",
        "    # The name written on disk may differ from the raw title (presumably because\n",
        "    # yt-dlp sanitizes it), so candidates are compared through cleanString.\n",
        "    expected_key = cleanString(local_link)\n",
        "    for filename in os.listdir(path_to_folder_audio_mp3):\n",
        "        if filename != new_local_link and cleanString(filename) == expected_key:\n",
        "            # os.replace overwrites a file left by a previous run on every platform\n",
        "            os.replace(os.path.join(path_to_folder_audio_mp3, filename), file_path)\n",
        "\n",
        "    # fail loudly instead of returning a path to a file that does not exist\n",
        "    if not os.path.isfile(file_path):\n",
        "        raise gr.Error(\"The mp3 file could not be found after the download.\")\n",
        "\n",
        "    return file_path, file_path\n",
        "\n",
        "# Gradio UI: a URL textbox and a button that fills a file download and an audio player\n",
        "with gr.Blocks() as demo:\n",
        "    gr.Markdown(\"<h1><center>Free YouTube URL Video-to-Audio</center></h1>\")\n",
        "    gr.Markdown(\"<center>Enter the link of any YouTube video to generate its mp3 audio file.</center>\")\n",
        "\n",
        "    input_text_url = gr.Textbox(label='YouTube URL', placeholder='Youtube video URL')\n",
        "    result_button_audio = gr.Button('Get Audio File')\n",
        "    output_audio_file = gr.File(label='mp3 audio file')\n",
        "    output_audio_play = gr.Audio(label=\"Listen to audio\", type=\"filepath\")\n",
        "\n",
        "    # download_audio returns the mp3 path twice: one value per output component\n",
        "    result_button_audio.click(\n",
        "        fn=download_audio,\n",
        "        inputs=input_text_url,\n",
        "        outputs=[output_audio_file, output_audio_play],\n",
        "    )\n",
        "\n",
        "# enable the request queue, then start the app\n",
        "demo.queue()\n",
        "demo.launch(debug=False)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 625
        },
        "id": "qaTz946afx2p",
        "outputId": "b99aa586-c7a0-4798-af5e-60db35247e92"
      },
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n",
            "\n",
            "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
            "Running on public URL: https://0021f2cde11396ba91.gradio.live\n",
            "\n",
            "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "<div><iframe src=\"https://0021f2cde11396ba91.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": []
          },
          "metadata": {},
          "execution_count": 2
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# END"
      ],
      "metadata": {
        "id": "u9QYxqjtnzCD"
      }
    }
  ]
}