# %% [markdown]
# # YouTube video to audio
#
# - Author: [Pierre Guillou](https://www.linkedin.com/in/pierreguillou/)
# - Blog post: [Video-to-Audio | A free Web APP to get mp3 audio file from any YouTube video](https://medium.com/@pierre_guillou/video-to-audio-a-notebook-and-web-app-to-get-mp3-audio-file-from-any-youtube-video-bb6a1c85390d)
# - Date: 06/12/2023

# %%
# Notebook-only setup cell (IPython magics, kept as comments so this stays valid Python):
# %%capture
# !python3 -m pip install -U yt-dlp
# !pip install unidecode
# !pip install gradio
# !pip install pydub

# %%
import os
import re

import gradio as gr
import numpy as np
import pydub
import yt_dlp
from unidecode import unidecode

# Folder where the mp3 files are written.
AUDIO_DIR = "./"


def cleanString(string):
    """Turn an arbitrary title into a lower-case, file-name friendly token.

    The text is passed through ``unidecode`` (drops accents), every character
    that is neither a word character nor whitespace is removed, and each
    whitespace character is replaced by an underscore.

    Args:
        string: Text to normalise (e.g. a video title).

    Returns:
        The normalised, lower-cased string. May be empty when ``string``
        contains nothing but punctuation.
    """
    ascii_text = unidecode(string)
    without_punctuation = re.sub(r'[^\w\s]', '', ascii_text)
    # Replace every whitespace character (not only the plain space) so that
    # tabs and newlines cannot end up in a file name.
    return re.sub(r'\s', '_', without_punctuation).lower()


def download_audio(url):
    """Download the audio track of a YouTube video as an mp3 file.

    Args:
        url: URL of the YouTube video.

    Returns:
        A ``(file_path, file_path)`` tuple: the same mp3 path twice, once for
        the ``gr.File`` output and once for the ``gr.Audio`` output.

    Raises:
        gr.Error: If ``url`` is empty, the download fails, or the converted
            mp3 file cannot be found afterwards.
    """
    url = (url or "").strip()
    if not url:
        raise gr.Error("Please enter a YouTube video URL.")

    ydl_opts = {
        'format': 'm4a/bestaudio/best',
        # Name the download after the video id rather than the title: the
        # resulting file name is then known exactly, instead of having to be
        # guessed by scanning the folder for a title-like name.
        'outtmpl': os.path.join(AUDIO_DIR, '%(id)s.%(ext)s'),
        # A watch URL that also carries a playlist id must yield one video.
        'noplaylist': True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }]
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
    except yt_dlp.utils.DownloadError as exc:
        raise gr.Error(f"Could not download audio: {exc}")

    video_id = info_dict.get('id') or ''
    video_title = info_dict.get('title') or ''

    downloaded_path = os.path.join(AUDIO_DIR, video_id + ".mp3")
    if not video_id or not os.path.isfile(downloaded_path):
        raise gr.Error("The mp3 file was not produced for this URL.")

    # Rename the audio file after the cleaned title; fall back to the video id
    # when the title cleans down to an empty string.
    new_local_link = (cleanString(video_title) or video_id) + ".mp3"
    file_path = os.path.join(AUDIO_DIR, new_local_link)
    # os.replace overwrites a file left by a previous run on every platform.
    os.replace(downloaded_path, file_path)

    return file_path, file_path


with gr.Blocks() as demo:
    gr.Markdown("\n\nFree YouTube URL Video-to-Audio\n\n")
    gr.Markdown("\nEnter the link of any YouTube video to generate its mp3 audio file.\n")

    input_text_url = gr.Textbox(placeholder='Youtube video URL', label='YouTube URL')
    result_button_audio = gr.Button('Get Audio File')
    output_audio_file = gr.File(label='mp3 audio file')
    output_audio_play = gr.Audio(type="filepath", label="Listen to audio")

    result_button_audio.click(
        download_audio,
        inputs=input_text_url,
        outputs=[output_audio_file, output_audio_play],
    )

demo.queue().launch(debug=False)

# %% [markdown]
# # END