def cleanString(string):
    """Turn arbitrary text into a lowercase, ASCII, file-name-friendly string.

    Steps, in order:
      1. transliterate accented / non-ASCII characters with ``unidecode``;
      2. remove every character that is neither a word character
         (letter, digit, underscore) nor whitespace;
      3. replace each remaining whitespace character with one underscore;
      4. lowercase the result.

    Args:
        string: Text to normalize, e.g. a video title.

    Returns:
        The normalized string. It is empty when the input contains no word
        or whitespace characters.
    """
    ascii_text = unidecode(string)
    # Drop punctuation and symbols; letters, digits, "_" and whitespace stay.
    without_punctuation = re.sub(r"[^\w\s]", "", ascii_text)
    # Substitute every whitespace character, not only " ", so tabs and
    # newlines cannot leak into a file name. The replacement is one-for-one,
    # which keeps the historical output for space-only input ("a  b" -> "a__b").
    underscored = re.sub(r"\s", "_", without_punctuation)
    return underscored.lower()
def download_audio(url):
    """Download the audio of a video as an MP3 file in the working directory.

    The audio is fetched with yt-dlp, converted to MP3 by the
    ``FFmpegExtractAudio`` post-processor, and finally renamed to the cleaned
    video title (see ``cleanString``).

    Args:
        url: URL of the video to download.

    Returns:
        tuple: ``(file_path, file_path)`` -- the path of the MP3 file, twice.
        NOTE(review): presumably one value per UI output component; the
        wiring is outside this block -- confirm against the caller.

    Raises:
        Whatever yt-dlp raises when extraction, download or conversion fails,
        and ``OSError`` if the final rename fails.
    """
    path_to_folder_audio_mp3 = "./"
    ydl_opts = {
        'format': 'm4a/bestaudio/best',
        # Download under an id-based name: the resulting path can then be
        # computed exactly instead of being guessed from the title by
        # scanning the directory.
        'outtmpl': os.path.join(path_to_folder_audio_mp3, '%(id)s.%(ext)s'),
        # This function returns a single file, so a URL that carries both a
        # video and a playlist must resolve to the video only.
        'noplaylist': True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }]
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)
        # prepare_filename() applies the output template to the metadata;
        # the post-processor keeps that stem and changes the extension.
        downloaded_path = os.path.splitext(ydl.prepare_filename(info_dict))[0] + ".mp3"

    video_title = info_dict.get('title') or ''
    # A title made only of punctuation cleans to "", which would produce the
    # file name ".mp3"; fall back to the video id, then to a fixed name.
    clean_title = (
        cleanString(video_title)
        or cleanString(str(info_dict.get('id') or ''))
        or "audio"
    )
    new_local_link = clean_title + ".mp3"

    # get audio file path
    file_path = path_to_folder_audio_mp3 + new_local_link

    # os.replace overwrites an existing target on every platform, so
    # downloading the same video twice does not fail on Windows.
    os.replace(downloaded_path, file_path)

    return file_path, file_path