{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Understanding the Interface class"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Install the Transformers, Datasets, Evaluate, and Gradio libraries to run this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install datasets evaluate \"transformers[sentencepiece]\"\n",
    "%pip install \"gradio>=4.0\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reversing a microphone recording\n",
    "\n",
    "The input is a microphone recording and the output is the same clip with its samples in reverse order."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import gradio as gr\n",
    "\n",
    "\n",
    "def reverse_audio(audio):\n",
    "    \"\"\"Return the recorded clip with its samples in reverse order.\n",
    "\n",
    "    Args:\n",
    "        audio: A `(sample_rate, samples)` tuple, or `None` when nothing\n",
    "            was recorded.\n",
    "\n",
    "    Returns:\n",
    "        A `(sample_rate, reversed_samples)` tuple, or `None` if `audio`\n",
    "        is `None`.\n",
    "    \"\"\"\n",
    "    if audio is None:\n",
    "        # Nothing was recorded: return no audio instead of failing on the\n",
    "        # tuple unpacking below.\n",
    "        return None\n",
    "    sr, data = audio\n",
    "    # np.flipud reverses the array along its first axis.\n",
    "    return (sr, np.flipud(data))\n",
    "\n",
    "\n",
    "# Gradio 4 replaced the `source` string argument with a `sources` list.\n",
    "mic = gr.Audio(sources=[\"microphone\"], type=\"numpy\", label=\"Speak here...\")\n",
    "gr.Interface(reverse_audio, mic, \"audio\").launch()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generating a tone\n",
    "\n",
    "This interface takes three inputs (a note, an octave, and a duration) and returns a synthesized sine tone."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import gradio as gr\n",
    "\n",
    "notes = [\"C\", \"C#\", \"D\", \"D#\", \"E\", \"F\", \"F#\", \"G\", \"G#\", \"A\", \"A#\", \"B\"]\n",
    "\n",
    "\n",
    "def generate_tone(note, octave, duration):\n",
    "    \"\"\"Synthesize a sine tone for the requested note.\n",
    "\n",
    "    Args:\n",
    "        note: Index of the note in `notes` (0 is C, 9 is A).\n",
    "        octave: Octave number; A in octave 4 is tuned to 440 Hz.\n",
    "        duration: Length of the tone in seconds; truncated to a whole number.\n",
    "\n",
    "    Returns:\n",
    "        A `(sample_rate, samples)` tuple with int16 samples at 48000 Hz.\n",
    "\n",
    "    Raises:\n",
    "        gr.Error: If any of the three inputs is `None`.\n",
    "    \"\"\"\n",
    "    if note is None or octave is None or duration is None:\n",
    "        # An empty input arrives as None; report it in the UI instead of\n",
    "        # failing with a TypeError in the arithmetic below.\n",
    "        raise gr.Error(\"Please choose a note, an octave and a duration.\")\n",
    "    sr = 48000\n",
    "    a4_freq = 440\n",
    "    # Distance from A4 in semitones: 12 per octave, and A has index 9 in `notes`.\n",
    "    tones_from_a4 = 12 * (octave - 4) + (note - 9)\n",
    "    frequency = a4_freq * 2 ** (tones_from_a4 / 12)\n",
    "    duration = int(duration)\n",
    "    # endpoint=False keeps the sample spacing at exactly 1 / sr seconds.\n",
    "    timeline = np.linspace(0, duration, duration * sr, endpoint=False)\n",
    "    audio = (20000 * np.sin(timeline * (2 * np.pi * frequency))).astype(np.int16)\n",
    "    return (sr, audio)\n",
    "\n",
    "\n",
    "gr.Interface(\n",
    "    generate_tone,\n",
    "    [\n",
    "        gr.Dropdown(notes, type=\"index\"),\n",
    "        gr.Slider(minimum=4, maximum=6, step=1),\n",
    "        # gr.Number supplies a numeric value; Textbox has no \"number\" type.\n",
    "        gr.Number(value=1, label=\"Duration in seconds\"),\n",
    "    ],\n",
    "    \"audio\",\n",
    ").launch()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Transcribing speech\n",
    "\n",
    "This interface accepts either a microphone recording or an uploaded audio file and returns the transcription produced by a speech-recognition pipeline."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import pipeline\n",
    "import gradio as gr\n",
    "\n",
    "model = pipeline(\"automatic-speech-recognition\")\n",
    "\n",
"\n", "def transcribe_audio(mic=None, file=None):\n", " if mic is not None:\n", " audio = mic\n", " elif file is not None:\n", " audio = file\n", " else:\n", " return \"You must either provide a mic recording or a file\"\n", " transcription = model(audio)[\"text\"]\n", " return transcription\n", "\n", "\n", "gr.Interface(\n", " fn=transcribe_audio,\n", " inputs=[\n", " gr.Audio(source=\"microphone\", type=\"filepath\", optional=True),\n", " gr.Audio(source=\"upload\", type=\"filepath\", optional=True),\n", " ],\n", " outputs=\"text\",\n", ").launch()" ] } ], "metadata": { "colab": { "name": "Understanding the Interface class", "provenance": [] } }, "nbformat": 4, "nbformat_minor": 4 }