{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Understanding the Interface class"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Install the Transformers, Datasets, and Evaluate libraries to run this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install datasets evaluate transformers[sentencepiece]\n",
    "!pip install gradio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import gradio as gr\n",
    "\n",
    "\n",
    "def reverse_audio(audio):\n",
    "    sr, data = audio\n",
    "    reversed_audio = (sr, np.flipud(data))\n",
    "    return reversed_audio\n",
    "\n",
    "\n",
    "mic = gr.Audio(source=\"microphone\", type=\"numpy\", label=\"Speak here...\")\n",
    "gr.Interface(reverse_audio, mic, \"audio\").launch()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import gradio as gr\n",
    "\n",
    "notes = [\"C\", \"C#\", \"D\", \"D#\", \"E\", \"F\", \"F#\", \"G\", \"G#\", \"A\", \"A#\", \"B\"]\n",
    "\n",
    "\n",
    "def generate_tone(note, octave, duration):\n",
    "    sr = 48000\n",
    "    a4_freq, tones_from_a4 = 440, 12 * (octave - 4) + (note - 9)\n",
    "    frequency = a4_freq * 2 ** (tones_from_a4 / 12)\n",
    "    duration = int(duration)\n",
    "    audio = np.linspace(0, duration, duration * sr)\n",
    "    audio = (20000 * np.sin(audio * (2 * np.pi * frequency))).astype(np.int16)\n",
    "    return (sr, audio)\n",
    "\n",
    "\n",
    "gr.Interface(\n",
    "    generate_tone,\n",
    "    [\n",
    "        gr.Dropdown(notes, type=\"index\"),\n",
    "        gr.Slider(minimum=4, maximum=6, step=1),\n",
    "        gr.Textbox(type=\"number\", value=1, label=\"Duration in seconds\"),\n",
    "    ],\n",
    "    \"audio\",\n",
    ").launch()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import pipeline\n",
    "import gradio as gr\n",
    "\n",
    "model = pipeline(\"automatic-speech-recognition\")\n",
    "\n",
    "\n",
    "def transcribe_audio(mic=None, file=None):\n",
    "    if mic is not None:\n",
    "        audio = mic\n",
    "    elif file is not None:\n",
    "        audio = file\n",
    "    else:\n",
    "        return \"You must either provide a mic recording or a file\"\n",
    "    transcription = model(audio)[\"text\"]\n",
    "    return transcription\n",
    "\n",
    "\n",
    "gr.Interface(\n",
    "    fn=transcribe_audio,\n",
    "    inputs=[\n",
    "        gr.Audio(source=\"microphone\", type=\"filepath\", optional=True),\n",
    "        gr.Audio(source=\"upload\", type=\"filepath\", optional=True),\n",
    "    ],\n",
    "    outputs=\"text\",\n",
    ").launch()"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "Understanding the Interface class",
   "provenance": []
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}